summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r--drivers/net/ethernet/mellanox/Kconfig25
-rw-r--r--drivers/net/ethernet/mellanox/Makefile8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/Kconfig48
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/Makefile12
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/alloc.c819
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/catas.c334
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c3443
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cq.c421
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/crdump.c239
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_clock.c303
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_cq.c218
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c753
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c2161
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c394
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c3686
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_port.c432
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_port.h586
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_resources.c115
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c1287
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_selftest.c204
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c1211
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c1567
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c3108
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.h257
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw_qos.c289
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw_qos.h145
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/icm.c494
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/icm.h144
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/intf.c275
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c4462
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c1649
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h1486
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h858
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h132
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mr.c1157
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/pd.c295
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c2229
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/profile.c270
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c965
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/reset.c184
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c5427
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/sense.c143
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/srq.c309
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig120
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c109
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h88
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c80
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c311
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c1919
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c221
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c593
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c498
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h286
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c950
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h175
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h979
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h212
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c235
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c357
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c325
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h80
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c547
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h161
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c401
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c133
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c231
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h106
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c383
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c710
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c194
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c1206
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dim.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c1701
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c1576
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c844
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5221
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c1295
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h169
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c1470
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c355
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c1404
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h284
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c3063
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c730
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c144
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c991
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c2219
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h319
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c1368
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c238
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h91
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c1047
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c327
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h114
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c1533
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h142
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c170
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h212
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c622
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c768
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h101
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c2720
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h286
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c360
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c527
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c401
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c258
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c697
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h125
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c357
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c691
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c616
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c158
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c202
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h95
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c230
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mad.c75
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1721
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mcg.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h217
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c193
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c596
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pd.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c1116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c636
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rl.c288
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c282
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c692
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c621
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/uar.c329
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c1203
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c267
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h261
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/Kconfig13
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw.h111
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c275
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c620
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h66
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h60
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h103
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h98
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c126
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h71
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig109
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile34
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/cmd.h1180
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c1914
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h395
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c1233
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h71
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c460
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h233
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c346
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_thermal.c414
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/emad.h95
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/i2c.c551
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/i2c.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/ib.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/item.h509
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c1853
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h254
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/port.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h8894
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/resources.h148
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c5154
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h728
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c244
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c428
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c342
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c239
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c271
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c891
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c536
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c204
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c1168
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c199
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c285
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c973
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h228
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c1024
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c176
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h23
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c709
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c1307
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c961
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c536
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c338
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h66
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c75
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c1049
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h95
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c615
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c757
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c7572
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h121
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c977
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h78
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c2450
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/switchib.c573
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/switchx2.c1736
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h78
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/txheader.h49
234 files changed, 154253 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig
new file mode 100644
index 000000000..872548cd9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/Kconfig
@@ -0,0 +1,25 @@
+#
+# Mellanox driver configuration
+#
+
+config NET_VENDOR_MELLANOX
+ bool "Mellanox devices"
+ default y
+ depends on PCI || I2C
+ ---help---
+ If you have a network (Ethernet or RDMA) device belonging to this
+ class, say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Mellanox cards. If you say Y, you will be asked
+ for your specific card in the following questions.
+
+if NET_VENDOR_MELLANOX
+
+source "drivers/net/ethernet/mellanox/mlx4/Kconfig"
+source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig"
+source "drivers/net/ethernet/mellanox/mlxsw/Kconfig"
+source "drivers/net/ethernet/mellanox/mlxfw/Kconfig"
+
+endif # NET_VENDOR_MELLANOX
diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile
new file mode 100644
index 000000000..016aa263b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the Mellanox device drivers.
+#
+
+obj-$(CONFIG_MLX4_CORE) += mlx4/
+obj-$(CONFIG_MLX5_CORE) += mlx5/core/
+obj-$(CONFIG_MLXSW_CORE) += mlxsw/
+obj-$(CONFIG_MLXFW) += mlxfw/
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
new file mode 100644
index 000000000..f200b8c42
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -0,0 +1,48 @@
+#
+# Mellanox driver configuration
+#
+
+config MLX4_EN
+ tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
+ depends on MAY_USE_DEVLINK
+ depends on PCI && NETDEVICES && ETHERNET && INET
+ select MLX4_CORE
+ imply PTP_1588_CLOCK
+ ---help---
+ This driver supports Mellanox Technologies ConnectX Ethernet
+ devices.
+
+config MLX4_EN_DCB
+ bool "Data Center Bridging (DCB) Support"
+ default y
+ depends on MLX4_EN && DCB
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
+ If set to N, will not be able to configure QoS and ratelimit attributes.
+ This flag is depended on the kernel's DCB support.
+
+ If unsure, set to Y
+
+config MLX4_CORE
+ tristate
+ depends on PCI
+ default n
+
+config MLX4_DEBUG
+ bool "Verbose debugging output" if (MLX4_CORE && EXPERT)
+ depends on MLX4_CORE
+ default y
+ ---help---
+ This option causes debugging code to be compiled into the
+ mlx4_core driver. The output can be turned on via the
+ debug_level module parameter (which can also be set after
+ the driver is loaded through sysfs).
+
+config MLX4_CORE_GEN2
+ bool "Support for old gen2 Mellanox PCI IDs" if (MLX4_CORE)
+ depends on MLX4_CORE
+ default y
+ ---help---
+ Say Y here if you want to use old gen2 Mellanox devices in the
+ driver.
diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile
new file mode 100644
index 000000000..3f400770f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MLX4_CORE) += mlx4_core.o
+
+mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
+ main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
+ srq.o resource_tracker.o crdump.o
+
+obj-$(CONFIG_MLX4_EN) += mlx4_en.o
+
+mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \
+ en_resources.o en_netdev.o en_selftest.o en_clock.o
+mlx4_en-$(CONFIG_MLX4_EN_DCB) += en_dcb_nl.o
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
new file mode 100644
index 000000000..21788d4f9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -0,0 +1,819 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+
+#include "mlx4.h"
+
+u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
+{
+ u32 obj;
+
+ spin_lock(&bitmap->lock);
+
+ obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
+ if (obj >= bitmap->max) {
+ bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+ & bitmap->mask;
+ obj = find_first_zero_bit(bitmap->table, bitmap->max);
+ }
+
+ if (obj < bitmap->max) {
+ set_bit(obj, bitmap->table);
+ bitmap->last = (obj + 1);
+ if (bitmap->last == bitmap->max)
+ bitmap->last = 0;
+ obj |= bitmap->top;
+ } else
+ obj = -1;
+
+ if (obj != -1)
+ --bitmap->avail;
+
+ spin_unlock(&bitmap->lock);
+
+ return obj;
+}
+
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr)
+{
+ mlx4_bitmap_free_range(bitmap, obj, 1, use_rr);
+}
+
+static unsigned long find_aligned_range(unsigned long *bitmap,
+ u32 start, u32 nbits,
+ int len, int align, u32 skip_mask)
+{
+ unsigned long end, i;
+
+again:
+ start = ALIGN(start, align);
+
+ while ((start < nbits) && (test_bit(start, bitmap) ||
+ (start & skip_mask)))
+ start += align;
+
+ if (start >= nbits)
+ return -1;
+
+ end = start+len;
+ if (end > nbits)
+ return -1;
+
+ for (i = start + 1; i < end; i++) {
+ if (test_bit(i, bitmap) || ((u32)i & skip_mask)) {
+ start = i + 1;
+ goto again;
+ }
+ }
+
+ return start;
+}
+
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+ int align, u32 skip_mask)
+{
+ u32 obj;
+
+ if (likely(cnt == 1 && align == 1 && !skip_mask))
+ return mlx4_bitmap_alloc(bitmap);
+
+ spin_lock(&bitmap->lock);
+
+ obj = find_aligned_range(bitmap->table, bitmap->last,
+ bitmap->max, cnt, align, skip_mask);
+ if (obj >= bitmap->max) {
+ bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+ & bitmap->mask;
+ obj = find_aligned_range(bitmap->table, 0, bitmap->max,
+ cnt, align, skip_mask);
+ }
+
+ if (obj < bitmap->max) {
+ bitmap_set(bitmap->table, obj, cnt);
+ if (obj == bitmap->last) {
+ bitmap->last = (obj + cnt);
+ if (bitmap->last >= bitmap->max)
+ bitmap->last = 0;
+ }
+ obj |= bitmap->top;
+ } else
+ obj = -1;
+
+ if (obj != -1)
+ bitmap->avail -= cnt;
+
+ spin_unlock(&bitmap->lock);
+
+ return obj;
+}
+
+u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap)
+{
+ return bitmap->avail;
+}
+
+static u32 mlx4_bitmap_masked_value(struct mlx4_bitmap *bitmap, u32 obj)
+{
+ return obj & (bitmap->max + bitmap->reserved_top - 1);
+}
+
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+ int use_rr)
+{
+ obj &= bitmap->max + bitmap->reserved_top - 1;
+
+ spin_lock(&bitmap->lock);
+ if (!use_rr) {
+ bitmap->last = min(bitmap->last, obj);
+ bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+ & bitmap->mask;
+ }
+ bitmap_clear(bitmap->table, obj, cnt);
+ bitmap->avail += cnt;
+ spin_unlock(&bitmap->lock);
+}
+
+int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
+ u32 reserved_bot, u32 reserved_top)
+{
+ /* num must be a power of 2 */
+ if (num != roundup_pow_of_two(num))
+ return -EINVAL;
+
+ bitmap->last = 0;
+ bitmap->top = 0;
+ bitmap->max = num - reserved_top;
+ bitmap->mask = mask;
+ bitmap->reserved_top = reserved_top;
+ bitmap->avail = num - reserved_top - reserved_bot;
+ bitmap->effective_len = bitmap->avail;
+ spin_lock_init(&bitmap->lock);
+ bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long),
+ GFP_KERNEL);
+ if (!bitmap->table)
+ return -ENOMEM;
+
+ bitmap_set(bitmap->table, 0, reserved_bot);
+
+ return 0;
+}
+
+void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
+{
+ kfree(bitmap->table);
+}
+
+struct mlx4_zone_allocator {
+ struct list_head entries;
+ struct list_head prios;
+ u32 last_uid;
+ u32 mask;
+ /* protect the zone_allocator from concurrent accesses */
+ spinlock_t lock;
+ enum mlx4_zone_alloc_flags flags;
+};
+
+struct mlx4_zone_entry {
+ struct list_head list;
+ struct list_head prio_list;
+ u32 uid;
+ struct mlx4_zone_allocator *allocator;
+ struct mlx4_bitmap *bitmap;
+ int use_rr;
+ int priority;
+ int offset;
+ enum mlx4_zone_flags flags;
+};
+
+struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags)
+{
+ struct mlx4_zone_allocator *zones = kmalloc(sizeof(*zones), GFP_KERNEL);
+
+ if (NULL == zones)
+ return NULL;
+
+ INIT_LIST_HEAD(&zones->entries);
+ INIT_LIST_HEAD(&zones->prios);
+ spin_lock_init(&zones->lock);
+ zones->last_uid = 0;
+ zones->mask = 0;
+ zones->flags = flags;
+
+ return zones;
+}
+
+int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc,
+ struct mlx4_bitmap *bitmap,
+ u32 flags,
+ int priority,
+ int offset,
+ u32 *puid)
+{
+ u32 mask = mlx4_bitmap_masked_value(bitmap, (u32)-1);
+ struct mlx4_zone_entry *it;
+ struct mlx4_zone_entry *zone = kmalloc(sizeof(*zone), GFP_KERNEL);
+
+ if (NULL == zone)
+ return -ENOMEM;
+
+ zone->flags = flags;
+ zone->bitmap = bitmap;
+ zone->use_rr = (flags & MLX4_ZONE_USE_RR) ? MLX4_USE_RR : 0;
+ zone->priority = priority;
+ zone->offset = offset;
+
+ spin_lock(&zone_alloc->lock);
+
+ zone->uid = zone_alloc->last_uid++;
+ zone->allocator = zone_alloc;
+
+ if (zone_alloc->mask < mask)
+ zone_alloc->mask = mask;
+
+ list_for_each_entry(it, &zone_alloc->prios, prio_list)
+ if (it->priority >= priority)
+ break;
+
+ if (&it->prio_list == &zone_alloc->prios || it->priority > priority)
+ list_add_tail(&zone->prio_list, &it->prio_list);
+ list_add_tail(&zone->list, &it->list);
+
+ spin_unlock(&zone_alloc->lock);
+
+ *puid = zone->uid;
+
+ return 0;
+}
+
+/* Should be called under a lock */
+static void __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry)
+{
+ struct mlx4_zone_allocator *zone_alloc = entry->allocator;
+
+ if (!list_empty(&entry->prio_list)) {
+ /* Check if we need to add an alternative node to the prio list */
+ if (!list_is_last(&entry->list, &zone_alloc->entries)) {
+ struct mlx4_zone_entry *next = list_first_entry(&entry->list,
+ typeof(*next),
+ list);
+
+ if (next->priority == entry->priority)
+ list_add_tail(&next->prio_list, &entry->prio_list);
+ }
+
+ list_del(&entry->prio_list);
+ }
+
+ list_del(&entry->list);
+
+ if (zone_alloc->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP) {
+ u32 mask = 0;
+ struct mlx4_zone_entry *it;
+
+ list_for_each_entry(it, &zone_alloc->prios, prio_list) {
+ u32 cur_mask = mlx4_bitmap_masked_value(it->bitmap, (u32)-1);
+
+ if (mask < cur_mask)
+ mask = cur_mask;
+ }
+ zone_alloc->mask = mask;
+ }
+}
+
+void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc)
+{
+ struct mlx4_zone_entry *zone, *tmp;
+
+ spin_lock(&zone_alloc->lock);
+
+ list_for_each_entry_safe(zone, tmp, &zone_alloc->entries, list) {
+ list_del(&zone->list);
+ list_del(&zone->prio_list);
+ kfree(zone);
+ }
+
+ spin_unlock(&zone_alloc->lock);
+ kfree(zone_alloc);
+}
+
+/* Should be called under a lock */
+static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count,
+ int align, u32 skip_mask, u32 *puid)
+{
+ u32 uid = 0;
+ u32 res;
+ struct mlx4_zone_allocator *zone_alloc = zone->allocator;
+ struct mlx4_zone_entry *curr_node;
+
+ res = mlx4_bitmap_alloc_range(zone->bitmap, count,
+ align, skip_mask);
+
+ if (res != (u32)-1) {
+ res += zone->offset;
+ uid = zone->uid;
+ goto out;
+ }
+
+ list_for_each_entry(curr_node, &zone_alloc->prios, prio_list) {
+ if (unlikely(curr_node->priority == zone->priority))
+ break;
+ }
+
+ if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO) {
+ struct mlx4_zone_entry *it = curr_node;
+
+ list_for_each_entry_continue_reverse(it, &zone_alloc->entries, list) {
+ res = mlx4_bitmap_alloc_range(it->bitmap, count,
+ align, skip_mask);
+ if (res != (u32)-1) {
+ res += it->offset;
+ uid = it->uid;
+ goto out;
+ }
+ }
+ }
+
+ if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO) {
+ struct mlx4_zone_entry *it = curr_node;
+
+ list_for_each_entry_from(it, &zone_alloc->entries, list) {
+ if (unlikely(it == zone))
+ continue;
+
+ if (unlikely(it->priority != curr_node->priority))
+ break;
+
+ res = mlx4_bitmap_alloc_range(it->bitmap, count,
+ align, skip_mask);
+ if (res != (u32)-1) {
+ res += it->offset;
+ uid = it->uid;
+ goto out;
+ }
+ }
+ }
+
+ if (zone->flags & MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO) {
+ if (list_is_last(&curr_node->prio_list, &zone_alloc->prios))
+ goto out;
+
+ curr_node = list_first_entry(&curr_node->prio_list,
+ typeof(*curr_node),
+ prio_list);
+
+ list_for_each_entry_from(curr_node, &zone_alloc->entries, list) {
+ res = mlx4_bitmap_alloc_range(curr_node->bitmap, count,
+ align, skip_mask);
+ if (res != (u32)-1) {
+ res += curr_node->offset;
+ uid = curr_node->uid;
+ goto out;
+ }
+ }
+ }
+
+out:
+ if (NULL != puid && res != (u32)-1)
+ *puid = uid;
+ return res;
+}
+
+/* Should be called under a lock */
+static void __mlx4_free_from_zone(struct mlx4_zone_entry *zone, u32 obj,
+ u32 count)
+{
+ mlx4_bitmap_free_range(zone->bitmap, obj - zone->offset, count, zone->use_rr);
+}
+
+/* Should be called under a lock */
+static struct mlx4_zone_entry *__mlx4_find_zone_by_uid(
+ struct mlx4_zone_allocator *zones, u32 uid)
+{
+ struct mlx4_zone_entry *zone;
+
+ list_for_each_entry(zone, &zones->entries, list) {
+ if (zone->uid == uid)
+ return zone;
+ }
+
+ return NULL;
+}
+
+struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid)
+{
+ struct mlx4_zone_entry *zone;
+ struct mlx4_bitmap *bitmap;
+
+ spin_lock(&zones->lock);
+
+ zone = __mlx4_find_zone_by_uid(zones, uid);
+
+ bitmap = zone == NULL ? NULL : zone->bitmap;
+
+ spin_unlock(&zones->lock);
+
+ return bitmap;
+}
+
+int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid)
+{
+ struct mlx4_zone_entry *zone;
+ int res = 0;
+
+ spin_lock(&zones->lock);
+
+ zone = __mlx4_find_zone_by_uid(zones, uid);
+
+ if (NULL == zone) {
+ res = -1;
+ goto out;
+ }
+
+ __mlx4_zone_remove_one_entry(zone);
+
+out:
+ spin_unlock(&zones->lock);
+ kfree(zone);
+
+ return res;
+}
+
+/* Should be called under a lock */
+static struct mlx4_zone_entry *__mlx4_find_zone_by_uid_unique(
+ struct mlx4_zone_allocator *zones, u32 obj)
+{
+ struct mlx4_zone_entry *zone, *zone_candidate = NULL;
+ u32 dist = (u32)-1;
+
+ /* Search for the smallest zone that this obj could be
+ * allocated from. This is done in order to handle
+ * situations when small bitmaps are allocated from bigger
+ * bitmaps (and the allocated space is marked as reserved in
+ * the bigger bitmap.
+ */
+ list_for_each_entry(zone, &zones->entries, list) {
+ if (obj >= zone->offset) {
+ u32 mobj = (obj - zone->offset) & zones->mask;
+
+ if (mobj < zone->bitmap->max) {
+ u32 curr_dist = zone->bitmap->effective_len;
+
+ if (curr_dist < dist) {
+ dist = curr_dist;
+ zone_candidate = zone;
+ }
+ }
+ }
+ }
+
+ return zone_candidate;
+}
+
+u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
+ int align, u32 skip_mask, u32 *puid)
+{
+ struct mlx4_zone_entry *zone;
+ int res = -1;
+
+ spin_lock(&zones->lock);
+
+ zone = __mlx4_find_zone_by_uid(zones, uid);
+
+ if (NULL == zone)
+ goto out;
+
+ res = __mlx4_alloc_from_zone(zone, count, align, skip_mask, puid);
+
+out:
+ spin_unlock(&zones->lock);
+
+ return res;
+}
+
+u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count)
+{
+ struct mlx4_zone_entry *zone;
+ int res = 0;
+
+ spin_lock(&zones->lock);
+
+ zone = __mlx4_find_zone_by_uid(zones, uid);
+
+ if (NULL == zone) {
+ res = -1;
+ goto out;
+ }
+
+ __mlx4_free_from_zone(zone, obj, count);
+
+out:
+ spin_unlock(&zones->lock);
+
+ return res;
+}
+
+u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count)
+{
+ struct mlx4_zone_entry *zone;
+ int res;
+
+ if (!(zones->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP))
+ return -EFAULT;
+
+ spin_lock(&zones->lock);
+
+ zone = __mlx4_find_zone_by_uid_unique(zones, obj);
+
+ if (NULL == zone) {
+ res = -1;
+ goto out;
+ }
+
+ __mlx4_free_from_zone(zone, obj, count);
+ res = 0;
+
+out:
+ spin_unlock(&zones->lock);
+
+ return res;
+}
+
+static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size,
+ struct mlx4_buf *buf)
+{
+ dma_addr_t t;
+
+ buf->nbufs = 1;
+ buf->npages = 1;
+ buf->page_shift = get_order(size) + PAGE_SHIFT;
+ buf->direct.buf =
+ dma_zalloc_coherent(&dev->persist->pdev->dev,
+ size, &t, GFP_KERNEL);
+ if (!buf->direct.buf)
+ return -ENOMEM;
+
+ buf->direct.map = t;
+
+ while (t & ((1 << buf->page_shift) - 1)) {
+ --buf->page_shift;
+ buf->npages *= 2;
+ }
+
+ return 0;
+}
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0. If the
+ * requested size is > max_direct, we split the allocation into
+ * multiple pages, so we don't require too much contiguous memory.
+ */
+int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
+ struct mlx4_buf *buf)
+{
+ if (size <= max_direct) {
+ return mlx4_buf_direct_alloc(dev, size, buf);
+ } else {
+ dma_addr_t t;
+ int i;
+
+ buf->direct.buf = NULL;
+ buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ buf->npages = buf->nbufs;
+ buf->page_shift = PAGE_SHIFT;
+ buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
+ GFP_KERNEL);
+ if (!buf->page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < buf->nbufs; ++i) {
+ buf->page_list[i].buf =
+ dma_zalloc_coherent(&dev->persist->pdev->dev,
+ PAGE_SIZE, &t, GFP_KERNEL);
+ if (!buf->page_list[i].buf)
+ goto err_free;
+
+ buf->page_list[i].map = t;
+ }
+ }
+
+ return 0;
+
+err_free:
+ mlx4_buf_free(dev, size, buf);
+
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx4_buf_alloc);
+
+void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
+{
+ if (buf->nbufs == 1) {
+ dma_free_coherent(&dev->persist->pdev->dev, size,
+ buf->direct.buf, buf->direct.map);
+ } else {
+ int i;
+
+ for (i = 0; i < buf->nbufs; ++i)
+ if (buf->page_list[i].buf)
+ dma_free_coherent(&dev->persist->pdev->dev,
+ PAGE_SIZE,
+ buf->page_list[i].buf,
+ buf->page_list[i].map);
+ kfree(buf->page_list);
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_buf_free);
+
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
+{
+ struct mlx4_db_pgdir *pgdir;
+
+ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ if (!pgdir)
+ return NULL;
+
+ bitmap_fill(pgdir->order1, MLX4_DB_PER_PAGE / 2);
+ pgdir->bits[0] = pgdir->order0;
+ pgdir->bits[1] = pgdir->order1;
+ pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+ &pgdir->db_dma, GFP_KERNEL);
+ if (!pgdir->db_page) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir,
+ struct mlx4_db *db, int order)
+{
+ int o;
+ int i;
+
+ for (o = order; o <= 1; ++o) {
+ i = find_first_bit(pgdir->bits[o], MLX4_DB_PER_PAGE >> o);
+ if (i < MLX4_DB_PER_PAGE >> o)
+ goto found;
+ }
+
+ return -ENOMEM;
+
+found:
+ clear_bit(i, pgdir->bits[o]);
+
+ i <<= o;
+
+ if (o > order)
+ set_bit(i ^ 1, pgdir->bits[order]);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ db->db = pgdir->db_page + db->index;
+ db->dma = pgdir->db_dma + db->index * 4;
+ db->order = order;
+
+ return 0;
+}
+
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&priv->pgdir_mutex);
+
+ list_for_each_entry(pgdir, &priv->pgdir_list, list)
+ if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
+ goto out;
+
+ pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &priv->pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(mlx4_alloc_db_from_pgdir(pgdir, db, order));
+
+out:
+ mutex_unlock(&priv->pgdir_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_db_alloc);
+
+void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int o;
+ int i;
+
+ mutex_lock(&priv->pgdir_mutex);
+
+ o = db->order;
+ i = db->index;
+
+ if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
+ clear_bit(i ^ 1, db->u.pgdir->order0);
+ ++o;
+ }
+ i >>= o;
+ set_bit(i, db->u.pgdir->bits[o]);
+
+ if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) {
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+ db->u.pgdir->db_page, db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&priv->pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_db_free);
+
+int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+ int size)
+{
+ int err;
+
+ err = mlx4_db_alloc(dev, &wqres->db, 1);
+ if (err)
+ return err;
+
+ *wqres->db.db = 0;
+
+ err = mlx4_buf_direct_alloc(dev, size, &wqres->buf);
+ if (err)
+ goto err_db;
+
+ err = mlx4_mtt_init(dev, wqres->buf.npages, wqres->buf.page_shift,
+ &wqres->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
+ if (err)
+ goto err_mtt;
+
+ return 0;
+
+err_mtt:
+ mlx4_mtt_cleanup(dev, &wqres->mtt);
+err_buf:
+ mlx4_buf_free(dev, size, &wqres->buf);
+err_db:
+ mlx4_db_free(dev, &wqres->db);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_alloc_hwq_res);
+
+void mlx4_free_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
+ int size)
+{
+ mlx4_mtt_cleanup(dev, &wqres->mtt);
+ mlx4_buf_free(dev, size, &wqres->buf);
+ mlx4_db_free(dev, &wqres->db);
+}
+EXPORT_SYMBOL_GPL(mlx4_free_hwq_res);
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
new file mode 100644
index 000000000..c81d15bf2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/workqueue.h>
+#include <linux/module.h>
+
+#include "mlx4.h"
+
+enum {
+ MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
+};
+
+
+
+int mlx4_internal_err_reset = 1;
+module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644);
+MODULE_PARM_DESC(internal_err_reset,
+ "Reset device on internal errors if non-zero (default 1)");
+
+static int read_vendor_id(struct mlx4_dev *dev)
+{
+ u16 vendor_id = 0;
+ int ret;
+
+ ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id);
+ if (ret) {
+ mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret);
+ return ret;
+ }
+
+ if (vendor_id == 0xffff) {
+ mlx4_err(dev, "PCI can't be accessed to read vendor id\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx4_reset_master(struct mlx4_dev *dev)
+{
+ int err = 0;
+
+ if (mlx4_is_master(dev))
+ mlx4_report_internal_err_comm_event(dev);
+
+ if (!pci_channel_offline(dev->persist->pdev)) {
+ err = read_vendor_id(dev);
+ /* If PCI can't be accessed to read vendor ID we assume that its
+ * link was disabled and chip was already reset.
+ */
+ if (err)
+ return 0;
+
+ err = mlx4_reset(dev);
+ if (err)
+ mlx4_err(dev, "Fail to reset HCA\n");
+ }
+
+ return err;
+}
+
+static int mlx4_reset_slave(struct mlx4_dev *dev)
+{
+#define COM_CHAN_RST_REQ_OFFSET 0x10
+#define COM_CHAN_RST_ACK_OFFSET 0x08
+
+ u32 comm_flags;
+ u32 rst_req;
+ u32 rst_ack;
+ unsigned long end;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (pci_channel_offline(dev->persist->pdev))
+ return 0;
+
+ comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_FLAGS));
+ if (comm_flags == 0xffffffff) {
+ mlx4_err(dev, "VF reset is not needed\n");
+ return 0;
+ }
+
+ if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) {
+ mlx4_err(dev, "VF reset is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+ COM_CHAN_RST_REQ_OFFSET;
+ rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+ COM_CHAN_RST_ACK_OFFSET;
+ if (rst_req != rst_ack) {
+ mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n");
+ return -EIO;
+ }
+
+ rst_req ^= 1;
+ mlx4_warn(dev, "VF is sending reset request to Firmware\n");
+ comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;
+ __raw_writel((__force u32)cpu_to_be32(comm_flags),
+ (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
+ /* Make sure that our comm channel write doesn't
+ * get mixed in with writes from another CPU.
+ */
+ mmiowb();
+
+ end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
+ while (time_before(jiffies, end)) {
+ comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_FLAGS));
+ rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+ COM_CHAN_RST_ACK_OFFSET;
+
+ /* Reading rst_req again since the communication channel can
+ * be reset at any time by the PF and all its bits will be
+ * set to zero.
+ */
+ rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+ COM_CHAN_RST_REQ_OFFSET;
+
+ if (rst_ack == rst_req) {
+ mlx4_warn(dev, "VF Reset succeed\n");
+ return 0;
+ }
+ cond_resched();
+ }
+ mlx4_err(dev, "Fail to send reset over the communication channel\n");
+ return -ETIMEDOUT;
+}
+
+int mlx4_comm_internal_err(u32 slave_read)
+{
+ return (u32)COMM_CHAN_EVENT_INTERNAL_ERR ==
+ (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0;
+}
+
+void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
+{
+ int err;
+ struct mlx4_dev *dev;
+
+ if (!mlx4_internal_err_reset)
+ return;
+
+ mutex_lock(&persist->device_state_mutex);
+ if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
+ dev = persist->dev;
+ mlx4_err(dev, "device is going to be reset\n");
+ if (mlx4_is_slave(dev)) {
+ err = mlx4_reset_slave(dev);
+ } else {
+ mlx4_crdump_collect(dev);
+ err = mlx4_reset_master(dev);
+ }
+
+ if (!err) {
+ mlx4_err(dev, "device was reset successfully\n");
+ } else {
+ /* EEH could have disabled the PCI channel during reset. That's
+ * recoverable and the PCI error flow will handle it.
+ */
+ if (!pci_channel_offline(dev->persist->pdev))
+ BUG_ON(1);
+ }
+ dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
+ mutex_unlock(&persist->device_state_mutex);
+
+ /* At that step HW was already reset, now notify clients */
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
+ mlx4_cmd_wake_completions(dev);
+ return;
+
+out:
+ mutex_unlock(&persist->device_state_mutex);
+}
+
+static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
+{
+ int err = 0;
+
+ mlx4_enter_error_state(persist);
+ mutex_lock(&persist->interface_state_mutex);
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
+ !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
+ err = mlx4_restart_one(persist->pdev, false, NULL);
+ mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
+ err);
+ }
+ mutex_unlock(&persist->interface_state_mutex);
+}
+
+static void dump_err_buf(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ int i;
+
+ mlx4_err(dev, "Internal error detected:\n");
+ for (i = 0; i < priv->fw.catas_size; ++i)
+ mlx4_err(dev, " buf[%02x]: %08x\n",
+ i, swab32(readl(priv->catas_err.map + i)));
+}
+
+static void poll_catas(struct timer_list *t)
+{
+ struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer);
+ struct mlx4_dev *dev = &priv->dev;
+ u32 slave_read;
+
+ if (mlx4_is_slave(dev)) {
+ slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+ if (mlx4_comm_internal_err(slave_read)) {
+ mlx4_warn(dev, "Internal error detected on the communication channel\n");
+ goto internal_err;
+ }
+ } else if (readl(priv->catas_err.map)) {
+ dump_err_buf(dev);
+ goto internal_err;
+ }
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx4_warn(dev, "Internal error mark was detected on device\n");
+ goto internal_err;
+ }
+
+ mod_timer(&priv->catas_err.timer,
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+ return;
+
+internal_err:
+ if (mlx4_internal_err_reset)
+ queue_work(dev->persist->catas_wq, &dev->persist->catas_work);
+}
+
+static void catas_reset(struct work_struct *work)
+{
+ struct mlx4_dev_persistent *persist =
+ container_of(work, struct mlx4_dev_persistent,
+ catas_work);
+
+ mlx4_handle_error_state(persist);
+}
+
+void mlx4_start_catas_poll(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ phys_addr_t addr;
+
+ INIT_LIST_HEAD(&priv->catas_err.list);
+ timer_setup(&priv->catas_err.timer, poll_catas, 0);
+ priv->catas_err.map = NULL;
+
+ if (!mlx4_is_slave(dev)) {
+ addr = pci_resource_start(dev->persist->pdev,
+ priv->fw.catas_bar) +
+ priv->fw.catas_offset;
+
+ priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
+ if (!priv->catas_err.map) {
+ mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
+ (unsigned long long)addr);
+ return;
+ }
+ }
+
+ priv->catas_err.timer.expires =
+ round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
+ add_timer(&priv->catas_err.timer);
+}
+
+void mlx4_stop_catas_poll(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ del_timer_sync(&priv->catas_err.timer);
+
+ if (priv->catas_err.map) {
+ iounmap(priv->catas_err.map);
+ priv->catas_err.map = NULL;
+ }
+
+ if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION)
+ flush_workqueue(dev->persist->catas_wq);
+}
+
+int mlx4_catas_init(struct mlx4_dev *dev)
+{
+ INIT_WORK(&dev->persist->catas_work, catas_reset);
+ dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health");
+ if (!dev->persist->catas_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_catas_end(struct mlx4_dev *dev)
+{
+ if (dev->persist->catas_wq) {
+ destroy_workqueue(dev->persist->catas_wq);
+ dev->persist->catas_wq = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
new file mode 100644
index 000000000..857588e24
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -0,0 +1,3443 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/device.h>
+#include <linux/semaphore.h>
+#include <rdma/ib_smi.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+
+#include <asm/io.h>
+
+#include "mlx4.h"
+#include "fw.h"
+#include "fw_qos.h"
+#include "mlx4_stats.h"
+
+#define CMD_POLL_TOKEN 0xffff
+#define INBOX_MASK 0xffffffffffffff00ULL
+
+#define CMD_CHAN_VER 1
+#define CMD_CHAN_IF_REV 1
+
+enum {
+ /* command completed successfully: */
+ CMD_STAT_OK = 0x00,
+ /* Internal error (such as a bus error) occurred while processing command: */
+ CMD_STAT_INTERNAL_ERR = 0x01,
+ /* Operation/command not supported or opcode modifier not supported: */
+ CMD_STAT_BAD_OP = 0x02,
+ /* Parameter not supported or parameter out of range: */
+ CMD_STAT_BAD_PARAM = 0x03,
+ /* System not enabled or bad system state: */
+ CMD_STAT_BAD_SYS_STATE = 0x04,
+ /* Attempt to access reserved or unallocaterd resource: */
+ CMD_STAT_BAD_RESOURCE = 0x05,
+ /* Requested resource is currently executing a command, or is otherwise busy: */
+ CMD_STAT_RESOURCE_BUSY = 0x06,
+ /* Required capability exceeds device limits: */
+ CMD_STAT_EXCEED_LIM = 0x08,
+ /* Resource is not in the appropriate state or ownership: */
+ CMD_STAT_BAD_RES_STATE = 0x09,
+ /* Index out of range: */
+ CMD_STAT_BAD_INDEX = 0x0a,
+ /* FW image corrupted: */
+ CMD_STAT_BAD_NVMEM = 0x0b,
+ /* Error in ICM mapping (e.g. not enough auxiliary ICM pages to execute command): */
+ CMD_STAT_ICM_ERROR = 0x0c,
+ /* Attempt to modify a QP/EE which is not in the presumed state: */
+ CMD_STAT_BAD_QP_STATE = 0x10,
+ /* Bad segment parameters (Address/Size): */
+ CMD_STAT_BAD_SEG_PARAM = 0x20,
+ /* Memory Region has Memory Windows bound to: */
+ CMD_STAT_REG_BOUND = 0x21,
+ /* HCA local attached memory not present: */
+ CMD_STAT_LAM_NOT_PRE = 0x22,
+ /* Bad management packet (silently discarded): */
+ CMD_STAT_BAD_PKT = 0x30,
+ /* More outstanding CQEs in CQ than new CQ size: */
+ CMD_STAT_BAD_SIZE = 0x40,
+ /* Multi Function device support required: */
+ CMD_STAT_MULTI_FUNC_REQ = 0x50,
+};
+
+enum {
+ HCR_IN_PARAM_OFFSET = 0x00,
+ HCR_IN_MODIFIER_OFFSET = 0x08,
+ HCR_OUT_PARAM_OFFSET = 0x0c,
+ HCR_TOKEN_OFFSET = 0x14,
+ HCR_STATUS_OFFSET = 0x18,
+
+ HCR_OPMOD_SHIFT = 12,
+ HCR_T_BIT = 21,
+ HCR_E_BIT = 22,
+ HCR_GO_BIT = 23
+};
+
+enum {
+ GO_BIT_TIMEOUT_MSECS = 10000
+};
+
+enum mlx4_vlan_transition {
+ MLX4_VLAN_TRANSITION_VST_VST = 0,
+ MLX4_VLAN_TRANSITION_VST_VGT = 1,
+ MLX4_VLAN_TRANSITION_VGT_VST = 2,
+ MLX4_VLAN_TRANSITION_VGT_VGT = 3,
+};
+
+
+struct mlx4_cmd_context {
+ struct completion done;
+ int result;
+ int next;
+ u64 out_param;
+ u16 token;
+ u8 fw_status;
+};
+
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr_cmd *in_vhcr);
+
+static int mlx4_status_to_errno(u8 status)
+{
+ static const int trans_table[] = {
+ [CMD_STAT_INTERNAL_ERR] = -EIO,
+ [CMD_STAT_BAD_OP] = -EPERM,
+ [CMD_STAT_BAD_PARAM] = -EINVAL,
+ [CMD_STAT_BAD_SYS_STATE] = -ENXIO,
+ [CMD_STAT_BAD_RESOURCE] = -EBADF,
+ [CMD_STAT_RESOURCE_BUSY] = -EBUSY,
+ [CMD_STAT_EXCEED_LIM] = -ENOMEM,
+ [CMD_STAT_BAD_RES_STATE] = -EBADF,
+ [CMD_STAT_BAD_INDEX] = -EBADF,
+ [CMD_STAT_BAD_NVMEM] = -EFAULT,
+ [CMD_STAT_ICM_ERROR] = -ENFILE,
+ [CMD_STAT_BAD_QP_STATE] = -EINVAL,
+ [CMD_STAT_BAD_SEG_PARAM] = -EFAULT,
+ [CMD_STAT_REG_BOUND] = -EBUSY,
+ [CMD_STAT_LAM_NOT_PRE] = -EAGAIN,
+ [CMD_STAT_BAD_PKT] = -EINVAL,
+ [CMD_STAT_BAD_SIZE] = -ENOMEM,
+ [CMD_STAT_MULTI_FUNC_REQ] = -EACCES,
+ };
+
+ if (status >= ARRAY_SIZE(trans_table) ||
+ (status != CMD_STAT_OK && trans_table[status] == 0))
+ return -EIO;
+
+ return trans_table[status];
+}
+
+static u8 mlx4_errno_to_status(int errno)
+{
+ switch (errno) {
+ case -EPERM:
+ return CMD_STAT_BAD_OP;
+ case -EINVAL:
+ return CMD_STAT_BAD_PARAM;
+ case -ENXIO:
+ return CMD_STAT_BAD_SYS_STATE;
+ case -EBUSY:
+ return CMD_STAT_RESOURCE_BUSY;
+ case -ENOMEM:
+ return CMD_STAT_EXCEED_LIM;
+ case -ENFILE:
+ return CMD_STAT_ICM_ERROR;
+ default:
+ return CMD_STAT_INTERNAL_ERR;
+ }
+}
+
+static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op,
+ u8 op_modifier)
+{
+ switch (op) {
+ case MLX4_CMD_UNMAP_ICM:
+ case MLX4_CMD_UNMAP_ICM_AUX:
+ case MLX4_CMD_UNMAP_FA:
+ case MLX4_CMD_2RST_QP:
+ case MLX4_CMD_HW2SW_EQ:
+ case MLX4_CMD_HW2SW_CQ:
+ case MLX4_CMD_HW2SW_SRQ:
+ case MLX4_CMD_HW2SW_MPT:
+ case MLX4_CMD_CLOSE_HCA:
+ case MLX4_QP_FLOW_STEERING_DETACH:
+ case MLX4_CMD_FREE_RES:
+ case MLX4_CMD_CLOSE_PORT:
+ return CMD_STAT_OK;
+
+ case MLX4_CMD_QP_ATTACH:
+ /* On Detach case return success */
+ if (op_modifier == 0)
+ return CMD_STAT_OK;
+ return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+
+ default:
+ return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ }
+}
+
+static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status)
+{
+ /* Any error during the closing commands below is considered fatal */
+ if (op == MLX4_CMD_CLOSE_HCA ||
+ op == MLX4_CMD_HW2SW_EQ ||
+ op == MLX4_CMD_HW2SW_CQ ||
+ op == MLX4_CMD_2RST_QP ||
+ op == MLX4_CMD_HW2SW_SRQ ||
+ op == MLX4_CMD_SYNC_TPT ||
+ op == MLX4_CMD_UNMAP_ICM ||
+ op == MLX4_CMD_UNMAP_ICM_AUX ||
+ op == MLX4_CMD_UNMAP_FA)
+ return 1;
+ /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals
+ * CMD_STAT_REG_BOUND.
+ * This status indicates that memory region has memory windows bound to it
+ * which may result from invalid user space usage and is not fatal.
+ */
+ if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND)
+ return 1;
+ return 0;
+}
+
+static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier,
+ int err)
+{
+ /* Only if reset flow is really active return code is based on
+ * command, otherwise current error code is returned.
+ */
+ if (mlx4_internal_err_reset) {
+ mlx4_enter_error_state(dev->persist);
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+ }
+
+ return err;
+}
+
+static int comm_pending(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 status = readl(&priv->mfunc.comm->slave_read);
+
+ return (swab32(status) >> 31) != priv->cmd.comm_toggle;
+}
+
+static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 val;
+
+ /* To avoid writing to unknown addresses after the device state was
+ * changed to internal error and the function was rest,
+ * check the INTERNAL_ERROR flag which is updated under
+ * device_state_mutex lock.
+ */
+ mutex_lock(&dev->persist->device_state_mutex);
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ mutex_unlock(&dev->persist->device_state_mutex);
+ return -EIO;
+ }
+
+ priv->cmd.comm_toggle ^= 1;
+ val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31);
+ __raw_writel((__force u32) cpu_to_be32(val),
+ &priv->mfunc.comm->slave_write);
+ mmiowb();
+ mutex_unlock(&dev->persist->device_state_mutex);
+ return 0;
+}
+
+static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param,
+ unsigned long timeout)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ unsigned long end;
+ int err = 0;
+ int ret_from_pending = 0;
+
+ /* First, verify that the master reports correct status */
+ if (comm_pending(dev)) {
+ mlx4_warn(dev, "Communication channel is not idle - my toggle is %d (cmd:0x%x)\n",
+ priv->cmd.comm_toggle, cmd);
+ return -EAGAIN;
+ }
+
+ /* Write command */
+ down(&priv->cmd.poll_sem);
+ if (mlx4_comm_cmd_post(dev, cmd, param)) {
+ /* Only in case the device state is INTERNAL_ERROR,
+ * mlx4_comm_cmd_post returns with an error
+ */
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ goto out;
+ }
+
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+ ret_from_pending = comm_pending(dev);
+ if (ret_from_pending) {
+ /* check if the slave is trying to boot in the middle of
+ * FLR process. The only non-zero result in the RESET command
+ * is MLX4_DELAY_RESET_SLAVE*/
+ if ((MLX4_COMM_CMD_RESET == cmd)) {
+ err = MLX4_DELAY_RESET_SLAVE;
+ goto out;
+ } else {
+ mlx4_warn(dev, "Communication channel command 0x%x timed out\n",
+ cmd);
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ }
+ }
+
+ if (err)
+ mlx4_enter_error_state(dev->persist);
+out:
+ up(&priv->cmd.poll_sem);
+ return err;
+}
+
+static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd,
+ u16 param, u16 op, unsigned long timeout)
+{
+ struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+ struct mlx4_cmd_context *context;
+ unsigned long end;
+ int err = 0;
+
+ down(&cmd->event_sem);
+
+ spin_lock(&cmd->context_lock);
+ BUG_ON(cmd->free_head < 0);
+ context = &cmd->context[cmd->free_head];
+ context->token += cmd->token_mask + 1;
+ cmd->free_head = context->next;
+ spin_unlock(&cmd->context_lock);
+
+ reinit_completion(&context->done);
+
+ if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) {
+ /* Only in case the device state is INTERNAL_ERROR,
+ * mlx4_comm_cmd_post returns with an error
+ */
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ goto out;
+ }
+
+ if (!wait_for_completion_timeout(&context->done,
+ msecs_to_jiffies(timeout))) {
+ mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n",
+ vhcr_cmd, op);
+ goto out_reset;
+ }
+
+ err = context->result;
+ if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) {
+ mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+ vhcr_cmd, context->fw_status);
+ if (mlx4_closing_cmd_fatal_error(op, context->fw_status))
+ goto out_reset;
+ }
+
+ /* wait for comm channel ready
+ * this is necessary for prevention the race
+ * when switching between event to polling mode
+ * Skipping this section in case the device is in FATAL_ERROR state,
+ * In this state, no commands are sent via the comm channel until
+ * the device has returned from reset.
+ */
+ if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (comm_pending(dev) && time_before(jiffies, end))
+ cond_resched();
+ }
+ goto out;
+
+out_reset:
+ err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ mlx4_enter_error_state(dev->persist);
+out:
+ spin_lock(&cmd->context_lock);
+ context->next = cmd->free_head;
+ cmd->free_head = context - cmd->context;
+ spin_unlock(&cmd->context_lock);
+
+ up(&cmd->event_sem);
+ return err;
+}
+
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+ u16 op, unsigned long timeout)
+{
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+
+ if (mlx4_priv(dev)->cmd.use_events)
+ return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout);
+ return mlx4_comm_cmd_poll(dev, cmd, param, timeout);
+}
+
+static int cmd_pending(struct mlx4_dev *dev)
+{
+ u32 status;
+
+ if (pci_channel_offline(dev->persist->pdev))
+ return -EIO;
+
+ status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET);
+
+ return (status & swab32(1 << HCR_GO_BIT)) ||
+ (mlx4_priv(dev)->cmd.toggle ==
+ !!(status & swab32(1 << HCR_T_BIT)));
+}
+
+static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
+ u32 in_modifier, u8 op_modifier, u16 op, u16 token,
+ int event)
+{
+ struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+ u32 __iomem *hcr = cmd->hcr;
+ int ret = -EIO;
+ unsigned long end;
+
+ mutex_lock(&dev->persist->device_state_mutex);
+ /* To avoid writing to unknown addresses after the device state was
+ * changed to internal error and the chip was reset,
+ * check the INTERNAL_ERROR flag which is updated under
+ * device_state_mutex lock.
+ */
+ if (pci_channel_offline(dev->persist->pdev) ||
+ (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ goto out;
+ }
+
+ end = jiffies;
+ if (event)
+ end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
+
+ while (cmd_pending(dev)) {
+ if (pci_channel_offline(dev->persist->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ goto out;
+ }
+
+ if (time_after_eq(jiffies, end)) {
+ mlx4_err(dev, "%s:cmd_pending failed\n", __func__);
+ goto out;
+ }
+ cond_resched();
+ }
+
+ /*
+ * We use writel (instead of something like memcpy_toio)
+ * because writes of less than 32 bits to the HCR don't work
+ * (and some architectures such as ia64 implement memcpy_toio
+ * in terms of writeb).
+ */
+ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), hcr + 0);
+ __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), hcr + 1);
+ __raw_writel((__force u32) cpu_to_be32(in_modifier), hcr + 2);
+ __raw_writel((__force u32) cpu_to_be32(out_param >> 32), hcr + 3);
+ __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4);
+ __raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5);
+
+ /* __raw_writel may not order writes. */
+ wmb();
+
+ __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
+ (cmd->toggle << HCR_T_BIT) |
+ (event ? (1 << HCR_E_BIT) : 0) |
+ (op_modifier << HCR_OPMOD_SHIFT) |
+ op), hcr + 6);
+
+ /*
+ * Make sure that our HCR writes don't get mixed in with
+ * writes from another CPU starting a FW command.
+ */
+ mmiowb();
+
+ cmd->toggle = cmd->toggle ^ 1;
+
+ ret = 0;
+
+out:
+ if (ret)
+ mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n",
+ op, ret, in_param, in_modifier, op_modifier);
+ mutex_unlock(&dev->persist->device_state_mutex);
+
+ return ret;
+}
+
+static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+ int out_is_imm, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr;
+ int ret;
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+
+ vhcr->in_param = cpu_to_be64(in_param);
+ vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0;
+ vhcr->in_modifier = cpu_to_be32(in_modifier);
+ vhcr->opcode = cpu_to_be16((((u16) op_modifier) << 12) | (op & 0xfff));
+ vhcr->token = cpu_to_be16(CMD_POLL_TOKEN);
+ vhcr->status = 0;
+ vhcr->flags = !!(priv->cmd.use_events) << 6;
+
+ if (mlx4_is_master(dev)) {
+ ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr);
+ if (!ret) {
+ if (out_is_imm) {
+ if (out_param)
+ *out_param =
+ be64_to_cpu(vhcr->out_param);
+ else {
+ mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+ op);
+ vhcr->status = CMD_STAT_BAD_PARAM;
+ }
+ }
+ ret = mlx4_status_to_errno(vhcr->status);
+ }
+ if (ret &&
+ dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ ret = mlx4_internal_err_ret_value(dev, op, op_modifier);
+ } else {
+ ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op,
+ MLX4_COMM_TIME + timeout);
+ if (!ret) {
+ if (out_is_imm) {
+ if (out_param)
+ *out_param =
+ be64_to_cpu(vhcr->out_param);
+ else {
+ mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+ op);
+ vhcr->status = CMD_STAT_BAD_PARAM;
+ }
+ }
+ ret = mlx4_status_to_errno(vhcr->status);
+ } else {
+ if (dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ ret = mlx4_internal_err_ret_value(dev, op,
+ op_modifier);
+ else
+ mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op);
+ }
+ }
+
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return ret;
+}
+
+static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+ int out_is_imm, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ void __iomem *hcr = priv->cmd.hcr;
+ int err = 0;
+ unsigned long end;
+ u32 stat;
+
+ down(&priv->cmd.poll_sem);
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+ goto out;
+ }
+
+ if (out_is_imm && !out_param) {
+ mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+ op);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
+ in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0);
+ if (err)
+ goto out_reset;
+
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (cmd_pending(dev) && time_before(jiffies, end)) {
+ if (pci_channel_offline(dev->persist->pdev)) {
+ /*
+ * Device is going through error recovery
+ * and cannot accept commands.
+ */
+ err = -EIO;
+ goto out_reset;
+ }
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+ goto out;
+ }
+
+ cond_resched();
+ }
+
+ if (cmd_pending(dev)) {
+ mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
+ op);
+ err = -EIO;
+ goto out_reset;
+ }
+
+ if (out_is_imm)
+ *out_param =
+ (u64) be32_to_cpu((__force __be32)
+ __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
+ (u64) be32_to_cpu((__force __be32)
+ __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4));
+ stat = be32_to_cpu((__force __be32)
+ __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24;
+ err = mlx4_status_to_errno(stat);
+ if (err) {
+ mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+ op, stat);
+ if (mlx4_closing_cmd_fatal_error(op, stat))
+ goto out_reset;
+ goto out;
+ }
+
+out_reset:
+ if (err)
+ err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
+out:
+ up(&priv->cmd.poll_sem);
+ return err;
+}
+
+void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_context *context =
+ &priv->cmd.context[token & priv->cmd.token_mask];
+
+ /* previously timed out command completing at long last */
+ if (token != context->token)
+ return;
+
+ context->fw_status = status;
+ context->result = mlx4_status_to_errno(status);
+ context->out_param = out_param;
+
+ complete(&context->done);
+}
+
+static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+ int out_is_imm, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout)
+{
+ struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+ struct mlx4_cmd_context *context;
+ long ret_wait;
+ int err = 0;
+
+ down(&cmd->event_sem);
+
+ spin_lock(&cmd->context_lock);
+ BUG_ON(cmd->free_head < 0);
+ context = &cmd->context[cmd->free_head];
+ context->token += cmd->token_mask + 1;
+ cmd->free_head = context->next;
+ spin_unlock(&cmd->context_lock);
+
+ if (out_is_imm && !out_param) {
+ mlx4_err(dev, "response expected while output mailbox is NULL for command 0x%x\n",
+ op);
+ err = -EINVAL;
+ goto out;
+ }
+
+ reinit_completion(&context->done);
+
+ err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0,
+ in_modifier, op_modifier, op, context->token, 1);
+ if (err)
+ goto out_reset;
+
+ if (op == MLX4_CMD_SENSE_PORT) {
+ ret_wait =
+ wait_for_completion_interruptible_timeout(&context->done,
+ msecs_to_jiffies(timeout));
+ if (ret_wait < 0) {
+ context->fw_status = 0;
+ context->out_param = 0;
+ context->result = 0;
+ }
+ } else {
+ ret_wait = (long)wait_for_completion_timeout(&context->done,
+ msecs_to_jiffies(timeout));
+ }
+ if (!ret_wait) {
+ mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
+ op);
+ if (op == MLX4_CMD_NOP) {
+ err = -EBUSY;
+ goto out;
+ } else {
+ err = -EIO;
+ goto out_reset;
+ }
+ }
+
+ err = context->result;
+ if (err) {
+ /* Since we do not want to have this error message always
+ * displayed at driver start when there are ConnectX2 HCAs
+ * on the host, we deprecate the error message for this
+ * specific command/input_mod/opcode_mod/fw-status to be debug.
+ */
+ if (op == MLX4_CMD_SET_PORT &&
+ (in_modifier == 1 || in_modifier == 2) &&
+ op_modifier == MLX4_SET_PORT_IB_OPCODE &&
+ context->fw_status == CMD_STAT_BAD_SIZE)
+ mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n",
+ op, context->fw_status);
+ else
+ mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n",
+ op, context->fw_status);
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ err = mlx4_internal_err_ret_value(dev, op, op_modifier);
+ else if (mlx4_closing_cmd_fatal_error(op, context->fw_status))
+ goto out_reset;
+
+ goto out;
+ }
+
+ if (out_is_imm)
+ *out_param = context->out_param;
+
+out_reset:
+ if (err)
+ err = mlx4_cmd_reset_flow(dev, op, op_modifier, err);
+out:
+ spin_lock(&cmd->context_lock);
+ context->next = cmd->free_head;
+ cmd->free_head = context - cmd->context;
+ spin_unlock(&cmd->context_lock);
+
+ up(&cmd->event_sem);
+ return err;
+}
+
+int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
+ int out_is_imm, u32 in_modifier, u8 op_modifier,
+ u16 op, unsigned long timeout, int native)
+{
+ if (pci_channel_offline(dev->persist->pdev))
+ return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO);
+
+ if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+ int ret;
+
+ if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ return mlx4_internal_err_ret_value(dev, op,
+ op_modifier);
+ down_read(&mlx4_priv(dev)->cmd.switch_sem);
+ if (mlx4_priv(dev)->cmd.use_events)
+ ret = mlx4_cmd_wait(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
+ else
+ ret = mlx4_cmd_poll(dev, in_param, out_param,
+ out_is_imm, in_modifier,
+ op_modifier, op, timeout);
+
+ up_read(&mlx4_priv(dev)->cmd.switch_sem);
+ return ret;
+ }
+ return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm,
+ in_modifier, op_modifier, op, timeout);
+}
+EXPORT_SYMBOL_GPL(__mlx4_cmd);
+
+
+int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
+{
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
+ int slave, u64 slave_addr,
+ int size, int is_read)
+{
+ u64 in_param;
+ u64 out_param;
+
+ if ((slave_addr & 0xfff) | (master_addr & 0xfff) |
+ (slave & ~0x7f) | (size & 0xff)) {
+ mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx master_addr:0x%llx slave_id:%d size:%d\n",
+ slave_addr, master_addr, slave, size);
+ return -EINVAL;
+ }
+
+ if (is_read) {
+ in_param = (u64) slave | slave_addr;
+ out_param = (u64) dev->caps.function | master_addr;
+ } else {
+ in_param = (u64) dev->caps.function | master_addr;
+ out_param = (u64) slave | slave_addr;
+ }
+
+ return mlx4_cmd_imm(dev, in_param, &out_param, size, 0,
+ MLX4_CMD_ACCESS_MEM,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
+static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf);
+ struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf);
+ int err;
+ int i;
+
+ if (index & 0x1f)
+ return -EINVAL;
+
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ for (i = 0; i < 32; ++i)
+ pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]);
+
+ return err;
+}
+
+static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) {
+ err = query_pkey_block(dev, port, i, table + i, inbox, outbox);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+#define PORT_CAPABILITY_LOCATION_IN_SMP 20
+#define PORT_STATE_OFFSET 32
+
+static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
+{
+ if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP)
+ return IB_PORT_ACTIVE;
+ else
+ return IB_PORT_DOWN;
+}
+
+static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct ib_smp *smp = inbox->buf;
+ u32 index;
+ u8 port, slave_port;
+ u8 opcode_modifier;
+ u16 *table;
+ int err;
+ int vidx, pidx;
+ int network_view;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct ib_smp *outsmp = outbox->buf;
+ __be16 *outtab = (__be16 *)(outsmp->data);
+ __be32 slave_cap_mask;
+ __be64 slave_node_guid;
+
+ slave_port = vhcr->in_modifier;
+ port = mlx4_slave_convert_port(dev, slave, slave_port);
+
+ /* network-view bit is for driver use only, and should not be passed to FW */
+ opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */
+ network_view = !!(vhcr->op_modifier & 0x8);
+
+ if (smp->base_version == 1 &&
+ smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->class_version == 1) {
+ /* host view is paravirtualized */
+ if (!network_view && smp->method == IB_MGMT_METHOD_GET) {
+ if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ index = be32_to_cpu(smp->attr_mod);
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+ table = kcalloc((dev->caps.pkey_table_len[port] / 32) + 1,
+ sizeof(*table) * 32, GFP_KERNEL);
+
+ if (!table)
+ return -ENOMEM;
+ /* need to get the full pkey table because the paravirtualized
+ * pkeys may be scattered among several pkey blocks.
+ */
+ err = get_full_pkey_table(dev, port, table, inbox, outbox);
+ if (!err) {
+ for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
+ pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
+ outtab[vidx % 32] = cpu_to_be16(table[pidx]);
+ }
+ }
+ kfree(table);
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
+ /*get the slave specific caps:*/
+ /*do the command */
+ smp->attr_mod = cpu_to_be32(port);
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ port, opcode_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ /* modify the response for slaves */
+ if (!err && slave != mlx4_master_func_num(dev)) {
+ u8 *state = outsmp->data + PORT_STATE_OFFSET;
+
+ *state = (*state & 0xf0) | vf_port_state(dev, port, slave);
+ slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+ memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
+ }
+ return err;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
+ __be64 guid = mlx4_get_admin_guid(dev, slave,
+ port);
+
+ /* set the PF admin guid to the FW/HW burned
+ * GUID, if it wasn't yet set
+ */
+ if (slave == 0 && guid == 0) {
+ smp->attr_mod = 0;
+ err = mlx4_cmd_box(dev,
+ inbox->dma,
+ outbox->dma,
+ vhcr->in_modifier,
+ opcode_modifier,
+ vhcr->op,
+ MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ mlx4_set_admin_guid(dev,
+ *(__be64 *)outsmp->
+ data, slave, port);
+ } else {
+ memcpy(outsmp->data, &guid, 8);
+ }
+
+ /* clean all other gids */
+ memset(outsmp->data + 8, 0, 56);
+ return 0;
+ }
+ if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ port, opcode_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (!err) {
+ slave_node_guid = mlx4_get_slave_node_guid(dev, slave);
+ memcpy(outsmp->data + 12, &slave_node_guid, 8);
+ }
+ return err;
+ }
+ }
+ }
+
+ /* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs.
+ * These are the MADs used by ib verbs (such as ib_query_gids).
+ */
+ if (slave != mlx4_master_func_num(dev) &&
+ !mlx4_vf_smi_enabled(dev, slave, port)) {
+ if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ smp->method == IB_MGMT_METHOD_GET) || network_view) {
+ mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
+ slave, smp->mgmt_class, smp->method,
+ network_view ? "Network" : "Host",
+ be16_to_cpu(smp->attr_id));
+ return -EPERM;
+ }
+ }
+
+ return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
+ vhcr->in_modifier, opcode_modifier,
+ vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_CMD_EPERM_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return -EPERM;
+}
+
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u64 in_param;
+ u64 out_param;
+ int err;
+
+ in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param;
+ out_param = cmd->has_outbox ? (u64) outbox->dma : vhcr->out_param;
+ if (cmd->encode_slave_id) {
+ in_param &= 0xffffffffffffff00ll;
+ in_param |= slave;
+ }
+
+ err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm,
+ vhcr->in_modifier, vhcr->op_modifier, vhcr->op,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ if (cmd->out_is_imm)
+ vhcr->out_param = out_param;
+
+ return err;
+}
+
+static struct mlx4_cmd_info cmd_info[] = {
+ {
+ .opcode = MLX4_CMD_QUERY_FW,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_FW_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_HCA,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_DEV_CAP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_DEV_CAP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_FUNC_CAP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_FUNC_CAP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_ADAPTER,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_INIT_PORT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_CLOSE_PORT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CLOSE_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_PORT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SET_PORT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_PORT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_MAP_EQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MAP_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_EQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW_HEALTH_CHECK,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_NOP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_CONFIG_DEV,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CONFIG_DEV_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_ALLOC_RES,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_ALLOC_RES_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_FREE_RES,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_FREE_RES_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_MPT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_MPT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_MPT,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_MPT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_READ_MTT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_WRITE_MTT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_WRITE_MTT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SYNC_TPT,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_EQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_EQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_EQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_CQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_CQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_CQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_MODIFY_CQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MODIFY_CQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SW2HW_SRQ,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_SW2HW_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_HW2SW_SRQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_HW2SW_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_SRQ,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_ARM_SRQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_ARM_SRQ_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RST2INIT_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = true,
+ .verify = NULL,
+ .wrapper = mlx4_RST2INIT_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INIT2INIT_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT2INIT_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INIT2RTR_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_INIT2RTR_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTR2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_RTR2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTS2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_RTS2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQERR2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQERR2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_2ERR_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_RTS2SQD_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQD2SQD_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQD2SQD_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SQD2RTS_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SQD2RTS_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_2RST_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_2RST_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_QP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SUSPEND_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_UNSUSPEND_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_GEN_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_UPDATE_QP,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_UPDATE_QP_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_GET_OP_REQ,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper,
+ },
+ {
+ .opcode = MLX4_CMD_ALLOCATE_VPP,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper,
+ },
+ {
+ .opcode = MLX4_CMD_SET_VPORT_QOS,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper,
+ },
+ {
+ .opcode = MLX4_CMD_CONF_SPECIAL_QP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL, /* XXX verify: only demux can do this */
+ .wrapper = NULL
+ },
+ {
+ .opcode = MLX4_CMD_MAD_IFC,
+ .has_inbox = true,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_MAD_IFC_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_MAD_DEMUX,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_QUERY_IF_STAT,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QUERY_IF_STAT_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_ACCESS_REG,
+ .has_inbox = true,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_ACCESS_REG_wrapper,
+ },
+ {
+ .opcode = MLX4_CMD_CONGESTION_CTRL_OPCODE,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper,
+ },
+ /* Native multicast commands are not available for guests */
+ {
+ .opcode = MLX4_CMD_QP_ATTACH,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_ATTACH_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_PROMISC,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_PROMISC_wrapper
+ },
+ /* Ethernet specific commands */
+ {
+ .opcode = MLX4_CMD_SET_VLAN_FLTR,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_VLAN_FLTR_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_SET_MCAST_FLTR,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_SET_MCAST_FLTR_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_DUMP_ETH_STATS,
+ .has_inbox = false,
+ .has_outbox = true,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_DUMP_ETH_STATS_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_INFORM_FLR_DONE,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = NULL
+ },
+ /* flow steering commands */
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_ATTACH,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper
+ },
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_DETACH,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
+ },
+ {
+ .opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper
+ },
+ {
+ .opcode = MLX4_CMD_VIRT_PORT_MAP,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_CMD_EPERM_wrapper
+ },
+};
+
+static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr_cmd *in_vhcr)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_info *cmd = NULL;
+ struct mlx4_vhcr_cmd *vhcr_cmd = in_vhcr ? in_vhcr : priv->mfunc.vhcr;
+ struct mlx4_vhcr *vhcr;
+ struct mlx4_cmd_mailbox *inbox = NULL;
+ struct mlx4_cmd_mailbox *outbox = NULL;
+ u64 in_param;
+ u64 out_param;
+ int ret = 0;
+ int i;
+ int err = 0;
+
+ /* Create sw representation of Virtual HCR */
+ vhcr = kzalloc(sizeof(struct mlx4_vhcr), GFP_KERNEL);
+ if (!vhcr)
+ return -ENOMEM;
+
+ /* DMA in the vHCR */
+ if (!in_vhcr) {
+ ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+ priv->mfunc.master.slave_state[slave].vhcr_dma,
+ ALIGN(sizeof(struct mlx4_vhcr_cmd),
+ MLX4_ACCESS_MEM_ALIGN), 1);
+ if (ret) {
+ if (!(dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR))
+ mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n",
+ __func__, ret);
+ kfree(vhcr);
+ return ret;
+ }
+ }
+
+ /* Fill SW VHCR fields */
+ vhcr->in_param = be64_to_cpu(vhcr_cmd->in_param);
+ vhcr->out_param = be64_to_cpu(vhcr_cmd->out_param);
+ vhcr->in_modifier = be32_to_cpu(vhcr_cmd->in_modifier);
+ vhcr->token = be16_to_cpu(vhcr_cmd->token);
+ vhcr->op = be16_to_cpu(vhcr_cmd->opcode) & 0xfff;
+ vhcr->op_modifier = (u8) (be16_to_cpu(vhcr_cmd->opcode) >> 12);
+ vhcr->e_bit = vhcr_cmd->flags & (1 << 6);
+
+ /* Lookup command */
+ for (i = 0; i < ARRAY_SIZE(cmd_info); ++i) {
+ if (vhcr->op == cmd_info[i].opcode) {
+ cmd = &cmd_info[i];
+ break;
+ }
+ }
+ if (!cmd) {
+ mlx4_err(dev, "Unknown command:0x%x accepted from slave:%d\n",
+ vhcr->op, slave);
+ vhcr_cmd->status = CMD_STAT_BAD_PARAM;
+ goto out_status;
+ }
+
+ /* Read inbox */
+ if (cmd->has_inbox) {
+ vhcr->in_param &= INBOX_MASK;
+ inbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inbox)) {
+ vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+ inbox = NULL;
+ goto out_status;
+ }
+
+ ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave,
+ vhcr->in_param,
+ MLX4_MAILBOX_SIZE, 1);
+ if (ret) {
+ if (!(dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR))
+ mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n",
+ __func__, cmd->opcode);
+ vhcr_cmd->status = CMD_STAT_INTERNAL_ERR;
+ goto out_status;
+ }
+ }
+
+ /* Apply permission and bound checks if applicable */
+ if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) {
+ mlx4_warn(dev, "Command:0x%x from slave: %d failed protection checks for resource_id:%d\n",
+ vhcr->op, slave, vhcr->in_modifier);
+ vhcr_cmd->status = CMD_STAT_BAD_OP;
+ goto out_status;
+ }
+
+ /* Allocate outbox */
+ if (cmd->has_outbox) {
+ outbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outbox)) {
+ vhcr_cmd->status = CMD_STAT_BAD_SIZE;
+ outbox = NULL;
+ goto out_status;
+ }
+ }
+
+ /* Execute the command! */
+ if (cmd->wrapper) {
+ err = cmd->wrapper(dev, slave, vhcr, inbox, outbox,
+ cmd);
+ if (cmd->out_is_imm)
+ vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+ } else {
+ in_param = cmd->has_inbox ? (u64) inbox->dma :
+ vhcr->in_param;
+ out_param = cmd->has_outbox ? (u64) outbox->dma :
+ vhcr->out_param;
+ err = __mlx4_cmd(dev, in_param, &out_param,
+ cmd->out_is_imm, vhcr->in_modifier,
+ vhcr->op_modifier, vhcr->op,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ if (cmd->out_is_imm) {
+ vhcr->out_param = out_param;
+ vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
+ }
+ }
+
+ if (err) {
+ if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
+ if (vhcr->op == MLX4_CMD_ALLOC_RES &&
+ (vhcr->in_modifier & 0xff) == RES_COUNTER &&
+ err == -EDQUOT)
+ mlx4_dbg(dev,
+ "Unable to allocate counter for slave %d (%d)\n",
+ slave, err);
+ else
+ mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n",
+ vhcr->op, slave, vhcr->errno, err);
+ }
+ vhcr_cmd->status = mlx4_errno_to_status(err);
+ goto out_status;
+ }
+
+
+ /* Write outbox if command completed successfully */
+ if (cmd->has_outbox && !vhcr_cmd->status) {
+ ret = mlx4_ACCESS_MEM(dev, outbox->dma, slave,
+ vhcr->out_param,
+ MLX4_MAILBOX_SIZE, MLX4_CMD_WRAPPED);
+ if (ret) {
+ /* If we failed to write back the outbox after the
+ *command was successfully executed, we must fail this
+ * slave, as it is now in undefined state */
+ if (!(dev->persist->state &
+ MLX4_DEVICE_STATE_INTERNAL_ERROR))
+ mlx4_err(dev, "%s:Failed writing outbox\n", __func__);
+ goto out;
+ }
+ }
+
+out_status:
+ /* DMA back vhcr result */
+ if (!in_vhcr) {
+ ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
+ priv->mfunc.master.slave_state[slave].vhcr_dma,
+ ALIGN(sizeof(struct mlx4_vhcr),
+ MLX4_ACCESS_MEM_ALIGN),
+ MLX4_CMD_WRAPPED);
+ if (ret)
+ mlx4_err(dev, "%s:Failed writing vhcr result\n",
+ __func__);
+ else if (vhcr->e_bit &&
+ mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe))
+ mlx4_warn(dev, "Failed to generate command completion eqe for slave %d\n",
+ slave);
+ }
+
+out:
+ kfree(vhcr);
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_free_cmd_mailbox(dev, outbox);
+ return ret;
+}
+
+static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
+ int slave, int port)
+{
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_vport_state *vp_admin;
+ struct mlx4_vf_immed_vlan_work *work;
+ struct mlx4_dev *dev = &(priv->dev);
+ int err;
+ int admin_vlan_ix = NO_INDX;
+
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+ if (vp_oper->state.default_vlan == vp_admin->default_vlan &&
+ vp_oper->state.default_qos == vp_admin->default_qos &&
+ vp_oper->state.vlan_proto == vp_admin->vlan_proto &&
+ vp_oper->state.link_state == vp_admin->link_state &&
+ vp_oper->state.qos_vport == vp_admin->qos_vport)
+ return 0;
+
+ if (!(priv->mfunc.master.slave_state[slave].active &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP)) {
+ /* even if the UPDATE_QP command isn't supported, we still want
+ * to set this VF link according to the admin directive
+ */
+ vp_oper->state.link_state = vp_admin->link_state;
+ return -1;
+ }
+
+ mlx4_dbg(dev, "updating immediately admin params slave %d port %d\n",
+ slave, port);
+ mlx4_dbg(dev, "vlan %d QoS %d link down %d\n",
+ vp_admin->default_vlan, vp_admin->default_qos,
+ vp_admin->link_state);
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ if (vp_oper->state.default_vlan != vp_admin->default_vlan) {
+ if (MLX4_VGT != vp_admin->default_vlan) {
+ err = __mlx4_register_vlan(&priv->dev, port,
+ vp_admin->default_vlan,
+ &admin_vlan_ix);
+ if (err) {
+ kfree(work);
+ mlx4_warn(&priv->dev,
+ "No vlan resources slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ } else {
+ admin_vlan_ix = NO_INDX;
+ }
+ work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN;
+ mlx4_dbg(&priv->dev,
+ "alloc vlan %d idx %d slave %d port %d\n",
+ (int)(vp_admin->default_vlan),
+ admin_vlan_ix, slave, port);
+ }
+
+ /* save original vlan ix and vlan id */
+ work->orig_vlan_id = vp_oper->state.default_vlan;
+ work->orig_vlan_ix = vp_oper->vlan_idx;
+
+ /* handle new qos */
+ if (vp_oper->state.default_qos != vp_admin->default_qos)
+ work->flags |= MLX4_VF_IMMED_VLAN_FLAG_QOS;
+
+ if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN)
+ vp_oper->vlan_idx = admin_vlan_ix;
+
+ vp_oper->state.default_vlan = vp_admin->default_vlan;
+ vp_oper->state.default_qos = vp_admin->default_qos;
+ vp_oper->state.vlan_proto = vp_admin->vlan_proto;
+ vp_oper->state.link_state = vp_admin->link_state;
+ vp_oper->state.qos_vport = vp_admin->qos_vport;
+
+ if (vp_admin->link_state == IFLA_VF_LINK_STATE_DISABLE)
+ work->flags |= MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE;
+
+ /* iterate over QPs owned by this slave, using UPDATE_QP */
+ work->port = port;
+ work->slave = slave;
+ work->qos = vp_oper->state.default_qos;
+ work->qos_vport = vp_oper->state.qos_vport;
+ work->vlan_id = vp_oper->state.default_vlan;
+ work->vlan_ix = vp_oper->vlan_idx;
+ work->vlan_proto = vp_oper->state.vlan_proto;
+ work->priv = priv;
+ INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler);
+ queue_work(priv->mfunc.master.comm_wq, &work->work);
+
+ return 0;
+}
+
+static void mlx4_set_default_port_qos(struct mlx4_dev *dev, int port)
+{
+ struct mlx4_qos_manager *port_qos_ctl;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ port_qos_ctl = &priv->mfunc.master.qos_ctl[port];
+ bitmap_zero(port_qos_ctl->priority_bm, MLX4_NUM_UP);
+
+ /* Enable only default prio at PF init routine */
+ set_bit(MLX4_DEFAULT_QOS_PRIO, port_qos_ctl->priority_bm);
+}
+
+static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port)
+{
+ int i;
+ int err;
+ int num_vfs;
+ u16 available_vpp;
+ u8 vpp_param[MLX4_NUM_UP];
+ struct mlx4_qos_manager *port_qos;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param);
+ if (err) {
+ mlx4_info(dev, "Failed query available VPPs\n");
+ return;
+ }
+
+ port_qos = &priv->mfunc.master.qos_ctl[port];
+ num_vfs = (available_vpp /
+ bitmap_weight(port_qos->priority_bm, MLX4_NUM_UP));
+
+ for (i = 0; i < MLX4_NUM_UP; i++) {
+ if (test_bit(i, port_qos->priority_bm))
+ vpp_param[i] = num_vfs;
+ }
+
+ err = mlx4_ALLOCATE_VPP_set(dev, port, vpp_param);
+ if (err) {
+ mlx4_info(dev, "Failed allocating VPPs\n");
+ return;
+ }
+
+ /* Query actual allocated VPP, just to make sure */
+ err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param);
+ if (err) {
+ mlx4_info(dev, "Failed query available VPPs\n");
+ return;
+ }
+
+ port_qos->num_of_qos_vfs = num_vfs;
+ mlx4_dbg(dev, "Port %d Available VPPs %d\n", port, available_vpp);
+
+ for (i = 0; i < MLX4_NUM_UP; i++)
+ mlx4_dbg(dev, "Port %d UP %d Allocated %d VPPs\n", port, i,
+ vpp_param[i]);
+}
+
+static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
+{
+ int port, err;
+ struct mlx4_vport_state *vp_admin;
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_slave_state *slave_state =
+ &priv->mfunc.master.slave_state[slave];
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(
+ &priv->dev, slave);
+ int min_port = find_first_bit(actv_ports.ports,
+ priv->dev.caps.num_ports) + 1;
+ int max_port = min_port - 1 +
+ bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports);
+
+ for (port = min_port; port <= max_port; port++) {
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
+ priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+ priv->mfunc.master.vf_admin[slave].enable_smi[port];
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+ if (vp_admin->vlan_proto != htons(ETH_P_8021AD) ||
+ slave_state->vst_qinq_supported) {
+ vp_oper->state.vlan_proto = vp_admin->vlan_proto;
+ vp_oper->state.default_vlan = vp_admin->default_vlan;
+ vp_oper->state.default_qos = vp_admin->default_qos;
+ }
+ vp_oper->state.link_state = vp_admin->link_state;
+ vp_oper->state.mac = vp_admin->mac;
+ vp_oper->state.spoofchk = vp_admin->spoofchk;
+ vp_oper->state.tx_rate = vp_admin->tx_rate;
+ vp_oper->state.qos_vport = vp_admin->qos_vport;
+ vp_oper->state.guid = vp_admin->guid;
+
+ if (MLX4_VGT != vp_admin->default_vlan) {
+ err = __mlx4_register_vlan(&priv->dev, port,
+ vp_admin->default_vlan, &(vp_oper->vlan_idx));
+ if (err) {
+ vp_oper->vlan_idx = NO_INDX;
+ vp_oper->state.default_vlan = MLX4_VGT;
+ vp_oper->state.vlan_proto = htons(ETH_P_8021Q);
+ mlx4_warn(&priv->dev,
+ "No vlan resources slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n",
+ (int)(vp_oper->state.default_vlan),
+ vp_oper->vlan_idx, slave, port);
+ }
+ if (vp_admin->spoofchk) {
+ vp_oper->mac_idx = __mlx4_register_mac(&priv->dev,
+ port,
+ vp_admin->mac);
+ if (0 > vp_oper->mac_idx) {
+ err = vp_oper->mac_idx;
+ vp_oper->mac_idx = NO_INDX;
+ mlx4_warn(&priv->dev,
+ "No mac resources slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ mlx4_dbg(&priv->dev, "alloc mac %llx idx %d slave %d port %d\n",
+ vp_oper->state.mac, vp_oper->mac_idx, slave, port);
+ }
+ }
+ return 0;
+}
+
+static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave)
+{
+ int port;
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(
+ &priv->dev, slave);
+ int min_port = find_first_bit(actv_ports.ports,
+ priv->dev.caps.num_ports) + 1;
+ int max_port = min_port - 1 +
+ bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports);
+
+
+ for (port = min_port; port <= max_port; port++) {
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
+ priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+ MLX4_VF_SMI_DISABLED;
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ if (NO_INDX != vp_oper->vlan_idx) {
+ __mlx4_unregister_vlan(&priv->dev,
+ port, vp_oper->state.default_vlan);
+ vp_oper->vlan_idx = NO_INDX;
+ }
+ if (NO_INDX != vp_oper->mac_idx) {
+ __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac);
+ vp_oper->mac_idx = NO_INDX;
+ }
+ }
+ return;
+}
+
+static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
+ u16 param, u8 toggle)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ u32 reply;
+ u8 is_going_down = 0;
+ int i;
+ unsigned long flags;
+
+ slave_state[slave].comm_toggle ^= 1;
+ reply = (u32) slave_state[slave].comm_toggle << 31;
+ if (toggle != slave_state[slave].comm_toggle) {
+ mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER STATE COMPROMISED ***\n",
+ toggle, slave);
+ goto reset_slave;
+ }
+ if (cmd == MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "Received reset from slave:%d\n", slave);
+ slave_state[slave].active = false;
+ slave_state[slave].old_vlan_api = false;
+ slave_state[slave].vst_qinq_supported = false;
+ mlx4_master_deactivate_admin_state(priv, slave);
+ for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) {
+ slave_state[slave].event_eq[i].eqn = -1;
+ slave_state[slave].event_eq[i].token = 0;
+ }
+ /*check if we are in the middle of FLR process,
+ if so return "retry" status to the slave*/
+ if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
+ goto inform_slave_state;
+
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
+
+ /* write the version in the event field */
+ reply |= mlx4_comm_get_version();
+
+ goto reset_slave;
+ }
+ /*command from slave in the middle of FLR*/
+ if (cmd != MLX4_COMM_CMD_RESET &&
+ MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) {
+ mlx4_warn(dev, "slave:%d is Trying to run cmd(0x%x) in the middle of FLR\n",
+ slave, cmd);
+ return;
+ }
+
+ switch (cmd) {
+ case MLX4_COMM_CMD_VHCR0:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_RESET)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma = ((u64) param) << 48;
+ priv->mfunc.master.slave_state[slave].cookie = 0;
+ break;
+ case MLX4_COMM_CMD_VHCR1:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= ((u64) param) << 32;
+ break;
+ case MLX4_COMM_CMD_VHCR2:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR1)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= ((u64) param) << 16;
+ break;
+ case MLX4_COMM_CMD_VHCR_EN:
+ if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2)
+ goto reset_slave;
+ slave_state[slave].vhcr_dma |= param;
+ if (mlx4_master_activate_admin_state(priv, slave))
+ goto reset_slave;
+ slave_state[slave].active = true;
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
+ break;
+ case MLX4_COMM_CMD_VHCR_POST:
+ if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
+ (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) {
+ mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n",
+ slave, cmd, slave_state[slave].last_cmd);
+ goto reset_slave;
+ }
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ if (mlx4_master_process_vhcr(dev, slave, NULL)) {
+ mlx4_err(dev, "Failed processing vhcr for slave:%d, resetting slave\n",
+ slave);
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ goto reset_slave;
+ }
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ break;
+ default:
+ mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
+ goto reset_slave;
+ }
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (!slave_state[slave].is_slave_going_down)
+ slave_state[slave].last_cmd = cmd;
+ else
+ is_going_down = 1;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ if (is_going_down) {
+ mlx4_warn(dev, "Slave is going down aborting command(%d) executing from slave:%d\n",
+ cmd, slave);
+ return;
+ }
+ __raw_writel((__force u32) cpu_to_be32(reply),
+ &priv->mfunc.comm[slave].slave_read);
+ mmiowb();
+
+ return;
+
+reset_slave:
+ /* cleanup any slave resources */
+ if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_delete_all_resources_for_slave(dev, slave);
+
+ if (cmd != MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n",
+ slave, cmd);
+ /* Turn on internal error letting slave reset itself immeditaly,
+ * otherwise it might take till timeout on command is passed
+ */
+ reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR);
+ }
+
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (!slave_state[slave].is_slave_going_down)
+ slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ /*with slave in the middle of flr, no need to clean resources again.*/
+inform_slave_state:
+ memset(&slave_state[slave].event_eq, 0,
+ sizeof(struct mlx4_slave_event_eq_info));
+ __raw_writel((__force u32) cpu_to_be32(reply),
+ &priv->mfunc.comm[slave].slave_read);
+ wmb();
+}
+
+/* master command processing */
+void mlx4_master_comm_channel(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work,
+ struct mlx4_mfunc_master_ctx,
+ comm_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv =
+ container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ __be32 *bit_vec;
+ u32 comm_cmd;
+ u32 vec;
+ int i, j, slave;
+ int toggle;
+ int served = 0;
+ int reported = 0;
+ u32 slt;
+
+ bit_vec = master->comm_arm_bit_vector;
+ for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
+ vec = be32_to_cpu(bit_vec[i]);
+ for (j = 0; j < 32; j++) {
+ if (!(vec & (1 << j)))
+ continue;
+ ++reported;
+ slave = (i * 32) + j;
+ comm_cmd = swab32(readl(
+ &mfunc->comm[slave].slave_write));
+ slt = swab32(readl(&mfunc->comm[slave].slave_read))
+ >> 31;
+ toggle = comm_cmd >> 31;
+ if (toggle != slt) {
+ if (master->slave_state[slave].comm_toggle
+ != slt) {
+ pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
+ slave, slt,
+ master->slave_state[slave].comm_toggle);
+ master->slave_state[slave].comm_toggle =
+ slt;
+ }
+ mlx4_master_do_cmd(dev, slave,
+ comm_cmd >> 16 & 0xff,
+ comm_cmd & 0xffff, toggle);
+ ++served;
+ }
+ }
+ }
+
+ if (reported && reported != served)
+ mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
+ reported, served);
+
+ if (mlx4_ARM_COMM_CHANNEL(dev))
+ mlx4_warn(dev, "Failed to arm comm channel events\n");
+}
+
+static int sync_toggles(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 wr_toggle;
+ u32 rd_toggle;
+ unsigned long end;
+
+ wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write));
+ if (wr_toggle == 0xffffffff)
+ end = jiffies + msecs_to_jiffies(30000);
+ else
+ end = jiffies + msecs_to_jiffies(5000);
+
+ while (time_before(jiffies, end)) {
+ rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
+ if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
+ /* PCI might be offline */
+
+ /* If device removal has been requested,
+ * do not continue retrying.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_NOWAIT) {
+ mlx4_warn(dev,
+ "communication channel is offline\n");
+ return -EIO;
+ }
+
+ msleep(100);
+ wr_toggle = swab32(readl(&priv->mfunc.comm->
+ slave_write));
+ continue;
+ }
+
+ if (rd_toggle >> 31 == wr_toggle >> 31) {
+ priv->cmd.comm_toggle = rd_toggle >> 31;
+ return 0;
+ }
+
+ cond_resched();
+ }
+
+ /*
+ * we could reach here if for example the previous VM using this
+ * function misbehaved and left the channel with unsynced state. We
+ * should fix this here and give this VM a chance to use a properly
+ * synced channel
+ */
+ mlx4_warn(dev, "recovering from previously mis-behaved VM\n");
+ __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read);
+ __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write);
+ priv->cmd.comm_toggle = 0;
+
+ return 0;
+}
+
+int mlx4_multi_func_init(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state;
+ int i, j, err, port;
+
+ if (mlx4_is_master(dev))
+ priv->mfunc.comm =
+ ioremap(pci_resource_start(dev->persist->pdev,
+ priv->fw.comm_bar) +
+ priv->fw.comm_base, MLX4_COMM_PAGESIZE);
+ else
+ priv->mfunc.comm =
+ ioremap(pci_resource_start(dev->persist->pdev, 2) +
+ MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
+ if (!priv->mfunc.comm) {
+ mlx4_err(dev, "Couldn't map communication vector\n");
+ goto err_vhcr;
+ }
+
+ if (mlx4_is_master(dev)) {
+ struct mlx4_vf_oper_state *vf_oper;
+ struct mlx4_vf_admin_state *vf_admin;
+
+ priv->mfunc.master.slave_state =
+ kcalloc(dev->num_slaves,
+ sizeof(struct mlx4_slave_state),
+ GFP_KERNEL);
+ if (!priv->mfunc.master.slave_state)
+ goto err_comm;
+
+ priv->mfunc.master.vf_admin =
+ kcalloc(dev->num_slaves,
+ sizeof(struct mlx4_vf_admin_state),
+ GFP_KERNEL);
+ if (!priv->mfunc.master.vf_admin)
+ goto err_comm_admin;
+
+ priv->mfunc.master.vf_oper =
+ kcalloc(dev->num_slaves,
+ sizeof(struct mlx4_vf_oper_state),
+ GFP_KERNEL);
+ if (!priv->mfunc.master.vf_oper)
+ goto err_comm_oper;
+
+ for (i = 0; i < dev->num_slaves; ++i) {
+ vf_admin = &priv->mfunc.master.vf_admin[i];
+ vf_oper = &priv->mfunc.master.vf_oper[i];
+ s_state = &priv->mfunc.master.slave_state[i];
+ s_state->last_cmd = MLX4_COMM_CMD_RESET;
+ s_state->vst_qinq_supported = false;
+ mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]);
+ for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j)
+ s_state->event_eq[j].eqn = -1;
+ __raw_writel((__force u32) 0,
+ &priv->mfunc.comm[i].slave_write);
+ __raw_writel((__force u32) 0,
+ &priv->mfunc.comm[i].slave_read);
+ mmiowb();
+ for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+ struct mlx4_vport_state *admin_vport;
+ struct mlx4_vport_state *oper_vport;
+
+ s_state->vlan_filter[port] =
+ kzalloc(sizeof(struct mlx4_vlan_fltr),
+ GFP_KERNEL);
+ if (!s_state->vlan_filter[port]) {
+ if (--port)
+ kfree(s_state->vlan_filter[port]);
+ goto err_slaves;
+ }
+
+ admin_vport = &vf_admin->vport[port];
+ oper_vport = &vf_oper->vport[port].state;
+ INIT_LIST_HEAD(&s_state->mcast_filters[port]);
+ admin_vport->default_vlan = MLX4_VGT;
+ oper_vport->default_vlan = MLX4_VGT;
+ admin_vport->qos_vport =
+ MLX4_VPP_DEFAULT_VPORT;
+ oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
+ admin_vport->vlan_proto = htons(ETH_P_8021Q);
+ oper_vport->vlan_proto = htons(ETH_P_8021Q);
+ vf_oper->vport[port].vlan_idx = NO_INDX;
+ vf_oper->vport[port].mac_idx = NO_INDX;
+ mlx4_set_random_admin_guid(dev, i, port);
+ }
+ spin_lock_init(&s_state->lock);
+ }
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP) {
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ if (mlx4_is_eth(dev, port)) {
+ mlx4_set_default_port_qos(dev, port);
+ mlx4_allocate_port_vpps(dev, port);
+ }
+ }
+ }
+
+ memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+ priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
+ INIT_WORK(&priv->mfunc.master.comm_work,
+ mlx4_master_comm_channel);
+ INIT_WORK(&priv->mfunc.master.slave_event_work,
+ mlx4_gen_slave_eqe);
+ INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
+ mlx4_master_handle_slave_flr);
+ spin_lock_init(&priv->mfunc.master.slave_state_lock);
+ spin_lock_init(&priv->mfunc.master.slave_eq.event_lock);
+ priv->mfunc.master.comm_wq =
+ create_singlethread_workqueue("mlx4_comm");
+ if (!priv->mfunc.master.comm_wq)
+ goto err_slaves;
+
+ if (mlx4_init_resource_tracker(dev))
+ goto err_thread;
+
+ } else {
+ err = sync_toggles(dev);
+ if (err) {
+ mlx4_err(dev, "Couldn't sync toggles\n");
+ goto err_comm;
+ }
+ }
+ return 0;
+
+err_thread:
+ flush_workqueue(priv->mfunc.master.comm_wq);
+ destroy_workqueue(priv->mfunc.master.comm_wq);
+err_slaves:
+ while (i--) {
+ for (port = 1; port <= MLX4_MAX_PORTS; port++)
+ kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+ }
+ kfree(priv->mfunc.master.vf_oper);
+err_comm_oper:
+ kfree(priv->mfunc.master.vf_admin);
+err_comm_admin:
+ kfree(priv->mfunc.master.slave_state);
+err_comm:
+ iounmap(priv->mfunc.comm);
+ priv->mfunc.comm = NULL;
+err_vhcr:
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+ priv->mfunc.vhcr,
+ priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
+ return -ENOMEM;
+}
+
+int mlx4_cmd_init(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int flags = 0;
+
+ if (!priv->cmd.initialized) {
+ init_rwsem(&priv->cmd.switch_sem);
+ mutex_init(&priv->cmd.slave_cmd_mutex);
+ sema_init(&priv->cmd.poll_sem, 1);
+ priv->cmd.use_events = 0;
+ priv->cmd.toggle = 1;
+ priv->cmd.initialized = 1;
+ flags |= MLX4_CMD_CLEANUP_STRUCT;
+ }
+
+ if (!mlx4_is_slave(dev) && !priv->cmd.hcr) {
+ priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev,
+ 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE);
+ if (!priv->cmd.hcr) {
+ mlx4_err(dev, "Couldn't map command register\n");
+ goto err;
+ }
+ flags |= MLX4_CMD_CLEANUP_HCR;
+ }
+
+ if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) {
+ priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev,
+ PAGE_SIZE,
+ &priv->mfunc.vhcr_dma,
+ GFP_KERNEL);
+ if (!priv->mfunc.vhcr)
+ goto err;
+
+ flags |= MLX4_CMD_CLEANUP_VHCR;
+ }
+
+ if (!priv->cmd.pool) {
+ priv->cmd.pool = dma_pool_create("mlx4_cmd",
+ &dev->persist->pdev->dev,
+ MLX4_MAILBOX_SIZE,
+ MLX4_MAILBOX_SIZE, 0);
+ if (!priv->cmd.pool)
+ goto err;
+
+ flags |= MLX4_CMD_CLEANUP_POOL;
+ }
+
+ return 0;
+
+err:
+ mlx4_cmd_cleanup(dev, flags);
+ return -ENOMEM;
+}
+
+void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int slave;
+ u32 slave_read;
+
+ /* If the comm channel has not yet been initialized,
+ * skip reporting the internal error event to all
+ * the communication channels.
+ */
+ if (!priv->mfunc.comm)
+ return;
+
+ /* Report an internal error event to all
+ * communication channels.
+ */
+ for (slave = 0; slave < dev->num_slaves; slave++) {
+ slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read));
+ slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR;
+ __raw_writel((__force u32)cpu_to_be32(slave_read),
+ &priv->mfunc.comm[slave].slave_read);
+ /* Make sure that our comm channel write doesn't
+ * get mixed in with writes from another CPU.
+ */
+ mmiowb();
+ }
+}
+
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, port;
+
+ if (mlx4_is_master(dev)) {
+ flush_workqueue(priv->mfunc.master.comm_wq);
+ destroy_workqueue(priv->mfunc.master.comm_wq);
+ for (i = 0; i < dev->num_slaves; i++) {
+ for (port = 1; port <= MLX4_MAX_PORTS; port++)
+ kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+ }
+ kfree(priv->mfunc.master.slave_state);
+ kfree(priv->mfunc.master.vf_admin);
+ kfree(priv->mfunc.master.vf_oper);
+ dev->num_slaves = 0;
+ }
+
+ iounmap(priv->mfunc.comm);
+ priv->mfunc.comm = NULL;
+}
+
+void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (priv->cmd.pool && (cleanup_mask & MLX4_CMD_CLEANUP_POOL)) {
+ dma_pool_destroy(priv->cmd.pool);
+ priv->cmd.pool = NULL;
+ }
+
+ if (!mlx4_is_slave(dev) && priv->cmd.hcr &&
+ (cleanup_mask & MLX4_CMD_CLEANUP_HCR)) {
+ iounmap(priv->cmd.hcr);
+ priv->cmd.hcr = NULL;
+ }
+ if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr &&
+ (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) {
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+ priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
+ priv->mfunc.vhcr = NULL;
+ }
+ if (priv->cmd.initialized && (cleanup_mask & MLX4_CMD_CLEANUP_STRUCT))
+ priv->cmd.initialized = 0;
+}
+
+/*
+ * Switch to using events to issue FW commands (can only be called
+ * after event queue for command events has been initialized).
+ */
+int mlx4_cmd_use_events(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+ int err = 0;
+
+ priv->cmd.context = kmalloc_array(priv->cmd.max_cmds,
+ sizeof(struct mlx4_cmd_context),
+ GFP_KERNEL);
+ if (!priv->cmd.context)
+ return -ENOMEM;
+
+ if (mlx4_is_mfunc(dev))
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ down_write(&priv->cmd.switch_sem);
+ for (i = 0; i < priv->cmd.max_cmds; ++i) {
+ priv->cmd.context[i].token = i;
+ priv->cmd.context[i].next = i + 1;
+ /* To support fatal error flow, initialize all
+ * cmd contexts to allow simulating completions
+ * with complete() at any time.
+ */
+ init_completion(&priv->cmd.context[i].done);
+ }
+
+ priv->cmd.context[priv->cmd.max_cmds - 1].next = -1;
+ priv->cmd.free_head = 0;
+
+ sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
+
+ for (priv->cmd.token_mask = 1;
+ priv->cmd.token_mask < priv->cmd.max_cmds;
+ priv->cmd.token_mask <<= 1)
+ ; /* nothing */
+ --priv->cmd.token_mask;
+
+ down(&priv->cmd.poll_sem);
+ priv->cmd.use_events = 1;
+ up_write(&priv->cmd.switch_sem);
+ if (mlx4_is_mfunc(dev))
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+
+ return err;
+}
+
+/*
+ * Switch back to polling (used when shutting down the device)
+ */
+void mlx4_cmd_use_polling(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ if (mlx4_is_mfunc(dev))
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ down_write(&priv->cmd.switch_sem);
+ priv->cmd.use_events = 0;
+
+ for (i = 0; i < priv->cmd.max_cmds; ++i)
+ down(&priv->cmd.event_sem);
+
+ kfree(priv->cmd.context);
+ priv->cmd.context = NULL;
+
+ up(&priv->cmd.poll_sem);
+ up_write(&priv->cmd.switch_sem);
+ if (mlx4_is_mfunc(dev))
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+}
+
+struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = dma_pool_zalloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
+ &mailbox->dma);
+ if (!mailbox->buf) {
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mailbox;
+}
+EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
+
+void mlx4_free_cmd_mailbox(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+ if (!mailbox)
+ return;
+
+ dma_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
+EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox);
+
+u32 mlx4_comm_get_version(void)
+{
+ return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER;
+}
+
+static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf)
+{
+ if ((vf < 0) || (vf >= dev->persist->num_vfs)) {
+ mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n",
+ vf, dev->persist->num_vfs);
+ return -EINVAL;
+ }
+
+ return vf+1;
+}
+
+int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave)
+{
+ if (slave < 1 || slave > dev->persist->num_vfs) {
+ mlx4_err(dev,
+ "Bad slave number:%d (number of activated slaves: %lu)\n",
+ slave, dev->num_slaves);
+ return -EINVAL;
+ }
+ return slave - 1;
+}
+
+void mlx4_cmd_wake_completions(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_context *context;
+ int i;
+
+ spin_lock(&priv->cmd.context_lock);
+ if (priv->cmd.context) {
+ for (i = 0; i < priv->cmd.max_cmds; ++i) {
+ context = &priv->cmd.context[i];
+ context->fw_status = CMD_STAT_INTERNAL_ERR;
+ context->result =
+ mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR);
+ complete(&context->done);
+ }
+ }
+ spin_unlock(&priv->cmd.context_lock);
+}
+
+struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_active_ports actv_ports;
+ int vf;
+
+ bitmap_zero(actv_ports.ports, MLX4_MAX_PORTS);
+
+ if (slave == 0) {
+ bitmap_fill(actv_ports.ports, dev->caps.num_ports);
+ return actv_ports;
+ }
+
+ vf = mlx4_get_vf_indx(dev, slave);
+ if (vf < 0)
+ return actv_ports;
+
+ bitmap_set(actv_ports.ports, dev->dev_vfs[vf].min_port - 1,
+ min((int)dev->dev_vfs[mlx4_get_vf_indx(dev, slave)].n_ports,
+ dev->caps.num_ports));
+
+ return actv_ports;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_active_ports);
+
+int mlx4_slave_convert_port(struct mlx4_dev *dev, int slave, int port)
+{
+ unsigned n;
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+ unsigned m = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+
+ if (port <= 0 || port > m)
+ return -EINVAL;
+
+ n = find_first_bit(actv_ports.ports, dev->caps.num_ports);
+ if (port <= n)
+ port = n + 1;
+
+ return port;
+}
+EXPORT_SYMBOL_GPL(mlx4_slave_convert_port);
+
+int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port)
+{
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+ if (test_bit(port - 1, actv_ports.ports))
+ return port -
+ find_first_bit(actv_ports.ports, dev->caps.num_ports);
+
+ return -1;
+}
+EXPORT_SYMBOL_GPL(mlx4_phys_to_slave_port);
+
+struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev,
+ int port)
+{
+ unsigned i;
+ struct mlx4_slaves_pport slaves_pport;
+
+ bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX);
+
+ if (port <= 0 || port > dev->caps.num_ports)
+ return slaves_pport;
+
+ for (i = 0; i < dev->persist->num_vfs + 1; i++) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev, i);
+ if (test_bit(port - 1, actv_ports.ports))
+ set_bit(i, slaves_pport.slaves);
+ }
+
+ return slaves_pport;
+}
+EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport);
+
+struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv(
+ struct mlx4_dev *dev,
+ const struct mlx4_active_ports *crit_ports)
+{
+ unsigned i;
+ struct mlx4_slaves_pport slaves_pport;
+
+ bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX);
+
+ for (i = 0; i < dev->persist->num_vfs + 1; i++) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev, i);
+ if (bitmap_equal(crit_ports->ports, actv_ports.ports,
+ dev->caps.num_ports))
+ set_bit(i, slaves_pport.slaves);
+ }
+
+ return slaves_pport;
+}
+EXPORT_SYMBOL_GPL(mlx4_phys_to_slaves_pport_actv);
+
+static int mlx4_slaves_closest_port(struct mlx4_dev *dev, int slave, int port)
+{
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+ int min_port = find_first_bit(actv_ports.ports, dev->caps.num_ports)
+ + 1;
+ int max_port = min_port +
+ bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+
+ if (port < min_port)
+ port = min_port;
+ else if (port >= max_port)
+ port = max_port - 1;
+
+ return port;
+}
+
+static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port,
+ int max_tx_rate)
+{
+ int i;
+ int err;
+ struct mlx4_qos_manager *port_qos;
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_vport_qos_param vpp_qos[MLX4_NUM_UP];
+
+ port_qos = &priv->mfunc.master.qos_ctl[port];
+ memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP);
+
+ if (slave > port_qos->num_of_qos_vfs) {
+ mlx4_info(dev, "No available VPP resources for this VF\n");
+ return -EINVAL;
+ }
+
+ /* Query for default QoS values from Vport 0 is needed */
+ err = mlx4_SET_VPORT_QOS_get(dev, port, 0, vpp_qos);
+ if (err) {
+ mlx4_info(dev, "Failed to query Vport 0 QoS values\n");
+ return err;
+ }
+
+ for (i = 0; i < MLX4_NUM_UP; i++) {
+ if (test_bit(i, port_qos->priority_bm) && max_tx_rate) {
+ vpp_qos[i].max_avg_bw = max_tx_rate;
+ vpp_qos[i].enable = 1;
+ } else {
+ /* if user supplied tx_rate == 0, meaning no rate limit
+ * configuration is required. so we are leaving the
+ * value of max_avg_bw as queried from Vport 0.
+ */
+ vpp_qos[i].enable = 0;
+ }
+ }
+
+ err = mlx4_SET_VPORT_QOS_set(dev, port, slave, vpp_qos);
+ if (err) {
+ mlx4_info(dev, "Failed to set Vport %d QoS values\n", slave);
+ return err;
+ }
+
+ return 0;
+}
+
+static bool mlx4_is_vf_vst_and_prio_qos(struct mlx4_dev *dev, int port,
+ struct mlx4_vport_state *vf_admin)
+{
+ struct mlx4_qos_manager *info;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (!mlx4_is_master(dev) ||
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP))
+ return false;
+
+ info = &priv->mfunc.master.qos_ctl[port];
+
+ if (vf_admin->default_vlan != MLX4_VGT &&
+ test_bit(vf_admin->default_qos, info->priority_bm))
+ return true;
+
+ return false;
+}
+
+static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port,
+ struct mlx4_vport_state *vf_admin,
+ int vlan, int qos)
+{
+ struct mlx4_vport_state dummy_admin = {0};
+
+ if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) ||
+ !vf_admin->tx_rate)
+ return true;
+
+ dummy_admin.default_qos = qos;
+ dummy_admin.default_vlan = vlan;
+
+ /* VF wants to move to other VST state which is valid with current
+ * rate limit. Either differnt default vlan in VST or other
+ * supported QoS priority. Otherwise we don't allow this change when
+ * the TX rate is still configured.
+ */
+ if (mlx4_is_vf_vst_and_prio_qos(dev, port, &dummy_admin))
+ return true;
+
+ mlx4_info(dev, "Cannot change VF state to %s while rate is set\n",
+ (vlan == MLX4_VGT) ? "VGT" : "VST");
+
+ if (vlan != MLX4_VGT)
+ mlx4_info(dev, "VST priority %d not supported for QoS\n", qos);
+
+ mlx4_info(dev, "Please set rate to 0 prior to this VF state change\n");
+
+ return false;
+}
+
+int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ int slave;
+
+ if (!mlx4_is_master(dev))
+ return -EPROTONOSUPPORT;
+
+ if (is_multicast_ether_addr(mac))
+ return -EINVAL;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ port = mlx4_slaves_closest_port(dev, slave, port);
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+ if (s_info->spoofchk && is_zero_ether_addr(mac)) {
+ mlx4_info(dev, "MAC invalidation is not allowed when spoofchk is on\n");
+ return -EPERM;
+ }
+
+ s_info->mac = mlx4_mac_to_u64(mac);
+ mlx4_info(dev, "default mac on vf %d port %d to %llX will take effect only after vf restart\n",
+ vf, port, s_info->mac);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_mac);
+
+
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos,
+ __be16 proto)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *vf_admin;
+ struct mlx4_slave_state *slave_state;
+ struct mlx4_vport_oper_state *vf_oper;
+ int slave;
+
+ if ((!mlx4_is_master(dev)) ||
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL))
+ return -EPROTONOSUPPORT;
+
+ if ((vlan > 4095) || (qos > 7))
+ return -EINVAL;
+
+ if (proto == htons(ETH_P_8021AD) &&
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP))
+ return -EPROTONOSUPPORT;
+
+ if (proto != htons(ETH_P_8021Q) &&
+ proto != htons(ETH_P_8021AD))
+ return -EINVAL;
+
+ if ((proto == htons(ETH_P_8021AD)) &&
+ ((vlan == 0) || (vlan == MLX4_VGT)))
+ return -EINVAL;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ slave_state = &priv->mfunc.master.slave_state[slave];
+ if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) &&
+ (!slave_state->vst_qinq_supported)) {
+ mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf);
+ return -EPROTONOSUPPORT;
+ }
+ port = mlx4_slaves_closest_port(dev, slave, port);
+ vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+ vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+
+ if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos))
+ return -EPERM;
+
+ if ((0 == vlan) && (0 == qos))
+ vf_admin->default_vlan = MLX4_VGT;
+ else
+ vf_admin->default_vlan = vlan;
+ vf_admin->default_qos = qos;
+ vf_admin->vlan_proto = proto;
+
+ /* If rate was configured prior to VST, we saved the configured rate
+ * in vf_admin->rate and now, if priority supported we enforce the QoS
+ */
+ if (mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin) &&
+ vf_admin->tx_rate)
+ vf_admin->qos_vport = slave;
+
+ /* Try to activate new vf state without restart,
+ * this option is not supported while moving to VST QinQ mode.
+ */
+ if ((proto == htons(ETH_P_8021AD) &&
+ vf_oper->state.vlan_proto != proto) ||
+ mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
+ mlx4_info(dev,
+ "updating vf %d port %d config will take effect on next VF restart\n",
+ vf, port);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan);
+
+int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ int err;
+ int slave;
+ struct mlx4_vport_state *vf_admin;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (!mlx4_is_master(dev) ||
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP))
+ return -EPROTONOSUPPORT;
+
+ if (min_tx_rate) {
+ mlx4_info(dev, "Minimum BW share not supported\n");
+ return -EPROTONOSUPPORT;
+ }
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ port = mlx4_slaves_closest_port(dev, slave, port);
+ vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+ err = mlx4_set_vport_qos(priv, slave, port, max_tx_rate);
+ if (err) {
+ mlx4_info(dev, "vf %d failed to set rate %d\n", vf,
+ max_tx_rate);
+ return err;
+ }
+
+ vf_admin->tx_rate = max_tx_rate;
+ /* if VF is not in supported mode (VST with supported prio),
+ * we do not change vport configuration for its QPs, but save
+ * the rate, so it will be enforced when it moves to supported
+ * mode next time.
+ */
+ if (!mlx4_is_vf_vst_and_prio_qos(dev, port, vf_admin)) {
+ mlx4_info(dev,
+ "rate set for VF %d when not in valid state\n", vf);
+
+ if (vf_admin->default_vlan != MLX4_VGT)
+ mlx4_info(dev, "VST priority not supported by QoS\n");
+ else
+ mlx4_info(dev, "VF in VGT mode (needed VST)\n");
+
+ mlx4_info(dev,
+ "rate %d take affect when VF moves to valid state\n",
+ max_tx_rate);
+ return 0;
+ }
+
+ /* If user sets rate 0 assigning default vport for its QPs */
+ vf_admin->qos_vport = max_tx_rate ? slave : MLX4_VPP_DEFAULT_VPORT;
+
+ if (priv->mfunc.master.slave_state[slave].active &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP)
+ mlx4_master_immediate_activate_vlan_qos(priv, slave, port);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_rate);
+
+ /* mlx4_get_slave_default_vlan -
+ * return true if VST ( default vlan)
+ * if VST, will return vlan & qos (if not NULL)
+ */
+bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave,
+ u16 *vlan, u8 *qos)
+{
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_priv *priv;
+
+ priv = mlx4_priv(dev);
+ port = mlx4_slaves_closest_port(dev, slave, port);
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+
+ if (MLX4_VGT != vp_oper->state.default_vlan) {
+ if (vlan)
+ *vlan = vp_oper->state.default_vlan;
+ if (qos)
+ *qos = vp_oper->state.default_qos;
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan);
+
+int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ int slave;
+ u8 mac[ETH_ALEN];
+
+ if ((!mlx4_is_master(dev)) ||
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM))
+ return -EPROTONOSUPPORT;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ port = mlx4_slaves_closest_port(dev, slave, port);
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+ mlx4_u64_to_mac(mac, s_info->mac);
+ if (setting && !is_valid_ether_addr(mac)) {
+ mlx4_info(dev, "Illegal MAC with spoofchk\n");
+ return -EPERM;
+ }
+
+ s_info->spoofchk = setting;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk);
+
+int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_info *ivf)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ int slave;
+
+ if (!mlx4_is_master(dev))
+ return -EPROTONOSUPPORT;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+ ivf->vf = vf;
+
+ /* need to convert it to a func */
+ ivf->mac[0] = ((s_info->mac >> (5*8)) & 0xff);
+ ivf->mac[1] = ((s_info->mac >> (4*8)) & 0xff);
+ ivf->mac[2] = ((s_info->mac >> (3*8)) & 0xff);
+ ivf->mac[3] = ((s_info->mac >> (2*8)) & 0xff);
+ ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff);
+ ivf->mac[5] = ((s_info->mac) & 0xff);
+
+ ivf->vlan = s_info->default_vlan;
+ ivf->qos = s_info->default_qos;
+ ivf->vlan_proto = s_info->vlan_proto;
+
+ if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info))
+ ivf->max_tx_rate = s_info->tx_rate;
+ else
+ ivf->max_tx_rate = 0;
+
+ ivf->min_tx_rate = 0;
+ ivf->spoofchk = s_info->spoofchk;
+ ivf->linkstate = s_info->link_state;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_vf_config);
+
+int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_vport_state *s_info;
+ int slave;
+ u8 link_stat_event;
+
+ slave = mlx4_get_slave_indx(dev, vf);
+ if (slave < 0)
+ return -EINVAL;
+
+ port = mlx4_slaves_closest_port(dev, slave, port);
+ switch (link_state) {
+ case IFLA_VF_LINK_STATE_AUTO:
+ /* get current link state */
+ if (!priv->sense.do_sense_port[port])
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE;
+ else
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN;
+ break;
+
+ case IFLA_VF_LINK_STATE_ENABLE:
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE;
+ break;
+
+ case IFLA_VF_LINK_STATE_DISABLE:
+ link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN;
+ break;
+
+ default:
+ mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n",
+ link_state, slave, port);
+ return -EINVAL;
+ };
+ s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
+ s_info->link_state = link_state;
+
+ /* send event */
+ mlx4_gen_port_state_change_eqe(dev, slave, port, link_stat_event);
+
+ if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
+ mlx4_dbg(dev,
+ "updating vf %d port %d no link state HW enforcement\n",
+ vf, port);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
+
+int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index,
+ struct mlx4_counter *counter_stats, int reset)
+{
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+ struct mlx4_counter *tmp_counter;
+ int err;
+ u32 if_stat_in_mod;
+
+ if (!counter_stats)
+ return -EINVAL;
+
+ if (counter_index == MLX4_SINK_COUNTER_INDEX(dev))
+ return 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memset(mailbox->buf, 0, sizeof(struct mlx4_counter));
+ if_stat_in_mod = counter_index;
+ if (reset)
+ if_stat_in_mod |= MLX4_QUERY_IF_STAT_RESET;
+ err = mlx4_cmd_box(dev, 0, mailbox->dma,
+ if_stat_in_mod, 0,
+ MLX4_CMD_QUERY_IF_STAT,
+ MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n",
+ __func__, counter_index);
+ goto if_stat_out;
+ }
+ tmp_counter = (struct mlx4_counter *)mailbox->buf;
+ counter_stats->counter_mode = tmp_counter->counter_mode;
+ if (counter_stats->counter_mode == 0) {
+ counter_stats->rx_frames =
+ cpu_to_be64(be64_to_cpu(counter_stats->rx_frames) +
+ be64_to_cpu(tmp_counter->rx_frames));
+ counter_stats->tx_frames =
+ cpu_to_be64(be64_to_cpu(counter_stats->tx_frames) +
+ be64_to_cpu(tmp_counter->tx_frames));
+ counter_stats->rx_bytes =
+ cpu_to_be64(be64_to_cpu(counter_stats->rx_bytes) +
+ be64_to_cpu(tmp_counter->rx_bytes));
+ counter_stats->tx_bytes =
+ cpu_to_be64(be64_to_cpu(counter_stats->tx_bytes) +
+ be64_to_cpu(tmp_counter->tx_bytes));
+ }
+
+if_stat_out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_counter_stats);
+
+int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct mlx4_counter tmp_vf_stats;
+ int slave;
+ int err = 0;
+
+ if (!vf_stats)
+ return -EINVAL;
+
+ if (!mlx4_is_master(dev))
+ return -EPROTONOSUPPORT;
+
+ slave = mlx4_get_slave_indx(dev, vf_idx);
+ if (slave < 0)
+ return -EINVAL;
+
+ port = mlx4_slaves_closest_port(dev, slave, port);
+ err = mlx4_calc_vf_counters(dev, slave, port, &tmp_vf_stats);
+ if (!err && tmp_vf_stats.counter_mode == 0) {
+ vf_stats->rx_packets = be64_to_cpu(tmp_vf_stats.rx_frames);
+ vf_stats->tx_packets = be64_to_cpu(tmp_vf_stats.tx_frames);
+ vf_stats->rx_bytes = be64_to_cpu(tmp_vf_stats.rx_bytes);
+ vf_stats->tx_bytes = be64_to_cpu(tmp_vf_stats.tx_bytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_vf_stats);
+
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (slave < 1 || slave >= dev->num_slaves ||
+ port < 1 || port > MLX4_MAX_PORTS)
+ return 0;
+
+ return priv->mfunc.master.vf_oper[slave].smi_enabled[port] ==
+ MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
+
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (slave == mlx4_master_func_num(dev))
+ return 1;
+
+ if (slave < 1 || slave >= dev->num_slaves ||
+ port < 1 || port > MLX4_MAX_PORTS)
+ return 0;
+
+ return priv->mfunc.master.vf_admin[slave].enable_smi[port] ==
+ MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin);
+
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+ int enabled)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(
+ &priv->dev, slave);
+ int min_port = find_first_bit(actv_ports.ports,
+ priv->dev.caps.num_ports) + 1;
+ int max_port = min_port - 1 +
+ bitmap_weight(actv_ports.ports, priv->dev.caps.num_ports);
+
+ if (slave == mlx4_master_func_num(dev))
+ return 0;
+
+ if (slave < 1 || slave >= dev->num_slaves ||
+ port < 1 || port > MLX4_MAX_PORTS ||
+ enabled < 0 || enabled > 1)
+ return -EINVAL;
+
+ if (min_port == max_port && dev->caps.num_ports > 1) {
+ mlx4_info(dev, "SMI access disallowed for single ported VFs\n");
+ return -EPROTONOSUPPORT;
+ }
+
+ priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
new file mode 100644
index 000000000..d8e9a3231
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hardirq.h>
+#include <linux/export.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/cq.h>
+
+#include "mlx4.h"
+#include "icm.h"
+
+#define MLX4_CQ_STATUS_OK ( 0 << 28)
+#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28)
+#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28)
+#define MLX4_CQ_FLAG_CC ( 1 << 18)
+#define MLX4_CQ_FLAG_OI ( 1 << 17)
+#define MLX4_CQ_STATE_ARMED ( 9 << 8)
+#define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8)
+#define MLX4_EQ_STATE_FIRED (10 << 8)
+
+#define TASKLET_MAX_TIME 2
+#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
+
+void mlx4_cq_tasklet_cb(unsigned long data)
+{
+ unsigned long flags;
+ unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
+ struct mlx4_eq_tasklet *ctx = (struct mlx4_eq_tasklet *)data;
+ struct mlx4_cq *mcq, *temp;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ list_splice_tail_init(&ctx->list, &ctx->process_list);
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) {
+ list_del_init(&mcq->tasklet_ctx.list);
+ mcq->tasklet_ctx.comp(mcq);
+ if (refcount_dec_and_test(&mcq->refcount))
+ complete(&mcq->free);
+ if (time_after(jiffies, end))
+ break;
+ }
+
+ if (!list_empty(&ctx->process_list))
+ tasklet_schedule(&ctx->task);
+}
+
+static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq)
+{
+ struct mlx4_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
+ unsigned long flags;
+ bool kick;
+
+ spin_lock_irqsave(&tasklet_ctx->lock, flags);
+ /* When migrating CQs between EQs will be implemented, please note
+ * that you need to sync this point. It is possible that
+ * while migrating a CQ, completions on the old EQs could
+ * still arrive.
+ */
+ if (list_empty_careful(&cq->tasklet_ctx.list)) {
+ refcount_inc(&cq->refcount);
+ kick = list_empty(&tasklet_ctx->list);
+ list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
+ if (kick)
+ tasklet_schedule(&tasklet_ctx->task);
+ }
+ spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
+}
+
+void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
+{
+ struct mlx4_cq *cq;
+
+ rcu_read_lock();
+ cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
+ cqn & (dev->caps.num_cqs - 1));
+ rcu_read_unlock();
+
+ if (!cq) {
+ mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
+ return;
+ }
+
+ /* Acessing the CQ outside of rcu_read_lock is safe, because
+ * the CQ is freed only after interrupt handling is completed.
+ */
+ ++cq->arm_sn;
+
+ cq->comp(cq);
+}
+
+void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
+{
+ struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
+ struct mlx4_cq *cq;
+
+ rcu_read_lock();
+ cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
+ rcu_read_unlock();
+
+ if (!cq) {
+ mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn);
+ return;
+ }
+
+ /* Acessing the CQ outside of rcu_read_lock is safe, because
+ * the CQ is freed only after interrupt handling is completed.
+ */
+ cq->event(cq, event_type);
+}
+
+static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int cq_num)
+{
+ return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
+ MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int cq_num, u32 opmod)
+{
+ return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int cq_num)
+{
+ return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+ cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
+ u16 count, u16 period)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_cq_context *cq_context;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ cq_context = mailbox->buf;
+ cq_context->cq_max_count = cpu_to_be16(count);
+ cq_context->cq_period = cpu_to_be16(period);
+
+ err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 1);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_modify);
+
+int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
+ int entries, struct mlx4_mtt *mtt)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_cq_context *cq_context;
+ u64 mtt_addr;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ cq_context = mailbox->buf;
+ cq_context->logsize_usrpage = cpu_to_be32(ilog2(entries) << 24);
+ cq_context->log_page_size = mtt->page_shift - 12;
+ mtt_addr = mlx4_mtt_addr(dev, mtt);
+ cq_context->mtt_base_addr_h = mtt_addr >> 32;
+ cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+
+ err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 0);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_resize);
+
+int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+ int err;
+
+ *cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
+ if (*cqn == -1)
+ return -ENOMEM;
+
+ err = mlx4_table_get(dev, &cq_table->table, *cqn);
+ if (err)
+ goto err_out;
+
+ err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
+ if (err)
+ goto err_put;
+ return 0;
+
+err_put:
+ mlx4_table_put(dev, &cq_table->table, *cqn);
+
+err_out:
+ mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR);
+ return err;
+}
+
+static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn, u8 usage)
+{
+ u32 in_modifier = RES_CQ | (((u32)usage & 3) << 30);
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+ else {
+ *cqn = get_param_l(&out_param);
+ return 0;
+ }
+ }
+ return __mlx4_cq_alloc_icm(dev, cqn);
+}
+
+void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+
+ mlx4_table_put(dev, &cq_table->cmpt_table, cqn);
+ mlx4_table_put(dev, &cq_table->table, cqn);
+ mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR);
+}
+
+static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, cqn);
+ err = mlx4_cmd(dev, in_param, RES_CQ, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed freeing cq:%d\n", cqn);
+ } else
+ __mlx4_cq_free_icm(dev, cqn);
+}
+
+int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
+ struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
+ struct mlx4_cq *cq, unsigned vector, int collapsed,
+ int timestamp_en)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_cq_context *cq_context;
+ u64 mtt_addr;
+ int err;
+
+ if (vector >= dev->caps.num_comp_vectors)
+ return -EINVAL;
+
+ cq->vector = vector;
+
+ err = mlx4_cq_alloc_icm(dev, &cq->cqn, cq->usage);
+ if (err)
+ return err;
+
+ spin_lock(&cq_table->lock);
+ err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
+ spin_unlock(&cq_table->lock);
+ if (err)
+ goto err_icm;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_radix;
+ }
+
+ cq_context = mailbox->buf;
+ cq_context->flags = cpu_to_be32(!!collapsed << 18);
+ if (timestamp_en)
+ cq_context->flags |= cpu_to_be32(1 << 19);
+
+ cq_context->logsize_usrpage =
+ cpu_to_be32((ilog2(nent) << 24) |
+ mlx4_to_hw_uar_index(dev, uar->index));
+ cq_context->comp_eqn = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn;
+ cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
+
+ mtt_addr = mlx4_mtt_addr(dev, mtt);
+ cq_context->mtt_base_addr_h = mtt_addr >> 32;
+ cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+ cq_context->db_rec_addr = cpu_to_be64(db_rec);
+
+ err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err)
+ goto err_radix;
+
+ cq->cons_index = 0;
+ cq->arm_sn = 1;
+ cq->uar = uar;
+ refcount_set(&cq->refcount, 1);
+ init_completion(&cq->free);
+ cq->comp = mlx4_add_cq_to_tasklet;
+ cq->tasklet_ctx.priv =
+ &priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].tasklet_ctx;
+ INIT_LIST_HEAD(&cq->tasklet_ctx.list);
+
+
+ cq->irq = priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].irq;
+ return 0;
+
+err_radix:
+ spin_lock(&cq_table->lock);
+ radix_tree_delete(&cq_table->tree, cq->cqn);
+ spin_unlock(&cq_table->lock);
+
+err_icm:
+ mlx4_cq_free_icm(dev, cq->cqn);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_alloc);
+
+void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cq_table *cq_table = &priv->cq_table;
+ int err;
+
+ err = mlx4_HW2SW_CQ(dev, NULL, cq->cqn);
+ if (err)
+ mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
+
+ spin_lock(&cq_table->lock);
+ radix_tree_delete(&cq_table->tree, cq->cqn);
+ spin_unlock(&cq_table->lock);
+
+ synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
+ if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
+ priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
+ synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+
+ if (refcount_dec_and_test(&cq->refcount))
+ complete(&cq->free);
+ wait_for_completion(&cq->free);
+
+ mlx4_cq_free_icm(dev, cq->cqn);
+}
+EXPORT_SYMBOL_GPL(mlx4_cq_free);
+
+int mlx4_init_cq_table(struct mlx4_dev *dev)
+{
+ struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
+ int err;
+
+ spin_lock_init(&cq_table->lock);
+ INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
+
+ err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
+ dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
+{
+ if (mlx4_is_slave(dev))
+ return;
+ /* Nothing to do to clean up radix_tree */
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
new file mode 100644
index 000000000..88316c743
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4.h"
+
+#define BAD_ACCESS 0xBADACCE5
+#define HEALTH_BUFFER_SIZE 0x40
+#define CR_ENABLE_BIT swab32(BIT(6))
+#define CR_ENABLE_BIT_OFFSET 0xF3F04
+#define MAX_NUM_OF_DUMPS_TO_STORE (8)
+
+static const char *region_cr_space_str = "cr-space";
+static const char *region_fw_health_str = "fw-health";
+
+/* Set to true in case cr enable bit was set to true before crdump */
+static bool crdump_enbale_bit_set;
+
+static void crdump_enable_crspace_access(struct mlx4_dev *dev,
+ u8 __iomem *cr_space)
+{
+ /* Get current enable bit value */
+ crdump_enbale_bit_set =
+ readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT;
+
+ /* Enable FW CR filter (set bit6 to 0) */
+ if (crdump_enbale_bit_set)
+ writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT,
+ cr_space + CR_ENABLE_BIT_OFFSET);
+
+ /* Enable block volatile crspace accesses */
+ writel(swab32(1), cr_space + dev->caps.health_buffer_addrs +
+ HEALTH_BUFFER_SIZE);
+}
+
+static void crdump_disable_crspace_access(struct mlx4_dev *dev,
+ u8 __iomem *cr_space)
+{
+ /* Disable block volatile crspace accesses */
+ writel(0, cr_space + dev->caps.health_buffer_addrs +
+ HEALTH_BUFFER_SIZE);
+
+ /* Restore FW CR filter value (set bit6 to original value) */
+ if (crdump_enbale_bit_set)
+ writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT,
+ cr_space + CR_ENABLE_BIT_OFFSET);
+}
+
+static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
+ u8 __iomem *cr_space,
+ u32 id)
+{
+ struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+ struct pci_dev *pdev = dev->persist->pdev;
+ unsigned long cr_res_size;
+ u8 *crspace_data;
+ int offset;
+ int err;
+
+ if (!crdump->region_crspace) {
+ mlx4_err(dev, "crdump: cr-space region is NULL\n");
+ return;
+ }
+
+ /* Try to collect CR space */
+ cr_res_size = pci_resource_len(pdev, 0);
+ crspace_data = kvmalloc(cr_res_size, GFP_KERNEL);
+ if (crspace_data) {
+ for (offset = 0; offset < cr_res_size; offset += 4)
+ *(u32 *)(crspace_data + offset) =
+ readl(cr_space + offset);
+
+ err = devlink_region_snapshot_create(crdump->region_crspace,
+ cr_res_size, crspace_data,
+ id, &kvfree);
+ if (err) {
+ kvfree(crspace_data);
+ mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+ region_cr_space_str, id, err);
+ } else {
+ mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+ id, region_cr_space_str);
+ }
+ } else {
+ mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n");
+ }
+}
+
+static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
+ u8 __iomem *cr_space,
+ u32 id)
+{
+ struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+ u8 *health_data;
+ int offset;
+ int err;
+
+ if (!crdump->region_fw_health) {
+ mlx4_err(dev, "crdump: fw-health region is NULL\n");
+ return;
+ }
+
+ /* Try to collect health buffer */
+ health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL);
+ if (health_data) {
+ u8 __iomem *health_buf_start =
+ cr_space + dev->caps.health_buffer_addrs;
+
+ for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4)
+ *(u32 *)(health_data + offset) =
+ readl(health_buf_start + offset);
+
+ err = devlink_region_snapshot_create(crdump->region_fw_health,
+ HEALTH_BUFFER_SIZE,
+ health_data,
+ id, &kvfree);
+ if (err) {
+ kvfree(health_data);
+ mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+ region_fw_health_str, id, err);
+ } else {
+ mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+ id, region_fw_health_str);
+ }
+ } else {
+ mlx4_err(dev, "crdump: Failed to allocate health buffer\n");
+ }
+}
+
+int mlx4_crdump_collect(struct mlx4_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+ struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+ struct pci_dev *pdev = dev->persist->pdev;
+ unsigned long cr_res_size;
+ u8 __iomem *cr_space;
+ u32 id;
+
+ if (!dev->caps.health_buffer_addrs) {
+ mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
+ return 0;
+ }
+
+ if (!crdump->snapshot_enable) {
+ mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n");
+ return 0;
+ }
+
+ cr_res_size = pci_resource_len(pdev, 0);
+
+ cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
+ if (!cr_space) {
+ mlx4_err(dev, "crdump: Failed to map pci cr region\n");
+ return -ENODEV;
+ }
+
+ crdump_enable_crspace_access(dev, cr_space);
+
+ /* Get the available snapshot ID for the dumps */
+ id = devlink_region_shapshot_id_get(devlink);
+
+ /* Try to capture dumps */
+ mlx4_crdump_collect_crspace(dev, cr_space, id);
+ mlx4_crdump_collect_fw_health(dev, cr_space, id);
+
+ crdump_disable_crspace_access(dev, cr_space);
+
+ iounmap(cr_space);
+ return 0;
+}
+
+int mlx4_crdump_init(struct mlx4_dev *dev)
+{
+ struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+ struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+ struct pci_dev *pdev = dev->persist->pdev;
+
+ crdump->snapshot_enable = false;
+
+ /* Create cr-space region */
+ crdump->region_crspace =
+ devlink_region_create(devlink,
+ region_cr_space_str,
+ MAX_NUM_OF_DUMPS_TO_STORE,
+ pci_resource_len(pdev, 0));
+ if (IS_ERR(crdump->region_crspace))
+ mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+ region_cr_space_str,
+ PTR_ERR(crdump->region_crspace));
+
+ /* Create fw-health region */
+ crdump->region_fw_health =
+ devlink_region_create(devlink,
+ region_fw_health_str,
+ MAX_NUM_OF_DUMPS_TO_STORE,
+ HEALTH_BUFFER_SIZE);
+ if (IS_ERR(crdump->region_fw_health))
+ mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+ region_fw_health_str,
+ PTR_ERR(crdump->region_fw_health));
+
+ return 0;
+}
+
+void mlx4_crdump_end(struct mlx4_dev *dev)
+{
+ struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+
+ devlink_region_destroy(crdump->region_fw_health);
+ devlink_region_destroy(crdump->region_crspace);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
new file mode 100644
index 000000000..024788549
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx4/device.h>
+#include <linux/clocksource.h>
+
+#include "mlx4_en.h"
+
+/* mlx4_en_read_clock - read raw cycle counter (to be used by time counter)
+ */
+static u64 mlx4_en_read_clock(const struct cyclecounter *tc)
+{
+ struct mlx4_en_dev *mdev =
+ container_of(tc, struct mlx4_en_dev, cycles);
+ struct mlx4_dev *dev = mdev->dev;
+
+ return mlx4_read_clock(dev) & tc->mask;
+}
+
+u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe)
+{
+ u64 hi, lo;
+ struct mlx4_ts_cqe *ts_cqe = (struct mlx4_ts_cqe *)cqe;
+
+ lo = (u64)be16_to_cpu(ts_cqe->timestamp_lo);
+ hi = ((u64)be32_to_cpu(ts_cqe->timestamp_hi) + !lo) << 16;
+
+ return hi | lo;
+}
+
+void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
+ struct skb_shared_hwtstamps *hwts,
+ u64 timestamp)
+{
+ unsigned int seq;
+ u64 nsec;
+
+ do {
+ seq = read_seqbegin(&mdev->clock_lock);
+ nsec = timecounter_cyc2time(&mdev->clock, timestamp);
+ } while (read_seqretry(&mdev->clock_lock, seq));
+
+ memset(hwts, 0, sizeof(struct skb_shared_hwtstamps));
+ hwts->hwtstamp = ns_to_ktime(nsec);
+}
+
+/**
+ * mlx4_en_remove_timestamp - disable PTP device
+ * @mdev: board private structure
+ *
+ * Stop the PTP support.
+ **/
+void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev)
+{
+ if (mdev->ptp_clock) {
+ ptp_clock_unregister(mdev->ptp_clock);
+ mdev->ptp_clock = NULL;
+ mlx4_info(mdev, "removed PHC\n");
+ }
+}
+
+#define MLX4_EN_WRAP_AROUND_SEC 10UL
+/* By scheduling the overflow check every 5 seconds, we have a reasonably
+ * good chance we wont miss a wrap around.
+ * TOTO: Use a timer instead of a work queue to increase the guarantee.
+ */
+#define MLX4_EN_OVERFLOW_PERIOD (MLX4_EN_WRAP_AROUND_SEC * HZ / 2)
+
+void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev)
+{
+ bool timeout = time_is_before_jiffies(mdev->last_overflow_check +
+ MLX4_EN_OVERFLOW_PERIOD);
+ unsigned long flags;
+
+ if (timeout) {
+ write_seqlock_irqsave(&mdev->clock_lock, flags);
+ timecounter_read(&mdev->clock);
+ write_sequnlock_irqrestore(&mdev->clock_lock, flags);
+ mdev->last_overflow_check = jiffies;
+ }
+}
+
+/**
+ * mlx4_en_phc_adjfreq - adjust the frequency of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired frequency change in parts per billion
+ *
+ * Adjust the frequency of the PHC cycle counter by the indicated delta from
+ * the base frequency.
+ **/
+static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+ u64 adj;
+ u32 diff, mult;
+ int neg_adj = 0;
+ unsigned long flags;
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+
+ if (delta < 0) {
+ neg_adj = 1;
+ delta = -delta;
+ }
+ mult = mdev->nominal_c_mult;
+ adj = mult;
+ adj *= delta;
+ diff = div_u64(adj, 1000000000ULL);
+
+ write_seqlock_irqsave(&mdev->clock_lock, flags);
+ timecounter_read(&mdev->clock);
+ mdev->cycles.mult = neg_adj ? mult - diff : mult + diff;
+ write_sequnlock_irqrestore(&mdev->clock_lock, flags);
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_adjtime - Shift the time of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired change in nanoseconds
+ *
+ * Adjust the timer by resetting the timecounter structure.
+ **/
+static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+ unsigned long flags;
+
+ write_seqlock_irqsave(&mdev->clock_lock, flags);
+ timecounter_adjtime(&mdev->clock, delta);
+ write_sequnlock_irqrestore(&mdev->clock_lock, flags);
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_gettime - Reads the current time from the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec structure to hold the current time value
+ *
+ * Read the timecounter and return the correct value in ns after converting
+ * it into a struct timespec.
+ **/
+static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp,
+ struct timespec64 *ts)
+{
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+ unsigned long flags;
+ u64 ns;
+
+ write_seqlock_irqsave(&mdev->clock_lock, flags);
+ ns = timecounter_read(&mdev->clock);
+ write_sequnlock_irqrestore(&mdev->clock_lock, flags);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_settime - Set the current time on the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec containing the new time for the cycle counter
+ *
+ * Reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ **/
+static int mlx4_en_phc_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+ u64 ns = timespec64_to_ns(ts);
+ unsigned long flags;
+
+ /* reset the timecounter */
+ write_seqlock_irqsave(&mdev->clock_lock, flags);
+ timecounter_init(&mdev->clock, &mdev->cycles, ns);
+ write_sequnlock_irqrestore(&mdev->clock_lock, flags);
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_enable - enable or disable an ancillary feature
+ * @ptp: ptp clock structure
+ * @request: Desired resource to enable or disable
+ * @on: Caller passes one to enable or zero to disable
+ *
+ * Enable (or disable) ancillary features of the PHC subsystem.
+ * Currently, no ancillary features are supported.
+ **/
+static int mlx4_en_phc_enable(struct ptp_clock_info __always_unused *ptp,
+ struct ptp_clock_request __always_unused *request,
+ int __always_unused on)
+{
+ return -EOPNOTSUPP;
+}
+
+static const struct ptp_clock_info mlx4_en_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .max_adj = 100000000,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 0,
+ .adjfreq = mlx4_en_phc_adjfreq,
+ .adjtime = mlx4_en_phc_adjtime,
+ .gettime64 = mlx4_en_phc_gettime,
+ .settime64 = mlx4_en_phc_settime,
+ .enable = mlx4_en_phc_enable,
+};
+
+
+/* This function calculates the max shift that enables the user range
+ * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register.
+ */
+static u32 freq_to_shift(u16 freq)
+{
+ u32 freq_khz = freq * 1000;
+ u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
+ u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1);
+ /* calculate max possible multiplier in order to fit in 64bit */
+ u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded);
+
+ /* This comes from the reverse of clocksource_khz2mult */
+ return ilog2(div_u64(max_mul * freq_khz, 1000000));
+}
+
+void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
+{
+ struct mlx4_dev *dev = mdev->dev;
+ unsigned long flags;
+
+ /* mlx4_en_init_timestamp is called for each netdev.
+ * mdev->ptp_clock is common for all ports, skip initialization if
+ * was done for other port.
+ */
+ if (mdev->ptp_clock)
+ return;
+
+ seqlock_init(&mdev->clock_lock);
+
+ memset(&mdev->cycles, 0, sizeof(mdev->cycles));
+ mdev->cycles.read = mlx4_en_read_clock;
+ mdev->cycles.mask = CLOCKSOURCE_MASK(48);
+ mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock);
+ mdev->cycles.mult =
+ clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
+ mdev->nominal_c_mult = mdev->cycles.mult;
+
+ write_seqlock_irqsave(&mdev->clock_lock, flags);
+ timecounter_init(&mdev->clock, &mdev->cycles,
+ ktime_to_ns(ktime_get_real()));
+ write_sequnlock_irqrestore(&mdev->clock_lock, flags);
+
+ /* Configure the PHC */
+ mdev->ptp_clock_info = mlx4_en_ptp_clock_info;
+ snprintf(mdev->ptp_clock_info.name, 16, "mlx4 ptp");
+
+ mdev->ptp_clock = ptp_clock_register(&mdev->ptp_clock_info,
+ &mdev->pdev->dev);
+ if (IS_ERR(mdev->ptp_clock)) {
+ mdev->ptp_clock = NULL;
+ mlx4_err(mdev, "ptp_clock_register failed\n");
+ } else if (mdev->ptp_clock) {
+ mlx4_info(mdev, "registered PHC clock\n");
+ }
+
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
new file mode 100644
index 000000000..1e487acb4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_en.h"
+
+static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event)
+{
+ return;
+}
+
+
+int mlx4_en_create_cq(struct mlx4_en_priv *priv,
+ struct mlx4_en_cq **pcq,
+ int entries, int ring, enum cq_type mode,
+ int node)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_cq *cq;
+ int err;
+
+ cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node);
+ if (!cq) {
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
+ en_err(priv, "Failed to allocate CQ structure\n");
+ return -ENOMEM;
+ }
+ }
+
+ cq->size = entries;
+ cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
+
+ cq->ring = ring;
+ cq->type = mode;
+ cq->vector = mdev->dev->caps.num_comp_vectors;
+
+ /* Allocate HW buffers on provided NUMA node.
+ * dev->numa_node is used in mtt range allocation flow.
+ */
+ set_dev_node(&mdev->dev->persist->pdev->dev, node);
+ err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
+ cq->buf_size);
+ set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
+ if (err)
+ goto err_cq;
+
+ cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf;
+ *pcq = cq;
+
+ return 0;
+
+err_cq:
+ kfree(cq);
+ *pcq = NULL;
+ return err;
+}
+
+int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+ int cq_idx)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err = 0;
+ int timestamp_en = 0;
+ bool assigned_eq = false;
+
+ cq->dev = mdev->pndev[priv->port];
+ cq->mcq.set_ci_db = cq->wqres.db.db;
+ cq->mcq.arm_db = cq->wqres.db.db + 1;
+ *cq->mcq.set_ci_db = 0;
+ *cq->mcq.arm_db = 0;
+ memset(cq->buf, 0, cq->buf_size);
+
+ if (cq->type == RX) {
+ if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port,
+ cq->vector)) {
+ cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask);
+
+ err = mlx4_assign_eq(mdev->dev, priv->port,
+ &cq->vector);
+ if (err) {
+ mlx4_err(mdev, "Failed assigning an EQ to CQ vector %d\n",
+ cq->vector);
+ goto free_eq;
+ }
+
+ assigned_eq = true;
+ }
+
+ cq->irq_desc =
+ irq_to_desc(mlx4_eq_get_irq(mdev->dev,
+ cq->vector));
+ } else {
+ /* For TX we use the same irq per
+ ring we assigned for the RX */
+ struct mlx4_en_cq *rx_cq;
+
+ cq_idx = cq_idx % priv->rx_ring_num;
+ rx_cq = priv->rx_cq[cq_idx];
+ cq->vector = rx_cq->vector;
+ }
+
+ if (cq->type == RX)
+ cq->size = priv->rx_ring[cq->ring]->actual_size;
+
+ if ((cq->type != RX && priv->hwtstamp_config.tx_type) ||
+ (cq->type == RX && priv->hwtstamp_config.rx_filter))
+ timestamp_en = 1;
+
+ cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
+ err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
+ &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq,
+ cq->vector, 0, timestamp_en);
+ if (err)
+ goto free_eq;
+
+ cq->mcq.event = mlx4_en_cq_event;
+
+ switch (cq->type) {
+ case TX:
+ cq->mcq.comp = mlx4_en_tx_irq;
+ netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&cq->napi);
+ break;
+ case RX:
+ cq->mcq.comp = mlx4_en_rx_irq;
+ netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
+ napi_enable(&cq->napi);
+ break;
+ case TX_XDP:
+ /* nothing regarding napi, it's shared with rx ring */
+ cq->xdp_busy = false;
+ break;
+ }
+
+ return 0;
+
+free_eq:
+ if (assigned_eq)
+ mlx4_release_eq(mdev->dev, cq->vector);
+ cq->vector = mdev->dev->caps.num_comp_vectors;
+ return err;
+}
+
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_cq *cq = *pcq;
+
+ mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
+ if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
+ cq->type == RX)
+ mlx4_release_eq(priv->mdev->dev, cq->vector);
+ cq->vector = 0;
+ cq->buf_size = 0;
+ cq->buf = NULL;
+ kfree(cq);
+ *pcq = NULL;
+}
+
+void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+ if (cq->type != TX_XDP) {
+ napi_disable(&cq->napi);
+ netif_napi_del(&cq->napi);
+ }
+
+ mlx4_cq_free(priv->mdev->dev, &cq->mcq);
+}
+
+/* Set rx cq moderation parameters */
+int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+ return mlx4_cq_modify(priv->mdev->dev, &cq->mcq,
+ cq->moder_cnt, cq->moder_time);
+}
+
+void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+{
+ mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map,
+ &priv->mdev->uar_lock);
+}
+
+
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
new file mode 100644
index 000000000..752a72499
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -0,0 +1,753 @@
+/*
+ * Copyright (c) 2011 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/dcbnl.h>
+#include <linux/math64.h>
+
+#include "mlx4_en.h"
+#include "fw_qos.h"
+
+enum {
+ MLX4_CEE_STATE_DOWN = 0,
+ MLX4_CEE_STATE_UP = 1,
+};
+
+/* Definitions for QCN
+ */
+
+struct mlx4_congestion_control_mb_prio_802_1_qau_params {
+ __be32 modify_enable_high;
+ __be32 modify_enable_low;
+ __be32 reserved1;
+ __be32 extended_enable;
+ __be32 rppp_max_rps;
+ __be32 rpg_time_reset;
+ __be32 rpg_byte_reset;
+ __be32 rpg_threshold;
+ __be32 rpg_max_rate;
+ __be32 rpg_ai_rate;
+ __be32 rpg_hai_rate;
+ __be32 rpg_gd;
+ __be32 rpg_min_dec_fac;
+ __be32 rpg_min_rate;
+ __be32 max_time_rise;
+ __be32 max_byte_rise;
+ __be32 max_qdelta;
+ __be32 min_qoffset;
+ __be32 gd_coefficient;
+ __be32 reserved2[5];
+ __be32 cp_sample_base;
+ __be32 reserved3[39];
+};
+
+struct mlx4_congestion_control_mb_prio_802_1_qau_statistics {
+ __be64 rppp_rp_centiseconds;
+ __be32 reserved1;
+ __be32 ignored_cnm;
+ __be32 rppp_created_rps;
+ __be32 estimated_total_rate;
+ __be32 max_active_rate_limiter_index;
+ __be32 dropped_cnms_busy_fw;
+ __be32 reserved2;
+ __be32 cnms_handled_successfully;
+ __be32 min_total_limiters_rate;
+ __be32 max_total_limiters_rate;
+ __be32 reserved3[4];
+};
+
+static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ switch (capid) {
+ case DCB_CAP_ATTR_PFC:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_DCBX:
+ *cap = priv->dcbx_cap;
+ break;
+ case DCB_CAP_ATTR_PFC_TCS:
+ *cap = 1 << mlx4_max_tc(priv->mdev->dev);
+ break;
+ default:
+ *cap = false;
+ break;
+ }
+
+ return 0;
+}
+
+static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+
+ return priv->cee_config.pfc_state;
+}
+
+static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+
+ priv->cee_config.pfc_state = state;
+}
+
+static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
+ u8 *setting)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+
+ *setting = priv->cee_config.dcb_pfc[priority];
+}
+
+static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
+ u8 setting)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+
+ priv->cee_config.dcb_pfc[priority] = setting;
+ priv->cee_config.pfc_state = true;
+}
+
+static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+
+ if (!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED))
+ return -EINVAL;
+
+ if (tcid == DCB_NUMTCS_ATTR_PFC)
+ *num = mlx4_max_tc(priv->mdev->dev);
+ else
+ *num = 0;
+
+ return 0;
+}
+
+static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+ struct mlx4_en_port_profile *prof = priv->prof;
+ struct mlx4_en_dev *mdev = priv->mdev;
+ u8 tx_pause, tx_ppp, rx_pause, rx_ppp;
+
+ if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return 1;
+
+ if (priv->cee_config.pfc_state) {
+ int tc;
+ rx_ppp = prof->rx_ppp;
+ tx_ppp = prof->tx_ppp;
+
+ for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) {
+ u8 tc_mask = 1 << tc;
+
+ switch (priv->cee_config.dcb_pfc[tc]) {
+ case pfc_disabled:
+ tx_ppp &= ~tc_mask;
+ rx_ppp &= ~tc_mask;
+ break;
+ case pfc_enabled_full:
+ tx_ppp |= tc_mask;
+ rx_ppp |= tc_mask;
+ break;
+ case pfc_enabled_tx:
+ tx_ppp |= tc_mask;
+ rx_ppp &= ~tc_mask;
+ break;
+ case pfc_enabled_rx:
+ tx_ppp &= ~tc_mask;
+ rx_ppp |= tc_mask;
+ break;
+ default:
+ break;
+ }
+ }
+ rx_pause = !!(rx_ppp || tx_ppp) ? 0 : prof->rx_pause;
+ tx_pause = !!(rx_ppp || tx_ppp) ? 0 : prof->tx_pause;
+ } else {
+ rx_ppp = 0;
+ tx_ppp = 0;
+ rx_pause = prof->rx_pause;
+ tx_pause = prof->tx_pause;
+ }
+
+ if (mlx4_SET_PORT_general(mdev->dev, priv->port,
+ priv->rx_skb_size + ETH_FCS_LEN,
+ tx_pause, tx_ppp, rx_pause, rx_ppp)) {
+ en_err(priv, "Failed setting pause params\n");
+ return 1;
+ }
+
+ prof->tx_ppp = tx_ppp;
+ prof->rx_ppp = rx_ppp;
+ prof->tx_pause = tx_pause;
+ prof->rx_pause = rx_pause;
+
+ return 0;
+}
+
+static u8 mlx4_en_dcbnl_get_state(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (priv->flags & MLX4_EN_FLAG_DCB_ENABLED)
+ return MLX4_CEE_STATE_UP;
+
+ return MLX4_CEE_STATE_DOWN;
+}
+
+static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int num_tcs = 0;
+
+ if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return 1;
+
+ if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED))
+ return 0;
+
+ if (state) {
+ priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+ num_tcs = IEEE_8021QAZ_MAX_TCS;
+ } else {
+ priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
+ }
+
+ if (mlx4_en_alloc_tx_queue_per_tc(dev, num_tcs))
+ return 1;
+
+ return 0;
+}
+
+/* On success returns a non-zero 802.1p user priority bitmap
+ * otherwise returns 0 as the invalid user priority bitmap to
+ * indicate an error.
+ */
+static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+ struct dcb_app app = {
+ .selector = idtype,
+ .protocol = id,
+ };
+ if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return 0;
+
+ return dcb_getapp(netdev, &app);
+}
+
+static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype,
+ u16 id, u8 up)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+ struct dcb_app app;
+
+ if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+ return -EINVAL;
+
+ memset(&app, 0, sizeof(struct dcb_app));
+ app.selector = idtype;
+ app.protocol = id;
+ app.priority = up;
+
+ return dcb_setapp(netdev, &app);
+}
+
+static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev,
+ struct ieee_ets *ets)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct ieee_ets *my_ets = &priv->ets;
+
+ if (!my_ets)
+ return -EINVAL;
+
+ ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
+ ets->cbs = my_ets->cbs;
+ memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw));
+ memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa));
+ memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc));
+
+ return 0;
+}
+
+static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets)
+{
+ int i;
+ int total_ets_bw = 0;
+ int has_ets_tc = 0;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (ets->prio_tc[i] >= MLX4_EN_NUM_UP_HIGH) {
+ en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n",
+ i, ets->prio_tc[i]);
+ return -EINVAL;
+ }
+
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ case IEEE_8021QAZ_TSA_STRICT:
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ has_ets_tc = 1;
+ total_ets_bw += ets->tc_tx_bw[i];
+ break;
+ default:
+ en_err(priv, "TC[%d]: Not supported TSA: %d\n",
+ i, ets->tc_tsa[i]);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (has_ets_tc && total_ets_bw != MLX4_EN_BW_MAX) {
+ en_err(priv, "Bad ETS BW sum: %d. Should be exactly 100%%\n",
+ total_ets_bw);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx4_en_config_port_scheduler(struct mlx4_en_priv *priv,
+ struct ieee_ets *ets, u16 *ratelimit)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int num_strict = 0;
+ int i;
+ __u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS] = { 0 };
+ __u8 pg[IEEE_8021QAZ_MAX_TCS] = { 0 };
+
+ ets = ets ?: &priv->ets;
+ ratelimit = ratelimit ?: priv->maxrate;
+
+ /* higher TC means higher priority => lower pg */
+ for (i = IEEE_8021QAZ_MAX_TCS - 1; i >= 0; i--) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ pg[i] = MLX4_EN_TC_VENDOR;
+ tc_tx_bw[i] = MLX4_EN_BW_MAX;
+ break;
+ case IEEE_8021QAZ_TSA_STRICT:
+ pg[i] = num_strict++;
+ tc_tx_bw[i] = MLX4_EN_BW_MAX;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ pg[i] = MLX4_EN_TC_ETS;
+ tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX4_EN_BW_MIN;
+ break;
+ }
+ }
+
+ return mlx4_SET_PORT_SCHEDULER(mdev->dev, priv->port, tc_tx_bw, pg,
+ ratelimit);
+}
+
+static int
+mlx4_en_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx4_en_ets_validate(priv, ets);
+ if (err)
+ return err;
+
+ err = mlx4_SET_PORT_PRIO2TC(mdev->dev, priv->port, ets->prio_tc);
+ if (err)
+ return err;
+
+ err = mlx4_en_config_port_scheduler(priv, ets, NULL);
+ if (err)
+ return err;
+
+ memcpy(&priv->ets, ets, sizeof(priv->ets));
+
+ return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_getpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
+ pfc->pfc_en = priv->prof->tx_ppp;
+
+ return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_port_profile *prof = priv->prof;
+ struct mlx4_en_dev *mdev = priv->mdev;
+ u32 tx_pause, tx_ppp, rx_pause, rx_ppp;
+ int err;
+
+ en_dbg(DRV, priv, "cap: 0x%x en: 0x%x mbc: 0x%x delay: %d\n",
+ pfc->pfc_cap,
+ pfc->pfc_en,
+ pfc->mbc,
+ pfc->delay);
+
+ rx_pause = prof->rx_pause && !pfc->pfc_en;
+ tx_pause = prof->tx_pause && !pfc->pfc_en;
+ rx_ppp = pfc->pfc_en;
+ tx_ppp = pfc->pfc_en;
+
+ err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+ priv->rx_skb_size + ETH_FCS_LEN,
+ tx_pause, tx_ppp, rx_pause, rx_ppp);
+ if (err) {
+ en_err(priv, "Failed setting pause params\n");
+ return err;
+ }
+
+ mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+ rx_ppp, rx_pause, tx_ppp, tx_pause);
+
+ prof->tx_ppp = tx_ppp;
+ prof->rx_ppp = rx_ppp;
+ prof->rx_pause = rx_pause;
+ prof->tx_pause = tx_pause;
+
+ return err;
+}
+
+static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ return priv->dcbx_cap;
+}
+
+static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct ieee_ets ets = {0};
+ struct ieee_pfc pfc = {0};
+
+ if (mode == priv->dcbx_cap)
+ return 0;
+
+ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+ ((mode & DCB_CAP_DCBX_VER_IEEE) &&
+ (mode & DCB_CAP_DCBX_VER_CEE)) ||
+ !(mode & DCB_CAP_DCBX_HOST))
+ goto err;
+
+ priv->dcbx_cap = mode;
+
+ ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+ pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS;
+
+ if (mode & DCB_CAP_DCBX_VER_IEEE) {
+ if (mlx4_en_dcbnl_ieee_setets(dev, &ets))
+ goto err;
+ if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc))
+ goto err;
+ } else if (mode & DCB_CAP_DCBX_VER_CEE) {
+ if (mlx4_en_dcbnl_set_all(dev))
+ goto err;
+ } else {
+ if (mlx4_en_dcbnl_ieee_setets(dev, &ets))
+ goto err;
+ if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc))
+ goto err;
+ if (mlx4_en_alloc_tx_queue_per_tc(dev, 0))
+ goto err;
+ }
+
+ return 0;
+err:
+ return 1;
+}
+
+#define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */
+static int mlx4_en_dcbnl_ieee_getmaxrate(struct net_device *dev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ maxrate->tc_maxrate[i] =
+ priv->maxrate[i] * MLX4_RATELIMIT_UNITS_IN_KB;
+
+ return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_setmaxrate(struct net_device *dev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ u16 tmp[IEEE_8021QAZ_MAX_TCS];
+ int i, err;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ /* Convert from Kbps into HW units, rounding result up.
+ * Setting to 0, means unlimited BW.
+ */
+ tmp[i] = div_u64(maxrate->tc_maxrate[i] +
+ MLX4_RATELIMIT_UNITS_IN_KB - 1,
+ MLX4_RATELIMIT_UNITS_IN_KB);
+ }
+
+ err = mlx4_en_config_port_scheduler(priv, NULL, tmp);
+ if (err)
+ return err;
+
+ memcpy(priv->maxrate, tmp, sizeof(priv->maxrate));
+
+ return 0;
+}
+
+#define RPG_ENABLE_BIT 31
+#define CN_TAG_BIT 30
+
+static int mlx4_en_dcbnl_ieee_getqcn(struct net_device *dev,
+ struct ieee_qcn *qcn)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_congestion_control_mb_prio_802_1_qau_params *hw_qcn;
+ struct mlx4_cmd_mailbox *mailbox_out = NULL;
+ u64 mailbox_in_dma = 0;
+ u32 inmod = 0;
+ int i, err;
+
+ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN))
+ return -EOPNOTSUPP;
+
+ mailbox_out = mlx4_alloc_cmd_mailbox(priv->mdev->dev);
+ if (IS_ERR(mailbox_out))
+ return -ENOMEM;
+ hw_qcn =
+ (struct mlx4_congestion_control_mb_prio_802_1_qau_params *)
+ mailbox_out->buf;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ inmod = priv->port | ((1 << i) << 8) |
+ (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16);
+ err = mlx4_cmd_box(priv->mdev->dev, mailbox_in_dma,
+ mailbox_out->dma,
+ inmod, MLX4_CONGESTION_CONTROL_GET_PARAMS,
+ MLX4_CMD_CONGESTION_CTRL_OPCODE,
+ MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+ return err;
+ }
+
+ qcn->rpg_enable[i] =
+ be32_to_cpu(hw_qcn->extended_enable) >> RPG_ENABLE_BIT;
+ qcn->rppp_max_rps[i] =
+ be32_to_cpu(hw_qcn->rppp_max_rps);
+ qcn->rpg_time_reset[i] =
+ be32_to_cpu(hw_qcn->rpg_time_reset);
+ qcn->rpg_byte_reset[i] =
+ be32_to_cpu(hw_qcn->rpg_byte_reset);
+ qcn->rpg_threshold[i] =
+ be32_to_cpu(hw_qcn->rpg_threshold);
+ qcn->rpg_max_rate[i] =
+ be32_to_cpu(hw_qcn->rpg_max_rate);
+ qcn->rpg_ai_rate[i] =
+ be32_to_cpu(hw_qcn->rpg_ai_rate);
+ qcn->rpg_hai_rate[i] =
+ be32_to_cpu(hw_qcn->rpg_hai_rate);
+ qcn->rpg_gd[i] =
+ be32_to_cpu(hw_qcn->rpg_gd);
+ qcn->rpg_min_dec_fac[i] =
+ be32_to_cpu(hw_qcn->rpg_min_dec_fac);
+ qcn->rpg_min_rate[i] =
+ be32_to_cpu(hw_qcn->rpg_min_rate);
+ qcn->cndd_state_machine[i] =
+ priv->cndd_state[i];
+ }
+ mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+ return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_setqcn(struct net_device *dev,
+ struct ieee_qcn *qcn)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_congestion_control_mb_prio_802_1_qau_params *hw_qcn;
+ struct mlx4_cmd_mailbox *mailbox_in = NULL;
+ u64 mailbox_in_dma = 0;
+ u32 inmod = 0;
+ int i, err;
+#define MODIFY_ENABLE_HIGH_MASK 0xc0000000
+#define MODIFY_ENABLE_LOW_MASK 0xffc00000
+
+ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN))
+ return -EOPNOTSUPP;
+
+ mailbox_in = mlx4_alloc_cmd_mailbox(priv->mdev->dev);
+ if (IS_ERR(mailbox_in))
+ return -ENOMEM;
+
+ mailbox_in_dma = mailbox_in->dma;
+ hw_qcn =
+ (struct mlx4_congestion_control_mb_prio_802_1_qau_params *)mailbox_in->buf;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ inmod = priv->port | ((1 << i) << 8) |
+ (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16);
+
+ /* Before updating QCN parameter,
+ * need to set it's modify enable bit to 1
+ */
+
+ hw_qcn->modify_enable_high = cpu_to_be32(
+ MODIFY_ENABLE_HIGH_MASK);
+ hw_qcn->modify_enable_low = cpu_to_be32(MODIFY_ENABLE_LOW_MASK);
+
+ hw_qcn->extended_enable = cpu_to_be32(qcn->rpg_enable[i] << RPG_ENABLE_BIT);
+ hw_qcn->rppp_max_rps = cpu_to_be32(qcn->rppp_max_rps[i]);
+ hw_qcn->rpg_time_reset = cpu_to_be32(qcn->rpg_time_reset[i]);
+ hw_qcn->rpg_byte_reset = cpu_to_be32(qcn->rpg_byte_reset[i]);
+ hw_qcn->rpg_threshold = cpu_to_be32(qcn->rpg_threshold[i]);
+ hw_qcn->rpg_max_rate = cpu_to_be32(qcn->rpg_max_rate[i]);
+ hw_qcn->rpg_ai_rate = cpu_to_be32(qcn->rpg_ai_rate[i]);
+ hw_qcn->rpg_hai_rate = cpu_to_be32(qcn->rpg_hai_rate[i]);
+ hw_qcn->rpg_gd = cpu_to_be32(qcn->rpg_gd[i]);
+ hw_qcn->rpg_min_dec_fac = cpu_to_be32(qcn->rpg_min_dec_fac[i]);
+ hw_qcn->rpg_min_rate = cpu_to_be32(qcn->rpg_min_rate[i]);
+ priv->cndd_state[i] = qcn->cndd_state_machine[i];
+ if (qcn->cndd_state_machine[i] == DCB_CNDD_INTERIOR_READY)
+ hw_qcn->extended_enable |= cpu_to_be32(1 << CN_TAG_BIT);
+
+ err = mlx4_cmd(priv->mdev->dev, mailbox_in_dma, inmod,
+ MLX4_CONGESTION_CONTROL_SET_PARAMS,
+ MLX4_CMD_CONGESTION_CTRL_OPCODE,
+ MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_in);
+ return err;
+ }
+ }
+ mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_in);
+ return 0;
+}
+
+static int mlx4_en_dcbnl_ieee_getqcnstats(struct net_device *dev,
+ struct ieee_qcn_stats *qcn_stats)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_congestion_control_mb_prio_802_1_qau_statistics *hw_qcn_stats;
+ struct mlx4_cmd_mailbox *mailbox_out = NULL;
+ u64 mailbox_in_dma = 0;
+ u32 inmod = 0;
+ int i, err;
+
+ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QCN))
+ return -EOPNOTSUPP;
+
+ mailbox_out = mlx4_alloc_cmd_mailbox(priv->mdev->dev);
+ if (IS_ERR(mailbox_out))
+ return -ENOMEM;
+
+ hw_qcn_stats =
+ (struct mlx4_congestion_control_mb_prio_802_1_qau_statistics *)
+ mailbox_out->buf;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ inmod = priv->port | ((1 << i) << 8) |
+ (MLX4_CTRL_ALGO_802_1_QAU_REACTION_POINT << 16);
+ err = mlx4_cmd_box(priv->mdev->dev, mailbox_in_dma,
+ mailbox_out->dma, inmod,
+ MLX4_CONGESTION_CONTROL_GET_STATISTICS,
+ MLX4_CMD_CONGESTION_CTRL_OPCODE,
+ MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+ return err;
+ }
+ qcn_stats->rppp_rp_centiseconds[i] =
+ be64_to_cpu(hw_qcn_stats->rppp_rp_centiseconds);
+ qcn_stats->rppp_created_rps[i] =
+ be32_to_cpu(hw_qcn_stats->rppp_created_rps);
+ }
+ mlx4_free_cmd_mailbox(priv->mdev->dev, mailbox_out);
+ return 0;
+}
+
+const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = {
+ .ieee_getets = mlx4_en_dcbnl_ieee_getets,
+ .ieee_setets = mlx4_en_dcbnl_ieee_setets,
+ .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate,
+ .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate,
+ .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn,
+ .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn,
+ .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats,
+ .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc,
+
+ .getstate = mlx4_en_dcbnl_get_state,
+ .setstate = mlx4_en_dcbnl_set_state,
+ .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg,
+ .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg,
+ .setall = mlx4_en_dcbnl_set_all,
+ .getcap = mlx4_en_dcbnl_getcap,
+ .getnumtcs = mlx4_en_dcbnl_getnumtcs,
+ .getpfcstate = mlx4_en_dcbnl_getpfcstate,
+ .setpfcstate = mlx4_en_dcbnl_setpfcstate,
+ .getapp = mlx4_en_dcbnl_getapp,
+ .setapp = mlx4_en_dcbnl_setapp,
+
+ .getdcbx = mlx4_en_dcbnl_getdcbx,
+ .setdcbx = mlx4_en_dcbnl_setdcbx,
+};
+
+const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = {
+ .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc,
+
+ .setstate = mlx4_en_dcbnl_set_state,
+ .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg,
+ .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg,
+ .setall = mlx4_en_dcbnl_set_all,
+ .getnumtcs = mlx4_en_dcbnl_getnumtcs,
+ .getpfcstate = mlx4_en_dcbnl_getpfcstate,
+ .setpfcstate = mlx4_en_dcbnl_setpfcstate,
+ .getapp = mlx4_en_dcbnl_getapp,
+ .setapp = mlx4_en_dcbnl_setapp,
+
+ .getdcbx = mlx4_en_dcbnl_getdcbx,
+ .setdcbx = mlx4_en_dcbnl_setdcbx,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
new file mode 100644
index 000000000..1d33fae52
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -0,0 +1,2161 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/in.h>
+#include <net/ip.h>
+#include <linux/bitmap.h>
+
+#include "mlx4_en.h"
+#include "en_port.h"
+
+#define EN_ETHTOOL_QP_ATTACH (1ull << 63)
+#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
+#define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff)
+
+int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
+{
+ int i, t;
+ int err = 0;
+
+ for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ for (i = 0; i < priv->tx_ring_num[t]; i++) {
+ priv->tx_cq[t][i]->moder_cnt = priv->tx_frames;
+ priv->tx_cq[t][i]->moder_time = priv->tx_usecs;
+ if (priv->port_up) {
+ err = mlx4_en_set_cq_moder(priv,
+ priv->tx_cq[t][i]);
+ if (err)
+ return err;
+ }
+ }
+ }
+
+ if (priv->adaptive_rx_coal)
+ return 0;
+
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ priv->rx_cq[i]->moder_cnt = priv->rx_frames;
+ priv->rx_cq[i]->moder_time = priv->rx_usecs;
+ priv->last_moder_time[i] = MLX4_EN_AUTO_CONF;
+ if (priv->port_up) {
+ err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]);
+ if (err)
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static void
+mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, DRV_VERSION,
+ sizeof(drvinfo->version));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d",
+ (u16) (mdev->dev->caps.fw_ver >> 32),
+ (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff),
+ (u16) (mdev->dev->caps.fw_ver & 0xffff));
+ strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev),
+ sizeof(drvinfo->bus_info));
+}
+
+static const char mlx4_en_priv_flags[][ETH_GSTRING_LEN] = {
+ "blueflame",
+ "phv-bit"
+};
+
+static const char main_strings[][ETH_GSTRING_LEN] = {
+ /* main statistics */
+ "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
+ "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
+ "rx_length_errors", "rx_over_errors", "rx_crc_errors",
+ "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
+ "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
+ "tx_heartbeat_errors", "tx_window_errors",
+
+ /* port statistics */
+ "tso_packets",
+ "xmit_more",
+ "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_pages",
+ "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload",
+
+ /* pf statistics */
+ "pf_rx_packets",
+ "pf_rx_bytes",
+ "pf_tx_packets",
+ "pf_tx_bytes",
+
+ /* priority flow control statistics rx */
+ "rx_pause_prio_0", "rx_pause_duration_prio_0",
+ "rx_pause_transition_prio_0",
+ "rx_pause_prio_1", "rx_pause_duration_prio_1",
+ "rx_pause_transition_prio_1",
+ "rx_pause_prio_2", "rx_pause_duration_prio_2",
+ "rx_pause_transition_prio_2",
+ "rx_pause_prio_3", "rx_pause_duration_prio_3",
+ "rx_pause_transition_prio_3",
+ "rx_pause_prio_4", "rx_pause_duration_prio_4",
+ "rx_pause_transition_prio_4",
+ "rx_pause_prio_5", "rx_pause_duration_prio_5",
+ "rx_pause_transition_prio_5",
+ "rx_pause_prio_6", "rx_pause_duration_prio_6",
+ "rx_pause_transition_prio_6",
+ "rx_pause_prio_7", "rx_pause_duration_prio_7",
+ "rx_pause_transition_prio_7",
+
+ /* flow control statistics rx */
+ "rx_pause", "rx_pause_duration", "rx_pause_transition",
+
+ /* priority flow control statistics tx */
+ "tx_pause_prio_0", "tx_pause_duration_prio_0",
+ "tx_pause_transition_prio_0",
+ "tx_pause_prio_1", "tx_pause_duration_prio_1",
+ "tx_pause_transition_prio_1",
+ "tx_pause_prio_2", "tx_pause_duration_prio_2",
+ "tx_pause_transition_prio_2",
+ "tx_pause_prio_3", "tx_pause_duration_prio_3",
+ "tx_pause_transition_prio_3",
+ "tx_pause_prio_4", "tx_pause_duration_prio_4",
+ "tx_pause_transition_prio_4",
+ "tx_pause_prio_5", "tx_pause_duration_prio_5",
+ "tx_pause_transition_prio_5",
+ "tx_pause_prio_6", "tx_pause_duration_prio_6",
+ "tx_pause_transition_prio_6",
+ "tx_pause_prio_7", "tx_pause_duration_prio_7",
+ "tx_pause_transition_prio_7",
+
+ /* flow control statistics tx */
+ "tx_pause", "tx_pause_duration", "tx_pause_transition",
+
+ /* packet statistics */
+ "rx_multicast_packets",
+ "rx_broadcast_packets",
+ "rx_jabbers",
+ "rx_in_range_length_error",
+ "rx_out_range_length_error",
+ "tx_multicast_packets",
+ "tx_broadcast_packets",
+ "rx_prio_0_packets", "rx_prio_0_bytes",
+ "rx_prio_1_packets", "rx_prio_1_bytes",
+ "rx_prio_2_packets", "rx_prio_2_bytes",
+ "rx_prio_3_packets", "rx_prio_3_bytes",
+ "rx_prio_4_packets", "rx_prio_4_bytes",
+ "rx_prio_5_packets", "rx_prio_5_bytes",
+ "rx_prio_6_packets", "rx_prio_6_bytes",
+ "rx_prio_7_packets", "rx_prio_7_bytes",
+ "rx_novlan_packets", "rx_novlan_bytes",
+ "tx_prio_0_packets", "tx_prio_0_bytes",
+ "tx_prio_1_packets", "tx_prio_1_bytes",
+ "tx_prio_2_packets", "tx_prio_2_bytes",
+ "tx_prio_3_packets", "tx_prio_3_bytes",
+ "tx_prio_4_packets", "tx_prio_4_bytes",
+ "tx_prio_5_packets", "tx_prio_5_bytes",
+ "tx_prio_6_packets", "tx_prio_6_bytes",
+ "tx_prio_7_packets", "tx_prio_7_bytes",
+ "tx_novlan_packets", "tx_novlan_bytes",
+
+ /* xdp statistics */
+ "rx_xdp_drop",
+ "rx_xdp_tx",
+ "rx_xdp_tx_full",
+
+ /* phy statistics */
+ "rx_packets_phy", "rx_bytes_phy",
+ "tx_packets_phy", "tx_bytes_phy",
+};
+
+static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
+ "Interrupt Test",
+ "Link Test",
+ "Speed Test",
+ "Register Test",
+ "Loopback Test",
+};
+
+static u32 mlx4_en_get_msglevel(struct net_device *dev)
+{
+ return ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable;
+}
+
+static void mlx4_en_set_msglevel(struct net_device *dev, u32 val)
+{
+ ((struct mlx4_en_priv *) netdev_priv(dev))->msg_enable = val;
+}
+
+static void mlx4_en_get_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+ struct mlx4_caps *caps = &priv->mdev->dev->caps;
+ int err = 0;
+ u64 config = 0;
+ u64 mask;
+
+ if ((priv->port < 1) || (priv->port > 2)) {
+ en_err(priv, "Failed to get WoL information\n");
+ return;
+ }
+
+ mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+ MLX4_DEV_CAP_FLAG_WOL_PORT2;
+
+ if (!(caps->flags & mask)) {
+ wol->supported = 0;
+ wol->wolopts = 0;
+ return;
+ }
+
+ if (caps->wol_port[priv->port])
+ wol->supported = WAKE_MAGIC;
+ else
+ wol->supported = 0;
+
+ err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
+ if (err) {
+ en_err(priv, "Failed to get WoL information\n");
+ return;
+ }
+
+ if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC))
+ wol->wolopts = WAKE_MAGIC;
+ else
+ wol->wolopts = 0;
+}
+
+static int mlx4_en_set_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+ u64 config = 0;
+ int err = 0;
+ u64 mask;
+
+ if ((priv->port < 1) || (priv->port > 2))
+ return -EOPNOTSUPP;
+
+ mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+ MLX4_DEV_CAP_FLAG_WOL_PORT2;
+
+ if (!(priv->mdev->dev->caps.flags & mask))
+ return -EOPNOTSUPP;
+
+ if (wol->supported & ~WAKE_MAGIC)
+ return -EINVAL;
+
+ err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
+ if (err) {
+ en_err(priv, "Failed to get WoL info, unable to modify\n");
+ return err;
+ }
+
+ if (wol->wolopts & WAKE_MAGIC) {
+ config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED |
+ MLX4_EN_WOL_MAGIC;
+ } else {
+ config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC);
+ config |= MLX4_EN_WOL_DO_MODIFY;
+ }
+
+ err = mlx4_wol_write(priv->mdev->dev, config, priv->port);
+ if (err)
+ en_err(priv, "Failed to set WoL information\n");
+
+ return err;
+}
+
+struct bitmap_iterator {
+ unsigned long *stats_bitmap;
+ unsigned int count;
+ unsigned int iterator;
+ bool advance_array; /* if set, force no increments */
+};
+
+static inline void bitmap_iterator_init(struct bitmap_iterator *h,
+ unsigned long *stats_bitmap,
+ int count)
+{
+ h->iterator = 0;
+ h->advance_array = !bitmap_empty(stats_bitmap, count);
+ h->count = h->advance_array ? bitmap_weight(stats_bitmap, count)
+ : count;
+ h->stats_bitmap = stats_bitmap;
+}
+
+static inline int bitmap_iterator_test(struct bitmap_iterator *h)
+{
+ return !h->advance_array ? 1 : test_bit(h->iterator, h->stats_bitmap);
+}
+
+static inline int bitmap_iterator_inc(struct bitmap_iterator *h)
+{
+ return h->iterator++;
+}
+
+static inline unsigned int
+bitmap_iterator_count(struct bitmap_iterator *h)
+{
+ return h->count;
+}
+
+static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct bitmap_iterator it;
+
+ bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ return bitmap_iterator_count(&it) +
+ (priv->tx_ring_num[TX] * 2) +
+ (priv->rx_ring_num * (3 + NUM_XDP_STATS));
+ case ETH_SS_TEST:
+ return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
+ & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2;
+ case ETH_SS_PRIV_FLAGS:
+ return ARRAY_SIZE(mlx4_en_priv_flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void mlx4_en_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, uint64_t *data)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int index = 0;
+ int i;
+ struct bitmap_iterator it;
+
+ bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS);
+
+ spin_lock_bh(&priv->stats_lock);
+
+ mlx4_en_fold_software_stats(dev);
+
+ for (i = 0; i < NUM_MAIN_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&dev->stats)[i];
+
+ for (i = 0; i < NUM_PORT_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->port_stats)[i];
+
+ for (i = 0; i < NUM_PF_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] =
+ ((unsigned long *)&priv->pf_stats)[i];
+
+ for (i = 0; i < NUM_FLOW_PRIORITY_STATS_RX;
+ i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] =
+ ((u64 *)&priv->rx_priority_flowstats)[i];
+
+ for (i = 0; i < NUM_FLOW_STATS_RX; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((u64 *)&priv->rx_flowstats)[i];
+
+ for (i = 0; i < NUM_FLOW_PRIORITY_STATS_TX;
+ i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] =
+ ((u64 *)&priv->tx_priority_flowstats)[i];
+
+ for (i = 0; i < NUM_FLOW_STATS_TX; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((u64 *)&priv->tx_flowstats)[i];
+
+ for (i = 0; i < NUM_PKT_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->pkstats)[i];
+
+ for (i = 0; i < NUM_XDP_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+
+ for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
+ for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+ data[index++] = priv->tx_ring[TX][i]->packets;
+ data[index++] = priv->tx_ring[TX][i]->bytes;
+ }
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ data[index++] = priv->rx_ring[i]->packets;
+ data[index++] = priv->rx_ring[i]->bytes;
+ data[index++] = priv->rx_ring[i]->dropped;
+ data[index++] = priv->rx_ring[i]->xdp_drop;
+ data[index++] = priv->rx_ring[i]->xdp_tx;
+ data[index++] = priv->rx_ring[i]->xdp_tx_full;
+ }
+ spin_unlock_bh(&priv->stats_lock);
+
+}
+
+static void mlx4_en_self_test(struct net_device *dev,
+ struct ethtool_test *etest, u64 *buf)
+{
+ mlx4_en_ex_selftest(dev, &etest->flags, buf);
+}
+
+static void mlx4_en_get_strings(struct net_device *dev,
+ uint32_t stringset, uint8_t *data)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int index = 0;
+ int i, strings = 0;
+ struct bitmap_iterator it;
+
+ bitmap_iterator_init(&it, priv->stats_bitmap.bitmap, NUM_ALL_STATS);
+
+ switch (stringset) {
+ case ETH_SS_TEST:
+ for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++)
+ strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]);
+ if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK)
+ for (; i < MLX4_EN_NUM_SELF_TEST; i++)
+ strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]);
+ break;
+
+ case ETH_SS_STATS:
+ /* Add main counters */
+ for (i = 0; i < NUM_MAIN_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
+ for (i = 0; i < NUM_PORT_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
+ for (i = 0; i < NUM_PF_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
+ for (i = 0; i < NUM_FLOW_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
+ for (i = 0; i < NUM_PKT_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
+ for (i = 0; i < NUM_XDP_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
+ for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+ bitmap_iterator_inc(&it))
+ if (bitmap_iterator_test(&it))
+ strcpy(data + (index++) * ETH_GSTRING_LEN,
+ main_strings[strings]);
+
+ for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "tx%d_packets", i);
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "tx%d_bytes", i);
+ }
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "rx%d_packets", i);
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "rx%d_bytes", i);
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "rx%d_dropped", i);
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "rx%d_xdp_drop", i);
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "rx%d_xdp_tx", i);
+ sprintf(data + (index++) * ETH_GSTRING_LEN,
+ "rx%d_xdp_tx_full", i);
+ }
+ break;
+ case ETH_SS_PRIV_FLAGS:
+ for (i = 0; i < ARRAY_SIZE(mlx4_en_priv_flags); i++)
+ strcpy(data + i * ETH_GSTRING_LEN,
+ mlx4_en_priv_flags[i]);
+ break;
+
+ }
+}
+
+static u32 mlx4_en_autoneg_get(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ u32 autoneg = AUTONEG_DISABLE;
+
+ if ((mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP) &&
+ (priv->port_state.flags & MLX4_EN_PORT_ANE))
+ autoneg = AUTONEG_ENABLE;
+
+ return autoneg;
+}
+
+static void ptys2ethtool_update_supported_port(unsigned long *mask,
+ struct mlx4_ptys_reg *ptys_reg)
+{
+ u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap);
+
+ if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T)
+ | MLX4_PROT_MASK(MLX4_1000BASE_T)
+ | MLX4_PROT_MASK(MLX4_100BASE_TX))) {
+ __set_bit(ETHTOOL_LINK_MODE_TP_BIT, mask);
+ } else if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR)
+ | MLX4_PROT_MASK(MLX4_10GBASE_SR)
+ | MLX4_PROT_MASK(MLX4_56GBASE_SR4)
+ | MLX4_PROT_MASK(MLX4_40GBASE_CR4)
+ | MLX4_PROT_MASK(MLX4_40GBASE_SR4)
+ | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) {
+ __set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mask);
+ } else if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4)
+ | MLX4_PROT_MASK(MLX4_40GBASE_KR4)
+ | MLX4_PROT_MASK(MLX4_20GBASE_KR2)
+ | MLX4_PROT_MASK(MLX4_10GBASE_KR)
+ | MLX4_PROT_MASK(MLX4_10GBASE_KX4)
+ | MLX4_PROT_MASK(MLX4_1000BASE_KX))) {
+ __set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mask);
+ }
+}
+
+static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg)
+{
+ u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_oper);
+
+ if (!eth_proto) /* link down */
+ eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap);
+
+ if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T)
+ | MLX4_PROT_MASK(MLX4_1000BASE_T)
+ | MLX4_PROT_MASK(MLX4_100BASE_TX))) {
+ return PORT_TP;
+ }
+
+ if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_SR)
+ | MLX4_PROT_MASK(MLX4_56GBASE_SR4)
+ | MLX4_PROT_MASK(MLX4_40GBASE_SR4)
+ | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) {
+ return PORT_FIBRE;
+ }
+
+ if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR)
+ | MLX4_PROT_MASK(MLX4_56GBASE_CR4)
+ | MLX4_PROT_MASK(MLX4_40GBASE_CR4))) {
+ return PORT_DA;
+ }
+
+ if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4)
+ | MLX4_PROT_MASK(MLX4_40GBASE_KR4)
+ | MLX4_PROT_MASK(MLX4_20GBASE_KR2)
+ | MLX4_PROT_MASK(MLX4_10GBASE_KR)
+ | MLX4_PROT_MASK(MLX4_10GBASE_KX4)
+ | MLX4_PROT_MASK(MLX4_1000BASE_KX))) {
+ return PORT_NONE;
+ }
+ return PORT_OTHER;
+}
+
+#define MLX4_LINK_MODES_SZ \
+ (FIELD_SIZEOF(struct mlx4_ptys_reg, eth_proto_cap) * 8)
+
+enum ethtool_report {
+ SUPPORTED = 0,
+ ADVERTISED = 1,
+};
+
+struct ptys2ethtool_config {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
+ u32 speed;
+};
+
+static unsigned long *ptys2ethtool_link_mode(struct ptys2ethtool_config *cfg,
+ enum ethtool_report report)
+{
+ switch (report) {
+ case SUPPORTED:
+ return cfg->supported;
+ case ADVERTISED:
+ return cfg->advertised;
+ }
+ return NULL;
+}
+
+#define MLX4_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...) \
+ ({ \
+ struct ptys2ethtool_config *cfg; \
+ const unsigned int modes[] = { __VA_ARGS__ }; \
+ unsigned int i; \
+ cfg = &ptys2ethtool_map[reg_]; \
+ cfg->speed = speed_; \
+ bitmap_zero(cfg->supported, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ bitmap_zero(cfg->advertised, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \
+ __set_bit(modes[i], cfg->supported); \
+ __set_bit(modes[i], cfg->advertised); \
+ } \
+ })
+
+/* Translates mlx4 link mode to equivalent ethtool Link modes/speed */
+static struct ptys2ethtool_config ptys2ethtool_map[MLX4_LINK_MODES_SZ];
+
+void __init mlx4_en_init_ptys2ethtool_map(void)
+{
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_100BASE_TX, SPEED_100,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000,
+ ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CX4, SPEED_10000,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KX4, SPEED_10000,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000,
+ ETHTOOL_LINK_MODE_10000baseCR_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000,
+ ETHTOOL_LINK_MODE_10000baseSR_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000,
+ ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_CR4, SPEED_40000,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_KR4, SPEED_40000,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_40GBASE_SR4, SPEED_40000,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_KR4, SPEED_56000,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_CR4, SPEED_56000,
+ ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT);
+ MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_56GBASE_SR4, SPEED_56000,
+ ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT);
+};
+
+static void ptys2ethtool_update_link_modes(unsigned long *link_modes,
+ u32 eth_proto,
+ enum ethtool_report report)
+{
+ int i;
+ for (i = 0; i < MLX4_LINK_MODES_SZ; i++) {
+ if (eth_proto & MLX4_PROT_MASK(i))
+ bitmap_or(link_modes, link_modes,
+ ptys2ethtool_link_mode(&ptys2ethtool_map[i],
+ report),
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ }
+}
+
+static u32 ethtool2ptys_link_modes(const unsigned long *link_modes,
+ enum ethtool_report report)
+{
+ int i;
+ u32 ptys_modes = 0;
+
+ for (i = 0; i < MLX4_LINK_MODES_SZ; i++) {
+ if (bitmap_intersects(
+ ptys2ethtool_link_mode(&ptys2ethtool_map[i],
+ report),
+ link_modes,
+ __ETHTOOL_LINK_MODE_MASK_NBITS))
+ ptys_modes |= 1 << i;
+ }
+ return ptys_modes;
+}
+
+/* Convert actual speed (SPEED_XXX) to ptys link modes */
+static u32 speed2ptys_link_modes(u32 speed)
+{
+ int i;
+ u32 ptys_modes = 0;
+
+ for (i = 0; i < MLX4_LINK_MODES_SZ; i++) {
+ if (ptys2ethtool_map[i].speed == speed)
+ ptys_modes |= 1 << i;
+ }
+ return ptys_modes;
+}
+
+static int
+ethtool_get_ptys_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_ptys_reg ptys_reg;
+ u32 eth_proto;
+ int ret;
+
+ memset(&ptys_reg, 0, sizeof(ptys_reg));
+ ptys_reg.local_port = priv->port;
+ ptys_reg.proto_mask = MLX4_PTYS_EN;
+ ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev,
+ MLX4_ACCESS_REG_QUERY, &ptys_reg);
+ if (ret) {
+ en_warn(priv, "Failed to run mlx4_ACCESS_PTYS_REG status(%x)",
+ ret);
+ return ret;
+ }
+ en_dbg(DRV, priv, "ptys_reg.proto_mask %x\n",
+ ptys_reg.proto_mask);
+ en_dbg(DRV, priv, "ptys_reg.eth_proto_cap %x\n",
+ be32_to_cpu(ptys_reg.eth_proto_cap));
+ en_dbg(DRV, priv, "ptys_reg.eth_proto_admin %x\n",
+ be32_to_cpu(ptys_reg.eth_proto_admin));
+ en_dbg(DRV, priv, "ptys_reg.eth_proto_oper %x\n",
+ be32_to_cpu(ptys_reg.eth_proto_oper));
+ en_dbg(DRV, priv, "ptys_reg.eth_proto_lp_adv %x\n",
+ be32_to_cpu(ptys_reg.eth_proto_lp_adv));
+
+ /* reset supported/advertising masks */
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ ptys2ethtool_update_supported_port(link_ksettings->link_modes.supported,
+ &ptys_reg);
+
+ eth_proto = be32_to_cpu(ptys_reg.eth_proto_cap);
+ ptys2ethtool_update_link_modes(link_ksettings->link_modes.supported,
+ eth_proto, SUPPORTED);
+
+ eth_proto = be32_to_cpu(ptys_reg.eth_proto_admin);
+ ptys2ethtool_update_link_modes(link_ksettings->link_modes.advertising,
+ eth_proto, ADVERTISED);
+
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+ Pause);
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+ Asym_Pause);
+
+ if (priv->prof->tx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Pause);
+ if (priv->prof->tx_pause ^ priv->prof->rx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Asym_Pause);
+
+ link_ksettings->base.port = ptys_get_active_port(&ptys_reg);
+
+ if (mlx4_en_autoneg_get(dev)) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Autoneg);
+ }
+
+ link_ksettings->base.autoneg
+ = (priv->port_state.flags & MLX4_EN_PORT_ANC) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+ eth_proto = be32_to_cpu(ptys_reg.eth_proto_lp_adv);
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
+ ptys2ethtool_update_link_modes(
+ link_ksettings->link_modes.lp_advertising,
+ eth_proto, ADVERTISED);
+ if (priv->port_state.flags & MLX4_EN_PORT_ANC)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ lp_advertising, Autoneg);
+
+ link_ksettings->base.phy_address = 0;
+ link_ksettings->base.mdio_support = 0;
+ link_ksettings->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+ link_ksettings->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+
+ return ret;
+}
+
+static void
+ethtool_get_default_link_ksettings(
+ struct net_device *dev, struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int trans_type;
+
+ link_ksettings->base.autoneg = AUTONEG_DISABLE;
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+ 10000baseT_Full);
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+ ethtool_link_ksettings_add_link_mode(link_ksettings, advertising,
+ 10000baseT_Full);
+
+ trans_type = priv->port_state.transceiver;
+ if (trans_type > 0 && trans_type <= 0xC) {
+ link_ksettings->base.port = PORT_FIBRE;
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, FIBRE);
+ } else if (trans_type == 0x80 || trans_type == 0) {
+ link_ksettings->base.port = PORT_TP;
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, TP);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, TP);
+ } else {
+ link_ksettings->base.port = -1;
+ }
+}
+
+static int
+mlx4_en_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int ret = -EINVAL;
+
+ if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
+ return -ENOMEM;
+
+ en_dbg(DRV, priv, "query port state.flags ANC(%x) ANE(%x)\n",
+ priv->port_state.flags & MLX4_EN_PORT_ANC,
+ priv->port_state.flags & MLX4_EN_PORT_ANE);
+
+ if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL)
+ ret = ethtool_get_ptys_link_ksettings(dev, link_ksettings);
+ if (ret) /* ETH PROT CRTL is not supported or PTYS CMD failed */
+ ethtool_get_default_link_ksettings(dev, link_ksettings);
+
+ if (netif_carrier_ok(dev)) {
+ link_ksettings->base.speed = priv->port_state.link_speed;
+ link_ksettings->base.duplex = DUPLEX_FULL;
+ } else {
+ link_ksettings->base.speed = SPEED_UNKNOWN;
+ link_ksettings->base.duplex = DUPLEX_UNKNOWN;
+ }
+ return 0;
+}
+
+/* Calculate PTYS admin according ethtool speed (SPEED_XXX) */
+static __be32 speed_set_ptys_admin(struct mlx4_en_priv *priv, u32 speed,
+ __be32 proto_cap)
+{
+ __be32 proto_admin = 0;
+
+ if (!speed) { /* Speed = 0 ==> Reset Link modes */
+ proto_admin = proto_cap;
+ en_info(priv, "Speed was set to 0, Reset advertised Link Modes to default (%x)\n",
+ be32_to_cpu(proto_cap));
+ } else {
+ u32 ptys_link_modes = speed2ptys_link_modes(speed);
+
+ proto_admin = cpu_to_be32(ptys_link_modes) & proto_cap;
+ en_info(priv, "Setting Speed to %d\n", speed);
+ }
+ return proto_admin;
+}
+
+static int
+mlx4_en_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_ptys_reg ptys_reg;
+ __be32 proto_admin;
+ u8 cur_autoneg;
+ int ret;
+
+ u32 ptys_adv = ethtool2ptys_link_modes(
+ link_ksettings->link_modes.advertising, ADVERTISED);
+ const int speed = link_ksettings->base.speed;
+
+ en_dbg(DRV, priv,
+ "Set Speed=%d adv={%*pbl} autoneg=%d duplex=%d\n",
+ speed, __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_ksettings->link_modes.advertising,
+ link_ksettings->base.autoneg,
+ link_ksettings->base.duplex);
+
+ if (!(priv->mdev->dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) ||
+ (link_ksettings->base.duplex == DUPLEX_HALF))
+ return -EINVAL;
+
+ memset(&ptys_reg, 0, sizeof(ptys_reg));
+ ptys_reg.local_port = priv->port;
+ ptys_reg.proto_mask = MLX4_PTYS_EN;
+ ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev,
+ MLX4_ACCESS_REG_QUERY, &ptys_reg);
+ if (ret) {
+ en_warn(priv, "Failed to QUERY mlx4_ACCESS_PTYS_REG status(%x)\n",
+ ret);
+ return 0;
+ }
+
+ cur_autoneg = ptys_reg.flags & MLX4_PTYS_AN_DISABLE_ADMIN ?
+ AUTONEG_DISABLE : AUTONEG_ENABLE;
+
+ if (link_ksettings->base.autoneg == AUTONEG_DISABLE) {
+ proto_admin = speed_set_ptys_admin(priv, speed,
+ ptys_reg.eth_proto_cap);
+ if ((be32_to_cpu(proto_admin) &
+ (MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII) |
+ MLX4_PROT_MASK(MLX4_1000BASE_KX))) &&
+ (ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP))
+ ptys_reg.flags |= MLX4_PTYS_AN_DISABLE_ADMIN;
+ } else {
+ proto_admin = cpu_to_be32(ptys_adv);
+ ptys_reg.flags &= ~MLX4_PTYS_AN_DISABLE_ADMIN;
+ }
+
+ proto_admin &= ptys_reg.eth_proto_cap;
+ if (!proto_admin) {
+ en_warn(priv, "Not supported link mode(s) requested, check supported link modes.\n");
+ return -EINVAL; /* nothing to change due to bad input */
+ }
+
+ if ((proto_admin == ptys_reg.eth_proto_admin) &&
+ ((ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP) &&
+ (link_ksettings->base.autoneg == cur_autoneg)))
+ return 0; /* Nothing to change */
+
+ en_dbg(DRV, priv, "mlx4_ACCESS_PTYS_REG SET: ptys_reg.eth_proto_admin = 0x%x\n",
+ be32_to_cpu(proto_admin));
+
+ ptys_reg.eth_proto_admin = proto_admin;
+ ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, MLX4_ACCESS_REG_WRITE,
+ &ptys_reg);
+ if (ret) {
+ en_warn(priv, "Failed to write mlx4_ACCESS_PTYS_REG eth_proto_admin(0x%x) status(0x%x)",
+ be32_to_cpu(ptys_reg.eth_proto_admin), ret);
+ return ret;
+ }
+
+ mutex_lock(&priv->mdev->state_lock);
+ if (priv->port_up) {
+ en_warn(priv, "Port link mode changed, restarting port...\n");
+ mlx4_en_stop_port(dev, 1);
+ if (mlx4_en_start_port(dev))
+ en_err(priv, "Failed restarting port %d\n", priv->port);
+ }
+ mutex_unlock(&priv->mdev->state_lock);
+ return 0;
+}
+
+static int mlx4_en_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ coal->tx_coalesce_usecs = priv->tx_usecs;
+ coal->tx_max_coalesced_frames = priv->tx_frames;
+ coal->tx_max_coalesced_frames_irq = priv->tx_work_limit;
+
+ coal->rx_coalesce_usecs = priv->rx_usecs;
+ coal->rx_max_coalesced_frames = priv->rx_frames;
+
+ coal->pkt_rate_low = priv->pkt_rate_low;
+ coal->rx_coalesce_usecs_low = priv->rx_usecs_low;
+ coal->pkt_rate_high = priv->pkt_rate_high;
+ coal->rx_coalesce_usecs_high = priv->rx_usecs_high;
+ coal->rate_sample_interval = priv->sample_interval;
+ coal->use_adaptive_rx_coalesce = priv->adaptive_rx_coal;
+
+ return 0;
+}
+
+static int mlx4_en_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (!coal->tx_max_coalesced_frames_irq)
+ return -EINVAL;
+
+ if (coal->tx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs_low > MLX4_EN_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs_high > MLX4_EN_MAX_COAL_TIME) {
+ netdev_info(dev, "%s: maximum coalesce time supported is %d usecs\n",
+ __func__, MLX4_EN_MAX_COAL_TIME);
+ return -ERANGE;
+ }
+
+ if (coal->tx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS ||
+ coal->rx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS) {
+ netdev_info(dev, "%s: maximum coalesced frames supported is %d\n",
+ __func__, MLX4_EN_MAX_COAL_PKTS);
+ return -ERANGE;
+ }
+
+ priv->rx_frames = (coal->rx_max_coalesced_frames ==
+ MLX4_EN_AUTO_CONF) ?
+ MLX4_EN_RX_COAL_TARGET :
+ coal->rx_max_coalesced_frames;
+ priv->rx_usecs = (coal->rx_coalesce_usecs ==
+ MLX4_EN_AUTO_CONF) ?
+ MLX4_EN_RX_COAL_TIME :
+ coal->rx_coalesce_usecs;
+
+ /* Setting TX coalescing parameters */
+ if (coal->tx_coalesce_usecs != priv->tx_usecs ||
+ coal->tx_max_coalesced_frames != priv->tx_frames) {
+ priv->tx_usecs = coal->tx_coalesce_usecs;
+ priv->tx_frames = coal->tx_max_coalesced_frames;
+ }
+
+ /* Set adaptive coalescing params */
+ priv->pkt_rate_low = coal->pkt_rate_low;
+ priv->rx_usecs_low = coal->rx_coalesce_usecs_low;
+ priv->pkt_rate_high = coal->pkt_rate_high;
+ priv->rx_usecs_high = coal->rx_coalesce_usecs_high;
+ priv->sample_interval = coal->rate_sample_interval;
+ priv->adaptive_rx_coal = coal->use_adaptive_rx_coalesce;
+ priv->tx_work_limit = coal->tx_max_coalesced_frames_irq;
+
+ return mlx4_en_moderation_update(priv);
+}
+
+static int mlx4_en_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ u8 tx_pause, tx_ppp, rx_pause, rx_ppp;
+ int err;
+
+ if (pause->autoneg)
+ return -EINVAL;
+
+ tx_pause = !!(pause->tx_pause);
+ rx_pause = !!(pause->rx_pause);
+ rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp;
+ tx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->tx_ppp;
+
+ err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+ priv->rx_skb_size + ETH_FCS_LEN,
+ tx_pause, tx_ppp, rx_pause, rx_ppp);
+ if (err) {
+ en_err(priv, "Failed setting pause params, err = %d\n", err);
+ return err;
+ }
+
+ mlx4_en_update_pfc_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+ rx_ppp, rx_pause, tx_ppp, tx_pause);
+
+ priv->prof->tx_pause = tx_pause;
+ priv->prof->rx_pause = rx_pause;
+ priv->prof->tx_ppp = tx_ppp;
+ priv->prof->rx_ppp = rx_ppp;
+
+ return err;
+}
+
+static void mlx4_en_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ pause->tx_pause = priv->prof->tx_pause;
+ pause->rx_pause = priv->prof->rx_pause;
+}
+
+static int mlx4_en_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_port_profile new_prof;
+ struct mlx4_en_priv *tmp;
+ u32 rx_size, tx_size;
+ int port_up = 0;
+ int err = 0;
+
+ if (param->rx_jumbo_pending || param->rx_mini_pending)
+ return -EINVAL;
+
+ if (param->rx_pending < MLX4_EN_MIN_RX_SIZE) {
+ en_warn(priv, "%s: rx_pending (%d) < min (%d)\n",
+ __func__, param->rx_pending,
+ MLX4_EN_MIN_RX_SIZE);
+ return -EINVAL;
+ }
+ if (param->tx_pending < MLX4_EN_MIN_TX_SIZE) {
+ en_warn(priv, "%s: tx_pending (%d) < min (%lu)\n",
+ __func__, param->tx_pending,
+ MLX4_EN_MIN_TX_SIZE);
+ return -EINVAL;
+ }
+
+ rx_size = roundup_pow_of_two(param->rx_pending);
+ tx_size = roundup_pow_of_two(param->tx_pending);
+
+ if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size :
+ priv->rx_ring[0]->size) &&
+ tx_size == priv->tx_ring[TX][0]->size)
+ return 0;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ mutex_lock(&mdev->state_lock);
+ memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+ new_prof.tx_ring_size = tx_size;
+ new_prof.rx_ring_size = rx_size;
+ err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
+ if (err)
+ goto out;
+
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev, 1);
+ }
+
+ mlx4_en_safe_replace_resources(priv, tmp);
+
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err)
+ en_err(priv, "Failed starting port\n");
+ }
+
+ err = mlx4_en_moderation_update(priv);
+out:
+ kfree(tmp);
+ mutex_unlock(&mdev->state_lock);
+ return err;
+}
+
+static void mlx4_en_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ memset(param, 0, sizeof(*param));
+ param->rx_max_pending = MLX4_EN_MAX_RX_SIZE;
+ param->tx_max_pending = MLX4_EN_MAX_TX_SIZE;
+ param->rx_pending = priv->port_up ?
+ priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size;
+ param->tx_pending = priv->tx_ring[TX][0]->size;
+}
+
+static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ return rounddown_pow_of_two(priv->rx_ring_num);
+}
+
+static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev)
+{
+ return MLX4_EN_RSS_KEY_SIZE;
+}
+
+static int mlx4_en_check_rxfh_func(struct net_device *dev, u8 hfunc)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ /* check if requested function is supported by the device */
+ if (hfunc == ETH_RSS_HASH_TOP) {
+ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP))
+ return -EINVAL;
+ if (!(dev->features & NETIF_F_RXHASH))
+ en_warn(priv, "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n");
+ return 0;
+ } else if (hfunc == ETH_RSS_HASH_XOR) {
+ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR))
+ return -EINVAL;
+ if (dev->features & NETIF_F_RXHASH)
+ en_warn(priv, "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n");
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key,
+ u8 *hfunc)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ u32 n = mlx4_en_get_rxfh_indir_size(dev);
+ u32 i, rss_rings;
+ int err = 0;
+
+ rss_rings = priv->prof->rss_rings ?: n;
+ rss_rings = rounddown_pow_of_two(rss_rings);
+
+ for (i = 0; i < n; i++) {
+ if (!ring_index)
+ break;
+ ring_index[i] = i % rss_rings;
+ }
+ if (key)
+ memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE);
+ if (hfunc)
+ *hfunc = priv->rss_hash_fn;
+ return err;
+}
+
+static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
+ const u8 *key, const u8 hfunc)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ u32 n = mlx4_en_get_rxfh_indir_size(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int port_up = 0;
+ int err = 0;
+ int i;
+ int rss_rings = 0;
+
+ /* Calculate RSS table size and make sure flows are spread evenly
+ * between rings
+ */
+ for (i = 0; i < n; i++) {
+ if (!ring_index)
+ break;
+ if (i > 0 && !ring_index[i] && !rss_rings)
+ rss_rings = i;
+
+ if (ring_index[i] != (i % (rss_rings ?: n)))
+ return -EINVAL;
+ }
+
+ if (!rss_rings)
+ rss_rings = n;
+
+ /* RSS table size must be an order of 2 */
+ if (!is_power_of_2(rss_rings))
+ return -EINVAL;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE) {
+ err = mlx4_en_check_rxfh_func(dev, hfunc);
+ if (err)
+ return err;
+ }
+
+ mutex_lock(&mdev->state_lock);
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev, 1);
+ }
+
+ if (ring_index)
+ priv->prof->rss_rings = rss_rings;
+ if (key)
+ memcpy(priv->rss_key, key, MLX4_EN_RSS_KEY_SIZE);
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE)
+ priv->rss_hash_fn = hfunc;
+
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err)
+ en_err(priv, "Failed starting port\n");
+ }
+
+ mutex_unlock(&mdev->state_lock);
+ return err;
+}
+
+#define all_zeros_or_all_ones(field) \
+ ((field) == 0 || (field) == (__force typeof(field))-1)
+
+static int mlx4_en_validate_flow(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_usrip4_spec *l3_mask;
+ struct ethtool_tcpip4_spec *l4_mask;
+ struct ethhdr *eth_mask;
+
+ if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+ return -EINVAL;
+
+ if (cmd->fs.flow_type & FLOW_MAC_EXT) {
+ /* dest mac mask must be ff:ff:ff:ff:ff:ff */
+ if (!is_broadcast_ether_addr(cmd->fs.m_ext.h_dest))
+ return -EINVAL;
+ }
+
+ switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ if (cmd->fs.m_u.tcp_ip4_spec.tos)
+ return -EINVAL;
+ l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
+ /* don't allow mask which isn't all 0 or 1 */
+ if (!all_zeros_or_all_ones(l4_mask->ip4src) ||
+ !all_zeros_or_all_ones(l4_mask->ip4dst) ||
+ !all_zeros_or_all_ones(l4_mask->psrc) ||
+ !all_zeros_or_all_ones(l4_mask->pdst))
+ return -EINVAL;
+ break;
+ case IP_USER_FLOW:
+ l3_mask = &cmd->fs.m_u.usr_ip4_spec;
+ if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto ||
+ cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
+ (!l3_mask->ip4src && !l3_mask->ip4dst) ||
+ !all_zeros_or_all_ones(l3_mask->ip4src) ||
+ !all_zeros_or_all_ones(l3_mask->ip4dst))
+ return -EINVAL;
+ break;
+ case ETHER_FLOW:
+ eth_mask = &cmd->fs.m_u.ether_spec;
+ /* source mac mask must not be set */
+ if (!is_zero_ether_addr(eth_mask->h_source))
+ return -EINVAL;
+
+ /* dest mac mask must be ff:ff:ff:ff:ff:ff */
+ if (!is_broadcast_ether_addr(eth_mask->h_dest))
+ return -EINVAL;
+
+ if (!all_zeros_or_all_ones(eth_mask->h_proto))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((cmd->fs.flow_type & FLOW_EXT)) {
+ if (cmd->fs.m_ext.vlan_etype ||
+ !((cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)) ==
+ 0 ||
+ (cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)) ==
+ cpu_to_be16(VLAN_VID_MASK)))
+ return -EINVAL;
+
+ if (cmd->fs.m_ext.vlan_tci) {
+ if (be16_to_cpu(cmd->fs.h_ext.vlan_tci) >= VLAN_N_VID)
+ return -EINVAL;
+
+ }
+ }
+
+ return 0;
+}
+
+static int mlx4_en_ethtool_add_mac_rule(struct ethtool_rxnfc *cmd,
+ struct list_head *rule_list_h,
+ struct mlx4_spec_list *spec_l2,
+ unsigned char *mac)
+{
+ int err = 0;
+ __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
+ memcpy(spec_l2->eth.dst_mac, mac, ETH_ALEN);
+
+ if ((cmd->fs.flow_type & FLOW_EXT) &&
+ (cmd->fs.m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) {
+ spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
+ spec_l2->eth.vlan_id_msk = cpu_to_be16(VLAN_VID_MASK);
+ }
+
+ list_add_tail(&spec_l2->list, rule_list_h);
+
+ return err;
+}
+
+static int mlx4_en_ethtool_add_mac_rule_by_ipv4(struct mlx4_en_priv *priv,
+ struct ethtool_rxnfc *cmd,
+ struct list_head *rule_list_h,
+ struct mlx4_spec_list *spec_l2,
+ __be32 ipv4_dst)
+{
+#ifdef CONFIG_INET
+ unsigned char mac[ETH_ALEN];
+
+ if (!ipv4_is_multicast(ipv4_dst)) {
+ if (cmd->fs.flow_type & FLOW_MAC_EXT)
+ memcpy(&mac, cmd->fs.h_ext.h_dest, ETH_ALEN);
+ else
+ memcpy(&mac, priv->dev->dev_addr, ETH_ALEN);
+ } else {
+ ip_eth_mc_map(ipv4_dst, mac);
+ }
+
+ return mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, &mac[0]);
+#else
+ return -EINVAL;
+#endif
+}
+
+static int add_ip_rule(struct mlx4_en_priv *priv,
+ struct ethtool_rxnfc *cmd,
+ struct list_head *list_h)
+{
+ int err;
+ struct mlx4_spec_list *spec_l2 = NULL;
+ struct mlx4_spec_list *spec_l3 = NULL;
+ struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec;
+
+ spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL);
+ spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL);
+ if (!spec_l2 || !spec_l3) {
+ err = -ENOMEM;
+ goto free_spec;
+ }
+
+ err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, spec_l2,
+ cmd->fs.h_u.
+ usr_ip4_spec.ip4dst);
+ if (err)
+ goto free_spec;
+ spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+ spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
+ if (l3_mask->ip4src)
+ spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+ spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
+ if (l3_mask->ip4dst)
+ spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+ list_add_tail(&spec_l3->list, list_h);
+
+ return 0;
+
+free_spec:
+ kfree(spec_l2);
+ kfree(spec_l3);
+ return err;
+}
+
+static int add_tcp_udp_rule(struct mlx4_en_priv *priv,
+ struct ethtool_rxnfc *cmd,
+ struct list_head *list_h, int proto)
+{
+ int err;
+ struct mlx4_spec_list *spec_l2 = NULL;
+ struct mlx4_spec_list *spec_l3 = NULL;
+ struct mlx4_spec_list *spec_l4 = NULL;
+ struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
+
+ spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL);
+ spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL);
+ spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL);
+ if (!spec_l2 || !spec_l3 || !spec_l4) {
+ err = -ENOMEM;
+ goto free_spec;
+ }
+
+ spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+
+ if (proto == TCP_V4_FLOW) {
+ err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h,
+ spec_l2,
+ cmd->fs.h_u.
+ tcp_ip4_spec.ip4dst);
+ if (err)
+ goto free_spec;
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
+ spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src;
+ spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst;
+ spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc;
+ spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst;
+ } else {
+ err = mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h,
+ spec_l2,
+ cmd->fs.h_u.
+ udp_ip4_spec.ip4dst);
+ if (err)
+ goto free_spec;
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
+ spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src;
+ spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst;
+ spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc;
+ spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst;
+ }
+
+ if (l4_mask->ip4src)
+ spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+ if (l4_mask->ip4dst)
+ spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+
+ if (l4_mask->psrc)
+ spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK;
+ if (l4_mask->pdst)
+ spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK;
+
+ list_add_tail(&spec_l3->list, list_h);
+ list_add_tail(&spec_l4->list, list_h);
+
+ return 0;
+
+free_spec:
+ kfree(spec_l2);
+ kfree(spec_l3);
+ kfree(spec_l4);
+ return err;
+}
+
+static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
+ struct ethtool_rxnfc *cmd,
+ struct list_head *rule_list_h)
+{
+ int err;
+ struct ethhdr *eth_spec;
+ struct mlx4_spec_list *spec_l2;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ err = mlx4_en_validate_flow(dev, cmd);
+ if (err)
+ return err;
+
+ switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case ETHER_FLOW:
+ spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL);
+ if (!spec_l2)
+ return -ENOMEM;
+
+ eth_spec = &cmd->fs.h_u.ether_spec;
+ mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2,
+ &eth_spec->h_dest[0]);
+ spec_l2->eth.ether_type = eth_spec->h_proto;
+ if (eth_spec->h_proto)
+ spec_l2->eth.ether_type_enable = 1;
+ break;
+ case IP_USER_FLOW:
+ err = add_ip_rule(priv, cmd, rule_list_h);
+ break;
+ case TCP_V4_FLOW:
+ err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW);
+ break;
+ case UDP_V4_FLOW:
+ err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW);
+ break;
+ }
+
+ return err;
+}
+
+static int mlx4_en_flow_replace(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ int err;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct ethtool_flow_id *loc_rule;
+ struct mlx4_spec_list *spec, *tmp_spec;
+ u32 qpn;
+ u64 reg_id;
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_REGULAR,
+ };
+
+ rule.port = priv->port;
+ rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location;
+ INIT_LIST_HEAD(&rule.list);
+
+ /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */
+ if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC)
+ qpn = priv->drop_qp.qpn;
+ else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
+ qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
+ } else {
+ if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
+ en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n",
+ cmd->fs.ring_cookie);
+ return -EINVAL;
+ }
+ qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn;
+ if (!qpn) {
+ en_warn(priv, "rxnfc: RX ring (%llu) is inactive\n",
+ cmd->fs.ring_cookie);
+ return -EINVAL;
+ }
+ }
+ rule.qpn = qpn;
+ err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list);
+ if (err)
+ goto out_free_list;
+
+ loc_rule = &priv->ethtool_rules[cmd->fs.location];
+ if (loc_rule->id) {
+ err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id);
+ if (err) {
+ en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n",
+ cmd->fs.location, loc_rule->id);
+ goto out_free_list;
+ }
+ loc_rule->id = 0;
+ memset(&loc_rule->flow_spec, 0,
+ sizeof(struct ethtool_rx_flow_spec));
+ list_del(&loc_rule->list);
+ }
+ err = mlx4_flow_attach(priv->mdev->dev, &rule, &reg_id);
+ if (err) {
+ en_err(priv, "Fail to attach network rule at location %d\n",
+ cmd->fs.location);
+ goto out_free_list;
+ }
+ loc_rule->id = reg_id;
+ memcpy(&loc_rule->flow_spec, &cmd->fs,
+ sizeof(struct ethtool_rx_flow_spec));
+ list_add_tail(&loc_rule->list, &priv->ethtool_list);
+
+out_free_list:
+ list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) {
+ list_del(&spec->list);
+ kfree(spec);
+ }
+ return err;
+}
+
+static int mlx4_en_flow_detach(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ int err = 0;
+ struct ethtool_flow_id *rule;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+ return -EINVAL;
+
+ rule = &priv->ethtool_rules[cmd->fs.location];
+ if (!rule->id) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ err = mlx4_flow_detach(priv->mdev->dev, rule->id);
+ if (err) {
+ en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n",
+ cmd->fs.location, rule->id);
+ goto out;
+ }
+ rule->id = 0;
+ memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec));
+ list_del(&rule->list);
+out:
+ return err;
+
+}
+
+static int mlx4_en_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ int loc)
+{
+ int err = 0;
+ struct ethtool_flow_id *rule;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES)
+ return -EINVAL;
+
+ rule = &priv->ethtool_rules[loc];
+ if (rule->id)
+ memcpy(&cmd->fs, &rule->flow_spec,
+ sizeof(struct ethtool_rx_flow_spec));
+ else
+ err = -ENOENT;
+
+ return err;
+}
+
+static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv)
+{
+
+ int i, res = 0;
+ for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) {
+ if (priv->ethtool_rules[i].id)
+ res++;
+ }
+ return res;
+
+}
+
+static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err = 0;
+ int i = 0, priority = 0;
+
+ if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT ||
+ cmd->cmd == ETHTOOL_GRXCLSRULE ||
+ cmd->cmd == ETHTOOL_GRXCLSRLALL) &&
+ (mdev->dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up))
+ return -EINVAL;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = priv->rx_ring_num;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ cmd->rule_cnt = mlx4_en_get_num_flows(priv);
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ cmd->data = MAX_NUM_OF_FS_RULES;
+ while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) {
+ err = mlx4_en_get_flow(dev, cmd, i);
+ if (!err)
+ rule_locs[priority++] = i;
+ i++;
+ }
+ err = 0;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ int err = 0;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ if (mdev->dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up)
+ return -EINVAL;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ err = mlx4_en_flow_replace(dev, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ err = mlx4_en_flow_detach(dev, cmd);
+ break;
+ default:
+ en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd);
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+static int mlx4_en_get_max_num_rx_rings(struct net_device *dev)
+{
+ return min_t(int, num_online_cpus(), MAX_RX_RINGS);
+}
+
+static void mlx4_en_get_channels(struct net_device *dev,
+ struct ethtool_channels *channel)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ channel->max_rx = mlx4_en_get_max_num_rx_rings(dev);
+ channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up;
+
+ channel->rx_count = priv->rx_ring_num;
+ channel->tx_count = priv->tx_ring_num[TX] /
+ priv->prof->num_up;
+}
+
+static int mlx4_en_set_channels(struct net_device *dev,
+ struct ethtool_channels *channel)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_port_profile new_prof;
+ struct mlx4_en_priv *tmp;
+ int total_tx_count;
+ int port_up = 0;
+ int xdp_count;
+ int err = 0;
+ u8 up;
+
+ if (!channel->tx_count || !channel->rx_count)
+ return -EINVAL;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ mutex_lock(&mdev->state_lock);
+ xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
+ total_tx_count = channel->tx_count * priv->prof->num_up + xdp_count;
+ if (total_tx_count > MAX_TX_RINGS) {
+ err = -EINVAL;
+ en_err(priv,
+ "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
+ total_tx_count, MAX_TX_RINGS);
+ goto out;
+ }
+
+ memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+ new_prof.num_tx_rings_p_up = channel->tx_count;
+ new_prof.tx_ring_num[TX] = channel->tx_count * priv->prof->num_up;
+ new_prof.tx_ring_num[TX_XDP] = xdp_count;
+ new_prof.rx_ring_num = channel->rx_count;
+
+ err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
+ if (err)
+ goto out;
+
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev, 1);
+ }
+
+ mlx4_en_safe_replace_resources(priv, tmp);
+
+ netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
+
+ up = (priv->prof->num_up == MLX4_EN_NUM_UP_LOW) ?
+ 0 : priv->prof->num_up;
+ mlx4_en_setup_tc(dev, up);
+
+ en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
+ en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
+
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err)
+ en_err(priv, "Failed starting port\n");
+ }
+
+ err = mlx4_en_moderation_update(priv);
+out:
+ mutex_unlock(&mdev->state_lock);
+ kfree(tmp);
+ return err;
+}
+
+static int mlx4_en_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int ret;
+
+ ret = ethtool_op_get_ts_info(dev, info);
+ if (ret)
+ return ret;
+
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
+ info->so_timestamping |=
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types =
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+
+ info->rx_filters =
+ (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_ALL);
+
+ if (mdev->ptp_clock)
+ info->phc_index = ptp_clock_index(mdev->ptp_clock);
+ }
+
+ return ret;
+}
+
+static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ bool bf_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_BLUEFLAME);
+ bool bf_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_BLUEFLAME);
+ bool phv_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_PHV);
+ bool phv_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_PHV);
+ int i;
+ int ret = 0;
+
+ if (bf_enabled_new != bf_enabled_old) {
+ int t;
+
+ if (bf_enabled_new) {
+ bool bf_supported = true;
+
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+ for (i = 0; i < priv->tx_ring_num[t]; i++)
+ bf_supported &=
+ priv->tx_ring[t][i]->bf_alloced;
+
+ if (!bf_supported) {
+ en_err(priv, "BlueFlame is not supported\n");
+ return -EINVAL;
+ }
+
+ priv->pflags |= MLX4_EN_PRIV_FLAGS_BLUEFLAME;
+ } else {
+ priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
+ }
+
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+ for (i = 0; i < priv->tx_ring_num[t]; i++)
+ priv->tx_ring[t][i]->bf_enabled =
+ bf_enabled_new;
+
+ en_info(priv, "BlueFlame %s\n",
+ bf_enabled_new ? "Enabled" : "Disabled");
+ }
+
+ if (phv_enabled_new != phv_enabled_old) {
+ ret = set_phv_bit(mdev->dev, priv->port, (int)phv_enabled_new);
+ if (ret)
+ return ret;
+ else if (phv_enabled_new)
+ priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV;
+ else
+ priv->pflags &= ~MLX4_EN_PRIV_FLAGS_PHV;
+ en_info(priv, "PHV bit %s\n",
+ phv_enabled_new ? "Enabled" : "Disabled");
+ }
+ return 0;
+}
+
+static u32 mlx4_en_get_priv_flags(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ return priv->pflags;
+}
+
+static int mlx4_en_get_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ void *data)
+{
+ const struct mlx4_en_priv *priv = netdev_priv(dev);
+ int ret = 0;
+
+ switch (tuna->id) {
+ case ETHTOOL_TX_COPYBREAK:
+ *(u32 *)data = priv->prof->inline_thold;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int mlx4_en_set_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int val, ret = 0;
+
+ switch (tuna->id) {
+ case ETHTOOL_TX_COPYBREAK:
+ val = *(u32 *)data;
+ if (val < MIN_PKT_LEN || val > MAX_INLINE)
+ ret = -EINVAL;
+ else
+ priv->prof->inline_thold = val;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int mlx4_en_get_module_info(struct net_device *dev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int ret;
+ u8 data[4];
+
+ /* Read first 2 bytes to get Module & REV ID */
+ ret = mlx4_get_module_info(mdev->dev, priv->port,
+ 0/*offset*/, 2/*size*/, data);
+ if (ret < 2)
+ return -EIO;
+
+ switch (data[0] /* identifier */) {
+ case MLX4_MODULE_ID_QSFP:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+ case MLX4_MODULE_ID_QSFP_PLUS:
+ if (data[1] >= 0x3) { /* revision id */
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ }
+ break;
+ case MLX4_MODULE_ID_QSFP28:
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ break;
+ case MLX4_MODULE_ID_SFP:
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx4_en_get_module_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int offset = ee->offset;
+ int i = 0, ret;
+
+ if (ee->len == 0)
+ return -EINVAL;
+
+ memset(data, 0, ee->len);
+
+ while (i < ee->len) {
+ en_dbg(DRV, priv,
+ "mlx4_get_module_info i(%d) offset(%d) len(%d)\n",
+ i, offset, ee->len - i);
+
+ ret = mlx4_get_module_info(mdev->dev, priv->port,
+ offset, ee->len - i, data + i);
+
+ if (!ret) /* Done reading */
+ return 0;
+
+ if (ret < 0) {
+ en_err(priv,
+ "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n",
+ i, offset, ee->len - i, ret);
+ return ret;
+ }
+
+ i += ret;
+ offset += ret;
+ }
+ return 0;
+}
+
+static int mlx4_en_set_phys_id(struct net_device *dev,
+ enum ethtool_phys_id_state state)
+{
+ int err;
+ u16 beacon_duration;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_BEACON))
+ return -EOPNOTSUPP;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ beacon_duration = PORT_BEACON_MAX_LIMIT;
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ beacon_duration = 0;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx4_SET_PORT_BEACON(mdev->dev, priv->port, beacon_duration);
+ return err;
+}
+
+const struct ethtool_ops mlx4_en_ethtool_ops = {
+ .get_drvinfo = mlx4_en_get_drvinfo,
+ .get_link_ksettings = mlx4_en_get_link_ksettings,
+ .set_link_ksettings = mlx4_en_set_link_ksettings,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx4_en_get_strings,
+ .get_sset_count = mlx4_en_get_sset_count,
+ .get_ethtool_stats = mlx4_en_get_ethtool_stats,
+ .self_test = mlx4_en_self_test,
+ .set_phys_id = mlx4_en_set_phys_id,
+ .get_wol = mlx4_en_get_wol,
+ .set_wol = mlx4_en_set_wol,
+ .get_msglevel = mlx4_en_get_msglevel,
+ .set_msglevel = mlx4_en_set_msglevel,
+ .get_coalesce = mlx4_en_get_coalesce,
+ .set_coalesce = mlx4_en_set_coalesce,
+ .get_pauseparam = mlx4_en_get_pauseparam,
+ .set_pauseparam = mlx4_en_set_pauseparam,
+ .get_ringparam = mlx4_en_get_ringparam,
+ .set_ringparam = mlx4_en_set_ringparam,
+ .get_rxnfc = mlx4_en_get_rxnfc,
+ .set_rxnfc = mlx4_en_set_rxnfc,
+ .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size,
+ .get_rxfh_key_size = mlx4_en_get_rxfh_key_size,
+ .get_rxfh = mlx4_en_get_rxfh,
+ .set_rxfh = mlx4_en_set_rxfh,
+ .get_channels = mlx4_en_get_channels,
+ .set_channels = mlx4_en_set_channels,
+ .get_ts_info = mlx4_en_get_ts_info,
+ .set_priv_flags = mlx4_en_set_priv_flags,
+ .get_priv_flags = mlx4_en_get_priv_flags,
+ .get_tunable = mlx4_en_get_tunable,
+ .set_tunable = mlx4_en_set_tunable,
+ .get_module_info = mlx4_en_get_module_info,
+ .get_module_eeprom = mlx4_en_get_module_eeprom
+};
+
+
+
+
+
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
new file mode 100644
index 000000000..d25e16d2c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_en.h"
+
+MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin");
+MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const char mlx4_en_version[] =
+ DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v"
+ DRV_VERSION "\n";
+
+#define MLX4_EN_PARM_INT(X, def_val, desc) \
+ static unsigned int X = def_val;\
+ module_param(X , uint, 0444); \
+ MODULE_PARM_DESC(X, desc);
+
+
+/*
+ * Device scope module parameters
+ */
+
+/* Enable RSS UDP traffic */
+MLX4_EN_PARM_INT(udp_rss, 1,
+ "Enable RSS for incoming UDP traffic or disabled (0)");
+
+/* Priority pausing */
+MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]."
+ " Per priority bit mask");
+MLX4_EN_PARM_INT(pfcrx, 0, "Priority based Flow Control policy on RX[7:0]."
+ " Per priority bit mask");
+
+MLX4_EN_PARM_INT(inline_thold, MAX_INLINE,
+ "Threshold for using inline data (range: 17-104, default: 104)");
+
+#define MAX_PFC_TX 0xff
+#define MAX_PFC_RX 0xff
+
+void en_print(const char *level, const struct mlx4_en_priv *priv,
+ const char *format, ...)
+{
+ va_list args;
+ struct va_format vaf;
+
+ va_start(args, format);
+
+ vaf.fmt = format;
+ vaf.va = &args;
+ if (priv->registered)
+ printk("%s%s: %s: %pV",
+ level, DRV_NAME, priv->dev->name, &vaf);
+ else
+ printk("%s%s: %s: Port %d: %pV",
+ level, DRV_NAME, dev_name(&priv->mdev->pdev->dev),
+ priv->port, &vaf);
+ va_end(args);
+}
+
+void mlx4_en_update_loopback_state(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (features & NETIF_F_LOOPBACK)
+ priv->ctrl_flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
+ else
+ priv->ctrl_flags &= cpu_to_be32(~MLX4_WQE_CTRL_FORCE_LOOPBACK);
+
+ priv->flags &= ~(MLX4_EN_FLAG_RX_FILTER_NEEDED|
+ MLX4_EN_FLAG_ENABLE_HW_LOOPBACK);
+
+ /* Drop the packet if SRIOV is not enabled
+ * and not performing the selftest or flb disabled
+ */
+ if (mlx4_is_mfunc(priv->mdev->dev) &&
+ !(features & NETIF_F_LOOPBACK) && !priv->validate_loopback)
+ priv->flags |= MLX4_EN_FLAG_RX_FILTER_NEEDED;
+
+ /* Set dmac in Tx WQE if we are in SRIOV mode or if loopback selftest
+ * is requested
+ */
+ if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback)
+ priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK;
+
+ mutex_lock(&priv->mdev->state_lock);
+ if ((priv->mdev->dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB) &&
+ priv->rss_map.indir_qp && priv->rss_map.indir_qp->qpn) {
+ int i;
+ int err = 0;
+ int loopback = !!(features & NETIF_F_LOOPBACK);
+
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ int ret;
+
+ ret = mlx4_en_change_mcast_lb(priv,
+ &priv->rss_map.qps[i],
+ loopback);
+ if (!err)
+ err = ret;
+ }
+ if (err)
+ mlx4_warn(priv->mdev, "failed to change mcast loopback\n");
+ }
+ mutex_unlock(&priv->mdev->state_lock);
+}
+
+static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
+{
+ struct mlx4_en_profile *params = &mdev->profile;
+ int i;
+
+ params->udp_rss = udp_rss;
+ params->max_num_tx_rings_p_up = mlx4_low_memory_profile() ?
+ MLX4_EN_MIN_TX_RING_P_UP :
+ min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP);
+
+ if (params->udp_rss && !(mdev->dev->caps.flags
+ & MLX4_DEV_CAP_FLAG_UDP_RSS)) {
+ mlx4_warn(mdev, "UDP RSS is not supported on this device\n");
+ params->udp_rss = 0;
+ }
+ for (i = 1; i <= MLX4_MAX_PORTS; i++) {
+ params->prof[i].rx_pause = !(pfcrx || pfctx);
+ params->prof[i].rx_ppp = pfcrx;
+ params->prof[i].tx_pause = !(pfcrx || pfctx);
+ params->prof[i].tx_ppp = pfctx;
+ params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
+ params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+ params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
+ params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up;
+ params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up *
+ params->prof[i].num_up;
+ params->prof[i].rss_rings = 0;
+ params->prof[i].inline_thold = inline_thold;
+ }
+}
+
+static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
+{
+ struct mlx4_en_dev *endev = ctx;
+
+ return endev->pndev[port];
+}
+
+static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
+ enum mlx4_dev_event event, unsigned long port)
+{
+ struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
+ struct mlx4_en_priv *priv;
+
+ switch (event) {
+ case MLX4_DEV_EVENT_PORT_UP:
+ case MLX4_DEV_EVENT_PORT_DOWN:
+ if (!mdev->pndev[port])
+ return;
+ priv = netdev_priv(mdev->pndev[port]);
+ /* To prevent races, we poll the link state in a separate
+ task rather than changing it here */
+ priv->link_state = event;
+ queue_work(mdev->workqueue, &priv->linkstate_task);
+ break;
+
+ case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+ mlx4_err(mdev, "Internal error detected, restarting device\n");
+ break;
+
+ case MLX4_DEV_EVENT_SLAVE_INIT:
+ case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+ break;
+ default:
+ if (port < 1 || port > dev->caps.num_ports ||
+ !mdev->pndev[port])
+ return;
+ mlx4_warn(mdev, "Unhandled event %d for port %d\n", event,
+ (int) port);
+ }
+}
+
+static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
+{
+ struct mlx4_en_dev *mdev = endev_ptr;
+ int i;
+
+ mutex_lock(&mdev->state_lock);
+ mdev->device_up = false;
+ mutex_unlock(&mdev->state_lock);
+
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ if (mdev->pndev[i])
+ mlx4_en_destroy_netdev(mdev->pndev[i]);
+
+ flush_workqueue(mdev->workqueue);
+ destroy_workqueue(mdev->workqueue);
+ (void) mlx4_mr_free(dev, &mdev->mr);
+ iounmap(mdev->uar_map);
+ mlx4_uar_free(dev, &mdev->priv_uar);
+ mlx4_pd_free(dev, mdev->priv_pdn);
+ if (mdev->nb.notifier_call)
+ unregister_netdevice_notifier(&mdev->nb);
+ kfree(mdev);
+}
+
+static void mlx4_en_activate(struct mlx4_dev *dev, void *ctx)
+{
+ int i;
+ struct mlx4_en_dev *mdev = ctx;
+
+ /* Create a netdev for each port */
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+ mlx4_info(mdev, "Activating port:%d\n", i);
+ if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i]))
+ mdev->pndev[i] = NULL;
+ }
+
+ /* register notifier */
+ mdev->nb.notifier_call = mlx4_en_netdev_event;
+ if (register_netdevice_notifier(&mdev->nb)) {
+ mdev->nb.notifier_call = NULL;
+ mlx4_err(mdev, "Failed to create notifier\n");
+ }
+}
+
+static void *mlx4_en_add(struct mlx4_dev *dev)
+{
+ struct mlx4_en_dev *mdev;
+ int i;
+
+ printk_once(KERN_INFO "%s", mlx4_en_version);
+
+ mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+ if (!mdev)
+ goto err_free_res;
+
+ if (mlx4_pd_alloc(dev, &mdev->priv_pdn))
+ goto err_free_dev;
+
+ if (mlx4_uar_alloc(dev, &mdev->priv_uar))
+ goto err_pd;
+
+ mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT,
+ PAGE_SIZE);
+ if (!mdev->uar_map)
+ goto err_uar;
+ spin_lock_init(&mdev->uar_lock);
+
+ mdev->dev = dev;
+ mdev->dma_device = &dev->persist->pdev->dev;
+ mdev->pdev = dev->persist->pdev;
+ mdev->device_up = false;
+
+ mdev->LSO_support = !!(dev->caps.flags & (1 << 15));
+ if (!mdev->LSO_support)
+ mlx4_warn(mdev, "LSO not supported, please upgrade to later FW version to enable LSO\n");
+
+ if (mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull,
+ MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ,
+ 0, 0, &mdev->mr)) {
+ mlx4_err(mdev, "Failed allocating memory region\n");
+ goto err_map;
+ }
+ if (mlx4_mr_enable(mdev->dev, &mdev->mr)) {
+ mlx4_err(mdev, "Failed enabling memory region\n");
+ goto err_mr;
+ }
+
+ /* Build device profile according to supplied module parameters */
+ mlx4_en_get_profile(mdev);
+
+ /* Configure which ports to start according to module parameters */
+ mdev->port_cnt = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ mdev->port_cnt++;
+
+ /* Set default number of RX rings*/
+ mlx4_en_set_num_rx_rings(mdev);
+
+ /* Create our own workqueue for reset/multicast tasks
+ * Note: we cannot use the shared workqueue because of deadlocks caused
+ * by the rtnl lock */
+ mdev->workqueue = create_singlethread_workqueue("mlx4_en");
+ if (!mdev->workqueue)
+ goto err_mr;
+
+ /* At this stage all non-port specific tasks are complete:
+ * mark the card state as up */
+ mutex_init(&mdev->state_lock);
+ mdev->device_up = true;
+
+ return mdev;
+
+err_mr:
+ (void) mlx4_mr_free(dev, &mdev->mr);
+err_map:
+ if (mdev->uar_map)
+ iounmap(mdev->uar_map);
+err_uar:
+ mlx4_uar_free(dev, &mdev->priv_uar);
+err_pd:
+ mlx4_pd_free(dev, mdev->priv_pdn);
+err_free_dev:
+ kfree(mdev);
+err_free_res:
+ return NULL;
+}
+
+static struct mlx4_interface mlx4_en_interface = {
+ .add = mlx4_en_add,
+ .remove = mlx4_en_remove,
+ .event = mlx4_en_event,
+ .get_dev = mlx4_en_get_netdev,
+ .protocol = MLX4_PROT_ETH,
+ .activate = mlx4_en_activate,
+};
+
+static void mlx4_en_verify_params(void)
+{
+ if (pfctx > MAX_PFC_TX) {
+ pr_warn("mlx4_en: WARNING: illegal module parameter pfctx 0x%x - should be in range 0-0x%x, will be changed to default (0)\n",
+ pfctx, MAX_PFC_TX);
+ pfctx = 0;
+ }
+
+ if (pfcrx > MAX_PFC_RX) {
+ pr_warn("mlx4_en: WARNING: illegal module parameter pfcrx 0x%x - should be in range 0-0x%x, will be changed to default (0)\n",
+ pfcrx, MAX_PFC_RX);
+ pfcrx = 0;
+ }
+
+ if (inline_thold < MIN_PKT_LEN || inline_thold > MAX_INLINE) {
+ pr_warn("mlx4_en: WARNING: illegal module parameter inline_thold %d - should be in range %d-%d, will be changed to default (%d)\n",
+ inline_thold, MIN_PKT_LEN, MAX_INLINE, MAX_INLINE);
+ inline_thold = MAX_INLINE;
+ }
+}
+
+static int __init mlx4_en_init(void)
+{
+ mlx4_en_verify_params();
+ mlx4_en_init_ptys2ethtool_map();
+
+ return mlx4_register_interface(&mlx4_en_interface);
+}
+
+static void __exit mlx4_en_cleanup(void)
+{
+ mlx4_unregister_interface(&mlx4_en_interface);
+}
+
+module_init(mlx4_en_init);
+module_exit(mlx4_en_cleanup);
+
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
new file mode 100644
index 000000000..14d46afd2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -0,0 +1,3686 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/bpf.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+#include <net/ip.h>
+#include <net/busy_poll.h>
+#include <net/vxlan.h>
+#include <net/devlink.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/cq.h>
+
+#include "mlx4_en.h"
+#include "en_port.h"
+
+#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
+ XDP_PACKET_HEADROOM))
+
+int mlx4_en_setup_tc(struct net_device *dev, u8 up)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int i;
+ unsigned int offset = 0;
+
+ if (up && up != MLX4_EN_NUM_UP_HIGH)
+ return -EINVAL;
+
+ netdev_set_num_tc(dev, up);
+ netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
+ /* Partition Tx queues evenly amongst UP's */
+ for (i = 0; i < up; i++) {
+ netdev_set_tc_queue(dev, i, priv->num_tx_rings_p_up, offset);
+ offset += priv->num_tx_rings_p_up;
+ }
+
+#ifdef CONFIG_MLX4_EN_DCB
+ if (!mlx4_is_slave(priv->mdev->dev)) {
+ if (up) {
+ if (priv->dcbx_cap)
+ priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+ } else {
+ priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
+ priv->cee_config.pfc_state = false;
+ }
+ }
+#endif /* CONFIG_MLX4_EN_DCB */
+
+ return 0;
+}
+
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_port_profile new_prof;
+ struct mlx4_en_priv *tmp;
+ int total_count;
+ int port_up = 0;
+ int err = 0;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ mutex_lock(&mdev->state_lock);
+ memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+ new_prof.num_up = (tc == 0) ? MLX4_EN_NUM_UP_LOW :
+ MLX4_EN_NUM_UP_HIGH;
+ new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up *
+ new_prof.num_up;
+ total_count = new_prof.tx_ring_num[TX] + new_prof.tx_ring_num[TX_XDP];
+ if (total_count > MAX_TX_RINGS) {
+ err = -EINVAL;
+ en_err(priv,
+ "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
+ total_count, MAX_TX_RINGS);
+ goto out;
+ }
+ err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
+ if (err)
+ goto out;
+
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev, 1);
+ }
+
+ mlx4_en_safe_replace_resources(priv, tmp);
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err) {
+ en_err(priv, "Failed starting port for setup TC\n");
+ goto out;
+ }
+ }
+
+ err = mlx4_en_setup_tc(dev, tc);
+out:
+ mutex_unlock(&mdev->state_lock);
+ kfree(tmp);
+ return err;
+}
+
+static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct tc_mqprio_qopt *mqprio = type_data;
+
+ if (type != TC_SETUP_QDISC_MQPRIO)
+ return -EOPNOTSUPP;
+
+ if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
+ return -EINVAL;
+
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ return mlx4_en_alloc_tx_queue_per_tc(dev, mqprio->num_tc);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+
+struct mlx4_en_filter {
+ struct list_head next;
+ struct work_struct work;
+
+ u8 ip_proto;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+
+ int rxq_index;
+ struct mlx4_en_priv *priv;
+ u32 flow_id; /* RFS infrastructure id */
+ int id; /* mlx4_en driver id */
+ u64 reg_id; /* Flow steering API id */
+ u8 activated; /* Used to prevent expiry before filter
+ * is attached
+ */
+ struct hlist_node filter_chain;
+};
+
+static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv);
+
+static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto)
+{
+ switch (ip_proto) {
+ case IPPROTO_UDP:
+ return MLX4_NET_TRANS_RULE_ID_UDP;
+ case IPPROTO_TCP:
+ return MLX4_NET_TRANS_RULE_ID_TCP;
+ default:
+ return MLX4_NET_TRANS_RULE_NUM;
+ }
+};
+
+/* Must not acquire state_lock, as its corresponding work_sync
+ * is done under it.
+ */
+static void mlx4_en_filter_work(struct work_struct *work)
+{
+ struct mlx4_en_filter *filter = container_of(work,
+ struct mlx4_en_filter,
+ work);
+ struct mlx4_en_priv *priv = filter->priv;
+ struct mlx4_spec_list spec_tcp_udp = {
+ .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto),
+ {
+ .tcp_udp = {
+ .dst_port = filter->dst_port,
+ .dst_port_msk = (__force __be16)-1,
+ .src_port = filter->src_port,
+ .src_port_msk = (__force __be16)-1,
+ },
+ },
+ };
+ struct mlx4_spec_list spec_ip = {
+ .id = MLX4_NET_TRANS_RULE_ID_IPV4,
+ {
+ .ipv4 = {
+ .dst_ip = filter->dst_ip,
+ .dst_ip_msk = (__force __be32)-1,
+ .src_ip = filter->src_ip,
+ .src_ip_msk = (__force __be32)-1,
+ },
+ },
+ };
+ struct mlx4_spec_list spec_eth = {
+ .id = MLX4_NET_TRANS_RULE_ID_ETH,
+ };
+ struct mlx4_net_trans_rule rule = {
+ .list = LIST_HEAD_INIT(rule.list),
+ .queue_mode = MLX4_NET_TRANS_Q_LIFO,
+ .exclusive = 1,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .port = priv->port,
+ .priority = MLX4_DOMAIN_RFS,
+ };
+ int rc;
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ if (spec_tcp_udp.id >= MLX4_NET_TRANS_RULE_NUM) {
+ en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n",
+ filter->ip_proto);
+ goto ignore;
+ }
+ list_add_tail(&spec_eth.list, &rule.list);
+ list_add_tail(&spec_ip.list, &rule.list);
+ list_add_tail(&spec_tcp_udp.list, &rule.list);
+
+ rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn;
+ memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN);
+ memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+
+ filter->activated = 0;
+
+ if (filter->reg_id) {
+ rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id);
+ if (rc && rc != -ENOENT)
+ en_err(priv, "Error detaching flow. rc = %d\n", rc);
+ }
+
+ rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id);
+ if (rc)
+ en_err(priv, "Error attaching flow. err = %d\n", rc);
+
+ignore:
+ mlx4_en_filter_rfs_expire(priv);
+
+ filter->activated = 1;
+}
+
+static inline struct hlist_head *
+filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip,
+ __be16 src_port, __be16 dst_port)
+{
+ unsigned long l;
+ int bucket_idx;
+
+ l = (__force unsigned long)src_port |
+ ((__force unsigned long)dst_port << 2);
+ l ^= (__force unsigned long)(src_ip ^ dst_ip);
+
+ bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT);
+
+ return &priv->filter_hash[bucket_idx];
+}
+
+static struct mlx4_en_filter *
+mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip,
+ __be32 dst_ip, u8 ip_proto, __be16 src_port,
+ __be16 dst_port, u32 flow_id)
+{
+ struct mlx4_en_filter *filter = NULL;
+
+ filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC);
+ if (!filter)
+ return NULL;
+
+ filter->priv = priv;
+ filter->rxq_index = rxq_index;
+ INIT_WORK(&filter->work, mlx4_en_filter_work);
+
+ filter->src_ip = src_ip;
+ filter->dst_ip = dst_ip;
+ filter->ip_proto = ip_proto;
+ filter->src_port = src_port;
+ filter->dst_port = dst_port;
+
+ filter->flow_id = flow_id;
+
+ filter->id = priv->last_filter_id++ % RPS_NO_FILTER;
+
+ list_add_tail(&filter->next, &priv->filters);
+ hlist_add_head(&filter->filter_chain,
+ filter_hash_bucket(priv, src_ip, dst_ip, src_port,
+ dst_port));
+
+ return filter;
+}
+
+static void mlx4_en_filter_free(struct mlx4_en_filter *filter)
+{
+ struct mlx4_en_priv *priv = filter->priv;
+ int rc;
+
+ list_del(&filter->next);
+
+ rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id);
+ if (rc && rc != -ENOENT)
+ en_err(priv, "Error detaching flow. rc = %d\n", rc);
+
+ kfree(filter);
+}
+
+static inline struct mlx4_en_filter *
+mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip,
+ u8 ip_proto, __be16 src_port, __be16 dst_port)
+{
+ struct mlx4_en_filter *filter;
+ struct mlx4_en_filter *ret = NULL;
+
+ hlist_for_each_entry(filter,
+ filter_hash_bucket(priv, src_ip, dst_ip,
+ src_port, dst_port),
+ filter_chain) {
+ if (filter->src_ip == src_ip &&
+ filter->dst_ip == dst_ip &&
+ filter->ip_proto == ip_proto &&
+ filter->src_port == src_port &&
+ filter->dst_port == dst_port) {
+ ret = filter;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int
+mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct mlx4_en_priv *priv = netdev_priv(net_dev);
+ struct mlx4_en_filter *filter;
+ const struct iphdr *ip;
+ const __be16 *ports;
+ u8 ip_proto;
+ __be32 src_ip;
+ __be32 dst_ip;
+ __be16 src_port;
+ __be16 dst_port;
+ int nhoff = skb_network_offset(skb);
+ int ret = 0;
+
+ if (skb->encapsulation)
+ return -EPROTONOSUPPORT;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return -EPROTONOSUPPORT;
+
+ ip = (const struct iphdr *)(skb->data + nhoff);
+ if (ip_is_fragment(ip))
+ return -EPROTONOSUPPORT;
+
+ if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
+ return -EPROTONOSUPPORT;
+ ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl);
+
+ ip_proto = ip->protocol;
+ src_ip = ip->saddr;
+ dst_ip = ip->daddr;
+ src_port = ports[0];
+ dst_port = ports[1];
+
+ spin_lock_bh(&priv->filters_lock);
+ filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto,
+ src_port, dst_port);
+ if (filter) {
+ if (filter->rxq_index == rxq_index)
+ goto out;
+
+ filter->rxq_index = rxq_index;
+ } else {
+ filter = mlx4_en_filter_alloc(priv, rxq_index,
+ src_ip, dst_ip, ip_proto,
+ src_port, dst_port, flow_id);
+ if (!filter) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+
+ queue_work(priv->mdev->workqueue, &filter->work);
+
+out:
+ ret = filter->id;
+err:
+ spin_unlock_bh(&priv->filters_lock);
+
+ return ret;
+}
+
+void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_filter *filter, *tmp;
+ LIST_HEAD(del_list);
+
+ spin_lock_bh(&priv->filters_lock);
+ list_for_each_entry_safe(filter, tmp, &priv->filters, next) {
+ list_move(&filter->next, &del_list);
+ hlist_del(&filter->filter_chain);
+ }
+ spin_unlock_bh(&priv->filters_lock);
+
+ list_for_each_entry_safe(filter, tmp, &del_list, next) {
+ cancel_work_sync(&filter->work);
+ mlx4_en_filter_free(filter);
+ }
+}
+
+static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL;
+ LIST_HEAD(del_list);
+ int i = 0;
+
+ spin_lock_bh(&priv->filters_lock);
+ list_for_each_entry_safe(filter, tmp, &priv->filters, next) {
+ if (i > MLX4_EN_FILTER_EXPIRY_QUOTA)
+ break;
+
+ if (filter->activated &&
+ !work_pending(&filter->work) &&
+ rps_may_expire_flow(priv->dev,
+ filter->rxq_index, filter->flow_id,
+ filter->id)) {
+ list_move(&filter->next, &del_list);
+ hlist_del(&filter->filter_chain);
+ } else
+ last_filter = filter;
+
+ i++;
+ }
+
+ if (last_filter && (&last_filter->next != priv->filters.next))
+ list_move(&priv->filters, &last_filter->next);
+
+ spin_unlock_bh(&priv->filters_lock);
+
+ list_for_each_entry_safe(filter, tmp, &del_list, next)
+ mlx4_en_filter_free(filter);
+}
+#endif
+
+static int mlx4_en_vlan_rx_add_vid(struct net_device *dev,
+ __be16 proto, u16 vid)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
+ int idx;
+
+ en_dbg(HW, priv, "adding VLAN:%d\n", vid);
+
+ set_bit(vid, priv->active_vlans);
+
+ /* Add VID to port VLAN filter */
+ mutex_lock(&mdev->state_lock);
+ if (mdev->device_up && priv->port_up) {
+ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
+ if (err) {
+ en_err(priv, "Failed configuring VLAN filter\n");
+ goto out;
+ }
+ }
+ err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx);
+ if (err)
+ en_dbg(HW, priv, "Failed adding vlan %d\n", vid);
+
+out:
+ mutex_unlock(&mdev->state_lock);
+ return err;
+}
+
+static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
+ __be16 proto, u16 vid)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err = 0;
+
+ en_dbg(HW, priv, "Killing VID:%d\n", vid);
+
+ clear_bit(vid, priv->active_vlans);
+
+ /* Remove VID from port VLAN filter */
+ mutex_lock(&mdev->state_lock);
+ mlx4_unregister_vlan(mdev->dev, priv->port, vid);
+
+ if (mdev->device_up && priv->port_up) {
+ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
+ if (err)
+ en_err(priv, "Failed configuring VLAN filter\n");
+ }
+ mutex_unlock(&mdev->state_lock);
+
+ return err;
+}
+
+static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
+{
+ int i;
+ for (i = ETH_ALEN - 1; i >= 0; --i) {
+ dst_mac[i] = src_mac & 0xff;
+ src_mac >>= 8;
+ }
+ memset(&dst_mac[ETH_ALEN], 0, 2);
+}
+
+
+static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *addr,
+ int qpn, u64 *reg_id)
+{
+ int err;
+
+ if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
+ priv->mdev->dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
+ return 0; /* do nothing */
+
+ err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn,
+ MLX4_DOMAIN_NIC, reg_id);
+ if (err) {
+ en_err(priv, "failed to add vxlan steering rule, err %d\n", err);
+ return err;
+ }
+ en_dbg(DRV, priv, "added vxlan steering rule, mac %pM reg_id %llx\n", addr, *reg_id);
+ return 0;
+}
+
+
+static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
+ unsigned char *mac, int *qpn, u64 *reg_id)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
+ int err;
+
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_B0: {
+ struct mlx4_qp qp;
+ u8 gid[16] = {0};
+
+ qp.qpn = *qpn;
+ memcpy(&gid[10], mac, ETH_ALEN);
+ gid[5] = priv->port;
+
+ err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+ break;
+ }
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ struct mlx4_spec_list spec_eth = { {NULL} };
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ rule.port = priv->port;
+ rule.qpn = *qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN);
+ memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+ list_add_tail(&spec_eth.list, &rule.list);
+
+ err = mlx4_flow_attach(dev, &rule, reg_id);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ if (err)
+ en_warn(priv, "Failed Attaching Unicast\n");
+
+ return err;
+}
+
+static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv,
+ unsigned char *mac, int qpn, u64 reg_id)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
+
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_B0: {
+ struct mlx4_qp qp;
+ u8 gid[16] = {0};
+
+ qp.qpn = qpn;
+ memcpy(&gid[10], mac, ETH_ALEN);
+ gid[5] = priv->port;
+
+ mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+ break;
+ }
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ mlx4_flow_detach(dev, reg_id);
+ break;
+ }
+ default:
+ en_err(priv, "Invalid steering mode.\n");
+ }
+}
+
+static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
+ int index = 0;
+ int err = 0;
+ int *qpn = &priv->base_qpn;
+ u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr);
+
+ en_dbg(DRV, priv, "Registering MAC: %pM for adding\n",
+ priv->dev->dev_addr);
+ index = mlx4_register_mac(dev, priv->port, mac);
+ if (index < 0) {
+ err = index;
+ en_err(priv, "Failed adding MAC: %pM\n",
+ priv->dev->dev_addr);
+ return err;
+ }
+
+ en_info(priv, "Steering Mode %d\n", dev->caps.steering_mode);
+
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
+ int base_qpn = mlx4_get_base_qpn(dev, priv->port);
+ *qpn = base_qpn + index;
+ return 0;
+ }
+
+ err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP,
+ MLX4_RES_USAGE_DRIVER);
+ en_dbg(DRV, priv, "Reserved qp %d\n", *qpn);
+ if (err) {
+ en_err(priv, "Failed to reserve qp for mac registration\n");
+ mlx4_unregister_mac(dev, priv->port, mac);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
+ int qpn = priv->base_qpn;
+
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
+ u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr);
+ en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
+ priv->dev->dev_addr);
+ mlx4_unregister_mac(dev, priv->port, mac);
+ } else {
+ en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n",
+ priv->port, qpn);
+ mlx4_qp_release_range(dev, qpn, 1);
+ priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC;
+ }
+}
+
+static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn,
+ unsigned char *new_mac, unsigned char *prev_mac)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_dev *dev = mdev->dev;
+ int err = 0;
+ u64 new_mac_u64 = mlx4_mac_to_u64(new_mac);
+
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
+ struct hlist_head *bucket;
+ unsigned int mac_hash;
+ struct mlx4_mac_entry *entry;
+ struct hlist_node *tmp;
+ u64 prev_mac_u64 = mlx4_mac_to_u64(prev_mac);
+
+ bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]];
+ hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
+ if (ether_addr_equal_64bits(entry->mac, prev_mac)) {
+ mlx4_en_uc_steer_release(priv, entry->mac,
+ qpn, entry->reg_id);
+ mlx4_unregister_mac(dev, priv->port,
+ prev_mac_u64);
+ hlist_del_rcu(&entry->hlist);
+ synchronize_rcu();
+ memcpy(entry->mac, new_mac, ETH_ALEN);
+ entry->reg_id = 0;
+ mac_hash = new_mac[MLX4_EN_MAC_HASH_IDX];
+ hlist_add_head_rcu(&entry->hlist,
+ &priv->mac_hash[mac_hash]);
+ mlx4_register_mac(dev, priv->port, new_mac_u64);
+ err = mlx4_en_uc_steer_add(priv, new_mac,
+ &qpn,
+ &entry->reg_id);
+ if (err)
+ return err;
+ if (priv->tunnel_reg_id) {
+ mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id);
+ priv->tunnel_reg_id = 0;
+ }
+ err = mlx4_en_tunnel_steer_add(priv, new_mac, qpn,
+ &priv->tunnel_reg_id);
+ return err;
+ }
+ }
+ return -EINVAL;
+ }
+
+ return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64);
+}
+
+static void mlx4_en_update_user_mac(struct mlx4_en_priv *priv,
+ unsigned char new_mac[ETH_ALEN + 2])
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
+
+ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_USER_MAC_EN))
+ return;
+
+ err = mlx4_SET_PORT_user_mac(mdev->dev, priv->port, new_mac);
+ if (err)
+ en_err(priv, "Failed to pass user MAC(%pM) to Firmware for port %d, with error %d\n",
+ new_mac, priv->port, err);
+}
+
+static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv,
+ unsigned char new_mac[ETH_ALEN + 2])
+{
+ int err = 0;
+
+ if (priv->port_up) {
+ /* Remove old MAC and insert the new one */
+ err = mlx4_en_replace_mac(priv, priv->base_qpn,
+ new_mac, priv->current_mac);
+ if (err)
+ en_err(priv, "Failed changing HW MAC address\n");
+ } else
+ en_dbg(HW, priv, "Port is down while registering mac, exiting...\n");
+
+ if (!err)
+ memcpy(priv->current_mac, new_mac, sizeof(priv->current_mac));
+
+ return err;
+}
+
+static int mlx4_en_set_mac(struct net_device *dev, void *addr)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct sockaddr *saddr = addr;
+ unsigned char new_mac[ETH_ALEN + 2];
+ int err;
+
+ if (!is_valid_ether_addr(saddr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ mutex_lock(&mdev->state_lock);
+ memcpy(new_mac, saddr->sa_data, ETH_ALEN);
+ err = mlx4_en_do_set_mac(priv, new_mac);
+ if (err)
+ goto out;
+
+ memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
+ mlx4_en_update_user_mac(priv, new_mac);
+out:
+ mutex_unlock(&mdev->state_lock);
+
+ return err;
+}
+
+static void mlx4_en_clear_list(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_mc_list *tmp, *mc_to_del;
+
+ list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) {
+ list_del(&mc_to_del->list);
+ kfree(mc_to_del);
+ }
+}
+
+static void mlx4_en_cache_mclist(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct netdev_hw_addr *ha;
+ struct mlx4_en_mc_list *tmp;
+
+ mlx4_en_clear_list(dev);
+ netdev_for_each_mc_addr(ha, dev) {
+ tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC);
+ if (!tmp) {
+ mlx4_en_clear_list(dev);
+ return;
+ }
+ memcpy(tmp->addr, ha->addr, ETH_ALEN);
+ list_add_tail(&tmp->list, &priv->mc_list);
+ }
+}
+
+static void update_mclist_flags(struct mlx4_en_priv *priv,
+ struct list_head *dst,
+ struct list_head *src)
+{
+ struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc;
+ bool found;
+
+ /* Find all the entries that should be removed from dst,
+ * These are the entries that are not found in src
+ */
+ list_for_each_entry(dst_tmp, dst, list) {
+ found = false;
+ list_for_each_entry(src_tmp, src, list) {
+ if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ dst_tmp->action = MCLIST_REM;
+ }
+
+ /* Add entries that exist in src but not in dst
+ * mark them as need to add
+ */
+ list_for_each_entry(src_tmp, src, list) {
+ found = false;
+ list_for_each_entry(dst_tmp, dst, list) {
+ if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) {
+ dst_tmp->action = MCLIST_NONE;
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ new_mc = kmemdup(src_tmp,
+ sizeof(struct mlx4_en_mc_list),
+ GFP_KERNEL);
+ if (!new_mc)
+ return;
+
+ new_mc->action = MCLIST_ADD;
+ list_add_tail(&new_mc->list, dst);
+ }
+ }
+}
+
+static void mlx4_en_set_rx_mode(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (!priv->port_up)
+ return;
+
+ queue_work(priv->mdev->workqueue, &priv->rx_mode_task);
+}
+
+static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv,
+ struct mlx4_en_dev *mdev)
+{
+ int err = 0;
+
+ if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) {
+ if (netif_msg_rx_status(priv))
+ en_warn(priv, "Entering promiscuous mode\n");
+ priv->flags |= MLX4_EN_FLAG_PROMISC;
+
+ /* Enable promiscouos mode */
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_add(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ MLX4_FS_ALL_DEFAULT);
+ if (err)
+ en_err(priv, "Failed enabling promiscuous mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_unicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed enabling unicast promiscuous mode\n");
+
+ /* Add the default qp number as multicast
+ * promisc
+ */
+ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
+ err = mlx4_multicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed enabling multicast promiscuous mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ }
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ err = mlx4_SET_PORT_qpn_calc(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ 1);
+ if (err)
+ en_err(priv, "Failed enabling promiscuous mode\n");
+ break;
+ }
+
+ /* Disable port multicast filter (unconditionally) */
+ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+ 0, MLX4_MCAST_DISABLE);
+ if (err)
+ en_err(priv, "Failed disabling multicast filter\n");
+ }
+}
+
+static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv,
+ struct mlx4_en_dev *mdev)
+{
+ int err = 0;
+
+ if (netif_msg_rx_status(priv))
+ en_warn(priv, "Leaving promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_PROMISC;
+
+ /* Disable promiscouos mode */
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_ALL_DEFAULT);
+ if (err)
+ en_err(priv, "Failed disabling promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_unicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed disabling unicast promiscuous mode\n");
+ /* Disable Multicast promisc */
+ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+ err = mlx4_multicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed disabling multicast promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ }
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ err = mlx4_SET_PORT_qpn_calc(mdev->dev,
+ priv->port,
+ priv->base_qpn, 0);
+ if (err)
+ en_err(priv, "Failed disabling promiscuous mode\n");
+ break;
+ }
+}
+
+static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
+ struct net_device *dev,
+ struct mlx4_en_dev *mdev)
+{
+ struct mlx4_en_mc_list *mclist, *tmp;
+ u64 mcast_addr = 0;
+ u8 mc_list[16] = {0};
+ int err = 0;
+
+ /* Enable/disable the multicast filter according to IFF_ALLMULTI */
+ if (dev->flags & IFF_ALLMULTI) {
+ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+ 0, MLX4_MCAST_DISABLE);
+ if (err)
+ en_err(priv, "Failed disabling multicast filter\n");
+
+ /* Add the default qp number as multicast promisc */
+ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_add(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ MLX4_FS_MC_DEFAULT);
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_multicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ break;
+ }
+ if (err)
+ en_err(priv, "Failed entering multicast promisc mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ }
+ } else {
+ /* Disable Multicast promisc */
+ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_MC_DEFAULT);
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_multicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ break;
+ }
+ if (err)
+ en_err(priv, "Failed disabling multicast promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ }
+
+ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+ 0, MLX4_MCAST_DISABLE);
+ if (err)
+ en_err(priv, "Failed disabling multicast filter\n");
+
+ /* Flush mcast filter and init it with broadcast address */
+ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST,
+ 1, MLX4_MCAST_CONFIG);
+
+ /* Update multicast list - we cache all addresses so they won't
+ * change while HW is updated holding the command semaphor */
+ netif_addr_lock_bh(dev);
+ mlx4_en_cache_mclist(dev);
+ netif_addr_unlock_bh(dev);
+ list_for_each_entry(mclist, &priv->mc_list, list) {
+ mcast_addr = mlx4_mac_to_u64(mclist->addr);
+ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
+ mcast_addr, 0, MLX4_MCAST_CONFIG);
+ }
+ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
+ 0, MLX4_MCAST_ENABLE);
+ if (err)
+ en_err(priv, "Failed enabling multicast filter\n");
+
+ update_mclist_flags(priv, &priv->curr_list, &priv->mc_list);
+ list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) {
+ if (mclist->action == MCLIST_REM) {
+ /* detach this address and delete from list */
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ mc_list[5] = priv->port;
+ err = mlx4_multicast_detach(mdev->dev,
+ priv->rss_map.indir_qp,
+ mc_list,
+ MLX4_PROT_ETH,
+ mclist->reg_id);
+ if (err)
+ en_err(priv, "Fail to detach multicast address\n");
+
+ if (mclist->tunnel_reg_id) {
+ err = mlx4_flow_detach(priv->mdev->dev, mclist->tunnel_reg_id);
+ if (err)
+ en_err(priv, "Failed to detach multicast address\n");
+ }
+
+ /* remove from list */
+ list_del(&mclist->list);
+ kfree(mclist);
+ } else if (mclist->action == MCLIST_ADD) {
+ /* attach the address */
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ /* needed for B0 steering support */
+ mc_list[5] = priv->port;
+ err = mlx4_multicast_attach(mdev->dev,
+ priv->rss_map.indir_qp,
+ mc_list,
+ priv->port, 0,
+ MLX4_PROT_ETH,
+ &mclist->reg_id);
+ if (err)
+ en_err(priv, "Fail to attach multicast address\n");
+
+ err = mlx4_en_tunnel_steer_add(priv, &mc_list[10], priv->base_qpn,
+ &mclist->tunnel_reg_id);
+ if (err)
+ en_err(priv, "Failed to attach multicast address\n");
+ }
+ }
+ }
+}
+
+static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
+ struct net_device *dev,
+ struct mlx4_en_dev *mdev)
+{
+ struct netdev_hw_addr *ha;
+ struct mlx4_mac_entry *entry;
+ struct hlist_node *tmp;
+ bool found;
+ u64 mac;
+ int err = 0;
+ struct hlist_head *bucket;
+ unsigned int i;
+ int removed = 0;
+ u32 prev_flags;
+
+ /* Note that we do not need to protect our mac_hash traversal with rcu,
+ * since all modification code is protected by mdev->state_lock
+ */
+
+ /* find what to remove */
+ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) {
+ bucket = &priv->mac_hash[i];
+ hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
+ found = false;
+ netdev_for_each_uc_addr(ha, dev) {
+ if (ether_addr_equal_64bits(entry->mac,
+ ha->addr)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* MAC address of the port is not in uc list */
+ if (ether_addr_equal_64bits(entry->mac,
+ priv->current_mac))
+ found = true;
+
+ if (!found) {
+ mac = mlx4_mac_to_u64(entry->mac);
+ mlx4_en_uc_steer_release(priv, entry->mac,
+ priv->base_qpn,
+ entry->reg_id);
+ mlx4_unregister_mac(mdev->dev, priv->port, mac);
+
+ hlist_del_rcu(&entry->hlist);
+ kfree_rcu(entry, rcu);
+ en_dbg(DRV, priv, "Removed MAC %pM on port:%d\n",
+ entry->mac, priv->port);
+ ++removed;
+ }
+ }
+ }
+
+ /* if we didn't remove anything, there is no use in trying to add
+ * again once we are in a forced promisc mode state
+ */
+ if ((priv->flags & MLX4_EN_FLAG_FORCE_PROMISC) && 0 == removed)
+ return;
+
+ prev_flags = priv->flags;
+ priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC;
+
+ /* find what to add */
+ netdev_for_each_uc_addr(ha, dev) {
+ found = false;
+ bucket = &priv->mac_hash[ha->addr[MLX4_EN_MAC_HASH_IDX]];
+ hlist_for_each_entry(entry, bucket, hlist) {
+ if (ether_addr_equal_64bits(entry->mac, ha->addr)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ en_err(priv, "Failed adding MAC %pM on port:%d (out of memory)\n",
+ ha->addr, priv->port);
+ priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC;
+ break;
+ }
+ mac = mlx4_mac_to_u64(ha->addr);
+ memcpy(entry->mac, ha->addr, ETH_ALEN);
+ err = mlx4_register_mac(mdev->dev, priv->port, mac);
+ if (err < 0) {
+ en_err(priv, "Failed registering MAC %pM on port %d: %d\n",
+ ha->addr, priv->port, err);
+ kfree(entry);
+ priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC;
+ break;
+ }
+ err = mlx4_en_uc_steer_add(priv, ha->addr,
+ &priv->base_qpn,
+ &entry->reg_id);
+ if (err) {
+ en_err(priv, "Failed adding MAC %pM on port %d: %d\n",
+ ha->addr, priv->port, err);
+ mlx4_unregister_mac(mdev->dev, priv->port, mac);
+ kfree(entry);
+ priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC;
+ break;
+ } else {
+ unsigned int mac_hash;
+ en_dbg(DRV, priv, "Added MAC %pM on port:%d\n",
+ ha->addr, priv->port);
+ mac_hash = ha->addr[MLX4_EN_MAC_HASH_IDX];
+ bucket = &priv->mac_hash[mac_hash];
+ hlist_add_head_rcu(&entry->hlist, bucket);
+ }
+ }
+ }
+
+ if (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC) {
+ en_warn(priv, "Forcing promiscuous mode on port:%d\n",
+ priv->port);
+ } else if (prev_flags & MLX4_EN_FLAG_FORCE_PROMISC) {
+ en_warn(priv, "Stop forcing promiscuous mode on port:%d\n",
+ priv->port);
+ }
+}
+
+static void mlx4_en_do_set_rx_mode(struct work_struct *work)
+{
+ struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+ rx_mode_task);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->dev;
+
+ mutex_lock(&mdev->state_lock);
+ if (!mdev->device_up) {
+ en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n");
+ goto out;
+ }
+ if (!priv->port_up) {
+ en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n");
+ goto out;
+ }
+
+ if (!netif_carrier_ok(dev)) {
+ if (!mlx4_en_QUERY_PORT(mdev, priv->port)) {
+ if (priv->port_state.link_state) {
+ priv->last_link_state = MLX4_DEV_EVENT_PORT_UP;
+ netif_carrier_on(dev);
+ en_dbg(LINK, priv, "Link Up\n");
+ }
+ }
+ }
+
+ if (dev->priv_flags & IFF_UNICAST_FLT)
+ mlx4_en_do_uc_filter(priv, dev, mdev);
+
+ /* Promsicuous mode: disable all filters */
+ if ((dev->flags & IFF_PROMISC) ||
+ (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) {
+ mlx4_en_set_promisc_mode(priv, mdev);
+ goto out;
+ }
+
+ /* Not in promiscuous mode */
+ if (priv->flags & MLX4_EN_FLAG_PROMISC)
+ mlx4_en_clear_promisc_mode(priv, mdev);
+
+ mlx4_en_do_multicast(priv, dev, mdev);
+out:
+ mutex_unlock(&mdev->state_lock);
+}
+
+static int mlx4_en_set_rss_steer_rules(struct mlx4_en_priv *priv)
+{
+ u64 reg_id;
+ int err = 0;
+ int *qpn = &priv->base_qpn;
+ struct mlx4_mac_entry *entry;
+
+ err = mlx4_en_uc_steer_add(priv, priv->dev->dev_addr, qpn, &reg_id);
+ if (err)
+ return err;
+
+ err = mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn,
+ &priv->tunnel_reg_id);
+ if (err)
+ goto tunnel_err;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+
+ memcpy(entry->mac, priv->dev->dev_addr, sizeof(entry->mac));
+ memcpy(priv->current_mac, entry->mac, sizeof(priv->current_mac));
+ entry->reg_id = reg_id;
+ hlist_add_head_rcu(&entry->hlist,
+ &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]);
+
+ return 0;
+
+alloc_err:
+ if (priv->tunnel_reg_id)
+ mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id);
+
+tunnel_err:
+ mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id);
+ return err;
+}
+
+static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv)
+{
+ u64 mac;
+ unsigned int i;
+ int qpn = priv->base_qpn;
+ struct hlist_head *bucket;
+ struct hlist_node *tmp;
+ struct mlx4_mac_entry *entry;
+
+ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) {
+ bucket = &priv->mac_hash[i];
+ hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
+ mac = mlx4_mac_to_u64(entry->mac);
+ en_dbg(DRV, priv, "Registering MAC:%pM for deleting\n",
+ entry->mac);
+ mlx4_en_uc_steer_release(priv, entry->mac,
+ qpn, entry->reg_id);
+
+ mlx4_unregister_mac(priv->mdev->dev, priv->port, mac);
+ hlist_del_rcu(&entry->hlist);
+ kfree_rcu(entry, rcu);
+ }
+ }
+
+ if (priv->tunnel_reg_id) {
+ mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id);
+ priv->tunnel_reg_id = 0;
+ }
+}
+
+static void mlx4_en_tx_timeout(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int i;
+
+ if (netif_msg_timer(priv))
+ en_warn(priv, "Tx timeout called on port:%d\n", priv->port);
+
+ for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+ struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i];
+
+ if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
+ continue;
+ en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n",
+ i, tx_ring->qpn, tx_ring->sp_cqn,
+ tx_ring->cons, tx_ring->prod);
+ }
+
+ priv->port_stats.tx_timeout++;
+ if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) {
+ en_dbg(DRV, priv, "Scheduling port restart\n");
+ queue_work(mdev->workqueue, &priv->restart_task);
+ }
+}
+
+
+static void
+mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ spin_lock_bh(&priv->stats_lock);
+ mlx4_en_fold_software_stats(dev);
+ netdev_stats_to_stats64(stats, &dev->stats);
+ spin_unlock_bh(&priv->stats_lock);
+}
+
+static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_cq *cq;
+ int i, t;
+
+ /* If we haven't received a specific coalescing setting
+ * (module param), we set the moderation parameters as follows:
+ * - moder_cnt is set to the number of mtu sized packets to
+ * satisfy our coalescing target.
+ * - moder_time is set to a fixed value.
+ */
+ priv->rx_frames = MLX4_EN_RX_COAL_TARGET;
+ priv->rx_usecs = MLX4_EN_RX_COAL_TIME;
+ priv->tx_frames = MLX4_EN_TX_COAL_PKTS;
+ priv->tx_usecs = MLX4_EN_TX_COAL_TIME;
+ en_dbg(INTR, priv, "Default coalescing params for mtu:%d - rx_frames:%d rx_usecs:%d\n",
+ priv->dev->mtu, priv->rx_frames, priv->rx_usecs);
+
+ /* Setup cq moderation params */
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ cq = priv->rx_cq[i];
+ cq->moder_cnt = priv->rx_frames;
+ cq->moder_time = priv->rx_usecs;
+ priv->last_moder_time[i] = MLX4_EN_AUTO_CONF;
+ priv->last_moder_packets[i] = 0;
+ priv->last_moder_bytes[i] = 0;
+ }
+
+ for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ for (i = 0; i < priv->tx_ring_num[t]; i++) {
+ cq = priv->tx_cq[t][i];
+ cq->moder_cnt = priv->tx_frames;
+ cq->moder_time = priv->tx_usecs;
+ }
+ }
+
+ /* Reset auto-moderation params */
+ priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW;
+ priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW;
+ priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH;
+ priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH;
+ priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL;
+ priv->adaptive_rx_coal = 1;
+ priv->last_moder_jiffies = 0;
+ priv->last_moder_tx_packets = 0;
+}
+
+static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
+{
+ unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies);
+ u32 pkt_rate_high, pkt_rate_low;
+ struct mlx4_en_cq *cq;
+ unsigned long packets;
+ unsigned long rate;
+ unsigned long avg_pkt_size;
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long rx_pkt_diff;
+ int moder_time;
+ int ring, err;
+
+ if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ)
+ return;
+
+ pkt_rate_low = READ_ONCE(priv->pkt_rate_low);
+ pkt_rate_high = READ_ONCE(priv->pkt_rate_high);
+
+ for (ring = 0; ring < priv->rx_ring_num; ring++) {
+ rx_packets = READ_ONCE(priv->rx_ring[ring]->packets);
+ rx_bytes = READ_ONCE(priv->rx_ring[ring]->bytes);
+
+ rx_pkt_diff = rx_packets - priv->last_moder_packets[ring];
+ packets = rx_pkt_diff;
+ rate = packets * HZ / period;
+ avg_pkt_size = packets ? (rx_bytes -
+ priv->last_moder_bytes[ring]) / packets : 0;
+
+ /* Apply auto-moderation only when packet rate
+ * exceeds a rate that it matters */
+ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) &&
+ avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) {
+ if (rate <= pkt_rate_low)
+ moder_time = priv->rx_usecs_low;
+ else if (rate >= pkt_rate_high)
+ moder_time = priv->rx_usecs_high;
+ else
+ moder_time = (rate - pkt_rate_low) *
+ (priv->rx_usecs_high - priv->rx_usecs_low) /
+ (pkt_rate_high - pkt_rate_low) +
+ priv->rx_usecs_low;
+ } else {
+ moder_time = priv->rx_usecs_low;
+ }
+
+ cq = priv->rx_cq[ring];
+ if (moder_time != priv->last_moder_time[ring] ||
+ cq->moder_cnt != priv->rx_frames) {
+ priv->last_moder_time[ring] = moder_time;
+ cq->moder_time = moder_time;
+ cq->moder_cnt = priv->rx_frames;
+ err = mlx4_en_set_cq_moder(priv, cq);
+ if (err)
+ en_err(priv, "Failed modifying moderation for cq:%d\n",
+ ring);
+ }
+ priv->last_moder_packets[ring] = rx_packets;
+ priv->last_moder_bytes[ring] = rx_bytes;
+ }
+
+ priv->last_moder_jiffies = jiffies;
+}
+
+static void mlx4_en_do_get_stats(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
+ stats_task);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
+
+ mutex_lock(&mdev->state_lock);
+ if (mdev->device_up) {
+ if (priv->port_up) {
+ err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);
+ if (err)
+ en_dbg(HW, priv, "Could not update stats\n");
+
+ mlx4_en_auto_moderation(priv);
+ }
+
+ queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+ }
+ if (mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port]) {
+ mlx4_en_do_set_mac(priv, priv->current_mac);
+ mdev->mac_removed[MLX4_MAX_PORTS + 1 - priv->port] = 0;
+ }
+ mutex_unlock(&mdev->state_lock);
+}
+
+/* mlx4_en_service_task - Run service task for tasks that needed to be done
+ * periodically
+ */
+static void mlx4_en_service_task(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
+ service_task);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ mutex_lock(&mdev->state_lock);
+ if (mdev->device_up) {
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
+ mlx4_en_ptp_overflow_check(mdev);
+
+ mlx4_en_recover_from_oom(priv);
+ queue_delayed_work(mdev->workqueue, &priv->service_task,
+ SERVICE_TASK_DELAY);
+ }
+ mutex_unlock(&mdev->state_lock);
+}
+
+static void mlx4_en_linkstate(struct work_struct *work)
+{
+ struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+ linkstate_task);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int linkstate = priv->link_state;
+
+ mutex_lock(&mdev->state_lock);
+ /* If observable port state changed set carrier state and
+ * report to system log */
+ if (priv->last_link_state != linkstate) {
+ if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) {
+ en_info(priv, "Link Down\n");
+ netif_carrier_off(priv->dev);
+ } else {
+ en_info(priv, "Link Up\n");
+ netif_carrier_on(priv->dev);
+ }
+ }
+ priv->last_link_state = linkstate;
+ mutex_unlock(&mdev->state_lock);
+}
+
+static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+ struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
+ int numa_node = priv->mdev->dev->numa_node;
+
+ if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node),
+ ring->affinity_mask);
+ return 0;
+}
+
+static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+ free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
+}
+
+static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv,
+ int tx_ring_idx)
+{
+ struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX_XDP][tx_ring_idx];
+ int rr_index = tx_ring_idx;
+
+ tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc;
+ tx_ring->recycle_ring = priv->rx_ring[rr_index];
+ en_dbg(DRV, priv, "Set tx_ring[%d][%d]->recycle_ring = rx_ring[%d]\n",
+ TX_XDP, tx_ring_idx, rr_index);
+}
+
+int mlx4_en_start_port(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_cq *cq;
+ struct mlx4_en_tx_ring *tx_ring;
+ int rx_index = 0;
+ int err = 0;
+ int i, t;
+ int j;
+ u8 mc_list[16] = {0};
+
+ if (priv->port_up) {
+ en_dbg(DRV, priv, "start port called while port already up\n");
+ return 0;
+ }
+
+ INIT_LIST_HEAD(&priv->mc_list);
+ INIT_LIST_HEAD(&priv->curr_list);
+ INIT_LIST_HEAD(&priv->ethtool_list);
+ memset(&priv->ethtool_rules[0], 0,
+ sizeof(struct ethtool_flow_id) * MAX_NUM_OF_FS_RULES);
+
+ /* Calculate Rx buf size */
+ dev->mtu = min(dev->mtu, priv->max_mtu);
+ mlx4_en_calc_rx_buf(dev);
+ en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_skb_size);
+
+ /* Configure rx cq's and rings */
+ err = mlx4_en_activate_rx_rings(priv);
+ if (err) {
+ en_err(priv, "Failed to activate RX rings\n");
+ return err;
+ }
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ cq = priv->rx_cq[i];
+
+ err = mlx4_en_init_affinity_hint(priv, i);
+ if (err) {
+ en_err(priv, "Failed preparing IRQ affinity hint\n");
+ goto cq_err;
+ }
+
+ err = mlx4_en_activate_cq(priv, cq, i);
+ if (err) {
+ en_err(priv, "Failed activating Rx CQ\n");
+ mlx4_en_free_affinity_hint(priv, i);
+ goto cq_err;
+ }
+
+ for (j = 0; j < cq->size; j++) {
+ struct mlx4_cqe *cqe = NULL;
+
+ cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) +
+ priv->cqe_factor;
+ cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+ }
+
+ err = mlx4_en_set_cq_moder(priv, cq);
+ if (err) {
+ en_err(priv, "Failed setting cq moderation parameters\n");
+ mlx4_en_deactivate_cq(priv, cq);
+ mlx4_en_free_affinity_hint(priv, i);
+ goto cq_err;
+ }
+ mlx4_en_arm_cq(priv, cq);
+ priv->rx_ring[i]->cqn = cq->mcq.cqn;
+ ++rx_index;
+ }
+
+ /* Set qp number */
+ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port);
+ err = mlx4_en_get_qp(priv);
+ if (err) {
+ en_err(priv, "Failed getting eth qp\n");
+ goto cq_err;
+ }
+ mdev->mac_removed[priv->port] = 0;
+
+ priv->counter_index =
+ mlx4_get_default_counter_index(mdev->dev, priv->port);
+
+ err = mlx4_en_config_rss_steer(priv);
+ if (err) {
+ en_err(priv, "Failed configuring rss steering\n");
+ goto mac_err;
+ }
+
+ err = mlx4_en_create_drop_qp(priv);
+ if (err)
+ goto rss_err;
+
+ /* Configure tx cq's and rings */
+ for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ u8 num_tx_rings_p_up = t == TX ?
+ priv->num_tx_rings_p_up : priv->tx_ring_num[t];
+
+ for (i = 0; i < priv->tx_ring_num[t]; i++) {
+ /* Configure cq */
+ cq = priv->tx_cq[t][i];
+ err = mlx4_en_activate_cq(priv, cq, i);
+ if (err) {
+ en_err(priv, "Failed allocating Tx CQ\n");
+ goto tx_err;
+ }
+ err = mlx4_en_set_cq_moder(priv, cq);
+ if (err) {
+ en_err(priv, "Failed setting cq moderation parameters\n");
+ mlx4_en_deactivate_cq(priv, cq);
+ goto tx_err;
+ }
+ en_dbg(DRV, priv,
+ "Resetting index of collapsed CQ:%d to -1\n", i);
+ cq->buf->wqe_index = cpu_to_be16(0xffff);
+
+ /* Configure ring */
+ tx_ring = priv->tx_ring[t][i];
+ err = mlx4_en_activate_tx_ring(priv, tx_ring,
+ cq->mcq.cqn,
+ i / num_tx_rings_p_up);
+ if (err) {
+ en_err(priv, "Failed allocating Tx ring\n");
+ mlx4_en_deactivate_cq(priv, cq);
+ goto tx_err;
+ }
+ clear_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &tx_ring->state);
+ if (t != TX_XDP) {
+ tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
+ tx_ring->recycle_ring = NULL;
+
+ /* Arm CQ for TX completions */
+ mlx4_en_arm_cq(priv, cq);
+
+ } else {
+ mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring);
+ mlx4_en_init_recycle_ring(priv, i);
+ /* XDP TX CQ should never be armed */
+ }
+
+ /* Set initial ownership of all Tx TXBBs to SW (1) */
+ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
+ *((u32 *)(tx_ring->buf + j)) = 0xffffffff;
+ }
+ }
+
+ /* Configure port */
+ err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+ priv->rx_skb_size + ETH_FCS_LEN,
+ priv->prof->tx_pause,
+ priv->prof->tx_ppp,
+ priv->prof->rx_pause,
+ priv->prof->rx_ppp);
+ if (err) {
+ en_err(priv, "Failed setting port general configurations for port %d, with error %d\n",
+ priv->port, err);
+ goto tx_err;
+ }
+
+ err = mlx4_SET_PORT_user_mtu(mdev->dev, priv->port, dev->mtu);
+ if (err) {
+ en_err(priv, "Failed to pass user MTU(%d) to Firmware for port %d, with error %d\n",
+ dev->mtu, priv->port, err);
+ goto tx_err;
+ }
+
+ /* Set default qp number */
+ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0);
+ if (err) {
+ en_err(priv, "Failed setting default qp numbers\n");
+ goto tx_err;
+ }
+
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 1);
+ if (err) {
+ en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n",
+ err);
+ goto tx_err;
+ }
+ }
+
+ /* Init port */
+ en_dbg(HW, priv, "Initializing port\n");
+ err = mlx4_INIT_PORT(mdev->dev, priv->port);
+ if (err) {
+ en_err(priv, "Failed Initializing port\n");
+ goto tx_err;
+ }
+
+ /* Set Unicast and VXLAN steering rules */
+ if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0 &&
+ mlx4_en_set_rss_steer_rules(priv))
+ mlx4_warn(mdev, "Failed setting steering rules\n");
+
+ /* Attach rx QP to bradcast address */
+ eth_broadcast_addr(&mc_list[10]);
+ mc_list[5] = priv->port; /* needed for B0 steering support */
+ if (mlx4_multicast_attach(mdev->dev, priv->rss_map.indir_qp, mc_list,
+ priv->port, 0, MLX4_PROT_ETH,
+ &priv->broadcast_id))
+ mlx4_warn(mdev, "Failed Attaching Broadcast\n");
+
+ /* Must redo promiscuous mode setup. */
+ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC);
+
+ /* Schedule multicast task to populate multicast list */
+ queue_work(mdev->workqueue, &priv->rx_mode_task);
+
+ if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ udp_tunnel_get_rx_info(dev);
+
+ priv->port_up = true;
+
+ /* Process all completions if exist to prevent
+ * the queues freezing if they are full
+ */
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ local_bh_disable();
+ napi_schedule(&priv->rx_cq[i]->napi);
+ local_bh_enable();
+ }
+
+ clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state);
+ netif_tx_start_all_queues(dev);
+ netif_device_attach(dev);
+
+ return 0;
+
+tx_err:
+ if (t == MLX4_EN_NUM_TX_TYPES) {
+ t--;
+ i = priv->tx_ring_num[t];
+ }
+ while (t >= 0) {
+ while (i--) {
+ mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]);
+ mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]);
+ }
+ if (!t--)
+ break;
+ i = priv->tx_ring_num[t];
+ }
+ mlx4_en_destroy_drop_qp(priv);
+rss_err:
+ mlx4_en_release_rss_steer(priv);
+mac_err:
+ mlx4_en_put_qp(priv);
+cq_err:
+ while (rx_index--) {
+ mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]);
+ mlx4_en_free_affinity_hint(priv, rx_index);
+ }
+ for (i = 0; i < priv->rx_ring_num; i++)
+ mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
+
+ return err; /* need to close devices */
+}
+
+
+void mlx4_en_stop_port(struct net_device *dev, int detach)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_mc_list *mclist, *tmp;
+ struct ethtool_flow_id *flow, *tmp_flow;
+ int i, t;
+ u8 mc_list[16] = {0};
+
+ if (!priv->port_up) {
+ en_dbg(DRV, priv, "stop port called while port already down\n");
+ return;
+ }
+
+ /* close port*/
+ mlx4_CLOSE_PORT(mdev->dev, priv->port);
+
+ /* Synchronize with tx routine */
+ netif_tx_lock_bh(dev);
+ if (detach)
+ netif_device_detach(dev);
+ netif_tx_stop_all_queues(dev);
+ netif_tx_unlock_bh(dev);
+
+ netif_tx_disable(dev);
+
+ spin_lock_bh(&priv->stats_lock);
+ mlx4_en_fold_software_stats(dev);
+ /* Set port as not active */
+ priv->port_up = false;
+ spin_unlock_bh(&priv->stats_lock);
+
+ priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
+
+ /* Promsicuous mode */
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ priv->flags &= ~(MLX4_EN_FLAG_PROMISC |
+ MLX4_EN_FLAG_MC_PROMISC);
+ mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_ALL_DEFAULT);
+ mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_MC_DEFAULT);
+ } else if (priv->flags & MLX4_EN_FLAG_PROMISC) {
+ priv->flags &= ~MLX4_EN_FLAG_PROMISC;
+
+ /* Disable promiscouos mode */
+ mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn,
+ priv->port);
+
+ /* Disable Multicast promisc */
+ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+ mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn,
+ priv->port);
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ }
+ }
+
+ /* Detach All multicasts */
+ eth_broadcast_addr(&mc_list[10]);
+ mc_list[5] = priv->port; /* needed for B0 steering support */
+ mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list,
+ MLX4_PROT_ETH, priv->broadcast_id);
+ list_for_each_entry(mclist, &priv->curr_list, list) {
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ mc_list[5] = priv->port;
+ mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp,
+ mc_list, MLX4_PROT_ETH, mclist->reg_id);
+ if (mclist->tunnel_reg_id)
+ mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id);
+ }
+ mlx4_en_clear_list(dev);
+ list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) {
+ list_del(&mclist->list);
+ kfree(mclist);
+ }
+
+ /* Flush multicast filter */
+ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG);
+
+ /* Remove flow steering rules for the port*/
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ ASSERT_RTNL();
+ list_for_each_entry_safe(flow, tmp_flow,
+ &priv->ethtool_list, list) {
+ mlx4_flow_detach(mdev->dev, flow->id);
+ list_del(&flow->list);
+ }
+ }
+
+ mlx4_en_destroy_drop_qp(priv);
+
+ /* Free TX Rings */
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ for (i = 0; i < priv->tx_ring_num[t]; i++) {
+ mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]);
+ mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]);
+ }
+ }
+ msleep(10);
+
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+ for (i = 0; i < priv->tx_ring_num[t]; i++)
+ mlx4_en_free_tx_buf(dev, priv->tx_ring[t][i]);
+
+ if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
+ mlx4_en_delete_rss_steer_rules(priv);
+
+ /* Free RSS qps */
+ mlx4_en_release_rss_steer(priv);
+
+ /* Unregister Mac address for the port */
+ mlx4_en_put_qp(priv);
+ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN))
+ mdev->mac_removed[priv->port] = 1;
+
+ /* Free RX Rings */
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ struct mlx4_en_cq *cq = priv->rx_cq[i];
+
+ napi_synchronize(&cq->napi);
+ mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
+ mlx4_en_deactivate_cq(priv, cq);
+
+ mlx4_en_free_affinity_hint(priv, i);
+ }
+}
+
+static void mlx4_en_restart(struct work_struct *work)
+{
+ struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+ restart_task);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->dev;
+
+ en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port);
+
+ rtnl_lock();
+ mutex_lock(&mdev->state_lock);
+ if (priv->port_up) {
+ mlx4_en_stop_port(dev, 1);
+ if (mlx4_en_start_port(dev))
+ en_err(priv, "Failed restarting port %d\n", priv->port);
+ }
+ mutex_unlock(&mdev->state_lock);
+ rtnl_unlock();
+}
+
+static void mlx4_en_clear_stats(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_tx_ring **tx_ring;
+ int i;
+
+ if (!mlx4_is_slave(mdev->dev))
+ if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1))
+ en_dbg(HW, priv, "Failed dumping statistics\n");
+
+ memset(&priv->pstats, 0, sizeof(priv->pstats));
+ memset(&priv->pkstats, 0, sizeof(priv->pkstats));
+ memset(&priv->port_stats, 0, sizeof(priv->port_stats));
+ memset(&priv->rx_flowstats, 0, sizeof(priv->rx_flowstats));
+ memset(&priv->tx_flowstats, 0, sizeof(priv->tx_flowstats));
+ memset(&priv->rx_priority_flowstats, 0,
+ sizeof(priv->rx_priority_flowstats));
+ memset(&priv->tx_priority_flowstats, 0,
+ sizeof(priv->tx_priority_flowstats));
+ memset(&priv->pf_stats, 0, sizeof(priv->pf_stats));
+
+ tx_ring = priv->tx_ring[TX];
+ for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+ tx_ring[i]->bytes = 0;
+ tx_ring[i]->packets = 0;
+ tx_ring[i]->tx_csum = 0;
+ tx_ring[i]->tx_dropped = 0;
+ tx_ring[i]->queue_stopped = 0;
+ tx_ring[i]->wake_queue = 0;
+ tx_ring[i]->tso_packets = 0;
+ tx_ring[i]->xmit_more = 0;
+ }
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ priv->rx_ring[i]->bytes = 0;
+ priv->rx_ring[i]->packets = 0;
+ priv->rx_ring[i]->csum_ok = 0;
+ priv->rx_ring[i]->csum_none = 0;
+ priv->rx_ring[i]->csum_complete = 0;
+ }
+}
+
+static int mlx4_en_open(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err = 0;
+
+ mutex_lock(&mdev->state_lock);
+
+ if (!mdev->device_up) {
+ en_err(priv, "Cannot open - device down/disabled\n");
+ err = -EBUSY;
+ goto out;
+ }
+
+ /* Reset HW statistics and SW counters */
+ mlx4_en_clear_stats(dev);
+
+ err = mlx4_en_start_port(dev);
+ if (err)
+ en_err(priv, "Failed starting port:%d\n", priv->port);
+
+out:
+ mutex_unlock(&mdev->state_lock);
+ return err;
+}
+
+
+static int mlx4_en_close(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ en_dbg(IFDOWN, priv, "Close port called\n");
+
+ mutex_lock(&mdev->state_lock);
+
+ mlx4_en_stop_port(dev, 0);
+ netif_carrier_off(dev);
+
+ mutex_unlock(&mdev->state_lock);
+ return 0;
+}
+
+static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
+{
+ int i, t;
+
+#ifdef CONFIG_RFS_ACCEL
+ priv->dev->rx_cpu_rmap = NULL;
+#endif
+
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ for (i = 0; i < priv->tx_ring_num[t]; i++) {
+ if (priv->tx_ring[t] && priv->tx_ring[t][i])
+ mlx4_en_destroy_tx_ring(priv,
+ &priv->tx_ring[t][i]);
+ if (priv->tx_cq[t] && priv->tx_cq[t][i])
+ mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]);
+ }
+ kfree(priv->tx_ring[t]);
+ kfree(priv->tx_cq[t]);
+ }
+
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ if (priv->rx_ring[i])
+ mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
+ priv->prof->rx_ring_size, priv->stride);
+ if (priv->rx_cq[i])
+ mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
+ }
+
+}
+
+static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_port_profile *prof = priv->prof;
+ int i, t;
+ int node;
+
+ /* Create tx Rings */
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ for (i = 0; i < priv->tx_ring_num[t]; i++) {
+ node = cpu_to_node(i % num_online_cpus());
+ if (mlx4_en_create_cq(priv, &priv->tx_cq[t][i],
+ prof->tx_ring_size, i, t, node))
+ goto err;
+
+ if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[t][i],
+ prof->tx_ring_size,
+ TXBB_SIZE, node, i))
+ goto err;
+ }
+ }
+
+ /* Create rx Rings */
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ node = cpu_to_node(i % num_online_cpus());
+ if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
+ prof->rx_ring_size, i, RX, node))
+ goto err;
+
+ if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
+ prof->rx_ring_size, priv->stride,
+ node, i))
+ goto err;
+
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ priv->dev->rx_cpu_rmap = mlx4_get_cpu_rmap(priv->mdev->dev, priv->port);
+#endif
+
+ return 0;
+
+err:
+ en_err(priv, "Failed to allocate NIC resources\n");
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ if (priv->rx_ring[i])
+ mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i],
+ prof->rx_ring_size,
+ priv->stride);
+ if (priv->rx_cq[i])
+ mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
+ }
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ for (i = 0; i < priv->tx_ring_num[t]; i++) {
+ if (priv->tx_ring[t][i])
+ mlx4_en_destroy_tx_ring(priv,
+ &priv->tx_ring[t][i]);
+ if (priv->tx_cq[t][i])
+ mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]);
+ }
+ }
+ return -ENOMEM;
+}
+
+
+static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
+ struct mlx4_en_priv *src,
+ struct mlx4_en_port_profile *prof)
+{
+ int t;
+
+ memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
+ sizeof(dst->hwtstamp_config));
+ dst->num_tx_rings_p_up = prof->num_tx_rings_p_up;
+ dst->rx_ring_num = prof->rx_ring_num;
+ dst->flags = prof->flags;
+ dst->mdev = src->mdev;
+ dst->port = src->port;
+ dst->dev = src->dev;
+ dst->prof = prof;
+ dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+ DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
+
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ dst->tx_ring_num[t] = prof->tx_ring_num[t];
+ if (!dst->tx_ring_num[t])
+ continue;
+
+ dst->tx_ring[t] = kcalloc(MAX_TX_RINGS,
+ sizeof(struct mlx4_en_tx_ring *),
+ GFP_KERNEL);
+ if (!dst->tx_ring[t])
+ goto err_free_tx;
+
+ dst->tx_cq[t] = kcalloc(MAX_TX_RINGS,
+ sizeof(struct mlx4_en_cq *),
+ GFP_KERNEL);
+ if (!dst->tx_cq[t]) {
+ kfree(dst->tx_ring[t]);
+ goto err_free_tx;
+ }
+ }
+
+ return 0;
+
+err_free_tx:
+ while (t--) {
+ kfree(dst->tx_ring[t]);
+ kfree(dst->tx_cq[t]);
+ }
+ return -ENOMEM;
+}
+
+static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
+ struct mlx4_en_priv *src)
+{
+ int t;
+ memcpy(dst->rx_ring, src->rx_ring,
+ sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num);
+ memcpy(dst->rx_cq, src->rx_cq,
+ sizeof(struct mlx4_en_cq *) * src->rx_ring_num);
+ memcpy(&dst->hwtstamp_config, &src->hwtstamp_config,
+ sizeof(dst->hwtstamp_config));
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ dst->tx_ring_num[t] = src->tx_ring_num[t];
+ dst->tx_ring[t] = src->tx_ring[t];
+ dst->tx_cq[t] = src->tx_cq[t];
+ }
+ dst->num_tx_rings_p_up = src->num_tx_rings_p_up;
+ dst->rx_ring_num = src->rx_ring_num;
+ memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
+}
+
+int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
+ struct mlx4_en_priv *tmp,
+ struct mlx4_en_port_profile *prof,
+ bool carry_xdp_prog)
+{
+ struct bpf_prog *xdp_prog;
+ int i, t, ret;
+
+ ret = mlx4_en_copy_priv(tmp, priv, prof);
+ if (ret) {
+ en_warn(priv, "%s: mlx4_en_copy_priv() failed, return\n",
+ __func__);
+ return ret;
+ }
+
+ if (mlx4_en_alloc_resources(tmp)) {
+ en_warn(priv,
+ "%s: Resource allocation failed, using previous configuration\n",
+ __func__);
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ kfree(tmp->tx_ring[t]);
+ kfree(tmp->tx_cq[t]);
+ }
+ return -ENOMEM;
+ }
+
+ /* All rx_rings has the same xdp_prog. Pick the first one. */
+ xdp_prog = rcu_dereference_protected(
+ priv->rx_ring[0]->xdp_prog,
+ lockdep_is_held(&priv->mdev->state_lock));
+
+ if (xdp_prog && carry_xdp_prog) {
+ xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num);
+ if (IS_ERR(xdp_prog)) {
+ mlx4_en_free_resources(tmp);
+ return PTR_ERR(xdp_prog);
+ }
+ for (i = 0; i < tmp->rx_ring_num; i++)
+ rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog,
+ xdp_prog);
+ }
+
+ return 0;
+}
+
+void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
+ struct mlx4_en_priv *tmp)
+{
+ mlx4_en_free_resources(priv);
+ mlx4_en_update_priv(priv, tmp);
+}
+
+void mlx4_en_destroy_netdev(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
+
+ /* Unregister device - this will close the port if it was up */
+ if (priv->registered) {
+ devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev,
+ priv->port));
+ unregister_netdev(dev);
+ }
+
+ if (priv->allocated)
+ mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE);
+
+ cancel_delayed_work(&priv->stats_task);
+ cancel_delayed_work(&priv->service_task);
+ /* flush any pending task for this netdev */
+ flush_workqueue(mdev->workqueue);
+
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
+ mlx4_en_remove_timestamp(mdev);
+
+ /* Detach the netdev so tasks would not attempt to access it */
+ mutex_lock(&mdev->state_lock);
+ mdev->pndev[priv->port] = NULL;
+ mdev->upper[priv->port] = NULL;
+
+#ifdef CONFIG_RFS_ACCEL
+ mlx4_en_cleanup_filters(priv);
+#endif
+
+ mlx4_en_free_resources(priv);
+ mutex_unlock(&mdev->state_lock);
+
+ free_netdev(dev);
+}
+
+static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (mtu > MLX4_EN_MAX_XDP_MTU) {
+ en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
+ mtu, MLX4_EN_MAX_XDP_MTU);
+ return false;
+ }
+
+ return true;
+}
+
+static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err = 0;
+
+ en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
+ dev->mtu, new_mtu);
+
+ if (priv->tx_ring_num[TX_XDP] &&
+ !mlx4_en_check_xdp_mtu(dev, new_mtu))
+ return -EOPNOTSUPP;
+
+ dev->mtu = new_mtu;
+
+ if (netif_running(dev)) {
+ mutex_lock(&mdev->state_lock);
+ if (!mdev->device_up) {
+ /* NIC is probably restarting - let restart task reset
+ * the port */
+ en_dbg(DRV, priv, "Change MTU called with card down!?\n");
+ } else {
+ mlx4_en_stop_port(dev, 1);
+ err = mlx4_en_start_port(dev);
+ if (err) {
+ en_err(priv, "Failed restarting port:%d\n",
+ priv->port);
+ if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING,
+ &priv->state))
+ queue_work(mdev->workqueue, &priv->restart_task);
+ }
+ }
+ mutex_unlock(&mdev->state_lock);
+ }
+ return 0;
+}
+
+static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct hwtstamp_config config;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* reserved for future extensions */
+ if (config.flags)
+ return -EINVAL;
+
+ /* device doesn't support time stamping */
+ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS))
+ return -EINVAL;
+
+ /* TX HW timestamp */
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /* RX HW timestamp */
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ if (mlx4_en_reset_config(dev, config, dev->features)) {
+ config.tx_type = HWTSTAMP_TX_OFF;
+ config.rx_filter = HWTSTAMP_FILTER_NONE;
+ }
+
+ return copy_to_user(ifr->ifr_data, &config,
+ sizeof(config)) ? -EFAULT : 0;
+}
+
+static int mlx4_en_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ return copy_to_user(ifr->ifr_data, &priv->hwtstamp_config,
+ sizeof(priv->hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlx4_en_hwtstamp_set(dev, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx4_en_hwtstamp_get(dev, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static netdev_features_t mlx4_en_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(netdev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ /* Since there is no support for separate RX C-TAG/S-TAG vlan accel
+ * enable/disable make sure S-TAG flag is always in same state as
+ * C-TAG.
+ */
+ if (features & NETIF_F_HW_VLAN_CTAG_RX &&
+ !(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN))
+ features |= NETIF_F_HW_VLAN_STAG_RX;
+ else
+ features &= ~NETIF_F_HW_VLAN_STAG_RX;
+
+ return features;
+}
+
+static int mlx4_en_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx4_en_priv *priv = netdev_priv(netdev);
+ bool reset = false;
+ int ret = 0;
+
+ if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_RXFCS)) {
+ en_info(priv, "Turn %s RX-FCS\n",
+ (features & NETIF_F_RXFCS) ? "ON" : "OFF");
+ reset = true;
+ }
+
+ if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_RXALL)) {
+ u8 ignore_fcs_value = (features & NETIF_F_RXALL) ? 1 : 0;
+
+ en_info(priv, "Turn %s RX-ALL\n",
+ ignore_fcs_value ? "ON" : "OFF");
+ ret = mlx4_SET_PORT_fcs_check(priv->mdev->dev,
+ priv->port, ignore_fcs_value);
+ if (ret)
+ return ret;
+ }
+
+ if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
+ en_info(priv, "Turn %s RX vlan strip offload\n",
+ (features & NETIF_F_HW_VLAN_CTAG_RX) ? "ON" : "OFF");
+ reset = true;
+ }
+
+ if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_TX))
+ en_info(priv, "Turn %s TX vlan strip offload\n",
+ (features & NETIF_F_HW_VLAN_CTAG_TX) ? "ON" : "OFF");
+
+ if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_STAG_TX))
+ en_info(priv, "Turn %s TX S-VLAN strip offload\n",
+ (features & NETIF_F_HW_VLAN_STAG_TX) ? "ON" : "OFF");
+
+ if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_LOOPBACK)) {
+ en_info(priv, "Turn %s loopback\n",
+ (features & NETIF_F_LOOPBACK) ? "ON" : "OFF");
+ mlx4_en_update_loopback_state(netdev, features);
+ }
+
+ if (reset) {
+ ret = mlx4_en_reset_config(netdev, priv->hwtstamp_config,
+ features);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac);
+}
+
+static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+ __be16 vlan_proto)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos,
+ vlan_proto);
+}
+
+static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ return mlx4_set_vf_rate(mdev->dev, en_priv->port, vf, min_tx_rate,
+ max_tx_rate);
+}
+
+static int mlx4_en_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ return mlx4_set_vf_spoofchk(mdev->dev, en_priv->port, vf, setting);
+}
+
+static int mlx4_en_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivf)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ return mlx4_get_vf_config(mdev->dev, en_priv->port, vf, ivf);
+}
+
+static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_state)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state);
+}
+
+static int mlx4_en_get_vf_stats(struct net_device *dev, int vf,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct mlx4_en_priv *en_priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = en_priv->mdev;
+
+ return mlx4_get_vf_stats(mdev->dev, en_priv->port, vf, vf_stats);
+}
+
+#define PORT_ID_BYTE_LEN 8
+static int mlx4_en_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_dev *mdev = priv->mdev->dev;
+ int i;
+ u64 phys_port_id = mdev->caps.phys_port_id[priv->port];
+
+ if (!phys_port_id)
+ return -EOPNOTSUPP;
+
+ ppid->id_len = sizeof(phys_port_id);
+ for (i = PORT_ID_BYTE_LEN - 1; i >= 0; --i) {
+ ppid->id[i] = phys_port_id & 0xff;
+ phys_port_id >>= 8;
+ }
+ return 0;
+}
+
+static void mlx4_en_add_vxlan_offloads(struct work_struct *work)
+{
+ int ret;
+ struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+ vxlan_add_task);
+
+ ret = mlx4_config_vxlan_port(priv->mdev->dev, priv->vxlan_port);
+ if (ret)
+ goto out;
+
+ ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
+ VXLAN_STEER_BY_OUTER_MAC, 1);
+out:
+ if (ret) {
+ en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret);
+ return;
+ }
+
+ /* set offloads */
+ priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_RXCSUM |
+ NETIF_F_TSO | NETIF_F_TSO6 |
+ NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_GSO_PARTIAL;
+}
+
+static void mlx4_en_del_vxlan_offloads(struct work_struct *work)
+{
+ int ret;
+ struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+ vxlan_del_task);
+ /* unset offloads */
+ priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_RXCSUM |
+ NETIF_F_TSO | NETIF_F_TSO6 |
+ NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_GSO_PARTIAL);
+
+ ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port,
+ VXLAN_STEER_BY_OUTER_MAC, 0);
+ if (ret)
+ en_err(priv, "failed setting L2 tunnel configuration ret %d\n", ret);
+
+ priv->vxlan_port = 0;
+}
+
+static void mlx4_en_add_vxlan_port(struct net_device *dev,
+ struct udp_tunnel_info *ti)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ __be16 port = ti->port;
+ __be16 current_port;
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ if (ti->sa_family != AF_INET)
+ return;
+
+ if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ return;
+
+ current_port = priv->vxlan_port;
+ if (current_port && current_port != port) {
+ en_warn(priv, "vxlan port %d configured, can't add port %d\n",
+ ntohs(current_port), ntohs(port));
+ return;
+ }
+
+ priv->vxlan_port = port;
+ queue_work(priv->mdev->workqueue, &priv->vxlan_add_task);
+}
+
+static void mlx4_en_del_vxlan_port(struct net_device *dev,
+ struct udp_tunnel_info *ti)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ __be16 port = ti->port;
+ __be16 current_port;
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ if (ti->sa_family != AF_INET)
+ return;
+
+ if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ return;
+
+ current_port = priv->vxlan_port;
+ if (current_port != port) {
+ en_dbg(DRV, priv, "vxlan port %d isn't configured, ignoring\n", ntohs(port));
+ return;
+ }
+
+ queue_work(priv->mdev->workqueue, &priv->vxlan_del_task);
+}
+
+static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ features = vlan_features_check(skb, features);
+ features = vxlan_features_check(skb, features);
+
+ /* The ConnectX-3 doesn't support outer IPv6 checksums but it does
+ * support inner IPv6 checksums and segmentation so we need to
+ * strip that feature if this is an IPv6 encapsulated frame.
+ */
+ if (skb->encapsulation &&
+ (skb->ip_summed == CHECKSUM_PARTIAL)) {
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (!priv->vxlan_port ||
+ (ip_hdr(skb)->version != 4) ||
+ (udp_hdr(skb)->dest != priv->vxlan_port))
+ features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+ }
+
+ return features;
+}
+
+static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][queue_index];
+ struct mlx4_update_qp_params params;
+ int err;
+
+ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT))
+ return -EOPNOTSUPP;
+
+ /* rate provided to us in Mbs, check if it fits into 12 bits, if not use Gbs */
+ if (maxrate >> 12) {
+ params.rate_unit = MLX4_QP_RATE_LIMIT_GBS;
+ params.rate_val = maxrate / 1000;
+ } else if (maxrate) {
+ params.rate_unit = MLX4_QP_RATE_LIMIT_MBS;
+ params.rate_val = maxrate;
+ } else { /* zero serves to revoke the QP rate-limitation */
+ params.rate_unit = 0;
+ params.rate_val = 0;
+ }
+
+ err = mlx4_update_qp(priv->mdev->dev, tx_ring->qpn, MLX4_UPDATE_QP_RATE_LIMIT,
+ &params);
+ return err;
+}
+
+static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_port_profile new_prof;
+ struct bpf_prog *old_prog;
+ struct mlx4_en_priv *tmp;
+ int tx_changed = 0;
+ int xdp_ring_num;
+ int port_up = 0;
+ int err;
+ int i;
+
+ xdp_ring_num = prog ? priv->rx_ring_num : 0;
+
+ /* No need to reconfigure buffers when simply swapping the
+ * program for a new one.
+ */
+ if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) {
+ if (prog) {
+ prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+ mutex_lock(&mdev->state_lock);
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ old_prog = rcu_dereference_protected(
+ priv->rx_ring[i]->xdp_prog,
+ lockdep_is_held(&mdev->state_lock));
+ rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+ mutex_unlock(&mdev->state_lock);
+ return 0;
+ }
+
+ if (!mlx4_en_check_xdp_mtu(dev, dev->mtu))
+ return -EOPNOTSUPP;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (prog) {
+ prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto out;
+ }
+ }
+
+ mutex_lock(&mdev->state_lock);
+ memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+ new_prof.tx_ring_num[TX_XDP] = xdp_ring_num;
+
+ if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) {
+ tx_changed = 1;
+ new_prof.tx_ring_num[TX] =
+ MAX_TX_RINGS - ALIGN(xdp_ring_num, priv->prof->num_up);
+ en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
+ }
+
+ err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, false);
+ if (err) {
+ if (prog)
+ bpf_prog_sub(prog, priv->rx_ring_num - 1);
+ goto unlock_out;
+ }
+
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev, 1);
+ }
+
+ mlx4_en_safe_replace_resources(priv, tmp);
+ if (tx_changed)
+ netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
+
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ old_prog = rcu_dereference_protected(
+ priv->rx_ring[i]->xdp_prog,
+ lockdep_is_held(&mdev->state_lock));
+ rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err) {
+ en_err(priv, "Failed starting port %d for XDP change\n",
+ priv->port);
+ if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state))
+ queue_work(mdev->workqueue, &priv->restart_task);
+ }
+ }
+
+unlock_out:
+ mutex_unlock(&mdev->state_lock);
+out:
+ kfree(tmp);
+ return err;
+}
+
+static u32 mlx4_xdp_query(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ const struct bpf_prog *xdp_prog;
+ u32 prog_id = 0;
+
+ if (!priv->tx_ring_num[TX_XDP])
+ return prog_id;
+
+ mutex_lock(&mdev->state_lock);
+ xdp_prog = rcu_dereference_protected(
+ priv->rx_ring[0]->xdp_prog,
+ lockdep_is_held(&mdev->state_lock));
+ if (xdp_prog)
+ prog_id = xdp_prog->aux->id;
+ mutex_unlock(&mdev->state_lock);
+
+ return prog_id;
+}
+
+static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return mlx4_xdp_set(dev, xdp->prog);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = mlx4_xdp_query(dev);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct net_device_ops mlx4_netdev_ops = {
+ .ndo_open = mlx4_en_open,
+ .ndo_stop = mlx4_en_close,
+ .ndo_start_xmit = mlx4_en_xmit,
+ .ndo_select_queue = mlx4_en_select_queue,
+ .ndo_get_stats64 = mlx4_en_get_stats64,
+ .ndo_set_rx_mode = mlx4_en_set_rx_mode,
+ .ndo_set_mac_address = mlx4_en_set_mac,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = mlx4_en_change_mtu,
+ .ndo_do_ioctl = mlx4_en_ioctl,
+ .ndo_tx_timeout = mlx4_en_tx_timeout,
+ .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid,
+ .ndo_set_features = mlx4_en_set_features,
+ .ndo_fix_features = mlx4_en_fix_features,
+ .ndo_setup_tc = __mlx4_en_setup_tc,
+#ifdef CONFIG_RFS_ACCEL
+ .ndo_rx_flow_steer = mlx4_en_filter_rfs,
+#endif
+ .ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
+ .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port,
+ .ndo_features_check = mlx4_en_features_check,
+ .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
+ .ndo_bpf = mlx4_xdp,
+};
+
+static const struct net_device_ops mlx4_netdev_ops_master = {
+ .ndo_open = mlx4_en_open,
+ .ndo_stop = mlx4_en_close,
+ .ndo_start_xmit = mlx4_en_xmit,
+ .ndo_select_queue = mlx4_en_select_queue,
+ .ndo_get_stats64 = mlx4_en_get_stats64,
+ .ndo_set_rx_mode = mlx4_en_set_rx_mode,
+ .ndo_set_mac_address = mlx4_en_set_mac,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = mlx4_en_change_mtu,
+ .ndo_tx_timeout = mlx4_en_tx_timeout,
+ .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid,
+ .ndo_set_vf_mac = mlx4_en_set_vf_mac,
+ .ndo_set_vf_vlan = mlx4_en_set_vf_vlan,
+ .ndo_set_vf_rate = mlx4_en_set_vf_rate,
+ .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk,
+ .ndo_set_vf_link_state = mlx4_en_set_vf_link_state,
+ .ndo_get_vf_stats = mlx4_en_get_vf_stats,
+ .ndo_get_vf_config = mlx4_en_get_vf_config,
+ .ndo_set_features = mlx4_en_set_features,
+ .ndo_fix_features = mlx4_en_fix_features,
+ .ndo_setup_tc = __mlx4_en_setup_tc,
+#ifdef CONFIG_RFS_ACCEL
+ .ndo_rx_flow_steer = mlx4_en_filter_rfs,
+#endif
+ .ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
+ .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port,
+ .ndo_features_check = mlx4_en_features_check,
+ .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate,
+ .ndo_bpf = mlx4_xdp,
+};
+
+struct mlx4_en_bond {
+ struct work_struct work;
+ struct mlx4_en_priv *priv;
+ int is_bonded;
+ struct mlx4_port_map port_map;
+};
+
+static void mlx4_en_bond_work(struct work_struct *work)
+{
+ struct mlx4_en_bond *bond = container_of(work,
+ struct mlx4_en_bond,
+ work);
+ int err = 0;
+ struct mlx4_dev *dev = bond->priv->mdev->dev;
+
+ if (bond->is_bonded) {
+ if (!mlx4_is_bonded(dev)) {
+ err = mlx4_bond(dev);
+ if (err)
+ en_err(bond->priv, "Fail to bond device\n");
+ }
+ if (!err) {
+ err = mlx4_port_map_set(dev, &bond->port_map);
+ if (err)
+ en_err(bond->priv, "Fail to set port map [%d][%d]: %d\n",
+ bond->port_map.port1,
+ bond->port_map.port2,
+ err);
+ }
+ } else if (mlx4_is_bonded(dev)) {
+ err = mlx4_unbond(dev);
+ if (err)
+ en_err(bond->priv, "Fail to unbond device\n");
+ }
+ dev_put(bond->priv->dev);
+ kfree(bond);
+}
+
+static int mlx4_en_queue_bond_work(struct mlx4_en_priv *priv, int is_bonded,
+ u8 v2p_p1, u8 v2p_p2)
+{
+ struct mlx4_en_bond *bond = NULL;
+
+ bond = kzalloc(sizeof(*bond), GFP_ATOMIC);
+ if (!bond)
+ return -ENOMEM;
+
+ INIT_WORK(&bond->work, mlx4_en_bond_work);
+ bond->priv = priv;
+ bond->is_bonded = is_bonded;
+ bond->port_map.port1 = v2p_p1;
+ bond->port_map.port2 = v2p_p2;
+ dev_hold(priv->dev);
+ queue_work(priv->mdev->workqueue, &bond->work);
+ return 0;
+}
+
+int mlx4_en_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ u8 port = 0;
+ struct mlx4_en_dev *mdev;
+ struct mlx4_dev *dev;
+ int i, num_eth_ports = 0;
+ bool do_bond = true;
+ struct mlx4_en_priv *priv;
+ u8 v2p_port1 = 0;
+ u8 v2p_port2 = 0;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return NOTIFY_DONE;
+
+ mdev = container_of(this, struct mlx4_en_dev, nb);
+ dev = mdev->dev;
+
+ /* Go into this mode only when two network devices set on two ports
+ * of the same mlx4 device are slaves of the same bonding master
+ */
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+ ++num_eth_ports;
+ if (!port && (mdev->pndev[i] == ndev))
+ port = i;
+ mdev->upper[i] = mdev->pndev[i] ?
+ netdev_master_upper_dev_get(mdev->pndev[i]) : NULL;
+ /* condition not met: network device is a slave */
+ if (!mdev->upper[i])
+ do_bond = false;
+ if (num_eth_ports < 2)
+ continue;
+ /* condition not met: same master */
+ if (mdev->upper[i] != mdev->upper[i-1])
+ do_bond = false;
+ }
+ /* condition not met: 2 salves */
+ do_bond = (num_eth_ports == 2) ? do_bond : false;
+
+ /* handle only events that come with enough info */
+ if ((do_bond && (event != NETDEV_BONDING_INFO)) || !port)
+ return NOTIFY_DONE;
+
+ priv = netdev_priv(ndev);
+ if (do_bond) {
+ struct netdev_notifier_bonding_info *notifier_info = ptr;
+ struct netdev_bonding_info *bonding_info =
+ &notifier_info->bonding_info;
+
+ /* required mode 1, 2 or 4 */
+ if ((bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) &&
+ (bonding_info->master.bond_mode != BOND_MODE_XOR) &&
+ (bonding_info->master.bond_mode != BOND_MODE_8023AD))
+ do_bond = false;
+
+ /* require exactly 2 slaves */
+ if (bonding_info->master.num_slaves != 2)
+ do_bond = false;
+
+ /* calc v2p */
+ if (do_bond) {
+ if (bonding_info->master.bond_mode ==
+ BOND_MODE_ACTIVEBACKUP) {
+ /* in active-backup mode virtual ports are
+ * mapped to the physical port of the active
+ * slave */
+ if (bonding_info->slave.state ==
+ BOND_STATE_BACKUP) {
+ if (port == 1) {
+ v2p_port1 = 2;
+ v2p_port2 = 2;
+ } else {
+ v2p_port1 = 1;
+ v2p_port2 = 1;
+ }
+ } else { /* BOND_STATE_ACTIVE */
+ if (port == 1) {
+ v2p_port1 = 1;
+ v2p_port2 = 1;
+ } else {
+ v2p_port1 = 2;
+ v2p_port2 = 2;
+ }
+ }
+ } else { /* Active-Active */
+ /* in active-active mode a virtual port is
+ * mapped to the native physical port if and only
+ * if the physical port is up */
+ __s8 link = bonding_info->slave.link;
+
+ if (port == 1)
+ v2p_port2 = 2;
+ else
+ v2p_port1 = 1;
+ if ((link == BOND_LINK_UP) ||
+ (link == BOND_LINK_FAIL)) {
+ if (port == 1)
+ v2p_port1 = 1;
+ else
+ v2p_port2 = 2;
+ } else { /* BOND_LINK_DOWN || BOND_LINK_BACK */
+ if (port == 1)
+ v2p_port1 = 2;
+ else
+ v2p_port2 = 1;
+ }
+ }
+ }
+ }
+
+ mlx4_en_queue_bond_work(priv, do_bond,
+ v2p_port1, v2p_port2);
+
+ return NOTIFY_DONE;
+}
+
+void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev,
+ struct mlx4_en_stats_bitmap *stats_bitmap,
+ u8 rx_ppp, u8 rx_pause,
+ u8 tx_ppp, u8 tx_pause)
+{
+ int last_i = NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PF_STATS;
+
+ if (!mlx4_is_slave(dev) &&
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN)) {
+ mutex_lock(&stats_bitmap->mutex);
+ bitmap_clear(stats_bitmap->bitmap, last_i, NUM_FLOW_STATS);
+
+ if (rx_ppp)
+ bitmap_set(stats_bitmap->bitmap, last_i,
+ NUM_FLOW_PRIORITY_STATS_RX);
+ last_i += NUM_FLOW_PRIORITY_STATS_RX;
+
+ if (rx_pause && !(rx_ppp))
+ bitmap_set(stats_bitmap->bitmap, last_i,
+ NUM_FLOW_STATS_RX);
+ last_i += NUM_FLOW_STATS_RX;
+
+ if (tx_ppp)
+ bitmap_set(stats_bitmap->bitmap, last_i,
+ NUM_FLOW_PRIORITY_STATS_TX);
+ last_i += NUM_FLOW_PRIORITY_STATS_TX;
+
+ if (tx_pause && !(tx_ppp))
+ bitmap_set(stats_bitmap->bitmap, last_i,
+ NUM_FLOW_STATS_TX);
+ last_i += NUM_FLOW_STATS_TX;
+
+ mutex_unlock(&stats_bitmap->mutex);
+ }
+}
+
+void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
+ struct mlx4_en_stats_bitmap *stats_bitmap,
+ u8 rx_ppp, u8 rx_pause,
+ u8 tx_ppp, u8 tx_pause)
+{
+ int last_i = 0;
+
+ mutex_init(&stats_bitmap->mutex);
+ bitmap_zero(stats_bitmap->bitmap, NUM_ALL_STATS);
+
+ if (mlx4_is_slave(dev)) {
+ bitmap_set(stats_bitmap->bitmap, last_i +
+ MLX4_FIND_NETDEV_STAT(rx_packets), 1);
+ bitmap_set(stats_bitmap->bitmap, last_i +
+ MLX4_FIND_NETDEV_STAT(tx_packets), 1);
+ bitmap_set(stats_bitmap->bitmap, last_i +
+ MLX4_FIND_NETDEV_STAT(rx_bytes), 1);
+ bitmap_set(stats_bitmap->bitmap, last_i +
+ MLX4_FIND_NETDEV_STAT(tx_bytes), 1);
+ bitmap_set(stats_bitmap->bitmap, last_i +
+ MLX4_FIND_NETDEV_STAT(rx_dropped), 1);
+ bitmap_set(stats_bitmap->bitmap, last_i +
+ MLX4_FIND_NETDEV_STAT(tx_dropped), 1);
+ } else {
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_MAIN_STATS);
+ }
+ last_i += NUM_MAIN_STATS;
+
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PORT_STATS);
+ last_i += NUM_PORT_STATS;
+
+ if (mlx4_is_master(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i,
+ NUM_PF_STATS);
+ last_i += NUM_PF_STATS;
+
+ mlx4_en_update_pfc_stats_bitmap(dev, stats_bitmap,
+ rx_ppp, rx_pause,
+ tx_ppp, tx_pause);
+ last_i += NUM_FLOW_STATS;
+
+ if (!mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PKT_STATS);
+ last_i += NUM_PKT_STATS;
+
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
+ last_i += NUM_XDP_STATS;
+
+ if (!mlx4_is_slave(dev))
+ bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+ last_i += NUM_PHY_STATS;
+}
+
+int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
+ struct mlx4_en_port_profile *prof)
+{
+ struct net_device *dev;
+ struct mlx4_en_priv *priv;
+ int i, t;
+ int err;
+
+ dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
+ MAX_TX_RINGS, MAX_RX_RINGS);
+ if (dev == NULL)
+ return -ENOMEM;
+
+ netif_set_real_num_tx_queues(dev, prof->tx_ring_num[TX]);
+ netif_set_real_num_rx_queues(dev, prof->rx_ring_num);
+
+ SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev);
+ dev->dev_port = port - 1;
+
+ /*
+ * Initialize driver private data
+ */
+
+ priv = netdev_priv(dev);
+ memset(priv, 0, sizeof(struct mlx4_en_priv));
+ priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
+ spin_lock_init(&priv->stats_lock);
+ INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
+ INIT_WORK(&priv->restart_task, mlx4_en_restart);
+ INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
+ INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
+ INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
+ INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads);
+ INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads);
+#ifdef CONFIG_RFS_ACCEL
+ INIT_LIST_HEAD(&priv->filters);
+ spin_lock_init(&priv->filters_lock);
+#endif
+
+ priv->dev = dev;
+ priv->mdev = mdev;
+ priv->ddev = &mdev->pdev->dev;
+ priv->prof = prof;
+ priv->port = port;
+ priv->port_up = false;
+ priv->flags = prof->flags;
+ priv->pflags = MLX4_EN_PRIV_FLAGS_BLUEFLAME;
+ priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
+ MLX4_WQE_CTRL_SOLICITED);
+ priv->num_tx_rings_p_up = mdev->profile.max_num_tx_rings_p_up;
+ priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK;
+ netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key));
+
+ for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+ priv->tx_ring_num[t] = prof->tx_ring_num[t];
+ if (!priv->tx_ring_num[t])
+ continue;
+
+ priv->tx_ring[t] = kcalloc(MAX_TX_RINGS,
+ sizeof(struct mlx4_en_tx_ring *),
+ GFP_KERNEL);
+ if (!priv->tx_ring[t]) {
+ err = -ENOMEM;
+ goto out;
+ }
+ priv->tx_cq[t] = kcalloc(MAX_TX_RINGS,
+ sizeof(struct mlx4_en_cq *),
+ GFP_KERNEL);
+ if (!priv->tx_cq[t]) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ priv->rx_ring_num = prof->rx_ring_num;
+ priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
+ priv->cqe_size = mdev->dev->caps.cqe_size;
+ priv->mac_index = -1;
+ priv->msg_enable = MLX4_EN_MSG_LEVEL;
+#ifdef CONFIG_MLX4_EN_DCB
+ if (!mlx4_is_slave(priv->mdev->dev)) {
+ u8 prio;
+
+ for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; ++prio) {
+ priv->ets.prio_tc[prio] = prio;
+ priv->ets.tc_tsa[prio] = IEEE_8021QAZ_TSA_VENDOR;
+ }
+
+ priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST |
+ DCB_CAP_DCBX_VER_IEEE;
+ priv->flags |= MLX4_EN_DCB_ENABLED;
+ priv->cee_config.pfc_state = false;
+
+ for (i = 0; i < MLX4_EN_NUM_UP_HIGH; i++)
+ priv->cee_config.dcb_pfc[i] = pfc_disabled;
+
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
+ dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
+ } else {
+ en_info(priv, "enabling only PFC DCB ops\n");
+ dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops;
+ }
+ }
+#endif
+
+ for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i)
+ INIT_HLIST_HEAD(&priv->mac_hash[i]);
+
+ /* Query for default mac and max mtu */
+ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
+
+ if (mdev->dev->caps.rx_checksum_flags_port[priv->port] &
+ MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP)
+ priv->flags |= MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP;
+
+ /* Set default MAC */
+ dev->addr_len = ETH_ALEN;
+ mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]);
+ if (!is_valid_ether_addr(dev->dev_addr)) {
+ en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n",
+ priv->port, dev->dev_addr);
+ err = -EINVAL;
+ goto out;
+ } else if (mlx4_is_slave(priv->mdev->dev) &&
+ (priv->mdev->dev->port_random_macs & 1 << priv->port)) {
+ /* Random MAC was assigned in mlx4_slave_cap
+ * in mlx4_core module
+ */
+ dev->addr_assign_type |= NET_ADDR_RANDOM;
+ en_warn(priv, "Assigned random MAC address %pM\n", dev->dev_addr);
+ }
+
+ memcpy(priv->current_mac, dev->dev_addr, sizeof(priv->current_mac));
+
+ priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+ DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
+ err = mlx4_en_alloc_resources(priv);
+ if (err)
+ goto out;
+
+ /* Initialize time stamping config */
+ priv->hwtstamp_config.flags = 0;
+ priv->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF;
+ priv->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+
+ /* Allocate page for receive rings */
+ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
+ MLX4_EN_PAGE_SIZE);
+ if (err) {
+ en_err(priv, "Failed to allocate page for rx qps\n");
+ goto out;
+ }
+ priv->allocated = 1;
+
+ /*
+ * Initialize netdev entry points
+ */
+ if (mlx4_is_master(priv->mdev->dev))
+ dev->netdev_ops = &mlx4_netdev_ops_master;
+ else
+ dev->netdev_ops = &mlx4_netdev_ops;
+ dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT;
+ netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
+ netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
+
+ dev->ethtool_ops = &mlx4_en_ethtool_ops;
+
+ /*
+ * Set driver features
+ */
+ dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ if (mdev->LSO_support)
+ dev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
+
+ dev->vlan_features = dev->hw_features;
+
+ dev->hw_features |= NETIF_F_RXCSUM | NETIF_F_RXHASH;
+ dev->features = dev->hw_features | NETIF_F_HIGHDMA |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
+ dev->hw_features |= NETIF_F_LOOPBACK |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+
+ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) {
+ dev->features |= NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_FILTER;
+ dev->hw_features |= NETIF_F_HW_VLAN_STAG_RX;
+ }
+
+ if (mlx4_is_slave(mdev->dev)) {
+ bool vlan_offload_disabled;
+ int phv;
+
+ err = get_phv_bit(mdev->dev, port, &phv);
+ if (!err && phv) {
+ dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+ priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV;
+ }
+ err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port,
+ &vlan_offload_disabled);
+ if (!err && vlan_offload_disabled) {
+ dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_STAG_TX |
+ NETIF_F_HW_VLAN_STAG_RX);
+ dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_STAG_TX |
+ NETIF_F_HW_VLAN_STAG_RX);
+ }
+ } else {
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN &&
+ !(mdev->dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN))
+ dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+ }
+
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
+ dev->hw_features |= NETIF_F_RXFCS;
+
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS)
+ dev->hw_features |= NETIF_F_RXALL;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ mdev->dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC)
+ dev->hw_features |= NETIF_F_NTUPLE;
+
+ if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
+ dev->priv_flags |= IFF_UNICAST_FLT;
+
+ /* Setting a default hash function value */
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) {
+ priv->rss_hash_fn = ETH_RSS_HASH_TOP;
+ } else if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR) {
+ priv->rss_hash_fn = ETH_RSS_HASH_XOR;
+ } else {
+ en_warn(priv,
+ "No RSS hash capabilities exposed, using Toeplitz\n");
+ priv->rss_hash_fn = ETH_RSS_HASH_TOP;
+ }
+
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_GSO_PARTIAL;
+ dev->features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_GSO_PARTIAL;
+ dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ }
+
+ /* MTU range: 68 - hw-specific max */
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = priv->max_mtu;
+
+ mdev->pndev[port] = dev;
+ mdev->upper[port] = NULL;
+
+ netif_carrier_off(dev);
+ mlx4_en_set_default_moderation(priv);
+
+ en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num[TX]);
+ en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
+
+ mlx4_en_update_loopback_state(priv->dev, priv->dev->features);
+
+ /* Configure port */
+ mlx4_en_calc_rx_buf(dev);
+ err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+ priv->rx_skb_size + ETH_FCS_LEN,
+ prof->tx_pause, prof->tx_ppp,
+ prof->rx_pause, prof->rx_ppp);
+ if (err) {
+ en_err(priv, "Failed setting port general configurations for port %d, with error %d\n",
+ priv->port, err);
+ goto out;
+ }
+
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 1);
+ if (err) {
+ en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n",
+ err);
+ goto out;
+ }
+ }
+
+ /* Init port */
+ en_warn(priv, "Initializing port\n");
+ err = mlx4_INIT_PORT(mdev->dev, priv->port);
+ if (err) {
+ en_err(priv, "Failed Initializing port\n");
+ goto out;
+ }
+ queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+
+ /* Initialize time stamp mechanism */
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
+ mlx4_en_init_timestamp(mdev);
+
+ queue_delayed_work(mdev->workqueue, &priv->service_task,
+ SERVICE_TASK_DELAY);
+
+ mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap,
+ mdev->profile.prof[priv->port].rx_ppp,
+ mdev->profile.prof[priv->port].rx_pause,
+ mdev->profile.prof[priv->port].tx_ppp,
+ mdev->profile.prof[priv->port].tx_pause);
+
+ err = register_netdev(dev);
+ if (err) {
+ en_err(priv, "Netdev registration failed for port %d\n", port);
+ goto out;
+ }
+
+ priv->registered = 1;
+ devlink_port_type_eth_set(mlx4_get_devlink_port(mdev->dev, priv->port),
+ dev);
+
+ return 0;
+
+out:
+ mlx4_en_destroy_netdev(dev);
+ return err;
+}
+
+int mlx4_en_reset_config(struct net_device *dev,
+ struct hwtstamp_config ts_config,
+ netdev_features_t features)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_port_profile new_prof;
+ struct mlx4_en_priv *tmp;
+ int port_up = 0;
+ int err = 0;
+
+ if (priv->hwtstamp_config.tx_type == ts_config.tx_type &&
+ priv->hwtstamp_config.rx_filter == ts_config.rx_filter &&
+ !DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX) &&
+ !DEV_FEATURE_CHANGED(dev, features, NETIF_F_RXFCS))
+ return 0; /* Nothing to change */
+
+ if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX) &&
+ (features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (priv->hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE)) {
+ en_warn(priv, "Can't turn ON rx vlan offload while time-stamping rx filter is ON\n");
+ return -EINVAL;
+ }
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ mutex_lock(&mdev->state_lock);
+
+ memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+ memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config));
+
+ err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
+ if (err)
+ goto out;
+
+ if (priv->port_up) {
+ port_up = 1;
+ mlx4_en_stop_port(dev, 1);
+ }
+
+ mlx4_en_safe_replace_resources(priv, tmp);
+
+ if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ dev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+ else
+ dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ } else if (ts_config.rx_filter == HWTSTAMP_FILTER_NONE) {
+ /* RX time-stamping is OFF, update the RX vlan offload
+ * to the latest wanted state
+ */
+ if (dev->wanted_features & NETIF_F_HW_VLAN_CTAG_RX)
+ dev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+ else
+ dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ }
+
+ if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_RXFCS)) {
+ if (features & NETIF_F_RXFCS)
+ dev->features |= NETIF_F_RXFCS;
+ else
+ dev->features &= ~NETIF_F_RXFCS;
+ }
+
+ /* RX vlan offload and RX time-stamping can't co-exist !
+ * Regardless of the caller's choice,
+ * Turn Off RX vlan offload in case of time-stamping is ON
+ */
+ if (ts_config.rx_filter != HWTSTAMP_FILTER_NONE) {
+ if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
+ en_warn(priv, "Turning off RX vlan offload since RX time-stamping is ON\n");
+ dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ }
+
+ if (port_up) {
+ err = mlx4_en_start_port(dev);
+ if (err)
+ en_err(priv, "Failed starting port\n");
+ }
+
+ if (!err)
+ err = mlx4_en_moderation_update(priv);
+out:
+ mutex_unlock(&mdev->state_lock);
+ kfree(tmp);
+ if (!err)
+ netdev_features_change(dev);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
new file mode 100644
index 000000000..0158b88be
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/if_vlan.h>
+
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/cmd.h>
+
+#include "en_port.h"
+#include "mlx4_en.h"
+
+
+int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_vlan_fltr_mbox *filter;
+ int i;
+ int j;
+ int index = 0;
+ u32 entry;
+ int err = 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ filter = mailbox->buf;
+ for (i = VLAN_FLTR_SIZE - 1; i >= 0; i--) {
+ entry = 0;
+ for (j = 0; j < 32; j++)
+ if (test_bit(index++, priv->active_vlans))
+ entry |= 1 << j;
+ filter->entry[i] = cpu_to_be32(entry);
+ }
+ err = mlx4_cmd(dev, mailbox->dma, priv->port, 0, MLX4_CMD_SET_VLAN_FLTR,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
+{
+ struct mlx4_en_query_port_context *qport_context;
+ struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]);
+ struct mlx4_en_port_state *state = &priv->port_state;
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+ qport_context = mailbox->buf;
+
+ /* This command is always accessed from Ethtool context
+ * already synchronized, no need in locking */
+ state->link_state = !!(qport_context->link_up & MLX4_EN_LINK_UP_MASK);
+ switch (qport_context->link_speed & MLX4_EN_SPEED_MASK) {
+ case MLX4_EN_100M_SPEED:
+ state->link_speed = SPEED_100;
+ break;
+ case MLX4_EN_1G_SPEED:
+ state->link_speed = SPEED_1000;
+ break;
+ case MLX4_EN_10G_SPEED_XAUI:
+ case MLX4_EN_10G_SPEED_XFI:
+ state->link_speed = SPEED_10000;
+ break;
+ case MLX4_EN_20G_SPEED:
+ state->link_speed = SPEED_20000;
+ break;
+ case MLX4_EN_40G_SPEED:
+ state->link_speed = SPEED_40000;
+ break;
+ case MLX4_EN_56G_SPEED:
+ state->link_speed = SPEED_56000;
+ break;
+ default:
+ state->link_speed = -1;
+ break;
+ }
+
+ state->transceiver = qport_context->transceiver;
+
+ state->flags = 0; /* Reset and recalculate the port flags */
+ state->flags |= (qport_context->link_up & MLX4_EN_ANC_MASK) ?
+ MLX4_EN_PORT_ANC : 0;
+ state->flags |= (qport_context->autoneg & MLX4_EN_AUTONEG_MASK) ?
+ MLX4_EN_PORT_ANE : 0;
+
+out:
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return err;
+}
+
+/* Each counter set is located in struct mlx4_en_stat_out_mbox
+ * with a const offset between its prio components.
+ * This function runs over a counter set and sum all of it's prio components.
+ */
+static unsigned long en_stats_adder(__be64 *start, __be64 *next, int num)
+{
+ __be64 *curr = start;
+ unsigned long ret = 0;
+ int i;
+ int offset = next - start;
+
+ for (i = 0; i < num; i++) {
+ ret += be64_to_cpu(*curr);
+ curr += offset;
+ }
+
+ return ret;
+}
+
+void mlx4_en_fold_software_stats(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+ unsigned long packets, bytes;
+ int i;
+
+ if (!priv->port_up || mlx4_is_master(mdev->dev))
+ return;
+
+ packets = 0;
+ bytes = 0;
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+ packets += READ_ONCE(ring->packets);
+ bytes += READ_ONCE(ring->bytes);
+ }
+ dev->stats.rx_packets = packets;
+ dev->stats.rx_bytes = bytes;
+
+ packets = 0;
+ bytes = 0;
+ for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+ const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+ packets += READ_ONCE(ring->packets);
+ bytes += READ_ONCE(ring->bytes);
+ }
+ dev->stats.tx_packets = packets;
+ dev->stats.tx_bytes = bytes;
+}
+
+int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
+{
+ struct mlx4_counter tmp_counter_stats;
+ struct mlx4_en_stat_out_mbox *mlx4_en_stats;
+ struct mlx4_en_stat_out_flow_control_mbox *flowstats;
+ struct net_device *dev = mdev->pndev[port];
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct net_device_stats *stats = &dev->stats;
+ struct mlx4_cmd_mailbox *mailbox, *mailbox_priority;
+ u64 in_mod = reset << 8 | port;
+ int err;
+ int i, counter_index;
+ unsigned long sw_tx_dropped = 0;
+ unsigned long sw_rx_dropped = 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ mailbox_priority = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox_priority)) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return PTR_ERR(mailbox_priority);
+ }
+
+ err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
+ MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ mlx4_en_stats = mailbox->buf;
+
+ memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats));
+ counter_index = mlx4_get_default_counter_index(mdev->dev, port);
+ err = mlx4_get_counter_stats(mdev->dev, counter_index,
+ &tmp_counter_stats, reset);
+
+ /* 0xffs indicates invalid value */
+ memset(mailbox_priority->buf, 0xff,
+ sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) {
+ memset(mailbox_priority->buf, 0,
+ sizeof(*flowstats) * MLX4_NUM_PRIORITIES);
+ err = mlx4_cmd_box(mdev->dev, 0, mailbox_priority->dma,
+ in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
+ 0, MLX4_CMD_DUMP_ETH_STATS,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+ }
+
+ flowstats = mailbox_priority->buf;
+
+ spin_lock_bh(&priv->stats_lock);
+
+ mlx4_en_fold_software_stats(dev);
+
+ priv->port_stats.rx_chksum_good = 0;
+ priv->port_stats.rx_chksum_none = 0;
+ priv->port_stats.rx_chksum_complete = 0;
+ priv->port_stats.rx_alloc_pages = 0;
+ priv->xdp_stats.rx_xdp_drop = 0;
+ priv->xdp_stats.rx_xdp_tx = 0;
+ priv->xdp_stats.rx_xdp_tx_full = 0;
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+ sw_rx_dropped += READ_ONCE(ring->dropped);
+ priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok);
+ priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none);
+ priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete);
+ priv->port_stats.rx_alloc_pages += READ_ONCE(ring->rx_alloc_pages);
+ priv->xdp_stats.rx_xdp_drop += READ_ONCE(ring->xdp_drop);
+ priv->xdp_stats.rx_xdp_tx += READ_ONCE(ring->xdp_tx);
+ priv->xdp_stats.rx_xdp_tx_full += READ_ONCE(ring->xdp_tx_full);
+ }
+ priv->port_stats.tx_chksum_offload = 0;
+ priv->port_stats.queue_stopped = 0;
+ priv->port_stats.wake_queue = 0;
+ priv->port_stats.tso_packets = 0;
+ priv->port_stats.xmit_more = 0;
+
+ for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+ const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+ sw_tx_dropped += READ_ONCE(ring->tx_dropped);
+ priv->port_stats.tx_chksum_offload += READ_ONCE(ring->tx_csum);
+ priv->port_stats.queue_stopped += READ_ONCE(ring->queue_stopped);
+ priv->port_stats.wake_queue += READ_ONCE(ring->wake_queue);
+ priv->port_stats.tso_packets += READ_ONCE(ring->tso_packets);
+ priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more);
+ }
+
+ if (!mlx4_is_slave(mdev->dev)) {
+ struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+ p_stats->rx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+ &mlx4_en_stats->RTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_packets_phy =
+ en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+ &mlx4_en_stats->TTOT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->rx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+ &mlx4_en_stats->ROCT_prio_1,
+ NUM_PRIORITIES);
+ p_stats->tx_bytes_phy =
+ en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+ &mlx4_en_stats->TOCT_prio_1,
+ NUM_PRIORITIES);
+ if (mlx4_is_master(mdev->dev)) {
+ stats->rx_packets = p_stats->rx_packets_phy;
+ stats->tx_packets = p_stats->tx_packets_phy;
+ stats->rx_bytes = p_stats->rx_bytes_phy;
+ stats->tx_bytes = p_stats->tx_bytes_phy;
+ }
+ }
+
+ /* net device stats */
+ stats->rx_errors = be64_to_cpu(mlx4_en_stats->PCS) +
+ be32_to_cpu(mlx4_en_stats->RJBBR) +
+ be32_to_cpu(mlx4_en_stats->RCRC) +
+ be32_to_cpu(mlx4_en_stats->RRUNT) +
+ be64_to_cpu(mlx4_en_stats->RInRangeLengthErr) +
+ be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr) +
+ be32_to_cpu(mlx4_en_stats->RSHORT) +
+ en_stats_adder(&mlx4_en_stats->RGIANT_prio_0,
+ &mlx4_en_stats->RGIANT_prio_1,
+ NUM_PRIORITIES);
+ stats->tx_errors = en_stats_adder(&mlx4_en_stats->TGIANT_prio_0,
+ &mlx4_en_stats->TGIANT_prio_1,
+ NUM_PRIORITIES);
+ stats->multicast = en_stats_adder(&mlx4_en_stats->MCAST_prio_0,
+ &mlx4_en_stats->MCAST_prio_1,
+ NUM_PRIORITIES);
+ stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP) +
+ sw_rx_dropped;
+ stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
+ stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
+ stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
+ stats->tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP) +
+ sw_tx_dropped;
+
+ /* RX stats */
+ priv->pkstats.rx_multicast_packets = stats->multicast;
+ priv->pkstats.rx_broadcast_packets =
+ en_stats_adder(&mlx4_en_stats->RBCAST_prio_0,
+ &mlx4_en_stats->RBCAST_prio_1,
+ NUM_PRIORITIES);
+ priv->pkstats.rx_jabbers = be32_to_cpu(mlx4_en_stats->RJBBR);
+ priv->pkstats.rx_in_range_length_error =
+ be64_to_cpu(mlx4_en_stats->RInRangeLengthErr);
+ priv->pkstats.rx_out_range_length_error =
+ be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr);
+
+ /* Tx stats */
+ priv->pkstats.tx_multicast_packets =
+ en_stats_adder(&mlx4_en_stats->TMCAST_prio_0,
+ &mlx4_en_stats->TMCAST_prio_1,
+ NUM_PRIORITIES);
+ priv->pkstats.tx_broadcast_packets =
+ en_stats_adder(&mlx4_en_stats->TBCAST_prio_0,
+ &mlx4_en_stats->TBCAST_prio_1,
+ NUM_PRIORITIES);
+
+ priv->pkstats.rx_prio[0][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0);
+ priv->pkstats.rx_prio[0][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_0);
+ priv->pkstats.rx_prio[1][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1);
+ priv->pkstats.rx_prio[1][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_1);
+ priv->pkstats.rx_prio[2][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2);
+ priv->pkstats.rx_prio[2][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_2);
+ priv->pkstats.rx_prio[3][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3);
+ priv->pkstats.rx_prio[3][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_3);
+ priv->pkstats.rx_prio[4][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4);
+ priv->pkstats.rx_prio[4][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_4);
+ priv->pkstats.rx_prio[5][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5);
+ priv->pkstats.rx_prio[5][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_5);
+ priv->pkstats.rx_prio[6][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6);
+ priv->pkstats.rx_prio[6][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_6);
+ priv->pkstats.rx_prio[7][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7);
+ priv->pkstats.rx_prio[7][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_7);
+ priv->pkstats.rx_prio[8][0] = be64_to_cpu(mlx4_en_stats->RTOT_novlan);
+ priv->pkstats.rx_prio[8][1] = be64_to_cpu(mlx4_en_stats->ROCT_novlan);
+ priv->pkstats.tx_prio[0][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0);
+ priv->pkstats.tx_prio[0][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_0);
+ priv->pkstats.tx_prio[1][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1);
+ priv->pkstats.tx_prio[1][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_1);
+ priv->pkstats.tx_prio[2][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2);
+ priv->pkstats.tx_prio[2][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_2);
+ priv->pkstats.tx_prio[3][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3);
+ priv->pkstats.tx_prio[3][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_3);
+ priv->pkstats.tx_prio[4][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4);
+ priv->pkstats.tx_prio[4][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_4);
+ priv->pkstats.tx_prio[5][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5);
+ priv->pkstats.tx_prio[5][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_5);
+ priv->pkstats.tx_prio[6][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6);
+ priv->pkstats.tx_prio[6][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_6);
+ priv->pkstats.tx_prio[7][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7);
+ priv->pkstats.tx_prio[7][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_7);
+ priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan);
+ priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan);
+
+ if (tmp_counter_stats.counter_mode == 0) {
+ priv->pf_stats.rx_bytes = be64_to_cpu(tmp_counter_stats.rx_bytes);
+ priv->pf_stats.tx_bytes = be64_to_cpu(tmp_counter_stats.tx_bytes);
+ priv->pf_stats.rx_packets = be64_to_cpu(tmp_counter_stats.rx_frames);
+ priv->pf_stats.tx_packets = be64_to_cpu(tmp_counter_stats.tx_frames);
+ }
+
+ for (i = 0; i < MLX4_NUM_PRIORITIES; i++) {
+ priv->rx_priority_flowstats[i].rx_pause =
+ be64_to_cpu(flowstats[i].rx_pause);
+ priv->rx_priority_flowstats[i].rx_pause_duration =
+ be64_to_cpu(flowstats[i].rx_pause_duration);
+ priv->rx_priority_flowstats[i].rx_pause_transition =
+ be64_to_cpu(flowstats[i].rx_pause_transition);
+ priv->tx_priority_flowstats[i].tx_pause =
+ be64_to_cpu(flowstats[i].tx_pause);
+ priv->tx_priority_flowstats[i].tx_pause_duration =
+ be64_to_cpu(flowstats[i].tx_pause_duration);
+ priv->tx_priority_flowstats[i].tx_pause_transition =
+ be64_to_cpu(flowstats[i].tx_pause_transition);
+ }
+
+ /* if pfc is not in use, all priorities counters have the same value */
+ priv->rx_flowstats.rx_pause =
+ be64_to_cpu(flowstats[0].rx_pause);
+ priv->rx_flowstats.rx_pause_duration =
+ be64_to_cpu(flowstats[0].rx_pause_duration);
+ priv->rx_flowstats.rx_pause_transition =
+ be64_to_cpu(flowstats[0].rx_pause_transition);
+ priv->tx_flowstats.tx_pause =
+ be64_to_cpu(flowstats[0].tx_pause);
+ priv->tx_flowstats.tx_pause_duration =
+ be64_to_cpu(flowstats[0].tx_pause_duration);
+ priv->tx_flowstats.tx_pause_transition =
+ be64_to_cpu(flowstats[0].tx_pause_transition);
+
+ spin_unlock_bh(&priv->stats_lock);
+
+out:
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox_priority);
+ return err;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h
new file mode 100644
index 000000000..930f961fe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _MLX4_EN_PORT_H_
+#define _MLX4_EN_PORT_H_
+
+
+#define SET_PORT_PROMISC_SHIFT 31
+#define SET_PORT_MC_PROMISC_SHIFT 30
+
+#define MLX4_EN_NUM_TC 8
+
+#define VLAN_FLTR_SIZE 128
+struct mlx4_set_vlan_fltr_mbox {
+ __be32 entry[VLAN_FLTR_SIZE];
+};
+
+
+enum {
+ MLX4_MCAST_CONFIG = 0,
+ MLX4_MCAST_DISABLE = 1,
+ MLX4_MCAST_ENABLE = 2,
+};
+
+enum mlx4_link_mode {
+ MLX4_1000BASE_CX_SGMII = 0,
+ MLX4_1000BASE_KX = 1,
+ MLX4_10GBASE_CX4 = 2,
+ MLX4_10GBASE_KX4 = 3,
+ MLX4_10GBASE_KR = 4,
+ MLX4_20GBASE_KR2 = 5,
+ MLX4_40GBASE_CR4 = 6,
+ MLX4_40GBASE_KR4 = 7,
+ MLX4_56GBASE_KR4 = 8,
+ MLX4_10GBASE_CR = 12,
+ MLX4_10GBASE_SR = 13,
+ MLX4_40GBASE_SR4 = 15,
+ MLX4_56GBASE_CR4 = 17,
+ MLX4_56GBASE_SR4 = 18,
+ MLX4_100BASE_TX = 24,
+ MLX4_1000BASE_T = 25,
+ MLX4_10GBASE_T = 26,
+};
+
+#define MLX4_PROT_MASK(link_mode) (1<<link_mode)
+
+enum {
+ MLX4_EN_100M_SPEED = 0x04,
+ MLX4_EN_10G_SPEED_XAUI = 0x00,
+ MLX4_EN_10G_SPEED_XFI = 0x01,
+ MLX4_EN_1G_SPEED = 0x02,
+ MLX4_EN_20G_SPEED = 0x08,
+ MLX4_EN_40G_SPEED = 0x40,
+ MLX4_EN_56G_SPEED = 0x20,
+ MLX4_EN_OTHER_SPEED = 0x0f,
+};
+
+struct mlx4_en_query_port_context {
+ u8 link_up;
+#define MLX4_EN_LINK_UP_MASK 0x80
+#define MLX4_EN_ANC_MASK 0x40
+ u8 autoneg;
+#define MLX4_EN_AUTONEG_MASK 0x80
+ __be16 mtu;
+ u8 reserved2;
+ u8 link_speed;
+#define MLX4_EN_SPEED_MASK 0x6f
+ u16 reserved3[5];
+ __be64 mac;
+ u8 transceiver;
+};
+
+
+struct mlx4_en_stat_out_mbox {
+ /* Received frames with a length of 64 octets */
+ __be64 R64_prio_0;
+ __be64 R64_prio_1;
+ __be64 R64_prio_2;
+ __be64 R64_prio_3;
+ __be64 R64_prio_4;
+ __be64 R64_prio_5;
+ __be64 R64_prio_6;
+ __be64 R64_prio_7;
+ __be64 R64_novlan;
+ /* Received frames with a length of 127 octets */
+ __be64 R127_prio_0;
+ __be64 R127_prio_1;
+ __be64 R127_prio_2;
+ __be64 R127_prio_3;
+ __be64 R127_prio_4;
+ __be64 R127_prio_5;
+ __be64 R127_prio_6;
+ __be64 R127_prio_7;
+ __be64 R127_novlan;
+ /* Received frames with a length of 255 octets */
+ __be64 R255_prio_0;
+ __be64 R255_prio_1;
+ __be64 R255_prio_2;
+ __be64 R255_prio_3;
+ __be64 R255_prio_4;
+ __be64 R255_prio_5;
+ __be64 R255_prio_6;
+ __be64 R255_prio_7;
+ __be64 R255_novlan;
+ /* Received frames with a length of 511 octets */
+ __be64 R511_prio_0;
+ __be64 R511_prio_1;
+ __be64 R511_prio_2;
+ __be64 R511_prio_3;
+ __be64 R511_prio_4;
+ __be64 R511_prio_5;
+ __be64 R511_prio_6;
+ __be64 R511_prio_7;
+ __be64 R511_novlan;
+ /* Received frames with a length of 1023 octets */
+ __be64 R1023_prio_0;
+ __be64 R1023_prio_1;
+ __be64 R1023_prio_2;
+ __be64 R1023_prio_3;
+ __be64 R1023_prio_4;
+ __be64 R1023_prio_5;
+ __be64 R1023_prio_6;
+ __be64 R1023_prio_7;
+ __be64 R1023_novlan;
+ /* Received frames with a length of 1518 octets */
+ __be64 R1518_prio_0;
+ __be64 R1518_prio_1;
+ __be64 R1518_prio_2;
+ __be64 R1518_prio_3;
+ __be64 R1518_prio_4;
+ __be64 R1518_prio_5;
+ __be64 R1518_prio_6;
+ __be64 R1518_prio_7;
+ __be64 R1518_novlan;
+ /* Received frames with a length of 1522 octets */
+ __be64 R1522_prio_0;
+ __be64 R1522_prio_1;
+ __be64 R1522_prio_2;
+ __be64 R1522_prio_3;
+ __be64 R1522_prio_4;
+ __be64 R1522_prio_5;
+ __be64 R1522_prio_6;
+ __be64 R1522_prio_7;
+ __be64 R1522_novlan;
+ /* Received frames with a length of 1548 octets */
+ __be64 R1548_prio_0;
+ __be64 R1548_prio_1;
+ __be64 R1548_prio_2;
+ __be64 R1548_prio_3;
+ __be64 R1548_prio_4;
+ __be64 R1548_prio_5;
+ __be64 R1548_prio_6;
+ __be64 R1548_prio_7;
+ __be64 R1548_novlan;
+ /* Received frames with a length of 1548 < octets < MTU */
+ __be64 R2MTU_prio_0;
+ __be64 R2MTU_prio_1;
+ __be64 R2MTU_prio_2;
+ __be64 R2MTU_prio_3;
+ __be64 R2MTU_prio_4;
+ __be64 R2MTU_prio_5;
+ __be64 R2MTU_prio_6;
+ __be64 R2MTU_prio_7;
+ __be64 R2MTU_novlan;
+ /* Received frames with a length of MTU< octets and good CRC */
+ __be64 RGIANT_prio_0;
+ __be64 RGIANT_prio_1;
+ __be64 RGIANT_prio_2;
+ __be64 RGIANT_prio_3;
+ __be64 RGIANT_prio_4;
+ __be64 RGIANT_prio_5;
+ __be64 RGIANT_prio_6;
+ __be64 RGIANT_prio_7;
+ __be64 RGIANT_novlan;
+ /* Received broadcast frames with good CRC */
+ __be64 RBCAST_prio_0;
+ __be64 RBCAST_prio_1;
+ __be64 RBCAST_prio_2;
+ __be64 RBCAST_prio_3;
+ __be64 RBCAST_prio_4;
+ __be64 RBCAST_prio_5;
+ __be64 RBCAST_prio_6;
+ __be64 RBCAST_prio_7;
+ __be64 RBCAST_novlan;
+ /* Received multicast frames with good CRC */
+ __be64 MCAST_prio_0;
+ __be64 MCAST_prio_1;
+ __be64 MCAST_prio_2;
+ __be64 MCAST_prio_3;
+ __be64 MCAST_prio_4;
+ __be64 MCAST_prio_5;
+ __be64 MCAST_prio_6;
+ __be64 MCAST_prio_7;
+ __be64 MCAST_novlan;
+ /* Received unicast not short or GIANT frames with good CRC */
+ __be64 RTOTG_prio_0;
+ __be64 RTOTG_prio_1;
+ __be64 RTOTG_prio_2;
+ __be64 RTOTG_prio_3;
+ __be64 RTOTG_prio_4;
+ __be64 RTOTG_prio_5;
+ __be64 RTOTG_prio_6;
+ __be64 RTOTG_prio_7;
+ __be64 RTOTG_novlan;
+
+ /* Count of total octets of received frames, includes framing characters */
+ __be64 RTTLOCT_prio_0;
+ /* Count of total octets of received frames, not including framing
+ characters */
+ __be64 RTTLOCT_NOFRM_prio_0;
+ /* Count of Total number of octets received
+ (only for frames without errors) */
+ __be64 ROCT_prio_0;
+
+ __be64 RTTLOCT_prio_1;
+ __be64 RTTLOCT_NOFRM_prio_1;
+ __be64 ROCT_prio_1;
+
+ __be64 RTTLOCT_prio_2;
+ __be64 RTTLOCT_NOFRM_prio_2;
+ __be64 ROCT_prio_2;
+
+ __be64 RTTLOCT_prio_3;
+ __be64 RTTLOCT_NOFRM_prio_3;
+ __be64 ROCT_prio_3;
+
+ __be64 RTTLOCT_prio_4;
+ __be64 RTTLOCT_NOFRM_prio_4;
+ __be64 ROCT_prio_4;
+
+ __be64 RTTLOCT_prio_5;
+ __be64 RTTLOCT_NOFRM_prio_5;
+ __be64 ROCT_prio_5;
+
+ __be64 RTTLOCT_prio_6;
+ __be64 RTTLOCT_NOFRM_prio_6;
+ __be64 ROCT_prio_6;
+
+ __be64 RTTLOCT_prio_7;
+ __be64 RTTLOCT_NOFRM_prio_7;
+ __be64 ROCT_prio_7;
+
+ __be64 RTTLOCT_novlan;
+ __be64 RTTLOCT_NOFRM_novlan;
+ __be64 ROCT_novlan;
+
+ /* Count of Total received frames including bad frames */
+ __be64 RTOT_prio_0;
+ /* Count of Total number of received frames with 802.1Q encapsulation */
+ __be64 R1Q_prio_0;
+ __be64 reserved1;
+
+ __be64 RTOT_prio_1;
+ __be64 R1Q_prio_1;
+ __be64 reserved2;
+
+ __be64 RTOT_prio_2;
+ __be64 R1Q_prio_2;
+ __be64 reserved3;
+
+ __be64 RTOT_prio_3;
+ __be64 R1Q_prio_3;
+ __be64 reserved4;
+
+ __be64 RTOT_prio_4;
+ __be64 R1Q_prio_4;
+ __be64 reserved5;
+
+ __be64 RTOT_prio_5;
+ __be64 R1Q_prio_5;
+ __be64 reserved6;
+
+ __be64 RTOT_prio_6;
+ __be64 R1Q_prio_6;
+ __be64 reserved7;
+
+ __be64 RTOT_prio_7;
+ __be64 R1Q_prio_7;
+ __be64 reserved8;
+
+ __be64 RTOT_novlan;
+ __be64 R1Q_novlan;
+ __be64 reserved9;
+
+ /* Total number of Successfully Received Control Frames */
+ __be64 RCNTL;
+ __be64 reserved10;
+ __be64 reserved11;
+ __be64 reserved12;
+ /* Count of received frames with a length/type field value between 46
+ (42 for VLANtagged frames) and 1500 (also 1500 for VLAN-tagged frames),
+ inclusive */
+ __be64 RInRangeLengthErr;
+ /* Count of received frames with length/type field between 1501 and 1535
+ decimal, inclusive */
+ __be64 ROutRangeLengthErr;
+ /* Count of received frames that are longer than max allowed size for
+ 802.3 frames (1518/1522) */
+ __be64 RFrmTooLong;
+ /* Count frames received with PCS error */
+ __be64 PCS;
+
+ /* Transmit frames with a length of 64 octets */
+ __be64 T64_prio_0;
+ __be64 T64_prio_1;
+ __be64 T64_prio_2;
+ __be64 T64_prio_3;
+ __be64 T64_prio_4;
+ __be64 T64_prio_5;
+ __be64 T64_prio_6;
+ __be64 T64_prio_7;
+ __be64 T64_novlan;
+ __be64 T64_loopbk;
+ /* Transmit frames with a length of 65 to 127 octets. */
+ __be64 T127_prio_0;
+ __be64 T127_prio_1;
+ __be64 T127_prio_2;
+ __be64 T127_prio_3;
+ __be64 T127_prio_4;
+ __be64 T127_prio_5;
+ __be64 T127_prio_6;
+ __be64 T127_prio_7;
+ __be64 T127_novlan;
+ __be64 T127_loopbk;
+ /* Transmit frames with a length of 128 to 255 octets */
+ __be64 T255_prio_0;
+ __be64 T255_prio_1;
+ __be64 T255_prio_2;
+ __be64 T255_prio_3;
+ __be64 T255_prio_4;
+ __be64 T255_prio_5;
+ __be64 T255_prio_6;
+ __be64 T255_prio_7;
+ __be64 T255_novlan;
+ __be64 T255_loopbk;
+ /* Transmit frames with a length of 256 to 511 octets */
+ __be64 T511_prio_0;
+ __be64 T511_prio_1;
+ __be64 T511_prio_2;
+ __be64 T511_prio_3;
+ __be64 T511_prio_4;
+ __be64 T511_prio_5;
+ __be64 T511_prio_6;
+ __be64 T511_prio_7;
+ __be64 T511_novlan;
+ __be64 T511_loopbk;
+ /* Transmit frames with a length of 512 to 1023 octets */
+ __be64 T1023_prio_0;
+ __be64 T1023_prio_1;
+ __be64 T1023_prio_2;
+ __be64 T1023_prio_3;
+ __be64 T1023_prio_4;
+ __be64 T1023_prio_5;
+ __be64 T1023_prio_6;
+ __be64 T1023_prio_7;
+ __be64 T1023_novlan;
+ __be64 T1023_loopbk;
+ /* Transmit frames with a length of 1024 to 1518 octets */
+ __be64 T1518_prio_0;
+ __be64 T1518_prio_1;
+ __be64 T1518_prio_2;
+ __be64 T1518_prio_3;
+ __be64 T1518_prio_4;
+ __be64 T1518_prio_5;
+ __be64 T1518_prio_6;
+ __be64 T1518_prio_7;
+ __be64 T1518_novlan;
+ __be64 T1518_loopbk;
+ /* Counts transmit frames with a length of 1519 to 1522 bytes */
+ __be64 T1522_prio_0;
+ __be64 T1522_prio_1;
+ __be64 T1522_prio_2;
+ __be64 T1522_prio_3;
+ __be64 T1522_prio_4;
+ __be64 T1522_prio_5;
+ __be64 T1522_prio_6;
+ __be64 T1522_prio_7;
+ __be64 T1522_novlan;
+ __be64 T1522_loopbk;
+ /* Transmit frames with a length of 1523 to 1548 octets */
+ __be64 T1548_prio_0;
+ __be64 T1548_prio_1;
+ __be64 T1548_prio_2;
+ __be64 T1548_prio_3;
+ __be64 T1548_prio_4;
+ __be64 T1548_prio_5;
+ __be64 T1548_prio_6;
+ __be64 T1548_prio_7;
+ __be64 T1548_novlan;
+ __be64 T1548_loopbk;
+ /* Counts transmit frames with a length of 1549 to MTU bytes */
+ __be64 T2MTU_prio_0;
+ __be64 T2MTU_prio_1;
+ __be64 T2MTU_prio_2;
+ __be64 T2MTU_prio_3;
+ __be64 T2MTU_prio_4;
+ __be64 T2MTU_prio_5;
+ __be64 T2MTU_prio_6;
+ __be64 T2MTU_prio_7;
+ __be64 T2MTU_novlan;
+ __be64 T2MTU_loopbk;
+ /* Transmit frames with a length greater than MTU octets and a good CRC. */
+ __be64 TGIANT_prio_0;
+ __be64 TGIANT_prio_1;
+ __be64 TGIANT_prio_2;
+ __be64 TGIANT_prio_3;
+ __be64 TGIANT_prio_4;
+ __be64 TGIANT_prio_5;
+ __be64 TGIANT_prio_6;
+ __be64 TGIANT_prio_7;
+ __be64 TGIANT_novlan;
+ __be64 TGIANT_loopbk;
+ /* Transmit broadcast frames with a good CRC */
+ __be64 TBCAST_prio_0;
+ __be64 TBCAST_prio_1;
+ __be64 TBCAST_prio_2;
+ __be64 TBCAST_prio_3;
+ __be64 TBCAST_prio_4;
+ __be64 TBCAST_prio_5;
+ __be64 TBCAST_prio_6;
+ __be64 TBCAST_prio_7;
+ __be64 TBCAST_novlan;
+ __be64 TBCAST_loopbk;
+ /* Transmit multicast frames with a good CRC */
+ __be64 TMCAST_prio_0;
+ __be64 TMCAST_prio_1;
+ __be64 TMCAST_prio_2;
+ __be64 TMCAST_prio_3;
+ __be64 TMCAST_prio_4;
+ __be64 TMCAST_prio_5;
+ __be64 TMCAST_prio_6;
+ __be64 TMCAST_prio_7;
+ __be64 TMCAST_novlan;
+ __be64 TMCAST_loopbk;
+ /* Transmit good frames that are neither broadcast nor multicast */
+ __be64 TTOTG_prio_0;
+ __be64 TTOTG_prio_1;
+ __be64 TTOTG_prio_2;
+ __be64 TTOTG_prio_3;
+ __be64 TTOTG_prio_4;
+ __be64 TTOTG_prio_5;
+ __be64 TTOTG_prio_6;
+ __be64 TTOTG_prio_7;
+ __be64 TTOTG_novlan;
+ __be64 TTOTG_loopbk;
+
+ /* total octets of transmitted frames, including framing characters */
+ __be64 TTTLOCT_prio_0;
+ /* total octets of transmitted frames, not including framing characters */
+ __be64 TTTLOCT_NOFRM_prio_0;
+ /* ifOutOctets */
+ __be64 TOCT_prio_0;
+
+ __be64 TTTLOCT_prio_1;
+ __be64 TTTLOCT_NOFRM_prio_1;
+ __be64 TOCT_prio_1;
+
+ __be64 TTTLOCT_prio_2;
+ __be64 TTTLOCT_NOFRM_prio_2;
+ __be64 TOCT_prio_2;
+
+ __be64 TTTLOCT_prio_3;
+ __be64 TTTLOCT_NOFRM_prio_3;
+ __be64 TOCT_prio_3;
+
+ __be64 TTTLOCT_prio_4;
+ __be64 TTTLOCT_NOFRM_prio_4;
+ __be64 TOCT_prio_4;
+
+ __be64 TTTLOCT_prio_5;
+ __be64 TTTLOCT_NOFRM_prio_5;
+ __be64 TOCT_prio_5;
+
+ __be64 TTTLOCT_prio_6;
+ __be64 TTTLOCT_NOFRM_prio_6;
+ __be64 TOCT_prio_6;
+
+ __be64 TTTLOCT_prio_7;
+ __be64 TTTLOCT_NOFRM_prio_7;
+ __be64 TOCT_prio_7;
+
+ __be64 TTTLOCT_novlan;
+ __be64 TTTLOCT_NOFRM_novlan;
+ __be64 TOCT_novlan;
+
+ __be64 TTTLOCT_loopbk;
+ __be64 TTTLOCT_NOFRM_loopbk;
+ __be64 TOCT_loopbk;
+
+ /* Total frames transmitted with a good CRC that are not aborted */
+ __be64 TTOT_prio_0;
+ /* Total number of frames transmitted with 802.1Q encapsulation */
+ __be64 T1Q_prio_0;
+ __be64 reserved13;
+
+ __be64 TTOT_prio_1;
+ __be64 T1Q_prio_1;
+ __be64 reserved14;
+
+ __be64 TTOT_prio_2;
+ __be64 T1Q_prio_2;
+ __be64 reserved15;
+
+ __be64 TTOT_prio_3;
+ __be64 T1Q_prio_3;
+ __be64 reserved16;
+
+ __be64 TTOT_prio_4;
+ __be64 T1Q_prio_4;
+ __be64 reserved17;
+
+ __be64 TTOT_prio_5;
+ __be64 T1Q_prio_5;
+ __be64 reserved18;
+
+ __be64 TTOT_prio_6;
+ __be64 T1Q_prio_6;
+ __be64 reserved19;
+
+ __be64 TTOT_prio_7;
+ __be64 T1Q_prio_7;
+ __be64 reserved20;
+
+ __be64 TTOT_novlan;
+ __be64 T1Q_novlan;
+ __be64 reserved21;
+
+ __be64 TTOT_loopbk;
+ __be64 T1Q_loopbk;
+ __be64 reserved22;
+
+ /* Received frames with a length greater than MTU octets and a bad CRC */
+ __be32 RJBBR;
+ /* Received frames with a bad CRC that are not runts, jabbers,
+ or alignment errors */
+ __be32 RCRC;
+ /* Received frames with SFD with a length of less than 64 octets and a
+ bad CRC */
+ __be32 RRUNT;
+ /* Received frames with a length less than 64 octets and a good CRC */
+ __be32 RSHORT;
+ /* Total Number of Received Packets Dropped */
+ __be32 RDROP;
+ /* Drop due to overflow */
+ __be32 RdropOvflw;
+ /* Drop due to overflow */
+ __be32 RdropLength;
+ /* Total of good frames. Does not include frames received with
+ frame-too-long, FCS, or length errors */
+ __be32 RTOTFRMS;
+ /* Total dropped Xmited packets */
+ __be32 TDROP;
+};
+
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
new file mode 100644
index 000000000..6883ac75d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx4/qp.h>
+
+#include "mlx4_en.h"
+
+void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
+ int is_tx, int rss, int qpn, int cqn,
+ int user_prio, struct mlx4_qp_context *context)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->dev;
+
+ memset(context, 0, sizeof(*context));
+ context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET);
+ context->pd = cpu_to_be32(mdev->priv_pdn);
+ context->mtu_msgmax = 0xff;
+ if (!is_tx && !rss)
+ context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
+ if (is_tx) {
+ context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_FPP);
+
+ } else {
+ context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
+ }
+ context->usr_page = cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+ mdev->priv_uar.index));
+ context->local_qpn = cpu_to_be32(qpn);
+ context->pri_path.ackto = 1 & 0x07;
+ context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
+ /* force user priority per tx ring */
+ if (user_prio >= 0 && priv->prof->num_up == MLX4_EN_NUM_UP_HIGH) {
+ context->pri_path.sched_queue |= user_prio << 3;
+ context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP;
+ }
+ context->pri_path.counter_index = priv->counter_index;
+ context->cqn_send = cpu_to_be32(cqn);
+ context->cqn_recv = cpu_to_be32(cqn);
+ if (!rss &&
+ (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) &&
+ context->pri_path.counter_index !=
+ MLX4_SINK_COUNTER_INDEX(mdev->dev)) {
+ /* disable multicast loopback to qp with same counter */
+ if (!(dev->features & NETIF_F_LOOPBACK))
+ context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ context->pri_path.control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ }
+ context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
+ if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX))
+ context->param3 |= cpu_to_be32(1 << 30);
+
+ if (!is_tx && !rss &&
+ (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)) {
+ en_dbg(HW, priv, "Setting RX qp %x tunnel mode to RX tunneled & non-tunneled\n", qpn);
+ context->srqn = cpu_to_be32(7 << 28); /* this fills bits 30:28 */
+ }
+}
+
+int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
+ int loopback)
+{
+ int ret;
+ struct mlx4_update_qp_params qp_params;
+
+ memset(&qp_params, 0, sizeof(qp_params));
+ if (!loopback)
+ qp_params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB;
+
+ ret = mlx4_update_qp(priv->mdev->dev, qp->qpn,
+ MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB,
+ &qp_params);
+
+ return ret;
+}
+
+void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event)
+{
+ return;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
new file mode 100644
index 000000000..f509a6ce3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -0,0 +1,1287 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <net/busy_poll.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/mlx4/cq.h>
+#include <linux/slab.h>
+#include <linux/mlx4/qp.h>
+#include <linux/skbuff.h>
+#include <linux/rculist.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
+#include <linux/irq.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_checksum.h>
+#endif
+
+#include "mlx4_en.h"
+
+static int mlx4_alloc_page(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_alloc *frag,
+ gfp_t gfp)
+{
+ struct page *page;
+ dma_addr_t dma;
+
+ page = alloc_page(gfp);
+ if (unlikely(!page))
+ return -ENOMEM;
+ dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
+ if (unlikely(dma_mapping_error(priv->ddev, dma))) {
+ __free_page(page);
+ return -ENOMEM;
+ }
+ frag->page = page;
+ frag->dma = dma;
+ frag->page_offset = priv->rx_headroom;
+ return 0;
+}
+
+static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring,
+ struct mlx4_en_rx_desc *rx_desc,
+ struct mlx4_en_rx_alloc *frags,
+ gfp_t gfp)
+{
+ int i;
+
+ for (i = 0; i < priv->num_frags; i++, frags++) {
+ if (!frags->page) {
+ if (mlx4_alloc_page(priv, frags, gfp))
+ return -ENOMEM;
+ ring->rx_alloc_pages++;
+ }
+ rx_desc->data[i].addr = cpu_to_be64(frags->dma +
+ frags->page_offset);
+ }
+ return 0;
+}
+
+static void mlx4_en_free_frag(const struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_alloc *frag)
+{
+ if (frag->page) {
+ dma_unmap_page(priv->ddev, frag->dma,
+ PAGE_SIZE, priv->dma_dir);
+ __free_page(frag->page);
+ }
+ /* We need to clear all fields, otherwise a change of priv->log_rx_info
+ * could lead to see garbage later in frag->page.
+ */
+ memset(frag, 0, sizeof(*frag));
+}
+
+static void mlx4_en_init_rx_desc(const struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring, int index)
+{
+ struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
+ int possible_frags;
+ int i;
+
+ /* Set size and memtype fields */
+ for (i = 0; i < priv->num_frags; i++) {
+ rx_desc->data[i].byte_count =
+ cpu_to_be32(priv->frag_info[i].frag_size);
+ rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
+ }
+
+ /* If the number of used fragments does not fill up the ring stride,
+ * remaining (unused) fragments must be padded with null address/size
+ * and a special memory key */
+ possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
+ for (i = priv->num_frags; i < possible_frags; i++) {
+ rx_desc->data[i].byte_count = 0;
+ rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
+ rx_desc->data[i].addr = 0;
+ }
+}
+
+static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring, int index,
+ gfp_t gfp)
+{
+ struct mlx4_en_rx_desc *rx_desc = ring->buf +
+ (index << ring->log_stride);
+ struct mlx4_en_rx_alloc *frags = ring->rx_info +
+ (index << priv->log_rx_info);
+ if (likely(ring->page_cache.index > 0)) {
+ /* XDP uses a single page per frame */
+ if (!frags->page) {
+ ring->page_cache.index--;
+ frags->page = ring->page_cache.buf[ring->page_cache.index].page;
+ frags->dma = ring->page_cache.buf[ring->page_cache.index].dma;
+ }
+ frags->page_offset = XDP_PACKET_HEADROOM;
+ rx_desc->data[0].addr = cpu_to_be64(frags->dma +
+ XDP_PACKET_HEADROOM);
+ return 0;
+ }
+
+ return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp);
+}
+
+static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring)
+{
+ return ring->prod == ring->cons;
+}
+
+static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
+{
+ *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
+}
+
+/* slow path */
+static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring,
+ int index)
+{
+ struct mlx4_en_rx_alloc *frags;
+ int nr;
+
+ frags = ring->rx_info + (index << priv->log_rx_info);
+ for (nr = 0; nr < priv->num_frags; nr++) {
+ en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
+ mlx4_en_free_frag(priv, frags + nr);
+ }
+}
+
+/* Function not in fast-path */
+static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_rx_ring *ring;
+ int ring_ind;
+ int buf_ind;
+ int new_size;
+
+ for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
+ for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
+ ring = priv->rx_ring[ring_ind];
+
+ if (mlx4_en_prepare_rx_desc(priv, ring,
+ ring->actual_size,
+ GFP_KERNEL)) {
+ if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
+ en_err(priv, "Failed to allocate enough rx buffers\n");
+ return -ENOMEM;
+ } else {
+ new_size = rounddown_pow_of_two(ring->actual_size);
+ en_warn(priv, "Only %d buffers allocated reducing ring size to %d\n",
+ ring->actual_size, new_size);
+ goto reduce_rings;
+ }
+ }
+ ring->actual_size++;
+ ring->prod++;
+ }
+ }
+ return 0;
+
+reduce_rings:
+ for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
+ ring = priv->rx_ring[ring_ind];
+ while (ring->actual_size > new_size) {
+ ring->actual_size--;
+ ring->prod--;
+ mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
+ }
+ }
+
+ return 0;
+}
+
+static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring)
+{
+ int index;
+
+ en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
+ ring->cons, ring->prod);
+
+ /* Unmap and free Rx buffers */
+ for (index = 0; index < ring->size; index++) {
+ en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
+ mlx4_en_free_rx_desc(priv, ring, index);
+ }
+ ring->cons = 0;
+ ring->prod = 0;
+}
+
+void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
+{
+ int i;
+ int num_of_eqs;
+ int num_rx_rings;
+ struct mlx4_dev *dev = mdev->dev;
+
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
+ num_of_eqs = max_t(int, MIN_RX_RINGS,
+ min_t(int,
+ mlx4_get_eqs_per_port(mdev->dev, i),
+ DEF_RX_RINGS));
+
+ num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
+ min_t(int, num_of_eqs, num_online_cpus());
+ mdev->profile.prof[i].rx_ring_num =
+ rounddown_pow_of_two(num_rx_rings);
+ }
+}
+
+int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring **pring,
+ u32 size, u16 stride, int node, int queue_index)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_rx_ring *ring;
+ int err = -ENOMEM;
+ int tmp;
+
+ ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
+ if (!ring) {
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring) {
+ en_err(priv, "Failed to allocate RX ring structure\n");
+ return -ENOMEM;
+ }
+ }
+
+ ring->prod = 0;
+ ring->cons = 0;
+ ring->size = size;
+ ring->size_mask = size - 1;
+ ring->stride = stride;
+ ring->log_stride = ffs(ring->stride) - 1;
+ ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
+
+ if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index) < 0)
+ goto err_ring;
+
+ tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
+ sizeof(struct mlx4_en_rx_alloc));
+ ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node);
+ if (!ring->rx_info) {
+ err = -ENOMEM;
+ goto err_xdp_info;
+ }
+
+ en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
+ ring->rx_info, tmp);
+
+ /* Allocate HW buffers on provided NUMA node */
+ set_dev_node(&mdev->dev->persist->pdev->dev, node);
+ err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+ set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
+ if (err)
+ goto err_info;
+
+ ring->buf = ring->wqres.buf.direct.buf;
+
+ ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;
+
+ *pring = ring;
+ return 0;
+
+err_info:
+ kvfree(ring->rx_info);
+ ring->rx_info = NULL;
+err_xdp_info:
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
+err_ring:
+ kfree(ring);
+ *pring = NULL;
+
+ return err;
+}
+
+int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_rx_ring *ring;
+ int i;
+ int ring_ind;
+ int err;
+ int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
+ DS_SIZE * priv->num_frags);
+
+ for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
+ ring = priv->rx_ring[ring_ind];
+
+ ring->prod = 0;
+ ring->cons = 0;
+ ring->actual_size = 0;
+ ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
+
+ ring->stride = stride;
+ if (ring->stride <= TXBB_SIZE) {
+ /* Stamp first unused send wqe */
+ __be32 *ptr = (__be32 *)ring->buf;
+ __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
+ *ptr = stamp;
+ /* Move pointer to start of rx section */
+ ring->buf += TXBB_SIZE;
+ }
+
+ ring->log_stride = ffs(ring->stride) - 1;
+ ring->buf_size = ring->size * ring->stride;
+
+ memset(ring->buf, 0, ring->buf_size);
+ mlx4_en_update_rx_prod_db(ring);
+
+ /* Initialize all descriptors */
+ for (i = 0; i < ring->size; i++)
+ mlx4_en_init_rx_desc(priv, ring, i);
+ }
+ err = mlx4_en_fill_rx_buffers(priv);
+ if (err)
+ goto err_buffers;
+
+ for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
+ ring = priv->rx_ring[ring_ind];
+
+ ring->size_mask = ring->actual_size - 1;
+ mlx4_en_update_rx_prod_db(ring);
+ }
+
+ return 0;
+
+err_buffers:
+ for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
+ mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);
+
+ ring_ind = priv->rx_ring_num - 1;
+ while (ring_ind >= 0) {
+ if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE)
+ priv->rx_ring[ring_ind]->buf -= TXBB_SIZE;
+ ring_ind--;
+ }
+ return err;
+}
+
+/* We recover from out of memory by scheduling our napi poll
+ * function (mlx4_en_process_cq), which tries to allocate
+ * all missing RX buffers (call to mlx4_en_refill_rx_buffers).
+ */
+void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv)
+{
+ int ring;
+
+ if (!priv->port_up)
+ return;
+
+ for (ring = 0; ring < priv->rx_ring_num; ring++) {
+ if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) {
+ local_bh_disable();
+ napi_reschedule(&priv->rx_cq[ring]->napi);
+ local_bh_enable();
+ }
+ }
+}
+
+/* When the rx ring is running in page-per-packet mode, a released frame can go
+ * directly into a small cache, to avoid unmapping or touching the page
+ * allocator. In bpf prog performance scenarios, buffers are either forwarded
+ * or dropped, never converted to skbs, so every page can come directly from
+ * this cache when it is sized to be a multiple of the napi budget.
+ */
+bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
+ struct mlx4_en_rx_alloc *frame)
+{
+ struct mlx4_en_page_cache *cache = &ring->page_cache;
+
+ if (cache->index >= MLX4_EN_CACHE_SIZE)
+ return false;
+
+ cache->buf[cache->index].page = frame->page;
+ cache->buf[cache->index].dma = frame->dma;
+ cache->index++;
+ return true;
+}
+
+void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring **pring,
+ u32 size, u16 stride)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_rx_ring *ring = *pring;
+ struct bpf_prog *old_prog;
+
+ old_prog = rcu_dereference_protected(
+ ring->xdp_prog,
+ lockdep_is_held(&mdev->state_lock));
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
+ mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
+ kvfree(ring->rx_info);
+ ring->rx_info = NULL;
+ kfree(ring);
+ *pring = NULL;
+}
+
+void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring)
+{
+ int i;
+
+ for (i = 0; i < ring->page_cache.index; i++) {
+ dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma,
+ PAGE_SIZE, priv->dma_dir);
+ put_page(ring->page_cache.buf[i].page);
+ }
+ ring->page_cache.index = 0;
+ mlx4_en_free_rx_buf(priv, ring);
+ if (ring->stride <= TXBB_SIZE)
+ ring->buf -= TXBB_SIZE;
+}
+
+
+static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_alloc *frags,
+ struct sk_buff *skb,
+ int length)
+{
+ const struct mlx4_en_frag_info *frag_info = priv->frag_info;
+ unsigned int truesize = 0;
+ bool release = true;
+ int nr, frag_size;
+ struct page *page;
+ dma_addr_t dma;
+
+ /* Collect used fragments while replacing them in the HW descriptors */
+ for (nr = 0;; frags++) {
+ frag_size = min_t(int, length, frag_info->frag_size);
+
+ page = frags->page;
+ if (unlikely(!page))
+ goto fail;
+
+ dma = frags->dma;
+ dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset,
+ frag_size, priv->dma_dir);
+
+ __skb_fill_page_desc(skb, nr, page, frags->page_offset,
+ frag_size);
+
+ truesize += frag_info->frag_stride;
+ if (frag_info->frag_stride == PAGE_SIZE / 2) {
+ frags->page_offset ^= PAGE_SIZE / 2;
+ release = page_count(page) != 1 ||
+ page_is_pfmemalloc(page) ||
+ page_to_nid(page) != numa_mem_id();
+ } else if (!priv->rx_headroom) {
+ /* rx_headroom for non XDP setup is always 0.
+ * When XDP is set, the above condition will
+ * guarantee page is always released.
+ */
+ u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES);
+
+ frags->page_offset += sz_align;
+ release = frags->page_offset + frag_info->frag_size > PAGE_SIZE;
+ }
+ if (release) {
+ dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir);
+ frags->page = NULL;
+ } else {
+ page_ref_inc(page);
+ }
+
+ nr++;
+ length -= frag_size;
+ if (!length)
+ break;
+ frag_info++;
+ }
+ skb->truesize += truesize;
+ return nr;
+
+fail:
+ while (nr > 0) {
+ nr--;
+ __skb_frag_unref(skb_shinfo(skb)->frags + nr);
+ }
+ return 0;
+}
+
+static void validate_loopback(struct mlx4_en_priv *priv, void *va)
+{
+ const unsigned char *data = va + ETH_HLEN;
+ int i;
+
+ for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++) {
+ if (data[i] != (unsigned char)i)
+ return;
+ }
+ /* Loopback found */
+ priv->loopback_ok = 1;
+}
+
+static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring)
+{
+ u32 missing = ring->actual_size - (ring->prod - ring->cons);
+
+ /* Try to batch allocations, but not too much. */
+ if (missing < 8)
+ return;
+ do {
+ if (mlx4_en_prepare_rx_desc(priv, ring,
+ ring->prod & ring->size_mask,
+ GFP_ATOMIC | __GFP_MEMALLOC))
+ break;
+ ring->prod++;
+ } while (likely(--missing));
+
+ mlx4_en_update_rx_prod_db(ring);
+}
+
+/* When hardware doesn't strip the vlan, we need to calculate the checksum
+ * over it and add it to the hardware's checksum calculation
+ */
+static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum,
+ struct vlan_hdr *vlanh)
+{
+ return csum_add(hw_checksum, *(__wsum *)vlanh);
+}
+
+/* Although the stack expects checksum which doesn't include the pseudo
+ * header, the HW adds it. To address that, we are subtracting the pseudo
+ * header checksum from the checksum value provided by the HW.
+ */
+static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
+ struct iphdr *iph)
+{
+ __u16 length_for_csum = 0;
+ __wsum csum_pseudo_header = 0;
+ __u8 ipproto = iph->protocol;
+
+ if (unlikely(ipproto == IPPROTO_SCTP))
+ return -1;
+
+ length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2));
+ csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ length_for_csum, ipproto, 0);
+ skb->csum = csum_sub(hw_checksum, csum_pseudo_header);
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* In IPv6 packets, hw_checksum lacks 6 bytes from IPv6 header:
+ * 4 first bytes : priority, version, flow_lbl
+ * and 2 additional bytes : nexthdr, hop_limit.
+ */
+static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
+ struct ipv6hdr *ipv6h)
+{
+ __u8 nexthdr = ipv6h->nexthdr;
+ __wsum temp;
+
+ if (unlikely(nexthdr == IPPROTO_FRAGMENT ||
+ nexthdr == IPPROTO_HOPOPTS ||
+ nexthdr == IPPROTO_SCTP))
+ return -1;
+
+ /* priority, version, flow_lbl */
+ temp = csum_add(hw_checksum, *(__wsum *)ipv6h);
+ /* nexthdr and hop_limit */
+ skb->csum = csum_add(temp, (__force __wsum)*(__be16 *)&ipv6h->nexthdr);
+ return 0;
+}
+#endif
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
+/* We reach this function only after checking that any of
+ * the (IPv4 | IPv6) bits are set in cqe->status.
+ */
+static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
+ netdev_features_t dev_features)
+{
+ __wsum hw_checksum = 0;
+ void *hdr;
+
+ /* CQE csum doesn't cover padding octets in short ethernet
+ * frames. And the pad field is appended prior to calculating
+ * and appending the FCS field.
+ *
+ * Detecting these padded frames requires to verify and parse
+ * IP headers, so we simply force all those small frames to skip
+ * checksum complete.
+ */
+ if (short_frame(skb->len))
+ return -EINVAL;
+
+ hdr = (u8 *)va + sizeof(struct ethhdr);
+ hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
+
+ if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) &&
+ !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) {
+ hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
+ hdr += sizeof(struct vlan_hdr);
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
+ return get_fixed_ipv6_csum(hw_checksum, skb, hdr);
+#endif
+ return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
+int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int factor = priv->cqe_factor;
+ struct mlx4_en_rx_ring *ring;
+ struct bpf_prog *xdp_prog;
+ int cq_ring = cq->ring;
+ bool doorbell_pending;
+ struct mlx4_cqe *cqe;
+ struct xdp_buff xdp;
+ int polled = 0;
+ int index;
+
+ if (unlikely(!priv->port_up || budget <= 0))
+ return 0;
+
+ ring = priv->rx_ring[cq_ring];
+
+ /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(ring->xdp_prog);
+ xdp.rxq = &ring->xdp_rxq;
+ doorbell_pending = 0;
+
+ /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
+ * descriptor offset can be deduced from the CQE index instead of
+ * reading 'cqe->index' */
+ index = cq->mcq.cons_index & ring->size_mask;
+ cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
+
+ /* Process all completed CQEs */
+ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
+ cq->mcq.cons_index & cq->size)) {
+ struct mlx4_en_rx_alloc *frags;
+ enum pkt_hash_types hash_type;
+ struct sk_buff *skb;
+ unsigned int length;
+ int ip_summed;
+ void *va;
+ int nr;
+
+ frags = ring->rx_info + (index << priv->log_rx_info);
+ va = page_address(frags[0].page) + frags[0].page_offset;
+ prefetchw(va);
+ /*
+ * make sure we read the CQE after we read the ownership bit
+ */
+ dma_rmb();
+
+ /* Drop packet on bad receive or bad checksum */
+ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR)) {
+ en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n",
+ ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
+ ((struct mlx4_err_cqe *)cqe)->syndrome);
+ goto next;
+ }
+ if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
+ en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
+ goto next;
+ }
+
+ /* Check if we need to drop the packet if SRIOV is not enabled
+ * and not performing the selftest or flb disabled
+ */
+ if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) {
+ const struct ethhdr *ethh = va;
+ dma_addr_t dma;
+ /* Get pointer to first fragment since we haven't
+ * skb yet and cast it to ethhdr struct
+ */
+ dma = frags[0].dma + frags[0].page_offset;
+ dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
+ DMA_FROM_DEVICE);
+
+ if (is_multicast_ether_addr(ethh->h_dest)) {
+ struct mlx4_mac_entry *entry;
+ struct hlist_head *bucket;
+ unsigned int mac_hash;
+
+ /* Drop the packet, since HW loopback-ed it */
+ mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
+ bucket = &priv->mac_hash[mac_hash];
+ hlist_for_each_entry_rcu(entry, bucket, hlist) {
+ if (ether_addr_equal_64bits(entry->mac,
+ ethh->h_source))
+ goto next;
+ }
+ }
+ }
+
+ if (unlikely(priv->validate_loopback)) {
+ validate_loopback(priv, va);
+ goto next;
+ }
+
+ /*
+ * Packet is OK - process it.
+ */
+ length = be32_to_cpu(cqe->byte_cnt);
+ length -= ring->fcs_del;
+
+ /* A bpf program gets first chance to drop the packet. It may
+ * read bytes but not past the end of the frag.
+ */
+ if (xdp_prog) {
+ dma_addr_t dma;
+ void *orig_data;
+ u32 act;
+
+ dma = frags[0].dma + frags[0].page_offset;
+ dma_sync_single_for_cpu(priv->ddev, dma,
+ priv->frag_info[0].frag_size,
+ DMA_FROM_DEVICE);
+
+ xdp.data_hard_start = va - frags[0].page_offset;
+ xdp.data = va;
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + length;
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ length = xdp.data_end - xdp.data;
+ if (xdp.data != orig_data) {
+ frags[0].page_offset = xdp.data -
+ xdp.data_hard_start;
+ va = xdp.data;
+ }
+
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ if (likely(!mlx4_en_xmit_frame(ring, frags, priv,
+ length, cq_ring,
+ &doorbell_pending))) {
+ frags[0].page = NULL;
+ goto next;
+ }
+ trace_xdp_exception(dev, xdp_prog, act);
+ goto xdp_drop_no_cnt; /* Drop on xmit failure */
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ ring->xdp_drop++;
+xdp_drop_no_cnt:
+ goto next;
+ }
+ }
+
+ ring->bytes += length;
+ ring->packets++;
+
+ skb = napi_get_frags(&cq->napi);
+ if (unlikely(!skb))
+ goto next;
+
+ if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) {
+ u64 timestamp = mlx4_en_get_cqe_ts(cqe);
+
+ mlx4_en_fill_hwtstamps(priv->mdev, skb_hwtstamps(skb),
+ timestamp);
+ }
+ skb_record_rx_queue(skb, cq_ring);
+
+ if (likely(dev->features & NETIF_F_RXCSUM)) {
+ /* TODO: For IP non TCP/UDP packets when csum complete is
+ * not an option (not supported or any other reason) we can
+ * actually check cqe IPOK status bit and report
+ * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE
+ */
+ if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
+ MLX4_CQE_STATUS_UDP)) &&
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+ cqe->checksum == cpu_to_be16(0xffff)) {
+ bool l2_tunnel;
+
+ l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
+ (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
+ ip_summed = CHECKSUM_UNNECESSARY;
+ hash_type = PKT_HASH_TYPE_L4;
+ if (l2_tunnel)
+ skb->csum_level = 1;
+ ring->csum_ok++;
+ } else {
+ if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
+ (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
+ goto csum_none;
+ if (check_csum(cqe, skb, va, dev->features))
+ goto csum_none;
+ ip_summed = CHECKSUM_COMPLETE;
+ hash_type = PKT_HASH_TYPE_L3;
+ ring->csum_complete++;
+ }
+ } else {
+csum_none:
+ ip_summed = CHECKSUM_NONE;
+ hash_type = PKT_HASH_TYPE_L3;
+ ring->csum_none++;
+ }
+ skb->ip_summed = ip_summed;
+ if (dev->features & NETIF_F_RXHASH)
+ skb_set_hash(skb,
+ be32_to_cpu(cqe->immed_rss_invalid),
+ hash_type);
+
+ if ((cqe->vlan_my_qpn &
+ cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_RX))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ be16_to_cpu(cqe->sl_vid));
+ else if ((cqe->vlan_my_qpn &
+ cpu_to_be32(MLX4_CQE_SVLAN_PRESENT_MASK)) &&
+ (dev->features & NETIF_F_HW_VLAN_STAG_RX))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD),
+ be16_to_cpu(cqe->sl_vid));
+
+ nr = mlx4_en_complete_rx_desc(priv, frags, skb, length);
+ if (likely(nr)) {
+ skb_shinfo(skb)->nr_frags = nr;
+ skb->len = length;
+ skb->data_len = length;
+ napi_gro_frags(&cq->napi);
+ } else {
+ skb->vlan_tci = 0;
+ skb_clear_hash(skb);
+ }
+next:
+ ++cq->mcq.cons_index;
+ index = (cq->mcq.cons_index) & ring->size_mask;
+ cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
+ if (unlikely(++polled == budget))
+ break;
+ }
+
+ rcu_read_unlock();
+
+ if (likely(polled)) {
+ if (doorbell_pending) {
+ priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true;
+ mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq_ring]);
+ }
+
+ mlx4_cq_set_ci(&cq->mcq);
+ wmb(); /* ensure HW sees CQ consumer before we post new buffers */
+ ring->cons = cq->mcq.cons_index;
+ }
+ AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
+
+ mlx4_en_refill_rx_buffers(priv, ring);
+
+ return polled;
+}
+
+
+void mlx4_en_rx_irq(struct mlx4_cq *mcq)
+{
+ struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
+ struct mlx4_en_priv *priv = netdev_priv(cq->dev);
+
+ if (likely(priv->port_up))
+ napi_schedule_irqoff(&cq->napi);
+ else
+ mlx4_en_arm_cq(priv, cq);
+}
+
+/* Rx CQ polling - called by NAPI */
+int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
+{
+ struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
+ struct net_device *dev = cq->dev;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_cq *xdp_tx_cq = NULL;
+ bool clean_complete = true;
+ int done;
+
+ if (!budget)
+ return 0;
+
+ if (priv->tx_ring_num[TX_XDP]) {
+ xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
+ if (xdp_tx_cq->xdp_busy) {
+ clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
+ budget);
+ xdp_tx_cq->xdp_busy = !clean_complete;
+ }
+ }
+
+ done = mlx4_en_process_rx_cq(dev, cq, budget);
+
+ /* If we used up all the quota - we're probably not done yet... */
+ if (done == budget || !clean_complete) {
+ const struct cpumask *aff;
+ struct irq_data *idata;
+ int cpu_curr;
+
+ /* in case we got here because of !clean_complete */
+ done = budget;
+
+ INC_PERF_COUNTER(priv->pstats.napi_quota);
+
+ cpu_curr = smp_processor_id();
+ idata = irq_desc_get_irq_data(cq->irq_desc);
+ aff = irq_data_get_affinity_mask(idata);
+
+ if (likely(cpumask_test_cpu(cpu_curr, aff)))
+ return budget;
+
+ /* Current cpu is not according to smp_irq_affinity -
+ * probably affinity changed. Need to stop this NAPI
+ * poll, and restart it on the right CPU.
+ * Try to avoid returning a too small value (like 0),
+ * to not fool net_rx_action() and its netdev_budget
+ */
+ if (done)
+ done--;
+ }
+ /* Done for now */
+ if (likely(napi_complete_done(napi, done)))
+ mlx4_en_arm_cq(priv, cq);
+ return done;
+}
+
+void mlx4_en_calc_rx_buf(struct net_device *dev)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
+ int i = 0;
+
+ /* bpf requires buffers to be set up as 1 packet per page.
+ * This only works when num_frags == 1.
+ */
+ if (priv->tx_ring_num[TX_XDP]) {
+ priv->frag_info[0].frag_size = eff_mtu;
+ /* This will gain efficient xdp frame recycling at the
+ * expense of more costly truesize accounting
+ */
+ priv->frag_info[0].frag_stride = PAGE_SIZE;
+ priv->dma_dir = PCI_DMA_BIDIRECTIONAL;
+ priv->rx_headroom = XDP_PACKET_HEADROOM;
+ i = 1;
+ } else {
+ int frag_size_max = 2048, buf_size = 0;
+
+ /* should not happen, right ? */
+ if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048)
+ frag_size_max = PAGE_SIZE;
+
+ while (buf_size < eff_mtu) {
+ int frag_stride, frag_size = eff_mtu - buf_size;
+ int pad, nb;
+
+ if (i < MLX4_EN_MAX_RX_FRAGS - 1)
+ frag_size = min(frag_size, frag_size_max);
+
+ priv->frag_info[i].frag_size = frag_size;
+ frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES);
+ /* We can only pack 2 1536-bytes frames in on 4K page
+ * Therefore, each frame would consume more bytes (truesize)
+ */
+ nb = PAGE_SIZE / frag_stride;
+ pad = (PAGE_SIZE - nb * frag_stride) / nb;
+ pad &= ~(SMP_CACHE_BYTES - 1);
+ priv->frag_info[i].frag_stride = frag_stride + pad;
+
+ buf_size += frag_size;
+ i++;
+ }
+ priv->dma_dir = PCI_DMA_FROMDEVICE;
+ priv->rx_headroom = 0;
+ }
+
+ priv->num_frags = i;
+ priv->rx_skb_size = eff_mtu;
+ priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc));
+
+ en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n",
+ eff_mtu, priv->num_frags);
+ for (i = 0; i < priv->num_frags; i++) {
+ en_dbg(DRV,
+ priv,
+ " frag:%d - size:%d stride:%d\n",
+ i,
+ priv->frag_info[i].frag_size,
+ priv->frag_info[i].frag_stride);
+ }
+}
+
+/* RSS related functions */
+
+static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
+ struct mlx4_en_rx_ring *ring,
+ enum mlx4_qp_state *state,
+ struct mlx4_qp *qp)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_qp_context *context;
+ int err = 0;
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ err = mlx4_qp_alloc(mdev->dev, qpn, qp);
+ if (err) {
+ en_err(priv, "Failed to allocate qp #%x\n", qpn);
+ goto out;
+ }
+ qp->event = mlx4_en_sqp_event;
+
+ memset(context, 0, sizeof(*context));
+ mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
+ qpn, ring->cqn, -1, context);
+ context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
+
+ /* Cancel FCS removal if FW allows */
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
+ context->param3 |= cpu_to_be32(1 << 29);
+ if (priv->dev->features & NETIF_F_RXFCS)
+ ring->fcs_del = 0;
+ else
+ ring->fcs_del = ETH_FCS_LEN;
+ } else
+ ring->fcs_del = 0;
+
+ err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
+ if (err) {
+ mlx4_qp_remove(mdev->dev, qp);
+ mlx4_qp_free(mdev->dev, qp);
+ }
+ mlx4_en_update_rx_prod_db(ring);
+out:
+ kfree(context);
+ return err;
+}
+
+int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
+{
+ int err;
+ u32 qpn;
+
+ err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn,
+ MLX4_RESERVE_A0_QP,
+ MLX4_RES_USAGE_DRIVER);
+ if (err) {
+ en_err(priv, "Failed reserving drop qpn\n");
+ return err;
+ }
+ err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
+ if (err) {
+ en_err(priv, "Failed allocating drop qp\n");
+ mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
+{
+ u32 qpn;
+
+ qpn = priv->drop_qp.qpn;
+ mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
+ mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
+ mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
+}
+
+/* Allocate rx qp's and configure them according to rss map */
+int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_rss_map *rss_map = &priv->rss_map;
+ struct mlx4_qp_context context;
+ struct mlx4_rss_context *rss_context;
+ int rss_rings;
+ void *ptr;
+ u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
+ MLX4_RSS_TCP_IPV6);
+ int i, qpn;
+ int err = 0;
+ int good_qps = 0;
+ u8 flags;
+
+ en_dbg(DRV, priv, "Configuring rss steering\n");
+
+ flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0;
+ err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
+ priv->rx_ring_num,
+ &rss_map->base_qpn, flags,
+ MLX4_RES_USAGE_DRIVER);
+ if (err) {
+ en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
+ return err;
+ }
+
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ qpn = rss_map->base_qpn + i;
+ err = mlx4_en_config_rss_qp(priv, qpn, priv->rx_ring[i],
+ &rss_map->state[i],
+ &rss_map->qps[i]);
+ if (err)
+ goto rss_err;
+
+ ++good_qps;
+ }
+
+ if (priv->rx_ring_num == 1) {
+ rss_map->indir_qp = &rss_map->qps[0];
+ priv->base_qpn = rss_map->indir_qp->qpn;
+ en_info(priv, "Optimized Non-RSS steering\n");
+ return 0;
+ }
+
+ rss_map->indir_qp = kzalloc(sizeof(*rss_map->indir_qp), GFP_KERNEL);
+ if (!rss_map->indir_qp) {
+ err = -ENOMEM;
+ goto rss_err;
+ }
+
+ /* Configure RSS indirection qp */
+ err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp);
+ if (err) {
+ en_err(priv, "Failed to allocate RSS indirection QP\n");
+ goto qp_alloc_err;
+ }
+
+ rss_map->indir_qp->event = mlx4_en_sqp_event;
+ mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
+ priv->rx_ring[0]->cqn, -1, &context);
+
+ if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
+ rss_rings = priv->rx_ring_num;
+ else
+ rss_rings = priv->prof->rss_rings;
+
+ ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
+ + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
+ rss_context = ptr;
+ rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
+ (rss_map->base_qpn));
+ rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
+ if (priv->mdev->profile.udp_rss) {
+ rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
+ rss_context->base_qpn_udp = rss_context->default_qpn;
+ }
+
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ en_info(priv, "Setting RSS context tunnel type to RSS on inner headers\n");
+ rss_mask |= MLX4_RSS_BY_INNER_HEADERS;
+ }
+
+ rss_context->flags = rss_mask;
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+ if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) {
+ rss_context->hash_fn = MLX4_RSS_HASH_XOR;
+ } else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) {
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+ memcpy(rss_context->rss_key, priv->rss_key,
+ MLX4_EN_RSS_KEY_SIZE);
+ } else {
+ en_err(priv, "Unknown RSS hash function requested\n");
+ err = -EINVAL;
+ goto indir_err;
+ }
+
+ err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
+ rss_map->indir_qp, &rss_map->indir_state);
+ if (err)
+ goto indir_err;
+
+ return 0;
+
+indir_err:
+ mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
+ MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp);
+ mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
+ mlx4_qp_free(mdev->dev, rss_map->indir_qp);
+qp_alloc_err:
+ kfree(rss_map->indir_qp);
+ rss_map->indir_qp = NULL;
+rss_err:
+ for (i = 0; i < good_qps; i++) {
+ mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
+ MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
+ mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
+ mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
+ }
+ mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
+ return err;
+}
+
+void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_rss_map *rss_map = &priv->rss_map;
+ int i;
+
+ if (priv->rx_ring_num > 1) {
+ mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
+ MLX4_QP_STATE_RST, NULL, 0, 0,
+ rss_map->indir_qp);
+ mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
+ mlx4_qp_free(mdev->dev, rss_map->indir_qp);
+ kfree(rss_map->indir_qp);
+ rss_map->indir_qp = NULL;
+ }
+
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
+ MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
+ mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
+ mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
+ }
+ mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
new file mode 100644
index 000000000..946d9db7c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/mlx4/driver.h>
+
+#include "mlx4_en.h"
+
+
+static int mlx4_en_test_registers(struct mlx4_en_priv *priv)
+{
+ return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv)
+{
+ struct sk_buff *skb;
+ struct ethhdr *ethh;
+ unsigned char *packet;
+ unsigned int packet_size = MLX4_LOOPBACK_TEST_PAYLOAD;
+ unsigned int i;
+ int err;
+
+
+ /* build the pkt before xmit */
+ skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN);
+ if (!skb)
+ return -ENOMEM;
+
+ skb_reserve(skb, NET_IP_ALIGN);
+
+ ethh = skb_put(skb, sizeof(struct ethhdr));
+ packet = skb_put(skb, packet_size);
+ memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN);
+ eth_zero_addr(ethh->h_source);
+ ethh->h_proto = htons(ETH_P_ARP);
+ skb_reset_mac_header(skb);
+ for (i = 0; i < packet_size; ++i) /* fill our packet */
+ packet[i] = (unsigned char)(i & 0xff);
+
+ /* xmit the pkt */
+ err = mlx4_en_xmit(skb, priv->dev);
+ return err;
+}
+
+static int mlx4_en_test_loopback(struct mlx4_en_priv *priv)
+{
+ u32 loopback_ok = 0;
+ int i;
+
+ priv->loopback_ok = 0;
+ priv->validate_loopback = 1;
+
+ mlx4_en_update_loopback_state(priv->dev, priv->dev->features);
+
+ /* xmit */
+ if (mlx4_en_test_loopback_xmit(priv)) {
+ en_err(priv, "Transmitting loopback packet failed\n");
+ goto mlx4_en_test_loopback_exit;
+ }
+
+ /* polling for result */
+ for (i = 0; i < MLX4_EN_LOOPBACK_RETRIES; ++i) {
+ msleep(MLX4_EN_LOOPBACK_TIMEOUT);
+ if (priv->loopback_ok) {
+ loopback_ok = 1;
+ break;
+ }
+ }
+ if (!loopback_ok)
+ en_err(priv, "Loopback packet didn't arrive\n");
+
+mlx4_en_test_loopback_exit:
+
+ priv->validate_loopback = 0;
+
+ mlx4_en_update_loopback_state(priv->dev, priv->dev->features);
+ return !loopback_ok;
+}
+
+static int mlx4_en_test_interrupts(struct mlx4_en_priv *priv)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err = 0;
+ int i = 0;
+
+ err = mlx4_test_async(mdev->dev);
+ /* When not in MSI_X or slave, test only async */
+ if (!(mdev->dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(mdev->dev))
+ return err;
+
+ /* A loop over all completion vectors of current port,
+ * for each vector check whether it works by mapping command
+ * completions to that vector and performing a NOP command
+ */
+ for (i = 0; i < priv->rx_ring_num; i++) {
+ err = mlx4_test_interrupt(mdev->dev, priv->rx_cq[i]->vector);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int mlx4_en_test_link(struct mlx4_en_priv *priv)
+{
+ if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
+ return -ENOMEM;
+ if (priv->port_state.link_state == 1)
+ return 0;
+ else
+ return 1;
+}
+
+static int mlx4_en_test_speed(struct mlx4_en_priv *priv)
+{
+
+ if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
+ return -ENOMEM;
+
+ /* The device supports 100M, 1G, 10G, 20G, 40G and 56G speed */
+ if (priv->port_state.link_speed != SPEED_100 &&
+ priv->port_state.link_speed != SPEED_1000 &&
+ priv->port_state.link_speed != SPEED_10000 &&
+ priv->port_state.link_speed != SPEED_20000 &&
+ priv->port_state.link_speed != SPEED_40000 &&
+ priv->port_state.link_speed != SPEED_56000)
+ return priv->port_state.link_speed;
+
+ return 0;
+}
+
+
+void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int i, carrier_ok;
+
+ memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST);
+
+ if (*flags & ETH_TEST_FL_OFFLINE) {
+ /* disable the interface */
+ carrier_ok = netif_carrier_ok(dev);
+
+ netif_carrier_off(dev);
+ /* Wait until all tx queues are empty.
+ * there should not be any additional incoming traffic
+ * since we turned the carrier off */
+ msleep(200);
+
+ if (priv->mdev->dev->caps.flags &
+ MLX4_DEV_CAP_FLAG_UC_LOOPBACK) {
+ buf[3] = mlx4_en_test_registers(priv);
+ if (priv->port_up && dev->mtu >= MLX4_SELFTEST_LB_MIN_MTU)
+ buf[4] = mlx4_en_test_loopback(priv);
+ }
+
+ if (carrier_ok)
+ netif_carrier_on(dev);
+
+ }
+ buf[0] = mlx4_en_test_interrupts(priv);
+ buf[1] = mlx4_en_test_link(priv);
+ buf[2] = mlx4_en_test_speed(priv);
+
+ for (i = 0; i < MLX4_EN_NUM_SELF_TEST; i++) {
+ if (buf[i])
+ *flags |= ETH_TEST_FL_FAILED;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
new file mode 100644
index 000000000..6517e53da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -0,0 +1,1211 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <asm/page.h>
+#include <linux/mlx4/cq.h>
+#include <linux/slab.h>
+#include <linux/mlx4/qp.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <linux/prefetch.h>
+#include <linux/vmalloc.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/moduleparam.h>
+
+#include "mlx4_en.h"
+
+int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring **pring, u32 size,
+ u16 stride, int node, int queue_index)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_tx_ring *ring;
+ int tmp;
+ int err;
+
+ ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
+ if (!ring) {
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring) {
+ en_err(priv, "Failed allocating TX ring\n");
+ return -ENOMEM;
+ }
+ }
+
+ ring->size = size;
+ ring->size_mask = size - 1;
+ ring->sp_stride = stride;
+ ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
+
+ tmp = size * sizeof(struct mlx4_en_tx_info);
+ ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node);
+ if (!ring->tx_info) {
+ err = -ENOMEM;
+ goto err_ring;
+ }
+
+ en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
+ ring->tx_info, tmp);
+
+ ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
+ if (!ring->bounce_buf) {
+ ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+ if (!ring->bounce_buf) {
+ err = -ENOMEM;
+ goto err_info;
+ }
+ }
+ ring->buf_size = ALIGN(size * ring->sp_stride, MLX4_EN_PAGE_SIZE);
+
+ /* Allocate HW buffers on provided NUMA node */
+ set_dev_node(&mdev->dev->persist->pdev->dev, node);
+ err = mlx4_alloc_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
+ set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
+ if (err) {
+ en_err(priv, "Failed allocating hwq resources\n");
+ goto err_bounce;
+ }
+
+ ring->buf = ring->sp_wqres.buf.direct.buf;
+
+ en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
+ ring, ring->buf, ring->size, ring->buf_size,
+ (unsigned long long) ring->sp_wqres.buf.direct.map);
+
+ err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
+ MLX4_RESERVE_ETH_BF_QP,
+ MLX4_RES_USAGE_DRIVER);
+ if (err) {
+ en_err(priv, "failed reserving qp for TX ring\n");
+ goto err_hwq_res;
+ }
+
+ err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp);
+ if (err) {
+ en_err(priv, "Failed allocating qp %d\n", ring->qpn);
+ goto err_reserve;
+ }
+ ring->sp_qp.event = mlx4_en_sqp_event;
+
+ err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
+ if (err) {
+ en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
+ ring->bf.uar = &mdev->priv_uar;
+ ring->bf.uar->map = mdev->uar_map;
+ ring->bf_enabled = false;
+ ring->bf_alloced = false;
+ priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
+ } else {
+ ring->bf_alloced = true;
+ ring->bf_enabled = !!(priv->pflags &
+ MLX4_EN_PRIV_FLAGS_BLUEFLAME);
+ }
+
+ ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
+ ring->queue_index = queue_index;
+
+ if (queue_index < priv->num_tx_rings_p_up)
+ cpumask_set_cpu(cpumask_local_spread(queue_index,
+ priv->mdev->dev->numa_node),
+ &ring->sp_affinity_mask);
+
+ *pring = ring;
+ return 0;
+
+err_reserve:
+ mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
+err_hwq_res:
+ mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
+err_bounce:
+ kfree(ring->bounce_buf);
+ ring->bounce_buf = NULL;
+err_info:
+ kvfree(ring->tx_info);
+ ring->tx_info = NULL;
+err_ring:
+ kfree(ring);
+ *pring = NULL;
+ return err;
+}
+
+void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring **pring)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_tx_ring *ring = *pring;
+ en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
+
+ if (ring->bf_alloced)
+ mlx4_bf_free(mdev->dev, &ring->bf);
+ mlx4_qp_remove(mdev->dev, &ring->sp_qp);
+ mlx4_qp_free(mdev->dev, &ring->sp_qp);
+ mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
+ mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
+ kfree(ring->bounce_buf);
+ ring->bounce_buf = NULL;
+ kvfree(ring->tx_info);
+ ring->tx_info = NULL;
+ kfree(ring);
+ *pring = NULL;
+}
+
+int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int cq, int user_prio)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
+
+ ring->sp_cqn = cq;
+ ring->prod = 0;
+ ring->cons = 0xffffffff;
+ ring->last_nr_txbb = 1;
+ memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
+ memset(ring->buf, 0, ring->buf_size);
+ ring->free_tx_desc = mlx4_en_free_tx_desc;
+
+ ring->sp_qp_state = MLX4_QP_STATE_RST;
+ ring->doorbell_qpn = cpu_to_be32(ring->sp_qp.qpn << 8);
+ ring->mr_key = cpu_to_be32(mdev->mr.key);
+
+ mlx4_en_fill_qp_context(priv, ring->size, ring->sp_stride, 1, 0, ring->qpn,
+ ring->sp_cqn, user_prio, &ring->sp_context);
+ if (ring->bf_alloced)
+ ring->sp_context.usr_page =
+ cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
+ ring->bf.uar->index));
+
+ err = mlx4_qp_to_ready(mdev->dev, &ring->sp_wqres.mtt, &ring->sp_context,
+ &ring->sp_qp, &ring->sp_qp_state);
+ if (!cpumask_empty(&ring->sp_affinity_mask))
+ netif_set_xps_queue(priv->dev, &ring->sp_affinity_mask,
+ ring->queue_index);
+
+ return err;
+}
+
+void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ mlx4_qp_modify(mdev->dev, NULL, ring->sp_qp_state,
+ MLX4_QP_STATE_RST, NULL, 0, 0, &ring->sp_qp);
+}
+
+static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
+{
+ return ring->prod - ring->cons > ring->full_size;
+}
+
+static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring, int index,
+ u8 owner)
+{
+ __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
+ struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
+ struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+ void *end = ring->buf + ring->buf_size;
+ __be32 *ptr = (__be32 *)tx_desc;
+ int i;
+
+ /* Optimize the common case when there are no wraparounds */
+ if (likely((void *)tx_desc +
+ (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
+ /* Stamp the freed descriptor */
+ for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
+ i += STAMP_STRIDE) {
+ *ptr = stamp;
+ ptr += STAMP_DWORDS;
+ }
+ } else {
+ /* Stamp the freed descriptor */
+ for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
+ i += STAMP_STRIDE) {
+ *ptr = stamp;
+ ptr += STAMP_DWORDS;
+ if ((void *)ptr >= end) {
+ ptr = ring->buf;
+ stamp ^= cpu_to_be32(0x80000000);
+ }
+ }
+ }
+}
+
+
+u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u64 timestamp,
+ int napi_mode)
+{
+ struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+ struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
+ struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
+ void *end = ring->buf + ring->buf_size;
+ struct sk_buff *skb = tx_info->skb;
+ int nr_maps = tx_info->nr_maps;
+ int i;
+
+ /* We do not touch skb here, so prefetch skb->users location
+ * to speedup consume_skb()
+ */
+ prefetchw(&skb->users);
+
+ if (unlikely(timestamp)) {
+ struct skb_shared_hwtstamps hwts;
+
+ mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
+ skb_tstamp_tx(skb, &hwts);
+ }
+
+ if (!tx_info->inl) {
+ if (tx_info->linear)
+ dma_unmap_single(priv->ddev,
+ tx_info->map0_dma,
+ tx_info->map0_byte_count,
+ PCI_DMA_TODEVICE);
+ else
+ dma_unmap_page(priv->ddev,
+ tx_info->map0_dma,
+ tx_info->map0_byte_count,
+ PCI_DMA_TODEVICE);
+ /* Optimize the common case when there are no wraparounds */
+ if (likely((void *)tx_desc +
+ (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
+ for (i = 1; i < nr_maps; i++) {
+ data++;
+ dma_unmap_page(priv->ddev,
+ (dma_addr_t)be64_to_cpu(data->addr),
+ be32_to_cpu(data->byte_count),
+ PCI_DMA_TODEVICE);
+ }
+ } else {
+ if ((void *)data >= end)
+ data = ring->buf + ((void *)data - end);
+
+ for (i = 1; i < nr_maps; i++) {
+ data++;
+ /* Check for wraparound before unmapping */
+ if ((void *) data >= end)
+ data = ring->buf;
+ dma_unmap_page(priv->ddev,
+ (dma_addr_t)be64_to_cpu(data->addr),
+ be32_to_cpu(data->byte_count),
+ PCI_DMA_TODEVICE);
+ }
+ }
+ }
+ napi_consume_skb(skb, napi_mode);
+
+ return tx_info->nr_txbb;
+}
+
+u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u64 timestamp,
+ int napi_mode)
+{
+ struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+ struct mlx4_en_rx_alloc frame = {
+ .page = tx_info->page,
+ .dma = tx_info->map0_dma,
+ };
+
+ if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
+ dma_unmap_page(priv->ddev, tx_info->map0_dma,
+ PAGE_SIZE, priv->dma_dir);
+ put_page(tx_info->page);
+ }
+
+ return tx_info->nr_txbb;
+}
+
+int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int cnt = 0;
+
+ /* Skip last polled descriptor */
+ ring->cons += ring->last_nr_txbb;
+ en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n",
+ ring->cons, ring->prod);
+
+ if ((u32) (ring->prod - ring->cons) > ring->size) {
+ if (netif_msg_tx_err(priv))
+ en_warn(priv, "Tx consumer passed producer!\n");
+ return 0;
+ }
+
+ while (ring->cons != ring->prod) {
+ ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
+ ring->cons & ring->size_mask,
+ 0, 0 /* Non-NAPI caller */);
+ ring->cons += ring->last_nr_txbb;
+ cnt++;
+ }
+
+ if (ring->tx_queue)
+ netdev_tx_reset_queue(ring->tx_queue);
+
+ if (cnt)
+ en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
+
+ return cnt;
+}
+
+static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe,
+ u16 cqe_index, struct mlx4_en_tx_ring *ring)
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_tx_info *tx_info;
+ struct mlx4_en_tx_desc *tx_desc;
+ u16 wqe_index;
+ int desc_size;
+
+ en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n",
+ ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome);
+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe),
+ false);
+
+ wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask;
+ tx_info = &ring->tx_info[wqe_index];
+ desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE;
+ en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn,
+ wqe_index, desc_size);
+ tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE);
+ print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false);
+
+ if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state))
+ return;
+
+ en_err(priv, "Scheduling port restart\n");
+ queue_work(mdev->workqueue, &priv->restart_task);
+}
+
+bool mlx4_en_process_tx_cq(struct net_device *dev,
+ struct mlx4_en_cq *cq, int napi_budget)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_cq *mcq = &cq->mcq;
+ struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
+ struct mlx4_cqe *cqe;
+ u16 index, ring_index, stamp_index;
+ u32 txbbs_skipped = 0;
+ u32 txbbs_stamp = 0;
+ u32 cons_index = mcq->cons_index;
+ int size = cq->size;
+ u32 size_mask = ring->size_mask;
+ struct mlx4_cqe *buf = cq->buf;
+ u32 packets = 0;
+ u32 bytes = 0;
+ int factor = priv->cqe_factor;
+ int done = 0;
+ int budget = priv->tx_work_limit;
+ u32 last_nr_txbb;
+ u32 ring_cons;
+
+ if (unlikely(!priv->port_up))
+ return true;
+
+ netdev_txq_bql_complete_prefetchw(ring->tx_queue);
+
+ index = cons_index & size_mask;
+ cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
+ last_nr_txbb = READ_ONCE(ring->last_nr_txbb);
+ ring_cons = READ_ONCE(ring->cons);
+ ring_index = ring_cons & size_mask;
+ stamp_index = ring_index;
+
+ /* Process all completed CQEs */
+ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
+ cons_index & size) && (done < budget)) {
+ u16 new_index;
+
+ /*
+ * make sure we read the CQE after we read the
+ * ownership bit
+ */
+ dma_rmb();
+
+ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR))
+ if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state))
+ mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index,
+ ring);
+
+ /* Skip over last polled CQE */
+ new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
+
+ do {
+ u64 timestamp = 0;
+
+ txbbs_skipped += last_nr_txbb;
+ ring_index = (ring_index + last_nr_txbb) & size_mask;
+
+ if (unlikely(ring->tx_info[ring_index].ts_requested))
+ timestamp = mlx4_en_get_cqe_ts(cqe);
+
+ /* free next descriptor */
+ last_nr_txbb = ring->free_tx_desc(
+ priv, ring, ring_index,
+ timestamp, napi_budget);
+
+ mlx4_en_stamp_wqe(priv, ring, stamp_index,
+ !!((ring_cons + txbbs_stamp) &
+ ring->size));
+ stamp_index = ring_index;
+ txbbs_stamp = txbbs_skipped;
+ packets++;
+ bytes += ring->tx_info[ring_index].nr_bytes;
+ } while ((++done < budget) && (ring_index != new_index));
+
+ ++cons_index;
+ index = cons_index & size_mask;
+ cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
+ }
+
+ /*
+ * To prevent CQ overflow we first update CQ consumer and only then
+ * the ring consumer.
+ */
+ mcq->cons_index = cons_index;
+ mlx4_cq_set_ci(mcq);
+ wmb();
+
+ /* we want to dirty this cache line once */
+ WRITE_ONCE(ring->last_nr_txbb, last_nr_txbb);
+ WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
+
+ if (cq->type == TX_XDP)
+ return done < budget;
+
+ netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
+
+ /* Wakeup Tx queue if this stopped, and ring is not full.
+ */
+ if (netif_tx_queue_stopped(ring->tx_queue) &&
+ !mlx4_en_is_tx_ring_full(ring)) {
+ netif_tx_wake_queue(ring->tx_queue);
+ ring->wake_queue++;
+ }
+
+ return done < budget;
+}
+
+void mlx4_en_tx_irq(struct mlx4_cq *mcq)
+{
+ struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
+ struct mlx4_en_priv *priv = netdev_priv(cq->dev);
+
+ if (likely(priv->port_up))
+ napi_schedule_irqoff(&cq->napi);
+ else
+ mlx4_en_arm_cq(priv, cq);
+}
+
+/* TX CQ polling - called by NAPI */
+int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
+{
+ struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
+ struct net_device *dev = cq->dev;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ bool clean_complete;
+
+ clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
+ if (!clean_complete)
+ return budget;
+
+ napi_complete(napi);
+ mlx4_en_arm_cq(priv, cq);
+
+ return 0;
+}
+
+static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ u32 index,
+ unsigned int desc_size)
+{
+ u32 copy = (ring->size - index) << LOG_TXBB_SIZE;
+ int i;
+
+ for (i = desc_size - copy - 4; i >= 0; i -= 4) {
+ if ((i & (TXBB_SIZE - 1)) == 0)
+ wmb();
+
+ *((u32 *) (ring->buf + i)) =
+ *((u32 *) (ring->bounce_buf + copy + i));
+ }
+
+ for (i = copy - 4; i >= 4 ; i -= 4) {
+ if ((i & (TXBB_SIZE - 1)) == 0)
+ wmb();
+
+ *((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) =
+ *((u32 *) (ring->bounce_buf + i));
+ }
+
+ /* Return real descriptor location */
+ return ring->buf + (index << LOG_TXBB_SIZE);
+}
+
+/* Decide if skb can be inlined in tx descriptor to avoid dma mapping
+ *
+ * It seems strange we do not simply use skb_copy_bits().
+ * This would allow to inline all skbs iff skb->len <= inline_thold
+ *
+ * Note that caller already checked skb was not a gso packet
+ */
+static bool is_inline(int inline_thold, const struct sk_buff *skb,
+ const struct skb_shared_info *shinfo,
+ void **pfrag)
+{
+ void *ptr;
+
+ if (skb->len > inline_thold || !inline_thold)
+ return false;
+
+ if (shinfo->nr_frags == 1) {
+ ptr = skb_frag_address_safe(&shinfo->frags[0]);
+ if (unlikely(!ptr))
+ return false;
+ *pfrag = ptr;
+ return true;
+ }
+ if (shinfo->nr_frags)
+ return false;
+ return true;
+}
+
+static int inline_size(const struct sk_buff *skb)
+{
+ if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
+ <= MLX4_INLINE_ALIGN)
+ return ALIGN(skb->len + CTRL_SIZE +
+ sizeof(struct mlx4_wqe_inline_seg), 16);
+ else
+ return ALIGN(skb->len + CTRL_SIZE + 2 *
+ sizeof(struct mlx4_wqe_inline_seg), 16);
+}
+
+static int get_real_size(const struct sk_buff *skb,
+ const struct skb_shared_info *shinfo,
+ struct net_device *dev,
+ int *lso_header_size,
+ bool *inline_ok,
+ void **pfrag)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int real_size;
+
+ if (shinfo->gso_size) {
+ *inline_ok = false;
+ if (skb->encapsulation)
+ *lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
+ else
+ *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
+ ALIGN(*lso_header_size + 4, DS_SIZE);
+ if (unlikely(*lso_header_size != skb_headlen(skb))) {
+ /* We add a segment for the skb linear buffer only if
+ * it contains data */
+ if (*lso_header_size < skb_headlen(skb))
+ real_size += DS_SIZE;
+ else {
+ if (netif_msg_tx_err(priv))
+ en_warn(priv, "Non-linear headers\n");
+ return 0;
+ }
+ }
+ } else {
+ *lso_header_size = 0;
+ *inline_ok = is_inline(priv->prof->inline_thold, skb,
+ shinfo, pfrag);
+
+ if (*inline_ok)
+ real_size = inline_size(skb);
+ else
+ real_size = CTRL_SIZE +
+ (shinfo->nr_frags + 1) * DS_SIZE;
+ }
+
+ return real_size;
+}
+
+static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
+ const struct sk_buff *skb,
+ const struct skb_shared_info *shinfo,
+ void *fragptr)
+{
+ struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
+ int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl);
+ unsigned int hlen = skb_headlen(skb);
+
+ if (skb->len <= spc) {
+ if (likely(skb->len >= MIN_PKT_LEN)) {
+ inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
+ } else {
+ inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN);
+ memset(((void *)(inl + 1)) + skb->len, 0,
+ MIN_PKT_LEN - skb->len);
+ }
+ skb_copy_from_linear_data(skb, inl + 1, hlen);
+ if (shinfo->nr_frags)
+ memcpy(((void *)(inl + 1)) + hlen, fragptr,
+ skb_frag_size(&shinfo->frags[0]));
+
+ } else {
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
+ if (hlen <= spc) {
+ skb_copy_from_linear_data(skb, inl + 1, hlen);
+ if (hlen < spc) {
+ memcpy(((void *)(inl + 1)) + hlen,
+ fragptr, spc - hlen);
+ fragptr += spc - hlen;
+ }
+ inl = (void *) (inl + 1) + spc;
+ memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
+ } else {
+ skb_copy_from_linear_data(skb, inl + 1, spc);
+ inl = (void *) (inl + 1) + spc;
+ skb_copy_from_linear_data_offset(skb, spc, inl + 1,
+ hlen - spc);
+ if (shinfo->nr_frags)
+ memcpy(((void *)(inl + 1)) + hlen - spc,
+ fragptr,
+ skb_frag_size(&shinfo->frags[0]));
+ }
+
+ dma_wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
+ }
+}
+
+u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev,
+ select_queue_fallback_t fallback)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ u16 rings_p_up = priv->num_tx_rings_p_up;
+
+ if (netdev_get_num_tc(dev))
+ return fallback(dev, skb, NULL);
+
+ return fallback(dev, skb, NULL) % rings_p_up;
+}
+
+static void mlx4_bf_copy(void __iomem *dst, const void *src,
+ unsigned int bytecnt)
+{
+ __iowrite64_copy(dst, src, bytecnt / 8);
+}
+
+void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
+{
+ wmb();
+ /* Since there is no iowrite*_native() that writes the
+ * value as is, without byteswapping - using the one
+ * the doesn't do byteswapping in the relevant arch
+ * endianness.
+ */
+#if defined(__LITTLE_ENDIAN)
+ iowrite32(
+#else
+ iowrite32be(
+#endif
+ (__force u32)ring->doorbell_qpn,
+ ring->bf.uar->map + MLX4_SEND_DOORBELL);
+}
+
+static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
+ struct mlx4_en_tx_desc *tx_desc,
+ union mlx4_wqe_qpn_vlan qpn_vlan,
+ int desc_size, int bf_index,
+ __be32 op_own, bool bf_ok,
+ bool send_doorbell)
+{
+ tx_desc->ctrl.qpn_vlan = qpn_vlan;
+
+ if (bf_ok) {
+ op_own |= htonl((bf_index & 0xffff) << 8);
+ /* Ensure new descriptor hits memory
+ * before setting ownership of this descriptor to HW
+ */
+ dma_wmb();
+ tx_desc->ctrl.owner_opcode = op_own;
+
+ wmb();
+
+ mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
+ desc_size);
+
+ wmb();
+
+ ring->bf.offset ^= ring->bf.buf_size;
+ } else {
+ /* Ensure new descriptor hits memory
+ * before setting ownership of this descriptor to HW
+ */
+ dma_wmb();
+ tx_desc->ctrl.owner_opcode = op_own;
+ if (send_doorbell)
+ mlx4_en_xmit_doorbell(ring);
+ else
+ ring->xmit_more++;
+ }
+}
+
+static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,
+ struct skb_shared_info *shinfo,
+ struct mlx4_wqe_data_seg *data,
+ struct sk_buff *skb,
+ int lso_header_size,
+ __be32 mr_key,
+ struct mlx4_en_tx_info *tx_info)
+{
+ struct device *ddev = priv->ddev;
+ dma_addr_t dma = 0;
+ u32 byte_count = 0;
+ int i_frag;
+
+ /* Map fragments if any */
+ for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
+ const struct skb_frag_struct *frag;
+
+ frag = &shinfo->frags[i_frag];
+ byte_count = skb_frag_size(frag);
+ dma = skb_frag_dma_map(ddev, frag,
+ 0, byte_count,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma))
+ goto tx_drop_unmap;
+
+ data->addr = cpu_to_be64(dma);
+ data->lkey = mr_key;
+ dma_wmb();
+ data->byte_count = cpu_to_be32(byte_count);
+ --data;
+ }
+
+ /* Map linear part if needed */
+ if (tx_info->linear) {
+ byte_count = skb_headlen(skb) - lso_header_size;
+
+ dma = dma_map_single(ddev, skb->data +
+ lso_header_size, byte_count,
+ PCI_DMA_TODEVICE);
+ if (dma_mapping_error(ddev, dma))
+ goto tx_drop_unmap;
+
+ data->addr = cpu_to_be64(dma);
+ data->lkey = mr_key;
+ dma_wmb();
+ data->byte_count = cpu_to_be32(byte_count);
+ }
+ /* tx completion can avoid cache line miss for common cases */
+ tx_info->map0_dma = dma;
+ tx_info->map0_byte_count = byte_count;
+
+ return true;
+
+tx_drop_unmap:
+ en_err(priv, "DMA mapping error\n");
+
+ while (++i_frag < shinfo->nr_frags) {
+ ++data;
+ dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr),
+ be32_to_cpu(data->byte_count),
+ PCI_DMA_TODEVICE);
+ }
+
+ return false;
+}
+
+netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ union mlx4_wqe_qpn_vlan qpn_vlan = {};
+ struct mlx4_en_tx_ring *ring;
+ struct mlx4_en_tx_desc *tx_desc;
+ struct mlx4_wqe_data_seg *data;
+ struct mlx4_en_tx_info *tx_info;
+ u32 __maybe_unused ring_cons;
+ int tx_ind;
+ int nr_txbb;
+ int desc_size;
+ int real_size;
+ u32 index, bf_index;
+ __be32 op_own;
+ int lso_header_size;
+ void *fragptr = NULL;
+ bool bounce = false;
+ bool send_doorbell;
+ bool stop_queue;
+ bool inline_ok;
+ u8 data_offset;
+ bool bf_ok;
+
+ tx_ind = skb_get_queue_mapping(skb);
+ ring = priv->tx_ring[TX][tx_ind];
+
+ if (unlikely(!priv->port_up))
+ goto tx_drop;
+
+ /* fetch ring->cons far ahead before needing it to avoid stall */
+ ring_cons = READ_ONCE(ring->cons);
+
+ real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
+ &inline_ok, &fragptr);
+ if (unlikely(!real_size))
+ goto tx_drop_count;
+
+ /* Align descriptor to TXBB size */
+ desc_size = ALIGN(real_size, TXBB_SIZE);
+ nr_txbb = desc_size >> LOG_TXBB_SIZE;
+ if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
+ if (netif_msg_tx_err(priv))
+ en_warn(priv, "Oversized header or SG list\n");
+ goto tx_drop_count;
+ }
+
+ bf_ok = ring->bf_enabled;
+ if (skb_vlan_tag_present(skb)) {
+ u16 vlan_proto;
+
+ qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb));
+ vlan_proto = be16_to_cpu(skb->vlan_proto);
+ if (vlan_proto == ETH_P_8021AD)
+ qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN;
+ else if (vlan_proto == ETH_P_8021Q)
+ qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN;
+ else
+ qpn_vlan.ins_vlan = 0;
+ bf_ok = false;
+ }
+
+ netdev_txq_bql_enqueue_prefetchw(ring->tx_queue);
+
+ /* Track current inflight packets for performance analysis */
+ AVG_PERF_COUNTER(priv->pstats.inflight_avg,
+ (u32)(ring->prod - ring_cons - 1));
+
+ /* Packet is good - grab an index and transmit it */
+ index = ring->prod & ring->size_mask;
+ bf_index = ring->prod;
+
+ /* See if we have enough space for whole descriptor TXBB for setting
+ * SW ownership on next descriptor; if not, use a bounce buffer. */
+ if (likely(index + nr_txbb <= ring->size))
+ tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
+ else {
+ tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
+ bounce = true;
+ bf_ok = false;
+ }
+
+ /* Save skb in tx_info ring */
+ tx_info = &ring->tx_info[index];
+ tx_info->skb = skb;
+ tx_info->nr_txbb = nr_txbb;
+
+ if (!lso_header_size) {
+ data = &tx_desc->data;
+ data_offset = offsetof(struct mlx4_en_tx_desc, data);
+ } else {
+ int lso_align = ALIGN(lso_header_size + 4, DS_SIZE);
+
+ data = (void *)&tx_desc->lso + lso_align;
+ data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align;
+ }
+
+ /* valid only for none inline segments */
+ tx_info->data_offset = data_offset;
+
+ tx_info->inl = inline_ok;
+
+ tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok;
+
+ tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
+ data += tx_info->nr_maps - 1;
+
+ if (!tx_info->inl)
+ if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb,
+ lso_header_size, ring->mr_key,
+ tx_info))
+ goto tx_drop_count;
+
+ /*
+ * For timestamping add flag to skb_shinfo and
+ * set flag for further reference
+ */
+ tx_info->ts_requested = 0;
+ if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
+ shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
+ shinfo->tx_flags |= SKBTX_IN_PROGRESS;
+ tx_info->ts_requested = 1;
+ }
+
+ /* Prepare ctrl segement apart opcode+ownership, which depends on
+ * whether LSO is used */
+ tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ if (!skb->encapsulation)
+ tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+ MLX4_WQE_CTRL_TCP_UDP_CSUM);
+ else
+ tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
+ ring->tx_csum++;
+ }
+
+ if (priv->flags & MLX4_EN_FLAG_ENABLE_HW_LOOPBACK) {
+ struct ethhdr *ethh;
+
+ /* Copy dst mac address to wqe. This allows loopback in eSwitch,
+ * so that VFs and PF can communicate with each other
+ */
+ ethh = (struct ethhdr *)skb->data;
+ tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest);
+ tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
+ }
+
+ /* Handle LSO (TSO) packets */
+ if (lso_header_size) {
+ int i;
+
+ /* Mark opcode as LSO */
+ op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
+ ((ring->prod & ring->size) ?
+ cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+
+ /* Fill in the LSO prefix */
+ tx_desc->lso.mss_hdr_size = cpu_to_be32(
+ shinfo->gso_size << 16 | lso_header_size);
+
+ /* Copy headers;
+ * note that we already verified that it is linear */
+ memcpy(tx_desc->lso.header, skb->data, lso_header_size);
+
+ ring->tso_packets++;
+
+ i = shinfo->gso_segs;
+ tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size;
+ ring->packets += i;
+ } else {
+ /* Normal (Non LSO) packet */
+ op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
+ ((ring->prod & ring->size) ?
+ cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+ tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+ ring->packets++;
+ }
+ ring->bytes += tx_info->nr_bytes;
+ netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
+ AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
+
+ if (tx_info->inl)
+ build_inline_wqe(tx_desc, skb, shinfo, fragptr);
+
+ if (skb->encapsulation) {
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ u8 proto;
+
+ ip.hdr = skb_inner_network_header(skb);
+ proto = (ip.v4->version == 4) ? ip.v4->protocol :
+ ip.v6->nexthdr;
+
+ if (proto == IPPROTO_TCP || proto == IPPROTO_UDP)
+ op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
+ else
+ op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
+ }
+
+ ring->prod += nr_txbb;
+
+ /* If we used a bounce buffer then copy descriptor back into place */
+ if (unlikely(bounce))
+ tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
+
+ skb_tx_timestamp(skb);
+
+ /* Check available TXBBs And 2K spare for prefetch */
+ stop_queue = mlx4_en_is_tx_ring_full(ring);
+ if (unlikely(stop_queue)) {
+ netif_tx_stop_queue(ring->tx_queue);
+ ring->queue_stopped++;
+ }
+ send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);
+
+ real_size = (real_size / 16) & 0x3f;
+
+ bf_ok &= desc_size <= MAX_BF && send_doorbell;
+
+ if (bf_ok)
+ qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
+ else
+ qpn_vlan.fence_size = real_size;
+
+ mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index,
+ op_own, bf_ok, send_doorbell);
+
+ if (unlikely(stop_queue)) {
+ /* If queue was emptied after the if (stop_queue) , and before
+ * the netif_tx_stop_queue() - need to wake the queue,
+ * or else it will remain stopped forever.
+ * Need a memory barrier to make sure ring->cons was not
+ * updated before queue was stopped.
+ */
+ smp_rmb();
+
+ ring_cons = READ_ONCE(ring->cons);
+ if (unlikely(!mlx4_en_is_tx_ring_full(ring))) {
+ netif_tx_wake_queue(ring->tx_queue);
+ ring->wake_queue++;
+ }
+ }
+ return NETDEV_TX_OK;
+
+tx_drop_count:
+ ring->tx_dropped++;
+tx_drop:
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+#define MLX4_EN_XDP_TX_NRTXBB 1
+#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
+ / 16) & 0x3f)
+
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring)
+{
+ int i;
+
+ for (i = 0; i < ring->size; i++) {
+ struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
+ struct mlx4_en_tx_desc *tx_desc = ring->buf +
+ (i << LOG_TXBB_SIZE);
+
+ tx_info->map0_byte_count = PAGE_SIZE;
+ tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
+ tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
+ tx_info->ts_requested = 0;
+ tx_info->nr_maps = 1;
+ tx_info->linear = 1;
+ tx_info->inl = 0;
+
+ tx_desc->data.lkey = ring->mr_key;
+ tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+ tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+ }
+}
+
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
+ struct mlx4_en_rx_alloc *frame,
+ struct mlx4_en_priv *priv, unsigned int length,
+ int tx_ind, bool *doorbell_pending)
+{
+ struct mlx4_en_tx_desc *tx_desc;
+ struct mlx4_en_tx_info *tx_info;
+ struct mlx4_wqe_data_seg *data;
+ struct mlx4_en_tx_ring *ring;
+ dma_addr_t dma;
+ __be32 op_own;
+ int index;
+
+ if (unlikely(!priv->port_up))
+ goto tx_drop;
+
+ ring = priv->tx_ring[TX_XDP][tx_ind];
+
+ if (unlikely(mlx4_en_is_tx_ring_full(ring)))
+ goto tx_drop_count;
+
+ index = ring->prod & ring->size_mask;
+ tx_info = &ring->tx_info[index];
+
+ /* Track current inflight packets for performance analysis */
+ AVG_PERF_COUNTER(priv->pstats.inflight_avg,
+ (u32)(ring->prod - READ_ONCE(ring->cons) - 1));
+
+ tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
+ data = &tx_desc->data;
+
+ dma = frame->dma;
+
+ tx_info->page = frame->page;
+ frame->page = NULL;
+ tx_info->map0_dma = dma;
+ tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
+
+ dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
+ length, PCI_DMA_TODEVICE);
+
+ data->addr = cpu_to_be64(dma + frame->page_offset);
+ dma_wmb();
+ data->byte_count = cpu_to_be32(length);
+
+ /* tx completion can avoid cache line miss for common cases */
+
+ op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
+ ((ring->prod & ring->size) ?
+ cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+
+ rx_ring->xdp_tx++;
+ AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);
+
+ ring->prod += MLX4_EN_XDP_TX_NRTXBB;
+
+ /* Ensure new descriptor hits memory
+ * before setting ownership of this descriptor to HW
+ */
+ dma_wmb();
+ tx_desc->ctrl.owner_opcode = op_own;
+ ring->xmit_more++;
+
+ *doorbell_pending = true;
+
+ return NETDEV_TX_OK;
+
+tx_drop_count:
+ rx_ring->xdp_tx_full++;
+ *doorbell_pending = true;
+tx_drop:
+ return NETDEV_TX_BUSY;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
new file mode 100644
index 000000000..2df92dbd3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -0,0 +1,1567 @@
+/*
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/cpu_rmap.h>
+
+#include "mlx4.h"
+#include "fw.h"
+
+enum {
+ MLX4_IRQNAME_SIZE = 32
+};
+
+enum {
+ MLX4_NUM_ASYNC_EQE = 0x100,
+ MLX4_NUM_SPARE_EQE = 0x80,
+ MLX4_EQ_ENTRY_SIZE = 0x20
+};
+
+#define MLX4_EQ_STATUS_OK ( 0 << 28)
+#define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28)
+#define MLX4_EQ_OWNER_SW ( 0 << 24)
+#define MLX4_EQ_OWNER_HW ( 1 << 24)
+#define MLX4_EQ_FLAG_EC ( 1 << 18)
+#define MLX4_EQ_FLAG_OI ( 1 << 17)
+#define MLX4_EQ_STATE_ARMED ( 9 << 8)
+#define MLX4_EQ_STATE_FIRED (10 << 8)
+#define MLX4_EQ_STATE_ALWAYS_ARMED (11 << 8)
+
+#define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG) | \
+ (1ull << MLX4_EVENT_TYPE_COMM_EST) | \
+ (1ull << MLX4_EVENT_TYPE_SQ_DRAINED) | \
+ (1ull << MLX4_EVENT_TYPE_CQ_ERROR) | \
+ (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR) | \
+ (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR) | \
+ (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \
+ (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+ (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \
+ (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \
+ (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \
+ (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
+ (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \
+ (1ull << MLX4_EVENT_TYPE_CMD) | \
+ (1ull << MLX4_EVENT_TYPE_OP_REQUIRED) | \
+ (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL) | \
+ (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \
+ (1ull << MLX4_EVENT_TYPE_FATAL_WARNING))
+
+static u64 get_async_ev_mask(struct mlx4_dev *dev)
+{
+ u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
+ async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT);
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
+ async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT);
+
+ return async_ev_mask;
+}
+
+static void eq_set_ci(struct mlx4_eq *eq, int req_not)
+{
+ __raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
+ req_not << 31),
+ eq->doorbell);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor,
+ u8 eqe_size)
+{
+ /* (entry & (eq->nent - 1)) gives us a cyclic array */
+ unsigned long offset = (entry & (eq->nent - 1)) * eqe_size;
+ /* CX3 is capable of extending the EQE from 32 to 64 bytes with
+ * strides of 64B,128B and 256B.
+ * When 64B EQE is used, the first (in the lower addresses)
+ * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+ * contain the legacy EQE information.
+ * In all other cases, the first 32B contains the legacy EQE info.
+ */
+ return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
+}
+
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor, u8 size)
+{
+ struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor, size);
+ return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
+}
+
+static struct mlx4_eqe *next_slave_event_eqe(struct mlx4_slave_event_eq *slave_eq)
+{
+ struct mlx4_eqe *eqe =
+ &slave_eq->event_eqe[slave_eq->cons & (SLAVE_EVENT_EQ_SIZE - 1)];
+ return (!!(eqe->owner & 0x80) ^
+ !!(slave_eq->cons & SLAVE_EVENT_EQ_SIZE)) ?
+ eqe : NULL;
+}
+
+void mlx4_gen_slave_eqe(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work, struct mlx4_mfunc_master_ctx,
+ slave_event_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq;
+ struct mlx4_eqe *eqe;
+ u8 slave;
+ int i, phys_port, slave_port;
+
+ for (eqe = next_slave_event_eqe(slave_eq); eqe;
+ eqe = next_slave_event_eqe(slave_eq)) {
+ slave = eqe->slave_id;
+
+ if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE &&
+ eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN &&
+ mlx4_is_bonded(dev)) {
+ struct mlx4_port_cap port_cap;
+
+ if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state)
+ goto consume;
+
+ if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state)
+ goto consume;
+ }
+ /* All active slaves need to receive the event */
+ if (slave == ALL_SLAVES) {
+ for (i = 0; i <= dev->persist->num_vfs; i++) {
+ phys_port = 0;
+ if (eqe->type == MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT &&
+ eqe->subtype == MLX4_DEV_PMC_SUBTYPE_PORT_INFO) {
+ phys_port = eqe->event.port_mgmt_change.port;
+ slave_port = mlx4_phys_to_slave_port(dev, i, phys_port);
+ if (slave_port < 0) /* VF doesn't have this port */
+ continue;
+ eqe->event.port_mgmt_change.port = slave_port;
+ }
+ if (mlx4_GEN_EQE(dev, i, eqe))
+ mlx4_warn(dev, "Failed to generate event for slave %d\n",
+ i);
+ if (phys_port)
+ eqe->event.port_mgmt_change.port = phys_port;
+ }
+ } else {
+ if (mlx4_GEN_EQE(dev, slave, eqe))
+ mlx4_warn(dev, "Failed to generate event for slave %d\n",
+ slave);
+ }
+consume:
+ ++slave_eq->cons;
+ }
+}
+
+
+static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq;
+ struct mlx4_eqe *s_eqe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&slave_eq->event_lock, flags);
+ s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
+ if ((!!(s_eqe->owner & 0x80)) ^
+ (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
+ mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. No free EQE on slave events queue\n",
+ slave);
+ spin_unlock_irqrestore(&slave_eq->event_lock, flags);
+ return;
+ }
+
+ memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
+ s_eqe->slave_id = slave;
+ /* ensure all information is written before setting the ownersip bit */
+ dma_wmb();
+ s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80;
+ ++slave_eq->prod;
+
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.slave_event_work);
+ spin_unlock_irqrestore(&slave_eq->event_lock, flags);
+}
+
+static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
+ struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (slave < 0 || slave > dev->persist->num_vfs ||
+ slave == dev->caps.function ||
+ !priv->mfunc.master.slave_state[slave].active)
+ return;
+
+ slave_event(dev, slave, eqe);
+}
+
+#if defined(CONFIG_SMP)
+static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec)
+{
+ int hint_err;
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_eq *eq = &priv->eq_table.eq[vec];
+
+ if (!cpumask_available(eq->affinity_mask) ||
+ cpumask_empty(eq->affinity_mask))
+ return;
+
+ hint_err = irq_set_affinity_hint(eq->irq, eq->affinity_mask);
+ if (hint_err)
+ mlx4_warn(dev, "irq_set_affinity_hint failed, err %d\n", hint_err);
+}
+#endif
+
+int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_eqe eqe;
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave];
+
+ if (!s_slave->active)
+ return 0;
+
+ memset(&eqe, 0, sizeof(eqe));
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
+ eqe.event.port_mgmt_change.port = mlx4_phys_to_slave_port(dev, slave, port);
+
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_pkey_eqe);
+
+int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_eqe eqe;
+
+ /*don't send if we don't have the that slave */
+ if (dev->persist->num_vfs < slave)
+ return 0;
+ memset(&eqe, 0, sizeof(eqe));
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
+ eqe.event.port_mgmt_change.port = mlx4_phys_to_slave_port(dev, slave, port);
+
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_guid_change_eqe);
+
+int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
+ u8 port_subtype_change)
+{
+ struct mlx4_eqe eqe;
+ u8 slave_port = mlx4_phys_to_slave_port(dev, slave, port);
+
+ /*don't send if we don't have the that slave */
+ if (dev->persist->num_vfs < slave)
+ return 0;
+ memset(&eqe, 0, sizeof(eqe));
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
+ eqe.subtype = port_subtype_change;
+ eqe.event.port_change.port = cpu_to_be32(slave_port << 28);
+
+ mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__,
+ port_subtype_change, slave, port);
+ return mlx4_GEN_EQE(dev, slave, &eqe);
+}
+EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe);
+
+enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+
+ if (slave >= dev->num_slaves || port > dev->caps.num_ports ||
+ port <= 0 || !test_bit(port - 1, actv_ports.ports)) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return SLAVE_PORT_DOWN;
+ }
+ return s_state[slave].port_state[port];
+}
+EXPORT_SYMBOL(mlx4_get_slave_port_state);
+
+static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port,
+ enum slave_port_state state)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+
+ if (slave >= dev->num_slaves || port > dev->caps.num_ports ||
+ port <= 0 || !test_bit(port - 1, actv_ports.ports)) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return -1;
+ }
+ s_state[slave].port_state[port] = state;
+
+ return 0;
+}
+
+static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
+{
+ int i;
+ enum slave_port_gen_event gen_event;
+ struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev,
+ port);
+
+ for (i = 0; i < dev->persist->num_vfs + 1; i++)
+ if (test_bit(i, slaves_pport.slaves))
+ set_and_calc_slave_port_state(dev, i, port,
+ event, &gen_event);
+}
+/**************************************************************************
+ The function get as input the new event to that port,
+ and according to the prev state change the slave's port state.
+ The events are:
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_UP
+ MLX4_PORT_STATE_IB_EVENT_GID_VALID
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID
+***************************************************************************/
+int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
+ u8 port, int event,
+ enum slave_port_gen_event *gen_event)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *ctx = NULL;
+ unsigned long flags;
+ int ret = -1;
+ struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave);
+ enum slave_port_state cur_state =
+ mlx4_get_slave_port_state(dev, slave, port);
+
+ *gen_event = SLAVE_PORT_GEN_EVENT_NONE;
+
+ if (slave >= dev->num_slaves || port > dev->caps.num_ports ||
+ port <= 0 || !test_bit(port - 1, actv_ports.ports)) {
+ pr_err("%s: Error: asking for slave:%d, port:%d\n",
+ __func__, slave, port);
+ return ret;
+ }
+
+ ctx = &priv->mfunc.master.slave_state[slave];
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ switch (cur_state) {
+ case SLAVE_PORT_DOWN:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PENDING_UP);
+ break;
+ case SLAVE_PENDING_UP:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event)
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_DOWN);
+ else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_UP);
+ *gen_event = SLAVE_PORT_GEN_EVENT_UP;
+ }
+ break;
+ case SLAVE_PORT_UP:
+ if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PORT_DOWN);
+ *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
+ } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID ==
+ event) {
+ mlx4_set_slave_port_state(dev, slave, port,
+ SLAVE_PENDING_UP);
+ *gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
+ }
+ break;
+ default:
+ pr_err("%s: BUG!!! UNKNOWN state: slave:%d, port:%d\n",
+ __func__, slave, port);
+ goto out;
+ }
+ ret = mlx4_get_slave_port_state(dev, slave, port);
+
+out:
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ return ret;
+}
+
+EXPORT_SYMBOL(set_and_calc_slave_port_state);
+
+int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr)
+{
+ struct mlx4_eqe eqe;
+
+ memset(&eqe, 0, sizeof(eqe));
+
+ eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
+ eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
+ eqe.event.port_mgmt_change.port = port;
+ eqe.event.port_mgmt_change.params.port_info.changed_attr =
+ cpu_to_be32((u32) attr);
+
+ slave_event(dev, ALL_SLAVES, &eqe);
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev);
+
+void mlx4_master_handle_slave_flr(struct work_struct *work)
+{
+ struct mlx4_mfunc_master_ctx *master =
+ container_of(work, struct mlx4_mfunc_master_ctx,
+ slave_flr_event_work);
+ struct mlx4_mfunc *mfunc =
+ container_of(master, struct mlx4_mfunc, master);
+ struct mlx4_priv *priv =
+ container_of(mfunc, struct mlx4_priv, mfunc);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ int i;
+ int err;
+ unsigned long flags;
+
+ mlx4_dbg(dev, "mlx4_handle_slave_flr\n");
+
+ for (i = 0 ; i < dev->num_slaves; i++) {
+
+ if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
+ mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n",
+ i);
+ /* In case of 'Reset flow' FLR can be generated for
+ * a slave before mlx4_load_one is done.
+ * make sure interface is up before trying to delete
+ * slave resources which weren't allocated yet.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_UP)
+ mlx4_delete_all_resources_for_slave(dev, i);
+ /*return the slave to running mode*/
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
+ slave_state[i].is_slave_going_down = 0;
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ /*notify the FW:*/
+ err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to notify FW on FLR done (slave:%d)\n",
+ i);
+ }
+ }
+}
+
+static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_eqe *eqe;
+ int cqn;
+ int eqes_found = 0;
+ int set_ci = 0;
+ int port;
+ int slave = 0;
+ int ret;
+ u32 flr_slave;
+ u8 update_slave_state;
+ int i;
+ enum slave_port_gen_event gen_event;
+ unsigned long flags;
+ struct mlx4_vport_state *s_info;
+ int eqe_size = dev->caps.eqe_size;
+
+ while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor, eqe_size))) {
+ /*
+ * Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ dma_rmb();
+
+ switch (eqe->type) {
+ case MLX4_EVENT_TYPE_COMP:
+ cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
+ mlx4_cq_completion(dev, cqn);
+ break;
+
+ case MLX4_EVENT_TYPE_PATH_MIG:
+ case MLX4_EVENT_TYPE_COMM_EST:
+ case MLX4_EVENT_TYPE_SQ_DRAINED:
+ case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
+ mlx4_dbg(dev, "event %d arrived\n", eqe->type);
+ if (mlx4_is_master(dev)) {
+ /* forward only to slave owning the QP */
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_QP,
+ be32_to_cpu(eqe->event.qp.qpn)
+ & 0xffffff, &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_dbg(dev, "QP event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+
+ }
+ mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) &
+ 0xffffff, eqe->type);
+ break;
+
+ case MLX4_EVENT_TYPE_SRQ_LIMIT:
+ mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n",
+ __func__, be32_to_cpu(eqe->event.srq.srqn),
+ eq->eqn);
+ case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
+ if (mlx4_is_master(dev)) {
+ /* forward only to slave owning the SRQ */
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_SRQ,
+ be32_to_cpu(eqe->event.srq.srqn)
+ & 0xffffff,
+ &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_warn(dev, "SRQ event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+ if (eqe->type ==
+ MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)
+ mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n",
+ __func__, slave,
+ be32_to_cpu(eqe->event.srq.srqn),
+ eqe->type, eqe->subtype);
+
+ if (!ret && slave != dev->caps.function) {
+ if (eqe->type ==
+ MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)
+ mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n",
+ __func__, eqe->type,
+ eqe->subtype, slave);
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+ }
+ mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) &
+ 0xffffff, eqe->type);
+ break;
+
+ case MLX4_EVENT_TYPE_CMD:
+ mlx4_cmd_event(dev,
+ be16_to_cpu(eqe->event.cmd.token),
+ eqe->event.cmd.status,
+ be64_to_cpu(eqe->event.cmd.out_param));
+ break;
+
+ case MLX4_EVENT_TYPE_PORT_CHANGE: {
+ struct mlx4_slaves_pport slaves_port;
+ port = be32_to_cpu(eqe->event.port_change.port) >> 28;
+ slaves_port = mlx4_phys_to_slaves_pport(dev, port);
+ if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) {
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
+ port);
+ mlx4_priv(dev)->sense.do_sense_port[port] = 1;
+ if (!mlx4_is_master(dev))
+ break;
+ for (i = 0; i < dev->persist->num_vfs + 1;
+ i++) {
+ int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);
+
+ if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
+ continue;
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN to slave: %d, port:%d\n",
+ __func__, i, port);
+ s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
+ if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) {
+ eqe->event.port_change.port =
+ cpu_to_be32(
+ (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
+ | (reported_port << 28));
+ mlx4_slave_event(dev, i, eqe);
+ }
+ } else { /* IB port */
+ set_and_calc_slave_port_state(dev, i, port,
+ MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
+ &gen_event);
+ /*we can be in pending state, then do not send port_down event*/
+ if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ eqe->event.port_change.port =
+ cpu_to_be32(
+ (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
+ | (mlx4_phys_to_slave_port(dev, i, port) << 28));
+ mlx4_slave_event(dev, i, eqe);
+ }
+ }
+ }
+ } else {
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);
+
+ mlx4_priv(dev)->sense.do_sense_port[port] = 0;
+
+ if (!mlx4_is_master(dev))
+ break;
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+ for (i = 0;
+ i < dev->persist->num_vfs + 1;
+ i++) {
+ int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);
+
+ if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
+ continue;
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
+ if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) {
+ eqe->event.port_change.port =
+ cpu_to_be32(
+ (be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
+ | (reported_port << 28));
+ mlx4_slave_event(dev, i, eqe);
+ }
+ }
+ else /* IB port */
+ /* port-up event will be sent to a slave when the
+ * slave's alias-guid is set. This is done in alias_GUID.c
+ */
+ set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP);
+ }
+ break;
+ }
+
+ case MLX4_EVENT_TYPE_CQ_ERROR:
+ mlx4_warn(dev, "CQ %s on CQN %06x\n",
+ eqe->event.cq_err.syndrome == 1 ?
+ "overrun" : "access violation",
+ be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
+ if (mlx4_is_master(dev)) {
+ ret = mlx4_get_slave_from_resource_id(dev,
+ RES_CQ,
+ be32_to_cpu(eqe->event.cq_err.cqn)
+ & 0xffffff, &slave);
+ if (ret && ret != -ENOENT) {
+ mlx4_dbg(dev, "CQ event %02x(%02x) on EQ %d at index %u: could not get slave id (%d)\n",
+ eqe->type, eqe->subtype,
+ eq->eqn, eq->cons_index, ret);
+ break;
+ }
+
+ if (!ret && slave != dev->caps.function) {
+ mlx4_slave_event(dev, slave, eqe);
+ break;
+ }
+ }
+ mlx4_cq_event(dev,
+ be32_to_cpu(eqe->event.cq_err.cqn)
+ & 0xffffff,
+ eqe->type);
+ break;
+
+ case MLX4_EVENT_TYPE_EQ_OVERFLOW:
+ mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
+ break;
+
+ case MLX4_EVENT_TYPE_OP_REQUIRED:
+ atomic_inc(&priv->opreq_count);
+ /* FW commands can't be executed from interrupt context
+ * working in deferred task
+ */
+ queue_work(mlx4_wq, &priv->opreq_task);
+ break;
+
+ case MLX4_EVENT_TYPE_COMM_CHANNEL:
+ if (!mlx4_is_master(dev)) {
+ mlx4_warn(dev, "Received comm channel event for non master device\n");
+ break;
+ }
+ memcpy(&priv->mfunc.master.comm_arm_bit_vector,
+ eqe->event.comm_channel_arm.bit_vec,
+ sizeof(eqe->event.comm_channel_arm.bit_vec));
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.comm_work);
+ break;
+
+ case MLX4_EVENT_TYPE_FLR_EVENT:
+ flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id);
+ if (!mlx4_is_master(dev)) {
+ mlx4_warn(dev, "Non-master function received FLR event\n");
+ break;
+ }
+
+ mlx4_dbg(dev, "FLR event for slave: %d\n", flr_slave);
+
+ if (flr_slave >= dev->num_slaves) {
+ mlx4_warn(dev,
+ "Got FLR for unknown function: %d\n",
+ flr_slave);
+ update_slave_state = 0;
+ } else
+ update_slave_state = 1;
+
+ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
+ if (update_slave_state) {
+ priv->mfunc.master.slave_state[flr_slave].active = false;
+ priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
+ priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
+ }
+ spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
+ flr_slave);
+ queue_work(priv->mfunc.master.comm_wq,
+ &priv->mfunc.master.slave_flr_event_work);
+ break;
+
+ case MLX4_EVENT_TYPE_FATAL_WARNING:
+ if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) {
+ if (mlx4_is_master(dev))
+ for (i = 0; i < dev->num_slaves; i++) {
+ mlx4_dbg(dev, "%s: Sending MLX4_FATAL_WARNING_SUBTYPE_WARMING to slave: %d\n",
+ __func__, i);
+ if (i == dev->caps.function)
+ continue;
+ mlx4_slave_event(dev, i, eqe);
+ }
+ mlx4_err(dev, "Temperature Threshold was reached! Threshold: %d celsius degrees; Current Temperature: %d\n",
+ be16_to_cpu(eqe->event.warming.warning_threshold),
+ be16_to_cpu(eqe->event.warming.current_temperature));
+ } else
+ mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), subtype %02x on EQ %d at index %u. owner=%x, nent=0x%x, slave=%x, ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ eqe->slave_id,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
+
+ break;
+
+ case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT:
+ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
+ (unsigned long) eqe);
+ break;
+
+ case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT:
+ switch (eqe->subtype) {
+ case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE:
+ mlx4_warn(dev, "Bad cable detected on port %u\n",
+ eqe->event.bad_cable.port);
+ break;
+ case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE:
+ mlx4_warn(dev, "Unsupported cable detected\n");
+ break;
+ default:
+ mlx4_dbg(dev,
+ "Unhandled recoverable error event detected: %02x(%02x) on EQ %d at index %u. owner=%x, nent=0x%x, ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
+ break;
+ }
+ break;
+
+ case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
+ case MLX4_EVENT_TYPE_ECC_DETECT:
+ default:
+ mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u. owner=%x, nent=0x%x, slave=%x, ownership=%s\n",
+ eqe->type, eqe->subtype, eq->eqn,
+ eq->cons_index, eqe->owner, eq->nent,
+ eqe->slave_id,
+ !!(eqe->owner & 0x80) ^
+ !!(eq->cons_index & eq->nent) ? "HW" : "SW");
+ break;
+ };
+
+ ++eq->cons_index;
+ eqes_found = 1;
+ ++set_ci;
+
+ /*
+ * The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MLX4_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
+ if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
+ eq_set_ci(eq, 0);
+ set_ci = 0;
+ }
+ }
+
+ eq_set_ci(eq, 1);
+
+ return eqes_found;
+}
+
+static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
+{
+ struct mlx4_dev *dev = dev_ptr;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int work = 0;
+ int i;
+
+ writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
+
+ for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
+ work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
+
+ return IRQ_RETVAL(work);
+}
+
+static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
+{
+ struct mlx4_eq *eq = eq_ptr;
+ struct mlx4_dev *dev = eq->dev;
+
+ mlx4_eq_int(dev, eq);
+
+ /* MSI-X vectors always belong to us */
+ return IRQ_HANDLED;
+}
+
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq_info *event_eq =
+ priv->mfunc.master.slave_state[slave].event_eq;
+ u32 in_modifier = vhcr->in_modifier;
+ u32 eqn = in_modifier & 0x3FF;
+ u64 in_param = vhcr->in_param;
+ int err = 0;
+ int i;
+
+ if (slave == dev->caps.function)
+ err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn,
+ 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ if (!err)
+ for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i)
+ if (in_param & (1LL << i))
+ event_eq[i].eqn = in_modifier >> 31 ? -1 : eqn;
+
+ return err;
+}
+
+static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
+ int eq_num)
+{
+ return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
+ 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int eq_num)
+{
+ return mlx4_cmd(dev, mailbox->dma, eq_num, 0,
+ MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, int eq_num)
+{
+ return mlx4_cmd(dev, 0, eq_num, 1, MLX4_CMD_HW2SW_EQ,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_num_eq_uar(struct mlx4_dev *dev)
+{
+ /*
+ * Each UAR holds 4 EQ doorbells. To figure out how many UARs
+ * we need to map, take the difference of highest index and
+ * the lowest index we'll use and add 1.
+ */
+ return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
+ dev->caps.reserved_eqs / 4 + 1;
+}
+
+static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int index;
+
+ index = eq->eqn / 4 - dev->caps.reserved_eqs / 4;
+
+ if (!priv->eq_table.uar_map[index]) {
+ priv->eq_table.uar_map[index] =
+ ioremap(
+ pci_resource_start(dev->persist->pdev, 2) +
+ ((eq->eqn / 4) << (dev->uar_page_shift)),
+ (1 << (dev->uar_page_shift)));
+ if (!priv->eq_table.uar_map[index]) {
+ mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
+ eq->eqn);
+ return NULL;
+ }
+ }
+
+ return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
+}
+
+static void mlx4_unmap_uar(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
+ if (priv->eq_table.uar_map[i]) {
+ iounmap(priv->eq_table.uar_map[i]);
+ priv->eq_table.uar_map[i] = NULL;
+ }
+}
+
+static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
+ u8 intr, struct mlx4_eq *eq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_eq_context *eq_context;
+ int npages;
+ u64 *dma_list = NULL;
+ dma_addr_t t;
+ u64 mtt_addr;
+ int err = -ENOMEM;
+ int i;
+
+ eq->dev = dev;
+ eq->nent = roundup_pow_of_two(max(nent, 2));
+ /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+ * strides of 64B,128B and 256B.
+ */
+ npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE;
+
+ eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list),
+ GFP_KERNEL);
+ if (!eq->page_list)
+ goto err_out;
+
+ for (i = 0; i < npages; ++i)
+ eq->page_list[i].buf = NULL;
+
+ dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL);
+ if (!dma_list)
+ goto err_out_free;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ goto err_out_free;
+ eq_context = mailbox->buf;
+
+ for (i = 0; i < npages; ++i) {
+ eq->page_list[i].buf = dma_alloc_coherent(&dev->persist->
+ pdev->dev,
+ PAGE_SIZE, &t,
+ GFP_KERNEL);
+ if (!eq->page_list[i].buf)
+ goto err_out_free_pages;
+
+ dma_list[i] = t;
+ eq->page_list[i].map = t;
+
+ memset(eq->page_list[i].buf, 0, PAGE_SIZE);
+ }
+
+ eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
+ if (eq->eqn == -1)
+ goto err_out_free_pages;
+
+ eq->doorbell = mlx4_get_eq_uar(dev, eq);
+ if (!eq->doorbell) {
+ err = -ENOMEM;
+ goto err_out_free_eq;
+ }
+
+ err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt);
+ if (err)
+ goto err_out_free_eq;
+
+ err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list);
+ if (err)
+ goto err_out_free_mtt;
+
+ eq_context->flags = cpu_to_be32(MLX4_EQ_STATUS_OK |
+ MLX4_EQ_STATE_ARMED);
+ eq_context->log_eq_size = ilog2(eq->nent);
+ eq_context->intr = intr;
+ eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT;
+
+ mtt_addr = mlx4_mtt_addr(dev, &eq->mtt);
+ eq_context->mtt_base_addr_h = mtt_addr >> 32;
+ eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+
+ err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn);
+ if (err) {
+ mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err);
+ goto err_out_free_mtt;
+ }
+
+ kfree(dma_list);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ eq->cons_index = 0;
+
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+
+ return err;
+
+err_out_free_mtt:
+ mlx4_mtt_cleanup(dev, &eq->mtt);
+
+err_out_free_eq:
+ mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
+
+err_out_free_pages:
+ for (i = 0; i < npages; ++i)
+ if (eq->page_list[i].buf)
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ eq->page_list[i].map);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+err_out_free:
+ kfree(eq->page_list);
+ kfree(dma_list);
+
+err_out:
+ return err;
+}
+
+static void mlx4_free_eq(struct mlx4_dev *dev,
+ struct mlx4_eq *eq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+ int i;
+ /* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+ * strides of 64B,128B and 256B
+ */
+ int npages = PAGE_ALIGN(dev->caps.eqe_size * eq->nent) / PAGE_SIZE;
+
+ err = mlx4_HW2SW_EQ(dev, eq->eqn);
+ if (err)
+ mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err);
+
+ synchronize_irq(eq->irq);
+ tasklet_disable(&eq->tasklet_ctx.task);
+
+ mlx4_mtt_cleanup(dev, &eq->mtt);
+ for (i = 0; i < npages; ++i)
+ dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ eq->page_list[i].map);
+
+ kfree(eq->page_list);
+ mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
+}
+
+static void mlx4_free_irqs(struct mlx4_dev *dev)
+{
+ struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
+ int i;
+
+ if (eq_table->have_irq)
+ free_irq(dev->persist->pdev->irq, dev);
+
+ for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
+ if (eq_table->eq[i].have_irq) {
+ free_cpumask_var(eq_table->eq[i].affinity_mask);
+#if defined(CONFIG_SMP)
+ irq_set_affinity_hint(eq_table->eq[i].irq, NULL);
+#endif
+ free_irq(eq_table->eq[i].irq, eq_table->eq + i);
+ eq_table->eq[i].have_irq = 0;
+ }
+
+ kfree(eq_table->irq_names);
+}
+
+static int mlx4_map_clr_int(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev,
+ priv->fw.clr_int_bar) +
+ priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
+ if (!priv->clr_base) {
+ mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ iounmap(priv->clr_base);
+}
+
+int mlx4_alloc_eq_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs,
+ sizeof(*priv->eq_table.eq), GFP_KERNEL);
+ if (!priv->eq_table.eq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_free_eq_table(struct mlx4_dev *dev)
+{
+ kfree(mlx4_priv(dev)->eq_table.eq);
+}
+
+int mlx4_init_eq_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+ int i;
+
+ priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev),
+ sizeof(*priv->eq_table.uar_map),
+ GFP_KERNEL);
+ if (!priv->eq_table.uar_map) {
+ err = -ENOMEM;
+ goto err_out_free;
+ }
+
+ err = mlx4_bitmap_init(&priv->eq_table.bitmap,
+ roundup_pow_of_two(dev->caps.num_eqs),
+ dev->caps.num_eqs - 1,
+ dev->caps.reserved_eqs,
+ roundup_pow_of_two(dev->caps.num_eqs) -
+ dev->caps.num_eqs);
+ if (err)
+ goto err_out_free;
+
+ for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
+ priv->eq_table.uar_map[i] = NULL;
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_map_clr_int(dev);
+ if (err)
+ goto err_out_bitmap;
+
+ priv->eq_table.clr_mask =
+ swab32(1 << (priv->eq_table.inta_pin & 31));
+ priv->eq_table.clr_int = priv->clr_base +
+ (priv->eq_table.inta_pin < 32 ? 4 : 0);
+ }
+
+ priv->eq_table.irq_names =
+ kmalloc_array(MLX4_IRQNAME_SIZE,
+ (dev->caps.num_comp_vectors + 1),
+ GFP_KERNEL);
+ if (!priv->eq_table.irq_names) {
+ err = -ENOMEM;
+ goto err_out_clr_int;
+ }
+
+ for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
+ if (i == MLX4_EQ_ASYNC) {
+ err = mlx4_create_eq(dev,
+ MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
+ 0, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+ } else {
+ struct mlx4_eq *eq = &priv->eq_table.eq[i];
+#ifdef CONFIG_RFS_ACCEL
+ int port = find_first_bit(eq->actv_ports.ports,
+ dev->caps.num_ports) + 1;
+
+ if (port <= dev->caps.num_ports) {
+ struct mlx4_port_info *info =
+ &mlx4_priv(dev)->port[port];
+
+ if (!info->rmap) {
+ info->rmap = alloc_irq_cpu_rmap(
+ mlx4_get_eqs_per_port(dev, port));
+ if (!info->rmap) {
+ mlx4_warn(dev, "Failed to allocate cpu rmap\n");
+ err = -ENOMEM;
+ goto err_out_unmap;
+ }
+ }
+
+ err = irq_cpu_rmap_add(
+ info->rmap, eq->irq);
+ if (err)
+ mlx4_warn(dev, "Failed adding irq rmap\n");
+ }
+#endif
+ err = mlx4_create_eq(dev, dev->quotas.cq +
+ MLX4_NUM_SPARE_EQE,
+ (dev->flags & MLX4_FLAG_MSI_X) ?
+ i + 1 - !!(i > MLX4_EQ_ASYNC) : 0,
+ eq);
+ }
+ if (err)
+ goto err_out_unmap;
+ }
+
+ if (dev->flags & MLX4_FLAG_MSI_X) {
+ const char *eq_name;
+
+ snprintf(priv->eq_table.irq_names +
+ MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE,
+ "mlx4-async@pci:%s",
+ pci_name(dev->persist->pdev));
+ eq_name = priv->eq_table.irq_names +
+ MLX4_EQ_ASYNC * MLX4_IRQNAME_SIZE;
+
+ err = request_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq,
+ mlx4_msi_x_interrupt, 0, eq_name,
+ priv->eq_table.eq + MLX4_EQ_ASYNC);
+ if (err)
+ goto err_out_unmap;
+
+ priv->eq_table.eq[MLX4_EQ_ASYNC].have_irq = 1;
+ } else {
+ snprintf(priv->eq_table.irq_names,
+ MLX4_IRQNAME_SIZE,
+ DRV_NAME "@pci:%s",
+ pci_name(dev->persist->pdev));
+ err = request_irq(dev->persist->pdev->irq, mlx4_interrupt,
+ IRQF_SHARED, priv->eq_table.irq_names, dev);
+ if (err)
+ goto err_out_unmap;
+
+ priv->eq_table.have_irq = 1;
+ }
+
+ err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
+ priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+ if (err)
+ mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
+ priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
+
+ /* arm ASYNC eq */
+ eq_set_ci(&priv->eq_table.eq[MLX4_EQ_ASYNC], 1);
+
+ return 0;
+
+err_out_unmap:
+ while (i > 0)
+ mlx4_free_eq(dev, &priv->eq_table.eq[--i]);
+#ifdef CONFIG_RFS_ACCEL
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ if (mlx4_priv(dev)->port[i].rmap) {
+ free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap);
+ mlx4_priv(dev)->port[i].rmap = NULL;
+ }
+ }
+#endif
+ mlx4_free_irqs(dev);
+
+err_out_clr_int:
+ if (!mlx4_is_slave(dev))
+ mlx4_unmap_clr_int(dev);
+
+err_out_bitmap:
+ mlx4_unmap_uar(dev);
+ mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+err_out_free:
+ kfree(priv->eq_table.uar_map);
+
+ return err;
+}
+
+void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1,
+ priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+
+#ifdef CONFIG_RFS_ACCEL
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ if (mlx4_priv(dev)->port[i].rmap) {
+ free_irq_cpu_rmap(mlx4_priv(dev)->port[i].rmap);
+ mlx4_priv(dev)->port[i].rmap = NULL;
+ }
+ }
+#endif
+ mlx4_free_irqs(dev);
+
+ for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
+ mlx4_free_eq(dev, &priv->eq_table.eq[i]);
+
+ if (!mlx4_is_slave(dev))
+ mlx4_unmap_clr_int(dev);
+
+ mlx4_unmap_uar(dev);
+ mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+ kfree(priv->eq_table.uar_map);
+}
+
+/* A test that verifies that we can accept interrupts
+ * on the vector allocated for asynchronous events
+ */
+int mlx4_test_async(struct mlx4_dev *dev)
+{
+ return mlx4_NOP(dev);
+}
+EXPORT_SYMBOL(mlx4_test_async);
+
+/* A test that verifies that we can accept interrupts
+ * on the given irq vector of the tested port.
+ * Interrupts are checked using the NOP command.
+ */
+int mlx4_test_interrupt(struct mlx4_dev *dev, int vector)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+
+ /* Temporary use polling for command completions */
+ mlx4_cmd_use_polling(dev);
+
+ /* Map the new eq to handle all asynchronous events */
+ err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
+ priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn);
+ if (err) {
+ mlx4_warn(dev, "Failed mapping eq for interrupt test\n");
+ goto out;
+ }
+
+ /* Go back to using events */
+ mlx4_cmd_use_events(dev);
+ err = mlx4_NOP(dev);
+
+ /* Return to default */
+ mlx4_cmd_use_polling(dev);
+out:
+ mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
+ priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+ mlx4_cmd_use_events(dev);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx4_test_interrupt);
+
+bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ vector = MLX4_CQ_TO_EQ_VECTOR(vector);
+ if (vector < 0 || (vector >= dev->caps.num_comp_vectors + 1) ||
+ (vector == MLX4_EQ_ASYNC))
+ return false;
+
+ return test_bit(port - 1, priv->eq_table.eq[vector].actv_ports.ports);
+}
+EXPORT_SYMBOL(mlx4_is_eq_vector_valid);
+
+u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ unsigned int i;
+ unsigned int sum = 0;
+
+ for (i = 0; i < dev->caps.num_comp_vectors + 1; i++)
+ sum += !!test_bit(port - 1,
+ priv->eq_table.eq[i].actv_ports.ports);
+
+ return sum;
+}
+EXPORT_SYMBOL(mlx4_get_eqs_per_port);
+
+int mlx4_is_eq_shared(struct mlx4_dev *dev, int vector)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ vector = MLX4_CQ_TO_EQ_VECTOR(vector);
+ if (vector <= 0 || (vector >= dev->caps.num_comp_vectors + 1))
+ return -EINVAL;
+
+ return !!(bitmap_weight(priv->eq_table.eq[vector].actv_ports.ports,
+ dev->caps.num_ports) > 1);
+}
+EXPORT_SYMBOL(mlx4_is_eq_shared);
+
+struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port)
+{
+ return mlx4_priv(dev)->port[port].rmap;
+}
+EXPORT_SYMBOL(mlx4_get_cpu_rmap);
+
+int mlx4_assign_eq(struct mlx4_dev *dev, u8 port, int *vector)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err = 0, i = 0;
+ u32 min_ref_count_val = (u32)-1;
+ int requested_vector = MLX4_CQ_TO_EQ_VECTOR(*vector);
+ int *prequested_vector = NULL;
+
+
+ mutex_lock(&priv->msix_ctl.pool_lock);
+ if (requested_vector < (dev->caps.num_comp_vectors + 1) &&
+ (requested_vector >= 0) &&
+ (requested_vector != MLX4_EQ_ASYNC)) {
+ if (test_bit(port - 1,
+ priv->eq_table.eq[requested_vector].actv_ports.ports)) {
+ prequested_vector = &requested_vector;
+ } else {
+ struct mlx4_eq *eq;
+
+ for (i = 1; i < port;
+ requested_vector += mlx4_get_eqs_per_port(dev, i++))
+ ;
+
+ eq = &priv->eq_table.eq[requested_vector];
+ if (requested_vector < dev->caps.num_comp_vectors + 1 &&
+ test_bit(port - 1, eq->actv_ports.ports)) {
+ prequested_vector = &requested_vector;
+ }
+ }
+ }
+
+ if (!prequested_vector) {
+ requested_vector = -1;
+ for (i = 0; min_ref_count_val && i < dev->caps.num_comp_vectors + 1;
+ i++) {
+ struct mlx4_eq *eq = &priv->eq_table.eq[i];
+
+ if (min_ref_count_val > eq->ref_count &&
+ test_bit(port - 1, eq->actv_ports.ports)) {
+ min_ref_count_val = eq->ref_count;
+ requested_vector = i;
+ }
+ }
+
+ if (requested_vector < 0) {
+ err = -ENOSPC;
+ goto err_unlock;
+ }
+
+ prequested_vector = &requested_vector;
+ }
+
+ if (!test_bit(*prequested_vector, priv->msix_ctl.pool_bm) &&
+ dev->flags & MLX4_FLAG_MSI_X) {
+ set_bit(*prequested_vector, priv->msix_ctl.pool_bm);
+ snprintf(priv->eq_table.irq_names +
+ *prequested_vector * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE, "mlx4-%d@%s",
+ *prequested_vector, dev_name(&dev->persist->pdev->dev));
+
+ err = request_irq(priv->eq_table.eq[*prequested_vector].irq,
+ mlx4_msi_x_interrupt, 0,
+ &priv->eq_table.irq_names[*prequested_vector << 5],
+ priv->eq_table.eq + *prequested_vector);
+
+ if (err) {
+ clear_bit(*prequested_vector, priv->msix_ctl.pool_bm);
+ *prequested_vector = -1;
+ } else {
+#if defined(CONFIG_SMP)
+ mlx4_set_eq_affinity_hint(priv, *prequested_vector);
+#endif
+ eq_set_ci(&priv->eq_table.eq[*prequested_vector], 1);
+ priv->eq_table.eq[*prequested_vector].have_irq = 1;
+ }
+ }
+
+ if (!err && *prequested_vector >= 0)
+ priv->eq_table.eq[*prequested_vector].ref_count++;
+
+err_unlock:
+ mutex_unlock(&priv->msix_ctl.pool_lock);
+
+ if (!err && *prequested_vector >= 0)
+ *vector = MLX4_EQ_TO_CQ_VECTOR(*prequested_vector);
+ else
+ *vector = 0;
+
+ return err;
+}
+EXPORT_SYMBOL(mlx4_assign_eq);
+
+int mlx4_eq_get_irq(struct mlx4_dev *dev, int cq_vec)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq_vec)].irq;
+}
+EXPORT_SYMBOL(mlx4_eq_get_irq);
+
+void mlx4_release_eq(struct mlx4_dev *dev, int vec)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int eq_vec = MLX4_CQ_TO_EQ_VECTOR(vec);
+
+ mutex_lock(&priv->msix_ctl.pool_lock);
+ priv->eq_table.eq[eq_vec].ref_count--;
+
+ /* once we allocated EQ, we don't release it because it might be binded
+ * to cpu_rmap.
+ */
+ mutex_unlock(&priv->msix_ctl.pool_lock);
+}
+EXPORT_SYMBOL(mlx4_release_eq);
+
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
new file mode 100644
index 000000000..ce57df04a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -0,0 +1,3108 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/module.h>
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <uapi/rdma/mlx4-abi.h>
+
+#include "fw.h"
+#include "icm.h"
+
+enum {
+ MLX4_COMMAND_INTERFACE_MIN_REV = 2,
+ MLX4_COMMAND_INTERFACE_MAX_REV = 3,
+ MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS = 3,
+};
+
+extern void __buggy_use_of_MLX4_GET(void);
+extern void __buggy_use_of_MLX4_PUT(void);
+
+static bool enable_qos;
+module_param(enable_qos, bool, 0444);
+MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)");
+
+#define MLX4_GET(dest, source, offset) \
+ do { \
+ void *__p = (char *) (source) + (offset); \
+ __be64 val; \
+ switch (sizeof(dest)) { \
+ case 1: (dest) = *(u8 *) __p; break; \
+ case 2: (dest) = be16_to_cpup(__p); break; \
+ case 4: (dest) = be32_to_cpup(__p); break; \
+ case 8: val = get_unaligned((__be64 *)__p); \
+ (dest) = be64_to_cpu(val); break; \
+ default: __buggy_use_of_MLX4_GET(); \
+ } \
+ } while (0)
+
+#define MLX4_PUT(dest, source, offset) \
+ do { \
+ void *__d = ((char *) (dest) + (offset)); \
+ switch (sizeof(source)) { \
+ case 1: *(u8 *) __d = (source); break; \
+ case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
+ case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
+ case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
+ default: __buggy_use_of_MLX4_PUT(); \
+ } \
+ } while (0)
+
+static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
+{
+ static const char *fname[] = {
+ [ 0] = "RC transport",
+ [ 1] = "UC transport",
+ [ 2] = "UD transport",
+ [ 3] = "XRC transport",
+ [ 6] = "SRQ support",
+ [ 7] = "IPoIB checksum offload",
+ [ 8] = "P_Key violation counter",
+ [ 9] = "Q_Key violation counter",
+ [12] = "Dual Port Different Protocol (DPDP) support",
+ [15] = "Big LSO headers",
+ [16] = "MW support",
+ [17] = "APM support",
+ [18] = "Atomic ops support",
+ [19] = "Raw multicast support",
+ [20] = "Address vector port checking support",
+ [21] = "UD multicast support",
+ [30] = "IBoE support",
+ [32] = "Unicast loopback support",
+ [34] = "FCS header control",
+ [37] = "Wake On LAN (port1) support",
+ [38] = "Wake On LAN (port2) support",
+ [40] = "UDP RSS support",
+ [41] = "Unicast VEP steering support",
+ [42] = "Multicast VEP steering support",
+ [48] = "Counters support",
+ [52] = "RSS IP fragments support",
+ [53] = "Port ETS Scheduler support",
+ [55] = "Port link type sensing support",
+ [59] = "Port management change event support",
+ [61] = "64 byte EQE support",
+ [62] = "64 byte CQE support",
+ };
+ int i;
+
+ mlx4_dbg(dev, "DEV_CAP flags:\n");
+ for (i = 0; i < ARRAY_SIZE(fname); ++i)
+ if (fname[i] && (flags & (1LL << i)))
+ mlx4_dbg(dev, " %s\n", fname[i]);
+}
+
+static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
+{
+ static const char * const fname[] = {
+ [0] = "RSS support",
+ [1] = "RSS Toeplitz Hash Function support",
+ [2] = "RSS XOR Hash Function support",
+ [3] = "Device managed flow steering support",
+ [4] = "Automatic MAC reassignment support",
+ [5] = "Time stamping support",
+ [6] = "VST (control vlan insertion/stripping) support",
+ [7] = "FSM (MAC anti-spoofing) support",
+ [8] = "Dynamic QP updates support",
+ [9] = "Device managed flow steering IPoIB support",
+ [10] = "TCP/IP offloads/flow-steering for VXLAN support",
+ [11] = "MAD DEMUX (Secure-Host) support",
+ [12] = "Large cache line (>64B) CQE stride support",
+ [13] = "Large cache line (>64B) EQE stride support",
+ [14] = "Ethernet protocol control support",
+ [15] = "Ethernet Backplane autoneg support",
+ [16] = "CONFIG DEV support",
+ [17] = "Asymmetric EQs support",
+ [18] = "More than 80 VFs support",
+ [19] = "Performance optimized for limited rule configuration flow steering support",
+ [20] = "Recoverable error events support",
+ [21] = "Port Remap support",
+ [22] = "QCN support",
+ [23] = "QP rate limiting support",
+ [24] = "Ethernet Flow control statistics support",
+ [25] = "Granular QoS per VF support",
+ [26] = "Port ETS Scheduler support",
+ [27] = "Port beacon support",
+ [28] = "RX-ALL support",
+ [29] = "802.1ad offload support",
+ [31] = "Modifying loopback source checks using UPDATE_QP support",
+ [32] = "Loopback source checks support",
+ [33] = "RoCEv2 support",
+ [34] = "DMFS Sniffer support (UC & MC)",
+ [35] = "Diag counters per port",
+ [36] = "QinQ VST mode support",
+ [37] = "sl to vl mapping table change event support",
+ [38] = "user MAC support",
+ [39] = "Report driver version to FW support",
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fname); ++i)
+ if (fname[i] && (flags & (1LL << i)))
+ mlx4_dbg(dev, " %s\n", fname[i]);
+}
+
+int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *inbox;
+ int err = 0;
+
+#define MOD_STAT_CFG_IN_SIZE 0x100
+
+#define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002
+#define MOD_STAT_CFG_PG_SZ_OFFSET 0x003
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET);
+ MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET);
+
+ err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u8 in_modifier;
+ u8 field;
+ u16 field16;
+ int err;
+
+#define QUERY_FUNC_BUS_OFFSET 0x00
+#define QUERY_FUNC_DEVICE_OFFSET 0x01
+#define QUERY_FUNC_FUNCTION_OFFSET 0x01
+#define QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET 0x03
+#define QUERY_FUNC_RSVD_EQS_OFFSET 0x04
+#define QUERY_FUNC_MAX_EQ_OFFSET 0x06
+#define QUERY_FUNC_RSVD_UARS_OFFSET 0x0b
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ in_modifier = slave;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0,
+ MLX4_CMD_QUERY_FUNC,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ MLX4_GET(field, outbox, QUERY_FUNC_BUS_OFFSET);
+ func->bus = field & 0xf;
+ MLX4_GET(field, outbox, QUERY_FUNC_DEVICE_OFFSET);
+ func->device = field & 0xf1;
+ MLX4_GET(field, outbox, QUERY_FUNC_FUNCTION_OFFSET);
+ func->function = field & 0x7;
+ MLX4_GET(field, outbox, QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET);
+ func->physical_function = field & 0xf;
+ MLX4_GET(field16, outbox, QUERY_FUNC_RSVD_EQS_OFFSET);
+ func->rsvd_eqs = field16 & 0xffff;
+ MLX4_GET(field16, outbox, QUERY_FUNC_MAX_EQ_OFFSET);
+ func->max_eq = field16 & 0xffff;
+ MLX4_GET(field, outbox, QUERY_FUNC_RSVD_UARS_OFFSET);
+ func->rsvd_uars = field & 0x0f;
+
+ mlx4_dbg(dev, "Bus: %d, Device: %d, Function: %d, Physical function: %d, Max EQs: %d, Reserved EQs: %d, Reserved UARs: %d\n",
+ func->bus, func->device, func->function, func->physical_function,
+ func->max_eq, func->rsvd_eqs, func->rsvd_uars);
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port)
+{
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_vport_state *vp_admin;
+ int err;
+
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+ if (vp_admin->default_vlan != vp_oper->state.default_vlan) {
+ err = __mlx4_register_vlan(&priv->dev, port,
+ vp_admin->default_vlan,
+ &vp_oper->vlan_idx);
+ if (err) {
+ vp_oper->vlan_idx = NO_INDX;
+ mlx4_warn(&priv->dev,
+ "No vlan resources slave %d, port %d\n",
+ slave, port);
+ return err;
+ }
+ mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n",
+ (int)(vp_oper->state.default_vlan),
+ vp_oper->vlan_idx, slave, port);
+ }
+ vp_oper->state.vlan_proto = vp_admin->vlan_proto;
+ vp_oper->state.default_vlan = vp_admin->default_vlan;
+ vp_oper->state.default_qos = vp_admin->default_qos;
+
+ return 0;
+}
+
+static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port)
+{
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_slave_state *slave_state;
+ struct mlx4_vport_state *vp_admin;
+ int err;
+
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+ slave_state = &priv->mfunc.master.slave_state[slave];
+
+ if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) ||
+ (!slave_state->active))
+ return 0;
+
+ if (vp_oper->state.vlan_proto == vp_admin->vlan_proto &&
+ vp_oper->state.default_vlan == vp_admin->default_vlan &&
+ vp_oper->state.default_qos == vp_admin->default_qos)
+ return 0;
+
+ if (!slave_state->vst_qinq_supported) {
+ /* Warn and revert the request to set vst QinQ mode */
+ vp_admin->vlan_proto = vp_oper->state.vlan_proto;
+ vp_admin->default_vlan = vp_oper->state.default_vlan;
+ vp_admin->default_qos = vp_oper->state.default_qos;
+
+ mlx4_warn(&priv->dev,
+ "Slave %d does not support VST QinQ mode\n", slave);
+ return 0;
+ }
+
+ err = mlx4_activate_vst_qinq(priv, slave, port);
+ return err;
+}
+
+int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u8 field, port;
+ u32 size, proxy_qp, qkey;
+ int err = 0;
+ struct mlx4_func func;
+
+#define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0
+#define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1
+#define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4
+#define QUERY_FUNC_CAP_FMR_OFFSET 0x8
+#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP 0x10
+#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP 0x14
+#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP 0x18
+#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP 0x20
+#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP 0x24
+#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28
+#define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c
+#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30
+#define QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET 0x48
+
+#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50
+#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54
+#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x58
+#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x60
+#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64
+#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68
+
+#define QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET 0x6c
+
+#define QUERY_FUNC_CAP_FMR_FLAG 0x80
+#define QUERY_FUNC_CAP_FLAG_RDMA 0x40
+#define QUERY_FUNC_CAP_FLAG_ETH 0x80
+#define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10
+#define QUERY_FUNC_CAP_FLAG_RESD_LKEY 0x08
+#define QUERY_FUNC_CAP_FLAG_VALID_MAILBOX 0x04
+
+#define QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG (1UL << 31)
+#define QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG (1UL << 30)
+
+/* when opcode modifier = 1 */
+#define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3
+#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET 0x4
+#define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8
+#define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc
+
+#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10
+#define QUERY_FUNC_CAP_QP0_PROXY 0x14
+#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18
+#define QUERY_FUNC_CAP_QP1_PROXY 0x1c
+#define QUERY_FUNC_CAP_PHYS_PORT_ID 0x28
+
+#define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC 0x40
+#define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN 0x80
+#define QUERY_FUNC_CAP_FLAGS1_NIC_INFO 0x10
+#define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08
+
+#define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
+#define QUERY_FUNC_CAP_PHV_BIT 0x40
+#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20
+
+#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30)
+#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31)
+
+ if (vhcr->op_modifier == 1) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev, slave);
+ int converted_port = mlx4_slave_convert_port(
+ dev, slave, vhcr->in_modifier);
+ struct mlx4_vport_oper_state *vp_oper;
+
+ if (converted_port < 0)
+ return -EINVAL;
+
+ vhcr->in_modifier = converted_port;
+ /* phys-port = logical-port */
+ field = vhcr->in_modifier -
+ find_first_bit(actv_ports.ports, dev->caps.num_ports);
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+
+ port = vhcr->in_modifier;
+ proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1;
+
+ /* Set nic_info bit to mark new fields support */
+ field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
+
+ if (mlx4_vf_smi_enabled(dev, slave, port) &&
+ !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) {
+ field |= QUERY_FUNC_CAP_VF_ENABLE_QP0;
+ MLX4_PUT(outbox->buf, qkey,
+ QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+ }
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+
+ /* size is now the QP number */
+ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
+
+ size += 2;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
+
+ MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY);
+ proxy_qp += 2;
+ MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY);
+
+ MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
+ QUERY_FUNC_CAP_PHYS_PORT_ID);
+
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ err = mlx4_handle_vst_qinq(priv, slave, port);
+ if (err)
+ return err;
+
+ field = 0;
+ if (dev->caps.phv_bit[port])
+ field |= QUERY_FUNC_CAP_PHV_BIT;
+ if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD))
+ field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE;
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET);
+
+ } else if (vhcr->op_modifier == 0) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev, slave);
+ struct mlx4_slave_state *slave_state =
+ &priv->mfunc.master.slave_state[slave];
+
+ /* enable rdma and ethernet interfaces, new quota locations,
+ * and reserved lkey
+ */
+ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA |
+ QUERY_FUNC_CAP_FLAG_QUOTAS | QUERY_FUNC_CAP_FLAG_VALID_MAILBOX |
+ QUERY_FUNC_CAP_FLAG_RESD_LKEY);
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET);
+
+ field = min(
+ bitmap_weight(actv_ports.ports, dev->caps.num_ports),
+ dev->caps.num_ports);
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
+
+ size = dev->caps.function_caps; /* set PF behaviours */
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
+
+ field = 0; /* protected FMR support not available as yet */
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
+ size = dev->caps.num_qps;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
+ size = dev->caps.num_srqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
+ size = dev->caps.num_cqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP);
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) ||
+ mlx4_QUERY_FUNC(dev, &func, slave)) {
+ size = vhcr->in_modifier &
+ QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ?
+ dev->caps.num_eqs :
+ rounddown_pow_of_two(dev->caps.num_eqs);
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+ size = dev->caps.reserved_eqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+ } else {
+ size = vhcr->in_modifier &
+ QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ?
+ func.max_eq :
+ rounddown_pow_of_two(func.max_eq);
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+ size = func.rsvd_eqs;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+ }
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
+ size = dev->caps.num_mpts;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP);
+
+ size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave];
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
+ size = dev->caps.num_mtts;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP);
+
+ size = dev->caps.num_mgms + dev->caps.num_amgms;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP);
+
+ size = QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG |
+ QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG;
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET);
+
+ size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00);
+ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET);
+
+ if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ)
+ slave_state->vst_qinq_supported = true;
+
+ } else
+ err = -EINVAL;
+
+ return err;
+}
+
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
+ struct mlx4_func_cap *func_cap)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u8 field, op_modifier;
+ u32 size, qkey;
+ int err = 0, quotas = 0;
+ u32 in_modifier;
+ u32 slave_caps;
+
+ op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
+ slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ |
+ QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS;
+ in_modifier = op_modifier ? gen_or_port : slave_caps;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier,
+ MLX4_CMD_QUERY_FUNC_CAP,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ outbox = mailbox->buf;
+
+ if (!op_modifier) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
+ if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
+ mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ func_cap->flags = field;
+ quotas = !!(func_cap->flags & QUERY_FUNC_CAP_FLAG_QUOTAS);
+
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
+ func_cap->num_ports = field;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
+ func_cap->pf_context_behaviour = size;
+
+ if (quotas) {
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
+ func_cap->qp_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
+ func_cap->srq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
+ func_cap->cq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
+ func_cap->mpt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
+ func_cap->mtt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
+ func_cap->mcg_quota = size & 0xFFFFFF;
+
+ } else {
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP);
+ func_cap->qp_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP);
+ func_cap->srq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP);
+ func_cap->cq_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP);
+ func_cap->mpt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP);
+ func_cap->mtt_quota = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP);
+ func_cap->mcg_quota = size & 0xFFFFFF;
+ }
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
+ func_cap->max_eq = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
+ func_cap->reserved_eq = size & 0xFFFFFF;
+
+ if (func_cap->flags & QUERY_FUNC_CAP_FLAG_RESD_LKEY) {
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET);
+ func_cap->reserved_lkey = size;
+ } else {
+ func_cap->reserved_lkey = 0;
+ }
+
+ func_cap->extra_flags = 0;
+
+ /* Mailbox data from 0x6c and onward should only be treated if
+ * QUERY_FUNC_CAP_FLAG_VALID_MAILBOX is set in func_cap->flags
+ */
+ if (func_cap->flags & QUERY_FUNC_CAP_FLAG_VALID_MAILBOX) {
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET);
+ if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG)
+ func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_BF_RES_QP;
+ if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG)
+ func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_A0_RES_QP;
+ }
+
+ goto out;
+ }
+
+ /* logical port query */
+ if (gen_or_port > dev->caps.num_ports) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+ if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
+ if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN) {
+ mlx4_err(dev, "VLAN is enforced on this port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+
+ if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_MAC) {
+ mlx4_err(dev, "Force mac is enabled on this port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
+ if (field & QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID) {
+ mlx4_err(dev, "phy_wqe_gid is enforced on this ib port\n");
+ err = -EPROTONOSUPPORT;
+ goto out;
+ }
+ }
+
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
+ func_cap->physical_port = field;
+ if (func_cap->physical_port != gen_or_port) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) {
+ MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+ func_cap->spec_qps.qp0_qkey = qkey;
+ } else {
+ func_cap->spec_qps.qp0_qkey = 0;
+ }
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
+ func_cap->spec_qps.qp0_tunnel = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY);
+ func_cap->spec_qps.qp0_proxy = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL);
+ func_cap->spec_qps.qp1_tunnel = size & 0xFFFFFF;
+
+ MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
+ func_cap->spec_qps.qp1_proxy = size & 0xFFFFFF;
+
+ if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO)
+ MLX4_GET(func_cap->phys_port_id, outbox,
+ QUERY_FUNC_CAP_PHYS_PORT_ID);
+
+ MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
+
+ /* All other resources are allocated by the master, but we still report
+ * 'num' and 'reserved' capabilities as follows:
+ * - num remains the maximum resource index
+ * - 'num - reserved' is the total available objects of a resource, but
+ * resource indices may be less than 'reserved'
+ * TODO: set per-resource quotas */
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+
+static void disable_unsupported_roce_caps(void *buf);
+
+int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u8 field;
+ u32 field32, flags, ext_flags;
+ u16 size;
+ u16 stat_rate;
+ int err;
+ int i;
+
+#define QUERY_DEV_CAP_OUT_SIZE 0x100
+#define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10
+#define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11
+#define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12
+#define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13
+#define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14
+#define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15
+#define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16
+#define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17
+#define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19
+#define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a
+#define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b
+#define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d
+#define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e
+#define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f
+#define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20
+#define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21
+#define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22
+#define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23
+#define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET 0x26
+#define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27
+#define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29
+#define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b
+#define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d
+#define QUERY_DEV_CAP_RSS_OFFSET 0x2e
+#define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f
+#define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33
+#define QUERY_DEV_CAP_PORT_BEACON_OFFSET 0x34
+#define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35
+#define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36
+#define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37
+#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38
+#define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b
+#define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c
+#define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e
+#define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f
+#define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40
+#define QUERY_DEV_CAP_WOL_OFFSET 0x43
+#define QUERY_DEV_CAP_FLAGS_OFFSET 0x44
+#define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48
+#define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49
+#define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b
+#define QUERY_DEV_CAP_BF_OFFSET 0x4c
+#define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d
+#define QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e
+#define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f
+#define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51
+#define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52
+#define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55
+#define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56
+#define QUERY_DEV_CAP_USER_MAC_EN_OFFSET 0x5C
+#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D
+#define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61
+#define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62
+#define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63
+#define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64
+#define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65
+#define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66
+#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67
+#define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68
+#define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET 0x70
+#define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70
+#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74
+#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
+#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
+#define QUERY_DEV_CAP_SL2VL_EVENT_OFFSET 0x78
+#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a
+#define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET 0x7b
+#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
+#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
+#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
+#define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86
+#define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88
+#define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a
+#define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c
+#define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e
+#define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90
+#define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92
+#define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94
+#define QUERY_DEV_CAP_CONFIG_DEV_OFFSET 0x94
+#define QUERY_DEV_CAP_PHV_EN_OFFSET 0x96
+#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
+#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
+#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c
+#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c
+#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
+#define QUERY_DEV_CAP_VXLAN 0x9e
+#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
+#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8
+#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac
+#define QUERY_DEV_CAP_MAP_CLOCK_TO_USER 0xc1
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0
+#define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2
+#define QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET 0xe4
+
+ dev_cap->flags2 = 0;
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ if (mlx4_is_mfunc(dev))
+ disable_unsupported_roce_caps(outbox);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAP_CLOCK_TO_USER);
+ dev_cap->map_clock_to_user = field & 0x80;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
+ dev_cap->reserved_qps = 1 << (field & 0xf);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
+ dev_cap->max_qps = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET);
+ dev_cap->reserved_srqs = 1 << (field >> 4);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET);
+ dev_cap->max_srqs = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET);
+ dev_cap->max_cq_sz = 1 << field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET);
+ dev_cap->reserved_cqs = 1 << (field & 0xf);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET);
+ dev_cap->max_cqs = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
+ dev_cap->max_mpts = 1 << (field & 0x3f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
+ dev_cap->reserved_eqs = 1 << (field & 0xf);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
+ dev_cap->max_eqs = 1 << (field & 0xf);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
+ dev_cap->reserved_mtts = 1 << (field >> 4);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET);
+ dev_cap->reserved_mrws = 1 << (field & 0xf);
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET);
+ dev_cap->num_sys_eqs = size & 0xfff;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET);
+ dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
+ dev_cap->max_responder_per_qp = 1 << (field & 0x3f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET);
+ field &= 0x1f;
+ if (!field)
+ dev_cap->max_gso_sz = 0;
+ else
+ dev_cap->max_gso_sz = 1 << field;
+
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET);
+ if (field & 0x20)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR;
+ if (field & 0x10)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP;
+ field &= 0xf;
+ if (field) {
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS;
+ dev_cap->max_rss_tbl_sz = 1 << field;
+ } else
+ dev_cap->max_rss_tbl_sz = 0;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
+ dev_cap->max_rdma_global = 1 << (field & 0x3f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
+ dev_cap->local_ca_ack_delay = field & 0x1f;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
+ dev_cap->num_ports = field & 0xf;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
+ dev_cap->max_msg_sz = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET);
+ if (field & 0x10)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
+ dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+ if (field & 0x20)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
+ dev_cap->fs_max_num_qp_per_entry = field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_SL2VL_EVENT_OFFSET);
+ if (field & (1 << 5))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
+ if (field & 0x1)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN;
+ MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
+ dev_cap->stat_rate_support = stat_rate;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_TS;
+ MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
+ dev_cap->flags = flags | (u64)ext_flags << 32;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET);
+ dev_cap->wol_port[1] = !!(field & 0x20);
+ dev_cap->wol_port[2] = !!(field & 0x40);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
+ dev_cap->reserved_uars = field >> 4;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET);
+ dev_cap->uar_size = 1 << ((field & 0x3f) + 20);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET);
+ dev_cap->min_page_sz = 1 << field;
+
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET);
+ if (field & 0x80) {
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET);
+ dev_cap->bf_reg_size = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET);
+ if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
+ field = 3;
+ dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
+ } else {
+ dev_cap->bf_reg_size = 0;
+ }
+
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET);
+ dev_cap->max_sq_sg = field;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET);
+ dev_cap->max_sq_desc_sz = size;
+
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_USER_MAC_EN_OFFSET);
+ if (field & (1 << 2))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET);
+ if (field & 0x1)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET);
+ dev_cap->max_qp_per_mcg = 1 << field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET);
+ dev_cap->reserved_mgms = field & 0xf;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET);
+ dev_cap->max_mcgs = 1 << field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET);
+ dev_cap->reserved_pds = field >> 4;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
+ dev_cap->max_pds = 1 << (field & 0x3f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET);
+ dev_cap->reserved_xrcds = field >> 4;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET);
+ dev_cap->max_xrcds = 1 << (field & 0x1f);
+
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET);
+ dev_cap->rdmarc_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET);
+ dev_cap->qpc_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET);
+ dev_cap->aux_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET);
+ dev_cap->altc_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET);
+ dev_cap->eqc_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET);
+ dev_cap->cqc_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET);
+ dev_cap->srq_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET);
+ dev_cap->cmpt_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET);
+ dev_cap->mtt_entry_sz = size;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET);
+ dev_cap->dmpt_entry_sz = size;
+
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET);
+ dev_cap->max_srq_sz = 1 << field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET);
+ dev_cap->max_qp_sz = 1 << field;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET);
+ dev_cap->resize_srq = field & 1;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET);
+ dev_cap->max_rq_sg = field;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
+ dev_cap->max_rq_desc_sz = size;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+ if (field & (1 << 4))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QOS_VPP;
+ if (field & (1 << 5))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL;
+ if (field & (1 << 6))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ if (field & (1 << 7))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+ MLX4_GET(dev_cap->bmme_flags, outbox,
+ QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
+ if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
+ if (field & 0x20)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV;
+ if (field & (1 << 2))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_PHV_EN_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PHV_EN;
+ if (field & 0x40)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN;
+
+ MLX4_GET(dev_cap->reserved_lkey, outbox,
+ QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
+ MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET);
+ if (field32 & (1 << 0))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP;
+ if (field32 & (1 << 7))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT;
+ if (field32 & (1 << 8))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW;
+ MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT);
+ if (field32 & (1 << 17))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
+ if (field & 1<<6)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN);
+ if (field & 1<<3)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS;
+ if (field & (1 << 5))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
+ MLX4_GET(dev_cap->max_icm_sz, outbox,
+ QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
+ if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
+ MLX4_GET(dev_cap->max_counters, outbox,
+ QUERY_DEV_CAP_MAX_COUNTERS_OFFSET);
+
+ MLX4_GET(field32, outbox,
+ QUERY_DEV_CAP_MAD_DEMUX_OFFSET);
+ if (field32 & (1 << 0))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX;
+
+ MLX4_GET(dev_cap->dmfs_high_rate_qpn_base, outbox,
+ QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET);
+ dev_cap->dmfs_high_rate_qpn_base &= MGM_QPN_MASK;
+ MLX4_GET(dev_cap->dmfs_high_rate_qpn_range, outbox,
+ QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET);
+ dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK;
+
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET);
+ dev_cap->rl_caps.num_rates = size;
+ if (dev_cap->rl_caps.num_rates) {
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET);
+ dev_cap->rl_caps.max_val = size & 0xfff;
+ dev_cap->rl_caps.max_unit = size >> 14;
+ MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET);
+ dev_cap->rl_caps.min_val = size & 0xfff;
+ dev_cap->rl_caps.min_unit = size >> 14;
+ }
+
+ MLX4_GET(dev_cap->health_buffer_addrs, outbox,
+ QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET);
+
+ MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ if (field32 & (1 << 16))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
+ if (field32 & (1 << 18))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB;
+ if (field32 & (1 << 19))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK;
+ if (field32 & (1 << 26))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL;
+ if (field32 & (1 << 20))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM;
+ if (field32 & (1 << 21))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS;
+
+ for (i = 1; i <= dev_cap->num_ports; i++) {
+ err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i);
+ if (err)
+ goto out;
+ }
+
+ /*
+ * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
+ * we can't use any EQs whose doorbell falls on that page,
+ * even if the EQ itself isn't reserved.
+ */
+ if (dev_cap->num_sys_eqs == 0)
+ dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4,
+ dev_cap->reserved_eqs);
+ else
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS;
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+ if (dev_cap->bf_reg_size > 0)
+ mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
+ dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
+ else
+ mlx4_dbg(dev, "BlueFlame not available\n");
+
+ mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
+ dev_cap->bmme_flags, dev_cap->reserved_lkey);
+ mlx4_dbg(dev, "Max ICM size %lld MB\n",
+ (unsigned long long) dev_cap->max_icm_sz >> 20);
+ mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
+ dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz);
+ mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n",
+ dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz);
+ mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
+ dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz);
+ mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n",
+ dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs,
+ dev_cap->eqc_entry_sz);
+ mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
+ dev_cap->reserved_mrws, dev_cap->reserved_mtts);
+ mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
+ dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars);
+ mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
+ dev_cap->max_pds, dev_cap->reserved_mgms);
+ mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
+ dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz);
+ mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n",
+ dev_cap->local_ca_ack_delay, 128 << dev_cap->port_cap[1].ib_mtu,
+ dev_cap->port_cap[1].max_port_width);
+ mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n",
+ dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
+ mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
+ dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
+ mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
+ mlx4_dbg(dev, "Max counters: %d\n", dev_cap->max_counters);
+ mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz);
+ mlx4_dbg(dev, "DMFS high rate steer QPn base: %d\n",
+ dev_cap->dmfs_high_rate_qpn_base);
+ mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n",
+ dev_cap->dmfs_high_rate_qpn_range);
+
+ if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT) {
+ struct mlx4_rate_limit_caps *rl_caps = &dev_cap->rl_caps;
+
+ mlx4_dbg(dev, "QP Rate-Limit: #rates %d, unit/val max %d/%d, min %d/%d\n",
+ rl_caps->num_rates, rl_caps->max_unit, rl_caps->max_val,
+ rl_caps->min_unit, rl_caps->min_val);
+ }
+
+ dump_dev_cap_flags(dev, dev_cap->flags);
+ dump_dev_cap_flags2(dev, dev_cap->flags2);
+}
+
+int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u8 field;
+ u32 field32;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ if (err)
+ goto out;
+
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
+ port_cap->max_vl = field >> 4;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET);
+ port_cap->ib_mtu = field >> 4;
+ port_cap->max_port_width = field & 0xf;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET);
+ port_cap->max_gids = 1 << (field & 0xf);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET);
+ port_cap->max_pkeys = 1 << (field & 0xf);
+ } else {
+#define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00
+#define QUERY_PORT_MTU_OFFSET 0x01
+#define QUERY_PORT_ETH_MTU_OFFSET 0x02
+#define QUERY_PORT_WIDTH_OFFSET 0x06
+#define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07
+#define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a
+#define QUERY_PORT_MAX_VL_OFFSET 0x0b
+#define QUERY_PORT_MAC_OFFSET 0x10
+#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18
+#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c
+#define QUERY_PORT_TRANS_CODE_OFFSET 0x20
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+ port_cap->link_state = (field & 0x80) >> 7;
+ port_cap->supported_port_types = field & 3;
+ port_cap->suggested_type = (field >> 3) & 1;
+ port_cap->default_sense = (field >> 4) & 1;
+ port_cap->dmfs_optimized_state = (field >> 5) & 1;
+ MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET);
+ port_cap->ib_mtu = field & 0xf;
+ MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET);
+ port_cap->max_port_width = field & 0xf;
+ MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET);
+ port_cap->max_gids = 1 << (field >> 4);
+ port_cap->max_pkeys = 1 << (field & 0xf);
+ MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET);
+ port_cap->max_vl = field & 0xf;
+ port_cap->max_tc_eth = field >> 4;
+ MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET);
+ port_cap->log_max_macs = field & 0xf;
+ port_cap->log_max_vlans = field >> 4;
+ MLX4_GET(port_cap->eth_mtu, outbox, QUERY_PORT_ETH_MTU_OFFSET);
+ MLX4_GET(port_cap->def_mac, outbox, QUERY_PORT_MAC_OFFSET);
+ MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET);
+ port_cap->trans_type = field32 >> 24;
+ port_cap->vendor_oui = field32 & 0xffffff;
+ MLX4_GET(port_cap->wavelength, outbox, QUERY_PORT_WAVELENGTH_OFFSET);
+ MLX4_GET(port_cap->trans_code, outbox, QUERY_PORT_TRANS_CODE_OFFSET);
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+#define DEV_CAP_EXT_2_FLAG_PFC_COUNTERS (1 << 28)
+#define DEV_CAP_EXT_2_FLAG_VLAN_CONTROL (1 << 26)
+#define DEV_CAP_EXT_2_FLAG_80_VFS (1 << 21)
+#define DEV_CAP_EXT_2_FLAG_FSM (1 << 20)
+
+int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u64 flags;
+ int err = 0;
+ u8 field;
+ u16 field16;
+ u32 bmme_flags, field32;
+ int real_port;
+ int slave_port;
+ int first_port;
+ struct mlx4_active_ports actv_ports;
+
+ err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ disable_unsupported_roce_caps(outbox->buf);
+ /* add port mng change event capability and disable mw type 1
+ * unconditionally to slaves
+ */
+ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV;
+ flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW;
+ actv_ports = mlx4_get_active_ports(dev, slave);
+ first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
+ for (slave_port = 0, real_port = first_port;
+ real_port < first_port +
+ bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+ ++real_port, ++slave_port) {
+ if (flags & (MLX4_DEV_CAP_FLAG_WOL_PORT1 << real_port))
+ flags |= MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port;
+ else
+ flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port);
+ }
+ for (; slave_port < dev->caps.num_ports; ++slave_port)
+ flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port);
+
+ /* Not exposing RSS IP fragments to guests */
+ flags &= ~MLX4_DEV_CAP_FLAG_RSS_IP_FRAG;
+ MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VL_PORT_OFFSET);
+ field &= ~0x0F;
+ field |= bitmap_weight(actv_ports.ports, dev->caps.num_ports) & 0x0F;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VL_PORT_OFFSET);
+
+ /* For guests, disable timestamp */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
+ field &= 0x7f;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
+
+ /* For guests, disable vxlan tunneling and QoS support */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN);
+ field &= 0xd7;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN);
+
+ /* For guests, disable port BEACON */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
+ field &= 0x7f;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_PORT_BEACON_OFFSET);
+
+ /* For guests, report Blueflame disabled */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
+ field &= 0x7f;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET);
+
+ /* For guests, disable mw type 2 and port remap*/
+ MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN;
+ bmme_flags &= ~MLX4_FLAG_PORT_REMAP;
+ MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+
+ /* turn off device-managed steering capability if not enabled */
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ MLX4_GET(field, outbox->buf,
+ QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ field &= 0x7f;
+ MLX4_PUT(outbox->buf, field,
+ QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ }
+
+ /* turn off ipoib managed steering for guests */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+ field &= ~0x80;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+
+ /* turn off host side virt features (VST, FSM, etc) for guests */
+ MLX4_GET(field32, outbox->buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ field32 &= ~(DEV_CAP_EXT_2_FLAG_VLAN_CONTROL | DEV_CAP_EXT_2_FLAG_80_VFS |
+ DEV_CAP_EXT_2_FLAG_FSM | DEV_CAP_EXT_2_FLAG_PFC_COUNTERS);
+ MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+
+ /* turn off QCN for guests */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
+ field &= 0xfe;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
+
+ /* turn off QP max-rate limiting for guests */
+ field16 = 0;
+ MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET);
+
+ /* turn off QoS per VF support for guests */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+ field &= 0xef;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+
+ /* turn off ignore FCS feature for guests */
+ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
+ field &= 0xfb;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
+
+ return 0;
+}
+
+static void disable_unsupported_roce_caps(void *buf)
+{
+ u32 flags;
+
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ flags &= ~(1UL << 31);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ flags &= ~(1UL << 24);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ flags &= ~(MLX4_FLAG_ROCE_V1_V2);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+}
+
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 def_mac;
+ u8 port_type;
+ u16 short_field;
+ int err;
+ int admin_link_state;
+ int port = mlx4_slave_convert_port(dev, slave,
+ vhcr->in_modifier & 0xFF);
+
+#define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0
+#define MLX4_PORT_LINK_UP_MASK 0x80
+#define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c
+#define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e
+
+ if (port < 0)
+ return -EINVAL;
+
+ /* Protect against untrusted guests: enforce that this is the
+ * QUERY_PORT general query.
+ */
+ if (vhcr->op_modifier || vhcr->in_modifier & ~0xFF)
+ return -EINVAL;
+
+ vhcr->in_modifier = port;
+
+ err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ if (!err && dev->caps.function != slave) {
+ def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac;
+ MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET);
+
+ /* get port type - currently only eth is enabled */
+ MLX4_GET(port_type, outbox->buf,
+ QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+
+ /* No link sensing allowed */
+ port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK;
+ /* set port type to currently operating port type */
+ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3);
+
+ admin_link_state = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.link_state;
+ if (IFLA_VF_LINK_STATE_ENABLE == admin_link_state)
+ port_type |= MLX4_PORT_LINK_UP_MASK;
+ else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state)
+ port_type &= ~MLX4_PORT_LINK_UP_MASK;
+ else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) {
+ int other_port = (port == 1) ? 2 : 1;
+ struct mlx4_port_cap port_cap;
+
+ err = mlx4_QUERY_PORT(dev, other_port, &port_cap);
+ if (err)
+ goto out;
+ port_type |= (port_cap.link_state << 7);
+ }
+
+ MLX4_PUT(outbox->buf, port_type,
+ QUERY_PORT_SUPPORTED_TYPE_OFFSET);
+
+ if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH)
+ short_field = mlx4_get_slave_num_gids(dev, slave, port);
+ else
+ short_field = 1; /* slave max gids */
+ MLX4_PUT(outbox->buf, short_field,
+ QUERY_PORT_CUR_MAX_GID_OFFSET);
+
+ short_field = dev->caps.pkey_table_len[vhcr->in_modifier];
+ MLX4_PUT(outbox->buf, short_field,
+ QUERY_PORT_CUR_MAX_PKEY_OFFSET);
+ }
+out:
+ return err;
+}
+
+int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port,
+ int *gid_tbl_len, int *pkey_tbl_len)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ u16 field;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ outbox = mailbox->buf;
+
+ MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET);
+ *gid_tbl_len = field;
+
+ MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET);
+ *pkey_tbl_len = field;
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len);
+
+int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_icm_iter iter;
+ __be64 *pages;
+ int lg;
+ int nent = 0;
+ int i;
+ int err = 0;
+ int ts = 0, tc = 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ pages = mailbox->buf;
+
+ for (mlx4_icm_first(icm, &iter);
+ !mlx4_icm_last(&iter);
+ mlx4_icm_next(&iter)) {
+ /*
+ * We have to pass pages that are aligned to their
+ * size, so find the least significant 1 in the
+ * address or size and use that as our log2 size.
+ */
+ lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1;
+ if (lg < MLX4_ICM_PAGE_SHIFT) {
+ mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx)\n",
+ MLX4_ICM_PAGE_SIZE,
+ (unsigned long long) mlx4_icm_addr(&iter),
+ mlx4_icm_size(&iter));
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) {
+ if (virt != -1) {
+ pages[nent * 2] = cpu_to_be64(virt);
+ virt += 1ULL << lg;
+ }
+
+ pages[nent * 2 + 1] =
+ cpu_to_be64((mlx4_icm_addr(&iter) + (i << lg)) |
+ (lg - MLX4_ICM_PAGE_SHIFT));
+ ts += 1 << (lg - 10);
+ ++tc;
+
+ if (++nent == MLX4_MAILBOX_SIZE / 16) {
+ err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+ nent = 0;
+ }
+ }
+ }
+
+ if (nent)
+ err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ switch (op) {
+ case MLX4_CMD_MAP_FA:
+ mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW\n", tc, ts);
+ break;
+ case MLX4_CMD_MAP_ICM_AUX:
+ mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux\n", tc, ts);
+ break;
+ case MLX4_CMD_MAP_ICM:
+ mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM\n",
+ tc, ts, (unsigned long long) virt - (ts << 10));
+ break;
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm)
+{
+ return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, -1);
+}
+
+int mlx4_UNMAP_FA(struct mlx4_dev *dev)
+{
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
+
+int mlx4_RUN_FW(struct mlx4_dev *dev)
+{
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
+int mlx4_QUERY_FW(struct mlx4_dev *dev)
+{
+ struct mlx4_fw *fw = &mlx4_priv(dev)->fw;
+ struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd;
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ int err = 0;
+ u64 fw_ver;
+ u16 cmd_if_rev;
+ u8 lg;
+
+#define QUERY_FW_OUT_SIZE 0x100
+#define QUERY_FW_VER_OFFSET 0x00
+#define QUERY_FW_PPF_ID 0x09
+#define QUERY_FW_CMD_IF_REV_OFFSET 0x0a
+#define QUERY_FW_MAX_CMD_OFFSET 0x0f
+#define QUERY_FW_ERR_START_OFFSET 0x30
+#define QUERY_FW_ERR_SIZE_OFFSET 0x38
+#define QUERY_FW_ERR_BAR_OFFSET 0x3c
+
+#define QUERY_FW_SIZE_OFFSET 0x00
+#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
+#define QUERY_FW_CLR_INT_BAR_OFFSET 0x28
+
+#define QUERY_FW_COMM_BASE_OFFSET 0x40
+#define QUERY_FW_COMM_BAR_OFFSET 0x48
+
+#define QUERY_FW_CLOCK_OFFSET 0x50
+#define QUERY_FW_CLOCK_BAR 0x58
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET);
+ /*
+ * FW subminor version is at more significant bits than minor
+ * version, so swap here.
+ */
+ dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) |
+ ((fw_ver & 0xffff0000ull) >> 16) |
+ ((fw_ver & 0x0000ffffull) << 16);
+
+ MLX4_GET(lg, outbox, QUERY_FW_PPF_ID);
+ dev->caps.function = lg;
+
+ if (mlx4_is_slave(dev))
+ goto out;
+
+
+ MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET);
+ if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV ||
+ cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) {
+ mlx4_err(dev, "Installed FW has unsupported command interface revision %d\n",
+ cmd_if_rev);
+ mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n",
+ (int) (dev->caps.fw_ver >> 32),
+ (int) (dev->caps.fw_ver >> 16) & 0xffff,
+ (int) dev->caps.fw_ver & 0xffff);
+ mlx4_err(dev, "This driver version supports only revisions %d to %d\n",
+ MLX4_COMMAND_INTERFACE_MIN_REV, MLX4_COMMAND_INTERFACE_MAX_REV);
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (cmd_if_rev < MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS)
+ dev->flags |= MLX4_FLAG_OLD_PORT_CMDS;
+
+ MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
+ cmd->max_cmds = 1 << lg;
+
+ mlx4_dbg(dev, "FW version %d.%d.%03d (cmd intf rev %d), max commands %d\n",
+ (int) (dev->caps.fw_ver >> 32),
+ (int) (dev->caps.fw_ver >> 16) & 0xffff,
+ (int) dev->caps.fw_ver & 0xffff,
+ cmd_if_rev, cmd->max_cmds);
+
+ MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET);
+ MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
+ MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET);
+ fw->catas_bar = (fw->catas_bar >> 6) * 2;
+
+ mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n",
+ (unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar);
+
+ MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
+ MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
+ MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET);
+ fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2;
+
+ MLX4_GET(fw->comm_base, outbox, QUERY_FW_COMM_BASE_OFFSET);
+ MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET);
+ fw->comm_bar = (fw->comm_bar >> 6) * 2;
+ mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n",
+ fw->comm_bar, fw->comm_base);
+ mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2);
+
+ MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET);
+ MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR);
+ fw->clock_bar = (fw->clock_bar >> 6) * 2;
+ mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n",
+ fw->clock_bar, fw->clock_offset);
+
+ /*
+ * Round up number of system pages needed in case
+ * MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
+ */
+ fw->fw_pages =
+ ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT);
+
+ mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n",
+ (unsigned long long) fw->clr_int_base, fw->clr_int_bar);
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u8 *outbuf;
+ int err;
+
+ outbuf = outbox->buf;
+ err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ /* for slaves, set pci PPF ID to invalid and zero out everything
+ * else except FW version */
+ outbuf[0] = outbuf[1] = 0;
+ memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8);
+ outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID;
+
+ return 0;
+}
+
+static void get_board_id(void *vsd, char *board_id)
+{
+ int i;
+
+#define VSD_OFFSET_SIG1 0x00
+#define VSD_OFFSET_SIG2 0xde
+#define VSD_OFFSET_MLX_BOARD_ID 0xd0
+#define VSD_OFFSET_TS_BOARD_ID 0x20
+
+#define VSD_SIGNATURE_TOPSPIN 0x5ad
+
+ memset(board_id, 0, MLX4_BOARD_ID_LEN);
+
+ if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
+ be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
+ strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN);
+ } else {
+ /*
+ * The board ID is a string but the firmware byte
+ * swaps each 4-byte word before passing it back to
+ * us. Therefore we need to swab it before printing.
+ */
+ u32 *bid_u32 = (u32 *)board_id;
+
+ for (i = 0; i < 4; ++i) {
+ u32 *addr;
+ u32 val;
+
+ addr = (u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4);
+ val = get_unaligned(addr);
+ val = swab32(val);
+ put_unaligned(val, &bid_u32[i]);
+ }
+ }
+}
+
+int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ int err;
+
+#define QUERY_ADAPTER_OUT_SIZE 0x100
+#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
+#define QUERY_ADAPTER_VSD_OFFSET 0x20
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+
+ get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
+ adapter->board_id);
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ __be32 *inbox;
+ int err;
+ static const u8 a0_dmfs_hw_steering[] = {
+ [MLX4_STEERING_DMFS_A0_DEFAULT] = 0,
+ [MLX4_STEERING_DMFS_A0_DYNAMIC] = 1,
+ [MLX4_STEERING_DMFS_A0_STATIC] = 2,
+ [MLX4_STEERING_DMFS_A0_DISABLE] = 3
+ };
+
+#define INIT_HCA_IN_SIZE 0x200
+#define INIT_HCA_VERSION_OFFSET 0x000
+#define INIT_HCA_VERSION 2
+#define INIT_HCA_VXLAN_OFFSET 0x0c
+#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e
+#define INIT_HCA_FLAGS_OFFSET 0x014
+#define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018
+#define INIT_HCA_QPC_OFFSET 0x020
+#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
+#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
+#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28)
+#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
+#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
+#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
+#define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38)
+#define INIT_HCA_EQE_CQE_STRIDE_OFFSET (INIT_HCA_QPC_OFFSET + 0x3b)
+#define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
+#define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
+#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
+#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67)
+#define INIT_HCA_NUM_SYS_EQS_OFFSET (INIT_HCA_QPC_OFFSET + 0x6a)
+#define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70)
+#define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77)
+#define INIT_HCA_MCAST_OFFSET 0x0c0
+#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
+#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x13)
+#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x17)
+#define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18)
+#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
+#define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6
+#define INIT_HCA_DRIVER_VERSION_OFFSET 0x140
+#define INIT_HCA_DRIVER_VERSION_SZ 0x40
+#define INIT_HCA_FS_PARAM_OFFSET 0x1d0
+#define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00)
+#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x13)
+#define INIT_HCA_FS_A0_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x18)
+#define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b)
+#define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21)
+#define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22)
+#define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25)
+#define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26)
+#define INIT_HCA_TPT_OFFSET 0x0f0
+#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
+#define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08)
+#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
+#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
+#define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18)
+#define INIT_HCA_UAR_OFFSET 0x120
+#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a)
+#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
+
+ *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) =
+ ((ilog2(cache_line_size()) - 4) << 5) | (1 << 4);
+
+#if defined(__LITTLE_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+#elif defined(__BIG_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
+#else
+#error Host endianness not defined
+#endif
+ /* Check port for UD address vector: */
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
+
+ /* Enable IPoIB checksumming if we can: */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
+
+ /* Enable QoS support if module parameter set */
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG && enable_qos)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2);
+
+ /* enable counters */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
+
+ /* Enable RSS spread to fragmented IP packets when supported */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_RSS_IP_FRAG)
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 13);
+
+ /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ }
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+ *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+ dev->caps.cqe_size = 64;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+ } else {
+ dev->caps.cqe_size = 32;
+ }
+
+ /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+ if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) &&
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) {
+ dev->caps.eqe_size = cache_line_size();
+ dev->caps.cqe_size = cache_line_size();
+ dev->caps.eqe_factor = 0;
+ MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 |
+ (ilog2(dev->caps.eqe_size) - 5)),
+ INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+
+ /* User still need to know to support CQE > 32B */
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+ }
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
+ *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31);
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW) {
+ u8 *dst = (u8 *)(inbox + INIT_HCA_DRIVER_VERSION_OFFSET / 4);
+
+ strncpy(dst, DRV_NAME_FOR_FW, INIT_HCA_DRIVER_VERSION_SZ - 1);
+ mlx4_dbg(dev, "Reporting Driver Version to FW: %s\n", dst);
+ }
+
+ /* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET);
+ MLX4_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET);
+ MLX4_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET);
+ MLX4_PUT(inbox, param->altc_base, INIT_HCA_ALTC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET);
+ MLX4_PUT(inbox, param->num_sys_eqs, INIT_HCA_NUM_SYS_EQS_OFFSET);
+ MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
+
+ /* steering attributes */
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |=
+ cpu_to_be32(1 <<
+ INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN);
+
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ /* Enable Ethernet flow steering
+ * with udp unicast and tcp unicast
+ */
+ if (dev->caps.dmfs_high_steer_mode !=
+ MLX4_STEERING_DMFS_A0_STATIC)
+ MLX4_PUT(inbox,
+ (u8)(MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
+ INIT_HCA_FS_ETH_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET);
+ /* Enable IPoIB flow steering
+ * with udp unicast and tcp unicast
+ */
+ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
+ INIT_HCA_FS_IB_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_IB_NUM_ADDRS_OFFSET);
+
+ if (dev->caps.dmfs_high_steer_mode !=
+ MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
+ MLX4_PUT(inbox,
+ ((u8)(a0_dmfs_hw_steering[dev->caps.dmfs_high_steer_mode]
+ << 6)),
+ INIT_HCA_FS_A0_OFFSET);
+ } else {
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_hash_sz,
+ INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0)
+ MLX4_PUT(inbox, (u8) (1 << 3),
+ INIT_HCA_UC_STEERING_OFFSET);
+ }
+
+ /* TPT attributes */
+
+ MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET);
+ MLX4_PUT(inbox, param->mw_enabled, INIT_HCA_TPT_MW_OFFSET);
+ MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
+ MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
+ MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET);
+
+ /* UAR attributes */
+
+ MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
+
+ /* set parser VXLAN attributes */
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) {
+ u8 parser_params = 0;
+ MLX4_PUT(inbox, parser_params, INIT_HCA_VXLAN_OFFSET);
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA,
+ MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+
+ if (err)
+ mlx4_err(dev, "INIT_HCA returns %d\n", err);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+ struct mlx4_init_hca_param *param)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ __be32 *outbox;
+ u64 qword_field;
+ u32 dword_field;
+ u16 word_field;
+ u8 byte_field;
+ int err;
+ static const u8 a0_dmfs_query_hw_steering[] = {
+ [0] = MLX4_STEERING_DMFS_A0_DEFAULT,
+ [1] = MLX4_STEERING_DMFS_A0_DYNAMIC,
+ [2] = MLX4_STEERING_DMFS_A0_STATIC,
+ [3] = MLX4_STEERING_DMFS_A0_DISABLE
+ };
+
+#define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04
+#define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+ MLX4_CMD_QUERY_HCA,
+ MLX4_CMD_TIME_CLASS_B,
+ !mlx4_is_slave(dev));
+ if (err)
+ goto out;
+
+ MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET);
+ MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET);
+
+ /* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+ MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET);
+ param->qpc_base = qword_field & ~((u64)0x1f);
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET);
+ param->log_num_qps = byte_field & 0x1f;
+ MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET);
+ param->srqc_base = qword_field & ~((u64)0x1f);
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET);
+ param->log_num_srqs = byte_field & 0x1f;
+ MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET);
+ param->cqc_base = qword_field & ~((u64)0x1f);
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET);
+ param->log_num_cqs = byte_field & 0x1f;
+ MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET);
+ param->altc_base = qword_field;
+ MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET);
+ param->auxc_base = qword_field;
+ MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET);
+ param->eqc_base = qword_field & ~((u64)0x1f);
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET);
+ param->log_num_eqs = byte_field & 0x1f;
+ MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET);
+ param->num_sys_eqs = word_field & 0xfff;
+ MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+ param->rdmarc_base = qword_field & ~((u64)0x1f);
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET);
+ param->log_rd_per_qp = byte_field & 0x7;
+
+ MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET);
+ if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) {
+ param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ } else {
+ MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET);
+ if (byte_field & 0x8)
+ param->steering_mode = MLX4_STEERING_MODE_B0;
+ else
+ param->steering_mode = MLX4_STEERING_MODE_A0;
+ }
+
+ if (dword_field & (1 << 13))
+ param->rss_ip_frags = 1;
+
+ /* steering attributes */
+ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ param->log_mc_entry_sz = byte_field & 0x1f;
+ MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ param->log_mc_table_sz = byte_field & 0x1f;
+ MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET);
+ param->dmfs_high_steer_mode =
+ a0_dmfs_query_hw_steering[(byte_field >> 6) & 3];
+ } else {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ param->log_mc_entry_sz = byte_field & 0x1f;
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ param->log_mc_hash_sz = byte_field & 0x1f;
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ param->log_mc_table_sz = byte_field & 0x1f;
+ }
+
+ /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+ if (byte_field & 0x20) /* 64-bytes eqe enabled */
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+ if (byte_field & 0x40) /* 64-bytes cqe enabled */
+ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
+ /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+ if (byte_field) {
+ param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED;
+ param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED;
+ param->cqe_size = 1 << ((byte_field &
+ MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+ param->eqe_size = 1 << (((byte_field &
+ MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5);
+ }
+
+ /* TPT attributes */
+
+ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);
+ MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET);
+ param->mw_enabled = byte_field >> 7;
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
+ param->log_mpt_sz = byte_field & 0x3f;
+ MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET);
+ MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET);
+
+ /* UAR attributes */
+
+ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
+ param->log_uar_sz = byte_field & 0xf;
+
+ /* phv_check enable */
+ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET);
+ if (byte_field & 0x2)
+ param->phv_check_en = 1;
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+
+static int mlx4_hca_core_clock_update(struct mlx4_dev *dev)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ __be32 *outbox;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ mlx4_warn(dev, "hca_core_clock mailbox allocation failed\n");
+ return PTR_ERR(mailbox);
+ }
+ outbox = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+ MLX4_CMD_QUERY_HCA,
+ MLX4_CMD_TIME_CLASS_B,
+ !mlx4_is_slave(dev));
+ if (err) {
+ mlx4_warn(dev, "hca_core_clock update failed\n");
+ goto out;
+ }
+
+ MLX4_GET(dev->caps.hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET);
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+
+/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0
+ * and real QP0 are active, so that the paravirtualized QP0 is ready
+ * to operate */
+static int check_qp0_state(struct mlx4_dev *dev, int function, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ /* irrelevant if not infiniband */
+ if (priv->mfunc.master.qp0_state[port].proxy_qp0_active &&
+ priv->mfunc.master.qp0_state[port].qp0_active)
+ return 1;
+ return 0;
+}
+
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier);
+ int err;
+
+ if (port < 0)
+ return -EINVAL;
+
+ if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))
+ return 0;
+
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
+ /* Enable port only if it was previously disabled */
+ if (!priv->mfunc.master.init_port_ref[port]) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ }
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ } else {
+ if (slave == mlx4_master_func_num(dev)) {
+ if (check_qp0_state(dev, slave, port) &&
+ !priv->mfunc.master.qp0_state[port].port_active) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ priv->mfunc.master.qp0_state[port].port_active = 1;
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ }
+ } else
+ priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
+ }
+ ++priv->mfunc.master.init_port_ref[port];
+ return 0;
+}
+
+int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *inbox;
+ int err;
+ u32 flags;
+ u16 field;
+
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+#define INIT_PORT_IN_SIZE 256
+#define INIT_PORT_FLAGS_OFFSET 0x00
+#define INIT_PORT_FLAG_SIG (1 << 18)
+#define INIT_PORT_FLAG_NG (1 << 17)
+#define INIT_PORT_FLAG_G0 (1 << 16)
+#define INIT_PORT_VL_SHIFT 4
+#define INIT_PORT_PORT_WIDTH_SHIFT 8
+#define INIT_PORT_MTU_OFFSET 0x04
+#define INIT_PORT_MAX_GID_OFFSET 0x06
+#define INIT_PORT_MAX_PKEY_OFFSET 0x0a
+#define INIT_PORT_GUID0_OFFSET 0x10
+#define INIT_PORT_NODE_GUID_OFFSET 0x18
+#define INIT_PORT_SI_GUID_OFFSET 0x20
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ flags = 0;
+ flags |= (dev->caps.vl_cap[port] & 0xf) << INIT_PORT_VL_SHIFT;
+ flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT;
+ MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET);
+
+ field = 128 << dev->caps.ib_mtu_cap[port];
+ MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET);
+ field = dev->caps.gid_table_len[port];
+ MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET);
+ field = dev->caps.pkey_table_len[port];
+ MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET);
+
+ err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ } else
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+
+ if (!err)
+ mlx4_hca_core_clock_update(dev);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_INIT_PORT);
+
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier);
+ int err;
+
+ if (port < 0)
+ return -EINVAL;
+
+ if (!(priv->mfunc.master.slave_state[slave].init_port_mask &
+ (1 << port)))
+ return 0;
+
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
+ if (priv->mfunc.master.init_port_ref[port] == 1) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ }
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ } else {
+ /* infiniband port */
+ if (slave == mlx4_master_func_num(dev)) {
+ if (!priv->mfunc.master.qp0_state[port].qp0_active &&
+ priv->mfunc.master.qp0_state[port].port_active) {
+ err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ priv->mfunc.master.qp0_state[port].port_active = 0;
+ }
+ } else
+ priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
+ }
+ --priv->mfunc.master.init_port_ref[port];
+ return 0;
+}
+
+int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
+{
+ return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT);
+
+int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic)
+{
+ return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA,
+ MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+}
+
+struct mlx4_config_dev {
+ __be32 update_flags;
+ __be32 rsvd1[3];
+ __be16 vxlan_udp_dport;
+ __be16 rsvd2;
+ __be16 roce_v2_entropy;
+ __be16 roce_v2_udp_dport;
+ __be32 roce_flags;
+ __be32 rsvd4[25];
+ __be16 rsvd5;
+ u8 rsvd6;
+ u8 rx_checksum_val;
+};
+
+#define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
+#define MLX4_DISABLE_RX_PORT BIT(18)
+
+static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
+{
+ int err;
+ struct mlx4_cmd_mailbox *mailbox;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memcpy(mailbox->buf, config_dev, sizeof(*config_dev));
+
+ err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_CONFIG_DEV,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_CONFIG_DEV_get(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
+{
+ int err;
+ struct mlx4_cmd_mailbox *mailbox;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 1, MLX4_CMD_CONFIG_DEV,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ if (!err)
+ memcpy(config_dev, mailbox->buf, sizeof(*config_dev));
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+/* Conversion between the HW values and the actual functionality.
+ * The value represented by the array index,
+ * and the functionality determined by the flags.
+ */
+static const u8 config_dev_csum_flags[] = {
+ [0] = 0,
+ [1] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP,
+ [2] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP |
+ MLX4_RX_CSUM_MODE_L4,
+ [3] = MLX4_RX_CSUM_MODE_L4 |
+ MLX4_RX_CSUM_MODE_IP_OK_IP_NON_TCP_UDP |
+ MLX4_RX_CSUM_MODE_MULTI_VLAN
+};
+
+int mlx4_config_dev_retrieval(struct mlx4_dev *dev,
+ struct mlx4_config_dev_params *params)
+{
+ struct mlx4_config_dev config_dev = {0};
+ int err;
+ u8 csum_mask;
+
+#define CONFIG_DEV_RX_CSUM_MODE_MASK 0x7
+#define CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET 0
+#define CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET 4
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CONFIG_DEV))
+ return -EOPNOTSUPP;
+
+ err = mlx4_CONFIG_DEV_get(dev, &config_dev);
+ if (err)
+ return err;
+
+ csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) &
+ CONFIG_DEV_RX_CSUM_MODE_MASK;
+
+ if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags))
+ return -EINVAL;
+ params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask];
+
+ csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) &
+ CONFIG_DEV_RX_CSUM_MODE_MASK;
+
+ if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags))
+ return -EINVAL;
+ params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask];
+
+ params->vxlan_udp_dport = be16_to_cpu(config_dev.vxlan_udp_dport);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_config_dev_retrieval);
+
+int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port)
+{
+ struct mlx4_config_dev config_dev;
+
+ memset(&config_dev, 0, sizeof(config_dev));
+ config_dev.update_flags = cpu_to_be32(MLX4_VXLAN_UDP_DPORT);
+ config_dev.vxlan_udp_dport = udp_port;
+
+ return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port);
+
+#define CONFIG_DISABLE_RX_PORT BIT(15)
+int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
+{
+ struct mlx4_config_dev config_dev;
+
+ memset(&config_dev, 0, sizeof(config_dev));
+ config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT);
+ if (dis)
+ config_dev.roce_flags =
+ cpu_to_be32(CONFIG_DISABLE_RX_PORT);
+
+ return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
+{
+ struct mlx4_config_dev config_dev;
+
+ memset(&config_dev, 0, sizeof(config_dev));
+ config_dev.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
+ config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port);
+
+ return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
+
+int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct {
+ __be32 v_port1;
+ __be32 v_port2;
+ } *v2p;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ v2p = mailbox->buf;
+ v2p->v_port1 = cpu_to_be32(port1);
+ v2p->v_port2 = cpu_to_be32(port2);
+
+ err = mlx4_cmd(dev, mailbox->dma, 0,
+ MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+
+int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
+{
+ int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0,
+ MLX4_CMD_SET_ICM_SIZE,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ if (ret)
+ return ret;
+
+ /*
+ * Round up number of system pages needed in case
+ * MLX4_ICM_PAGE_SIZE < PAGE_SIZE.
+ */
+ *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT);
+
+ return 0;
+}
+
+int mlx4_NOP(struct mlx4_dev *dev)
+{
+ /* Input modifier of 0x1f means "finish as soon as possible." */
+ return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+}
+
+int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
+ const u32 offset[],
+ u32 value[], size_t array_len, u8 port)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 *outbox;
+ size_t i;
+ int ret;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ outbox = mailbox->buf;
+
+ ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier,
+ MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < array_len; i++) {
+ if (offset[i] > MLX4_MAILBOX_SIZE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ MLX4_GET(value[i], outbox, offset[i]);
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+EXPORT_SYMBOL(mlx4_query_diag_counters);
+
+int mlx4_get_phys_port_id(struct mlx4_dev *dev)
+{
+ u8 port;
+ u32 *outbox;
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 in_mod;
+ u32 guid_hi, guid_lo;
+ int err, ret = 0;
+#define MOD_STAT_CFG_PORT_OFFSET 8
+#define MOD_STAT_CFG_GUID_H 0X14
+#define MOD_STAT_CFG_GUID_L 0X1c
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ in_mod = port << MOD_STAT_CFG_PORT_OFFSET;
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, in_mod, 0x2,
+ MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Fail to get port %d uplink guid\n",
+ port);
+ ret = err;
+ } else {
+ MLX4_GET(guid_hi, outbox, MOD_STAT_CFG_GUID_H);
+ MLX4_GET(guid_lo, outbox, MOD_STAT_CFG_GUID_L);
+ dev->caps.phys_port_id[port] = (u64)guid_lo |
+ (u64)guid_hi << 32;
+ }
+ }
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+
+#define MLX4_WOL_SETUP_MODE (5 << 28)
+int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
+{
+ u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
+
+ return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3,
+ MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+}
+EXPORT_SYMBOL_GPL(mlx4_wol_read);
+
+int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port)
+{
+ u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
+
+ return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+EXPORT_SYMBOL_GPL(mlx4_wol_write);
+
+enum {
+ ADD_TO_MCG = 0x26,
+};
+
+
+void mlx4_opreq_action(struct work_struct *work)
+{
+ struct mlx4_priv *priv = container_of(work, struct mlx4_priv,
+ opreq_task);
+ struct mlx4_dev *dev = &priv->dev;
+ int num_tasks = atomic_read(&priv->opreq_count);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 *outbox;
+ u32 modifier;
+ u16 token;
+ u16 type;
+ int err;
+ u32 num_qps;
+ struct mlx4_qp qp;
+ int i;
+ u8 rem_mcg;
+ u8 prot;
+
+#define GET_OP_REQ_MODIFIER_OFFSET 0x08
+#define GET_OP_REQ_TOKEN_OFFSET 0x14
+#define GET_OP_REQ_TYPE_OFFSET 0x1a
+#define GET_OP_REQ_DATA_OFFSET 0x20
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ mlx4_err(dev, "Failed to allocate mailbox for GET_OP_REQ\n");
+ return;
+ }
+ outbox = mailbox->buf;
+
+ while (num_tasks) {
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+ MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Failed to retrieve required operation: %d\n",
+ err);
+ goto out;
+ }
+ MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET);
+ MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET);
+ MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET);
+ type &= 0xfff;
+
+ switch (type) {
+ case ADD_TO_MCG:
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_warn(dev, "ADD MCG operation is not supported in DEVICE_MANAGED steering mode\n");
+ err = EPERM;
+ break;
+ }
+ mgm = (struct mlx4_mgm *)((u8 *)(outbox) +
+ GET_OP_REQ_DATA_OFFSET);
+ num_qps = be32_to_cpu(mgm->members_count) &
+ MGM_QPN_MASK;
+ rem_mcg = ((u8 *)(&mgm->members_count))[0] & 1;
+ prot = ((u8 *)(&mgm->members_count))[0] >> 6;
+
+ for (i = 0; i < num_qps; i++) {
+ qp.qpn = be32_to_cpu(mgm->qp[i]);
+ if (rem_mcg)
+ err = mlx4_multicast_detach(dev, &qp,
+ mgm->gid,
+ prot, 0);
+ else
+ err = mlx4_multicast_attach(dev, &qp,
+ mgm->gid,
+ mgm->gid[5]
+ , 0, prot,
+ NULL);
+ if (err)
+ break;
+ }
+ break;
+ default:
+ mlx4_warn(dev, "Bad type for required operation\n");
+ err = EINVAL;
+ break;
+ }
+ err = mlx4_cmd(dev, 0, ((u32) err |
+ (__force u32)cpu_to_be32(token) << 16),
+ 1, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Failed to acknowledge required request: %d\n",
+ err);
+ goto out;
+ }
+ memset(outbox, 0, 0xffc);
+ num_tasks = atomic_dec_return(&priv->opreq_count);
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+}
+
+static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10
+#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20
+#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40
+#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70
+
+ u32 set_attr_mask, getresp_attr_mask;
+ u32 trap_attr_mask, traprepress_attr_mask;
+
+ MLX4_GET(set_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n",
+ set_attr_mask);
+
+ MLX4_GET(getresp_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n",
+ getresp_attr_mask);
+
+ MLX4_GET(trap_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n",
+ trap_attr_mask);
+
+ MLX4_GET(traprepress_attr_mask, mailbox->buf,
+ MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET);
+ mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n",
+ traprepress_attr_mask);
+
+ if (set_attr_mask && getresp_attr_mask && trap_attr_mask &&
+ traprepress_attr_mask)
+ return 1;
+
+ return 0;
+}
+
+int mlx4_config_mad_demux(struct mlx4_dev *dev)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+
+ /* Check if mad_demux is supported */
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX))
+ return 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX");
+ return -ENOMEM;
+ }
+
+ /* Query mad_demux to find out which MADs are handled by internal sma */
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */,
+ MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n",
+ err);
+ goto out;
+ }
+
+ if (mlx4_check_smp_firewall_active(dev, mailbox))
+ dev->flags |= MLX4_FLAG_SECURE_HOST;
+
+ /* Config mad_demux to handle all MADs returned by the query above */
+ err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
+ MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err);
+ goto out;
+ }
+
+ if (dev->flags & MLX4_FLAG_SECURE_HOST)
+ mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+/* Access Reg commands */
+enum mlx4_access_reg_masks {
+ MLX4_ACCESS_REG_STATUS_MASK = 0x7f,
+ MLX4_ACCESS_REG_METHOD_MASK = 0x7f,
+ MLX4_ACCESS_REG_LEN_MASK = 0x7ff
+};
+
+struct mlx4_access_reg {
+ __be16 constant1;
+ u8 status;
+ u8 resrvd1;
+ __be16 reg_id;
+ u8 method;
+ u8 constant2;
+ __be32 resrvd2[2];
+ __be16 len_const;
+ __be16 resrvd3;
+#define MLX4_ACCESS_REG_HEADER_SIZE (20)
+ u8 reg_data[MLX4_MAILBOX_SIZE-MLX4_ACCESS_REG_HEADER_SIZE];
+} __attribute__((__packed__));
+
+/**
+ * mlx4_ACCESS_REG - Generic access reg command.
+ * @dev: mlx4_dev.
+ * @reg_id: register ID to access.
+ * @method: Access method Read/Write.
+ * @reg_len: register length to Read/Write in bytes.
+ * @reg_data: reg_data pointer to Read/Write From/To.
+ *
+ * Access ConnectX registers FW command.
+ * Returns 0 on success and copies outbox mlx4_access_reg data
+ * field into reg_data or a negative error code.
+ */
+static int mlx4_ACCESS_REG(struct mlx4_dev *dev, u16 reg_id,
+ enum mlx4_access_reg_method method,
+ u16 reg_len, void *reg_data)
+{
+ struct mlx4_cmd_mailbox *inbox, *outbox;
+ struct mlx4_access_reg *inbuf, *outbuf;
+ int err;
+
+ inbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inbox))
+ return PTR_ERR(inbox);
+
+ outbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outbox)) {
+ mlx4_free_cmd_mailbox(dev, inbox);
+ return PTR_ERR(outbox);
+ }
+
+ inbuf = inbox->buf;
+ outbuf = outbox->buf;
+
+ inbuf->constant1 = cpu_to_be16(0x1<<11 | 0x4);
+ inbuf->constant2 = 0x1;
+ inbuf->reg_id = cpu_to_be16(reg_id);
+ inbuf->method = method & MLX4_ACCESS_REG_METHOD_MASK;
+
+ reg_len = min(reg_len, (u16)(sizeof(inbuf->reg_data)));
+ inbuf->len_const =
+ cpu_to_be16(((reg_len/4 + 1) & MLX4_ACCESS_REG_LEN_MASK) |
+ ((0x3) << 12));
+
+ memcpy(inbuf->reg_data, reg_data, reg_len);
+ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, 0, 0,
+ MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ if (outbuf->status & MLX4_ACCESS_REG_STATUS_MASK) {
+ err = outbuf->status & MLX4_ACCESS_REG_STATUS_MASK;
+ mlx4_err(dev,
+ "MLX4_CMD_ACCESS_REG(%x) returned REG status (%x)\n",
+ reg_id, err);
+ goto out;
+ }
+
+ memcpy(reg_data, outbuf->reg_data, reg_len);
+out:
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_free_cmd_mailbox(dev, outbox);
+ return err;
+}
+
+/* ConnectX registers IDs */
+enum mlx4_reg_id {
+ MLX4_REG_ID_PTYS = 0x5004,
+};
+
+/**
+ * mlx4_ACCESS_PTYS_REG - Access PTYs (Port Type and Speed)
+ * register
+ * @dev: mlx4_dev.
+ * @method: Access method Read/Write.
+ * @ptys_reg: PTYS register data pointer.
+ *
+ * Access ConnectX PTYS register, to Read/Write Port Type/Speed
+ * configuration
+ * Returns 0 on success or a negative error code.
+ */
+int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev,
+ enum mlx4_access_reg_method method,
+ struct mlx4_ptys_reg *ptys_reg)
+{
+ return mlx4_ACCESS_REG(dev, MLX4_REG_ID_PTYS,
+ method, sizeof(*ptys_reg), ptys_reg);
+}
+EXPORT_SYMBOL_GPL(mlx4_ACCESS_PTYS_REG);
+
+int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_access_reg *inbuf = inbox->buf;
+ u8 method = inbuf->method & MLX4_ACCESS_REG_METHOD_MASK;
+ u16 reg_id = be16_to_cpu(inbuf->reg_id);
+
+ if (slave != mlx4_master_func_num(dev) &&
+ method == MLX4_ACCESS_REG_WRITE)
+ return -EPERM;
+
+ if (reg_id == MLX4_REG_ID_PTYS) {
+ struct mlx4_ptys_reg *ptys_reg =
+ (struct mlx4_ptys_reg *)inbuf->reg_data;
+
+ ptys_reg->local_port =
+ mlx4_slave_convert_port(dev, slave,
+ ptys_reg->local_port);
+ }
+
+ return mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier,
+ 0, MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+}
+
+static int mlx4_SET_PORT_phv_bit(struct mlx4_dev *dev, u8 port, u8 phv_bit)
+{
+#define SET_PORT_GEN_PHV_VALID 0x10
+#define SET_PORT_GEN_PHV_EN 0x80
+
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ u32 in_mod;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+
+ context->flags2 |= SET_PORT_GEN_PHV_VALID;
+ if (phv_bit)
+ context->phv_en |= SET_PORT_GEN_PHV_EN;
+
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv)
+{
+ int err;
+ struct mlx4_func_cap func_cap;
+
+ memset(&func_cap, 0, sizeof(func_cap));
+ err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
+ if (!err)
+ *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT;
+ return err;
+}
+EXPORT_SYMBOL(get_phv_bit);
+
+int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val)
+{
+ int ret;
+
+ if (mlx4_is_slave(dev))
+ return -EPERM;
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN &&
+ !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) {
+ ret = mlx4_SET_PORT_phv_bit(dev, port, new_val);
+ if (!ret)
+ dev->caps.phv_bit[port] = new_val;
+ return ret;
+ }
+
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(set_phv_bit);
+
+int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
+ bool *vlan_offload_disabled)
+{
+ struct mlx4_func_cap func_cap;
+ int err;
+
+ memset(&func_cap, 0, sizeof(func_cap));
+ err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
+ if (!err)
+ *vlan_offload_disabled =
+ !!(func_cap.flags0 &
+ QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled);
+
+void mlx4_replace_zero_macs(struct mlx4_dev *dev)
+{
+ int i;
+ u8 mac_addr[ETH_ALEN];
+
+ dev->port_random_macs = 0;
+ for (i = 1; i <= dev->caps.num_ports; ++i)
+ if (!dev->caps.def_mac[i] &&
+ dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) {
+ eth_random_addr(mac_addr);
+ dev->port_random_macs |= 1 << i;
+ dev->caps.def_mac[i] = mlx4_mac_to_u64(mac_addr);
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
new file mode 100644
index 000000000..cf64e54ee
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_FW_H
+#define MLX4_FW_H
+
+#include "mlx4.h"
+#include "icm.h"
+
+struct mlx4_mod_stat_cfg {
+ u8 log_pg_sz;
+ u8 log_pg_sz_m;
+};
+
+struct mlx4_port_cap {
+ u8 link_state;
+ u8 supported_port_types;
+ u8 suggested_type;
+ u8 default_sense;
+ u8 log_max_macs;
+ u8 log_max_vlans;
+ int ib_mtu;
+ int max_port_width;
+ int max_vl;
+ int max_tc_eth;
+ int max_gids;
+ int max_pkeys;
+ u64 def_mac;
+ u16 eth_mtu;
+ int trans_type;
+ int vendor_oui;
+ u16 wavelength;
+ u64 trans_code;
+ u8 dmfs_optimized_state;
+};
+
+struct mlx4_dev_cap {
+ int max_srq_sz;
+ int max_qp_sz;
+ int reserved_qps;
+ int max_qps;
+ int reserved_srqs;
+ int max_srqs;
+ int max_cq_sz;
+ int reserved_cqs;
+ int max_cqs;
+ int max_mpts;
+ int reserved_eqs;
+ int max_eqs;
+ int num_sys_eqs;
+ int reserved_mtts;
+ int reserved_mrws;
+ int max_requester_per_qp;
+ int max_responder_per_qp;
+ int max_rdma_global;
+ int local_ca_ack_delay;
+ int num_ports;
+ u32 max_msg_sz;
+ u16 stat_rate_support;
+ int fs_log_max_ucast_qp_range_size;
+ int fs_max_num_qp_per_entry;
+ u64 flags;
+ u64 flags2;
+ int reserved_uars;
+ int uar_size;
+ int min_page_sz;
+ int bf_reg_size;
+ int bf_regs_per_page;
+ int max_sq_sg;
+ int max_sq_desc_sz;
+ int max_rq_sg;
+ int max_rq_desc_sz;
+ int max_qp_per_mcg;
+ int reserved_mgms;
+ int max_mcgs;
+ int reserved_pds;
+ int max_pds;
+ int reserved_xrcds;
+ int max_xrcds;
+ int qpc_entry_sz;
+ int rdmarc_entry_sz;
+ int altc_entry_sz;
+ int aux_entry_sz;
+ int srq_entry_sz;
+ int cqc_entry_sz;
+ int eqc_entry_sz;
+ int dmpt_entry_sz;
+ int cmpt_entry_sz;
+ int mtt_entry_sz;
+ int resize_srq;
+ u32 bmme_flags;
+ u32 reserved_lkey;
+ u64 max_icm_sz;
+ int max_gso_sz;
+ int max_rss_tbl_sz;
+ u32 max_counters;
+ u32 dmfs_high_rate_qpn_base;
+ u32 dmfs_high_rate_qpn_range;
+ struct mlx4_rate_limit_caps rl_caps;
+ u32 health_buffer_addrs;
+ struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
+ bool wol_port[MLX4_MAX_PORTS + 1];
+ bool map_clock_to_user;
+};
+
+struct mlx4_func_cap {
+ u8 num_ports;
+ u8 flags;
+ u32 pf_context_behaviour;
+ int qp_quota;
+ int cq_quota;
+ int srq_quota;
+ int mpt_quota;
+ int mtt_quota;
+ int max_eq;
+ int reserved_eq;
+ int mcg_quota;
+ struct mlx4_spec_qps spec_qps;
+ u32 reserved_lkey;
+ u8 physical_port;
+ u8 flags0;
+ u8 flags1;
+ u64 phys_port_id;
+ u32 extra_flags;
+};
+
+struct mlx4_func {
+ int bus;
+ int device;
+ int function;
+ int physical_function;
+ int rsvd_eqs;
+ int max_eq;
+ int rsvd_uars;
+};
+
+struct mlx4_adapter {
+ char board_id[MLX4_BOARD_ID_LEN];
+ u8 inta_pin;
+};
+
+struct mlx4_init_hca_param {
+ u64 qpc_base;
+ u64 rdmarc_base;
+ u64 auxc_base;
+ u64 altc_base;
+ u64 srqc_base;
+ u64 cqc_base;
+ u64 eqc_base;
+ u64 mc_base;
+ u64 dmpt_base;
+ u64 cmpt_base;
+ u64 mtt_base;
+ u64 global_caps;
+ u8 log_mc_entry_sz;
+ u8 log_mc_hash_sz;
+ u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */
+ u8 log_num_qps;
+ u8 log_num_srqs;
+ u8 log_num_cqs;
+ u8 log_num_eqs;
+ u16 num_sys_eqs;
+ u8 log_rd_per_qp;
+ u8 log_mc_table_sz;
+ u8 log_mpt_sz;
+ u8 log_uar_sz;
+ u8 mw_enabled; /* Enable memory windows */
+ u8 uar_page_sz; /* log pg sz in 4k chunks */
+ u8 steering_mode; /* for QUERY_HCA */
+ u8 dmfs_high_steer_mode; /* for QUERY_HCA */
+ u64 dev_cap_enabled;
+ u16 cqe_size; /* For use only when CQE stride feature enabled */
+ u16 eqe_size; /* For use only when EQE stride feature enabled */
+ u8 rss_ip_frags;
+ u8 phv_check_en; /* for QUERY_HCA */
+};
+
+struct mlx4_init_ib_param {
+ int port_width;
+ int vl_cap;
+ int mtu_cap;
+ u16 gid_cap;
+ u16 pkey_cap;
+ int set_guid0;
+ u64 guid0;
+ int set_node_guid;
+ u64 node_guid;
+ int set_si_guid;
+ u64 si_guid;
+};
+
+struct mlx4_set_ib_param {
+ int set_si_guid;
+ int reset_qkey_viol;
+ u64 si_guid;
+ u32 cap_mask;
+};
+
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
+int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
+int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap);
+int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
+ struct mlx4_func_cap *func_cap);
+int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave);
+int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm);
+int mlx4_UNMAP_FA(struct mlx4_dev *dev);
+int mlx4_RUN_FW(struct mlx4_dev *dev);
+int mlx4_QUERY_FW(struct mlx4_dev *dev);
+int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
+int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
+int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
+int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
+int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
+int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
+int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
+int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
+int mlx4_NOP(struct mlx4_dev *dev);
+int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg);
+void mlx4_opreq_action(struct work_struct *work);
+
+#endif /* MLX4_FW_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
new file mode 100644
index 000000000..3a09d7122
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies.
+ * All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/export.h>
+#include "fw_qos.h"
+#include "fw.h"
+
+enum {
+ /* allocate vpp opcode modifiers */
+ MLX4_ALLOCATE_VPP_ALLOCATE = 0x0,
+ MLX4_ALLOCATE_VPP_QUERY = 0x1
+};
+
+enum {
+ /* set vport qos opcode modifiers */
+ MLX4_SET_VPORT_QOS_SET = 0x0,
+ MLX4_SET_VPORT_QOS_QUERY = 0x1
+};
+
+struct mlx4_set_port_prio2tc_context {
+ u8 prio2tc[4];
+};
+
+struct mlx4_port_scheduler_tc_cfg_be {
+ __be16 pg;
+ __be16 bw_precentage;
+ __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */
+ __be16 max_bw_value;
+};
+
+struct mlx4_set_port_scheduler_context {
+ struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC];
+};
+
+/* Granular Qos (per VF) section */
+struct mlx4_alloc_vpp_param {
+ __be32 available_vpp;
+ __be32 vpp_p_up[MLX4_NUM_UP];
+};
+
+struct mlx4_prio_qos_param {
+ __be32 bw_share;
+ __be32 max_avg_bw;
+ __be32 reserved;
+ __be32 enable;
+ __be32 reserved1[4];
+};
+
+struct mlx4_set_vport_context {
+ __be32 reserved[8];
+ struct mlx4_prio_qos_param qos_p_up[MLX4_NUM_UP];
+};
+
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_prio2tc_context *context;
+ int err;
+ u32 in_mod;
+ int i;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+
+ for (i = 0; i < MLX4_NUM_UP; i += 2)
+ context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1];
+
+ in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC);
+
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+ u8 *pg, u16 *ratelimit)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_scheduler_context *context;
+ int err;
+ u32 in_mod;
+ int i;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+
+ for (i = 0; i < MLX4_NUM_TC; i++) {
+ struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i];
+ u16 r;
+
+ if (ratelimit && ratelimit[i]) {
+ if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) {
+ r = ratelimit[i];
+ tc->max_bw_units =
+ htons(MLX4_RATELIMIT_100M_UNITS);
+ } else {
+ r = ratelimit[i] / 10;
+ tc->max_bw_units =
+ htons(MLX4_RATELIMIT_1G_UNITS);
+ }
+ tc->max_bw_value = htons(r);
+ } else {
+ tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT);
+ tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS);
+ }
+
+ tc->pg = htons(pg[i]);
+ tc->bw_precentage = htons(tc_tx_bw[i]);
+ }
+
+ in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
+
+int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
+ u16 *available_vpp, u8 *vpp_p_up)
+{
+ int i;
+ int err;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_alloc_vpp_param *out_param;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ out_param = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, port,
+ MLX4_ALLOCATE_VPP_QUERY,
+ MLX4_CMD_ALLOCATE_VPP,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ /* Total number of supported VPPs */
+ *available_vpp = (u16)be32_to_cpu(out_param->available_vpp);
+
+ for (i = 0; i < MLX4_NUM_UP; i++)
+ vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]);
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_get);
+
+int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up)
+{
+ int i;
+ int err;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_alloc_vpp_param *in_param;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ in_param = mailbox->buf;
+
+ for (i = 0; i < MLX4_NUM_UP; i++)
+ in_param->vpp_p_up[i] = cpu_to_be32(vpp_p_up[i]);
+
+ err = mlx4_cmd(dev, mailbox->dma, port,
+ MLX4_ALLOCATE_VPP_ALLOCATE,
+ MLX4_CMD_ALLOCATE_VPP,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_ALLOCATE_VPP_set);
+
+int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport,
+ struct mlx4_vport_qos_param *out_param)
+{
+ int i;
+ int err;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_vport_context *ctx;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ctx = mailbox->buf;
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, (vport << 8) | port,
+ MLX4_SET_VPORT_QOS_QUERY,
+ MLX4_CMD_SET_VPORT_QOS,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX4_NUM_UP; i++) {
+ out_param[i].bw_share = be32_to_cpu(ctx->qos_p_up[i].bw_share);
+ out_param[i].max_avg_bw =
+ be32_to_cpu(ctx->qos_p_up[i].max_avg_bw);
+ out_param[i].enable =
+ !!(be32_to_cpu(ctx->qos_p_up[i].enable) & 31);
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_get);
+
+int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport,
+ struct mlx4_vport_qos_param *in_param)
+{
+ int i;
+ int err;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_vport_context *ctx;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ctx = mailbox->buf;
+
+ for (i = 0; i < MLX4_NUM_UP; i++) {
+ ctx->qos_p_up[i].bw_share = cpu_to_be32(in_param[i].bw_share);
+ ctx->qos_p_up[i].max_avg_bw =
+ cpu_to_be32(in_param[i].max_avg_bw);
+ ctx->qos_p_up[i].enable =
+ cpu_to_be32(in_param[i].enable << 31);
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma, (vport << 8) | port,
+ MLX4_SET_VPORT_QOS_SET,
+ MLX4_CMD_SET_VPORT_QOS,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_VPORT_QOS_set);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
new file mode 100644
index 000000000..582997577
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies.
+ * All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_FW_QOS_H
+#define MLX4_FW_QOS_H
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/device.h>
+
+#define MLX4_NUM_UP 8
+#define MLX4_NUM_TC 8
+
+/* Default supported priorities for VPP allocation */
+#define MLX4_DEFAULT_QOS_PRIO (0)
+
+/* Derived from FW feature definition, 0 is the default vport fo all QPs */
+#define MLX4_VPP_DEFAULT_VPORT (0)
+
+struct mlx4_vport_qos_param {
+ u32 bw_share;
+ u32 max_avg_bw;
+ u8 enable;
+};
+
+/**
+ * mlx4_SET_PORT_PRIO2TC - This routine maps user priorities to traffic
+ * classes of a given port and device.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @prio2tc: Array of TC associated with each priorities.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
+
+/**
+ * mlx4_SET_PORT_SCHEDULER - This routine configures the arbitration between
+ * traffic classes (ETS) and configured rate limit for traffic classes.
+ * tc_tx_bw, pg and ratelimit are arrays where each index represents a TC.
+ * The description for those parameters below refers to a single TC.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @tc_tx_bw: The percentage of the bandwidth allocated for traffic class
+ * within a TC group. The sum of the bw_percentage of all the traffic
+ * classes within a TC group must equal 100% for correct operation.
+ * @pg: The TC group the traffic class is associated with.
+ * @ratelimit: The maximal bandwidth allowed for the use by this traffic class.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
+ u8 *pg, u16 *ratelimit);
+/**
+ * mlx4_ALLOCATE_VPP_get - Query port VPP available resources and allocation.
+ * Before distribution of VPPs to priorities, only available_vpp is returned.
+ * After initialization it returns the distribution of VPPs among priorities.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @available_vpp: Pointer to variable where number of available VPPs is stored
+ * @vpp_p_up: Distribution of VPPs to priorities is stored in this array
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
+ u16 *available_vpp, u8 *vpp_p_up);
+/**
+ * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities.
+ * The total number of VPPs assigned to all for a port must not exceed
+ * the value reported by available_vpp in mlx4_ALLOCATE_VPP_get.
+ * VPP allocation is allowed only after the port type has been set,
+ * and while no QPs are open for this port.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @vpp_p_up: Allocation of VPPs to different priorities.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_ALLOCATE_VPP_set(struct mlx4_dev *dev, u8 port, u8 *vpp_p_up);
+
+/**
+ * mlx4_SET_VPORT_QOS_get - Query QoS proporties of a Vport.
+ * Each priority allowed for the Vport is assigned with a share of the BW,
+ * and a BW limitation. This commands query the current QoS values.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @vport: Vport id.
+ * @out_param: Array of mlx4_vport_qos_param that will contain the values.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport,
+ struct mlx4_vport_qos_param *out_param);
+
+/**
+ * mlx4_SET_VPORT_QOS_set - Set QoS proporties of a Vport.
+ * QoS parameters can be modified at any time, but must be initialized
+ * before any QP is associated with the VPort.
+ *
+ * @dev: mlx4_dev.
+ * @port: Physical port number.
+ * @vport: Vport id.
+ * @out_param: Array of mlx4_vport_qos_param which holds the requested values.
+ *
+ * Returns 0 on success or a negative mlx4_core errno code.
+ **/
+int mlx4_SET_VPORT_QOS_set(struct mlx4_dev *dev, u8 port, u8 vport,
+ struct mlx4_vport_qos_param *in_param);
+
+#endif /* MLX4_FW_QOS_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
new file mode 100644
index 000000000..288fca826
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+#include "icm.h"
+#include "fw.h"
+
+/*
+ * We allocate in as big chunks as we can, up to a maximum of 256 KB
+ * per chunk. Note that the chunks are not necessarily in contiguous
+ * physical memory.
+ */
+enum {
+ MLX4_ICM_ALLOC_SIZE = 1 << 18,
+ MLX4_TABLE_CHUNK_SIZE = 1 << 18,
+};
+
+static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
+{
+ int i;
+
+ if (chunk->nsg > 0)
+ pci_unmap_sg(dev->persist->pdev, chunk->sg, chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ for (i = 0; i < chunk->npages; ++i)
+ __free_pages(sg_page(&chunk->sg[i]),
+ get_order(chunk->sg[i].length));
+}
+
+static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
+{
+ int i;
+
+ for (i = 0; i < chunk->npages; ++i)
+ dma_free_coherent(&dev->persist->pdev->dev,
+ chunk->buf[i].size,
+ chunk->buf[i].addr,
+ chunk->buf[i].dma_addr);
+}
+
+void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
+{
+ struct mlx4_icm_chunk *chunk, *tmp;
+
+ if (!icm)
+ return;
+
+ list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
+ if (coherent)
+ mlx4_free_icm_coherent(dev, chunk);
+ else
+ mlx4_free_icm_pages(dev, chunk);
+
+ kfree(chunk);
+ }
+
+ kfree(icm);
+}
+
+static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
+ gfp_t gfp_mask, int node)
+{
+ struct page *page;
+
+ page = alloc_pages_node(node, gfp_mask, order);
+ if (!page) {
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
+ return -ENOMEM;
+ }
+
+ sg_set_page(mem, page, PAGE_SIZE << order, 0);
+ return 0;
+}
+
+static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf,
+ int order, gfp_t gfp_mask)
+{
+ buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order,
+ &buf->dma_addr, gfp_mask);
+ if (!buf->addr)
+ return -ENOMEM;
+
+ if (offset_in_page(buf->addr)) {
+ dma_free_coherent(dev, PAGE_SIZE << order, buf->addr,
+ buf->dma_addr);
+ return -ENOMEM;
+ }
+
+ buf->size = PAGE_SIZE << order;
+ return 0;
+}
+
+struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
+ gfp_t gfp_mask, int coherent)
+{
+ struct mlx4_icm *icm;
+ struct mlx4_icm_chunk *chunk = NULL;
+ int cur_order;
+ gfp_t mask;
+ int ret;
+
+ /* We use sg_set_buf for coherent allocs, which assumes low memory */
+ BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
+
+ icm = kmalloc_node(sizeof(*icm),
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
+ dev->numa_node);
+ if (!icm) {
+ icm = kmalloc(sizeof(*icm),
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!icm)
+ return NULL;
+ }
+
+ icm->refcount = 0;
+ INIT_LIST_HEAD(&icm->chunk_list);
+
+ cur_order = get_order(MLX4_ICM_ALLOC_SIZE);
+
+ while (npages > 0) {
+ if (!chunk) {
+ chunk = kzalloc_node(sizeof(*chunk),
+ gfp_mask & ~(__GFP_HIGHMEM |
+ __GFP_NOWARN),
+ dev->numa_node);
+ if (!chunk) {
+ chunk = kzalloc(sizeof(*chunk),
+ gfp_mask & ~(__GFP_HIGHMEM |
+ __GFP_NOWARN));
+ if (!chunk)
+ goto fail;
+ }
+ chunk->coherent = coherent;
+
+ if (!coherent)
+ sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN);
+ list_add_tail(&chunk->list, &icm->chunk_list);
+ }
+
+ while (1 << cur_order > npages)
+ --cur_order;
+
+ mask = gfp_mask;
+ if (cur_order)
+ mask &= ~__GFP_DIRECT_RECLAIM;
+
+ if (coherent)
+ ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev,
+ &chunk->buf[chunk->npages],
+ cur_order, mask);
+ else
+ ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages],
+ cur_order, mask,
+ dev->numa_node);
+
+ if (ret) {
+ if (--cur_order < 0)
+ goto fail;
+ else
+ continue;
+ }
+
+ ++chunk->npages;
+
+ if (coherent)
+ ++chunk->nsg;
+ else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
+ chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->sg,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ if (chunk->nsg <= 0)
+ goto fail;
+ }
+
+ if (chunk->npages == MLX4_ICM_CHUNK_LEN)
+ chunk = NULL;
+
+ npages -= 1 << cur_order;
+ }
+
+ if (!coherent && chunk) {
+ chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->sg,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ if (chunk->nsg <= 0)
+ goto fail;
+ }
+
+ return icm;
+
+fail:
+ mlx4_free_icm(dev, icm, coherent);
+ return NULL;
+}
+
+static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
+{
+ return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt);
+}
+
+static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
+{
+ return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
+int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
+{
+ return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM_AUX, icm, -1);
+}
+
+int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
+{
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
+{
+ u32 i = (obj & (table->num_obj - 1)) /
+ (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
+ int ret = 0;
+
+ mutex_lock(&table->mutex);
+
+ if (table->icm[i]) {
+ ++table->icm[i]->refcount;
+ goto out;
+ }
+
+ table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+ (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+ __GFP_NOWARN, table->coherent);
+ if (!table->icm[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (mlx4_MAP_ICM(dev, table->icm[i], table->virt +
+ (u64) i * MLX4_TABLE_CHUNK_SIZE)) {
+ mlx4_free_icm(dev, table->icm[i], table->coherent);
+ table->icm[i] = NULL;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ++table->icm[i]->refcount;
+
+out:
+ mutex_unlock(&table->mutex);
+ return ret;
+}
+
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
+{
+ u32 i;
+ u64 offset;
+
+ i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
+
+ mutex_lock(&table->mutex);
+
+ if (--table->icm[i]->refcount == 0) {
+ offset = (u64) i * MLX4_TABLE_CHUNK_SIZE;
+ mlx4_UNMAP_ICM(dev, table->virt + offset,
+ MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
+ mlx4_free_icm(dev, table->icm[i], table->coherent);
+ table->icm[i] = NULL;
+ }
+
+ mutex_unlock(&table->mutex);
+}
+
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
+ dma_addr_t *dma_handle)
+{
+ int offset, dma_offset, i;
+ u64 idx;
+ struct mlx4_icm_chunk *chunk;
+ struct mlx4_icm *icm;
+ void *addr = NULL;
+
+ if (!table->lowmem)
+ return NULL;
+
+ mutex_lock(&table->mutex);
+
+ idx = (u64) (obj & (table->num_obj - 1)) * table->obj_size;
+ icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
+ dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
+
+ if (!icm)
+ goto out;
+
+ list_for_each_entry(chunk, &icm->chunk_list, list) {
+ for (i = 0; i < chunk->npages; ++i) {
+ dma_addr_t dma_addr;
+ size_t len;
+
+ if (table->coherent) {
+ len = chunk->buf[i].size;
+ dma_addr = chunk->buf[i].dma_addr;
+ addr = chunk->buf[i].addr;
+ } else {
+ struct page *page;
+
+ len = sg_dma_len(&chunk->sg[i]);
+ dma_addr = sg_dma_address(&chunk->sg[i]);
+
+ /* XXX: we should never do this for highmem
+ * allocation. This function either needs
+ * to be split, or the kernel virtual address
+ * return needs to be made optional.
+ */
+ page = sg_page(&chunk->sg[i]);
+ addr = lowmem_page_address(page);
+ }
+
+ if (dma_handle && dma_offset >= 0) {
+ if (len > dma_offset)
+ *dma_handle = dma_addr + dma_offset;
+ dma_offset -= len;
+ }
+
+ /*
+ * DMA mapping can merge pages but not split them,
+ * so if we found the page, dma_handle has already
+ * been assigned to.
+ */
+ if (len > offset)
+ goto out;
+ offset -= len;
+ }
+ }
+
+ addr = NULL;
+out:
+ mutex_unlock(&table->mutex);
+ return addr ? addr + offset : NULL;
+}
+
+int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end)
+{
+ int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
+ int err;
+ u32 i;
+
+ for (i = start; i <= end; i += inc) {
+ err = mlx4_table_get(dev, table, i);
+ if (err)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ while (i > start) {
+ i -= inc;
+ mlx4_table_put(dev, table, i);
+ }
+
+ return err;
+}
+
+void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end)
+{
+ u32 i;
+
+ for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
+ mlx4_table_put(dev, table, i);
+}
+
+int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u64 virt, int obj_size, u32 nobj, int reserved,
+ int use_lowmem, int use_coherent)
+{
+ int obj_per_chunk;
+ int num_icm;
+ unsigned chunk_size;
+ int i;
+ u64 size;
+
+ obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
+ if (WARN_ON(!obj_per_chunk))
+ return -EINVAL;
+ num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
+
+ table->icm = kvcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
+ if (!table->icm)
+ return -ENOMEM;
+ table->virt = virt;
+ table->num_icm = num_icm;
+ table->num_obj = nobj;
+ table->obj_size = obj_size;
+ table->lowmem = use_lowmem;
+ table->coherent = use_coherent;
+ mutex_init(&table->mutex);
+
+ size = (u64) nobj * obj_size;
+ for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
+ chunk_size = MLX4_TABLE_CHUNK_SIZE;
+ if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > size)
+ chunk_size = PAGE_ALIGN(size -
+ i * MLX4_TABLE_CHUNK_SIZE);
+
+ table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
+ (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+ __GFP_NOWARN, use_coherent);
+ if (!table->icm[i])
+ goto err;
+ if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) {
+ mlx4_free_icm(dev, table->icm[i], use_coherent);
+ table->icm[i] = NULL;
+ goto err;
+ }
+
+ /*
+ * Add a reference to this ICM chunk so that it never
+ * gets freed (since it contains reserved firmware objects).
+ */
+ ++table->icm[i]->refcount;
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < num_icm; ++i)
+ if (table->icm[i]) {
+ mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
+ MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
+ mlx4_free_icm(dev, table->icm[i], use_coherent);
+ }
+
+ kvfree(table->icm);
+
+ return -ENOMEM;
+}
+
+void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
+{
+ int i;
+
+ for (i = 0; i < table->num_icm; ++i)
+ if (table->icm[i]) {
+ mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
+ MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
+ mlx4_free_icm(dev, table->icm[i], table->coherent);
+ }
+
+ kvfree(table->icm);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
new file mode 100644
index 000000000..d199874b1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_ICM_H
+#define MLX4_ICM_H
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/mutex.h>
+
+#define MLX4_ICM_CHUNK_LEN \
+ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
+ (sizeof(struct scatterlist)))
+
+enum {
+ MLX4_ICM_PAGE_SHIFT = 12,
+ MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT,
+};
+
+struct mlx4_icm_buf {
+ void *addr;
+ size_t size;
+ dma_addr_t dma_addr;
+};
+
+struct mlx4_icm_chunk {
+ struct list_head list;
+ int npages;
+ int nsg;
+ bool coherent;
+ union {
+ struct scatterlist sg[MLX4_ICM_CHUNK_LEN];
+ struct mlx4_icm_buf buf[MLX4_ICM_CHUNK_LEN];
+ };
+};
+
+struct mlx4_icm {
+ struct list_head chunk_list;
+ int refcount;
+};
+
+struct mlx4_icm_iter {
+ struct mlx4_icm *icm;
+ struct mlx4_icm_chunk *chunk;
+ int page_idx;
+};
+
+struct mlx4_dev;
+
+struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
+ gfp_t gfp_mask, int coherent);
+void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
+
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end);
+void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u32 start, u32 end);
+int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
+ u64 virt, int obj_size, u32 nobj, int reserved,
+ int use_lowmem, int use_coherent);
+void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
+void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, dma_addr_t *dma_handle);
+
+static inline void mlx4_icm_first(struct mlx4_icm *icm,
+ struct mlx4_icm_iter *iter)
+{
+ iter->icm = icm;
+ iter->chunk = list_empty(&icm->chunk_list) ?
+ NULL : list_entry(icm->chunk_list.next,
+ struct mlx4_icm_chunk, list);
+ iter->page_idx = 0;
+}
+
+static inline int mlx4_icm_last(struct mlx4_icm_iter *iter)
+{
+ return !iter->chunk;
+}
+
+static inline void mlx4_icm_next(struct mlx4_icm_iter *iter)
+{
+ if (++iter->page_idx >= iter->chunk->nsg) {
+ if (iter->chunk->list.next == &iter->icm->chunk_list) {
+ iter->chunk = NULL;
+ return;
+ }
+
+ iter->chunk = list_entry(iter->chunk->list.next,
+ struct mlx4_icm_chunk, list);
+ iter->page_idx = 0;
+ }
+}
+
+static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter)
+{
+ if (iter->chunk->coherent)
+ return iter->chunk->buf[iter->page_idx].dma_addr;
+ else
+ return sg_dma_address(&iter->chunk->sg[iter->page_idx]);
+}
+
+static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
+{
+ if (iter->chunk->coherent)
+ return iter->chunk->buf[iter->page_idx].size;
+ else
+ return sg_dma_len(&iter->chunk->sg[iter->page_idx]);
+}
+
+int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
+int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
+
+#endif /* MLX4_ICM_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
new file mode 100644
index 000000000..65482f004
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <net/devlink.h>
+
+#include "mlx4.h"
+
+struct mlx4_device_context {
+ struct list_head list;
+ struct list_head bond_list;
+ struct mlx4_interface *intf;
+ void *context;
+};
+
+static LIST_HEAD(intf_list);
+static LIST_HEAD(dev_list);
+static DEFINE_MUTEX(intf_mutex);
+
+static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
+{
+ struct mlx4_device_context *dev_ctx;
+
+ dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
+ if (!dev_ctx)
+ return;
+
+ dev_ctx->intf = intf;
+ dev_ctx->context = intf->add(&priv->dev);
+
+ if (dev_ctx->context) {
+ spin_lock_irq(&priv->ctx_lock);
+ list_add_tail(&dev_ctx->list, &priv->ctx_list);
+ spin_unlock_irq(&priv->ctx_lock);
+ if (intf->activate)
+ intf->activate(&priv->dev, dev_ctx->context);
+ } else
+ kfree(dev_ctx);
+
+}
+
+static void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
+{
+ struct mlx4_device_context *dev_ctx;
+
+ list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+ if (dev_ctx->intf == intf) {
+ spin_lock_irq(&priv->ctx_lock);
+ list_del(&dev_ctx->list);
+ spin_unlock_irq(&priv->ctx_lock);
+
+ intf->remove(&priv->dev, dev_ctx->context);
+ kfree(dev_ctx);
+ return;
+ }
+}
+
+int mlx4_register_interface(struct mlx4_interface *intf)
+{
+ struct mlx4_priv *priv;
+
+ if (!intf->add || !intf->remove)
+ return -EINVAL;
+
+ mutex_lock(&intf_mutex);
+
+ list_add_tail(&intf->list, &intf_list);
+ list_for_each_entry(priv, &dev_list, dev_list) {
+ if (mlx4_is_mfunc(&priv->dev) && (intf->flags & MLX4_INTFF_BONDING)) {
+ mlx4_dbg(&priv->dev,
+ "SRIOV, disabling HA mode for intf proto %d\n", intf->protocol);
+ intf->flags &= ~MLX4_INTFF_BONDING;
+ }
+ mlx4_add_device(intf, priv);
+ }
+
+ mutex_unlock(&intf_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_register_interface);
+
+void mlx4_unregister_interface(struct mlx4_interface *intf)
+{
+ struct mlx4_priv *priv;
+
+ mutex_lock(&intf_mutex);
+
+ list_for_each_entry(priv, &dev_list, dev_list)
+ mlx4_remove_device(intf, priv);
+
+ list_del(&intf->list);
+
+ mutex_unlock(&intf_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
+
+int mlx4_do_bond(struct mlx4_dev *dev, bool enable)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx;
+ unsigned long flags;
+ int ret;
+ LIST_HEAD(bond_list);
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
+ return -EOPNOTSUPP;
+
+ ret = mlx4_disable_rx_port_check(dev, enable);
+ if (ret) {
+ mlx4_err(dev, "Fail to %s rx port check\n",
+ enable ? "enable" : "disable");
+ return ret;
+ }
+ if (enable) {
+ dev->flags |= MLX4_FLAG_BONDED;
+ } else {
+ ret = mlx4_virt2phy_port_map(dev, 1, 2);
+ if (ret) {
+ mlx4_err(dev, "Fail to reset port map\n");
+ return ret;
+ }
+ dev->flags &= ~MLX4_FLAG_BONDED;
+ }
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+ list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) {
+ if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) {
+ list_add_tail(&dev_ctx->bond_list, &bond_list);
+ list_del(&dev_ctx->list);
+ }
+ }
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+ list_for_each_entry(dev_ctx, &bond_list, bond_list) {
+ dev_ctx->intf->remove(dev, dev_ctx->context);
+ dev_ctx->context = dev_ctx->intf->add(dev);
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+ list_add_tail(&dev_ctx->list, &priv->ctx_list);
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+ mlx4_dbg(dev, "Interface for protocol %d restarted with bonded mode %s\n",
+ dev_ctx->intf->protocol, enable ?
+ "enabled" : "disabled");
+ }
+ return 0;
+}
+
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
+ unsigned long param)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_device_context *dev_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+
+ list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+ if (dev_ctx->intf->event)
+ dev_ctx->intf->event(dev, dev_ctx->context, type, param);
+
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+}
+
+int mlx4_register_device(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_interface *intf;
+
+ mutex_lock(&intf_mutex);
+
+ dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP;
+ list_add_tail(&priv->dev_list, &dev_list);
+ list_for_each_entry(intf, &intf_list, list)
+ mlx4_add_device(intf, priv);
+
+ mutex_unlock(&intf_mutex);
+ mlx4_start_catas_poll(dev);
+
+ return 0;
+}
+
+void mlx4_unregister_device(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_interface *intf;
+
+ if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP))
+ return;
+
+ mlx4_stop_catas_poll(dev);
+ if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION &&
+ mlx4_is_slave(dev)) {
+ /* In mlx4_remove_one on a VF */
+ u32 slave_read =
+ swab32(readl(&mlx4_priv(dev)->mfunc.comm->slave_read));
+
+ if (mlx4_comm_internal_err(slave_read)) {
+ mlx4_dbg(dev, "%s: comm channel is down, entering error state.\n",
+ __func__);
+ mlx4_enter_error_state(dev->persist);
+ }
+ }
+ mutex_lock(&intf_mutex);
+
+ list_for_each_entry(intf, &intf_list, list)
+ mlx4_remove_device(intf, priv);
+
+ list_del(&priv->dev_list);
+ dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
+
+ mutex_unlock(&intf_mutex);
+}
+
+void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_device_context *dev_ctx;
+ unsigned long flags;
+ void *result = NULL;
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+
+ list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+ if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_dev) {
+ result = dev_ctx->intf->get_dev(dev, dev_ctx->context, port);
+ break;
+ }
+
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev);
+
+struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port)
+{
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+
+ return &info->devlink_port;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_devlink_port);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
new file mode 100644
index 000000000..d9707d47f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -0,0 +1,4462 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/delay.h>
+#include <linux/kmod.h>
+#include <linux/etherdevice.h>
+#include <net/devlink.h>
+
+#include <uapi/rdma/mlx4-abi.h>
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/doorbell.h>
+
+#include "mlx4.h"
+#include "fw.h"
+#include "icm.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+struct workqueue_struct *mlx4_wq;
+
+#ifdef CONFIG_MLX4_DEBUG
+
+int mlx4_debug_level = 0;
+module_param_named(debug_level, mlx4_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+
+#endif /* CONFIG_MLX4_DEBUG */
+
+#ifdef CONFIG_PCI_MSI
+
+static int msi_x = 1;
+module_param(msi_x, int, 0444);
+MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x");
+
+#else /* CONFIG_PCI_MSI */
+
+#define msi_x (0)
+
+#endif /* CONFIG_PCI_MSI */
+
+static uint8_t num_vfs[3] = {0, 0, 0};
+static int num_vfs_argc;
+module_param_array(num_vfs, byte , &num_vfs_argc, 0444);
+MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
+ "num_vfs=port1,port2,port1+2");
+
+static uint8_t probe_vf[3] = {0, 0, 0};
+static int probe_vfs_argc;
+module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
+MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
+ "probe_vf=port1,port2,port1+2");
+
+static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+module_param_named(log_num_mgm_entry_size,
+ mlx4_log_num_mgm_entry_size, int, 0444);
+MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
+ " of qp per mcg, for example:"
+ " 10 gives 248.range: 7 <="
+ " log_num_mgm_entry_size <= 12."
+ " To activate device managed"
+ " flow steering when available, set to -1");
+
+static bool enable_64b_cqe_eqe = true;
+module_param(enable_64b_cqe_eqe, bool, 0444);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+ "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
+
+static bool enable_4k_uar;
+module_param(enable_4k_uar, bool, 0444);
+MODULE_PARM_DESC(enable_4k_uar,
+ "Enable using 4K UAR. Should not be enabled if have VFs which do not support 4K UARs (default: false)");
+
+#define PF_CONTEXT_BEHAVIOUR_MASK (MLX4_FUNC_CAP_64B_EQE_CQE | \
+ MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
+ MLX4_FUNC_CAP_DMFS_A0_STATIC)
+
+#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV)
+
+static char mlx4_version[] =
+ DRV_NAME ": Mellanox ConnectX core driver v"
+ DRV_VERSION "\n";
+
+static const struct mlx4_profile default_profile = {
+ .num_qp = 1 << 18,
+ .num_srq = 1 << 16,
+ .rdmarc_per_qp = 1 << 4,
+ .num_cq = 1 << 16,
+ .num_mcg = 1 << 13,
+ .num_mpt = 1 << 19,
+ .num_mtt = 1 << 20, /* It is really num mtt segements */
+};
+
+static const struct mlx4_profile low_mem_profile = {
+ .num_qp = 1 << 17,
+ .num_srq = 1 << 6,
+ .rdmarc_per_qp = 1 << 4,
+ .num_cq = 1 << 8,
+ .num_mcg = 1 << 8,
+ .num_mpt = 1 << 9,
+ .num_mtt = 1 << 7,
+};
+
+static int log_num_mac = 7;
+module_param_named(log_num_mac, log_num_mac, int, 0444);
+MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
+
+static int log_num_vlan;
+module_param_named(log_num_vlan, log_num_vlan, int, 0444);
+MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
+/* Log2 max number of VLANs per ETH port (0-7) */
+#define MLX4_LOG_NUM_VLANS 7
+#define MLX4_MIN_LOG_NUM_VLANS 0
+#define MLX4_MIN_LOG_NUM_MAC 1
+
+static bool use_prio;
+module_param_named(use_prio, use_prio, bool, 0444);
+MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
+
+int log_mtts_per_seg = ilog2(1);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
+ "(0-7) (default: 0)");
+
+static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
+static int arr_argc = 2;
+module_param_array(port_type_array, int, &arr_argc, 0444);
+MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
+ "1 for IB, 2 for Ethernet");
+
+struct mlx4_port_config {
+ struct list_head list;
+ enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
+ struct pci_dev *pdev;
+};
+
+static atomic_t pf_loading = ATOMIC_INIT(0);
+
+static int mlx4_devlink_ierr_reset_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ ctx->val.vbool = !!mlx4_internal_err_reset;
+ return 0;
+}
+
+static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ mlx4_internal_err_reset = ctx->val.vbool;
+ return 0;
+}
+
+static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx4_priv *priv = devlink_priv(devlink);
+ struct mlx4_dev *dev = &priv->dev;
+
+ ctx->val.vbool = dev->persist->crdump.snapshot_enable;
+ return 0;
+}
+
+static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx4_priv *priv = devlink_priv(devlink);
+ struct mlx4_dev *dev = &priv->dev;
+
+ dev->persist->crdump.snapshot_enable = ctx->val.vbool;
+ return 0;
+}
+
+static int
+mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ u32 value = val.vu32;
+
+ if (value < 1 || value > 128)
+ return -ERANGE;
+
+ if (!is_power_of_2(value)) {
+ NL_SET_ERR_MSG_MOD(extack, "max_macs supported must be power of 2");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+enum mlx4_devlink_param_id {
+ MLX4_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+ MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+};
+
+static const struct devlink_param mlx4_devlink_params[] = {
+ DEVLINK_PARAM_GENERIC(INT_ERR_RESET,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ mlx4_devlink_ierr_reset_get,
+ mlx4_devlink_ierr_reset_set, NULL),
+ DEVLINK_PARAM_GENERIC(MAX_MACS,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, mlx4_devlink_max_macs_validate),
+ DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ mlx4_devlink_crdump_snapshot_get,
+ mlx4_devlink_crdump_snapshot_set, NULL),
+ DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+ "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL),
+ DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+ "enable_4k_uar", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+ NULL, NULL, NULL),
+};
+
+static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
+{
+ union devlink_param_value value;
+
+ value.vbool = !!mlx4_internal_err_reset;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+ value);
+
+ value.vu32 = 1UL << log_num_mac;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ value);
+
+ value.vbool = enable_64b_cqe_eqe;
+ devlink_param_driverinit_value_set(devlink,
+ MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+ value);
+
+ value.vbool = enable_4k_uar;
+ devlink_param_driverinit_value_set(devlink,
+ MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+ value);
+
+ value.vbool = false;
+ devlink_param_driverinit_value_set(devlink,
+ DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+ value);
+}
+
+static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap)
+{
+ /* The reserved_uars is calculated by system page size unit.
+ * Therefore, adjustment is added when the uar page size is less
+ * than the system page size
+ */
+ dev->caps.reserved_uars =
+ max_t(int,
+ mlx4_get_num_reserved_uar(dev),
+ dev_cap->reserved_uars /
+ (1 << (PAGE_SHIFT - dev->uar_page_shift)));
+}
+
+int mlx4_check_port_params(struct mlx4_dev *dev,
+ enum mlx4_port_type *port_type)
+{
+ int i;
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+ for (i = 0; i < dev->caps.num_ports - 1; i++) {
+ if (port_type[i] != port_type[i + 1]) {
+ mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
+ return -EOPNOTSUPP;
+ }
+ }
+ }
+
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (!(port_type[i] & dev->caps.supported_type[i+1])) {
+ mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
+ i + 1);
+ return -EOPNOTSUPP;
+ }
+ }
+ return 0;
+}
+
+static void mlx4_set_port_mask(struct mlx4_dev *dev)
+{
+ int i;
+
+ for (i = 1; i <= dev->caps.num_ports; ++i)
+ dev->caps.port_mask[i] = dev->caps.port_type[i];
+}
+
+enum {
+ MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
+};
+
+static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+ int err = 0;
+ struct mlx4_func func;
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+ err = mlx4_QUERY_FUNC(dev, &func, 0);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ return err;
+ }
+ dev_cap->max_eqs = func.max_eq;
+ dev_cap->reserved_eqs = func.rsvd_eqs;
+ dev_cap->reserved_uars = func.rsvd_uars;
+ err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
+ }
+ return err;
+}
+
+static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
+{
+ struct mlx4_caps *dev_cap = &dev->caps;
+
+ /* FW not supporting or cancelled by user */
+ if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
+ !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
+ return;
+
+ /* Must have 64B CQE_EQE enabled by FW to use bigger stride
+ * When FW has NCSI it may decide not to report 64B CQE/EQEs
+ */
+ if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
+ !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+ return;
+ }
+
+ if (cache_line_size() == 128 || cache_line_size() == 256) {
+ mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
+ /* Changing the real data inside CQE size to 32B */
+ dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+ dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+
+ if (mlx4_is_master(dev))
+ dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
+ } else {
+ if (cache_line_size() != 32 && cache_line_size() != 64)
+ mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+ }
+}
+
+static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
+ struct mlx4_port_cap *port_cap)
+{
+ dev->caps.vl_cap[port] = port_cap->max_vl;
+ dev->caps.ib_mtu_cap[port] = port_cap->ib_mtu;
+ dev->phys_caps.gid_phys_table_len[port] = port_cap->max_gids;
+ dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
+ /* set gid and pkey table operating lengths by default
+ * to non-sriov values
+ */
+ dev->caps.gid_table_len[port] = port_cap->max_gids;
+ dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
+ dev->caps.port_width_cap[port] = port_cap->max_port_width;
+ dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu;
+ dev->caps.max_tc_eth = port_cap->max_tc_eth;
+ dev->caps.def_mac[port] = port_cap->def_mac;
+ dev->caps.supported_type[port] = port_cap->supported_port_types;
+ dev->caps.suggested_type[port] = port_cap->suggested_type;
+ dev->caps.default_sense[port] = port_cap->default_sense;
+ dev->caps.trans_type[port] = port_cap->trans_type;
+ dev->caps.vendor_oui[port] = port_cap->vendor_oui;
+ dev->caps.wavelength[port] = port_cap->wavelength;
+ dev->caps.trans_code[port] = port_cap->trans_code;
+
+ return 0;
+}
+
+static int mlx4_dev_port(struct mlx4_dev *dev, int port,
+ struct mlx4_port_cap *port_cap)
+{
+ int err = 0;
+
+ err = mlx4_QUERY_PORT(dev, port, port_cap);
+
+ if (err)
+ mlx4_err(dev, "QUERY_PORT command failed.\n");
+
+ return err;
+}
+
+static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
+{
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
+ return;
+
+ if (mlx4_is_mfunc(dev)) {
+ mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS");
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
+ return;
+ }
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
+ mlx4_dbg(dev,
+ "Keep FCS is not supported - Disabling Ignore FCS");
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
+ return;
+ }
+}
+
+#define MLX4_A0_STEERING_TABLE_SIZE 256
+static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+ int err;
+ int i;
+
+ err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
+ return err;
+ }
+ mlx4_dev_cap_dump(dev, dev_cap);
+
+ if (dev_cap->min_page_sz > PAGE_SIZE) {
+ mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
+ dev_cap->min_page_sz, PAGE_SIZE);
+ return -ENODEV;
+ }
+ if (dev_cap->num_ports > MLX4_MAX_PORTS) {
+ mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
+ dev_cap->num_ports, MLX4_MAX_PORTS);
+ return -ENODEV;
+ }
+
+ if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
+ mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
+ dev_cap->uar_size,
+ (unsigned long long)
+ pci_resource_len(dev->persist->pdev, 2));
+ return -ENODEV;
+ }
+
+ dev->caps.num_ports = dev_cap->num_ports;
+ dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
+ dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
+ dev->caps.num_sys_eqs :
+ MLX4_MAX_EQ_NUM;
+ for (i = 1; i <= dev->caps.num_ports; ++i) {
+ err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
+ if (err) {
+ mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
+ return err;
+ }
+ }
+
+ dev->caps.map_clock_to_user = dev_cap->map_clock_to_user;
+ dev->caps.uar_page_size = PAGE_SIZE;
+ dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE;
+ dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
+ dev->caps.bf_reg_size = dev_cap->bf_reg_size;
+ dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page;
+ dev->caps.max_sq_sg = dev_cap->max_sq_sg;
+ dev->caps.max_rq_sg = dev_cap->max_rq_sg;
+ dev->caps.max_wqes = dev_cap->max_qp_sz;
+ dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp;
+ dev->caps.max_srq_wqes = dev_cap->max_srq_sz;
+ dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1;
+ dev->caps.reserved_srqs = dev_cap->reserved_srqs;
+ dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
+ dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
+ /*
+ * Subtract 1 from the limit because we need to allocate a
+ * spare CQE so the HCA HW can tell the difference between an
+ * empty CQ and a full CQ.
+ */
+ dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
+ dev->caps.reserved_cqs = dev_cap->reserved_cqs;
+ dev->caps.reserved_eqs = dev_cap->reserved_eqs;
+ dev->caps.reserved_mtts = dev_cap->reserved_mtts;
+ dev->caps.reserved_mrws = dev_cap->reserved_mrws;
+
+ dev->caps.reserved_pds = dev_cap->reserved_pds;
+ dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->reserved_xrcds : 0;
+ dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+ dev_cap->max_xrcds : 0;
+ dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz;
+
+ dev->caps.max_msg_sz = dev_cap->max_msg_sz;
+ dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1);
+ dev->caps.flags = dev_cap->flags;
+ dev->caps.flags2 = dev_cap->flags2;
+ dev->caps.bmme_flags = dev_cap->bmme_flags;
+ dev->caps.reserved_lkey = dev_cap->reserved_lkey;
+ dev->caps.stat_rate_support = dev_cap->stat_rate_support;
+ dev->caps.max_gso_sz = dev_cap->max_gso_sz;
+ dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
+ dev->caps.wol_port[1] = dev_cap->wol_port[1];
+ dev->caps.wol_port[2] = dev_cap->wol_port[2];
+ dev->caps.health_buffer_addrs = dev_cap->health_buffer_addrs;
+
+ /* Save uar page shift */
+ if (!mlx4_is_slave(dev)) {
+ /* Virtual PCI function needs to determine UAR page size from
+ * firmware. Only master PCI function can set the uar page size
+ */
+ if (enable_4k_uar || !dev->persist->num_vfs)
+ dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
+ else
+ dev->uar_page_shift = PAGE_SHIFT;
+
+ mlx4_set_num_reserved_uars(dev, dev_cap);
+ }
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
+ struct mlx4_init_hca_param hca_param;
+
+ memset(&hca_param, 0, sizeof(hca_param));
+ err = mlx4_QUERY_HCA(dev, &hca_param);
+ /* Turn off PHV_EN flag in case phv_check_en is set.
+ * phv_check_en is a HW check that parse the packet and verify
+ * phv bit was reported correctly in the wqe. To allow QinQ
+ * PHV_EN flag should be set and phv_check_en must be cleared
+ * otherwise QinQ packets will be drop by the HW.
+ */
+ if (err || hca_param.phv_check_en)
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
+ }
+
+ /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
+ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
+ dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+ /* Don't do sense port on multifunction devices (for now at least) */
+ if (mlx4_is_mfunc(dev))
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+
+ if (mlx4_low_memory_profile()) {
+ dev->caps.log_num_macs = MLX4_MIN_LOG_NUM_MAC;
+ dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
+ } else {
+ dev->caps.log_num_macs = log_num_mac;
+ dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
+ }
+
+ for (i = 1; i <= dev->caps.num_ports; ++i) {
+ dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
+ if (dev->caps.supported_type[i]) {
+ /* if only ETH is supported - assign ETH */
+ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
+ dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+ /* if only IB is supported, assign IB */
+ else if (dev->caps.supported_type[i] ==
+ MLX4_PORT_TYPE_IB)
+ dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
+ else {
+ /* if IB and ETH are supported, we set the port
+ * type according to user selection of port type;
+ * if user selected none, take the FW hint */
+ if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
+ dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
+ MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
+ else
+ dev->caps.port_type[i] = port_type_array[i - 1];
+ }
+ }
+ /*
+ * Link sensing is allowed on the port if 3 conditions are true:
+ * 1. Both protocols are supported on the port.
+ * 2. Different types are supported on the port
+ * 3. FW declared that it supports link sensing
+ */
+ mlx4_priv(dev)->sense.sense_allowed[i] =
+ ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
+
+ /*
+ * If "default_sense" bit is set, we move the port to "AUTO" mode
+ * and perform sense_port FW command to try and set the correct
+ * port type from beginning
+ */
+ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
+ enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
+ dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
+ mlx4_SENSE_PORT(dev, i, &sensed_port);
+ if (sensed_port != MLX4_PORT_TYPE_NONE)
+ dev->caps.port_type[i] = sensed_port;
+ } else {
+ dev->caps.possible_type[i] = dev->caps.port_type[i];
+ }
+
+ if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
+ dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
+ mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
+ i, 1 << dev->caps.log_num_macs);
+ }
+ if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
+ dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
+ mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
+ i, 1 << dev->caps.log_num_vlans);
+ }
+ }
+
+ if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
+ (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
+ (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
+ mlx4_warn(dev,
+ "Granular QoS per VF not supported with IB/Eth configuration\n");
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
+ }
+
+ dev->caps.max_counters = dev_cap->max_counters;
+
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
+ (1 << dev->caps.log_num_macs) *
+ (1 << dev->caps.log_num_vlans) *
+ dev->caps.num_ports;
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
+
+ if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
+ dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
+ else
+ dev->caps.dmfs_high_rate_qpn_base =
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
+
+ if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
+ dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
+ dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
+ dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
+ } else {
+ dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
+ dev->caps.dmfs_high_rate_qpn_base =
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
+ dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
+ }
+
+ dev->caps.rl_caps = dev_cap->rl_caps;
+
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
+ dev->caps.dmfs_high_rate_qpn_range;
+
+ dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
+
+ dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+ if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
+ if (dev_cap->flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+ mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+ dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+ }
+
+ if (dev_cap->flags2 &
+ (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
+ MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
+ mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+ dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
+ }
+ }
+
+ if ((dev->caps.flags &
+ (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+ mlx4_is_master(dev))
+ dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+
+ if (!mlx4_is_slave(dev)) {
+ mlx4_enable_cqe_eqe_stride(dev);
+ dev->caps.alloc_res_qp_mask =
+ (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
+ MLX4_RESERVE_A0_QP;
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
+ mlx4_warn(dev, "Old device ETS support detected\n");
+ mlx4_warn(dev, "Consider upgrading device FW.\n");
+ dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
+ }
+
+ } else {
+ dev->caps.alloc_res_qp_mask = 0;
+ }
+
+ mlx4_enable_ignore_fcs(dev);
+
+ return 0;
+}
+
+/*The function checks if there are live vf, return the num of them*/
+static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_state;
+ int i;
+ int ret = 0;
+
+ for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
+ s_state = &priv->mfunc.master.slave_state[i];
+ if (s_state->active && s_state->last_cmd !=
+ MLX4_COMM_CMD_RESET) {
+ mlx4_warn(dev, "%s: slave: %d is still active\n",
+ __func__, i);
+ ret++;
+ }
+ }
+ return ret;
+}
+
+int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
+{
+ u32 qk = MLX4_RESERVED_QKEY_BASE;
+
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
+ qpn < dev->phys_caps.base_proxy_sqpn)
+ return -EINVAL;
+
+ if (qpn >= dev->phys_caps.base_tunnel_sqpn)
+ /* tunnel qp */
+ qk += qpn - dev->phys_caps.base_tunnel_sqpn;
+ else
+ qk += qpn - dev->phys_caps.base_proxy_sqpn;
+ *qkey = qk;
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_get_parav_qkey);
+
+void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->virt2phys_pkey[slave][port - 1][i] = val;
+}
+EXPORT_SYMBOL(mlx4_sync_pkey_table);
+
+void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ priv->slave_node_guids[slave] = guid;
+}
+EXPORT_SYMBOL(mlx4_put_slave_node_guid);
+
+__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ return priv->slave_node_guids[slave];
+}
+EXPORT_SYMBOL(mlx4_get_slave_node_guid);
+
+int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *s_slave;
+
+ if (!mlx4_is_master(dev))
+ return 0;
+
+ s_slave = &priv->mfunc.master.slave_state[slave];
+ return !!s_slave->active;
+}
+EXPORT_SYMBOL(mlx4_is_slave_active);
+
+void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
+ struct _rule_hw *eth_header)
+{
+ if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
+ is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
+ struct mlx4_net_trans_rule_hw_eth *eth =
+ (struct mlx4_net_trans_rule_hw_eth *)eth_header;
+ struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
+ bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
+ next_rule->rsvd == 0;
+
+ if (last_rule)
+ ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
+ }
+}
+EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);
+
+static void slave_adjust_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap,
+ struct mlx4_init_hca_param *hca_param)
+{
+ dev->caps.steering_mode = hca_param->steering_mode;
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ dev->caps.fs_log_max_ucast_qp_range_size =
+ dev_cap->fs_log_max_ucast_qp_range_size;
+ } else
+ dev->caps.num_qp_per_mgm =
+ 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);
+
+ mlx4_dbg(dev, "Steering mode is: %s\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode));
+}
+
+static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
+{
+ kfree(dev->caps.spec_qps);
+ dev->caps.spec_qps = NULL;
+}
+
+static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
+{
+ struct mlx4_func_cap *func_cap = NULL;
+ struct mlx4_caps *caps = &dev->caps;
+ int i, err = 0;
+
+ func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
+ caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL);
+
+ if (!func_cap || !caps->spec_qps) {
+ mlx4_err(dev, "Failed to allocate memory for special qps cap\n");
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (i = 1; i <= caps->num_ports; ++i) {
+ err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
+ i, err);
+ goto err_mem;
+ }
+ caps->spec_qps[i - 1] = func_cap->spec_qps;
+ caps->port_mask[i] = caps->port_type[i];
+ caps->phys_port_id[i] = func_cap->phys_port_id;
+ err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+ &caps->gid_table_len[i],
+ &caps->pkey_table_len[i]);
+ if (err) {
+ mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n",
+ i, err);
+ goto err_mem;
+ }
+ }
+
+err_mem:
+ if (err)
+ mlx4_slave_destroy_special_qp_cap(dev);
+ kfree(func_cap);
+ return err;
+}
+
+static int mlx4_slave_cap(struct mlx4_dev *dev)
+{
+ int err;
+ u32 page_size;
+ struct mlx4_dev_cap *dev_cap = NULL;
+ struct mlx4_func_cap *func_cap = NULL;
+ struct mlx4_init_hca_param *hca_param = NULL;
+
+ hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
+ func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
+ dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+ if (!hca_param || !func_cap || !dev_cap) {
+ mlx4_err(dev, "Failed to allocate memory for slave_cap\n");
+ err = -ENOMEM;
+ goto free_mem;
+ }
+
+ err = mlx4_QUERY_HCA(dev, hca_param);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
+ goto free_mem;
+ }
+
+ /* fail if the hca has an unknown global capability
+ * at this time global_caps should be always zeroed
+ */
+ if (hca_param->global_caps) {
+ mlx4_err(dev, "Unknown hca global capabilities\n");
+ err = -EINVAL;
+ goto free_mem;
+ }
+
+ dev->caps.hca_core_clock = hca_param->hca_core_clock;
+
+ dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp;
+ err = mlx4_dev_cap(dev, dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
+ goto free_mem;
+ }
+
+ err = mlx4_QUERY_FW(dev);
+ if (err)
+ mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");
+
+ page_size = ~dev->caps.page_size_cap + 1;
+ mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
+ if (page_size > PAGE_SIZE) {
+ mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
+ page_size, PAGE_SIZE);
+ err = -ENODEV;
+ goto free_mem;
+ }
+
+ /* Set uar_page_shift for VF */
+ dev->uar_page_shift = hca_param->uar_page_sz + 12;
+
+ /* Make sure the master uar page size is valid */
+ if (dev->uar_page_shift > PAGE_SHIFT) {
+ mlx4_err(dev,
+ "Invalid configuration: uar page size is larger than system page size\n");
+ err = -ENODEV;
+ goto free_mem;
+ }
+
+ /* Set reserved_uars based on the uar_page_shift */
+ mlx4_set_num_reserved_uars(dev, dev_cap);
+
+ /* Although uar page size in FW differs from system page size,
+ * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
+ * still works with assumption that uar page size == system page size
+ */
+ dev->caps.uar_page_size = PAGE_SIZE;
+
+ err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
+ err);
+ goto free_mem;
+ }
+
+ if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
+ PF_CONTEXT_BEHAVIOUR_MASK) {
+ mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
+ func_cap->pf_context_behaviour,
+ PF_CONTEXT_BEHAVIOUR_MASK);
+ err = -EINVAL;
+ goto free_mem;
+ }
+
+ dev->caps.num_ports = func_cap->num_ports;
+ dev->quotas.qp = func_cap->qp_quota;
+ dev->quotas.srq = func_cap->srq_quota;
+ dev->quotas.cq = func_cap->cq_quota;
+ dev->quotas.mpt = func_cap->mpt_quota;
+ dev->quotas.mtt = func_cap->mtt_quota;
+ dev->caps.num_qps = 1 << hca_param->log_num_qps;
+ dev->caps.num_srqs = 1 << hca_param->log_num_srqs;
+ dev->caps.num_cqs = 1 << hca_param->log_num_cqs;
+ dev->caps.num_mpts = 1 << hca_param->log_mpt_sz;
+ dev->caps.num_eqs = func_cap->max_eq;
+ dev->caps.reserved_eqs = func_cap->reserved_eq;
+ dev->caps.reserved_lkey = func_cap->reserved_lkey;
+ dev->caps.num_pds = MLX4_NUM_PDS;
+ dev->caps.num_mgms = 0;
+ dev->caps.num_amgms = 0;
+
+ if (dev->caps.num_ports > MLX4_MAX_PORTS) {
+ mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
+ dev->caps.num_ports, MLX4_MAX_PORTS);
+ err = -ENODEV;
+ goto free_mem;
+ }
+
+ mlx4_replace_zero_macs(dev);
+
+ err = mlx4_slave_special_qp_cap(dev);
+ if (err) {
+ mlx4_err(dev, "Set special QP caps failed. aborting\n");
+ goto free_mem;
+ }
+
+ if (dev->caps.uar_page_size * (dev->caps.num_uars -
+ dev->caps.reserved_uars) >
+ pci_resource_len(dev->persist->pdev,
+ 2)) {
+ mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
+ dev->caps.uar_page_size * dev->caps.num_uars,
+ (unsigned long long)
+ pci_resource_len(dev->persist->pdev, 2));
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+ dev->caps.eqe_size = 64;
+ dev->caps.eqe_factor = 1;
+ } else {
+ dev->caps.eqe_size = 32;
+ dev->caps.eqe_factor = 0;
+ }
+
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+ dev->caps.cqe_size = 64;
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+ } else {
+ dev->caps.cqe_size = 32;
+ }
+
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
+ dev->caps.eqe_size = hca_param->eqe_size;
+ dev->caps.eqe_factor = 0;
+ }
+
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
+ dev->caps.cqe_size = hca_param->cqe_size;
+ /* User still need to know when CQE > 32B */
+ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+ }
+
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
+
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
+ mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n");
+
+ slave_adjust_steering_mode(dev, dev_cap, hca_param);
+ mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
+ hca_param->rss_ip_frags ? "on" : "off");
+
+ if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
+ dev->caps.bf_reg_size)
+ dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;
+
+ if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
+ dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;
+
+err_mem:
+ if (err)
+ mlx4_slave_destroy_special_qp_cap(dev);
+free_mem:
+ kfree(hca_param);
+ kfree(func_cap);
+ kfree(dev_cap);
+ return err;
+}
+
+static void mlx4_request_modules(struct mlx4_dev *dev)
+{
+ int port;
+ int has_ib_port = false;
+ int has_eth_port = false;
+#define EN_DRV_NAME "mlx4_en"
+#define IB_DRV_NAME "mlx4_ib"
+
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
+ has_ib_port = true;
+ else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+ has_eth_port = true;
+ }
+
+ if (has_eth_port)
+ request_module_nowait(EN_DRV_NAME);
+ if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+ request_module_nowait(IB_DRV_NAME);
+}
+
+/*
+ * Change the port configuration of the device.
+ * Every user of this function must hold the port mutex.
+ */
+int mlx4_change_port_types(struct mlx4_dev *dev,
+ enum mlx4_port_type *port_types)
+{
+ int err = 0;
+ int change = 0;
+ int port;
+
+ for (port = 0; port < dev->caps.num_ports; port++) {
+ /* Change the port type only if the new type is different
+ * from the current, and not set to Auto */
+ if (port_types[port] != dev->caps.port_type[port + 1])
+ change = 1;
+ }
+ if (change) {
+ mlx4_unregister_device(dev);
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ mlx4_CLOSE_PORT(dev, port);
+ dev->caps.port_type[port] = port_types[port - 1];
+ err = mlx4_SET_PORT(dev, port, -1);
+ if (err) {
+ mlx4_err(dev, "Failed to set port %d, aborting\n",
+ port);
+ goto out;
+ }
+ }
+ mlx4_set_port_mask(dev);
+ err = mlx4_register_device(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to register device\n");
+ goto out;
+ }
+ mlx4_request_modules(dev);
+ }
+
+out:
+ return err;
+}
+
+static ssize_t show_port_type(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_attr);
+ struct mlx4_dev *mdev = info->dev;
+ char type[8];
+
+ sprintf(type, "%s",
+ (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
+ "ib" : "eth");
+ if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
+ sprintf(buf, "auto (%s)\n", type);
+ else
+ sprintf(buf, "%s\n", type);
+
+ return strlen(buf);
+}
+
+static int __set_port_type(struct mlx4_port_info *info,
+ enum mlx4_port_type port_type)
+{
+ struct mlx4_dev *mdev = info->dev;
+ struct mlx4_priv *priv = mlx4_priv(mdev);
+ enum mlx4_port_type types[MLX4_MAX_PORTS];
+ enum mlx4_port_type new_types[MLX4_MAX_PORTS];
+ int i;
+ int err = 0;
+
+ if ((port_type & mdev->caps.supported_type[info->port]) != port_type) {
+ mlx4_err(mdev,
+ "Requested port type for port %d is not supported on this HCA\n",
+ info->port);
+ return -EOPNOTSUPP;
+ }
+
+ mlx4_stop_sense(mdev);
+ mutex_lock(&priv->port_mutex);
+ info->tmp_type = port_type;
+
+ /* Possible type is always the one that was delivered */
+ mdev->caps.possible_type[info->port] = info->tmp_type;
+
+ for (i = 0; i < mdev->caps.num_ports; i++) {
+ types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
+ mdev->caps.possible_type[i+1];
+ if (types[i] == MLX4_PORT_TYPE_AUTO)
+ types[i] = mdev->caps.port_type[i+1];
+ }
+
+ if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+ !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
+ for (i = 1; i <= mdev->caps.num_ports; i++) {
+ if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
+ mdev->caps.possible_type[i] = mdev->caps.port_type[i];
+ err = -EOPNOTSUPP;
+ }
+ }
+ }
+ if (err) {
+ mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
+ goto out;
+ }
+
+ mlx4_do_sense_ports(mdev, new_types, types);
+
+ err = mlx4_check_port_params(mdev, new_types);
+ if (err)
+ goto out;
+
+ /* We are about to apply the changes after the configuration
+ * was verified, no need to remember the temporary types
+ * any more */
+ for (i = 0; i < mdev->caps.num_ports; i++)
+ priv->port[i + 1].tmp_type = 0;
+
+ err = mlx4_change_port_types(mdev, new_types);
+
+out:
+ mlx4_start_sense(mdev);
+ mutex_unlock(&priv->port_mutex);
+
+ return err;
+}
+
+static ssize_t set_port_type(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_attr);
+ struct mlx4_dev *mdev = info->dev;
+ enum mlx4_port_type port_type;
+ static DEFINE_MUTEX(set_port_type_mutex);
+ int err;
+
+ mutex_lock(&set_port_type_mutex);
+
+ if (!strcmp(buf, "ib\n")) {
+ port_type = MLX4_PORT_TYPE_IB;
+ } else if (!strcmp(buf, "eth\n")) {
+ port_type = MLX4_PORT_TYPE_ETH;
+ } else if (!strcmp(buf, "auto\n")) {
+ port_type = MLX4_PORT_TYPE_AUTO;
+ } else {
+ mlx4_err(mdev, "%s is not supported port type\n", buf);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = __set_port_type(info, port_type);
+
+err_out:
+ mutex_unlock(&set_port_type_mutex);
+
+ return err ? err : count;
+}
+
+enum ibta_mtu {
+ IB_MTU_256 = 1,
+ IB_MTU_512 = 2,
+ IB_MTU_1024 = 3,
+ IB_MTU_2048 = 4,
+ IB_MTU_4096 = 5
+};
+
+static inline int int_to_ibta_mtu(int mtu)
+{
+ switch (mtu) {
+ case 256: return IB_MTU_256;
+ case 512: return IB_MTU_512;
+ case 1024: return IB_MTU_1024;
+ case 2048: return IB_MTU_2048;
+ case 4096: return IB_MTU_4096;
+ default: return -1;
+ }
+}
+
+static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256: return 256;
+ case IB_MTU_512: return 512;
+ case IB_MTU_1024: return 1024;
+ case IB_MTU_2048: return 2048;
+ case IB_MTU_4096: return 4096;
+ default: return -1;
+ }
+}
+
+static ssize_t show_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
+ mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+
+ sprintf(buf, "%d\n",
+ ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
+ return strlen(buf);
+}
+
+static ssize_t set_port_ib_mtu(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
+ port_mtu_attr);
+ struct mlx4_dev *mdev = info->dev;
+ struct mlx4_priv *priv = mlx4_priv(mdev);
+ int err, port, mtu, ibta_mtu = -1;
+
+ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
+ mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+ return -EINVAL;
+ }
+
+ err = kstrtoint(buf, 0, &mtu);
+ if (!err)
+ ibta_mtu = int_to_ibta_mtu(mtu);
+
+ if (err || ibta_mtu < 0) {
+ mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
+ return -EINVAL;
+ }
+
+ mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
+
+ mlx4_stop_sense(mdev);
+ mutex_lock(&priv->port_mutex);
+ mlx4_unregister_device(mdev);
+ for (port = 1; port <= mdev->caps.num_ports; port++) {
+ mlx4_CLOSE_PORT(mdev, port);
+ err = mlx4_SET_PORT(mdev, port, -1);
+ if (err) {
+ mlx4_err(mdev, "Failed to set port %d, aborting\n",
+ port);
+ goto err_set_port;
+ }
+ }
+ err = mlx4_register_device(mdev);
+err_set_port:
+ mutex_unlock(&priv->port_mutex);
+ mlx4_start_sense(mdev);
+ return err ? err : count;
+}
+
+/* bond for multi-function device */
+#define MAX_MF_BOND_ALLOWED_SLAVES 63
+static int mlx4_mf_bond(struct mlx4_dev *dev)
+{
+ int err = 0;
+ int nvfs;
+ struct mlx4_slaves_pport slaves_port1;
+ struct mlx4_slaves_pport slaves_port2;
+ DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
+
+ slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
+ slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
+ bitmap_and(slaves_port_1_2,
+ slaves_port1.slaves, slaves_port2.slaves,
+ dev->persist->num_vfs + 1);
+
+ /* only single port vfs are allowed */
+ if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
+ mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
+ return -EINVAL;
+ }
+
+ /* number of virtual functions is number of total functions minus one
+ * physical function for each port.
+ */
+ nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
+ bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
+
+ /* limit on maximum allowed VFs */
+ if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
+ mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
+ nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
+ return -EINVAL;
+ }
+
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
+ return -EINVAL;
+ }
+
+ err = mlx4_bond_mac_table(dev);
+ if (err)
+ return err;
+ err = mlx4_bond_vlan_table(dev);
+ if (err)
+ goto err1;
+ err = mlx4_bond_fs_rules(dev);
+ if (err)
+ goto err2;
+
+ return 0;
+err2:
+ (void)mlx4_unbond_vlan_table(dev);
+err1:
+ (void)mlx4_unbond_mac_table(dev);
+ return err;
+}
+
+static int mlx4_mf_unbond(struct mlx4_dev *dev)
+{
+ int ret, ret1;
+
+ ret = mlx4_unbond_fs_rules(dev);
+ if (ret)
+ mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
+ ret1 = mlx4_unbond_mac_table(dev);
+ if (ret1) {
+ mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
+ ret = ret1;
+ }
+ ret1 = mlx4_unbond_vlan_table(dev);
+ if (ret1) {
+ mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
+ ret = ret1;
+ }
+ return ret;
+}
+
+int mlx4_bond(struct mlx4_dev *dev)
+{
+ int ret = 0;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->bond_mutex);
+
+ if (!mlx4_is_bonded(dev)) {
+ ret = mlx4_do_bond(dev, true);
+ if (ret)
+ mlx4_err(dev, "Failed to bond device: %d\n", ret);
+ if (!ret && mlx4_is_master(dev)) {
+ ret = mlx4_mf_bond(dev);
+ if (ret) {
+ mlx4_err(dev, "bond for multifunction failed\n");
+ mlx4_do_bond(dev, false);
+ }
+ }
+ }
+
+ mutex_unlock(&priv->bond_mutex);
+ if (!ret)
+ mlx4_dbg(dev, "Device is bonded\n");
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_bond);
+
+int mlx4_unbond(struct mlx4_dev *dev)
+{
+ int ret = 0;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->bond_mutex);
+
+ if (mlx4_is_bonded(dev)) {
+ int ret2 = 0;
+
+ ret = mlx4_do_bond(dev, false);
+ if (ret)
+ mlx4_err(dev, "Failed to unbond device: %d\n", ret);
+ if (mlx4_is_master(dev))
+ ret2 = mlx4_mf_unbond(dev);
+ if (ret2) {
+ mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
+ ret = ret2;
+ }
+ }
+
+ mutex_unlock(&priv->bond_mutex);
+ if (!ret)
+ mlx4_dbg(dev, "Device is unbonded\n");
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_unbond);
+
+
+int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
+{
+ u8 port1 = v2p->port1;
+ u8 port2 = v2p->port2;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&priv->bond_mutex);
+
+ /* zero means keep current mapping for this port */
+ if (port1 == 0)
+ port1 = priv->v2p.port1;
+ if (port2 == 0)
+ port2 = priv->v2p.port2;
+
+ if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
+ (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
+ (port1 == 2 && port2 == 1)) {
+ /* besides boundary checks cross mapping makes
+ * no sense and therefore not allowed */
+ err = -EINVAL;
+ } else if ((port1 == priv->v2p.port1) &&
+ (port2 == priv->v2p.port2)) {
+ err = 0;
+ } else {
+ err = mlx4_virt2phy_port_map(dev, port1, port2);
+ if (!err) {
+ mlx4_dbg(dev, "port map changed: [%d][%d]\n",
+ port1, port2);
+ priv->v2p.port1 = port1;
+ priv->v2p.port2 = port2;
+ } else {
+ mlx4_err(dev, "Failed to change port mape: %d\n", err);
+ }
+ }
+
+ mutex_unlock(&priv->bond_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_port_map_set);
+
+static int mlx4_load_fw(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+
+ priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
+ if (!priv->fw.fw_icm) {
+ mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
+ return -ENOMEM;
+ }
+
+ err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
+ if (err) {
+ mlx4_err(dev, "MAP_FA command failed, aborting\n");
+ goto err_free;
+ }
+
+ err = mlx4_RUN_FW(dev);
+ if (err) {
+ mlx4_err(dev, "RUN_FW command failed, aborting\n");
+ goto err_unmap_fa;
+ }
+
+ return 0;
+
+err_unmap_fa:
+ mlx4_UNMAP_FA(dev);
+
+err_free:
+ mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+ return err;
+}
+
+static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
+ int cmpt_entry_sz)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+ int num_eqs;
+
+ err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
+ cmpt_base +
+ ((u64) (MLX4_CMPT_TYPE_QP *
+ cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+ cmpt_entry_sz, dev->caps.num_qps,
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+ 0, 0);
+ if (err)
+ goto err;
+
+ err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
+ cmpt_base +
+ ((u64) (MLX4_CMPT_TYPE_SRQ *
+ cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+ cmpt_entry_sz, dev->caps.num_srqs,
+ dev->caps.reserved_srqs, 0, 0);
+ if (err)
+ goto err_qp;
+
+ err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
+ cmpt_base +
+ ((u64) (MLX4_CMPT_TYPE_CQ *
+ cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+ cmpt_entry_sz, dev->caps.num_cqs,
+ dev->caps.reserved_cqs, 0, 0);
+ if (err)
+ goto err_srq;
+
+ num_eqs = dev->phys_caps.num_phys_eqs;
+ err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
+ cmpt_base +
+ ((u64) (MLX4_CMPT_TYPE_EQ *
+ cmpt_entry_sz) << MLX4_CMPT_SHIFT),
+ cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
+ if (err)
+ goto err_cq;
+
+ return 0;
+
+err_cq:
+ mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
+
+err_srq:
+ mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
+
+err_qp:
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
+
+err:
+ return err;
+}
+
+static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
+ struct mlx4_init_hca_param *init_hca, u64 icm_size)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 aux_pages;
+ int num_eqs;
+ int err;
+
+ err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
+ if (err) {
+ mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
+ return err;
+ }
+
+ mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
+ (unsigned long long) icm_size >> 10,
+ (unsigned long long) aux_pages << 2);
+
+ priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
+ if (!priv->fw.aux_icm) {
+ mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
+ return -ENOMEM;
+ }
+
+ err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
+ if (err) {
+ mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
+ goto err_free_aux;
+ }
+
+ err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
+ if (err) {
+ mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
+ goto err_unmap_aux;
+ }
+
+
+ num_eqs = dev->phys_caps.num_phys_eqs;
+ err = mlx4_init_icm_table(dev, &priv->eq_table.table,
+ init_hca->eqc_base, dev_cap->eqc_entry_sz,
+ num_eqs, num_eqs, 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
+ goto err_unmap_cmpt;
+ }
+
+ /*
+ * Reserved MTT entries must be aligned up to a cacheline
+ * boundary, since the FW will write to them, while the driver
+ * writes to all other MTT entries. (The variable
+ * dev->caps.mtt_entry_sz below is really the MTT segment
+ * size, not the raw entry size)
+ */
+ dev->caps.reserved_mtts =
+ ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
+ dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
+
+ err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
+ init_hca->mtt_base,
+ dev->caps.mtt_entry_sz,
+ dev->caps.num_mtts,
+ dev->caps.reserved_mtts, 1, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
+ goto err_unmap_eq;
+ }
+
+ err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
+ init_hca->dmpt_base,
+ dev_cap->dmpt_entry_sz,
+ dev->caps.num_mpts,
+ dev->caps.reserved_mrws, 1, 1);
+ if (err) {
+ mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
+ goto err_unmap_mtt;
+ }
+
+ err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
+ init_hca->qpc_base,
+ dev_cap->qpc_entry_sz,
+ dev->caps.num_qps,
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+ 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map QP context memory, aborting\n");
+ goto err_unmap_dmpt;
+ }
+
+ err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
+ init_hca->auxc_base,
+ dev_cap->aux_entry_sz,
+ dev->caps.num_qps,
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+ 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
+ goto err_unmap_qp;
+ }
+
+ err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
+ init_hca->altc_base,
+ dev_cap->altc_entry_sz,
+ dev->caps.num_qps,
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+ 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
+ goto err_unmap_auxc;
+ }
+
+ err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
+ init_hca->rdmarc_base,
+ dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
+ dev->caps.num_qps,
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+ 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
+ goto err_unmap_altc;
+ }
+
+ err = mlx4_init_icm_table(dev, &priv->cq_table.table,
+ init_hca->cqc_base,
+ dev_cap->cqc_entry_sz,
+ dev->caps.num_cqs,
+ dev->caps.reserved_cqs, 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
+ goto err_unmap_rdmarc;
+ }
+
+ err = mlx4_init_icm_table(dev, &priv->srq_table.table,
+ init_hca->srqc_base,
+ dev_cap->srq_entry_sz,
+ dev->caps.num_srqs,
+ dev->caps.reserved_srqs, 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
+ goto err_unmap_cq;
+ }
+
+ /*
+ * For flow steering device managed mode it is required to use
+ * mlx4_init_icm_table. For B0 steering mode it's not strictly
+ * required, but for simplicity just map the whole multicast
+ * group table now. The table isn't very big and it's a lot
+ * easier than trying to track ref counts.
+ */
+ err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
+ init_hca->mc_base,
+ mlx4_get_mgm_entry_size(dev),
+ dev->caps.num_mgms + dev->caps.num_amgms,
+ dev->caps.num_mgms + dev->caps.num_amgms,
+ 0, 0);
+ if (err) {
+ mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
+ goto err_unmap_srq;
+ }
+
+ return 0;
+
+err_unmap_srq:
+ mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
+
+err_unmap_cq:
+ mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
+
+err_unmap_rdmarc:
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
+
+err_unmap_altc:
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
+
+err_unmap_auxc:
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
+
+err_unmap_qp:
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
+
+err_unmap_dmpt:
+ mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
+
+err_unmap_mtt:
+ mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
+
+err_unmap_eq:
+ mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
+
+err_unmap_cmpt:
+ mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
+ mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
+ mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
+
+err_unmap_aux:
+ mlx4_UNMAP_ICM_AUX(dev);
+
+err_free_aux:
+ mlx4_free_icm(dev, priv->fw.aux_icm, 0);
+
+ return err;
+}
+
+static void mlx4_free_icms(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
+ mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
+ mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
+ mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
+ mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
+ mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
+ mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
+ mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
+ mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
+ mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
+
+ mlx4_UNMAP_ICM_AUX(dev);
+ mlx4_free_icm(dev, priv->fw.aux_icm, 0);
+}
+
+static void mlx4_slave_exit(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
+ MLX4_COMM_TIME))
+ mlx4_warn(dev, "Failed to close slave function\n");
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+}
+
+static int map_bf_area(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ resource_size_t bf_start;
+ resource_size_t bf_len;
+ int err = 0;
+
+ if (!dev->caps.bf_reg_size)
+ return -ENXIO;
+
+ bf_start = pci_resource_start(dev->persist->pdev, 2) +
+ (dev->caps.num_uars << PAGE_SHIFT);
+ bf_len = pci_resource_len(dev->persist->pdev, 2) -
+ (dev->caps.num_uars << PAGE_SHIFT);
+ priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
+ if (!priv->bf_mapping)
+ err = -ENOMEM;
+
+ return err;
+}
+
+static void unmap_bf_area(struct mlx4_dev *dev)
+{
+ if (mlx4_priv(dev)->bf_mapping)
+ io_mapping_free(mlx4_priv(dev)->bf_mapping);
+}
+
+u64 mlx4_read_clock(struct mlx4_dev *dev)
+{
+ u32 clockhi, clocklo, clockhi1;
+ u64 cycles;
+ int i;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ for (i = 0; i < 10; i++) {
+ clockhi = swab32(readl(priv->clock_mapping));
+ clocklo = swab32(readl(priv->clock_mapping + 4));
+ clockhi1 = swab32(readl(priv->clock_mapping));
+ if (clockhi == clockhi1)
+ break;
+ }
+
+ cycles = (u64) clockhi << 32 | (u64) clocklo;
+
+ return cycles;
+}
+EXPORT_SYMBOL_GPL(mlx4_read_clock);
+
+
+static int map_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->clock_mapping =
+ ioremap(pci_resource_start(dev->persist->pdev,
+ priv->fw.clock_bar) +
+ priv->fw.clock_offset, MLX4_CLOCK_SIZE);
+
+ if (!priv->clock_mapping)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
+ struct mlx4_clock_params *params)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (mlx4_is_slave(dev))
+ return -EOPNOTSUPP;
+
+ if (!dev->caps.map_clock_to_user) {
+ mlx4_dbg(dev, "Map clock to user is not supported.\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!params)
+ return -EINVAL;
+
+ params->bar = priv->fw.clock_bar;
+ params->offset = priv->fw.clock_offset;
+ params->size = MLX4_CLOCK_SIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
+
+static void unmap_internal_clock(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (priv->clock_mapping)
+ iounmap(priv->clock_mapping);
+}
+
+static void mlx4_close_hca(struct mlx4_dev *dev)
+{
+ unmap_internal_clock(dev);
+ unmap_bf_area(dev);
+ if (mlx4_is_slave(dev))
+ mlx4_slave_exit(dev);
+ else {
+ mlx4_CLOSE_HCA(dev, 0);
+ mlx4_free_icms(dev);
+ }
+}
+
+static void mlx4_close_fw(struct mlx4_dev *dev)
+{
+ if (!mlx4_is_slave(dev)) {
+ mlx4_UNMAP_FA(dev);
+ mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+ }
+}
+
+static int mlx4_comm_check_offline(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_OFFLINE_OFFSET 0x09
+
+ u32 comm_flags;
+ u32 offline_bit;
+ unsigned long end;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
+ while (time_before(jiffies, end)) {
+ comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_FLAGS));
+ offline_bit = (comm_flags &
+ (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
+ if (!offline_bit)
+ return 0;
+
+ /* If device removal has been requested,
+ * do not continue retrying.
+ */
+ if (dev->persist->interface_state &
+ MLX4_INTERFACE_STATE_NOWAIT)
+ break;
+
+ /* There are cases as part of AER/Reset flow that PF needs
+ * around 100 msec to load. We therefore sleep for 100 msec
+ * to allow other tasks to make use of that CPU during this
+ * time interval.
+ */
+ msleep(100);
+ }
+ mlx4_err(dev, "Communication channel is offline.\n");
+ return -EIO;
+}
+
+static void mlx4_reset_vf_support(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_RST_OFFSET 0x1e
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u32 comm_rst;
+ u32 comm_caps;
+
+ comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
+ MLX4_COMM_CHAN_CAPS));
+ comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
+
+ if (comm_rst)
+ dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
+}
+
+static int mlx4_init_slave(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ u64 dma = (u64) priv->mfunc.vhcr_dma;
+ int ret_from_reset = 0;
+ u32 slave_read;
+ u32 cmd_channel_ver;
+
+ if (atomic_read(&pf_loading)) {
+ mlx4_warn(dev, "PF is not ready - Deferring probe\n");
+ return -EPROBE_DEFER;
+ }
+
+ mutex_lock(&priv->cmd.slave_cmd_mutex);
+ priv->cmd.max_cmds = 1;
+ if (mlx4_comm_check_offline(dev)) {
+ mlx4_err(dev, "PF is not responsive, skipping initialization\n");
+ goto err_offline;
+ }
+
+ mlx4_reset_vf_support(dev);
+ mlx4_warn(dev, "Sending reset\n");
+ ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
+ /* if we are in the middle of flr the slave will try
+ * NUM_OF_RESET_RETRIES times before leaving.*/
+ if (ret_from_reset) {
+ if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
+ mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return -EPROBE_DEFER;
+ } else
+ goto err;
+ }
+
+ /* check the driver version - the slave I/F revision
+ * must match the master's */
+ slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+ cmd_channel_ver = mlx4_comm_get_version();
+
+ if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
+ MLX4_COMM_GET_IF_REV(slave_read)) {
+ mlx4_err(dev, "slave driver version is not supported by the master\n");
+ goto err;
+ }
+
+ mlx4_warn(dev, "Sending vhcr0\n");
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
+ goto err;
+ if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
+ MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
+ goto err;
+
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return 0;
+
+err:
+ mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
+err_offline:
+ mutex_unlock(&priv->cmd.slave_cmd_mutex);
+ return -EIO;
+}
+
+static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
+{
+ int i;
+
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
+ dev->caps.gid_table_len[i] =
+ mlx4_get_slave_num_gids(dev, 0, i);
+ else
+ dev->caps.gid_table_len[i] = 1;
+ dev->caps.pkey_table_len[i] =
+ dev->phys_caps.pkey_phys_table_len[i] - 1;
+ }
+}
+
+static int choose_log_fs_mgm_entry_size(int qp_per_entry)
+{
+ int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
+
+ for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
+ i++) {
+ if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
+ break;
+ }
+
+ return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
+}
+
+static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
+{
+ switch (dmfs_high_steer_mode) {
+ case MLX4_STEERING_DMFS_A0_DEFAULT:
+ return "default performance";
+
+ case MLX4_STEERING_DMFS_A0_DYNAMIC:
+ return "dynamic hybrid mode";
+
+ case MLX4_STEERING_DMFS_A0_STATIC:
+ return "performance optimized for limited rule configuration (static)";
+
+ case MLX4_STEERING_DMFS_A0_DISABLE:
+ return "disabled performance optimized steering";
+
+ case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
+ return "performance optimized steering not supported";
+
+ default:
+ return "Unrecognized mode";
+ }
+}
+
+#define MLX4_DMFS_A0_STEERING (1UL << 2)
+
+static void choose_steering_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap)
+{
+ if (mlx4_log_num_mgm_entry_size <= 0) {
+ if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
+ if (dev->caps.dmfs_high_steer_mode ==
+ MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
+ mlx4_err(dev, "DMFS high rate mode not supported\n");
+ else
+ dev->caps.dmfs_high_steer_mode =
+ MLX4_STEERING_DMFS_A0_STATIC;
+ }
+ }
+
+ if (mlx4_log_num_mgm_entry_size <= 0 &&
+ dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
+ (!mlx4_is_mfunc(dev) ||
+ (dev_cap->fs_max_num_qp_per_entry >=
+ (dev->persist->num_vfs + 1))) &&
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
+ dev->oper_log_mgm_entry_size =
+ choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
+ dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ dev->caps.fs_log_max_ucast_qp_range_size =
+ dev_cap->fs_log_max_ucast_qp_range_size;
+ } else {
+ if (dev->caps.dmfs_high_steer_mode !=
+ MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
+ dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
+ dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
+ else {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
+ mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
+ }
+ dev->oper_log_mgm_entry_size =
+ mlx4_log_num_mgm_entry_size > 0 ?
+ mlx4_log_num_mgm_entry_size :
+ MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+ dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
+ }
+ mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode),
+ dev->oper_log_mgm_entry_size,
+ mlx4_log_num_mgm_entry_size);
+}
+
+static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap)
+{
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
+ dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
+ else
+ dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
+
+ mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode
+ == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
+}
+
+static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
+{
+ int i;
+ struct mlx4_port_cap port_cap;
+
+ if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
+ return -EINVAL;
+
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ if (mlx4_dev_port(dev, i, &port_cap)) {
+ mlx4_err(dev,
+ "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n");
+ } else if ((dev->caps.dmfs_high_steer_mode !=
+ MLX4_STEERING_DMFS_A0_DEFAULT) &&
+ (port_cap.dmfs_optimized_state ==
+ !!(dev->caps.dmfs_high_steer_mode ==
+ MLX4_STEERING_DMFS_A0_DISABLE))) {
+ mlx4_err(dev,
+ "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n",
+ dmfs_high_rate_steering_mode_str(
+ dev->caps.dmfs_high_steer_mode),
+ (port_cap.dmfs_optimized_state ?
+ "enabled" : "disabled"));
+ }
+ }
+
+ return 0;
+}
+
+static int mlx4_init_fw(struct mlx4_dev *dev)
+{
+ struct mlx4_mod_stat_cfg mlx4_cfg;
+ int err = 0;
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_QUERY_FW(dev);
+ if (err) {
+ if (err == -EACCES)
+ mlx4_info(dev, "non-primary physical function, skipping\n");
+ else
+ mlx4_err(dev, "QUERY_FW command failed, aborting\n");
+ return err;
+ }
+
+ err = mlx4_load_fw(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to start FW, aborting\n");
+ return err;
+ }
+
+ mlx4_cfg.log_pg_sz_m = 1;
+ mlx4_cfg.log_pg_sz = 0;
+ err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
+ if (err)
+ mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+ }
+
+ return err;
+}
+
+static int mlx4_init_hca(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_adapter adapter;
+ struct mlx4_dev_cap dev_cap;
+ struct mlx4_profile profile;
+ struct mlx4_init_hca_param init_hca;
+ u64 icm_size;
+ struct mlx4_config_dev_params params;
+ int err;
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_dev_cap(dev, &dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
+ return err;
+ }
+
+ choose_steering_mode(dev, &dev_cap);
+ choose_tunnel_offload_mode(dev, &dev_cap);
+
+ if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
+ mlx4_is_master(dev))
+ dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
+
+ err = mlx4_get_phys_port_id(dev);
+ if (err)
+ mlx4_err(dev, "Fail to get physical port id\n");
+
+ if (mlx4_is_master(dev))
+ mlx4_parav_master_pf_caps(dev);
+
+ if (mlx4_low_memory_profile()) {
+ mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
+ profile = low_mem_profile;
+ } else {
+ profile = default_profile;
+ }
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ profile.num_mcg = MLX4_FS_NUM_MCG;
+
+ icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
+ &init_hca);
+ if ((long long) icm_size < 0) {
+ err = icm_size;
+ return err;
+ }
+
+ dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
+
+ if (enable_4k_uar || !dev->persist->num_vfs) {
+ init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+ PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
+ init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
+ } else {
+ init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+ init_hca.uar_page_sz = PAGE_SHIFT - 12;
+ }
+
+ init_hca.mw_enabled = 0;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
+ dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
+ init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
+
+ err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
+ if (err)
+ return err;
+
+ err = mlx4_INIT_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "INIT_HCA command failed, aborting\n");
+ goto err_free_icm;
+ }
+
+ if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+ err = mlx4_query_func(dev, &dev_cap);
+ if (err < 0) {
+ mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
+ goto err_close;
+ } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
+ dev->caps.num_eqs = dev_cap.max_eqs;
+ dev->caps.reserved_eqs = dev_cap.reserved_eqs;
+ dev->caps.reserved_uars = dev_cap.reserved_uars;
+ }
+ }
+
+ /*
+ * If TS is supported by FW
+ * read HCA frequency by QUERY_HCA command
+ */
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
+ memset(&init_hca, 0, sizeof(init_hca));
+ err = mlx4_QUERY_HCA(dev, &init_hca);
+ if (err) {
+ mlx4_err(dev, "QUERY_HCA command failed, disable timestamp\n");
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ } else {
+ dev->caps.hca_core_clock =
+ init_hca.hca_core_clock;
+ }
+
+ /* In case we got HCA frequency 0 - disable timestamping
+ * to avoid dividing by zero
+ */
+ if (!dev->caps.hca_core_clock) {
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ mlx4_err(dev,
+ "HCA frequency is 0 - timestamping is not supported\n");
+ } else if (map_internal_clock(dev)) {
+ /*
+ * Map internal clock,
+ * in case of failure disable timestamping
+ */
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
+ mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
+ }
+ }
+
+ if (dev->caps.dmfs_high_steer_mode !=
+ MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
+ if (mlx4_validate_optimized_steering(dev))
+ mlx4_warn(dev, "Optimized steering validation failed\n");
+
+ if (dev->caps.dmfs_high_steer_mode ==
+ MLX4_STEERING_DMFS_A0_DISABLE) {
+ dev->caps.dmfs_high_rate_qpn_base =
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
+ dev->caps.dmfs_high_rate_qpn_range =
+ MLX4_A0_STEERING_TABLE_SIZE;
+ }
+
+ mlx4_info(dev, "DMFS high rate steer mode is: %s\n",
+ dmfs_high_rate_steering_mode_str(
+ dev->caps.dmfs_high_steer_mode));
+ }
+ } else {
+ err = mlx4_init_slave(dev);
+ if (err) {
+ if (err != -EPROBE_DEFER)
+ mlx4_err(dev, "Failed to initialize slave\n");
+ return err;
+ }
+
+ err = mlx4_slave_cap(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to obtain slave caps\n");
+ goto err_close;
+ }
+ }
+
+ if (map_bf_area(dev))
+ mlx4_dbg(dev, "Failed to map blue flame area\n");
+
+ /*Only the master set the ports, all the rest got it from it.*/
+ if (!mlx4_is_slave(dev))
+ mlx4_set_port_mask(dev);
+
+ err = mlx4_QUERY_ADAPTER(dev, &adapter);
+ if (err) {
+ mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
+ goto unmap_bf;
+ }
+
+ /* Query CONFIG_DEV parameters */
+ err = mlx4_config_dev_retrieval(dev, &params);
+ if (err && err != -EOPNOTSUPP) {
+ mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
+ } else if (!err) {
+ dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
+ dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
+ }
+ priv->eq_table.inta_pin = adapter.inta_pin;
+ memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
+
+ return 0;
+
+unmap_bf:
+ unmap_internal_clock(dev);
+ unmap_bf_area(dev);
+
+ if (mlx4_is_slave(dev))
+ mlx4_slave_destroy_special_qp_cap(dev);
+
+err_close:
+ if (mlx4_is_slave(dev))
+ mlx4_slave_exit(dev);
+ else
+ mlx4_CLOSE_HCA(dev, 0);
+
+err_free_icm:
+ if (!mlx4_is_slave(dev))
+ mlx4_free_icms(dev);
+
+ return err;
+}
+
+static int mlx4_init_counters_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int nent_pow2;
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return -ENOENT;
+
+ if (!dev->caps.max_counters)
+ return -ENOSPC;
+
+ nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
+ /* reserve last counter index for sink counter */
+ return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
+ nent_pow2 - 1, 0,
+ nent_pow2 - dev->caps.max_counters + 1);
+}
+
+static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
+{
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return;
+
+ if (!dev->caps.max_counters)
+ return;
+
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
+}
+
+static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port;
+
+ for (port = 0; port < dev->caps.num_ports; port++)
+ if (priv->def_counter[port] != -1)
+ mlx4_counter_free(dev, priv->def_counter[port]);
+}
+
+static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port, err = 0;
+ u32 idx;
+
+ for (port = 0; port < dev->caps.num_ports; port++)
+ priv->def_counter[port] = -1;
+
+ for (port = 0; port < dev->caps.num_ports; port++) {
+ err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER);
+
+ if (!err || err == -ENOSPC) {
+ priv->def_counter[port] = idx;
+ err = 0;
+ } else if (err == -ENOENT) {
+ err = 0;
+ continue;
+ } else if (mlx4_is_slave(dev) && err == -EINVAL) {
+ priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
+ mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
+ MLX4_SINK_COUNTER_INDEX(dev));
+ err = 0;
+ } else {
+ mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
+ __func__, port + 1, err);
+ mlx4_cleanup_default_counters(dev);
+ return err;
+ }
+
+ mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
+ __func__, priv->def_counter[port], port + 1);
+ }
+
+ return err;
+}
+
+int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return -ENOENT;
+
+ *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
+ if (*idx == -1) {
+ *idx = MLX4_SINK_COUNTER_INDEX(dev);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage)
+{
+ u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30);
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
+ RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *idx = get_param_l(&out_param);
+ if (WARN_ON(err == -ENOSPC))
+ err = -EINVAL;
+ return err;
+ }
+ return __mlx4_counter_alloc(dev, idx);
+}
+EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
+
+static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
+ u8 counter_index)
+{
+ struct mlx4_cmd_mailbox *if_stat_mailbox;
+ int err;
+ u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
+
+ if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(if_stat_mailbox))
+ return PTR_ERR(if_stat_mailbox);
+
+ err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
+ MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
+ return err;
+}
+
+void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
+{
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
+ return;
+
+ if (idx == MLX4_SINK_COUNTER_INDEX(dev))
+ return;
+
+ __mlx4_clear_if_stat(dev, idx);
+
+ mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
+ return;
+}
+
+void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, idx);
+ mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ return;
+ }
+ __mlx4_counter_free(dev, idx);
+}
+EXPORT_SYMBOL_GPL(mlx4_counter_free);
+
+int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return priv->def_counter[port - 1];
+}
+EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
+
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
+
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return priv->mfunc.master.vf_admin[entry].vport[port].guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
+
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ __be64 guid;
+
+ /* hw GUID */
+ if (entry == 0)
+ return;
+
+ get_random_bytes((char *)&guid, sizeof(guid));
+ guid &= ~(cpu_to_be64(1ULL << 56));
+ guid |= cpu_to_be64(1ULL << 57);
+ priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+
+static int mlx4_setup_hca(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+ int port;
+ __be32 ib_port_default_caps;
+
+ err = mlx4_init_uar_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
+ return err;
+ }
+
+ err = mlx4_uar_alloc(dev, &priv->driver_uar);
+ if (err) {
+ mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
+ goto err_uar_table_free;
+ }
+
+ priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!priv->kar) {
+ mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
+ err = -ENOMEM;
+ goto err_uar_free;
+ }
+
+ err = mlx4_init_pd_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
+ goto err_kar_unmap;
+ }
+
+ err = mlx4_init_xrcd_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
+ goto err_pd_table_free;
+ }
+
+ err = mlx4_init_mr_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
+ goto err_xrcd_table_free;
+ }
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_mcg_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
+ goto err_mr_table_free;
+ }
+ err = mlx4_config_mad_demux(dev);
+ if (err) {
+ mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
+ goto err_mcg_table_free;
+ }
+ }
+
+ err = mlx4_init_eq_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
+ goto err_mcg_table_free;
+ }
+
+ err = mlx4_cmd_use_events(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
+ goto err_eq_table_free;
+ }
+
+ err = mlx4_NOP(dev);
+ if (err) {
+ if (dev->flags & MLX4_FLAG_MSI_X) {
+ mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt IRQ %d)\n",
+ priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+ mlx4_warn(dev, "Trying again without MSI-X\n");
+ } else {
+ mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
+ priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+ mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
+ }
+
+ goto err_cmd_poll;
+ }
+
+ mlx4_dbg(dev, "NOP command IRQ test passed\n");
+
+ err = mlx4_init_cq_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
+ goto err_cmd_poll;
+ }
+
+ err = mlx4_init_srq_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
+ goto err_cq_table_free;
+ }
+
+ err = mlx4_init_qp_table(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
+ goto err_srq_table_free;
+ }
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_counters_table(dev);
+ if (err && err != -ENOENT) {
+ mlx4_err(dev, "Failed to initialize counters table, aborting\n");
+ goto err_qp_table_free;
+ }
+ }
+
+ err = mlx4_allocate_default_counters(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to allocate default counters, aborting\n");
+ goto err_counters_table_free;
+ }
+
+ if (!mlx4_is_slave(dev)) {
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ ib_port_default_caps = 0;
+ err = mlx4_get_port_ib_caps(dev, port,
+ &ib_port_default_caps);
+ if (err)
+ mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
+ port, err);
+ dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+ /* initialize per-slave default ib port capabilities */
+ if (mlx4_is_master(dev)) {
+ int i;
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (i == mlx4_master_func_num(dev))
+ continue;
+ priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
+ ib_port_default_caps;
+ }
+ }
+
+ if (mlx4_is_mfunc(dev))
+ dev->caps.port_ib_mtu[port] = IB_MTU_2048;
+ else
+ dev->caps.port_ib_mtu[port] = IB_MTU_4096;
+
+ err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
+ dev->caps.pkey_table_len[port] : -1);
+ if (err) {
+ mlx4_err(dev, "Failed to set port %d, aborting\n",
+ port);
+ goto err_default_countes_free;
+ }
+ }
+ }
+
+ return 0;
+
+err_default_countes_free:
+ mlx4_cleanup_default_counters(dev);
+
+err_counters_table_free:
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_counters_table(dev);
+
+err_qp_table_free:
+ mlx4_cleanup_qp_table(dev);
+
+err_srq_table_free:
+ mlx4_cleanup_srq_table(dev);
+
+err_cq_table_free:
+ mlx4_cleanup_cq_table(dev);
+
+err_cmd_poll:
+ mlx4_cmd_use_polling(dev);
+
+err_eq_table_free:
+ mlx4_cleanup_eq_table(dev);
+
+err_mcg_table_free:
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_mcg_table(dev);
+
+err_mr_table_free:
+ mlx4_cleanup_mr_table(dev);
+
+err_xrcd_table_free:
+ mlx4_cleanup_xrcd_table(dev);
+
+err_pd_table_free:
+ mlx4_cleanup_pd_table(dev);
+
+err_kar_unmap:
+ iounmap(priv->kar);
+
+err_uar_free:
+ mlx4_uar_free(dev, &priv->driver_uar);
+
+err_uar_table_free:
+ mlx4_cleanup_uar_table(dev);
+ return err;
+}
+
+static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
+{
+ int requested_cpu = 0;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_eq *eq;
+ int off = 0;
+ int i;
+
+ if (eqn > dev->caps.num_comp_vectors)
+ return -EINVAL;
+
+ for (i = 1; i < port; i++)
+ off += mlx4_get_eqs_per_port(dev, i);
+
+ requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
+
+ /* Meaning EQs are shared, and this call comes from the second port */
+ if (requested_cpu < 0)
+ return 0;
+
+ eq = &priv->eq_table.eq[eqn];
+
+ if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_set_cpu(requested_cpu, eq->affinity_mask);
+
+ return 0;
+}
+
+static void mlx4_enable_msi_x(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct msix_entry *entries;
+ int i;
+ int port = 0;
+
+ if (msi_x) {
+ int nreq = min3(dev->caps.num_ports *
+ (int)num_online_cpus() + 1,
+ dev->caps.num_eqs - dev->caps.reserved_eqs,
+ MAX_MSIX);
+
+ if (msi_x > 1)
+ nreq = min_t(int, nreq, msi_x);
+
+ entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
+ if (!entries)
+ goto no_msi;
+
+ for (i = 0; i < nreq; ++i)
+ entries[i].entry = i;
+
+ nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
+ nreq);
+
+ if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
+ kfree(entries);
+ goto no_msi;
+ }
+ /* 1 is reserved for events (asyncrounous EQ) */
+ dev->caps.num_comp_vectors = nreq - 1;
+
+ priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
+ bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
+ dev->caps.num_ports);
+
+ for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
+ if (i == MLX4_EQ_ASYNC)
+ continue;
+
+ priv->eq_table.eq[i].irq =
+ entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
+
+ if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
+ bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
+ dev->caps.num_ports);
+ /* We don't set affinity hint when there
+ * aren't enough EQs
+ */
+ } else {
+ set_bit(port,
+ priv->eq_table.eq[i].actv_ports.ports);
+ if (mlx4_init_affinity_hint(dev, port + 1, i))
+ mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
+ i);
+ }
+ /* We divide the Eqs evenly between the two ports.
+ * (dev->caps.num_comp_vectors / dev->caps.num_ports)
+ * refers to the number of Eqs per port
+ * (i.e eqs_per_port). Theoretically, we would like to
+ * write something like (i + 1) % eqs_per_port == 0.
+ * However, since there's an asynchronous Eq, we have
+ * to skip over it by comparing this condition to
+ * !!((i + 1) > MLX4_EQ_ASYNC).
+ */
+ if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
+ ((i + 1) %
+ (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
+ !!((i + 1) > MLX4_EQ_ASYNC))
+ /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
+ * everything is shared anyway.
+ */
+ port++;
+ }
+
+ dev->flags |= MLX4_FLAG_MSI_X;
+
+ kfree(entries);
+ return;
+ }
+
+no_msi:
+ dev->caps.num_comp_vectors = 1;
+
+ BUG_ON(MLX4_EQ_ASYNC >= 2);
+ for (i = 0; i < 2; ++i) {
+ priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
+ if (i != MLX4_EQ_ASYNC) {
+ bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
+ dev->caps.num_ports);
+ }
+ }
+}
+
+static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
+{
+ struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ int err;
+
+ err = devlink_port_register(devlink, &info->devlink_port, port);
+ if (err)
+ return err;
+
+ info->dev = dev;
+ info->port = port;
+ if (!mlx4_is_slave(dev)) {
+ mlx4_init_mac_table(dev, &info->mac_table);
+ mlx4_init_vlan_table(dev, &info->vlan_table);
+ mlx4_init_roce_gid_table(dev, &info->gid_table);
+ info->base_qpn = mlx4_get_base_qpn(dev, port);
+ }
+
+ sprintf(info->dev_name, "mlx4_port%d", port);
+ info->port_attr.attr.name = info->dev_name;
+ if (mlx4_is_mfunc(dev)) {
+ info->port_attr.attr.mode = 0444;
+ } else {
+ info->port_attr.attr.mode = 0644;
+ info->port_attr.store = set_port_type;
+ }
+ info->port_attr.show = show_port_type;
+ sysfs_attr_init(&info->port_attr.attr);
+
+ err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
+ if (err) {
+ mlx4_err(dev, "Failed to create file for port %d\n", port);
+ devlink_port_unregister(&info->devlink_port);
+ info->port = -1;
+ return err;
+ }
+
+ sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
+ info->port_mtu_attr.attr.name = info->dev_mtu_name;
+ if (mlx4_is_mfunc(dev)) {
+ info->port_mtu_attr.attr.mode = 0444;
+ } else {
+ info->port_mtu_attr.attr.mode = 0644;
+ info->port_mtu_attr.store = set_port_ib_mtu;
+ }
+ info->port_mtu_attr.show = show_port_ib_mtu;
+ sysfs_attr_init(&info->port_mtu_attr.attr);
+
+ err = device_create_file(&dev->persist->pdev->dev,
+ &info->port_mtu_attr);
+ if (err) {
+ mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
+ device_remove_file(&info->dev->persist->pdev->dev,
+ &info->port_attr);
+ devlink_port_unregister(&info->devlink_port);
+ info->port = -1;
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
+{
+ if (info->port < 0)
+ return;
+
+ device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
+ device_remove_file(&info->dev->persist->pdev->dev,
+ &info->port_mtu_attr);
+ devlink_port_unregister(&info->devlink_port);
+
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(info->rmap);
+ info->rmap = NULL;
+#endif
+}
+
+static int mlx4_init_steering(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer),
+ GFP_KERNEL);
+ if (!priv->steer)
+ return -ENOMEM;
+
+ for (i = 0; i < num_entries; i++)
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
+ INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
+ }
+ return 0;
+}
+
+static void mlx4_clear_steering(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_steer_index *entry, *tmp_entry;
+ struct mlx4_promisc_qp *pqp, *tmp_pqp;
+ int num_entries = dev->caps.num_ports;
+ int i, j;
+
+ for (i = 0; i < num_entries; i++) {
+ for (j = 0; j < MLX4_NUM_STEERS; j++) {
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &priv->steer[i].promisc_qps[j],
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ list_for_each_entry_safe(entry, tmp_entry,
+ &priv->steer[i].steer_entries[j],
+ list) {
+ list_del(&entry->list);
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &entry->duplicates,
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ kfree(entry);
+ }
+ }
+ }
+ kfree(priv->steer);
+}
+
+static int extended_func_num(struct pci_dev *pdev)
+{
+ return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
+}
+
+#define MLX4_OWNER_BASE 0x8069c
+#define MLX4_OWNER_SIZE 4
+
+static int mlx4_get_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+ u32 ret;
+
+ if (pci_channel_offline(dev->persist->pdev))
+ return -EIO;
+
+ owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
+ MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return -ENOMEM;
+ }
+
+ ret = readl(owner);
+ iounmap(owner);
+ return (int) !!ret;
+}
+
+static void mlx4_free_ownership(struct mlx4_dev *dev)
+{
+ void __iomem *owner;
+
+ if (pci_channel_offline(dev->persist->pdev))
+ return;
+
+ owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
+ MLX4_OWNER_BASE,
+ MLX4_OWNER_SIZE);
+ if (!owner) {
+ mlx4_err(dev, "Failed to obtain ownership bit\n");
+ return;
+ }
+ writel(0, owner);
+ msleep(1000);
+ iounmap(owner);
+}
+
+#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
+ !!((flags) & MLX4_FLAG_MASTER))
+
+static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
+ u8 total_vfs, int existing_vfs, int reset_flow)
+{
+ u64 dev_flags = dev->flags;
+ int err = 0;
+ int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
+ MLX4_MAX_NUM_VF);
+
+ if (reset_flow) {
+ dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
+ GFP_KERNEL);
+ if (!dev->dev_vfs)
+ goto free_mem;
+ return dev_flags;
+ }
+
+ atomic_inc(&pf_loading);
+ if (dev->flags & MLX4_FLAG_SRIOV) {
+ if (existing_vfs != total_vfs) {
+ mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
+ existing_vfs, total_vfs);
+ total_vfs = existing_vfs;
+ }
+ }
+
+ dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL);
+ if (NULL == dev->dev_vfs) {
+ mlx4_err(dev, "Failed to allocate memory for VFs\n");
+ goto disable_sriov;
+ }
+
+ if (!(dev->flags & MLX4_FLAG_SRIOV)) {
+ if (total_vfs > fw_enabled_sriov_vfs) {
+ mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n",
+ total_vfs, fw_enabled_sriov_vfs);
+ err = -ENOMEM;
+ goto disable_sriov;
+ }
+ mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
+ err = pci_enable_sriov(pdev, total_vfs);
+ }
+ if (err) {
+ mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
+ err);
+ goto disable_sriov;
+ } else {
+ mlx4_warn(dev, "Running in master mode\n");
+ dev_flags |= MLX4_FLAG_SRIOV |
+ MLX4_FLAG_MASTER;
+ dev_flags &= ~MLX4_FLAG_SLAVE;
+ dev->persist->num_vfs = total_vfs;
+ }
+ return dev_flags;
+
+disable_sriov:
+ atomic_dec(&pf_loading);
+free_mem:
+ dev->persist->num_vfs = 0;
+ kfree(dev->dev_vfs);
+ dev->dev_vfs = NULL;
+ return dev_flags & ~MLX4_FLAG_MASTER;
+}
+
+enum {
+ MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
+};
+
+static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
+ int *nvfs)
+{
+ int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
+ /* Checking for 64 VFs as a limitation of CX2 */
+ if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
+ requested_vfs >= 64) {
+ mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
+ requested_vfs);
+ return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
+ }
+ return 0;
+}
+
+static int mlx4_pci_enable_device(struct mlx4_dev *dev)
+{
+ struct pci_dev *pdev = dev->persist->pdev;
+ int err = 0;
+
+ mutex_lock(&dev->persist->pci_status_mutex);
+ if (dev->persist->pci_status == MLX4_PCI_STATUS_DISABLED) {
+ err = pci_enable_device(pdev);
+ if (!err)
+ dev->persist->pci_status = MLX4_PCI_STATUS_ENABLED;
+ }
+ mutex_unlock(&dev->persist->pci_status_mutex);
+
+ return err;
+}
+
+static void mlx4_pci_disable_device(struct mlx4_dev *dev)
+{
+ struct pci_dev *pdev = dev->persist->pdev;
+
+ mutex_lock(&dev->persist->pci_status_mutex);
+ if (dev->persist->pci_status == MLX4_PCI_STATUS_ENABLED) {
+ pci_disable_device(pdev);
+ dev->persist->pci_status = MLX4_PCI_STATUS_DISABLED;
+ }
+ mutex_unlock(&dev->persist->pci_status_mutex);
+}
+
+static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
+ int total_vfs, int *nvfs, struct mlx4_priv *priv,
+ int reset_flow)
+{
+ struct mlx4_dev *dev;
+ unsigned sum = 0;
+ int err;
+ int port;
+ int i;
+ struct mlx4_dev_cap *dev_cap = NULL;
+ int existing_vfs = 0;
+
+ dev = &priv->dev;
+
+ INIT_LIST_HEAD(&priv->ctx_list);
+ spin_lock_init(&priv->ctx_lock);
+
+ mutex_init(&priv->port_mutex);
+ mutex_init(&priv->bond_mutex);
+
+ INIT_LIST_HEAD(&priv->pgdir_list);
+ mutex_init(&priv->pgdir_mutex);
+ spin_lock_init(&priv->cmd.context_lock);
+
+ INIT_LIST_HEAD(&priv->bf_list);
+ mutex_init(&priv->bf_mutex);
+
+ dev->rev_id = pdev->revision;
+ dev->numa_node = dev_to_node(&pdev->dev);
+
+ /* Detect if this device is a virtual function */
+ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
+ mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
+ dev->flags |= MLX4_FLAG_SLAVE;
+ } else {
+ /* We reset the device and enable SRIOV only for physical
+ * devices. Try to claim ownership on the device;
+ * if already taken, skip -- do not allow multiple PFs */
+ err = mlx4_get_ownership(dev);
+ if (err) {
+ if (err < 0)
+ return err;
+ else {
+ mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
+ return -EINVAL;
+ }
+ }
+
+ atomic_set(&priv->opreq_count, 0);
+ INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
+
+ /*
+ * Now reset the HCA before we touch the PCI capabilities or
+ * attempt a firmware command, since a boot ROM may have left
+ * the HCA in an undefined state.
+ */
+ err = mlx4_reset(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to reset HCA, aborting\n");
+ goto err_sriov;
+ }
+
+ if (total_vfs) {
+ dev->flags = MLX4_FLAG_MASTER;
+ existing_vfs = pci_num_vf(pdev);
+ if (existing_vfs)
+ dev->flags |= MLX4_FLAG_SRIOV;
+ dev->persist->num_vfs = total_vfs;
+ }
+ }
+
+ /* on load remove any previous indication of internal error,
+ * device is up.
+ */
+ dev->persist->state = MLX4_DEVICE_STATE_UP;
+
+slave_start:
+ err = mlx4_cmd_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init command interface, aborting\n");
+ goto err_sriov;
+ }
+
+ /* In slave functions, the communication channel must be initialized
+ * before posting commands. Also, init num_slaves before calling
+ * mlx4_init_hca */
+ if (mlx4_is_mfunc(dev)) {
+ if (mlx4_is_master(dev)) {
+ dev->num_slaves = MLX4_MAX_NUM_SLAVES;
+
+ } else {
+ dev->num_slaves = 0;
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
+ goto err_cmd;
+ }
+ }
+ }
+
+ err = mlx4_init_fw(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init fw, aborting.\n");
+ goto err_mfunc;
+ }
+
+ if (mlx4_is_master(dev)) {
+ /* when we hit the goto slave_start below, dev_cap already initialized */
+ if (!dev_cap) {
+ dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+
+ if (!dev_cap) {
+ err = -ENOMEM;
+ goto err_fw;
+ }
+
+ err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ goto err_fw;
+ }
+
+ if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
+ goto err_fw;
+
+ if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
+ u64 dev_flags = mlx4_enable_sriov(dev, pdev,
+ total_vfs,
+ existing_vfs,
+ reset_flow);
+
+ mlx4_close_fw(dev);
+ mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
+ dev->flags = dev_flags;
+ if (!SRIOV_VALID_STATE(dev->flags)) {
+ mlx4_err(dev, "Invalid SRIOV state\n");
+ goto err_sriov;
+ }
+ err = mlx4_reset(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+ goto err_sriov;
+ }
+ goto slave_start;
+ }
+ } else {
+ /* Legacy mode FW requires SRIOV to be enabled before
+ * doing QUERY_DEV_CAP, since max_eq's value is different if
+ * SRIOV is enabled.
+ */
+ memset(dev_cap, 0, sizeof(*dev_cap));
+ err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+ goto err_fw;
+ }
+
+ if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
+ goto err_fw;
+ }
+ }
+
+ err = mlx4_init_hca(dev);
+ if (err) {
+ if (err == -EACCES) {
+ /* Not primary Physical function
+ * Running in slave mode */
+ mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
+ /* We're not a PF */
+ if (dev->flags & MLX4_FLAG_SRIOV) {
+ if (!existing_vfs)
+ pci_disable_sriov(pdev);
+ if (mlx4_is_master(dev) && !reset_flow)
+ atomic_dec(&pf_loading);
+ dev->flags &= ~MLX4_FLAG_SRIOV;
+ }
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+ dev->flags |= MLX4_FLAG_SLAVE;
+ dev->flags &= ~MLX4_FLAG_MASTER;
+ goto slave_start;
+ } else
+ goto err_fw;
+ }
+
+ if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
+ u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
+ existing_vfs, reset_flow);
+
+ if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
+ mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
+ dev->flags = dev_flags;
+ err = mlx4_cmd_init(dev);
+ if (err) {
+ /* Only VHCR is cleaned up, so could still
+ * send FW commands
+ */
+ mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
+ goto err_close;
+ }
+ } else {
+ dev->flags = dev_flags;
+ }
+
+ if (!SRIOV_VALID_STATE(dev->flags)) {
+ mlx4_err(dev, "Invalid SRIOV state\n");
+ err = -EINVAL;
+ goto err_close;
+ }
+ }
+
+ /* check if the device is functioning at its maximum possible speed.
+ * No return code for this call, just warn the user in case of PCI
+ * express device capabilities are under-satisfied by the bus.
+ */
+ if (!mlx4_is_slave(dev))
+ pcie_print_link_status(dev->persist->pdev);
+
+ /* In master functions, the communication channel must be initialized
+ * after obtaining its address from fw */
+ if (mlx4_is_master(dev)) {
+ if (dev->caps.num_ports < 2 &&
+ num_vfs_argc > 1) {
+ err = -EINVAL;
+ mlx4_err(dev,
+ "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
+ dev->caps.num_ports);
+ goto err_close;
+ }
+ memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
+
+ for (i = 0;
+ i < sizeof(dev->persist->nvfs)/
+ sizeof(dev->persist->nvfs[0]); i++) {
+ unsigned j;
+
+ for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
+ dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
+ dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
+ dev->caps.num_ports;
+ }
+ }
+
+ /* In master functions, the communication channel
+ * must be initialized after obtaining its address from fw
+ */
+ err = mlx4_multi_func_init(dev);
+ if (err) {
+ mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
+ goto err_close;
+ }
+ }
+
+ err = mlx4_alloc_eq_table(dev);
+ if (err)
+ goto err_master_mfunc;
+
+ bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
+ mutex_init(&priv->msix_ctl.pool_lock);
+
+ mlx4_enable_msi_x(dev);
+ if ((mlx4_is_mfunc(dev)) &&
+ !(dev->flags & MLX4_FLAG_MSI_X)) {
+ err = -EOPNOTSUPP;
+ mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
+ goto err_free_eq;
+ }
+
+ if (!mlx4_is_slave(dev)) {
+ err = mlx4_init_steering(dev);
+ if (err)
+ goto err_disable_msix;
+ }
+
+ mlx4_init_quotas(dev);
+
+ err = mlx4_setup_hca(dev);
+ if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
+ !mlx4_is_mfunc(dev)) {
+ dev->flags &= ~MLX4_FLAG_MSI_X;
+ dev->caps.num_comp_vectors = 1;
+ pci_disable_msix(pdev);
+ err = mlx4_setup_hca(dev);
+ }
+
+ if (err)
+ goto err_steer;
+
+ /* When PF resources are ready arm its comm channel to enable
+ * getting commands
+ */
+ if (mlx4_is_master(dev)) {
+ err = mlx4_ARM_COMM_CHANNEL(dev);
+ if (err) {
+ mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
+ err);
+ goto err_steer;
+ }
+ }
+
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ err = mlx4_init_port_info(dev, port);
+ if (err)
+ goto err_port;
+ }
+
+ priv->v2p.port1 = 1;
+ priv->v2p.port2 = 2;
+
+ err = mlx4_register_device(dev);
+ if (err)
+ goto err_port;
+
+ mlx4_request_modules(dev);
+
+ mlx4_sense_init(dev);
+ mlx4_start_sense(dev);
+
+ priv->removed = 0;
+
+ if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
+ atomic_dec(&pf_loading);
+
+ kfree(dev_cap);
+ return 0;
+
+err_port:
+ for (--port; port >= 1; --port)
+ mlx4_cleanup_port_info(&priv->port[port]);
+
+ mlx4_cleanup_default_counters(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_counters_table(dev);
+ mlx4_cleanup_qp_table(dev);
+ mlx4_cleanup_srq_table(dev);
+ mlx4_cleanup_cq_table(dev);
+ mlx4_cmd_use_polling(dev);
+ mlx4_cleanup_eq_table(dev);
+ mlx4_cleanup_mcg_table(dev);
+ mlx4_cleanup_mr_table(dev);
+ mlx4_cleanup_xrcd_table(dev);
+ mlx4_cleanup_pd_table(dev);
+ mlx4_cleanup_uar_table(dev);
+
+err_steer:
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
+
+err_disable_msix:
+ if (dev->flags & MLX4_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+
+err_free_eq:
+ mlx4_free_eq_table(dev);
+
+err_master_mfunc:
+ if (mlx4_is_master(dev)) {
+ mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
+ mlx4_multi_func_cleanup(dev);
+ }
+
+ if (mlx4_is_slave(dev))
+ mlx4_slave_destroy_special_qp_cap(dev);
+
+err_close:
+ mlx4_close_hca(dev);
+
+err_fw:
+ mlx4_close_fw(dev);
+
+err_mfunc:
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
+
+err_cmd:
+ mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
+
+err_sriov:
+ if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
+ pci_disable_sriov(pdev);
+ dev->flags &= ~MLX4_FLAG_SRIOV;
+ }
+
+ if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
+ atomic_dec(&pf_loading);
+
+ kfree(priv->dev.dev_vfs);
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
+ kfree(dev_cap);
+ return err;
+}
+
+static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
+ struct mlx4_priv *priv)
+{
+ int err;
+ int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+ int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+ const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
+ {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
+ unsigned total_vfs = 0;
+ unsigned int i;
+
+ pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
+
+ err = mlx4_pci_enable_device(&priv->dev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
+ return err;
+ }
+
+ /* Due to requirement that all VFs and the PF are *guaranteed* 2 MACS
+ * per port, we must limit the number of VFs to 63 (since their are
+ * 128 MACs)
+ */
+ for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc;
+ total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
+ nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
+ if (nvfs[i] < 0) {
+ dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
+ err = -EINVAL;
+ goto err_disable_pdev;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc;
+ i++) {
+ prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
+ if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
+ dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
+ err = -EINVAL;
+ goto err_disable_pdev;
+ }
+ }
+ if (total_vfs > MLX4_MAX_NUM_VF) {
+ dev_err(&pdev->dev,
+ "Requested more VF's (%d) than allowed by hw (%d)\n",
+ total_vfs, MLX4_MAX_NUM_VF);
+ err = -EINVAL;
+ goto err_disable_pdev;
+ }
+
+ for (i = 0; i < MLX4_MAX_PORTS; i++) {
+ if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
+ dev_err(&pdev->dev,
+ "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n",
+ nvfs[i] + nvfs[2], i + 1,
+ MLX4_MAX_NUM_VF_P_PORT);
+ err = -EINVAL;
+ goto err_disable_pdev;
+ }
+ }
+
+ /* Check for BARs. */
+ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
+ !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
+ pci_dev_data, pci_resource_flags(pdev, 0));
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+ if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing UAR, aborting\n");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
+ goto err_disable_pdev;
+ }
+
+ pci_set_master(pdev);
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
+ goto err_release_regions;
+ }
+ }
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
+ goto err_release_regions;
+ }
+ }
+
+ /* Allow large DMA segments, up to the firmware limit of 1 GB */
+ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+ /* Detect if this device is a virtual function */
+ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
+ /* When acting as pf, we normally skip vfs unless explicitly
+ * requested to probe them.
+ */
+ if (total_vfs) {
+ unsigned vfs_offset = 0;
+
+ for (i = 0; i < ARRAY_SIZE(nvfs) &&
+ vfs_offset + nvfs[i] < extended_func_num(pdev);
+ vfs_offset += nvfs[i], i++)
+ ;
+ if (i == ARRAY_SIZE(nvfs)) {
+ err = -ENODEV;
+ goto err_release_regions;
+ }
+ if ((extended_func_num(pdev) - vfs_offset)
+ > prb_vf[i]) {
+ dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
+ extended_func_num(pdev));
+ err = -ENODEV;
+ goto err_release_regions;
+ }
+ }
+ }
+
+ err = mlx4_crdump_init(&priv->dev);
+ if (err)
+ goto err_release_regions;
+
+ err = mlx4_catas_init(&priv->dev);
+ if (err)
+ goto err_crdump;
+
+ err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
+ if (err)
+ goto err_catas;
+
+ return 0;
+
+err_catas:
+ mlx4_catas_end(&priv->dev);
+
+err_crdump:
+ mlx4_crdump_end(&priv->dev);
+
+err_release_regions:
+ pci_release_regions(pdev);
+
+err_disable_pdev:
+ mlx4_pci_disable_device(&priv->dev);
+ return err;
+}
+
+static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type port_type)
+{
+ struct mlx4_port_info *info = container_of(devlink_port,
+ struct mlx4_port_info,
+ devlink_port);
+ enum mlx4_port_type mlx4_port_type;
+
+ switch (port_type) {
+ case DEVLINK_PORT_TYPE_AUTO:
+ mlx4_port_type = MLX4_PORT_TYPE_AUTO;
+ break;
+ case DEVLINK_PORT_TYPE_ETH:
+ mlx4_port_type = MLX4_PORT_TYPE_ETH;
+ break;
+ case DEVLINK_PORT_TYPE_IB:
+ mlx4_port_type = MLX4_PORT_TYPE_IB;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return __set_port_type(info, mlx4_port_type);
+}
+
+static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
+{
+ struct mlx4_priv *priv = devlink_priv(devlink);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+ union devlink_param_value saved_value;
+ int err;
+
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+ &saved_value);
+ if (!err && mlx4_internal_err_reset != saved_value.vbool) {
+ mlx4_internal_err_reset = saved_value.vbool;
+ /* Notify on value changed on runtime configuration mode */
+ devlink_param_value_changed(devlink,
+ DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET);
+ }
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+ &saved_value);
+ if (!err)
+ log_num_mac = order_base_2(saved_value.vu32);
+ err = devlink_param_driverinit_value_get(devlink,
+ MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
+ &saved_value);
+ if (!err)
+ enable_64b_cqe_eqe = saved_value.vbool;
+ err = devlink_param_driverinit_value_get(devlink,
+ MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
+ &saved_value);
+ if (!err)
+ enable_4k_uar = saved_value.vbool;
+ err = devlink_param_driverinit_value_get(devlink,
+ DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+ &saved_value);
+ if (!err && crdump->snapshot_enable != saved_value.vbool) {
+ crdump->snapshot_enable = saved_value.vbool;
+ devlink_param_value_changed(devlink,
+ DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT);
+ }
+}
+
+static int mlx4_devlink_reload(struct devlink *devlink,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx4_priv *priv = devlink_priv(devlink);
+ struct mlx4_dev *dev = &priv->dev;
+ struct mlx4_dev_persistent *persist = dev->persist;
+ int err;
+
+ if (persist->num_vfs)
+ mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
+ err = mlx4_restart_one(persist->pdev, true, devlink);
+ if (err)
+ mlx4_err(persist->dev, "mlx4_restart_one failed, ret=%d\n", err);
+
+ return err;
+}
+
+static const struct devlink_ops mlx4_devlink_ops = {
+ .port_type_set = mlx4_devlink_port_type_set,
+ .reload = mlx4_devlink_reload,
+};
+
+static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct devlink *devlink;
+ struct mlx4_priv *priv;
+ struct mlx4_dev *dev;
+ int ret;
+
+ printk_once(KERN_INFO "%s", mlx4_version);
+
+ devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
+ if (!devlink)
+ return -ENOMEM;
+ priv = devlink_priv(devlink);
+
+ dev = &priv->dev;
+ dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
+ if (!dev->persist) {
+ ret = -ENOMEM;
+ goto err_devlink_free;
+ }
+ dev->persist->pdev = pdev;
+ dev->persist->dev = dev;
+ pci_set_drvdata(pdev, dev->persist);
+ priv->pci_dev_data = id->driver_data;
+ mutex_init(&dev->persist->device_state_mutex);
+ mutex_init(&dev->persist->interface_state_mutex);
+ mutex_init(&dev->persist->pci_status_mutex);
+
+ ret = devlink_register(devlink, &pdev->dev);
+ if (ret)
+ goto err_persist_free;
+ ret = devlink_params_register(devlink, mlx4_devlink_params,
+ ARRAY_SIZE(mlx4_devlink_params));
+ if (ret)
+ goto err_devlink_unregister;
+ mlx4_devlink_set_params_init_values(devlink);
+ ret = __mlx4_init_one(pdev, id->driver_data, priv);
+ if (ret)
+ goto err_params_unregister;
+
+ pci_save_state(pdev);
+ return 0;
+
+err_params_unregister:
+ devlink_params_unregister(devlink, mlx4_devlink_params,
+ ARRAY_SIZE(mlx4_devlink_params));
+err_devlink_unregister:
+ devlink_unregister(devlink);
+err_persist_free:
+ kfree(dev->persist);
+err_devlink_free:
+ devlink_free(devlink);
+ return ret;
+}
+
+static void mlx4_clean_dev(struct mlx4_dev *dev)
+{
+ struct mlx4_dev_persistent *persist = dev->persist;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
+
+ memset(priv, 0, sizeof(*priv));
+ priv->dev.persist = persist;
+ priv->dev.flags = flags;
+}
+
+static void mlx4_unload_one(struct pci_dev *pdev)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pci_dev_data;
+ int p, i;
+
+ if (priv->removed)
+ return;
+
+ /* saving current ports type for further use */
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
+ dev->persist->curr_port_poss_type[i] = dev->caps.
+ possible_type[i + 1];
+ }
+
+ pci_dev_data = priv->pci_dev_data;
+
+ mlx4_stop_sense(dev);
+ mlx4_unregister_device(dev);
+
+ for (p = 1; p <= dev->caps.num_ports; p++) {
+ mlx4_cleanup_port_info(&priv->port[p]);
+ mlx4_CLOSE_PORT(dev, p);
+ }
+
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_SLAVES_ONLY);
+
+ mlx4_cleanup_default_counters(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_cleanup_counters_table(dev);
+ mlx4_cleanup_qp_table(dev);
+ mlx4_cleanup_srq_table(dev);
+ mlx4_cleanup_cq_table(dev);
+ mlx4_cmd_use_polling(dev);
+ mlx4_cleanup_eq_table(dev);
+ mlx4_cleanup_mcg_table(dev);
+ mlx4_cleanup_mr_table(dev);
+ mlx4_cleanup_xrcd_table(dev);
+ mlx4_cleanup_pd_table(dev);
+
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_STRUCTS_ONLY);
+
+ iounmap(priv->kar);
+ mlx4_uar_free(dev, &priv->driver_uar);
+ mlx4_cleanup_uar_table(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
+ mlx4_free_eq_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
+ mlx4_close_hca(dev);
+ mlx4_close_fw(dev);
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
+ mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
+
+ if (dev->flags & MLX4_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
+ mlx4_slave_destroy_special_qp_cap(dev);
+ kfree(dev->dev_vfs);
+
+ mlx4_clean_dev(dev);
+ priv->pci_dev_data = pci_dev_data;
+ priv->removed = 1;
+}
+
+static void mlx4_remove_one(struct pci_dev *pdev)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct devlink *devlink = priv_to_devlink(priv);
+ int active_vfs = 0;
+
+ if (mlx4_is_slave(dev))
+ persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+
+ mutex_lock(&persist->interface_state_mutex);
+ persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
+ mutex_unlock(&persist->interface_state_mutex);
+
+ /* Disabling SR-IOV is not allowed while there are active vf's */
+ if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
+ active_vfs = mlx4_how_many_lives_vf(dev);
+ if (active_vfs) {
+ pr_warn("Removing PF when there are active VF's !!\n");
+ pr_warn("Will not disable SR-IOV.\n");
+ }
+ }
+
+ /* device marked to be under deletion running now without the lock
+ * letting other tasks to be terminated
+ */
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_unload_one(pdev);
+ else
+ mlx4_info(dev, "%s: interface is down\n", __func__);
+ mlx4_catas_end(dev);
+ mlx4_crdump_end(dev);
+ if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ }
+
+ pci_release_regions(pdev);
+ mlx4_pci_disable_device(dev);
+ devlink_params_unregister(devlink, mlx4_devlink_params,
+ ARRAY_SIZE(mlx4_devlink_params));
+ devlink_unregister(devlink);
+ kfree(dev->persist);
+ devlink_free(devlink);
+}
+
+static int restore_current_port_types(struct mlx4_dev *dev,
+ enum mlx4_port_type *types,
+ enum mlx4_port_type *poss_types)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err, i;
+
+ mlx4_stop_sense(dev);
+
+ mutex_lock(&priv->port_mutex);
+ for (i = 0; i < dev->caps.num_ports; i++)
+ dev->caps.possible_type[i + 1] = poss_types[i];
+ err = mlx4_change_port_types(dev, types);
+ mlx4_start_sense(dev);
+ mutex_unlock(&priv->port_mutex);
+
+ return err;
+}
+
+int mlx4_restart_one(struct pci_dev *pdev, bool reload, struct devlink *devlink)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+ int pci_dev_data, err, total_vfs;
+
+ pci_dev_data = priv->pci_dev_data;
+ total_vfs = dev->persist->num_vfs;
+ memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
+
+ mlx4_unload_one(pdev);
+ if (reload)
+ mlx4_devlink_param_load_driverinit_values(devlink);
+ err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
+ if (err) {
+ mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
+ __func__, pci_name(pdev), err);
+ return err;
+ }
+
+ err = restore_current_port_types(dev, dev->persist->curr_port_type,
+ dev->persist->curr_port_poss_type);
+ if (err)
+ mlx4_err(dev, "could not restore original port types (%d)\n",
+ err);
+
+ return err;
+}
+
+#define MLX_SP(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_FORCE_SENSE_PORT }
+#define MLX_VF(id) { PCI_VDEVICE(MELLANOX, id), MLX4_PCI_DEV_IS_VF }
+#define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 }
+
+static const struct pci_device_id mlx4_pci_table[] = {
+#ifdef CONFIG_MLX4_CORE_GEN2
+ /* MT25408 "Hermon" */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR), /* SDR */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR), /* DDR */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR), /* QDR */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2), /* DDR Gen2 */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2), /* QDR Gen2 */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN), /* EN 10GigE */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2), /* EN 10GigE Gen2 */
+ /* MT25458 ConnectX EN 10GBASE-T */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN),
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2), /* Gen2 */
+ /* MT26468 ConnectX EN 10GigE PCIe Gen2*/
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2),
+ /* MT26438 ConnectX EN 40GigE PCIe Gen2 5GT/s */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2),
+ /* MT26478 ConnectX2 40GigE PCIe Gen2 */
+ MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2),
+ /* MT25400 Family [ConnectX-2] */
+ MLX_VF(0x1002), /* Virtual Function */
+#endif /* CONFIG_MLX4_CORE_GEN2 */
+ /* MT27500 Family [ConnectX-3] */
+ MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3),
+ MLX_VF(0x1004), /* Virtual Function */
+ MLX_GN(0x1005), /* MT27510 Family */
+ MLX_GN(0x1006), /* MT27511 Family */
+ MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO), /* MT27520 Family */
+ MLX_GN(0x1008), /* MT27521 Family */
+ MLX_GN(0x1009), /* MT27530 Family */
+ MLX_GN(0x100a), /* MT27531 Family */
+ MLX_GN(0x100b), /* MT27540 Family */
+ MLX_GN(0x100c), /* MT27541 Family */
+ MLX_GN(0x100d), /* MT27550 Family */
+ MLX_GN(0x100e), /* MT27551 Family */
+ MLX_GN(0x100f), /* MT27560 Family */
+ MLX_GN(0x1010), /* MT27561 Family */
+
+ /*
+ * See the mellanox_check_broken_intx_masking() quirk when
+ * adding devices
+ */
+
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
+
+static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+
+ mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
+ mlx4_enter_error_state(persist);
+
+ mutex_lock(&persist->interface_state_mutex);
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_unload_one(pdev);
+
+ mutex_unlock(&persist->interface_state_mutex);
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ mlx4_pci_disable_device(persist->dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+ int err;
+
+ mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
+ err = mlx4_pci_enable_device(dev);
+ if (err) {
+ mlx4_err(dev, "Can not re-enable device, err=%d\n", err);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void mlx4_pci_resume(struct pci_dev *pdev)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+ int total_vfs;
+ int err;
+
+ mlx4_err(dev, "%s was called\n", __func__);
+ total_vfs = dev->persist->num_vfs;
+ memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
+
+ mutex_lock(&persist->interface_state_mutex);
+ if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
+ err = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
+ priv, 1);
+ if (err) {
+ mlx4_err(dev, "%s: mlx4_load_one failed, err=%d\n",
+ __func__, err);
+ goto end;
+ }
+
+ err = restore_current_port_types(dev, dev->persist->
+ curr_port_type, dev->persist->
+ curr_port_poss_type);
+ if (err)
+ mlx4_err(dev, "could not restore original port types (%d)\n", err);
+ }
+end:
+ mutex_unlock(&persist->interface_state_mutex);
+
+}
+
+static void mlx4_shutdown(struct pci_dev *pdev)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+
+ mlx4_info(persist->dev, "mlx4_shutdown was called\n");
+ mutex_lock(&persist->interface_state_mutex);
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_unload_one(pdev);
+ mutex_unlock(&persist->interface_state_mutex);
+ mlx4_pci_disable_device(dev);
+}
+
+static const struct pci_error_handlers mlx4_err_handler = {
+ .error_detected = mlx4_pci_err_detected,
+ .slot_reset = mlx4_pci_slot_reset,
+ .resume = mlx4_pci_resume,
+};
+
+static int mlx4_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+
+ mlx4_err(dev, "suspend was called\n");
+ mutex_lock(&persist->interface_state_mutex);
+ if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+ mlx4_unload_one(pdev);
+ mutex_unlock(&persist->interface_state_mutex);
+
+ return 0;
+}
+
+static int mlx4_resume(struct pci_dev *pdev)
+{
+ struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
+ struct mlx4_dev *dev = persist->dev;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
+ int total_vfs;
+ int ret = 0;
+
+ mlx4_err(dev, "resume was called\n");
+ total_vfs = dev->persist->num_vfs;
+ memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
+
+ mutex_lock(&persist->interface_state_mutex);
+ if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
+ ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs,
+ nvfs, priv, 1);
+ if (!ret) {
+ ret = restore_current_port_types(dev,
+ dev->persist->curr_port_type,
+ dev->persist->curr_port_poss_type);
+ if (ret)
+ mlx4_err(dev, "resume: could not restore original port types (%d)\n", ret);
+ }
+ }
+ mutex_unlock(&persist->interface_state_mutex);
+
+ return ret;
+}
+
+static struct pci_driver mlx4_driver = {
+ .name = DRV_NAME,
+ .id_table = mlx4_pci_table,
+ .probe = mlx4_init_one,
+ .shutdown = mlx4_shutdown,
+ .remove = mlx4_remove_one,
+ .suspend = mlx4_suspend,
+ .resume = mlx4_resume,
+ .err_handler = &mlx4_err_handler,
+};
+
+static int __init mlx4_verify_params(void)
+{
+ if (msi_x < 0) {
+ pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
+ return -1;
+ }
+
+ if ((log_num_mac < 0) || (log_num_mac > 7)) {
+ pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
+ return -1;
+ }
+
+ if (log_num_vlan != 0)
+ pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
+ MLX4_LOG_NUM_VLANS);
+
+ if (use_prio != 0)
+ pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
+
+ if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
+ pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
+ log_mtts_per_seg);
+ return -1;
+ }
+
+ /* Check if module param for ports type has legal combination */
+ if (port_type_array[0] == false && port_type_array[1] == true) {
+ pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
+ port_type_array[0] = true;
+ }
+
+ if (mlx4_log_num_mgm_entry_size < -7 ||
+ (mlx4_log_num_mgm_entry_size > 0 &&
+ (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
+ mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
+ pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
+ mlx4_log_num_mgm_entry_size,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __init mlx4_init(void)
+{
+ int ret;
+
+ if (mlx4_verify_params())
+ return -EINVAL;
+
+
+ mlx4_wq = create_singlethread_workqueue("mlx4");
+ if (!mlx4_wq)
+ return -ENOMEM;
+
+ ret = pci_register_driver(&mlx4_driver);
+ if (ret < 0)
+ destroy_workqueue(mlx4_wq);
+ return ret < 0 ? ret : 0;
+}
+
+static void __exit mlx4_cleanup(void)
+{
+ pci_unregister_driver(&mlx4_driver);
+ destroy_workqueue(mlx4_wq);
+}
+
+module_init(mlx4_init);
+module_exit(mlx4_cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
new file mode 100644
index 000000000..9c481823b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -0,0 +1,1649 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+#include <linux/etherdevice.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
+#include <linux/export.h>
+
+#include "mlx4.h"
+
+int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
+{
+ return 1 << dev->oper_log_mgm_entry_size;
+}
+
+int mlx4_get_qp_per_mgm(struct mlx4_dev *dev)
+{
+ return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2);
+}
+
+static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox,
+ u32 size,
+ u64 *reg_id)
+{
+ u64 imm;
+ int err = 0;
+
+ err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ *reg_id = imm;
+
+ return err;
+}
+
+static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid)
+{
+ int err = 0;
+
+ err = mlx4_cmd(dev, regid, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ return err;
+}
+
+static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+ return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+ return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+
+static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 port, u8 steer,
+ struct mlx4_cmd_mailbox *mailbox)
+{
+ u32 in_mod;
+
+ in_mod = (u32) port << 16 | steer << 1;
+ return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1,
+ MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+}
+
+static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ u16 *hash, u8 op_mod)
+{
+ u64 imm;
+ int err;
+
+ err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod,
+ MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ if (!err)
+ *hash = imm;
+
+ return err;
+}
+
+static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_promisc_qp *pqp;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return NULL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+ if (pqp->qpn == qpn)
+ return pqp;
+ }
+ /* not found */
+ return NULL;
+}
+
+/*
+ * Add new entry to steering data structure.
+ * All promisc QPs should be added as well
+ */
+static int new_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 members_count;
+ struct mlx4_steer_index *new_entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp = NULL;
+ u32 prot;
+ int err;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+ new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&new_entry->duplicates);
+ new_entry->index = index;
+ list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]);
+
+ /* If the given qpn is also a promisc qp,
+ * it should be inserted to duplicates list
+ */
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (pqp) {
+ dqp = kmalloc(sizeof(*dqp), GFP_KERNEL);
+ if (!dqp) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &new_entry->duplicates);
+ }
+
+ /* if no promisc qps for this vep, we are done */
+ if (list_empty(&s_steer->promisc_qps[steer]))
+ return 0;
+
+ /* now need to add all the promisc qps to the new
+ * steering entry, as they should also receive the packets
+ * destined to this address */
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ mgm = mailbox->buf;
+
+ err = mlx4_READ_ENTRY(dev, index, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ prot = be32_to_cpu(mgm->members_count) >> 30;
+ list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+ /* don't add already existing qpn */
+ if (pqp->qpn == qpn)
+ continue;
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* out of space */
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+
+ /* add the qpn */
+ mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK);
+ }
+ /* update the qps count and update the entry with all the promisc qps*/
+ mgm->members_count = cpu_to_be32(members_count | (prot << 30));
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
+
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (!err)
+ return 0;
+out_alloc:
+ if (dqp) {
+ list_del(&dqp->list);
+ kfree(dqp);
+ }
+ list_del(&new_entry->list);
+ kfree(new_entry);
+ return err;
+}
+
+/* update the data structures with existing steering entry */
+static int existing_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *tmp_entry, *entry = NULL;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (!pqp)
+ return 0; /* nothing to do */
+
+ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (tmp_entry->index == index) {
+ entry = tmp_entry;
+ break;
+ }
+ }
+ if (unlikely(!entry)) {
+ mlx4_warn(dev, "Steering entry at index %x is not registered\n", index);
+ return -EINVAL;
+ }
+
+ /* the given qpn is listed as a promisc qpn
+ * we need to add it as a duplicate to this entry
+ * for future references */
+ list_for_each_entry(dqp, &entry->duplicates, list) {
+ if (qpn == dqp->qpn)
+ return 0; /* qp is already duplicated */
+ }
+
+ /* add the qp as a duplicate on this index */
+ dqp = kmalloc(sizeof(*dqp), GFP_KERNEL);
+ if (!dqp)
+ return -ENOMEM;
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list, &entry->duplicates);
+
+ return 0;
+}
+
+/* Check whether a qpn is a duplicate on steering entry
+ * If so, it should not be removed from mgm */
+static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *tmp_entry, *entry = NULL;
+ struct mlx4_promisc_qp *dqp, *tmp_dqp;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return NULL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ /* if qp is not promisc, it cannot be duplicated */
+ if (!get_promisc_qp(dev, port, steer, qpn))
+ return false;
+
+ /* The qp is promisc qp so it is a duplicate on this index
+ * Find the index entry, and remove the duplicate */
+ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (tmp_entry->index == index) {
+ entry = tmp_entry;
+ break;
+ }
+ }
+ if (unlikely(!entry)) {
+ mlx4_warn(dev, "Steering entry for index %x is not registered\n", index);
+ return false;
+ }
+ list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) {
+ if (dqp->qpn == qpn) {
+ list_del(&dqp->list);
+ kfree(dqp);
+ }
+ }
+ return true;
+}
+
+/* Returns true if all the QPs != tqpn contained in this entry
+ * are Promisc QPs. Returns false otherwise.
+ */
+static bool promisc_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 tqpn,
+ u32 *members_count)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 m_count;
+ bool ret = false;
+ int i;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return false;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return false;
+ mgm = mailbox->buf;
+
+ if (mlx4_READ_ENTRY(dev, index, mailbox))
+ goto out;
+ m_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ if (members_count)
+ *members_count = m_count;
+
+ for (i = 0; i < m_count; i++) {
+ u32 qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK;
+ if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) {
+ /* the qp is not promisc, the entry can't be removed */
+ goto out;
+ }
+ }
+ ret = true;
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+
+/* IF a steering entry contains only promisc QPs, it can be removed. */
+static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer,
+ unsigned int index, u32 tqpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_steer_index *entry = NULL, *tmp_entry;
+ u32 members_count;
+ bool ret = false;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return NULL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ if (!promisc_steering_entry(dev, port, steer, index,
+ tqpn, &members_count))
+ goto out;
+
+ /* All the qps currently registered for this entry are promiscuous,
+ * Checking for duplicates */
+ ret = true;
+ list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) {
+ if (entry->index == index) {
+ if (list_empty(&entry->duplicates) ||
+ members_count == 1) {
+ struct mlx4_promisc_qp *pqp, *tmp_pqp;
+ /* If there is only 1 entry in duplicates then
+ * this is the QP we want to delete, going over
+ * the list and deleting the entry.
+ */
+ list_del(&entry->list);
+ list_for_each_entry_safe(pqp, tmp_pqp,
+ &entry->duplicates,
+ list) {
+ list_del(&pqp->list);
+ kfree(pqp);
+ }
+ kfree(entry);
+ } else {
+ /* This entry contains duplicates so it shouldn't be removed */
+ ret = false;
+ goto out;
+ }
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int add_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer, u32 qpn)
+{
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ struct mlx4_steer_index *entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+ u32 members_count;
+ u32 prot;
+ int i;
+ bool found;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
+ mutex_lock(&priv->mcg_table.mutex);
+
+ if (get_promisc_qp(dev, port, steer, qpn)) {
+ err = 0; /* Noting to do, already exists */
+ goto out_mutex;
+ }
+
+ pqp = kmalloc(sizeof(*pqp), GFP_KERNEL);
+ if (!pqp) {
+ err = -ENOMEM;
+ goto out_mutex;
+ }
+ pqp->qpn = qpn;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ goto out_alloc;
+ }
+ mgm = mailbox->buf;
+
+ if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) {
+ /* The promisc QP needs to be added for each one of the steering
+ * entries. If it already exists, needs to be added as
+ * a duplicate for this entry.
+ */
+ list_for_each_entry(entry,
+ &s_steer->steer_entries[steer],
+ list) {
+ err = mlx4_READ_ENTRY(dev, entry->index, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ members_count = be32_to_cpu(mgm->members_count) &
+ 0xffffff;
+ prot = be32_to_cpu(mgm->members_count) >> 30;
+ found = false;
+ for (i = 0; i < members_count; i++) {
+ if ((be32_to_cpu(mgm->qp[i]) &
+ MGM_QPN_MASK) == qpn) {
+ /* Entry already exists.
+ * Add to duplicates.
+ */
+ dqp = kmalloc(sizeof(*dqp), GFP_KERNEL);
+ if (!dqp) {
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+ dqp->qpn = qpn;
+ list_add_tail(&dqp->list,
+ &entry->duplicates);
+ found = true;
+ }
+ }
+ if (!found) {
+ /* Need to add the qpn to mgm */
+ if (members_count ==
+ dev->caps.num_qp_per_mgm) {
+ /* entry is full */
+ err = -ENOMEM;
+ goto out_mailbox;
+ }
+ mgm->qp[members_count++] =
+ cpu_to_be32(qpn & MGM_QPN_MASK);
+ mgm->members_count =
+ cpu_to_be32(members_count |
+ (prot << 30));
+ err = mlx4_WRITE_ENTRY(dev, entry->index,
+ mailbox);
+ if (err)
+ goto out_mailbox;
+ }
+ }
+ }
+
+ /* add the new qpn to list of promisc qps */
+ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+ /* now need to add all the promisc qps to default entry */
+ memset(mgm, 0, sizeof(*mgm));
+ members_count = 0;
+ list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) {
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ /* entry is full */
+ err = -ENOMEM;
+ goto out_list;
+ }
+ mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+ }
+ mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+ err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox);
+ if (err)
+ goto out_list;
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ mutex_unlock(&priv->mcg_table.mutex);
+ return 0;
+
+out_list:
+ list_del(&pqp->list);
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+out_alloc:
+ kfree(pqp);
+out_mutex:
+ mutex_unlock(&priv->mcg_table.mutex);
+ return err;
+}
+
+static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
+ enum mlx4_steer_type steer, u32 qpn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_steer *s_steer;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ struct mlx4_steer_index *entry, *tmp_entry;
+ struct mlx4_promisc_qp *pqp;
+ struct mlx4_promisc_qp *dqp;
+ u32 members_count;
+ bool found;
+ bool back_to_list = false;
+ int i;
+ int err;
+
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+ mutex_lock(&priv->mcg_table.mutex);
+
+ pqp = get_promisc_qp(dev, port, steer, qpn);
+ if (unlikely(!pqp)) {
+ mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn);
+ /* nothing to do */
+ err = 0;
+ goto out_mutex;
+ }
+
+ /*remove from list of promisc qps */
+ list_del(&pqp->list);
+
+ /* set the default entry not to include the removed one */
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = -ENOMEM;
+ back_to_list = true;
+ goto out_list;
+ }
+ mgm = mailbox->buf;
+ members_count = 0;
+ list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list)
+ mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+ mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+ err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox);
+ if (err)
+ goto out_mailbox;
+
+ if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) {
+ /* Remove the QP from all the steering entries */
+ list_for_each_entry_safe(entry, tmp_entry,
+ &s_steer->steer_entries[steer],
+ list) {
+ found = false;
+ list_for_each_entry(dqp, &entry->duplicates, list) {
+ if (dqp->qpn == qpn) {
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ /* A duplicate, no need to change the MGM,
+ * only update the duplicates list
+ */
+ list_del(&dqp->list);
+ kfree(dqp);
+ } else {
+ int loc = -1;
+
+ err = mlx4_READ_ENTRY(dev,
+ entry->index,
+ mailbox);
+ if (err)
+ goto out_mailbox;
+ members_count =
+ be32_to_cpu(mgm->members_count) &
+ 0xffffff;
+ if (!members_count) {
+ mlx4_warn(dev, "QP %06x wasn't found in entry %x mcount=0. deleting entry...\n",
+ qpn, entry->index);
+ list_del(&entry->list);
+ kfree(entry);
+ continue;
+ }
+
+ for (i = 0; i < members_count; ++i)
+ if ((be32_to_cpu(mgm->qp[i]) &
+ MGM_QPN_MASK) == qpn) {
+ loc = i;
+ break;
+ }
+
+ if (loc < 0) {
+ mlx4_err(dev, "QP %06x wasn't found in entry %d\n",
+ qpn, entry->index);
+ err = -EINVAL;
+ goto out_mailbox;
+ }
+
+ /* Copy the last QP in this MGM
+ * over removed QP
+ */
+ mgm->qp[loc] = mgm->qp[members_count - 1];
+ mgm->qp[members_count - 1] = 0;
+ mgm->members_count =
+ cpu_to_be32(--members_count |
+ (MLX4_PROT_ETH << 30));
+
+ err = mlx4_WRITE_ENTRY(dev,
+ entry->index,
+ mailbox);
+ if (err)
+ goto out_mailbox;
+ }
+ }
+ }
+
+out_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+out_list:
+ if (back_to_list)
+ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+ else
+ kfree(pqp);
+out_mutex:
+ mutex_unlock(&priv->mcg_table.mutex);
+ return err;
+}
+
+/*
+ * Caller must hold MCG table semaphore. gid and mgm parameters must
+ * be properly aligned for command interface.
+ *
+ * Returns 0 unless a firmware command error occurs.
+ *
+ * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
+ * and *mgm holds MGM entry.
+ *
+ * if GID is found in AMGM, *index = index in AMGM, *prev = index of
+ * previous entry in hash chain and *mgm holds AMGM entry.
+ *
+ * If no AMGM exists for given gid, *index = -1, *prev = index of last
+ * entry in hash chain and *mgm holds end of hash chain.
+ */
+static int find_entry(struct mlx4_dev *dev, u8 port,
+ u8 *gid, enum mlx4_protocol prot,
+ struct mlx4_cmd_mailbox *mgm_mailbox,
+ int *prev, int *index)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm = mgm_mailbox->buf;
+ u8 *mgid;
+ int err;
+ u16 hash;
+ u8 op_mod = (prot == MLX4_PROT_ETH) ?
+ !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) : 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+ mgid = mailbox->buf;
+
+ memcpy(mgid, gid, 16);
+
+ err = mlx4_GID_HASH(dev, mailbox, &hash, op_mod);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err)
+ return err;
+
+ if (0)
+ mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, hash);
+
+ *index = hash;
+ *prev = -1;
+
+ do {
+ err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox);
+ if (err)
+ return err;
+
+ if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
+ if (*index != hash) {
+ mlx4_err(dev, "Found zero MGID in AMGM\n");
+ err = -EINVAL;
+ }
+ return err;
+ }
+
+ if (!memcmp(mgm->gid, gid, 16) &&
+ be32_to_cpu(mgm->members_count) >> 30 == prot)
+ return err;
+
+ *prev = *index;
+ *index = be32_to_cpu(mgm->next_gid_index) >> 6;
+ } while (*index);
+
+ *index = -1;
+ return err;
+}
+
+static const u8 __promisc_mode[] = {
+ [MLX4_FS_REGULAR] = 0x0,
+ [MLX4_FS_ALL_DEFAULT] = 0x1,
+ [MLX4_FS_MC_DEFAULT] = 0x3,
+ [MLX4_FS_MIRROR_RX_PORT] = 0x4,
+ [MLX4_FS_MIRROR_SX_PORT] = 0x5,
+ [MLX4_FS_UC_SNIFFER] = 0x6,
+ [MLX4_FS_MC_SNIFFER] = 0x7,
+};
+
+int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
+ enum mlx4_net_trans_promisc_mode flow_type)
+{
+ if (flow_type >= MLX4_FS_MODE_NUM) {
+ mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type);
+ return -EINVAL;
+ }
+ return __promisc_mode[flow_type];
+}
+EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_mode);
+
+static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl,
+ struct mlx4_net_trans_rule_hw_ctrl *hw)
+{
+ u8 flags = 0;
+
+ flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0;
+ flags |= ctrl->exclusive ? (1 << 2) : 0;
+ flags |= ctrl->allow_loopback ? (1 << 3) : 0;
+
+ hw->flags = flags;
+ hw->type = __promisc_mode[ctrl->promisc_mode];
+ hw->prio = cpu_to_be16(ctrl->priority);
+ hw->port = ctrl->port;
+ hw->qpn = cpu_to_be32(ctrl->qpn);
+}
+
+const u16 __sw_id_hw[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001,
+ [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005,
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002,
+ [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004,
+ [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006,
+ [MLX4_NET_TRANS_RULE_ID_VXLAN] = 0xE008
+};
+
+int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
+ enum mlx4_net_trans_rule_id id)
+{
+ if (id >= MLX4_NET_TRANS_RULE_NUM) {
+ mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
+ return -EINVAL;
+ }
+ return __sw_id_hw[id];
+}
+EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_id);
+
+static const int __rule_hw_sz[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] =
+ sizeof(struct mlx4_net_trans_rule_hw_eth),
+ [MLX4_NET_TRANS_RULE_ID_IB] =
+ sizeof(struct mlx4_net_trans_rule_hw_ib),
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] =
+ sizeof(struct mlx4_net_trans_rule_hw_ipv4),
+ [MLX4_NET_TRANS_RULE_ID_TCP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
+ [MLX4_NET_TRANS_RULE_ID_UDP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
+ [MLX4_NET_TRANS_RULE_ID_VXLAN] =
+ sizeof(struct mlx4_net_trans_rule_hw_vxlan)
+};
+
+int mlx4_hw_rule_sz(struct mlx4_dev *dev,
+ enum mlx4_net_trans_rule_id id)
+{
+ if (id >= MLX4_NET_TRANS_RULE_NUM) {
+ mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
+ return -EINVAL;
+ }
+
+ return __rule_hw_sz[id];
+}
+EXPORT_SYMBOL_GPL(mlx4_hw_rule_sz);
+
+static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec,
+ struct _rule_hw *rule_hw)
+{
+ if (mlx4_hw_rule_sz(dev, spec->id) < 0)
+ return -EINVAL;
+ memset(rule_hw, 0, mlx4_hw_rule_sz(dev, spec->id));
+ rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]);
+ rule_hw->size = mlx4_hw_rule_sz(dev, spec->id) >> 2;
+
+ switch (spec->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk,
+ ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk,
+ ETH_ALEN);
+ if (spec->eth.ether_type_enable) {
+ rule_hw->eth.ether_type_enable = 1;
+ rule_hw->eth.ether_type = spec->eth.ether_type;
+ }
+ rule_hw->eth.vlan_tag = spec->eth.vlan_id;
+ rule_hw->eth.vlan_tag_msk = spec->eth.vlan_id_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ rule_hw->ib.l3_qpn = spec->ib.l3_qpn;
+ rule_hw->ib.qpn_mask = spec->ib.qpn_msk;
+ memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16);
+ memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ return -EOPNOTSUPP;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ rule_hw->ipv4.src_ip = spec->ipv4.src_ip;
+ rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk;
+ rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip;
+ rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port;
+ rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk;
+ rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port;
+ rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_VXLAN:
+ rule_hw->vxlan.vni =
+ cpu_to_be32(be32_to_cpu(spec->vxlan.vni) << 8);
+ rule_hw->vxlan.vni_mask =
+ cpu_to_be32(be32_to_cpu(spec->vxlan.vni_mask) << 8);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return __rule_hw_sz[spec->id];
+}
+
+static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
+ struct mlx4_net_trans_rule *rule)
+{
+#define BUF_SIZE 256
+ struct mlx4_spec_list *cur;
+ char buf[BUF_SIZE];
+ int len = 0;
+
+ mlx4_err(dev, "%s", str);
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "port = %d prio = 0x%x qp = 0x%x ",
+ rule->port, rule->priority, rule->qpn);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ switch (cur->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dmac = %pM ", &cur->eth.dst_mac);
+ if (cur->eth.ether_type)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "ethertype = 0x%x ",
+ be16_to_cpu(cur->eth.ether_type));
+ if (cur->eth.vlan_id)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "vlan-id = %d ",
+ be16_to_cpu(cur->eth.vlan_id));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ if (cur->ipv4.src_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-ip = %pI4 ",
+ &cur->ipv4.src_ip);
+ if (cur->ipv4.dst_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-ip = %pI4 ",
+ &cur->ipv4.dst_ip);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ if (cur->tcp_udp.src_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-port = %d ",
+ be16_to_cpu(cur->tcp_udp.src_port));
+ if (cur->tcp_udp.dst_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-port = %d ",
+ be16_to_cpu(cur->tcp_udp.dst_port));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid = %pI6\n", cur->ib.dst_gid);
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid-mask = %pI6\n",
+ cur->ib.dst_gid_msk);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_VXLAN:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "VNID = %d ", be32_to_cpu(cur->vxlan.vni));
+ break;
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ break;
+
+ default:
+ break;
+ }
+ }
+ len += snprintf(buf + len, BUF_SIZE - len, "\n");
+ mlx4_err(dev, "%s", buf);
+
+ if (len >= BUF_SIZE)
+ mlx4_err(dev, "Network rule error message was truncated, print buffer is too small\n");
+}
+
+int mlx4_flow_attach(struct mlx4_dev *dev,
+ struct mlx4_net_trans_rule *rule, u64 *reg_id)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_spec_list *cur;
+ u32 size = 0;
+ int ret;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ if (!mlx4_qp_lookup(dev, rule->qpn)) {
+ mlx4_err_rule(dev, "QP doesn't exist\n", rule);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ trans_rule_ctrl_to_hw(rule, mailbox->buf);
+
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ ret = parse_trans_rule(dev, cur, mailbox->buf + size);
+ if (ret < 0)
+ goto out;
+
+ size += ret;
+ }
+
+ ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id);
+ if (ret == -ENOMEM) {
+ mlx4_err_rule(dev,
+ "mcg table is full. Fail to register network rule\n",
+ rule);
+ } else if (ret) {
+ if (ret == -ENXIO) {
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_err_rule(dev,
+ "DMFS is not enabled, "
+ "failed to register network rule.\n",
+ rule);
+ else
+ mlx4_err_rule(dev,
+ "Rule exceeds the dmfs_high_rate_mode limitations, "
+ "failed to register network rule.\n",
+ rule);
+
+ } else {
+ mlx4_err_rule(dev, "Fail to register network rule.\n", rule);
+ }
+ }
+
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_attach);
+
+int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+
+ err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id);
+ if (err)
+ mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n",
+ reg_id);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_detach);
+
+int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr,
+ int port, int qpn, u16 prio, u64 *reg_id)
+{
+ int err;
+ struct mlx4_spec_list spec_eth_outer = { {NULL} };
+ struct mlx4_spec_list spec_vxlan = { {NULL} };
+ struct mlx4_spec_list spec_eth_inner = { {NULL} };
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_REGULAR,
+ };
+
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ rule.port = port;
+ rule.qpn = qpn;
+ rule.priority = prio;
+ INIT_LIST_HEAD(&rule.list);
+
+ spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN);
+ memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+
+ spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN; /* any vxlan header */
+ spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH; /* any inner eth header */
+
+ list_add_tail(&spec_eth_outer.list, &rule.list);
+ list_add_tail(&spec_vxlan.list, &rule.list);
+ list_add_tail(&spec_eth_inner.list, &rule.list);
+
+ err = mlx4_flow_attach(dev, &rule, reg_id);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_tunnel_steer_add);
+
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
+ u32 max_range_qpn)
+{
+ int err;
+ u64 in_param;
+
+ in_param = ((u64) min_range_qpn) << 32;
+ in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF;
+
+ err = mlx4_cmd(dev, in_param, 0, 0,
+ MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE);
+
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 members_count;
+ int index = -1, prev;
+ int link = 0;
+ int i;
+ int err;
+ u8 port = gid[5];
+ u8 new_entry = 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
+
+ mutex_lock(&priv->mcg_table.mutex);
+ err = find_entry(dev, port, gid, prot,
+ mailbox, &prev, &index);
+ if (err)
+ goto out;
+
+ if (index != -1) {
+ if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
+ new_entry = 1;
+ memcpy(mgm->gid, gid, 16);
+ }
+ } else {
+ link = 1;
+
+ index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap);
+ if (index == -1) {
+ mlx4_err(dev, "No AMGM entries left\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ index += dev->caps.num_mgms;
+
+ new_entry = 1;
+ memset(mgm, 0, sizeof(*mgm));
+ memcpy(mgm->gid, gid, 16);
+ }
+
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ if (members_count == dev->caps.num_qp_per_mgm) {
+ mlx4_err(dev, "MGM at index %x is full\n", index);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < members_count; ++i)
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) {
+ mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn);
+ err = 0;
+ goto out;
+ }
+
+ if (block_mcast_loopback)
+ mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) |
+ (1U << MGM_BLCK_LB_BIT));
+ else
+ mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK);
+
+ mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30);
+
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
+ if (err)
+ goto out;
+
+ if (!link)
+ goto out;
+
+ err = mlx4_READ_ENTRY(dev, prev, mailbox);
+ if (err)
+ goto out;
+
+ mgm->next_gid_index = cpu_to_be32(index << 6);
+
+ err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
+ if (err)
+ goto out;
+
+out:
+ if (prot == MLX4_PROT_ETH && index != -1) {
+ /* manage the steering entry for promisc mode */
+ if (new_entry)
+ err = new_steering_entry(dev, port, steer,
+ index, qp->qpn);
+ else
+ err = existing_steering_entry(dev, port, steer,
+ index, qp->qpn);
+ }
+ if (err && link && index != -1) {
+ if (index < dev->caps.num_mgms)
+ mlx4_warn(dev, "Got AMGM index %d < %d\n",
+ index, dev->caps.num_mgms);
+ else
+ mlx4_bitmap_free(&priv->mcg_table.bitmap,
+ index - dev->caps.num_mgms, MLX4_USE_RR);
+ }
+ mutex_unlock(&priv->mcg_table.mutex);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, enum mlx4_steer_type steer)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mgm *mgm;
+ u32 members_count;
+ int prev, index;
+ int i, loc = -1;
+ int err;
+ u8 port = gid[5];
+ bool removed_entry = false;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
+
+ mutex_lock(&priv->mcg_table.mutex);
+
+ err = find_entry(dev, port, gid, prot,
+ mailbox, &prev, &index);
+ if (err)
+ goto out;
+
+ if (index == -1) {
+ mlx4_err(dev, "MGID %pI6 not found\n", gid);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* If this QP is also a promisc QP, it shouldn't be removed only if
+ * at least one none promisc QP is also attached to this MCG
+ */
+ if (prot == MLX4_PROT_ETH &&
+ check_duplicate_entry(dev, port, steer, index, qp->qpn) &&
+ !promisc_steering_entry(dev, port, steer, index, qp->qpn, NULL))
+ goto out;
+
+ members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+ for (i = 0; i < members_count; ++i)
+ if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) {
+ loc = i;
+ break;
+ }
+
+ if (loc == -1) {
+ mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* copy the last QP in this MGM over removed QP */
+ mgm->qp[loc] = mgm->qp[members_count - 1];
+ mgm->qp[members_count - 1] = 0;
+ mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30);
+
+ if (prot == MLX4_PROT_ETH)
+ removed_entry = can_remove_steering_entry(dev, port, steer,
+ index, qp->qpn);
+ if (members_count && (prot != MLX4_PROT_ETH || !removed_entry)) {
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
+ goto out;
+ }
+
+ /* We are going to delete the entry, members count should be 0 */
+ mgm->members_count = cpu_to_be32((u32) prot << 30);
+
+ if (prev == -1) {
+ /* Remove entry from MGM */
+ int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6;
+ if (amgm_index) {
+ err = mlx4_READ_ENTRY(dev, amgm_index, mailbox);
+ if (err)
+ goto out;
+ } else
+ memset(mgm->gid, 0, 16);
+
+ err = mlx4_WRITE_ENTRY(dev, index, mailbox);
+ if (err)
+ goto out;
+
+ if (amgm_index) {
+ if (amgm_index < dev->caps.num_mgms)
+ mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d\n",
+ index, amgm_index, dev->caps.num_mgms);
+ else
+ mlx4_bitmap_free(&priv->mcg_table.bitmap,
+ amgm_index - dev->caps.num_mgms, MLX4_USE_RR);
+ }
+ } else {
+ /* Remove entry from AMGM */
+ int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
+ err = mlx4_READ_ENTRY(dev, prev, mailbox);
+ if (err)
+ goto out;
+
+ mgm->next_gid_index = cpu_to_be32(cur_next_index << 6);
+
+ err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
+ if (err)
+ goto out;
+
+ if (index < dev->caps.num_mgms)
+ mlx4_warn(dev, "entry %d had next AMGM index %d < %d\n",
+ prev, index, dev->caps.num_mgms);
+ else
+ mlx4_bitmap_free(&priv->mcg_table.bitmap,
+ index - dev->caps.num_mgms, MLX4_USE_RR);
+ }
+
+out:
+ mutex_unlock(&priv->mcg_table.mutex);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ /* In case device is under an error, return success as a closing command */
+ err = 0;
+ return err;
+}
+
+static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], u8 attach, u8 block_loopback,
+ enum mlx4_protocol prot)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err = 0;
+ int qpn;
+
+ if (!mlx4_is_mfunc(dev))
+ return -EBADF;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memcpy(mailbox->buf, gid, 16);
+ qpn = qp->qpn;
+ qpn |= (prot << 28);
+ if (attach && block_loopback)
+ qpn |= (1 << 31);
+
+ err = mlx4_cmd(dev, mailbox->dma, qpn, attach,
+ MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err && !attach &&
+ dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
+ err = 0;
+ return err;
+}
+
+int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], u8 port,
+ int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id)
+{
+ struct mlx4_spec_list spec = { {NULL} };
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ rule.allow_loopback = !block_mcast_loopback;
+ rule.port = port;
+ rule.qpn = qp->qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ switch (prot) {
+ case MLX4_PROT_ETH:
+ spec.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN);
+ memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+ break;
+
+ case MLX4_PROT_IB_IPV6:
+ spec.id = MLX4_NET_TRANS_RULE_ID_IB;
+ memcpy(spec.ib.dst_gid, gid, 16);
+ memset(&spec.ib.dst_gid_msk, 0xff, 16);
+ break;
+ default:
+ return -EINVAL;
+ }
+ list_add_tail(&spec.list, &rule.list);
+
+ return mlx4_flow_attach(dev, &rule, reg_id);
+}
+
+int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ u8 port, int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id)
+{
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
+ /* fall through */
+
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_MC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 1,
+ block_mcast_loopback, prot);
+ return mlx4_qp_attach_common(dev, qp, gid,
+ block_mcast_loopback, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_trans_to_dmfs_attach(dev, qp, gid, port,
+ block_mcast_loopback,
+ prot, reg_id);
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
+
+int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, u64 reg_id)
+{
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
+ /* fall through */
+
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_MC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+
+ return mlx4_qp_detach_common(dev, qp, gid, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_flow_detach(dev, reg_id);
+
+ default:
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
+
+int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port,
+ u32 qpn, enum mlx4_net_trans_promisc_mode mode)
+{
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ };
+
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_ALL_DEFAULT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_MC_DEFAULT:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p != 0)
+ return -1;
+
+ rule.promisc_mode = mode;
+ rule.port = port;
+ rule.qpn = qpn;
+ INIT_LIST_HEAD(&rule.list);
+ mlx4_info(dev, "going promisc on %x\n", port);
+
+ return mlx4_flow_attach(dev, &rule, regid_p);
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add);
+
+int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
+ enum mlx4_net_trans_promisc_mode mode)
+{
+ int ret;
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_ALL_DEFAULT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_MC_DEFAULT:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p == 0)
+ return -1;
+
+ ret = mlx4_flow_detach(dev, *regid_p);
+ if (ret == 0)
+ *regid_p = 0;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove);
+
+int mlx4_unicast_attach(struct mlx4_dev *dev,
+ struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot)
+{
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_UC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 1,
+ block_mcast_loopback, prot);
+
+ return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback,
+ prot, MLX4_UC_STEER);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_attach);
+
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], enum mlx4_protocol prot)
+{
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_UC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+
+ return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_UC_STEER);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_detach);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ u32 qpn = (u32) vhcr->in_param & 0xffffffff;
+ int port = mlx4_slave_convert_port(dev, slave, vhcr->in_param >> 62);
+ enum mlx4_steer_type steer = vhcr->in_modifier;
+
+ if (port < 0)
+ return -EINVAL;
+
+ /* Promiscuous unicast is not allowed in mfunc */
+ if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)
+ return 0;
+
+ if (vhcr->op_modifier)
+ return add_promisc_qp(dev, port, steer, qpn);
+ else
+ return remove_promisc_qp(dev, port, steer, qpn);
+}
+
+static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn,
+ enum mlx4_steer_type steer, u8 add, u8 port)
+{
+ return mlx4_cmd(dev, (u64) qpn | (u64) port << 62, (u32) steer, add,
+ MLX4_CMD_PROMISC, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+}
+
+int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port);
+
+ return add_promisc_qp(dev, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add);
+
+int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port);
+
+ return remove_promisc_qp(dev, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove);
+
+int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port);
+
+ return add_promisc_qp(dev, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add);
+
+int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+ if (mlx4_is_mfunc(dev))
+ return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port);
+
+ return remove_promisc_qp(dev, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove);
+
+int mlx4_init_mcg_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+
+ /* No need for mcg_table when fw managed the mcg table*/
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return 0;
+ err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms,
+ dev->caps.num_amgms - 1, 0, 0);
+ if (err)
+ return err;
+
+ mutex_init(&priv->mcg_table.mutex);
+
+ return 0;
+}
+
+void mlx4_cleanup_mcg_table(struct mlx4_dev *dev)
+{
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
new file mode 100644
index 000000000..23f1b5b51
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -0,0 +1,1486 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_H
+#define MLX4_H
+
+#include <linux/mutex.h>
+#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
+#include <linux/timer.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <net/devlink.h>
+#include <linux/rwsem.h>
+
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/cmd.h>
+#include "fw_qos.h"
+
+#define DRV_NAME "mlx4_core"
+#define DRV_VERSION "4.0-0"
+#define DRV_NAME_FOR_FW "Linux," DRV_NAME "," DRV_VERSION
+
+#define MLX4_FS_UDP_UC_EN (1 << 1)
+#define MLX4_FS_TCP_UC_EN (1 << 2)
+#define MLX4_FS_NUM_OF_L2_ADDR 8
+#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7
+#define MLX4_FS_NUM_MCG (1 << 17)
+
+#define INIT_HCA_TPT_MW_ENABLE (1 << 7)
+
+#define MLX4_QUERY_IF_STAT_RESET BIT(31)
+
+enum {
+ MLX4_HCR_BASE = 0x80680,
+ MLX4_HCR_SIZE = 0x0001c,
+ MLX4_CLR_INT_SIZE = 0x00008,
+ MLX4_SLAVE_COMM_BASE = 0x0,
+ MLX4_COMM_PAGESIZE = 0x1000,
+ MLX4_CLOCK_SIZE = 0x00008,
+ MLX4_COMM_CHAN_CAPS = 0x8,
+ MLX4_COMM_CHAN_FLAGS = 0xc
+};
+
+enum {
+ MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10,
+ MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7,
+ MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12,
+ MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2),
+};
+
+enum {
+ MLX4_NUM_PDS = 1 << 15
+};
+
+enum {
+ MLX4_CMPT_TYPE_QP = 0,
+ MLX4_CMPT_TYPE_SRQ = 1,
+ MLX4_CMPT_TYPE_CQ = 2,
+ MLX4_CMPT_TYPE_EQ = 3,
+ MLX4_CMPT_NUM_TYPE
+};
+
+enum {
+ MLX4_CMPT_SHIFT = 24,
+ MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
+};
+
+enum mlx4_mpt_state {
+ MLX4_MPT_DISABLED = 0,
+ MLX4_MPT_EN_HW,
+ MLX4_MPT_EN_SW
+};
+
+#define MLX4_COMM_TIME 10000
+#define MLX4_COMM_OFFLINE_TIME_OUT 30000
+#define MLX4_COMM_CMD_NA_OP 0x0
+
+
+enum {
+ MLX4_COMM_CMD_RESET,
+ MLX4_COMM_CMD_VHCR0,
+ MLX4_COMM_CMD_VHCR1,
+ MLX4_COMM_CMD_VHCR2,
+ MLX4_COMM_CMD_VHCR_EN,
+ MLX4_COMM_CMD_VHCR_POST,
+ MLX4_COMM_CMD_FLR = 254
+};
+
+enum {
+ MLX4_VF_SMI_DISABLED,
+ MLX4_VF_SMI_ENABLED
+};
+
+/*The flag indicates that the slave should delay the RESET cmd*/
+#define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
+/*indicates how many retries will be done if we are in the middle of FLR*/
+#define NUM_OF_RESET_RETRIES 10
+#define SLEEP_TIME_IN_RESET (2 * 1000)
+enum mlx4_resource {
+ RES_QP,
+ RES_CQ,
+ RES_SRQ,
+ RES_XRCD,
+ RES_MPT,
+ RES_MTT,
+ RES_MAC,
+ RES_VLAN,
+ RES_NPORT_ID,
+ RES_COUNTER,
+ RES_FS_RULE,
+ RES_EQ,
+ MLX4_NUM_OF_RESOURCE_TYPE
+};
+
+enum mlx4_alloc_mode {
+ RES_OP_RESERVE,
+ RES_OP_RESERVE_AND_MAP,
+ RES_OP_MAP_ICM,
+};
+
+enum mlx4_res_tracker_free_type {
+ RES_TR_FREE_ALL,
+ RES_TR_FREE_SLAVES_ONLY,
+ RES_TR_FREE_STRUCTS_ONLY,
+};
+
+/*
+ *Virtual HCR structures.
+ * mlx4_vhcr is the sw representation, in machine endianness
+ *
+ * mlx4_vhcr_cmd is the formalized structure, the one that is passed
+ * to FW to go through communication channel.
+ * It is big endian, and has the same structure as the physical HCR
+ * used by command interface
+ */
+struct mlx4_vhcr {
+ u64 in_param;
+ u64 out_param;
+ u32 in_modifier;
+ u32 errno;
+ u16 op;
+ u16 token;
+ u8 op_modifier;
+ u8 e_bit;
+};
+
+struct mlx4_vhcr_cmd {
+ __be64 in_param;
+ __be32 in_modifier;
+ u32 reserved1;
+ __be64 out_param;
+ __be16 token;
+ u16 reserved;
+ u8 status;
+ u8 flags;
+ __be16 opcode;
+};
+
+struct mlx4_cmd_info {
+ u16 opcode;
+ bool has_inbox;
+ bool has_outbox;
+ bool out_is_imm;
+ bool encode_slave_id;
+ int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox);
+ int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+};
+
+#ifdef CONFIG_MLX4_DEBUG
+extern int mlx4_debug_level;
+#else /* CONFIG_MLX4_DEBUG */
+#define mlx4_debug_level (0)
+#endif /* CONFIG_MLX4_DEBUG */
+
+#define mlx4_dbg(mdev, format, ...) \
+do { \
+ if (mlx4_debug_level) \
+ dev_printk(KERN_DEBUG, \
+ &(mdev)->persist->pdev->dev, format, \
+ ##__VA_ARGS__); \
+} while (0)
+
+#define mlx4_err(mdev, format, ...) \
+ dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
+#define mlx4_info(mdev, format, ...) \
+ dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
+#define mlx4_warn(mdev, format, ...) \
+ dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
+
+extern int log_mtts_per_seg;
+extern int mlx4_internal_err_reset;
+
+#define MLX4_MAX_NUM_SLAVES (min(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF, \
+ MLX4_MFUNC_MAX))
+#define ALL_SLAVES 0xff
+
+struct mlx4_bitmap {
+ u32 last;
+ u32 top;
+ u32 max;
+ u32 reserved_top;
+ u32 mask;
+ u32 avail;
+ u32 effective_len;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct mlx4_buddy {
+ unsigned long **bits;
+ unsigned int *num_free;
+ u32 max_order;
+ spinlock_t lock;
+};
+
+struct mlx4_icm;
+
+struct mlx4_icm_table {
+ u64 virt;
+ int num_icm;
+ u32 num_obj;
+ int obj_size;
+ int lowmem;
+ int coherent;
+ struct mutex mutex;
+ struct mlx4_icm **icm;
+};
+
+#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
+#define MLX4_MPT_FLAG_FREE (0x3UL << 28)
+#define MLX4_MPT_FLAG_MIO (1 << 17)
+#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15)
+#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
+#define MLX4_MPT_FLAG_REGION (1 << 8)
+
+#define MLX4_MPT_PD_MASK (0x1FFFFUL)
+#define MLX4_MPT_PD_VF_MASK (0xFE0000UL)
+#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27)
+#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
+#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
+
+#define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7)
+
+#define MLX4_MPT_STATUS_SW 0xF0
+#define MLX4_MPT_STATUS_HW 0x00
+
+#define MLX4_CQE_SIZE_MASK_STRIDE 0x3
+#define MLX4_EQE_SIZE_MASK_STRIDE 0x30
+
+#define MLX4_EQ_ASYNC 0
+#define MLX4_EQ_TO_CQ_VECTOR(vector) ((vector) - \
+ !!((int)(vector) >= MLX4_EQ_ASYNC))
+#define MLX4_CQ_TO_EQ_VECTOR(vector) ((vector) + \
+ !!((int)(vector) >= MLX4_EQ_ASYNC))
+
+/*
+ * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
+ */
+struct mlx4_mpt_entry {
+ __be32 flags;
+ __be32 qpn;
+ __be32 key;
+ __be32 pd_flags;
+ __be64 start;
+ __be64 length;
+ __be32 lkey;
+ __be32 win_cnt;
+ u8 reserved1[3];
+ u8 mtt_rep;
+ __be64 mtt_addr;
+ __be32 mtt_sz;
+ __be32 entity_size;
+ __be32 first_byte_offset;
+} __packed;
+
+/*
+ * Must be packed because start is 64 bits but only aligned to 32 bits.
+ */
+struct mlx4_eq_context {
+ __be32 flags;
+ u16 reserved1[3];
+ __be16 page_offset;
+ u8 log_eq_size;
+ u8 reserved2[4];
+ u8 eq_period;
+ u8 reserved3;
+ u8 eq_max_count;
+ u8 reserved4[3];
+ u8 intr;
+ u8 log_page_size;
+ u8 reserved5[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ u32 reserved6[2];
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved7[4];
+};
+
+struct mlx4_cq_context {
+ __be32 flags;
+ u16 reserved1[3];
+ __be16 page_offset;
+ __be32 logsize_usrpage;
+ __be16 cq_period;
+ __be16 cq_max_count;
+ u8 reserved2[3];
+ u8 comp_eqn;
+ u8 log_page_size;
+ u8 reserved3[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ __be32 last_notified_index;
+ __be32 solicit_producer_index;
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved4[2];
+ __be64 db_rec_addr;
+};
+
+struct mlx4_srq_context {
+ __be32 state_logsize_srqn;
+ u8 logstride;
+ u8 reserved1;
+ __be16 xrcd;
+ __be32 pg_offset_cqn;
+ u32 reserved2;
+ u8 log_page_size;
+ u8 reserved3[2];
+ u8 mtt_base_addr_h;
+ __be32 mtt_base_addr_l;
+ __be32 pd;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
+ u16 reserved4;
+ __be16 wqe_counter;
+ u32 reserved5;
+ __be64 db_rec_addr;
+};
+
+struct mlx4_eq_tasklet {
+ struct list_head list;
+ struct list_head process_list;
+ struct tasklet_struct task;
+ /* lock on completion tasklet list */
+ spinlock_t lock;
+};
+
+struct mlx4_eq {
+ struct mlx4_dev *dev;
+ void __iomem *doorbell;
+ int eqn;
+ u32 cons_index;
+ u16 irq;
+ u16 have_irq;
+ int nent;
+ struct mlx4_buf_list *page_list;
+ struct mlx4_mtt mtt;
+ struct mlx4_eq_tasklet tasklet_ctx;
+ struct mlx4_active_ports actv_ports;
+ u32 ref_count;
+ cpumask_var_t affinity_mask;
+};
+
+struct mlx4_slave_eqe {
+ u8 type;
+ u8 port;
+ u32 param;
+};
+
+struct mlx4_slave_event_eq_info {
+ int eqn;
+ u16 token;
+};
+
+struct mlx4_profile {
+ int num_qp;
+ int rdmarc_per_qp;
+ int num_srq;
+ int num_cq;
+ int num_mcg;
+ int num_mpt;
+ unsigned num_mtt;
+};
+
+struct mlx4_fw {
+ u64 clr_int_base;
+ u64 catas_offset;
+ u64 comm_base;
+ u64 clock_offset;
+ struct mlx4_icm *fw_icm;
+ struct mlx4_icm *aux_icm;
+ u32 catas_size;
+ u16 fw_pages;
+ u8 clr_int_bar;
+ u8 catas_bar;
+ u8 comm_bar;
+ u8 clock_bar;
+};
+
+struct mlx4_comm {
+ u32 slave_write;
+ u32 slave_read;
+};
+
+enum {
+ MLX4_MCAST_CONFIG = 0,
+ MLX4_MCAST_DISABLE = 1,
+ MLX4_MCAST_ENABLE = 2,
+};
+
+#define VLAN_FLTR_SIZE 128
+
+struct mlx4_vlan_fltr {
+ __be32 entry[VLAN_FLTR_SIZE];
+};
+
+struct mlx4_mcast_entry {
+ struct list_head list;
+ u64 addr;
+};
+
+struct mlx4_promisc_qp {
+ struct list_head list;
+ u32 qpn;
+};
+
+struct mlx4_steer_index {
+ struct list_head list;
+ unsigned int index;
+ struct list_head duplicates;
+};
+
+#define MLX4_EVENT_TYPES_NUM 64
+
+struct mlx4_slave_state {
+ u8 comm_toggle;
+ u8 last_cmd;
+ u8 init_port_mask;
+ bool active;
+ bool old_vlan_api;
+ bool vst_qinq_supported;
+ u8 function;
+ dma_addr_t vhcr_dma;
+ u16 user_mtu[MLX4_MAX_PORTS + 1];
+ u16 mtu[MLX4_MAX_PORTS + 1];
+ __be32 ib_cap_mask[MLX4_MAX_PORTS + 1];
+ struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES];
+ struct list_head mcast_filters[MLX4_MAX_PORTS + 1];
+ struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1];
+ /* event type to eq number lookup */
+ struct mlx4_slave_event_eq_info event_eq[MLX4_EVENT_TYPES_NUM];
+ u16 eq_pi;
+ u16 eq_ci;
+ spinlock_t lock;
+ /*initialized via the kzalloc*/
+ u8 is_slave_going_down;
+ u32 cookie;
+ enum slave_port_state port_state[MLX4_MAX_PORTS + 1];
+};
+
+#define MLX4_VGT 4095
+#define NO_INDX (-1)
+
+struct mlx4_vport_state {
+ u64 mac;
+ u16 default_vlan;
+ u8 default_qos;
+ __be16 vlan_proto;
+ u32 tx_rate;
+ bool spoofchk;
+ u32 link_state;
+ u8 qos_vport;
+ __be64 guid;
+};
+
+struct mlx4_vf_admin_state {
+ struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
+ u8 enable_smi[MLX4_MAX_PORTS + 1];
+};
+
+struct mlx4_vport_oper_state {
+ struct mlx4_vport_state state;
+ int mac_idx;
+ int vlan_idx;
+};
+
+struct mlx4_vf_oper_state {
+ struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
+ u8 smi_enabled[MLX4_MAX_PORTS + 1];
+};
+
+struct slave_list {
+ struct mutex mutex;
+ struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+struct resource_allocator {
+ spinlock_t alloc_lock; /* protect quotas */
+ union {
+ unsigned int res_reserved;
+ unsigned int res_port_rsvd[MLX4_MAX_PORTS];
+ };
+ union {
+ int res_free;
+ int res_port_free[MLX4_MAX_PORTS];
+ };
+ int *quota;
+ int *allocated;
+ int *guaranteed;
+};
+
+struct mlx4_resource_tracker {
+ spinlock_t lock;
+ /* tree for each resources */
+ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
+ /* num_of_slave's lists, one per slave */
+ struct slave_list *slave_list;
+ struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+#define SLAVE_EVENT_EQ_SIZE 128
+struct mlx4_slave_event_eq {
+ u32 eqn;
+ u32 cons;
+ u32 prod;
+ spinlock_t event_lock;
+ struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
+};
+
+struct mlx4_qos_manager {
+ int num_of_qos_vfs;
+ DECLARE_BITMAP(priority_bm, MLX4_NUM_UP);
+};
+
+struct mlx4_master_qp0_state {
+ int proxy_qp0_active;
+ int qp0_active;
+ int port_active;
+};
+
+struct mlx4_mfunc_master_ctx {
+ struct mlx4_slave_state *slave_state;
+ struct mlx4_vf_admin_state *vf_admin;
+ struct mlx4_vf_oper_state *vf_oper;
+ struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1];
+ int init_port_ref[MLX4_MAX_PORTS + 1];
+ u16 max_mtu[MLX4_MAX_PORTS + 1];
+ u16 max_user_mtu[MLX4_MAX_PORTS + 1];
+ u8 pptx;
+ u8 pprx;
+ int disable_mcast_ref[MLX4_MAX_PORTS + 1];
+ struct mlx4_resource_tracker res_tracker;
+ struct workqueue_struct *comm_wq;
+ struct work_struct comm_work;
+ struct work_struct slave_event_work;
+ struct work_struct slave_flr_event_work;
+ spinlock_t slave_state_lock;
+ __be32 comm_arm_bit_vector[4];
+ struct mlx4_eqe cmd_eqe;
+ struct mlx4_slave_event_eq slave_eq;
+ struct mutex gen_eqe_mutex[MLX4_MFUNC_MAX];
+ struct mlx4_qos_manager qos_ctl[MLX4_MAX_PORTS + 1];
+};
+
+struct mlx4_mfunc {
+ struct mlx4_comm __iomem *comm;
+ struct mlx4_vhcr_cmd *vhcr;
+ dma_addr_t vhcr_dma;
+
+ struct mlx4_mfunc_master_ctx master;
+};
+
+#define MGM_QPN_MASK 0x00FFFFFF
+#define MGM_BLCK_LB_BIT 30
+
+struct mlx4_mgm {
+ __be32 next_gid_index;
+ __be32 members_count;
+ u32 reserved[2];
+ u8 gid[16];
+ __be32 qp[MLX4_MAX_QP_PER_MGM];
+};
+
+struct mlx4_cmd {
+ struct dma_pool *pool;
+ void __iomem *hcr;
+ struct mutex slave_cmd_mutex;
+ struct semaphore poll_sem;
+ struct semaphore event_sem;
+ struct rw_semaphore switch_sem;
+ int max_cmds;
+ spinlock_t context_lock;
+ int free_head;
+ struct mlx4_cmd_context *context;
+ u16 token_mask;
+ u8 use_events;
+ u8 toggle;
+ u8 comm_toggle;
+ u8 initialized;
+};
+
+enum {
+ MLX4_VF_IMMED_VLAN_FLAG_VLAN = 1 << 0,
+ MLX4_VF_IMMED_VLAN_FLAG_QOS = 1 << 1,
+ MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE = 1 << 2,
+};
+struct mlx4_vf_immed_vlan_work {
+ struct work_struct work;
+ struct mlx4_priv *priv;
+ int flags;
+ int slave;
+ int vlan_ix;
+ int orig_vlan_ix;
+ u8 port;
+ u8 qos;
+ u8 qos_vport;
+ u16 vlan_id;
+ u16 orig_vlan_id;
+ __be16 vlan_proto;
+};
+
+
+struct mlx4_uar_table {
+ struct mlx4_bitmap bitmap;
+};
+
+struct mlx4_mr_table {
+ struct mlx4_bitmap mpt_bitmap;
+ struct mlx4_buddy mtt_buddy;
+ u64 mtt_base;
+ u64 mpt_base;
+ struct mlx4_icm_table mtt_table;
+ struct mlx4_icm_table dmpt_table;
+};
+
+struct mlx4_cq_table {
+ struct mlx4_bitmap bitmap;
+ spinlock_t lock;
+ struct radix_tree_root tree;
+ struct mlx4_icm_table table;
+ struct mlx4_icm_table cmpt_table;
+};
+
+struct mlx4_eq_table {
+ struct mlx4_bitmap bitmap;
+ char *irq_names;
+ void __iomem *clr_int;
+ void __iomem **uar_map;
+ u32 clr_mask;
+ struct mlx4_eq *eq;
+ struct mlx4_icm_table table;
+ struct mlx4_icm_table cmpt_table;
+ int have_irq;
+ u8 inta_pin;
+};
+
+struct mlx4_srq_table {
+ struct mlx4_bitmap bitmap;
+ spinlock_t lock;
+ struct radix_tree_root tree;
+ struct mlx4_icm_table table;
+ struct mlx4_icm_table cmpt_table;
+};
+
+enum mlx4_qp_table_zones {
+ MLX4_QP_TABLE_ZONE_GENERAL,
+ MLX4_QP_TABLE_ZONE_RSS,
+ MLX4_QP_TABLE_ZONE_RAW_ETH,
+ MLX4_QP_TABLE_ZONE_NUM
+};
+
+struct mlx4_qp_table {
+ struct mlx4_bitmap *bitmap_gen;
+ struct mlx4_zone_allocator *zones;
+ u32 zones_uids[MLX4_QP_TABLE_ZONE_NUM];
+ u32 rdmarc_base;
+ int rdmarc_shift;
+ spinlock_t lock;
+ struct mlx4_icm_table qp_table;
+ struct mlx4_icm_table auxc_table;
+ struct mlx4_icm_table altc_table;
+ struct mlx4_icm_table rdmarc_table;
+ struct mlx4_icm_table cmpt_table;
+};
+
+struct mlx4_mcg_table {
+ struct mutex mutex;
+ struct mlx4_bitmap bitmap;
+ struct mlx4_icm_table table;
+};
+
+struct mlx4_catas_err {
+ u32 __iomem *map;
+ struct timer_list timer;
+ struct list_head list;
+};
+
+#define MLX4_MAX_MAC_NUM 128
+#define MLX4_MAC_TABLE_SIZE (MLX4_MAX_MAC_NUM << 3)
+
+struct mlx4_mac_table {
+ __be64 entries[MLX4_MAX_MAC_NUM];
+ int refs[MLX4_MAX_MAC_NUM];
+ bool is_dup[MLX4_MAX_MAC_NUM];
+ struct mutex mutex;
+ int total;
+ int max;
+};
+
+#define MLX4_ROCE_GID_ENTRY_SIZE 16
+
+struct mlx4_roce_gid_entry {
+ u8 raw[MLX4_ROCE_GID_ENTRY_SIZE];
+};
+
+struct mlx4_roce_gid_table {
+ struct mlx4_roce_gid_entry roce_gids[MLX4_ROCE_MAX_GIDS];
+ struct mutex mutex;
+};
+
+#define MLX4_MAX_VLAN_NUM 128
+#define MLX4_VLAN_TABLE_SIZE (MLX4_MAX_VLAN_NUM << 2)
+
+struct mlx4_vlan_table {
+ __be32 entries[MLX4_MAX_VLAN_NUM];
+ int refs[MLX4_MAX_VLAN_NUM];
+ int is_dup[MLX4_MAX_VLAN_NUM];
+ struct mutex mutex;
+ int total;
+ int max;
+};
+
+#define SET_PORT_GEN_ALL_VALID (MLX4_FLAG_V_MTU_MASK | \
+ MLX4_FLAG_V_PPRX_MASK | \
+ MLX4_FLAG_V_PPTX_MASK)
+#define SET_PORT_PROMISC_SHIFT 31
+#define SET_PORT_MC_PROMISC_SHIFT 30
+
+enum {
+ MCAST_DIRECT_ONLY = 0,
+ MCAST_DIRECT = 1,
+ MCAST_DEFAULT = 2
+};
+
+
+struct mlx4_set_port_general_context {
+ u16 reserved1;
+ u8 flags2;
+ u8 flags;
+ union {
+ u8 ignore_fcs;
+ u8 roce_mode;
+ };
+ u8 reserved2;
+ __be16 mtu;
+ u8 pptx;
+ u8 pfctx;
+ u16 reserved3;
+ u8 pprx;
+ u8 pfcrx;
+ u16 reserved4;
+ u32 reserved5;
+ u8 phv_en;
+ u8 reserved6[5];
+ __be16 user_mtu;
+ u16 reserved7;
+ u8 user_mac[6];
+};
+
+struct mlx4_set_port_rqp_calc_context {
+ __be32 base_qpn;
+ u8 rererved;
+ u8 n_mac;
+ u8 n_vlan;
+ u8 n_prio;
+ u8 reserved2[3];
+ u8 mac_miss;
+ u8 intra_no_vlan;
+ u8 no_vlan;
+ u8 intra_vlan_miss;
+ u8 vlan_miss;
+ u8 reserved3[3];
+ u8 no_vlan_prio;
+ __be32 promisc;
+ __be32 mcast;
+};
+
+struct mlx4_port_info {
+ struct mlx4_dev *dev;
+ int port;
+ char dev_name[16];
+ struct device_attribute port_attr;
+ enum mlx4_port_type tmp_type;
+ char dev_mtu_name[16];
+ struct device_attribute port_mtu_attr;
+ struct mlx4_mac_table mac_table;
+ struct mlx4_vlan_table vlan_table;
+ struct mlx4_roce_gid_table gid_table;
+ int base_qpn;
+ struct cpu_rmap *rmap;
+ struct devlink_port devlink_port;
+};
+
+struct mlx4_sense {
+ struct mlx4_dev *dev;
+ u8 do_sense_port[MLX4_MAX_PORTS + 1];
+ u8 sense_allowed[MLX4_MAX_PORTS + 1];
+ struct delayed_work sense_poll;
+};
+
+struct mlx4_msix_ctl {
+ DECLARE_BITMAP(pool_bm, MAX_MSIX);
+ struct mutex pool_lock;
+};
+
+struct mlx4_steer {
+ struct list_head promisc_qps[MLX4_NUM_STEERS];
+ struct list_head steer_entries[MLX4_NUM_STEERS];
+};
+
+enum {
+ MLX4_PCI_DEV_IS_VF = 1 << 0,
+ MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
+};
+
+enum {
+ MLX4_NO_RR = 0,
+ MLX4_USE_RR = 1,
+};
+
+struct mlx4_priv {
+ struct mlx4_dev dev;
+
+ struct list_head dev_list;
+ struct list_head ctx_list;
+ spinlock_t ctx_lock;
+
+ int pci_dev_data;
+ int removed;
+
+ struct list_head pgdir_list;
+ struct mutex pgdir_mutex;
+
+ struct mlx4_fw fw;
+ struct mlx4_cmd cmd;
+ struct mlx4_mfunc mfunc;
+
+ struct mlx4_bitmap pd_bitmap;
+ struct mlx4_bitmap xrcd_bitmap;
+ struct mlx4_uar_table uar_table;
+ struct mlx4_mr_table mr_table;
+ struct mlx4_cq_table cq_table;
+ struct mlx4_eq_table eq_table;
+ struct mlx4_srq_table srq_table;
+ struct mlx4_qp_table qp_table;
+ struct mlx4_mcg_table mcg_table;
+ struct mlx4_bitmap counters_bitmap;
+ int def_counter[MLX4_MAX_PORTS];
+
+ struct mlx4_catas_err catas_err;
+
+ void __iomem *clr_base;
+
+ struct mlx4_uar driver_uar;
+ void __iomem *kar;
+ struct mlx4_port_info port[MLX4_MAX_PORTS + 1];
+ struct mlx4_sense sense;
+ struct mutex port_mutex;
+ struct mlx4_msix_ctl msix_ctl;
+ struct mlx4_steer *steer;
+ struct list_head bf_list;
+ struct mutex bf_mutex;
+ struct io_mapping *bf_mapping;
+ void __iomem *clock_mapping;
+ int reserved_mtts;
+ int fs_hash_mode;
+ u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ struct mlx4_port_map v2p; /* cached port mapping configuration */
+ struct mutex bond_mutex; /* for bond mode */
+ __be64 slave_node_guids[MLX4_MFUNC_MAX];
+
+ atomic_t opreq_count;
+ struct work_struct opreq_task;
+};
+
+static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
+{
+ return container_of(dev, struct mlx4_priv, dev);
+}
+
+#define MLX4_SENSE_RANGE (HZ * 3)
+
+extern struct workqueue_struct *mlx4_wq;
+
+u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr);
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+ int align, u32 skip_mask);
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+ int use_rr);
+u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
+int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
+ u32 reserved_bot, u32 resetrved_top);
+void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
+
+int mlx4_reset(struct mlx4_dev *dev);
+
+int mlx4_alloc_eq_table(struct mlx4_dev *dev);
+void mlx4_free_eq_table(struct mlx4_dev *dev);
+
+int mlx4_init_pd_table(struct mlx4_dev *dev);
+int mlx4_init_xrcd_table(struct mlx4_dev *dev);
+int mlx4_init_uar_table(struct mlx4_dev *dev);
+int mlx4_init_mr_table(struct mlx4_dev *dev);
+int mlx4_init_eq_table(struct mlx4_dev *dev);
+int mlx4_init_cq_table(struct mlx4_dev *dev);
+int mlx4_init_qp_table(struct mlx4_dev *dev);
+int mlx4_init_srq_table(struct mlx4_dev *dev);
+int mlx4_init_mcg_table(struct mlx4_dev *dev);
+
+void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
+void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
+void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
+void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
+void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
+void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
+void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
+void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
+void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
+int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
+void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
+int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
+void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
+int __mlx4_mpt_reserve(struct mlx4_dev *dev);
+void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
+void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
+u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
+
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 flags);
+void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list);
+int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
+void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
+int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port,
+ struct mlx4_counter *data);
+int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
+void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
+
+void mlx4_start_catas_poll(struct mlx4_dev *dev);
+void mlx4_stop_catas_poll(struct mlx4_dev *dev);
+int mlx4_catas_init(struct mlx4_dev *dev);
+void mlx4_catas_end(struct mlx4_dev *dev);
+int mlx4_crdump_init(struct mlx4_dev *dev);
+void mlx4_crdump_end(struct mlx4_dev *dev);
+int mlx4_restart_one(struct pci_dev *pdev, bool reload,
+ struct devlink *devlink);
+int mlx4_register_device(struct mlx4_dev *dev);
+void mlx4_unregister_device(struct mlx4_dev *dev);
+void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
+ unsigned long param);
+
+struct mlx4_dev_cap;
+struct mlx4_init_hca_param;
+
+u64 mlx4_make_profile(struct mlx4_dev *dev,
+ struct mlx4_profile *request,
+ struct mlx4_dev_cap *dev_cap,
+ struct mlx4_init_hca_param *init_hca);
+void mlx4_master_comm_channel(struct work_struct *work);
+void mlx4_gen_slave_eqe(struct work_struct *work);
+void mlx4_master_handle_slave_flr(struct work_struct *work);
+
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
+
+enum {
+ MLX4_CMD_CLEANUP_STRUCT = 1UL << 0,
+ MLX4_CMD_CLEANUP_POOL = 1UL << 1,
+ MLX4_CMD_CLEANUP_HCR = 1UL << 2,
+ MLX4_CMD_CLEANUP_VHCR = 1UL << 3,
+ MLX4_CMD_CLEANUP_ALL = (MLX4_CMD_CLEANUP_VHCR << 1) - 1
+};
+
+int mlx4_cmd_init(struct mlx4_dev *dev);
+void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask);
+int mlx4_multi_func_init(struct mlx4_dev *dev);
+int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev);
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
+void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
+int mlx4_cmd_use_events(struct mlx4_dev *dev);
+void mlx4_cmd_use_polling(struct mlx4_dev *dev);
+
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+ u16 op, unsigned long timeout);
+
+void mlx4_cq_tasklet_cb(unsigned long data);
+void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
+void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
+
+void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
+
+void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
+
+void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
+int mlx4_comm_internal_err(u32 slave_read);
+
+int mlx4_crdump_collect(struct mlx4_dev *dev);
+
+int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
+ enum mlx4_port_type *type);
+void mlx4_do_sense_ports(struct mlx4_dev *dev,
+ enum mlx4_port_type *stype,
+ enum mlx4_port_type *defaults);
+void mlx4_start_sense(struct mlx4_dev *dev);
+void mlx4_stop_sense(struct mlx4_dev *dev);
+void mlx4_sense_init(struct mlx4_dev *dev);
+int mlx4_check_port_params(struct mlx4_dev *dev,
+ enum mlx4_port_type *port_type);
+int mlx4_change_port_types(struct mlx4_dev *dev,
+ enum mlx4_port_type *port_types);
+
+void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
+void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
+void mlx4_init_roce_gid_table(struct mlx4_dev *dev,
+ struct mlx4_roce_gid_table *table);
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
+int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
+int mlx4_bond_vlan_table(struct mlx4_dev *dev);
+int mlx4_unbond_vlan_table(struct mlx4_dev *dev);
+int mlx4_bond_mac_table(struct mlx4_dev *dev);
+int mlx4_unbond_mac_table(struct mlx4_dev *dev);
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
+/* resource tracker functions*/
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+ enum mlx4_resource resource_type,
+ u64 resource_id, int *slave);
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id);
+void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave);
+int mlx4_init_resource_tracker(struct mlx4_dev *dev);
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev,
+ enum mlx4_res_tracker_free_type type);
+
+int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
+
+int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port,
+ int *gid_tbl_len, int *pkey_tbl_len);
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ enum mlx4_protocol prot, enum mlx4_steer_type steer);
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+ int block_mcast_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer);
+int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], u8 port,
+ int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id);
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function,
+ int port, void *buf);
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+
+int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
+int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
+
+static inline void set_param_l(u64 *arg, u32 val)
+{
+ *arg = (*arg & 0xffffffff00000000ULL) | (u64) val;
+}
+
+static inline void set_param_h(u64 *arg, u32 val)
+{
+ *arg = (*arg & 0xffffffff) | ((u64) val << 32);
+}
+
+static inline u32 get_param_l(u64 *arg)
+{
+ return (u32) (*arg & 0xffffffff);
+}
+
+static inline u32 get_param_h(u64 *arg)
+{
+ return (u32)(*arg >> 32);
+}
+
+static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev)
+{
+ return &mlx4_priv(dev)->mfunc.master.res_tracker.lock;
+}
+
+#define NOT_MASKED_PD_BITS 17
+
+void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
+
+void mlx4_init_quotas(struct mlx4_dev *dev);
+
+/* for VFs, replace zero MACs with randomly-generated MACs at driver start */
+void mlx4_replace_zero_macs(struct mlx4_dev *dev);
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
+/* Returns the VF index of slave */
+int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
+int mlx4_config_mad_demux(struct mlx4_dev *dev);
+int mlx4_do_bond(struct mlx4_dev *dev, bool enable);
+int mlx4_bond_fs_rules(struct mlx4_dev *dev);
+int mlx4_unbond_fs_rules(struct mlx4_dev *dev);
+
+enum mlx4_zone_flags {
+ MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0,
+ MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO = 1UL << 1,
+ MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO = 1UL << 2,
+ MLX4_ZONE_USE_RR = 1UL << 3,
+};
+
+enum mlx4_zone_alloc_flags {
+ /* No two objects could overlap between zones. UID
+ * could be left unused. If this flag is given and
+ * two overlapped zones are used, an object will be free'd
+ * from the smallest possible matching zone.
+ */
+ MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP = 1UL << 0,
+};
+
+struct mlx4_zone_allocator;
+
+/* Create a new zone allocator */
+struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags);
+
+/* Attach a mlx4_bitmap <bitmap> of priority <priority> to the zone allocator
+ * <zone_alloc>. Allocating an object from this zone adds an offset <offset>.
+ * Similarly, when searching for an object to free, this offset it taken into
+ * account. The use_rr mlx4_ib parameter for allocating objects from this <bitmap>
+ * is given through the MLX4_ZONE_USE_RR flag in <flags>.
+ * When an allocation fails, <zone_alloc> tries to allocate from other zones
+ * according to the policy set by <flags>. <puid> is the unique identifier
+ * received to this zone.
+ */
+int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc,
+ struct mlx4_bitmap *bitmap,
+ u32 flags,
+ int priority,
+ int offset,
+ u32 *puid);
+
+/* Remove bitmap indicated by <uid> from <zone_alloc> */
+int mlx4_zone_remove_one(struct mlx4_zone_allocator *zone_alloc, u32 uid);
+
+/* Delete the zone allocator <zone_alloc. This function doesn't destroy
+ * the attached bitmaps.
+ */
+void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc);
+
+/* Allocate <count> objects with align <align> and skip_mask <skip_mask>
+ * from the mlx4_bitmap whose uid is <uid>. The bitmap which we actually
+ * allocated from is returned in <puid>. If the allocation fails, a negative
+ * number is returned. Otherwise, the offset of the first object is returned.
+ */
+u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
+ int align, u32 skip_mask, u32 *puid);
+
+/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator
+ * <zones>.
+ */
+u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones,
+ u32 uid, u32 obj, u32 count);
+
+/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of
+ * specifying the uid when freeing an object, zone allocator could figure it by
+ * itself. Other parameters are similar to mlx4_zone_free.
+ */
+u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count);
+
+/* Returns a pointer to mlx4_bitmap that was attached to <zones> with <uid> */
+struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid);
+
+#endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
new file mode 100644
index 000000000..3d5597d5b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -0,0 +1,858 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _MLX4_EN_H_
+#define _MLX4_EN_H_
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+#ifdef CONFIG_MLX4_EN_DCB
+#include <linux/dcbnl.h>
+#endif
+#include <linux/cpu_rmap.h>
+#include <linux/ptp_clock_kernel.h>
+#include <net/xdp.h>
+
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/srq.h>
+#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/cmd.h>
+
+#include "en_port.h"
+#include "mlx4_stats.h"
+
+#define DRV_NAME "mlx4_en"
+#define DRV_VERSION "4.0-0"
+
+#define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN)
+
+/*
+ * Device constants
+ */
+
+
+#define MLX4_EN_PAGE_SHIFT 12
+#define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT)
+#define DEF_RX_RINGS 16
+#define MAX_RX_RINGS 128
+#define MIN_RX_RINGS 4
+#define LOG_TXBB_SIZE 6
+#define TXBB_SIZE BIT(LOG_TXBB_SIZE)
+#define HEADROOM (2048 / TXBB_SIZE + 1)
+#define STAMP_STRIDE 64
+#define STAMP_DWORDS (STAMP_STRIDE / 4)
+#define STAMP_SHIFT 31
+#define STAMP_VAL 0x7fffffff
+#define STATS_DELAY (HZ / 4)
+#define SERVICE_TASK_DELAY (HZ / 4)
+#define MAX_NUM_OF_FS_RULES 256
+
+#define MLX4_EN_FILTER_HASH_SHIFT 4
+#define MLX4_EN_FILTER_EXPIRY_QUOTA 60
+
+/* Typical TSO descriptor with 16 gather entries is 352 bytes... */
+#define MAX_DESC_SIZE 512
+#define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE)
+
+/*
+ * OS related constants and tunables
+ */
+
+#define MLX4_EN_PRIV_FLAGS_BLUEFLAME 1
+#define MLX4_EN_PRIV_FLAGS_PHV 2
+
+#define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ)
+
+/* Use the maximum between 16384 and a single page */
+#define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384)
+
+#define MLX4_EN_MAX_RX_FRAGS 4
+
+/* Maximum ring sizes */
+#define MLX4_EN_MAX_TX_SIZE 8192
+#define MLX4_EN_MAX_RX_SIZE 8192
+
+/* Minimum ring size for our page-allocation scheme to work */
+#define MLX4_EN_MIN_RX_SIZE (MLX4_EN_ALLOC_SIZE / SMP_CACHE_BYTES)
+#define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE)
+
+#define MLX4_EN_SMALL_PKT_SIZE 64
+#define MLX4_EN_MIN_TX_RING_P_UP 1
+#define MLX4_EN_MAX_TX_RING_P_UP 32
+#define MLX4_EN_NUM_UP_LOW 1
+#define MLX4_EN_NUM_UP_HIGH 8
+#define MLX4_EN_DEF_RX_RING_SIZE 1024
+#define MLX4_EN_DEF_TX_RING_SIZE MLX4_EN_DEF_RX_RING_SIZE
+#define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \
+ MLX4_EN_NUM_UP_HIGH)
+
+#define MLX4_EN_DEFAULT_TX_WORK 256
+
+/* Target number of packets to coalesce with interrupt moderation */
+#define MLX4_EN_RX_COAL_TARGET 44
+#define MLX4_EN_RX_COAL_TIME 0x10
+
+#define MLX4_EN_TX_COAL_PKTS 16
+#define MLX4_EN_TX_COAL_TIME 0x10
+
+#define MLX4_EN_MAX_COAL_PKTS U16_MAX
+#define MLX4_EN_MAX_COAL_TIME U16_MAX
+
+#define MLX4_EN_RX_RATE_LOW 400000
+#define MLX4_EN_RX_COAL_TIME_LOW 0
+#define MLX4_EN_RX_RATE_HIGH 450000
+#define MLX4_EN_RX_COAL_TIME_HIGH 128
+#define MLX4_EN_RX_SIZE_THRESH 1024
+#define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH)
+#define MLX4_EN_SAMPLE_INTERVAL 0
+#define MLX4_EN_AVG_PKT_SMALL 256
+
+#define MLX4_EN_AUTO_CONF 0xffff
+
+#define MLX4_EN_DEF_RX_PAUSE 1
+#define MLX4_EN_DEF_TX_PAUSE 1
+
+/* Interval between successive polls in the Tx routine when polling is used
+ instead of interrupts (in per-core Tx rings) - should be power of 2 */
+#define MLX4_EN_TX_POLL_MODER 16
+#define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4)
+
+#define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN)
+#define HEADER_COPY_SIZE (128 - NET_IP_ALIGN)
+#define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
+#define PREAMBLE_LEN 8
+#define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \
+ ETH_HLEN + PREAMBLE_LEN)
+
+/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
+ * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
+ */
+#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN))
+#define ETH_BCAST 0xffffffffffffULL
+
+#define MLX4_EN_LOOPBACK_RETRIES 5
+#define MLX4_EN_LOOPBACK_TIMEOUT 100
+
+#ifdef MLX4_EN_PERF_STAT
+/* Number of samples to 'average' */
+#define AVG_SIZE 128
+#define AVG_FACTOR 1024
+
+#define INC_PERF_COUNTER(cnt) (++(cnt))
+#define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add))
+#define AVG_PERF_COUNTER(cnt, sample) \
+ ((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE)
+#define GET_PERF_COUNTER(cnt) (cnt)
+#define GET_AVG_PERF_COUNTER(cnt) ((cnt) / AVG_FACTOR)
+
+#else
+
+#define INC_PERF_COUNTER(cnt) do {} while (0)
+#define ADD_PERF_COUNTER(cnt, add) do {} while (0)
+#define AVG_PERF_COUNTER(cnt, sample) do {} while (0)
+#define GET_PERF_COUNTER(cnt) (0)
+#define GET_AVG_PERF_COUNTER(cnt) (0)
+#endif /* MLX4_EN_PERF_STAT */
+
+/* Constants for TX flow */
+enum {
+ MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
+ MAX_BF = 256,
+ MIN_PKT_LEN = 17,
+};
+
+/*
+ * Configurables
+ */
+
+enum cq_type {
+ /* keep tx types first */
+ TX,
+ TX_XDP,
+#define MLX4_EN_NUM_TX_TYPES (TX_XDP + 1)
+ RX,
+};
+
+
+/*
+ * Useful macros
+ */
+#define ROUNDUP_LOG2(x) ilog2(roundup_pow_of_two(x))
+#define XNOR(x, y) (!(x) == !(y))
+
+
+struct mlx4_en_tx_info {
+ union {
+ struct sk_buff *skb;
+ struct page *page;
+ };
+ dma_addr_t map0_dma;
+ u32 map0_byte_count;
+ u32 nr_txbb;
+ u32 nr_bytes;
+ u8 linear;
+ u8 data_offset;
+ u8 inl;
+ u8 ts_requested;
+ u8 nr_maps;
+} ____cacheline_aligned_in_smp;
+
+
+#define MLX4_EN_BIT_DESC_OWN 0x80000000
+#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg)
+#define MLX4_EN_MEMTYPE_PAD 0x100
+#define DS_SIZE sizeof(struct mlx4_wqe_data_seg)
+
+
+struct mlx4_en_tx_desc {
+ struct mlx4_wqe_ctrl_seg ctrl;
+ union {
+ struct mlx4_wqe_data_seg data; /* at least one data segment */
+ struct mlx4_wqe_lso_seg lso;
+ struct mlx4_wqe_inline_seg inl;
+ };
+};
+
+#define MLX4_EN_USE_SRQ 0x01000000
+
+#define MLX4_EN_CX3_LOW_ID 0x1000
+#define MLX4_EN_CX3_HIGH_ID 0x1005
+
+struct mlx4_en_rx_alloc {
+ struct page *page;
+ dma_addr_t dma;
+ u32 page_offset;
+};
+
+#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT)
+
+struct mlx4_en_page_cache {
+ u32 index;
+ struct {
+ struct page *page;
+ dma_addr_t dma;
+ } buf[MLX4_EN_CACHE_SIZE];
+};
+
+enum {
+ MLX4_EN_TX_RING_STATE_RECOVERING,
+};
+
+struct mlx4_en_priv;
+
+struct mlx4_en_tx_ring {
+ /* cache line used and dirtied in tx completion
+ * (mlx4_en_free_tx_buf())
+ */
+ u32 last_nr_txbb;
+ u32 cons;
+ unsigned long wake_queue;
+ struct netdev_queue *tx_queue;
+ u32 (*free_tx_desc)(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index,
+ u64 timestamp, int napi_mode);
+ struct mlx4_en_rx_ring *recycle_ring;
+
+ /* cache line used and dirtied in mlx4_en_xmit() */
+ u32 prod ____cacheline_aligned_in_smp;
+ unsigned int tx_dropped;
+ unsigned long bytes;
+ unsigned long packets;
+ unsigned long tx_csum;
+ unsigned long tso_packets;
+ unsigned long xmit_more;
+ struct mlx4_bf bf;
+
+ /* Following part should be mostly read */
+ __be32 doorbell_qpn;
+ __be32 mr_key;
+ u32 size; /* number of TXBBs */
+ u32 size_mask;
+ u32 full_size;
+ u32 buf_size;
+ void *buf;
+ struct mlx4_en_tx_info *tx_info;
+ int qpn;
+ u8 queue_index;
+ bool bf_enabled;
+ bool bf_alloced;
+ u8 hwtstamp_tx_type;
+ u8 *bounce_buf;
+
+ /* Not used in fast path
+ * Only queue_stopped might be used if BQL is not properly working.
+ */
+ unsigned long queue_stopped;
+ unsigned long state;
+ struct mlx4_hwq_resources sp_wqres;
+ struct mlx4_qp sp_qp;
+ struct mlx4_qp_context sp_context;
+ cpumask_t sp_affinity_mask;
+ enum mlx4_qp_state sp_qp_state;
+ u16 sp_stride;
+ u16 sp_cqn; /* index of port CQ associated with this ring */
+} ____cacheline_aligned_in_smp;
+
+struct mlx4_en_rx_desc {
+ /* actual number of entries depends on rx ring stride */
+ struct mlx4_wqe_data_seg data[0];
+};
+
+struct mlx4_en_rx_ring {
+ struct mlx4_hwq_resources wqres;
+ u32 size ; /* number of Rx descs*/
+ u32 actual_size;
+ u32 size_mask;
+ u16 stride;
+ u16 log_stride;
+ u16 cqn; /* index of port CQ associated with this ring */
+ u32 prod;
+ u32 cons;
+ u32 buf_size;
+ u8 fcs_del;
+ void *buf;
+ void *rx_info;
+ struct bpf_prog __rcu *xdp_prog;
+ struct mlx4_en_page_cache page_cache;
+ unsigned long bytes;
+ unsigned long packets;
+ unsigned long csum_ok;
+ unsigned long csum_none;
+ unsigned long csum_complete;
+ unsigned long rx_alloc_pages;
+ unsigned long xdp_drop;
+ unsigned long xdp_tx;
+ unsigned long xdp_tx_full;
+ unsigned long dropped;
+ int hwtstamp_rx_filter;
+ cpumask_var_t affinity_mask;
+ struct xdp_rxq_info xdp_rxq;
+};
+
+struct mlx4_en_cq {
+ struct mlx4_cq mcq;
+ struct mlx4_hwq_resources wqres;
+ int ring;
+ struct net_device *dev;
+ union {
+ struct napi_struct napi;
+ bool xdp_busy;
+ };
+ int size;
+ int buf_size;
+ int vector;
+ enum cq_type type;
+ u16 moder_time;
+ u16 moder_cnt;
+ struct mlx4_cqe *buf;
+#define MLX4_EN_OPCODE_ERROR 0x1e
+
+ struct irq_desc *irq_desc;
+};
+
+struct mlx4_en_port_profile {
+ u32 flags;
+ u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
+ u32 rx_ring_num;
+ u32 tx_ring_size;
+ u32 rx_ring_size;
+ u8 num_tx_rings_p_up;
+ u8 rx_pause;
+ u8 rx_ppp;
+ u8 tx_pause;
+ u8 tx_ppp;
+ u8 num_up;
+ int rss_rings;
+ int inline_thold;
+ struct hwtstamp_config hwtstamp_config;
+};
+
+struct mlx4_en_profile {
+ int udp_rss;
+ u8 rss_mask;
+ u32 active_ports;
+ u32 small_pkt_int;
+ u8 no_reset;
+ u8 max_num_tx_rings_p_up;
+ struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1];
+};
+
+struct mlx4_en_dev {
+ struct mlx4_dev *dev;
+ struct pci_dev *pdev;
+ struct mutex state_lock;
+ struct net_device *pndev[MLX4_MAX_PORTS + 1];
+ struct net_device *upper[MLX4_MAX_PORTS + 1];
+ u32 port_cnt;
+ bool device_up;
+ struct mlx4_en_profile profile;
+ u32 LSO_support;
+ struct workqueue_struct *workqueue;
+ struct device *dma_device;
+ void __iomem *uar_map;
+ struct mlx4_uar priv_uar;
+ struct mlx4_mr mr;
+ u32 priv_pdn;
+ spinlock_t uar_lock;
+ u8 mac_removed[MLX4_MAX_PORTS + 1];
+ u32 nominal_c_mult;
+ struct cyclecounter cycles;
+ seqlock_t clock_lock;
+ struct timecounter clock;
+ unsigned long last_overflow_check;
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_clock_info;
+ struct notifier_block nb;
+};
+
+
+struct mlx4_en_rss_map {
+ int base_qpn;
+ struct mlx4_qp qps[MAX_RX_RINGS];
+ enum mlx4_qp_state state[MAX_RX_RINGS];
+ struct mlx4_qp *indir_qp;
+ enum mlx4_qp_state indir_state;
+};
+
+enum mlx4_en_port_flag {
+ MLX4_EN_PORT_ANC = 1<<0, /* Auto-negotiation complete */
+ MLX4_EN_PORT_ANE = 1<<1, /* Auto-negotiation enabled */
+};
+
+struct mlx4_en_port_state {
+ int link_state;
+ int link_speed;
+ int transceiver;
+ u32 flags;
+};
+
+enum mlx4_en_mclist_act {
+ MCLIST_NONE,
+ MCLIST_REM,
+ MCLIST_ADD,
+};
+
+struct mlx4_en_mc_list {
+ struct list_head list;
+ enum mlx4_en_mclist_act action;
+ u8 addr[ETH_ALEN];
+ u64 reg_id;
+ u64 tunnel_reg_id;
+};
+
+struct mlx4_en_frag_info {
+ u16 frag_size;
+ u32 frag_stride;
+};
+
+#ifdef CONFIG_MLX4_EN_DCB
+/* Minimal TC BW - setting to 0 will block traffic */
+#define MLX4_EN_BW_MIN 1
+#define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */
+
+#define MLX4_EN_TC_VENDOR 0
+#define MLX4_EN_TC_ETS 7
+
+enum dcb_pfc_type {
+ pfc_disabled = 0,
+ pfc_enabled_full,
+ pfc_enabled_tx,
+ pfc_enabled_rx
+};
+
+struct mlx4_en_cee_config {
+ bool pfc_state;
+ enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP_HIGH];
+};
+#endif
+
+struct ethtool_flow_id {
+ struct list_head list;
+ struct ethtool_rx_flow_spec flow_spec;
+ u64 id;
+};
+
+enum {
+ MLX4_EN_FLAG_PROMISC = (1 << 0),
+ MLX4_EN_FLAG_MC_PROMISC = (1 << 1),
+ /* whether we need to enable hardware loopback by putting dmac
+ * in Tx WQE
+ */
+ MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2),
+ /* whether we need to drop packets that hardware loopback-ed */
+ MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3),
+ MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4),
+ MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5),
+#ifdef CONFIG_MLX4_EN_DCB
+ MLX4_EN_FLAG_DCB_ENABLED = (1 << 6),
+#endif
+};
+
+#define PORT_BEACON_MAX_LIMIT (65535)
+#define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE)
+#define MLX4_EN_MAC_HASH_IDX 5
+
+struct mlx4_en_stats_bitmap {
+ DECLARE_BITMAP(bitmap, NUM_ALL_STATS);
+ struct mutex mutex; /* for mutual access to stats bitmap */
+};
+
+enum {
+ MLX4_EN_STATE_FLAG_RESTARTING,
+};
+
+struct mlx4_en_priv {
+ struct mlx4_en_dev *mdev;
+ struct mlx4_en_port_profile *prof;
+ struct net_device *dev;
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ struct mlx4_en_port_state port_state;
+ spinlock_t stats_lock;
+ struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES];
+ /* To allow rules removal while port is going down */
+ struct list_head ethtool_list;
+
+ unsigned long last_moder_packets[MAX_RX_RINGS];
+ unsigned long last_moder_tx_packets;
+ unsigned long last_moder_bytes[MAX_RX_RINGS];
+ unsigned long last_moder_jiffies;
+ int last_moder_time[MAX_RX_RINGS];
+ u16 rx_usecs;
+ u16 rx_frames;
+ u16 tx_usecs;
+ u16 tx_frames;
+ u32 pkt_rate_low;
+ u16 rx_usecs_low;
+ u32 pkt_rate_high;
+ u16 rx_usecs_high;
+ u32 sample_interval;
+ u32 adaptive_rx_coal;
+ u32 msg_enable;
+ u32 loopback_ok;
+ u32 validate_loopback;
+
+ struct mlx4_hwq_resources res;
+ int link_state;
+ int last_link_state;
+ bool port_up;
+ int port;
+ int registered;
+ int allocated;
+ int stride;
+ unsigned char current_mac[ETH_ALEN + 2];
+ int mac_index;
+ unsigned max_mtu;
+ int base_qpn;
+ int cqe_factor;
+ int cqe_size;
+
+ struct mlx4_en_rss_map rss_map;
+ __be32 ctrl_flags;
+ u32 flags;
+ u8 num_tx_rings_p_up;
+ u32 tx_work_limit;
+ u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
+ u32 rx_ring_num;
+ u32 rx_skb_size;
+ struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
+ u8 num_frags;
+ u8 log_rx_info;
+ u8 dma_dir;
+ u16 rx_headroom;
+
+ struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
+ struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
+ struct mlx4_en_cq **tx_cq[MLX4_EN_NUM_TX_TYPES];
+ struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
+ struct mlx4_qp drop_qp;
+ struct work_struct rx_mode_task;
+ struct work_struct restart_task;
+ struct work_struct linkstate_task;
+ struct delayed_work stats_task;
+ struct delayed_work service_task;
+ struct work_struct vxlan_add_task;
+ struct work_struct vxlan_del_task;
+ struct mlx4_en_perf_stats pstats;
+ struct mlx4_en_pkt_stats pkstats;
+ struct mlx4_en_counter_stats pf_stats;
+ struct mlx4_en_flow_stats_rx rx_priority_flowstats[MLX4_NUM_PRIORITIES];
+ struct mlx4_en_flow_stats_tx tx_priority_flowstats[MLX4_NUM_PRIORITIES];
+ struct mlx4_en_flow_stats_rx rx_flowstats;
+ struct mlx4_en_flow_stats_tx tx_flowstats;
+ struct mlx4_en_port_stats port_stats;
+ struct mlx4_en_xdp_stats xdp_stats;
+ struct mlx4_en_phy_stats phy_stats;
+ struct mlx4_en_stats_bitmap stats_bitmap;
+ struct list_head mc_list;
+ struct list_head curr_list;
+ u64 broadcast_id;
+ struct mlx4_en_stat_out_mbox hw_stats;
+ int vids[128];
+ bool wol;
+ struct device *ddev;
+ struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE];
+ struct hwtstamp_config hwtstamp_config;
+ u32 counter_index;
+
+#ifdef CONFIG_MLX4_EN_DCB
+#define MLX4_EN_DCB_ENABLED 0x3
+ struct ieee_ets ets;
+ u16 maxrate[IEEE_8021QAZ_MAX_TCS];
+ enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS];
+ struct mlx4_en_cee_config cee_config;
+ u8 dcbx_cap;
+#endif
+#ifdef CONFIG_RFS_ACCEL
+ spinlock_t filters_lock;
+ int last_filter_id;
+ struct list_head filters;
+ struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT];
+#endif
+ u64 tunnel_reg_id;
+ __be16 vxlan_port;
+
+ u32 pflags;
+ u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
+ u8 rss_hash_fn;
+ unsigned long state;
+};
+
+enum mlx4_en_wol {
+ MLX4_EN_WOL_MAGIC = (1ULL << 61),
+ MLX4_EN_WOL_ENABLED = (1ULL << 62),
+};
+
+struct mlx4_mac_entry {
+ struct hlist_node hlist;
+ unsigned char mac[ETH_ALEN + 2];
+ u64 reg_id;
+ struct rcu_head rcu;
+};
+
+static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz)
+{
+ return buf + idx * cqe_sz;
+}
+
+#define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
+
+void mlx4_en_init_ptys2ethtool_map(void);
+void mlx4_en_update_loopback_state(struct net_device *dev,
+ netdev_features_t features);
+
+void mlx4_en_destroy_netdev(struct net_device *dev);
+int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
+ struct mlx4_en_port_profile *prof);
+
+int mlx4_en_start_port(struct net_device *dev);
+void mlx4_en_stop_port(struct net_device *dev, int detach);
+
+void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
+ struct mlx4_en_stats_bitmap *stats_bitmap,
+ u8 rx_ppp, u8 rx_pause,
+ u8 tx_ppp, u8 tx_pause);
+
+int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
+ struct mlx4_en_priv *tmp,
+ struct mlx4_en_port_profile *prof,
+ bool carry_xdp_prog);
+void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
+ struct mlx4_en_priv *tmp);
+
+int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
+ int entries, int ring, enum cq_type mode, int node);
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq);
+int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+ int cq_idx);
+void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+
+void mlx4_en_tx_irq(struct mlx4_cq *mcq);
+u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev,
+ select_queue_fallback_t fallback);
+netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
+ struct mlx4_en_rx_alloc *frame,
+ struct mlx4_en_priv *priv, unsigned int length,
+ int tx_ind, bool *doorbell_pending);
+void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
+bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
+ struct mlx4_en_rx_alloc *frame);
+
+int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring **pring,
+ u32 size, u16 stride,
+ int node, int queue_index);
+void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring **pring);
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring);
+int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int cq, int user_prio);
+void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring);
+void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev);
+void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv);
+int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring **pring,
+ u32 size, u16 stride, int node, int queue_index);
+void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring **pring,
+ u32 size, u16 stride);
+int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv);
+void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring);
+int mlx4_en_process_rx_cq(struct net_device *dev,
+ struct mlx4_en_cq *cq,
+ int budget);
+int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
+int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
+bool mlx4_en_process_tx_cq(struct net_device *dev,
+ struct mlx4_en_cq *cq, int napi_budget);
+u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u64 timestamp,
+ int napi_mode);
+u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u64 timestamp,
+ int napi_mode);
+void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
+ int is_tx, int rss, int qpn, int cqn, int user_prio,
+ struct mlx4_qp_context *context);
+void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
+int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
+ int loopback);
+void mlx4_en_calc_rx_buf(struct net_device *dev);
+int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
+void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
+int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv);
+void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv);
+int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
+void mlx4_en_rx_irq(struct mlx4_cq *mcq);
+
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
+int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv);
+
+void mlx4_en_fold_software_stats(struct net_device *dev);
+int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
+int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
+
+#ifdef CONFIG_MLX4_EN_DCB
+extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops;
+extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops;
+#endif
+
+int mlx4_en_setup_tc(struct net_device *dev, u8 up);
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc);
+
+#ifdef CONFIG_RFS_ACCEL
+void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv);
+#endif
+
+#define MLX4_EN_NUM_SELF_TEST 5
+void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf);
+void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev);
+
+#define DEV_FEATURE_CHANGED(dev, new_features, feature) \
+ ((dev->features & feature) ^ (new_features & feature))
+
+int mlx4_en_moderation_update(struct mlx4_en_priv *priv);
+int mlx4_en_reset_config(struct net_device *dev,
+ struct hwtstamp_config ts_config,
+ netdev_features_t new_features);
+void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev,
+ struct mlx4_en_stats_bitmap *stats_bitmap,
+ u8 rx_ppp, u8 rx_pause,
+ u8 tx_ppp, u8 tx_pause);
+int mlx4_en_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr);
+
+/*
+ * Functions for time stamping
+ */
+u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe);
+void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
+ struct skb_shared_hwtstamps *hwts,
+ u64 timestamp);
+void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev);
+void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev);
+
+/* Globals
+ */
+extern const struct ethtool_ops mlx4_en_ethtool_ops;
+
+
+
+/*
+ * printk / logging functions
+ */
+
+__printf(3, 4)
+void en_print(const char *level, const struct mlx4_en_priv *priv,
+ const char *format, ...);
+
+#define en_dbg(mlevel, priv, format, ...) \
+do { \
+ if (NETIF_MSG_##mlevel & (priv)->msg_enable) \
+ en_print(KERN_DEBUG, priv, format, ##__VA_ARGS__); \
+} while (0)
+#define en_warn(priv, format, ...) \
+ en_print(KERN_WARNING, priv, format, ##__VA_ARGS__)
+#define en_err(priv, format, ...) \
+ en_print(KERN_ERR, priv, format, ##__VA_ARGS__)
+#define en_info(priv, format, ...) \
+ en_print(KERN_INFO, priv, format, ##__VA_ARGS__)
+
+#define mlx4_err(mdev, format, ...) \
+ pr_err(DRV_NAME " %s: " format, \
+ dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__)
+#define mlx4_info(mdev, format, ...) \
+ pr_info(DRV_NAME " %s: " format, \
+ dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__)
+#define mlx4_warn(mdev, format, ...) \
+ pr_warn(DRV_NAME " %s: " format, \
+ dev_name(&(mdev)->pdev->dev), ##__VA_ARGS__)
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
new file mode 100644
index 000000000..86b6051da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MLX4_STATS_
+#define _MLX4_STATS_
+
+#ifdef MLX4_EN_PERF_STAT
+#define NUM_PERF_STATS NUM_PERF_COUNTERS
+#else
+#define NUM_PERF_STATS 0
+#endif
+
+#define NUM_PRIORITIES 9
+#define NUM_PRIORITY_STATS 2
+
+struct mlx4_en_pkt_stats {
+ unsigned long rx_multicast_packets;
+ unsigned long rx_broadcast_packets;
+ unsigned long rx_jabbers;
+ unsigned long rx_in_range_length_error;
+ unsigned long rx_out_range_length_error;
+ unsigned long tx_multicast_packets;
+ unsigned long tx_broadcast_packets;
+ unsigned long rx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS];
+ unsigned long tx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS];
+#define NUM_PKT_STATS 43
+};
+
+struct mlx4_en_counter_stats {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+#define NUM_PF_STATS 4
+};
+
+struct mlx4_en_port_stats {
+ unsigned long tso_packets;
+ unsigned long xmit_more;
+ unsigned long queue_stopped;
+ unsigned long wake_queue;
+ unsigned long tx_timeout;
+ unsigned long rx_alloc_pages;
+ unsigned long rx_chksum_good;
+ unsigned long rx_chksum_none;
+ unsigned long rx_chksum_complete;
+ unsigned long tx_chksum_offload;
+#define NUM_PORT_STATS 10
+};
+
+struct mlx4_en_perf_stats {
+ u32 tx_poll;
+ u64 tx_pktsz_avg;
+ u32 inflight_avg;
+ u16 tx_coal_avg;
+ u16 rx_coal_avg;
+ u32 napi_quota;
+#define NUM_PERF_COUNTERS 6
+};
+
+struct mlx4_en_xdp_stats {
+ unsigned long rx_xdp_drop;
+ unsigned long rx_xdp_tx;
+ unsigned long rx_xdp_tx_full;
+#define NUM_XDP_STATS 3
+};
+
+struct mlx4_en_phy_stats {
+ unsigned long rx_packets_phy;
+ unsigned long rx_bytes_phy;
+ unsigned long tx_packets_phy;
+ unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS 4
+};
+
+#define NUM_MAIN_STATS 21
+
+#define MLX4_NUM_PRIORITIES 8
+
+struct mlx4_en_flow_stats_rx {
+ u64 rx_pause;
+ u64 rx_pause_duration;
+ u64 rx_pause_transition;
+#define NUM_FLOW_STATS_RX 3
+#define NUM_FLOW_PRIORITY_STATS_RX (NUM_FLOW_STATS_RX * \
+ MLX4_NUM_PRIORITIES)
+};
+
+struct mlx4_en_flow_stats_tx {
+ u64 tx_pause;
+ u64 tx_pause_duration;
+ u64 tx_pause_transition;
+#define NUM_FLOW_STATS_TX 3
+#define NUM_FLOW_PRIORITY_STATS_TX (NUM_FLOW_STATS_TX * \
+ MLX4_NUM_PRIORITIES)
+};
+
+#define NUM_FLOW_STATS (NUM_FLOW_STATS_RX + NUM_FLOW_STATS_TX + \
+ NUM_FLOW_PRIORITY_STATS_TX + \
+ NUM_FLOW_PRIORITY_STATS_RX)
+
+struct mlx4_en_stat_out_flow_control_mbox {
+ /* Total number of PAUSE frames received from the far-end port */
+ __be64 rx_pause;
+ /* Total number of microseconds that far-end port requested to pause
+ * transmission of packets
+ */
+ __be64 rx_pause_duration;
+ /* Number of received transmission from XOFF state to XON state */
+ __be64 rx_pause_transition;
+ /* Total number of PAUSE frames sent from the far-end port */
+ __be64 tx_pause;
+ /* Total time in microseconds that transmission of packets has been
+ * paused
+ */
+ __be64 tx_pause_duration;
+ /* Number of transmitter transitions from XOFF state to XON state */
+ __be64 tx_pause_transition;
+ /* Reserverd */
+ __be64 reserved[2];
+};
+
+enum {
+ MLX4_DUMP_ETH_STATS_FLOW_CONTROL = 1 << 12
+};
+
+#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
+ NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
+ NUM_XDP_STATS + NUM_PHY_STATS)
+
+#define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
+ sizeof(((struct net_device_stats *)0)->n))
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
new file mode 100644
index 000000000..cfa0bba39
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -0,0 +1,1157 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+#include "icm.h"
+
+static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
+{
+ int o;
+ int m;
+ u32 seg;
+
+ spin_lock(&buddy->lock);
+
+ for (o = order; o <= buddy->max_order; ++o)
+ if (buddy->num_free[o]) {
+ m = 1 << (buddy->max_order - o);
+ seg = find_first_bit(buddy->bits[o], m);
+ if (seg < m)
+ goto found;
+ }
+
+ spin_unlock(&buddy->lock);
+ return -1;
+
+ found:
+ clear_bit(seg, buddy->bits[o]);
+ --buddy->num_free[o];
+
+ while (o > order) {
+ --o;
+ seg <<= 1;
+ set_bit(seg ^ 1, buddy->bits[o]);
+ ++buddy->num_free[o];
+ }
+
+ spin_unlock(&buddy->lock);
+
+ seg <<= order;
+
+ return seg;
+}
+
+static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
+{
+ seg >>= order;
+
+ spin_lock(&buddy->lock);
+
+ while (test_bit(seg ^ 1, buddy->bits[order])) {
+ clear_bit(seg ^ 1, buddy->bits[order]);
+ --buddy->num_free[order];
+ seg >>= 1;
+ ++order;
+ }
+
+ set_bit(seg, buddy->bits[order]);
+ ++buddy->num_free[order];
+
+ spin_unlock(&buddy->lock);
+}
+
+static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
+{
+ int i, s;
+
+ buddy->max_order = max_order;
+ spin_lock_init(&buddy->lock);
+
+ buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *),
+ GFP_KERNEL);
+ buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free),
+ GFP_KERNEL);
+ if (!buddy->bits || !buddy->num_free)
+ goto err_out;
+
+ for (i = 0; i <= buddy->max_order; ++i) {
+ s = BITS_TO_LONGS(1UL << (buddy->max_order - i));
+ buddy->bits[i] = kvmalloc_array(s, sizeof(long), GFP_KERNEL | __GFP_ZERO);
+ if (!buddy->bits[i])
+ goto err_out_free;
+ }
+
+ set_bit(0, buddy->bits[buddy->max_order]);
+ buddy->num_free[buddy->max_order] = 1;
+
+ return 0;
+
+err_out_free:
+ for (i = 0; i <= buddy->max_order; ++i)
+ kvfree(buddy->bits[i]);
+
+err_out:
+ kfree(buddy->bits);
+ kfree(buddy->num_free);
+
+ return -ENOMEM;
+}
+
+static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
+{
+ int i;
+
+ for (i = 0; i <= buddy->max_order; ++i)
+ kvfree(buddy->bits[i]);
+
+ kfree(buddy->bits);
+ kfree(buddy->num_free);
+}
+
+u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
+{
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+ u32 seg;
+ int seg_order;
+ u32 offset;
+
+ seg_order = max_t(int, order - log_mtts_per_seg, 0);
+
+ seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
+ if (seg == -1)
+ return -1;
+
+ offset = seg * (1 << log_mtts_per_seg);
+
+ if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
+ offset + (1 << order) - 1)) {
+ mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
+ return -1;
+ }
+
+ return offset;
+}
+
+static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
+{
+ u64 in_param = 0;
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, order);
+ err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ return -1;
+ return get_param_l(&out_param);
+ }
+ return __mlx4_alloc_mtt_range(dev, order);
+}
+
+int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
+ struct mlx4_mtt *mtt)
+{
+ int i;
+
+ if (!npages) {
+ mtt->order = -1;
+ mtt->page_shift = MLX4_ICM_PAGE_SHIFT;
+ return 0;
+ } else
+ mtt->page_shift = page_shift;
+
+ for (mtt->order = 0, i = 1; i < npages; i <<= 1)
+ ++mtt->order;
+
+ mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
+ if (mtt->offset == -1)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mtt_init);
+
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
+{
+ u32 first_seg;
+ int seg_order;
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+
+ seg_order = max_t(int, order - log_mtts_per_seg, 0);
+ first_seg = offset / (1 << log_mtts_per_seg);
+
+ mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
+ mlx4_table_put_range(dev, &mr_table->mtt_table, offset,
+ offset + (1 << order) - 1);
+}
+
+static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, offset);
+ set_param_h(&in_param, order);
+ err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to free mtt range at:%d order:%d\n",
+ offset, order);
+ return;
+ }
+ __mlx4_free_mtt_range(dev, offset, order);
+}
+
+void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
+{
+ if (mtt->order < 0)
+ return;
+
+ mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
+}
+EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
+
+u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
+{
+ return (u64) mtt->offset * dev->caps.mtt_entry_sz;
+}
+EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
+
+static u32 hw_index_to_key(u32 ind)
+{
+ return (ind >> 24) | (ind << 8);
+}
+
+static u32 key_to_hw_index(u32 key)
+{
+ return (key << 24) | (key >> 8);
+}
+
+static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int mpt_index)
+{
+ return mlx4_cmd(dev, mailbox->dma, mpt_index,
+ 0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int mpt_index)
+{
+ return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
+ !mailbox, MLX4_CMD_HW2SW_MPT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+}
+
+/* Must protect against concurrent access */
+int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry ***mpt_entry)
+{
+ int err;
+ int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+
+ if (mmr->enabled != MLX4_MPT_EN_HW)
+ return -EINVAL;
+
+ err = mlx4_HW2SW_MPT(dev, NULL, key);
+ if (err) {
+ mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
+ mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
+ return err;
+ }
+
+ mmr->enabled = MLX4_MPT_EN_SW;
+
+ if (!mlx4_is_mfunc(dev)) {
+ **mpt_entry = mlx4_table_find(
+ &mlx4_priv(dev)->mr_table.dmpt_table,
+ key, NULL);
+ } else {
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
+ 0, MLX4_CMD_QUERY_MPT,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto free_mailbox;
+
+ *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
+ }
+
+ if (!(*mpt_entry) || !(**mpt_entry)) {
+ err = -ENOMEM;
+ goto free_mailbox;
+ }
+
+ return 0;
+
+free_mailbox:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);
+
+int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+ struct mlx4_mpt_entry **mpt_entry)
+{
+ int err;
+
+ if (!mlx4_is_mfunc(dev)) {
+ /* Make sure any changes to this entry are flushed */
+ wmb();
+
+ *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;
+
+ /* Make sure the new status is written */
+ wmb();
+
+ err = mlx4_SYNC_TPT(dev);
+ } else {
+ int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+
+ struct mlx4_cmd_mailbox *mailbox =
+ container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+ buf);
+
+ (*mpt_entry)->lkey = 0;
+ err = mlx4_SW2HW_MPT(dev, mailbox, key);
+ }
+
+ if (!err) {
+ mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
+ mmr->enabled = MLX4_MPT_EN_HW;
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
+
+void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry **mpt_entry)
+{
+ if (mlx4_is_mfunc(dev)) {
+ struct mlx4_cmd_mailbox *mailbox =
+ container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+ buf);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ }
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
+
+int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
+ u32 pdn)
+{
+ u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags) & ~MLX4_MPT_PD_MASK;
+ /* The wrapper function will put the slave's id here */
+ if (mlx4_is_mfunc(dev))
+ pd_flags &= ~MLX4_MPT_PD_VF_MASK;
+
+ mpt_entry->pd_flags = cpu_to_be32(pd_flags |
+ (pdn & MLX4_MPT_PD_MASK)
+ | MLX4_MPT_PD_FLAG_EN_INV);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);
+
+int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
+ struct mlx4_mpt_entry *mpt_entry,
+ u32 access)
+{
+ u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
+ (access & MLX4_PERM_MASK);
+
+ mpt_entry->flags = cpu_to_be32(flags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);
+
+static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
+ u64 iova, u64 size, u32 access, int npages,
+ int page_shift, struct mlx4_mr *mr)
+{
+ mr->iova = iova;
+ mr->size = size;
+ mr->pd = pd;
+ mr->access = access;
+ mr->enabled = MLX4_MPT_DISABLED;
+ mr->key = hw_index_to_key(mridx);
+
+ return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+}
+
+static int mlx4_WRITE_MTT(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox,
+ int num_entries)
+{
+ return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+int __mlx4_mpt_reserve(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
+}
+
+static int mlx4_mpt_reserve(struct mlx4_dev *dev)
+{
+ u64 out_param;
+
+ if (mlx4_is_mfunc(dev)) {
+ if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ return -1;
+ return get_param_l(&out_param);
+ }
+ return __mlx4_mpt_reserve(dev);
+}
+
+void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR);
+}
+
+static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, index);
+ if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to release mr index:%d\n",
+ index);
+ return;
+ }
+ __mlx4_mpt_release(dev, index);
+}
+
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+{
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+
+ return mlx4_table_get(dev, &mr_table->dmpt_table, index);
+}
+
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+{
+ u64 param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&param, index);
+ return mlx4_cmd_imm(dev, param, &param, RES_MPT, RES_OP_MAP_ICM,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ }
+ return __mlx4_mpt_alloc_icm(dev, index);
+}
+
+void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
+{
+ struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
+
+ mlx4_table_put(dev, &mr_table->dmpt_table, index);
+}
+
+static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, index);
+ if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to free icm of mr index:%d\n",
+ index);
+ return;
+ }
+ return __mlx4_mpt_free_icm(dev, index);
+}
+
+int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
+ int npages, int page_shift, struct mlx4_mr *mr)
+{
+ u32 index;
+ int err;
+
+ index = mlx4_mpt_reserve(dev);
+ if (index == -1)
+ return -ENOMEM;
+
+ err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
+ access, npages, page_shift, mr);
+ if (err)
+ mlx4_mpt_release(dev, index);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
+
+static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+ int err;
+
+ if (mr->enabled == MLX4_MPT_EN_HW) {
+ err = mlx4_HW2SW_MPT(dev, NULL,
+ key_to_hw_index(mr->key) &
+ (dev->caps.num_mpts - 1));
+ if (err) {
+ mlx4_warn(dev, "HW2SW_MPT failed (%d), MR has MWs bound to it\n",
+ err);
+ return err;
+ }
+
+ mr->enabled = MLX4_MPT_EN_SW;
+ }
+ mlx4_mtt_cleanup(dev, &mr->mtt);
+
+ return 0;
+}
+
+int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+ int ret;
+
+ ret = mlx4_mr_free_reserved(dev, mr);
+ if (ret)
+ return ret;
+ if (mr->enabled)
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
+ mlx4_mpt_release(dev, key_to_hw_index(mr->key));
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_free);
+
+void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+ mlx4_mtt_cleanup(dev, &mr->mtt);
+ mr->mtt.order = -1;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
+
+int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
+ u64 iova, u64 size, int npages,
+ int page_shift, struct mlx4_mpt_entry *mpt_entry)
+{
+ int err;
+
+ err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+ if (err)
+ return err;
+
+ mpt_entry->start = cpu_to_be64(iova);
+ mpt_entry->length = cpu_to_be64(size);
+ mpt_entry->entity_size = cpu_to_be32(page_shift);
+ mpt_entry->flags &= ~(cpu_to_be32(MLX4_MPT_FLAG_FREE |
+ MLX4_MPT_FLAG_SW_OWNS));
+ if (mr->mtt.order < 0) {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
+ mpt_entry->mtt_addr = 0;
+ } else {
+ mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+ &mr->mtt));
+ if (mr->mtt.page_shift == 0)
+ mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
+ }
+ if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
+ /* fast register MR in free state */
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+ mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
+ MLX4_MPT_PD_FLAG_RAE);
+ } else {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
+ }
+ mr->enabled = MLX4_MPT_EN_SW;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);
+
+int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mpt_entry *mpt_entry;
+ int err;
+
+ err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
+ if (err)
+ return err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_table;
+ }
+ mpt_entry = mailbox->buf;
+ mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO |
+ MLX4_MPT_FLAG_REGION |
+ mr->access);
+
+ mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key));
+ mpt_entry->pd_flags = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
+ mpt_entry->start = cpu_to_be64(mr->iova);
+ mpt_entry->length = cpu_to_be64(mr->size);
+ mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
+ if (mr->mtt.order < 0) {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
+ mpt_entry->mtt_addr = 0;
+ } else {
+ mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+ &mr->mtt));
+ }
+
+ if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
+ /* fast register MR in free state */
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+ mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
+ MLX4_MPT_PD_FLAG_RAE);
+ mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
+ } else {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
+ }
+
+ err = mlx4_SW2HW_MPT(dev, mailbox,
+ key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
+ if (err) {
+ mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ goto err_cmd;
+ }
+ mr->enabled = MLX4_MPT_EN_HW;
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return 0;
+
+err_cmd:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+err_table:
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_enable);
+
+static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ __be64 *mtts;
+ dma_addr_t dma_handle;
+ int i;
+
+ mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
+ start_index, &dma_handle);
+
+ if (!mtts)
+ return -ENOMEM;
+
+ dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
+
+ for (i = 0; i < npages; ++i)
+ mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
+
+ dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
+
+ return 0;
+}
+
+int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list)
+{
+ int err = 0;
+ int chunk;
+ int mtts_per_page;
+ int max_mtts_first_page;
+
+ /* compute how may mtts fit in the first page */
+ mtts_per_page = PAGE_SIZE / sizeof(u64);
+ max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
+ % mtts_per_page;
+
+ chunk = min_t(int, max_mtts_first_page, npages);
+
+ while (npages > 0) {
+ err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
+ if (err)
+ return err;
+ npages -= chunk;
+ start_index += chunk;
+ page_list += chunk;
+
+ chunk = min_t(int, mtts_per_page, npages);
+ }
+ return err;
+}
+
+int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ int start_index, int npages, u64 *page_list)
+{
+ struct mlx4_cmd_mailbox *mailbox = NULL;
+ __be64 *inbox = NULL;
+ int chunk;
+ int err = 0;
+ int i;
+
+ if (mtt->order < 0)
+ return -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ while (npages > 0) {
+ chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
+ npages);
+ inbox[0] = cpu_to_be64(mtt->offset + start_index);
+ inbox[1] = 0;
+ for (i = 0; i < chunk; ++i)
+ inbox[i + 2] = cpu_to_be64(page_list[i] |
+ MLX4_MTT_FLAG_PRESENT);
+ err = mlx4_WRITE_MTT(dev, mailbox, chunk);
+ if (err) {
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+ }
+
+ npages -= chunk;
+ start_index += chunk;
+ page_list += chunk;
+ }
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+ }
+
+ return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list);
+}
+EXPORT_SYMBOL_GPL(mlx4_write_mtt);
+
+int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ struct mlx4_buf *buf)
+{
+ u64 *page_list;
+ int err;
+ int i;
+
+ page_list = kcalloc(buf->npages, sizeof(*page_list), GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < buf->npages; ++i)
+ if (buf->nbufs == 1)
+ page_list[i] = buf->direct.map + (i << buf->page_shift);
+ else
+ page_list[i] = buf->page_list[i].map;
+
+ err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);
+
+ kfree(page_list);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
+
+int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
+ struct mlx4_mw *mw)
+{
+ u32 index;
+
+ if ((type == MLX4_MW_TYPE_1 &&
+ !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) ||
+ (type == MLX4_MW_TYPE_2 &&
+ !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)))
+ return -EOPNOTSUPP;
+
+ index = mlx4_mpt_reserve(dev);
+ if (index == -1)
+ return -ENOMEM;
+
+ mw->key = hw_index_to_key(index);
+ mw->pd = pd;
+ mw->type = type;
+ mw->enabled = MLX4_MPT_DISABLED;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_alloc);
+
+int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_mpt_entry *mpt_entry;
+ int err;
+
+ err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
+ if (err)
+ return err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_table;
+ }
+ mpt_entry = mailbox->buf;
+
+ /* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned
+ * off, thus creating a memory window and not a memory region.
+ */
+ mpt_entry->key = cpu_to_be32(key_to_hw_index(mw->key));
+ mpt_entry->pd_flags = cpu_to_be32(mw->pd);
+ if (mw->type == MLX4_MW_TYPE_2) {
+ mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+ mpt_entry->qpn = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP);
+ mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV);
+ }
+
+ err = mlx4_SW2HW_MPT(dev, mailbox,
+ key_to_hw_index(mw->key) &
+ (dev->caps.num_mpts - 1));
+ if (err) {
+ mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ goto err_cmd;
+ }
+ mw->enabled = MLX4_MPT_EN_HW;
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return 0;
+
+err_cmd:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+err_table:
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_enable);
+
+void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw)
+{
+ int err;
+
+ if (mw->enabled == MLX4_MPT_EN_HW) {
+ err = mlx4_HW2SW_MPT(dev, NULL,
+ key_to_hw_index(mw->key) &
+ (dev->caps.num_mpts - 1));
+ if (err)
+ mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err);
+
+ mw->enabled = MLX4_MPT_EN_SW;
+ }
+ if (mw->enabled)
+ mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
+ mlx4_mpt_release(dev, key_to_hw_index(mw->key));
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_free);
+
+int mlx4_init_mr_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mr_table *mr_table = &priv->mr_table;
+ int err;
+
+ /* Nothing to do for slaves - all MR handling is forwarded
+ * to the master */
+ if (mlx4_is_slave(dev))
+ return 0;
+
+ if (!is_power_of_2(dev->caps.num_mpts))
+ return -EINVAL;
+
+ err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
+ ~0, dev->caps.reserved_mrws, 0);
+ if (err)
+ return err;
+
+ err = mlx4_buddy_init(&mr_table->mtt_buddy,
+ ilog2((u32)dev->caps.num_mtts /
+ (1 << log_mtts_per_seg)));
+ if (err)
+ goto err_buddy;
+
+ if (dev->caps.reserved_mtts) {
+ priv->reserved_mtts =
+ mlx4_alloc_mtt_range(dev,
+ fls(dev->caps.reserved_mtts - 1));
+ if (priv->reserved_mtts < 0) {
+ mlx4_warn(dev, "MTT table of order %u is too small\n",
+ mr_table->mtt_buddy.max_order);
+ err = -ENOMEM;
+ goto err_reserve_mtts;
+ }
+ }
+
+ return 0;
+
+err_reserve_mtts:
+ mlx4_buddy_cleanup(&mr_table->mtt_buddy);
+
+err_buddy:
+ mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
+
+ return err;
+}
+
+void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mr_table *mr_table = &priv->mr_table;
+
+ if (mlx4_is_slave(dev))
+ return;
+ if (priv->reserved_mtts >= 0)
+ mlx4_free_mtt_range(dev, priv->reserved_mtts,
+ fls(dev->caps.reserved_mtts - 1));
+ mlx4_buddy_cleanup(&mr_table->mtt_buddy);
+ mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
+}
+
+static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
+ int npages, u64 iova)
+{
+ int i, page_mask;
+
+ if (npages > fmr->max_pages)
+ return -EINVAL;
+
+ page_mask = (1 << fmr->page_shift) - 1;
+
+ /* We are getting page lists, so va must be page aligned. */
+ if (iova & page_mask)
+ return -EINVAL;
+
+ /* Trust the user not to pass misaligned data in page_list */
+ if (0)
+ for (i = 0; i < npages; ++i) {
+ if (page_list[i] & ~page_mask)
+ return -EINVAL;
+ }
+
+ if (fmr->maps >= fmr->max_maps)
+ return -EINVAL;
+
+ return 0;
+}
+
+int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
+ int npages, u64 iova, u32 *lkey, u32 *rkey)
+{
+ u32 key;
+ int i, err;
+
+ err = mlx4_check_fmr(fmr, page_list, npages, iova);
+ if (err)
+ return err;
+
+ ++fmr->maps;
+
+ key = key_to_hw_index(fmr->mr.key);
+ key += dev->caps.num_mpts;
+ *lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
+
+ *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
+
+ /* Make sure MPT status is visible before writing MTT entries */
+ wmb();
+
+ dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
+
+ for (i = 0; i < npages; ++i)
+ fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
+
+ dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle,
+ npages * sizeof(u64), DMA_TO_DEVICE);
+
+ fmr->mpt->key = cpu_to_be32(key);
+ fmr->mpt->lkey = cpu_to_be32(key);
+ fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
+ fmr->mpt->start = cpu_to_be64(iova);
+
+ /* Make MTT entries are visible before setting MPT status */
+ wmb();
+
+ *(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW;
+
+ /* Make sure MPT status is visible before consumer can use FMR */
+ wmb();
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
+
+int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
+ int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err = -ENOMEM;
+
+ if (max_maps > dev->caps.max_fmr_maps)
+ return -EINVAL;
+
+ if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
+ return -EINVAL;
+
+ /* All MTTs must fit in the same page */
+ if (max_pages * sizeof(*fmr->mtts) > PAGE_SIZE)
+ return -EINVAL;
+
+ fmr->page_shift = page_shift;
+ fmr->max_pages = max_pages;
+ fmr->max_maps = max_maps;
+ fmr->maps = 0;
+
+ err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages,
+ page_shift, &fmr->mr);
+ if (err)
+ return err;
+
+ fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
+ fmr->mr.mtt.offset,
+ &fmr->dma_handle);
+
+ if (!fmr->mtts) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ return 0;
+
+err_free:
+ (void) mlx4_mr_free(dev, &fmr->mr);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
+
+int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int err;
+
+ err = mlx4_mr_enable(dev, &fmr->mr);
+ if (err)
+ return err;
+
+ fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table,
+ key_to_hw_index(fmr->mr.key), NULL);
+ if (!fmr->mpt)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
+
+void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
+ u32 *lkey, u32 *rkey)
+{
+ if (!fmr->maps)
+ return;
+
+ /* To unmap: it is sufficient to take back ownership from HW */
+ *(u8 *)fmr->mpt = MLX4_MPT_STATUS_SW;
+
+ /* Make sure MPT status is visible */
+ wmb();
+
+ fmr->maps = 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
+
+int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
+{
+ int ret;
+
+ if (fmr->maps)
+ return -EBUSY;
+ if (fmr->mr.enabled == MLX4_MPT_EN_HW) {
+ /* In case of FMR was enabled and unmapped
+ * make sure to give ownership of MPT back to HW
+ * so HW2SW_MPT command will success.
+ */
+ *(u8 *)fmr->mpt = MLX4_MPT_STATUS_SW;
+ /* Make sure MPT status is visible before changing MPT fields */
+ wmb();
+ fmr->mpt->length = 0;
+ fmr->mpt->start = 0;
+ /* Make sure MPT data is visible after changing MPT status */
+ wmb();
+ *(u8 *)fmr->mpt = MLX4_MPT_STATUS_HW;
+ /* make sure MPT status is visible */
+ wmb();
+ }
+
+ ret = mlx4_mr_free(dev, &fmr->mr);
+ if (ret)
+ return ret;
+ fmr->mr.enabled = MLX4_MPT_DISABLED;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_free);
+
+int mlx4_SYNC_TPT(struct mlx4_dev *dev)
+{
+ return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+}
+EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
new file mode 100644
index 000000000..6fc156a39
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/io-mapping.h>
+
+#include <asm/page.h>
+
+#include "mlx4.h"
+#include "icm.h"
+
+enum {
+ MLX4_NUM_RESERVED_UARS = 8
+};
+
+int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ *pdn = mlx4_bitmap_alloc(&priv->pd_bitmap);
+ if (*pdn == -1)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_pd_alloc);
+
+void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
+{
+ mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn, MLX4_USE_RR);
+}
+EXPORT_SYMBOL_GPL(mlx4_pd_free);
+
+int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap);
+ if (*xrcdn == -1)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param,
+ RES_XRCD, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+
+ *xrcdn = get_param_l(&out_param);
+ return 0;
+ }
+ return __mlx4_xrcd_alloc(dev, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
+
+void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+ mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn, MLX4_USE_RR);
+}
+
+void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, xrcdn);
+ err = mlx4_cmd(dev, in_param, RES_XRCD,
+ RES_OP_RESERVE, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ mlx4_warn(dev, "Failed to release xrcdn %d\n", xrcdn);
+ } else
+ __mlx4_xrcd_free(dev, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_free);
+
+int mlx4_init_pd_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
+ (1 << NOT_MASKED_PD_BITS) - 1,
+ dev->caps.reserved_pds, 0);
+}
+
+void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
+{
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
+}
+
+int mlx4_init_xrcd_table(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16),
+ (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0);
+}
+
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
+{
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap);
+}
+
+int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
+{
+ int offset;
+
+ uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
+ if (uar->index == -1)
+ return -ENOMEM;
+
+ if (mlx4_is_slave(dev))
+ offset = uar->index % ((int)pci_resource_len(dev->persist->pdev,
+ 2) /
+ dev->caps.uar_page_size);
+ else
+ offset = uar->index;
+ uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT)
+ + offset;
+ uar->map = NULL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
+
+void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
+{
+ mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index, MLX4_USE_RR);
+}
+EXPORT_SYMBOL_GPL(mlx4_uar_free);
+
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_uar *uar;
+ int err = 0;
+ int idx;
+
+ if (!priv->bf_mapping)
+ return -ENOMEM;
+
+ mutex_lock(&priv->bf_mutex);
+ if (!list_empty(&priv->bf_list))
+ uar = list_entry(priv->bf_list.next, struct mlx4_uar, bf_list);
+ else {
+ if (mlx4_bitmap_avail(&priv->uar_table.bitmap) < MLX4_NUM_RESERVED_UARS) {
+ err = -ENOMEM;
+ goto out;
+ }
+ uar = kmalloc_node(sizeof(*uar), GFP_KERNEL, node);
+ if (!uar) {
+ uar = kmalloc(sizeof(*uar), GFP_KERNEL);
+ if (!uar) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ err = mlx4_uar_alloc(dev, uar);
+ if (err)
+ goto free_kmalloc;
+
+ uar->map = ioremap(uar->pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!uar->map) {
+ err = -ENOMEM;
+ goto free_uar;
+ }
+
+ uar->bf_map = io_mapping_map_wc(priv->bf_mapping,
+ uar->index << PAGE_SHIFT,
+ PAGE_SIZE);
+ if (!uar->bf_map) {
+ err = -ENOMEM;
+ goto unamp_uar;
+ }
+ uar->free_bf_bmap = 0;
+ list_add(&uar->bf_list, &priv->bf_list);
+ }
+
+ idx = ffz(uar->free_bf_bmap);
+ uar->free_bf_bmap |= 1 << idx;
+ bf->uar = uar;
+ bf->offset = 0;
+ bf->buf_size = dev->caps.bf_reg_size / 2;
+ bf->reg = uar->bf_map + idx * dev->caps.bf_reg_size;
+ if (uar->free_bf_bmap == (1 << dev->caps.bf_regs_per_page) - 1)
+ list_del_init(&uar->bf_list);
+
+ goto out;
+
+unamp_uar:
+ bf->uar = NULL;
+ iounmap(uar->map);
+
+free_uar:
+ mlx4_uar_free(dev, uar);
+
+free_kmalloc:
+ kfree(uar);
+
+out:
+ mutex_unlock(&priv->bf_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_alloc);
+
+void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int idx;
+
+ if (!bf->uar || !bf->uar->bf_map)
+ return;
+
+ mutex_lock(&priv->bf_mutex);
+ idx = (bf->reg - bf->uar->bf_map) / dev->caps.bf_reg_size;
+ bf->uar->free_bf_bmap &= ~(1 << idx);
+ if (!bf->uar->free_bf_bmap) {
+ if (!list_empty(&bf->uar->bf_list))
+ list_del(&bf->uar->bf_list);
+
+ io_mapping_unmap(bf->uar->bf_map);
+ iounmap(bf->uar->map);
+ mlx4_uar_free(dev, bf->uar);
+ kfree(bf->uar);
+ } else if (list_empty(&bf->uar->bf_list))
+ list_add(&bf->uar->bf_list, &priv->bf_list);
+
+ mutex_unlock(&priv->bf_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_free);
+
+int mlx4_init_uar_table(struct mlx4_dev *dev)
+{
+ int num_reserved_uar = mlx4_get_num_reserved_uar(dev);
+
+ mlx4_dbg(dev, "uar_page_shift = %d", dev->uar_page_shift);
+ mlx4_dbg(dev, "Effective reserved_uars=%d", dev->caps.reserved_uars);
+
+ if (dev->caps.num_uars <= num_reserved_uar) {
+ mlx4_err(
+ dev, "Only %d UAR pages (need more than %d)\n",
+ dev->caps.num_uars, num_reserved_uar);
+ mlx4_err(dev, "Increase firmware log2_uar_bar_megabytes?\n");
+ return -ENODEV;
+ }
+
+ return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
+ dev->caps.num_uars, dev->caps.num_uars - 1,
+ dev->caps.reserved_uars, 0);
+}
+
+void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
+{
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->uar_table.bitmap);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
new file mode 100644
index 000000000..256a06b3c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -0,0 +1,2229 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/export.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+#include "mlx4_stats.h"
+
+#define MLX4_MAC_VALID (1ull << 63)
+
+#define MLX4_VLAN_VALID (1u << 31)
+#define MLX4_VLAN_MASK 0xfff
+
+#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL
+#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL
+#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL
+#define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL
+
+#define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1)
+#define MLX4_FLAG2_V_USER_MTU_MASK BIT(5)
+#define MLX4_FLAG2_V_USER_MAC_MASK BIT(6)
+#define MLX4_FLAG_V_MTU_MASK BIT(0)
+#define MLX4_FLAG_V_PPRX_MASK BIT(1)
+#define MLX4_FLAG_V_PPTX_MASK BIT(2)
+#define MLX4_IGNORE_FCS_MASK 0x1
+#define MLX4_TC_MAX_NUMBER 8
+
+void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
+{
+ int i;
+
+ mutex_init(&table->mutex);
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ table->entries[i] = 0;
+ table->refs[i] = 0;
+ table->is_dup[i] = false;
+ }
+ table->max = 1 << dev->caps.log_num_macs;
+ table->total = 0;
+}
+
+void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
+{
+ int i;
+
+ mutex_init(&table->mutex);
+ for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+ table->entries[i] = 0;
+ table->refs[i] = 0;
+ table->is_dup[i] = false;
+ }
+ table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR;
+ table->total = 0;
+}
+
+void mlx4_init_roce_gid_table(struct mlx4_dev *dev,
+ struct mlx4_roce_gid_table *table)
+{
+ int i;
+
+ mutex_init(&table->mutex);
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++)
+ memset(table->roce_gids[i].raw, 0, MLX4_ROCE_GID_ENTRY_SIZE);
+}
+
+static int validate_index(struct mlx4_dev *dev,
+ struct mlx4_mac_table *table, int index)
+{
+ int err = 0;
+
+ if (index < 0 || index >= table->max || !table->entries[index]) {
+ mlx4_warn(dev, "No valid Mac entry for the given index\n");
+ err = -EINVAL;
+ }
+ return err;
+}
+
+static int find_index(struct mlx4_dev *dev,
+ struct mlx4_mac_table *table, u64 mac)
+{
+ int i;
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (table->refs[i] &&
+ (MLX4_MAC_MASK & mac) ==
+ (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
+ return i;
+ }
+ /* Mac not found */
+ return -EINVAL;
+}
+
+static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
+ __be64 *entries)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 in_mod;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
+
+ in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
+
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx)
+{
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ struct mlx4_mac_table *table = &info->mac_table;
+ int i;
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (!table->refs[i])
+ continue;
+
+ if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
+ *idx = i;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(mlx4_find_cached_mac);
+
+static bool mlx4_need_mf_bond(struct mlx4_dev *dev)
+{
+ int i, num_eth_ports = 0;
+
+ if (!mlx4_is_mfunc(dev))
+ return false;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ ++num_eth_ports;
+
+ return (num_eth_ports == 2) ? true : false;
+}
+
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ struct mlx4_mac_table *table = &info->mac_table;
+ int i, err = 0;
+ int free = -1;
+ int free_for_dup = -1;
+ bool dup = mlx4_is_mf_bonded(dev);
+ u8 dup_port = (port == 1) ? 2 : 1;
+ struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table;
+ bool need_mf_bond = mlx4_need_mf_bond(dev);
+ bool can_mf_bond = true;
+
+ mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d %s duplicate\n",
+ (unsigned long long)mac, port,
+ dup ? "with" : "without");
+
+ if (need_mf_bond) {
+ if (port == 1) {
+ mutex_lock(&table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
+ }
+ } else {
+ mutex_lock(&table->mutex);
+ }
+
+ if (need_mf_bond) {
+ int index_at_port = -1;
+ int index_at_dup_port = -1;
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))))
+ index_at_port = i;
+ if (((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]))))
+ index_at_dup_port = i;
+ }
+
+ /* check that same mac is not in the tables at different indices */
+ if ((index_at_port != index_at_dup_port) &&
+ (index_at_port >= 0) &&
+ (index_at_dup_port >= 0))
+ can_mf_bond = false;
+
+ /* If the mac is already in the primary table, the slot must be
+ * available in the duplicate table as well.
+ */
+ if (index_at_port >= 0 && index_at_dup_port < 0 &&
+ dup_table->refs[index_at_port]) {
+ can_mf_bond = false;
+ }
+ /* If the mac is already in the duplicate table, check that the
+ * corresponding index is not occupied in the primary table, or
+ * the primary table already contains the mac at the same index.
+ * Otherwise, you cannot bond (primary contains a different mac
+ * at that index).
+ */
+ if (index_at_dup_port >= 0) {
+ if (!table->refs[index_at_dup_port] ||
+ ((MLX4_MAC_MASK & mac) == (MLX4_MAC_MASK & be64_to_cpu(table->entries[index_at_dup_port]))))
+ free_for_dup = index_at_dup_port;
+ else
+ can_mf_bond = false;
+ }
+ }
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (!table->refs[i]) {
+ if (free < 0)
+ free = i;
+ if (free_for_dup < 0 && need_mf_bond && can_mf_bond) {
+ if (!dup_table->refs[i])
+ free_for_dup = i;
+ }
+ continue;
+ }
+
+ if ((MLX4_MAC_MASK & mac) ==
+ (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
+ /* MAC already registered, increment ref count */
+ err = i;
+ ++table->refs[i];
+ if (dup) {
+ u64 dup_mac = MLX4_MAC_MASK & be64_to_cpu(dup_table->entries[i]);
+
+ if (dup_mac != mac || !dup_table->is_dup[i]) {
+ mlx4_warn(dev, "register mac: expect duplicate mac 0x%llx on port %d index %d\n",
+ mac, dup_port, i);
+ }
+ }
+ goto out;
+ }
+ }
+
+ if (need_mf_bond && (free_for_dup < 0)) {
+ if (dup) {
+ mlx4_warn(dev, "Fail to allocate duplicate MAC table entry\n");
+ mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n");
+ dup = false;
+ }
+ can_mf_bond = false;
+ }
+
+ if (need_mf_bond && can_mf_bond)
+ free = free_for_dup;
+
+ mlx4_dbg(dev, "Free MAC index is %d\n", free);
+
+ if (table->total == table->max) {
+ /* No free mac entries */
+ err = -ENOSPC;
+ goto out;
+ }
+
+ /* Register new MAC */
+ table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
+
+ err = mlx4_set_port_mac_table(dev, port, table->entries);
+ if (unlikely(err)) {
+ mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+ (unsigned long long) mac);
+ table->entries[free] = 0;
+ goto out;
+ }
+ table->refs[free] = 1;
+ table->is_dup[free] = false;
+ ++table->total;
+ if (dup) {
+ dup_table->refs[free] = 0;
+ dup_table->is_dup[free] = true;
+ dup_table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
+
+ err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries);
+ if (unlikely(err)) {
+ mlx4_warn(dev, "Failed adding duplicate mac: 0x%llx\n", mac);
+ dup_table->is_dup[free] = false;
+ dup_table->entries[free] = 0;
+ goto out;
+ }
+ ++dup_table->total;
+ }
+ err = free;
+out:
+ if (need_mf_bond) {
+ if (port == 2) {
+ mutex_unlock(&table->mutex);
+ mutex_unlock(&dup_table->mutex);
+ } else {
+ mutex_unlock(&dup_table->mutex);
+ mutex_unlock(&table->mutex);
+ }
+ } else {
+ mutex_unlock(&table->mutex);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(__mlx4_register_mac);
+
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ u64 out_param = 0;
+ int err = -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+ err = mlx4_cmd_imm(dev, mac, &out_param,
+ ((u32) port) << 8 | (u32) RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ }
+ if (err && err == -EINVAL && mlx4_is_slave(dev)) {
+ /* retry using old REG_MAC format */
+ set_param_l(&out_param, port);
+ err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ dev->flags |= MLX4_FLAG_OLD_REG_MAC;
+ }
+ if (err)
+ return err;
+
+ return get_param_l(&out_param);
+ }
+ return __mlx4_register_mac(dev, port, mac);
+}
+EXPORT_SYMBOL_GPL(mlx4_register_mac);
+
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port)
+{
+ return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
+ (port - 1) * (1 << dev->caps.log_num_macs);
+}
+EXPORT_SYMBOL_GPL(mlx4_get_base_qpn);
+
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ struct mlx4_port_info *info;
+ struct mlx4_mac_table *table;
+ int index;
+ bool dup = mlx4_is_mf_bonded(dev);
+ u8 dup_port = (port == 1) ? 2 : 1;
+ struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table;
+
+ if (port < 1 || port > dev->caps.num_ports) {
+ mlx4_warn(dev, "invalid port number (%d), aborting...\n", port);
+ return;
+ }
+ info = &mlx4_priv(dev)->port[port];
+ table = &info->mac_table;
+
+ if (dup) {
+ if (port == 1) {
+ mutex_lock(&table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
+ }
+ } else {
+ mutex_lock(&table->mutex);
+ }
+
+ index = find_index(dev, table, mac);
+
+ if (validate_index(dev, table, index))
+ goto out;
+
+ if (--table->refs[index] || table->is_dup[index]) {
+ mlx4_dbg(dev, "Have more references for index %d, no need to modify mac table\n",
+ index);
+ if (!table->refs[index])
+ dup_table->is_dup[index] = false;
+ goto out;
+ }
+
+ table->entries[index] = 0;
+ if (mlx4_set_port_mac_table(dev, port, table->entries))
+ mlx4_warn(dev, "Fail to set mac in port %d during unregister\n", port);
+ --table->total;
+
+ if (dup) {
+ dup_table->is_dup[index] = false;
+ if (dup_table->refs[index])
+ goto out;
+ dup_table->entries[index] = 0;
+ if (mlx4_set_port_mac_table(dev, dup_port, dup_table->entries))
+ mlx4_warn(dev, "Fail to set mac in duplicate port %d during unregister\n", dup_port);
+
+ --table->total;
+ }
+out:
+ if (dup) {
+ if (port == 2) {
+ mutex_unlock(&table->mutex);
+ mutex_unlock(&dup_table->mutex);
+ } else {
+ mutex_unlock(&dup_table->mutex);
+ mutex_unlock(&table->mutex);
+ }
+ } else {
+ mutex_unlock(&table->mutex);
+ }
+}
+EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
+
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+ u64 out_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) {
+ (void) mlx4_cmd_imm(dev, mac, &out_param,
+ ((u32) port) << 8 | (u32) RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ } else {
+ /* use old unregister mac format */
+ set_param_l(&out_param, port);
+ (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ }
+ return;
+ }
+ __mlx4_unregister_mac(dev, port, mac);
+ return;
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
+
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
+{
+ struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+ struct mlx4_mac_table *table = &info->mac_table;
+ int index = qpn - info->base_qpn;
+ int err = 0;
+ bool dup = mlx4_is_mf_bonded(dev);
+ u8 dup_port = (port == 1) ? 2 : 1;
+ struct mlx4_mac_table *dup_table = &mlx4_priv(dev)->port[dup_port].mac_table;
+
+ /* CX1 doesn't support multi-functions */
+ if (dup) {
+ if (port == 1) {
+ mutex_lock(&table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
+ }
+ } else {
+ mutex_lock(&table->mutex);
+ }
+
+ err = validate_index(dev, table, index);
+ if (err)
+ goto out;
+
+ table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID);
+
+ err = mlx4_set_port_mac_table(dev, port, table->entries);
+ if (unlikely(err)) {
+ mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+ (unsigned long long) new_mac);
+ table->entries[index] = 0;
+ } else {
+ if (dup) {
+ dup_table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID);
+
+ err = mlx4_set_port_mac_table(dev, dup_port, dup_table->entries);
+ if (unlikely(err)) {
+ mlx4_err(dev, "Failed adding duplicate MAC: 0x%llx\n",
+ (unsigned long long)new_mac);
+ dup_table->entries[index] = 0;
+ }
+ }
+ }
+out:
+ if (dup) {
+ if (port == 2) {
+ mutex_unlock(&table->mutex);
+ mutex_unlock(&dup_table->mutex);
+ } else {
+ mutex_unlock(&dup_table->mutex);
+ mutex_unlock(&table->mutex);
+ }
+ } else {
+ mutex_unlock(&table->mutex);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(__mlx4_replace_mac);
+
+static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
+ __be32 *entries)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 in_mod;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
+ in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return err;
+}
+
+int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx)
+{
+ struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+ int i;
+
+ for (i = 0; i < MLX4_MAX_VLAN_NUM; ++i) {
+ if (table->refs[i] &&
+ (vid == (MLX4_VLAN_MASK &
+ be32_to_cpu(table->entries[i])))) {
+ /* VLAN already registered, increase reference count */
+ *idx = i;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan);
+
+int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
+ int *index)
+{
+ struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+ int i, err = 0;
+ int free = -1;
+ int free_for_dup = -1;
+ bool dup = mlx4_is_mf_bonded(dev);
+ u8 dup_port = (port == 1) ? 2 : 1;
+ struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table;
+ bool need_mf_bond = mlx4_need_mf_bond(dev);
+ bool can_mf_bond = true;
+
+ mlx4_dbg(dev, "Registering VLAN: %d for port %d %s duplicate\n",
+ vlan, port,
+ dup ? "with" : "without");
+
+ if (need_mf_bond) {
+ if (port == 1) {
+ mutex_lock(&table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
+ }
+ } else {
+ mutex_lock(&table->mutex);
+ }
+
+ if (table->total == table->max) {
+ /* No free vlan entries */
+ err = -ENOSPC;
+ goto out;
+ }
+
+ if (need_mf_bond) {
+ int index_at_port = -1;
+ int index_at_dup_port = -1;
+
+ for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
+ if (vlan == (MLX4_VLAN_MASK & be32_to_cpu(table->entries[i])))
+ index_at_port = i;
+ if (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i])))
+ index_at_dup_port = i;
+ }
+ /* check that same vlan is not in the tables at different indices */
+ if ((index_at_port != index_at_dup_port) &&
+ (index_at_port >= 0) &&
+ (index_at_dup_port >= 0))
+ can_mf_bond = false;
+
+ /* If the vlan is already in the primary table, the slot must be
+ * available in the duplicate table as well.
+ */
+ if (index_at_port >= 0 && index_at_dup_port < 0 &&
+ dup_table->refs[index_at_port]) {
+ can_mf_bond = false;
+ }
+ /* If the vlan is already in the duplicate table, check that the
+ * corresponding index is not occupied in the primary table, or
+ * the primary table already contains the vlan at the same index.
+ * Otherwise, you cannot bond (primary contains a different vlan
+ * at that index).
+ */
+ if (index_at_dup_port >= 0) {
+ if (!table->refs[index_at_dup_port] ||
+ (vlan == (MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[index_at_dup_port]))))
+ free_for_dup = index_at_dup_port;
+ else
+ can_mf_bond = false;
+ }
+ }
+
+ for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
+ if (!table->refs[i]) {
+ if (free < 0)
+ free = i;
+ if (free_for_dup < 0 && need_mf_bond && can_mf_bond) {
+ if (!dup_table->refs[i])
+ free_for_dup = i;
+ }
+ }
+
+ if ((table->refs[i] || table->is_dup[i]) &&
+ (vlan == (MLX4_VLAN_MASK &
+ be32_to_cpu(table->entries[i])))) {
+ /* Vlan already registered, increase references count */
+ mlx4_dbg(dev, "vlan %u is already registered.\n", vlan);
+ *index = i;
+ ++table->refs[i];
+ if (dup) {
+ u16 dup_vlan = MLX4_VLAN_MASK & be32_to_cpu(dup_table->entries[i]);
+
+ if (dup_vlan != vlan || !dup_table->is_dup[i]) {
+ mlx4_warn(dev, "register vlan: expected duplicate vlan %u on port %d index %d\n",
+ vlan, dup_port, i);
+ }
+ }
+ goto out;
+ }
+ }
+
+ if (need_mf_bond && (free_for_dup < 0)) {
+ if (dup) {
+ mlx4_warn(dev, "Fail to allocate duplicate VLAN table entry\n");
+ mlx4_warn(dev, "High Availability for virtual functions may not work as expected\n");
+ dup = false;
+ }
+ can_mf_bond = false;
+ }
+
+ if (need_mf_bond && can_mf_bond)
+ free = free_for_dup;
+
+ if (free < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Register new VLAN */
+ table->refs[free] = 1;
+ table->is_dup[free] = false;
+ table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
+
+ err = mlx4_set_port_vlan_table(dev, port, table->entries);
+ if (unlikely(err)) {
+ mlx4_warn(dev, "Failed adding vlan: %u\n", vlan);
+ table->refs[free] = 0;
+ table->entries[free] = 0;
+ goto out;
+ }
+ ++table->total;
+ if (dup) {
+ dup_table->refs[free] = 0;
+ dup_table->is_dup[free] = true;
+ dup_table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
+
+ err = mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries);
+ if (unlikely(err)) {
+ mlx4_warn(dev, "Failed adding duplicate vlan: %u\n", vlan);
+ dup_table->is_dup[free] = false;
+ dup_table->entries[free] = 0;
+ goto out;
+ }
+ ++dup_table->total;
+ }
+
+ *index = free;
+out:
+ if (need_mf_bond) {
+ if (port == 2) {
+ mutex_unlock(&table->mutex);
+ mutex_unlock(&dup_table->mutex);
+ } else {
+ mutex_unlock(&dup_table->mutex);
+ mutex_unlock(&table->mutex);
+ }
+ } else {
+ mutex_unlock(&table->mutex);
+ }
+ return err;
+}
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+{
+ u64 out_param = 0;
+ int err;
+
+ if (vlan > 4095)
+ return -EINVAL;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, vlan, &out_param,
+ ((u32) port) << 8 | (u32) RES_VLAN,
+ RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *index = get_param_l(&out_param);
+
+ return err;
+ }
+ return __mlx4_register_vlan(dev, port, vlan, index);
+}
+EXPORT_SYMBOL_GPL(mlx4_register_vlan);
+
+void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
+{
+ struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
+ int index;
+ bool dup = mlx4_is_mf_bonded(dev);
+ u8 dup_port = (port == 1) ? 2 : 1;
+ struct mlx4_vlan_table *dup_table = &mlx4_priv(dev)->port[dup_port].vlan_table;
+
+ if (dup) {
+ if (port == 1) {
+ mutex_lock(&table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
+ } else {
+ mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
+ }
+ } else {
+ mutex_lock(&table->mutex);
+ }
+
+ if (mlx4_find_cached_vlan(dev, port, vlan, &index)) {
+ mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan);
+ goto out;
+ }
+
+ if (index < MLX4_VLAN_REGULAR) {
+ mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
+ goto out;
+ }
+
+ if (--table->refs[index] || table->is_dup[index]) {
+ mlx4_dbg(dev, "Have %d more references for index %d, no need to modify vlan table\n",
+ table->refs[index], index);
+ if (!table->refs[index])
+ dup_table->is_dup[index] = false;
+ goto out;
+ }
+ table->entries[index] = 0;
+ if (mlx4_set_port_vlan_table(dev, port, table->entries))
+ mlx4_warn(dev, "Fail to set vlan in port %d during unregister\n", port);
+ --table->total;
+ if (dup) {
+ dup_table->is_dup[index] = false;
+ if (dup_table->refs[index])
+ goto out;
+ dup_table->entries[index] = 0;
+ if (mlx4_set_port_vlan_table(dev, dup_port, dup_table->entries))
+ mlx4_warn(dev, "Fail to set vlan in duplicate port %d during unregister\n", dup_port);
+ --dup_table->total;
+ }
+out:
+ if (dup) {
+ if (port == 2) {
+ mutex_unlock(&table->mutex);
+ mutex_unlock(&dup_table->mutex);
+ } else {
+ mutex_unlock(&dup_table->mutex);
+ mutex_unlock(&table->mutex);
+ }
+ } else {
+ mutex_unlock(&table->mutex);
+ }
+}
+
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
+{
+ u64 out_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ (void) mlx4_cmd_imm(dev, vlan, &out_param,
+ ((u32) port) << 8 | (u32) RES_VLAN,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ return;
+ }
+ __mlx4_unregister_vlan(dev, port, vlan);
+}
+EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
+
+int mlx4_bond_mac_table(struct mlx4_dev *dev)
+{
+ struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table;
+ struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table;
+ int ret = 0;
+ int i;
+ bool update1 = false;
+ bool update2 = false;
+
+ mutex_lock(&t1->mutex);
+ mutex_lock(&t2->mutex);
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if ((t1->entries[i] != t2->entries[i]) &&
+ t1->entries[i] && t2->entries[i]) {
+ mlx4_warn(dev, "can't duplicate entry %d in mac table\n", i);
+ ret = -EINVAL;
+ goto unlock;
+ }
+ }
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (t1->entries[i] && !t2->entries[i]) {
+ t2->entries[i] = t1->entries[i];
+ t2->is_dup[i] = true;
+ update2 = true;
+ } else if (!t1->entries[i] && t2->entries[i]) {
+ t1->entries[i] = t2->entries[i];
+ t1->is_dup[i] = true;
+ update1 = true;
+ } else if (t1->entries[i] && t2->entries[i]) {
+ t1->is_dup[i] = true;
+ t2->is_dup[i] = true;
+ }
+ }
+
+ if (update1) {
+ ret = mlx4_set_port_mac_table(dev, 1, t1->entries);
+ if (ret)
+ mlx4_warn(dev, "failed to set MAC table for port 1 (%d)\n", ret);
+ }
+ if (!ret && update2) {
+ ret = mlx4_set_port_mac_table(dev, 2, t2->entries);
+ if (ret)
+ mlx4_warn(dev, "failed to set MAC table for port 2 (%d)\n", ret);
+ }
+
+ if (ret)
+ mlx4_warn(dev, "failed to create mirror MAC tables\n");
+unlock:
+ mutex_unlock(&t2->mutex);
+ mutex_unlock(&t1->mutex);
+ return ret;
+}
+
+int mlx4_unbond_mac_table(struct mlx4_dev *dev)
+{
+ struct mlx4_mac_table *t1 = &mlx4_priv(dev)->port[1].mac_table;
+ struct mlx4_mac_table *t2 = &mlx4_priv(dev)->port[2].mac_table;
+ int ret = 0;
+ int ret1;
+ int i;
+ bool update1 = false;
+ bool update2 = false;
+
+ mutex_lock(&t1->mutex);
+ mutex_lock(&t2->mutex);
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (t1->entries[i] != t2->entries[i]) {
+ mlx4_warn(dev, "mac table is in an unexpected state when trying to unbond\n");
+ ret = -EINVAL;
+ goto unlock;
+ }
+ }
+
+ for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+ if (!t1->entries[i])
+ continue;
+ t1->is_dup[i] = false;
+ if (!t1->refs[i]) {
+ t1->entries[i] = 0;
+ update1 = true;
+ }
+ t2->is_dup[i] = false;
+ if (!t2->refs[i]) {
+ t2->entries[i] = 0;
+ update2 = true;
+ }
+ }
+
+ if (update1) {
+ ret = mlx4_set_port_mac_table(dev, 1, t1->entries);
+ if (ret)
+ mlx4_warn(dev, "failed to unmirror MAC tables for port 1(%d)\n", ret);
+ }
+ if (update2) {
+ ret1 = mlx4_set_port_mac_table(dev, 2, t2->entries);
+ if (ret1) {
+ mlx4_warn(dev, "failed to unmirror MAC tables for port 2(%d)\n", ret1);
+ ret = ret1;
+ }
+ }
+unlock:
+ mutex_unlock(&t2->mutex);
+ mutex_unlock(&t1->mutex);
+ return ret;
+}
+
+int mlx4_bond_vlan_table(struct mlx4_dev *dev)
+{
+ struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table;
+ struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table;
+ int ret = 0;
+ int i;
+ bool update1 = false;
+ bool update2 = false;
+
+ mutex_lock(&t1->mutex);
+ mutex_lock(&t2->mutex);
+ for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+ if ((t1->entries[i] != t2->entries[i]) &&
+ t1->entries[i] && t2->entries[i]) {
+ mlx4_warn(dev, "can't duplicate entry %d in vlan table\n", i);
+ ret = -EINVAL;
+ goto unlock;
+ }
+ }
+
+ for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+ if (t1->entries[i] && !t2->entries[i]) {
+ t2->entries[i] = t1->entries[i];
+ t2->is_dup[i] = true;
+ update2 = true;
+ } else if (!t1->entries[i] && t2->entries[i]) {
+ t1->entries[i] = t2->entries[i];
+ t1->is_dup[i] = true;
+ update1 = true;
+ } else if (t1->entries[i] && t2->entries[i]) {
+ t1->is_dup[i] = true;
+ t2->is_dup[i] = true;
+ }
+ }
+
+ if (update1) {
+ ret = mlx4_set_port_vlan_table(dev, 1, t1->entries);
+ if (ret)
+ mlx4_warn(dev, "failed to set VLAN table for port 1 (%d)\n", ret);
+ }
+ if (!ret && update2) {
+ ret = mlx4_set_port_vlan_table(dev, 2, t2->entries);
+ if (ret)
+ mlx4_warn(dev, "failed to set VLAN table for port 2 (%d)\n", ret);
+ }
+
+ if (ret)
+ mlx4_warn(dev, "failed to create mirror VLAN tables\n");
+unlock:
+ mutex_unlock(&t2->mutex);
+ mutex_unlock(&t1->mutex);
+ return ret;
+}
+
+int mlx4_unbond_vlan_table(struct mlx4_dev *dev)
+{
+ struct mlx4_vlan_table *t1 = &mlx4_priv(dev)->port[1].vlan_table;
+ struct mlx4_vlan_table *t2 = &mlx4_priv(dev)->port[2].vlan_table;
+ int ret = 0;
+ int ret1;
+ int i;
+ bool update1 = false;
+ bool update2 = false;
+
+ mutex_lock(&t1->mutex);
+ mutex_lock(&t2->mutex);
+ for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+ if (t1->entries[i] != t2->entries[i]) {
+ mlx4_warn(dev, "vlan table is in an unexpected state when trying to unbond\n");
+ ret = -EINVAL;
+ goto unlock;
+ }
+ }
+
+ for (i = 0; i < MLX4_MAX_VLAN_NUM; i++) {
+ if (!t1->entries[i])
+ continue;
+ t1->is_dup[i] = false;
+ if (!t1->refs[i]) {
+ t1->entries[i] = 0;
+ update1 = true;
+ }
+ t2->is_dup[i] = false;
+ if (!t2->refs[i]) {
+ t2->entries[i] = 0;
+ update2 = true;
+ }
+ }
+
+ if (update1) {
+ ret = mlx4_set_port_vlan_table(dev, 1, t1->entries);
+ if (ret)
+ mlx4_warn(dev, "failed to unmirror VLAN tables for port 1(%d)\n", ret);
+ }
+ if (update2) {
+ ret1 = mlx4_set_port_vlan_table(dev, 2, t2->entries);
+ if (ret1) {
+ mlx4_warn(dev, "failed to unmirror VLAN tables for port 2(%d)\n", ret1);
+ ret = ret1;
+ }
+ }
+unlock:
+ mutex_unlock(&t2->mutex);
+ mutex_unlock(&t1->mutex);
+ return ret;
+}
+
+int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
+{
+ struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
+ u8 *inbuf, *outbuf;
+ int err;
+
+ inmailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inmailbox))
+ return PTR_ERR(inmailbox);
+
+ outmailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outmailbox)) {
+ mlx4_free_cmd_mailbox(dev, inmailbox);
+ return PTR_ERR(outmailbox);
+ }
+
+ inbuf = inmailbox->buf;
+ outbuf = outmailbox->buf;
+ inbuf[0] = 1;
+ inbuf[1] = 1;
+ inbuf[2] = 1;
+ inbuf[3] = 1;
+ *(__be16 *) (&inbuf[16]) = cpu_to_be16(0x0015);
+ *(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
+
+ err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (!err)
+ *caps = *(__be32 *) (outbuf + 84);
+ mlx4_free_cmd_mailbox(dev, inmailbox);
+ mlx4_free_cmd_mailbox(dev, outmailbox);
+ return err;
+}
+static struct mlx4_roce_gid_entry zgid_entry;
+
+int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port)
+{
+ int vfs;
+ int slave_gid = slave;
+ unsigned i;
+ struct mlx4_slaves_pport slaves_pport;
+ struct mlx4_active_ports actv_ports;
+ unsigned max_port_p_one;
+
+ if (slave == 0)
+ return MLX4_ROCE_PF_GIDS;
+
+ /* Slave is a VF */
+ slaves_pport = mlx4_phys_to_slaves_pport(dev, port);
+ actv_ports = mlx4_get_active_ports(dev, slave);
+ max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) +
+ bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1;
+
+ for (i = 1; i < max_port_p_one; i++) {
+ struct mlx4_active_ports exclusive_ports;
+ struct mlx4_slaves_pport slaves_pport_actv;
+ bitmap_zero(exclusive_ports.ports, dev->caps.num_ports);
+ set_bit(i - 1, exclusive_ports.ports);
+ if (i == port)
+ continue;
+ slaves_pport_actv = mlx4_phys_to_slaves_pport_actv(
+ dev, &exclusive_ports);
+ slave_gid -= bitmap_weight(slaves_pport_actv.slaves,
+ dev->persist->num_vfs + 1);
+ }
+ vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1;
+ if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs))
+ return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1;
+ return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs;
+}
+
+int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port)
+{
+ int gids;
+ unsigned i;
+ int slave_gid = slave;
+ int vfs;
+
+ struct mlx4_slaves_pport slaves_pport;
+ struct mlx4_active_ports actv_ports;
+ unsigned max_port_p_one;
+
+ if (slave == 0)
+ return 0;
+
+ slaves_pport = mlx4_phys_to_slaves_pport(dev, port);
+ actv_ports = mlx4_get_active_ports(dev, slave);
+ max_port_p_one = find_first_bit(actv_ports.ports, dev->caps.num_ports) +
+ bitmap_weight(actv_ports.ports, dev->caps.num_ports) + 1;
+
+ for (i = 1; i < max_port_p_one; i++) {
+ struct mlx4_active_ports exclusive_ports;
+ struct mlx4_slaves_pport slaves_pport_actv;
+ bitmap_zero(exclusive_ports.ports, dev->caps.num_ports);
+ set_bit(i - 1, exclusive_ports.ports);
+ if (i == port)
+ continue;
+ slaves_pport_actv = mlx4_phys_to_slaves_pport_actv(
+ dev, &exclusive_ports);
+ slave_gid -= bitmap_weight(slaves_pport_actv.slaves,
+ dev->persist->num_vfs + 1);
+ }
+ gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+ vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1;
+ if (slave_gid <= gids % vfs)
+ return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1);
+
+ return MLX4_ROCE_PF_GIDS + (gids % vfs) +
+ ((gids / vfs) * (slave_gid - 1));
+}
+EXPORT_SYMBOL_GPL(mlx4_get_base_gid_ix);
+
+static int mlx4_reset_roce_port_gids(struct mlx4_dev *dev, int slave,
+ int port, struct mlx4_cmd_mailbox *mailbox)
+{
+ struct mlx4_roce_gid_entry *gid_entry_mbox;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int num_gids, base, offset;
+ int i, err;
+
+ num_gids = mlx4_get_slave_num_gids(dev, slave, port);
+ base = mlx4_get_base_gid_ix(dev, slave, port);
+
+ memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE);
+
+ mutex_lock(&(priv->port[port].gid_table.mutex));
+ /* Zero-out gids belonging to that slave in the port GID table */
+ for (i = 0, offset = base; i < num_gids; offset++, i++)
+ memcpy(priv->port[port].gid_table.roce_gids[offset].raw,
+ zgid_entry.raw, MLX4_ROCE_GID_ENTRY_SIZE);
+
+ /* Now, copy roce port gids table to mailbox for passing to FW */
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *)mailbox->buf;
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++)
+ memcpy(gid_entry_mbox->raw,
+ priv->port[port].gid_table.roce_gids[i].raw,
+ MLX4_ROCE_GID_ENTRY_SIZE);
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ ((u32)port) | (MLX4_SET_PORT_GID_TABLE << 8),
+ MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ mutex_unlock(&(priv->port[port].gid_table.mutex));
+ return err;
+}
+
+
+void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_active_ports actv_ports;
+ struct mlx4_cmd_mailbox *mailbox;
+ int num_eth_ports, err;
+ int i;
+
+ if (slave < 0 || slave > dev->persist->num_vfs)
+ return;
+
+ actv_ports = mlx4_get_active_ports(dev, slave);
+
+ for (i = 0, num_eth_ports = 0; i < dev->caps.num_ports; i++) {
+ if (test_bit(i, actv_ports.ports)) {
+ if (dev->caps.port_type[i + 1] != MLX4_PORT_TYPE_ETH)
+ continue;
+ num_eth_ports++;
+ }
+ }
+
+ if (!num_eth_ports)
+ return;
+
+ /* have ETH ports. Alloc mailbox for SET_PORT command */
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return;
+
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (test_bit(i, actv_ports.ports)) {
+ if (dev->caps.port_type[i + 1] != MLX4_PORT_TYPE_ETH)
+ continue;
+ err = mlx4_reset_roce_port_gids(dev, slave, i + 1, mailbox);
+ if (err)
+ mlx4_warn(dev, "Could not reset ETH port GID table for slave %d, port %d (%d)\n",
+ slave, i + 1, err);
+ }
+ }
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return;
+}
+
+static void
+mlx4_en_set_port_mtu(struct mlx4_dev *dev, int slave, int port,
+ struct mlx4_set_port_general_context *gen_context)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
+ struct mlx4_slave_state *slave_st = &master->slave_state[slave];
+ u16 mtu, prev_mtu;
+
+ /* Mtu is configured as the max USER_MTU among all
+ * the functions on the port.
+ */
+ mtu = be16_to_cpu(gen_context->mtu);
+ mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] +
+ ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+ prev_mtu = slave_st->mtu[port];
+ slave_st->mtu[port] = mtu;
+ if (mtu > master->max_mtu[port])
+ master->max_mtu[port] = mtu;
+ if (mtu < prev_mtu && prev_mtu == master->max_mtu[port]) {
+ int i;
+
+ slave_st->mtu[port] = mtu;
+ master->max_mtu[port] = mtu;
+ for (i = 0; i < dev->num_slaves; i++)
+ master->max_mtu[port] =
+ max_t(u16, master->max_mtu[port],
+ master->slave_state[i].mtu[port]);
+ }
+ gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
+}
+
+static void
+mlx4_en_set_port_user_mtu(struct mlx4_dev *dev, int slave, int port,
+ struct mlx4_set_port_general_context *gen_context)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
+ struct mlx4_slave_state *slave_st = &master->slave_state[slave];
+ u16 user_mtu, prev_user_mtu;
+
+ /* User Mtu is configured as the max USER_MTU among all
+ * the functions on the port.
+ */
+ user_mtu = be16_to_cpu(gen_context->user_mtu);
+ user_mtu = min_t(int, user_mtu, dev->caps.eth_mtu_cap[port]);
+ prev_user_mtu = slave_st->user_mtu[port];
+ slave_st->user_mtu[port] = user_mtu;
+ if (user_mtu > master->max_user_mtu[port])
+ master->max_user_mtu[port] = user_mtu;
+ if (user_mtu < prev_user_mtu &&
+ prev_user_mtu == master->max_user_mtu[port]) {
+ int i;
+
+ slave_st->user_mtu[port] = user_mtu;
+ master->max_user_mtu[port] = user_mtu;
+ for (i = 0; i < dev->num_slaves; i++)
+ master->max_user_mtu[port] =
+ max_t(u16, master->max_user_mtu[port],
+ master->slave_state[i].user_mtu[port]);
+ }
+ gen_context->user_mtu = cpu_to_be16(master->max_user_mtu[port]);
+}
+
+static void
+mlx4_en_set_port_global_pause(struct mlx4_dev *dev, int slave,
+ struct mlx4_set_port_general_context *gen_context)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
+
+ /* Slave cannot change Global Pause configuration */
+ if (slave != mlx4_master_func_num(dev) &&
+ (gen_context->pptx != master->pptx ||
+ gen_context->pprx != master->pprx)) {
+ gen_context->pptx = master->pptx;
+ gen_context->pprx = master->pprx;
+ mlx4_warn(dev, "denying Global Pause change for slave:%d\n",
+ slave);
+ } else {
+ master->pptx = gen_context->pptx;
+ master->pprx = gen_context->pprx;
+ }
+}
+
+static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
+ u8 op_mod, struct mlx4_cmd_mailbox *inbox)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_port_info *port_info;
+ struct mlx4_set_port_rqp_calc_context *qpn_context;
+ struct mlx4_set_port_general_context *gen_context;
+ struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1;
+ int reset_qkey_viols;
+ int port;
+ int is_eth;
+ int num_gids;
+ int base;
+ u32 in_modifier;
+ u32 promisc;
+ int err;
+ int i, j;
+ int offset;
+ __be32 agg_cap_mask;
+ __be32 slave_cap_mask;
+ __be32 new_cap_mask;
+
+ port = in_mod & 0xff;
+ in_modifier = in_mod >> 8;
+ is_eth = op_mod;
+ port_info = &priv->port[port];
+
+ /* Slaves cannot perform SET_PORT operations,
+ * except for changing MTU and USER_MTU.
+ */
+ if (is_eth) {
+ if (slave != dev->caps.function &&
+ in_modifier != MLX4_SET_PORT_GENERAL &&
+ in_modifier != MLX4_SET_PORT_GID_TABLE) {
+ mlx4_warn(dev, "denying SET_PORT for slave:%d\n",
+ slave);
+ return -EINVAL;
+ }
+ switch (in_modifier) {
+ case MLX4_SET_PORT_RQP_CALC:
+ qpn_context = inbox->buf;
+ qpn_context->base_qpn =
+ cpu_to_be32(port_info->base_qpn);
+ qpn_context->n_mac = 0x7;
+ promisc = be32_to_cpu(qpn_context->promisc) >>
+ SET_PORT_PROMISC_SHIFT;
+ qpn_context->promisc = cpu_to_be32(
+ promisc << SET_PORT_PROMISC_SHIFT |
+ port_info->base_qpn);
+ promisc = be32_to_cpu(qpn_context->mcast) >>
+ SET_PORT_MC_PROMISC_SHIFT;
+ qpn_context->mcast = cpu_to_be32(
+ promisc << SET_PORT_MC_PROMISC_SHIFT |
+ port_info->base_qpn);
+ break;
+ case MLX4_SET_PORT_GENERAL:
+ gen_context = inbox->buf;
+
+ if (gen_context->flags & MLX4_FLAG_V_MTU_MASK)
+ mlx4_en_set_port_mtu(dev, slave, port,
+ gen_context);
+
+ if (gen_context->flags2 & MLX4_FLAG2_V_USER_MTU_MASK)
+ mlx4_en_set_port_user_mtu(dev, slave, port,
+ gen_context);
+
+ if (gen_context->flags &
+ (MLX4_FLAG_V_PPRX_MASK | MLX4_FLAG_V_PPTX_MASK))
+ mlx4_en_set_port_global_pause(dev, slave,
+ gen_context);
+
+ break;
+ case MLX4_SET_PORT_GID_TABLE:
+ /* change to MULTIPLE entries: number of guest's gids
+ * need a FOR-loop here over number of gids the guest has.
+ * 1. Check no duplicates in gids passed by slave
+ */
+ num_gids = mlx4_get_slave_num_gids(dev, slave, port);
+ base = mlx4_get_base_gid_ix(dev, slave, port);
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+ for (i = 0; i < num_gids; gid_entry_mbox++, i++) {
+ if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+ sizeof(zgid_entry)))
+ continue;
+ gid_entry_mb1 = gid_entry_mbox + 1;
+ for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) {
+ if (!memcmp(gid_entry_mb1->raw,
+ zgid_entry.raw, sizeof(zgid_entry)))
+ continue;
+ if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw,
+ sizeof(gid_entry_mbox->raw))) {
+ /* found duplicate */
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* 2. Check that do not have duplicates in OTHER
+ * entries in the port GID table
+ */
+
+ mutex_lock(&(priv->port[port].gid_table.mutex));
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
+ if (i >= base && i < base + num_gids)
+ continue; /* don't compare to slave's current gids */
+ gid_entry_tbl = &priv->port[port].gid_table.roce_gids[i];
+ if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry)))
+ continue;
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+ for (j = 0; j < num_gids; gid_entry_mbox++, j++) {
+ if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
+ sizeof(zgid_entry)))
+ continue;
+ if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw,
+ sizeof(gid_entry_tbl->raw))) {
+ /* found duplicate */
+ mlx4_warn(dev, "requested gid entry for slave:%d is a duplicate of gid at index %d\n",
+ slave, i);
+ mutex_unlock(&(priv->port[port].gid_table.mutex));
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* insert slave GIDs with memcpy, starting at slave's base index */
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+ for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++)
+ memcpy(priv->port[port].gid_table.roce_gids[offset].raw,
+ gid_entry_mbox->raw, MLX4_ROCE_GID_ENTRY_SIZE);
+
+ /* Now, copy roce port gids table to current mailbox for passing to FW */
+ gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf);
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++)
+ memcpy(gid_entry_mbox->raw,
+ priv->port[port].gid_table.roce_gids[i].raw,
+ MLX4_ROCE_GID_ENTRY_SIZE);
+
+ err = mlx4_cmd(dev, inbox->dma, in_mod & 0xffff, op_mod,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ mutex_unlock(&(priv->port[port].gid_table.mutex));
+ return err;
+ }
+
+ return mlx4_cmd(dev, inbox->dma, in_mod & 0xffff, op_mod,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+ }
+
+ /* Slaves are not allowed to SET_PORT beacon (LED) blink */
+ if (op_mod == MLX4_SET_PORT_BEACON_OPCODE) {
+ mlx4_warn(dev, "denying SET_PORT Beacon slave:%d\n", slave);
+ return -EPERM;
+ }
+
+ /* For IB, we only consider:
+ * - The capability mask, which is set to the aggregate of all
+ * slave function capabilities
+ * - The QKey violatin counter - reset according to each request.
+ */
+
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ reset_qkey_viols = (*(u8 *) inbox->buf) & 0x40;
+ new_cap_mask = ((__be32 *) inbox->buf)[2];
+ } else {
+ reset_qkey_viols = ((u8 *) inbox->buf)[3] & 0x1;
+ new_cap_mask = ((__be32 *) inbox->buf)[1];
+ }
+
+ /* slave may not set the IS_SM capability for the port */
+ if (slave != mlx4_master_func_num(dev) &&
+ (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM))
+ return -EINVAL;
+
+ /* No DEV_MGMT in multifunc mode */
+ if (mlx4_is_mfunc(dev) &&
+ (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP))
+ return -EINVAL;
+
+ agg_cap_mask = 0;
+ slave_cap_mask =
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = new_cap_mask;
+ for (i = 0; i < dev->num_slaves; i++)
+ agg_cap_mask |=
+ priv->mfunc.master.slave_state[i].ib_cap_mask[port];
+
+ /* only clear mailbox for guests. Master may be setting
+ * MTU or PKEY table size
+ */
+ if (slave != dev->caps.function)
+ memset(inbox->buf, 0, 256);
+ if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ *(u8 *) inbox->buf |= !!reset_qkey_viols << 6;
+ ((__be32 *) inbox->buf)[2] = agg_cap_mask;
+ } else {
+ ((u8 *) inbox->buf)[3] |= !!reset_qkey_viols;
+ ((__be32 *) inbox->buf)[1] = agg_cap_mask;
+ }
+
+ err = mlx4_cmd(dev, inbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ if (err)
+ priv->mfunc.master.slave_state[slave].ib_cap_mask[port] =
+ slave_cap_mask;
+ return err;
+}
+
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int port = mlx4_slave_convert_port(
+ dev, slave, vhcr->in_modifier & 0xFF);
+
+ if (port < 0)
+ return -EINVAL;
+
+ vhcr->in_modifier = (vhcr->in_modifier & ~0xFF) |
+ (port & 0xFF);
+
+ return mlx4_common_set_port(dev, slave, vhcr->in_modifier,
+ vhcr->op_modifier, inbox);
+}
+
+/* bit locations for set port command with zero op modifier */
+enum {
+ MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */
+ MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */
+ MLX4_CHANGE_PORT_PKEY_TBL_SZ = 20,
+ MLX4_CHANGE_PORT_VL_CAP = 21,
+ MLX4_CHANGE_PORT_MTU_CAP = 22,
+};
+
+int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err, vl_cap, pkey_tbl_flag = 0;
+
+ if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+ return 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
+
+ if (pkey_tbl_sz >= 0 && mlx4_is_master(dev)) {
+ pkey_tbl_flag = 1;
+ ((__be16 *) mailbox->buf)[20] = cpu_to_be16(pkey_tbl_sz);
+ }
+
+ /* IB VL CAP enum isn't used by the firmware, just numerical values */
+ for (vl_cap = 8; vl_cap >= 1; vl_cap >>= 1) {
+ ((__be32 *) mailbox->buf)[0] = cpu_to_be32(
+ (1 << MLX4_CHANGE_PORT_MTU_CAP) |
+ (1 << MLX4_CHANGE_PORT_VL_CAP) |
+ (pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) |
+ (dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) |
+ (vl_cap << MLX4_SET_PORT_VL_CAP));
+ err = mlx4_cmd(dev, mailbox->dma, port,
+ MLX4_SET_PORT_IB_OPCODE, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+ if (err != -ENOMEM)
+ break;
+ }
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+#define SET_PORT_ROCE_2_FLAGS 0x10
+#define MLX4_SET_PORT_ROCE_V1_V2 0x2
+int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+ u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ int err;
+ u32 in_mod;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ context->flags = SET_PORT_GEN_ALL_VALID;
+ context->mtu = cpu_to_be16(mtu);
+ context->pptx = (pptx * (!pfctx)) << 7;
+ context->pfctx = pfctx;
+ context->pprx = (pprx * (!pfcrx)) << 7;
+ context->pfcrx = pfcrx;
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ context->flags |= SET_PORT_ROCE_2_FLAGS;
+ context->roce_mode |=
+ MLX4_SET_PORT_ROCE_V1_V2 << 4;
+ }
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_general);
+
+int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+ u8 promisc)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_rqp_calc_context *context;
+ int err;
+ u32 in_mod;
+ u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
+ MCAST_DIRECT : MCAST_DEFAULT;
+
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
+ return 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ context->base_qpn = cpu_to_be32(base_qpn);
+ context->n_mac = dev->caps.log_num_macs;
+ context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
+ base_qpn);
+ context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
+ base_qpn);
+ context->intra_no_vlan = 0;
+ context->no_vlan = MLX4_NO_VLAN_IDX;
+ context->intra_vlan_miss = 0;
+ context->vlan_miss = MLX4_VLAN_MISS_IDX;
+
+ in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
+
+int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ u32 in_mod;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ context->flags2 |= MLX4_FLAG2_V_USER_MTU_MASK;
+ context->user_mtu = cpu_to_be16(user_mtu);
+
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu);
+
+int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ u32 in_mod;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ context->flags2 |= MLX4_FLAG2_V_USER_MAC_MASK;
+ memcpy(context->user_mac, user_mac, sizeof(context->user_mac));
+
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_user_mac);
+
+int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ u32 in_mod;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ context->flags2 |= MLX4_FLAG2_V_IGNORE_FCS_MASK;
+ if (ignore_fcs_value)
+ context->ignore_fcs |= MLX4_IGNORE_FCS_MASK;
+ else
+ context->ignore_fcs &= ~MLX4_IGNORE_FCS_MASK;
+
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_fcs_check);
+
+enum {
+ VXLAN_ENABLE_MODIFY = 1 << 7,
+ VXLAN_STEERING_MODIFY = 1 << 6,
+
+ VXLAN_ENABLE = 1 << 7,
+};
+
+struct mlx4_set_port_vxlan_context {
+ u32 reserved1;
+ u8 modify_flags;
+ u8 reserved2;
+ u8 enable_flags;
+ u8 steering;
+};
+
+int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable)
+{
+ int err;
+ u32 in_mod;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_vxlan_context *context;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof(*context));
+
+ context->modify_flags = VXLAN_ENABLE_MODIFY | VXLAN_STEERING_MODIFY;
+ if (enable)
+ context->enable_flags = VXLAN_ENABLE;
+ context->steering = steering;
+
+ in_mod = MLX4_SET_PORT_VXLAN << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_VXLAN);
+
+int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time)
+{
+ int err;
+ struct mlx4_cmd_mailbox *mailbox;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ *((__be32 *)mailbox->buf) = cpu_to_be32(time);
+
+ err = mlx4_cmd(dev, mailbox->dma, port, MLX4_SET_PORT_BEACON_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_BEACON);
+
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = 0;
+
+ return err;
+}
+
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
+ u64 mac, u64 clear, u8 mode)
+{
+ return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
+ MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+}
+EXPORT_SYMBOL(mlx4_SET_MCAST_FLTR);
+
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = 0;
+
+ return err;
+}
+
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ return 0;
+}
+
+int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
+ int *slave_id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, found_ix = -1;
+ int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
+ struct mlx4_slaves_pport slaves_pport;
+ unsigned num_vfs;
+ int slave_gid;
+
+ if (!mlx4_is_mfunc(dev))
+ return -EINVAL;
+
+ slaves_pport = mlx4_phys_to_slaves_pport(dev, port);
+ num_vfs = bitmap_weight(slaves_pport.slaves,
+ dev->persist->num_vfs + 1) - 1;
+
+ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
+ if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid,
+ MLX4_ROCE_GID_ENTRY_SIZE)) {
+ found_ix = i;
+ break;
+ }
+ }
+
+ if (found_ix >= 0) {
+ /* Calculate a slave_gid which is the slave number in the gid
+ * table and not a globally unique slave number.
+ */
+ if (found_ix < MLX4_ROCE_PF_GIDS)
+ slave_gid = 0;
+ else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % num_vfs) *
+ (vf_gids / num_vfs + 1))
+ slave_gid = ((found_ix - MLX4_ROCE_PF_GIDS) /
+ (vf_gids / num_vfs + 1)) + 1;
+ else
+ slave_gid =
+ ((found_ix - MLX4_ROCE_PF_GIDS -
+ ((vf_gids % num_vfs) * ((vf_gids / num_vfs + 1)))) /
+ (vf_gids / num_vfs)) + vf_gids % num_vfs + 1;
+
+ /* Calculate the globally unique slave id */
+ if (slave_gid) {
+ struct mlx4_active_ports exclusive_ports;
+ struct mlx4_active_ports actv_ports;
+ struct mlx4_slaves_pport slaves_pport_actv;
+ unsigned max_port_p_one;
+ int num_vfs_before = 0;
+ int candidate_slave_gid;
+
+ /* Calculate how many VFs are on the previous port, if exists */
+ for (i = 1; i < port; i++) {
+ bitmap_zero(exclusive_ports.ports, dev->caps.num_ports);
+ set_bit(i - 1, exclusive_ports.ports);
+ slaves_pport_actv =
+ mlx4_phys_to_slaves_pport_actv(
+ dev, &exclusive_ports);
+ num_vfs_before += bitmap_weight(
+ slaves_pport_actv.slaves,
+ dev->persist->num_vfs + 1);
+ }
+
+ /* candidate_slave_gid isn't necessarily the correct slave, but
+ * it has the same number of ports and is assigned to the same
+ * ports as the real slave we're looking for. On dual port VF,
+ * slave_gid = [single port VFs on port <port>] +
+ * [offset of the current slave from the first dual port VF] +
+ * 1 (for the PF).
+ */
+ candidate_slave_gid = slave_gid + num_vfs_before;
+
+ actv_ports = mlx4_get_active_ports(dev, candidate_slave_gid);
+ max_port_p_one = find_first_bit(
+ actv_ports.ports, dev->caps.num_ports) +
+ bitmap_weight(actv_ports.ports,
+ dev->caps.num_ports) + 1;
+
+ /* Calculate the real slave number */
+ for (i = 1; i < max_port_p_one; i++) {
+ if (i == port)
+ continue;
+ bitmap_zero(exclusive_ports.ports,
+ dev->caps.num_ports);
+ set_bit(i - 1, exclusive_ports.ports);
+ slaves_pport_actv =
+ mlx4_phys_to_slaves_pport_actv(
+ dev, &exclusive_ports);
+ slave_gid += bitmap_weight(
+ slaves_pport_actv.slaves,
+ dev->persist->num_vfs + 1);
+ }
+ }
+ *slave_id = slave_gid;
+ }
+
+ return (found_ix >= 0) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid);
+
+int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id,
+ u8 *gid)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ if (!mlx4_is_master(dev))
+ return -EINVAL;
+
+ memcpy(gid, priv->port[port].gid_table.roce_gids[slave_id].raw,
+ MLX4_ROCE_GID_ENTRY_SIZE);
+ return 0;
+}
+EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
+
+/* Cable Module Info */
+#define MODULE_INFO_MAX_READ 48
+
+#define I2C_ADDR_LOW 0x50
+#define I2C_ADDR_HIGH 0x51
+#define I2C_PAGE_SIZE 256
+#define I2C_HIGH_PAGE_SIZE 128
+
+/* Module Info Data */
+struct mlx4_cable_info {
+ u8 i2c_addr;
+ u8 page_num;
+ __be16 dev_mem_address;
+ __be16 reserved1;
+ __be16 size;
+ __be32 reserved2[2];
+ u8 data[MODULE_INFO_MAX_READ];
+};
+
+enum cable_info_err {
+ CABLE_INF_INV_PORT = 0x1,
+ CABLE_INF_OP_NOSUP = 0x2,
+ CABLE_INF_NOT_CONN = 0x3,
+ CABLE_INF_NO_EEPRM = 0x4,
+ CABLE_INF_PAGE_ERR = 0x5,
+ CABLE_INF_INV_ADDR = 0x6,
+ CABLE_INF_I2C_ADDR = 0x7,
+ CABLE_INF_QSFP_VIO = 0x8,
+ CABLE_INF_I2C_BUSY = 0x9,
+};
+
+#define MAD_STATUS_2_CABLE_ERR(mad_status) ((mad_status >> 8) & 0xFF)
+
+static inline const char *cable_info_mad_err_str(u16 mad_status)
+{
+ u8 err = MAD_STATUS_2_CABLE_ERR(mad_status);
+
+ switch (err) {
+ case CABLE_INF_INV_PORT:
+ return "invalid port selected";
+ case CABLE_INF_OP_NOSUP:
+ return "operation not supported for this port (the port is of type CX4 or internal)";
+ case CABLE_INF_NOT_CONN:
+ return "cable is not connected";
+ case CABLE_INF_NO_EEPRM:
+ return "the connected cable has no EPROM (passive copper cable)";
+ case CABLE_INF_PAGE_ERR:
+ return "page number is greater than 15";
+ case CABLE_INF_INV_ADDR:
+ return "invalid device_address or size (that is, size equals 0 or address+size is greater than 256)";
+ case CABLE_INF_I2C_ADDR:
+ return "invalid I2C slave address";
+ case CABLE_INF_QSFP_VIO:
+ return "at least one cable violates the QSFP specification and ignores the modsel signal";
+ case CABLE_INF_I2C_BUSY:
+ return "I2C bus is constantly busy";
+ }
+ return "Unknown Error";
+}
+
+static int mlx4_get_module_id(struct mlx4_dev *dev, u8 port, u8 *module_id)
+{
+ struct mlx4_cmd_mailbox *inbox, *outbox;
+ struct mlx4_mad_ifc *inmad, *outmad;
+ struct mlx4_cable_info *cable_info;
+ int ret;
+
+ inbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inbox))
+ return PTR_ERR(inbox);
+
+ outbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outbox)) {
+ mlx4_free_cmd_mailbox(dev, inbox);
+ return PTR_ERR(outbox);
+ }
+
+ inmad = (struct mlx4_mad_ifc *)(inbox->buf);
+ outmad = (struct mlx4_mad_ifc *)(outbox->buf);
+
+ inmad->method = 0x1; /* Get */
+ inmad->class_version = 0x1;
+ inmad->mgmt_class = 0x1;
+ inmad->base_version = 0x1;
+ inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */
+
+ cable_info = (struct mlx4_cable_info *)inmad->data;
+ cable_info->dev_mem_address = 0;
+ cable_info->page_num = 0;
+ cable_info->i2c_addr = I2C_ADDR_LOW;
+ cable_info->size = cpu_to_be16(1);
+
+ ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (ret)
+ goto out;
+
+ if (be16_to_cpu(outmad->status)) {
+ /* Mad returned with bad status */
+ ret = be16_to_cpu(outmad->status);
+ mlx4_warn(dev,
+ "MLX4_CMD_MAD_IFC Get Module ID attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n",
+ 0xFF60, port, I2C_ADDR_LOW, 0, 1, ret,
+ cable_info_mad_err_str(ret));
+ ret = -ret;
+ goto out;
+ }
+ cable_info = (struct mlx4_cable_info *)outmad->data;
+ *module_id = cable_info->data[0];
+out:
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_free_cmd_mailbox(dev, outbox);
+ return ret;
+}
+
+static void mlx4_sfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset)
+{
+ *i2c_addr = I2C_ADDR_LOW;
+ *page_num = 0;
+
+ if (*offset < I2C_PAGE_SIZE)
+ return;
+
+ *i2c_addr = I2C_ADDR_HIGH;
+ *offset -= I2C_PAGE_SIZE;
+}
+
+static void mlx4_qsfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset)
+{
+ /* Offsets 0-255 belong to page 0.
+ * Offsets 256-639 belong to pages 01, 02, 03.
+ * For example, offset 400 is page 02: 1 + (400 - 256) / 128 = 2
+ */
+ if (*offset < I2C_PAGE_SIZE)
+ *page_num = 0;
+ else
+ *page_num = 1 + (*offset - I2C_PAGE_SIZE) / I2C_HIGH_PAGE_SIZE;
+ *i2c_addr = I2C_ADDR_LOW;
+ *offset -= *page_num * I2C_HIGH_PAGE_SIZE;
+}
+
+/**
+ * mlx4_get_module_info - Read cable module eeprom data
+ * @dev: mlx4_dev.
+ * @port: port number.
+ * @offset: byte offset in eeprom to start reading data from.
+ * @size: num of bytes to read.
+ * @data: output buffer to put the requested data into.
+ *
+ * Reads cable module eeprom data, puts the outcome data into
+ * data pointer paramer.
+ * Returns num of read bytes on success or a negative error
+ * code.
+ */
+int mlx4_get_module_info(struct mlx4_dev *dev, u8 port,
+ u16 offset, u16 size, u8 *data)
+{
+ struct mlx4_cmd_mailbox *inbox, *outbox;
+ struct mlx4_mad_ifc *inmad, *outmad;
+ struct mlx4_cable_info *cable_info;
+ u8 module_id, i2c_addr, page_num;
+ int ret;
+
+ if (size > MODULE_INFO_MAX_READ)
+ size = MODULE_INFO_MAX_READ;
+
+ ret = mlx4_get_module_id(dev, port, &module_id);
+ if (ret)
+ return ret;
+
+ switch (module_id) {
+ case MLX4_MODULE_ID_SFP:
+ mlx4_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+ break;
+ case MLX4_MODULE_ID_QSFP:
+ case MLX4_MODULE_ID_QSFP_PLUS:
+ case MLX4_MODULE_ID_QSFP28:
+ mlx4_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+ break;
+ default:
+ mlx4_err(dev, "Module ID not recognized: %#x\n", module_id);
+ return -EINVAL;
+ }
+
+ inbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(inbox))
+ return PTR_ERR(inbox);
+
+ outbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(outbox)) {
+ mlx4_free_cmd_mailbox(dev, inbox);
+ return PTR_ERR(outbox);
+ }
+
+ inmad = (struct mlx4_mad_ifc *)(inbox->buf);
+ outmad = (struct mlx4_mad_ifc *)(outbox->buf);
+
+ inmad->method = 0x1; /* Get */
+ inmad->class_version = 0x1;
+ inmad->mgmt_class = 0x1;
+ inmad->base_version = 0x1;
+ inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */
+
+ if (offset < I2C_PAGE_SIZE && offset + size > I2C_PAGE_SIZE)
+ /* Cross pages reads are not allowed
+ * read until offset 256 in low page
+ */
+ size -= offset + size - I2C_PAGE_SIZE;
+
+ cable_info = (struct mlx4_cable_info *)inmad->data;
+ cable_info->dev_mem_address = cpu_to_be16(offset);
+ cable_info->page_num = page_num;
+ cable_info->i2c_addr = i2c_addr;
+ cable_info->size = cpu_to_be16(size);
+
+ ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_NATIVE);
+ if (ret)
+ goto out;
+
+ if (be16_to_cpu(outmad->status)) {
+ /* Mad returned with bad status */
+ ret = be16_to_cpu(outmad->status);
+ mlx4_warn(dev,
+ "MLX4_CMD_MAD_IFC Get Module info attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n",
+ 0xFF60, port, i2c_addr, offset, size,
+ ret, cable_info_mad_err_str(ret));
+
+ if (i2c_addr == I2C_ADDR_HIGH &&
+ MAD_STATUS_2_CABLE_ERR(ret) == CABLE_INF_I2C_ADDR)
+ /* Some SFP cables do not support i2c slave
+ * address 0x51 (high page), abort silently.
+ */
+ ret = 0;
+ else
+ ret = -ret;
+ goto out;
+ }
+ cable_info = (struct mlx4_cable_info *)outmad->data;
+ memcpy(data, cable_info->data, size);
+ ret = size;
+out:
+ mlx4_free_cmd_mailbox(dev, inbox);
+ mlx4_free_cmd_mailbox(dev, outbox);
+ return ret;
+}
+EXPORT_SYMBOL(mlx4_get_module_info);
+
+int mlx4_max_tc(struct mlx4_dev *dev)
+{
+ u8 num_tc = dev->caps.max_tc_eth;
+
+ if (!num_tc)
+ num_tc = MLX4_TC_MAX_NUMBER;
+
+ return num_tc;
+}
+EXPORT_SYMBOL(mlx4_max_tc);
diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
new file mode 100644
index 000000000..ba361c5fb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+
+#include "mlx4.h"
+#include "fw.h"
+
+enum {
+ MLX4_RES_QP,
+ MLX4_RES_RDMARC,
+ MLX4_RES_ALTC,
+ MLX4_RES_AUXC,
+ MLX4_RES_SRQ,
+ MLX4_RES_CQ,
+ MLX4_RES_EQ,
+ MLX4_RES_DMPT,
+ MLX4_RES_CMPT,
+ MLX4_RES_MTT,
+ MLX4_RES_MCG,
+ MLX4_RES_NUM
+};
+
+static const char *res_name[] = {
+ [MLX4_RES_QP] = "QP",
+ [MLX4_RES_RDMARC] = "RDMARC",
+ [MLX4_RES_ALTC] = "ALTC",
+ [MLX4_RES_AUXC] = "AUXC",
+ [MLX4_RES_SRQ] = "SRQ",
+ [MLX4_RES_CQ] = "CQ",
+ [MLX4_RES_EQ] = "EQ",
+ [MLX4_RES_DMPT] = "DMPT",
+ [MLX4_RES_CMPT] = "CMPT",
+ [MLX4_RES_MTT] = "MTT",
+ [MLX4_RES_MCG] = "MCG",
+};
+
+u64 mlx4_make_profile(struct mlx4_dev *dev,
+ struct mlx4_profile *request,
+ struct mlx4_dev_cap *dev_cap,
+ struct mlx4_init_hca_param *init_hca)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource {
+ u64 size;
+ u64 start;
+ int type;
+ u32 num;
+ int log_num;
+ };
+
+ u64 total_size = 0;
+ struct mlx4_resource *profile;
+ struct sysinfo si;
+ int i, j;
+
+ profile = kcalloc(MLX4_RES_NUM, sizeof(*profile), GFP_KERNEL);
+ if (!profile)
+ return -ENOMEM;
+
+ /*
+ * We want to scale the number of MTTs with the size of the
+ * system memory, since it makes sense to register a lot of
+ * memory on a system with a lot of memory. As a heuristic,
+ * make sure we have enough MTTs to cover twice the system
+ * memory (with PAGE_SIZE entries).
+ *
+ * This number has to be a power of two and fit into 32 bits
+ * due to device limitations, so cap this at 2^31 as well.
+ * That limits us to 8TB of memory registration per HCA with
+ * 4KB pages, which is probably OK for the next few months.
+ */
+ si_meminfo(&si);
+ request->num_mtt =
+ roundup_pow_of_two(max_t(unsigned, request->num_mtt,
+ min(1UL << (31 - log_mtts_per_seg),
+ (si.totalram << 1) >> log_mtts_per_seg)));
+
+
+ profile[MLX4_RES_QP].size = dev_cap->qpc_entry_sz;
+ profile[MLX4_RES_RDMARC].size = dev_cap->rdmarc_entry_sz;
+ profile[MLX4_RES_ALTC].size = dev_cap->altc_entry_sz;
+ profile[MLX4_RES_AUXC].size = dev_cap->aux_entry_sz;
+ profile[MLX4_RES_SRQ].size = dev_cap->srq_entry_sz;
+ profile[MLX4_RES_CQ].size = dev_cap->cqc_entry_sz;
+ profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz;
+ profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz;
+ profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz;
+ profile[MLX4_RES_MTT].size = dev_cap->mtt_entry_sz;
+ profile[MLX4_RES_MCG].size = mlx4_get_mgm_entry_size(dev);
+
+ profile[MLX4_RES_QP].num = request->num_qp;
+ profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp;
+ profile[MLX4_RES_ALTC].num = request->num_qp;
+ profile[MLX4_RES_AUXC].num = request->num_qp;
+ profile[MLX4_RES_SRQ].num = request->num_srq;
+ profile[MLX4_RES_CQ].num = request->num_cq;
+ profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ? dev->phys_caps.num_phys_eqs :
+ min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
+ profile[MLX4_RES_DMPT].num = request->num_mpt;
+ profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS;
+ profile[MLX4_RES_MTT].num = request->num_mtt * (1 << log_mtts_per_seg);
+ profile[MLX4_RES_MCG].num = request->num_mcg;
+
+ for (i = 0; i < MLX4_RES_NUM; ++i) {
+ profile[i].type = i;
+ profile[i].num = roundup_pow_of_two(profile[i].num);
+ profile[i].log_num = ilog2(profile[i].num);
+ profile[i].size *= profile[i].num;
+ profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
+ }
+
+ /*
+ * Sort the resources in decreasing order of size. Since they
+ * all have sizes that are powers of 2, we'll be able to keep
+ * resources aligned to their size and pack them without gaps
+ * using the sorted order.
+ */
+ for (i = MLX4_RES_NUM; i > 0; --i)
+ for (j = 1; j < i; ++j) {
+ if (profile[j].size > profile[j - 1].size)
+ swap(profile[j], profile[j - 1]);
+ }
+
+ for (i = 0; i < MLX4_RES_NUM; ++i) {
+ if (profile[i].size) {
+ profile[i].start = total_size;
+ total_size += profile[i].size;
+ }
+
+ if (total_size > dev_cap->max_icm_sz) {
+ mlx4_err(dev, "Profile requires 0x%llx bytes; won't fit in 0x%llx bytes of context memory\n",
+ (unsigned long long) total_size,
+ (unsigned long long) dev_cap->max_icm_sz);
+ kfree(profile);
+ return -ENOMEM;
+ }
+
+ if (profile[i].size)
+ mlx4_dbg(dev, " profile[%2d] (%6s): 2^%02d entries @ 0x%10llx, size 0x%10llx\n",
+ i, res_name[profile[i].type],
+ profile[i].log_num,
+ (unsigned long long) profile[i].start,
+ (unsigned long long) profile[i].size);
+ }
+
+ mlx4_dbg(dev, "HCA context memory: reserving %d KB\n",
+ (int) (total_size >> 10));
+
+ for (i = 0; i < MLX4_RES_NUM; ++i) {
+ switch (profile[i].type) {
+ case MLX4_RES_QP:
+ dev->caps.num_qps = profile[i].num;
+ init_hca->qpc_base = profile[i].start;
+ init_hca->log_num_qps = profile[i].log_num;
+ break;
+ case MLX4_RES_RDMARC:
+ for (priv->qp_table.rdmarc_shift = 0;
+ request->num_qp << priv->qp_table.rdmarc_shift < profile[i].num;
+ ++priv->qp_table.rdmarc_shift)
+ ; /* nothing */
+ dev->caps.max_qp_dest_rdma = 1 << priv->qp_table.rdmarc_shift;
+ priv->qp_table.rdmarc_base = (u32) profile[i].start;
+ init_hca->rdmarc_base = profile[i].start;
+ init_hca->log_rd_per_qp = priv->qp_table.rdmarc_shift;
+ break;
+ case MLX4_RES_ALTC:
+ init_hca->altc_base = profile[i].start;
+ break;
+ case MLX4_RES_AUXC:
+ init_hca->auxc_base = profile[i].start;
+ break;
+ case MLX4_RES_SRQ:
+ dev->caps.num_srqs = profile[i].num;
+ init_hca->srqc_base = profile[i].start;
+ init_hca->log_num_srqs = profile[i].log_num;
+ break;
+ case MLX4_RES_CQ:
+ dev->caps.num_cqs = profile[i].num;
+ init_hca->cqc_base = profile[i].start;
+ init_hca->log_num_cqs = profile[i].log_num;
+ break;
+ case MLX4_RES_EQ:
+ if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
+ init_hca->log_num_eqs = 0x1f;
+ init_hca->eqc_base = profile[i].start;
+ init_hca->num_sys_eqs = dev_cap->num_sys_eqs;
+ } else {
+ dev->caps.num_eqs = roundup_pow_of_two(
+ min_t(unsigned,
+ dev_cap->max_eqs,
+ MAX_MSIX));
+ init_hca->eqc_base = profile[i].start;
+ init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
+ }
+ break;
+ case MLX4_RES_DMPT:
+ dev->caps.num_mpts = profile[i].num;
+ priv->mr_table.mpt_base = profile[i].start;
+ init_hca->dmpt_base = profile[i].start;
+ init_hca->log_mpt_sz = profile[i].log_num;
+ break;
+ case MLX4_RES_CMPT:
+ init_hca->cmpt_base = profile[i].start;
+ break;
+ case MLX4_RES_MTT:
+ dev->caps.num_mtts = profile[i].num;
+ priv->mr_table.mtt_base = profile[i].start;
+ init_hca->mtt_base = profile[i].start;
+ break;
+ case MLX4_RES_MCG:
+ init_hca->mc_base = profile[i].start;
+ init_hca->log_mc_entry_sz =
+ ilog2(mlx4_get_mgm_entry_size(dev));
+ init_hca->log_mc_table_sz = profile[i].log_num;
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ dev->caps.num_mgms = profile[i].num;
+ } else {
+ init_hca->log_mc_hash_sz =
+ profile[i].log_num - 1;
+ dev->caps.num_mgms = profile[i].num >> 1;
+ dev->caps.num_amgms = profile[i].num >> 1;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * PDs don't take any HCA memory, but we assign them as part
+ * of the HCA profile anyway.
+ */
+ dev->caps.num_pds = MLX4_NUM_PDS;
+
+ kfree(profile);
+ return total_size;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
new file mode 100644
index 000000000..427e7a318
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -0,0 +1,965 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/gfp.h>
+#include <linux/export.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
+
+#include "mlx4.h"
+#include "icm.h"
+
+/* QP to support BF should have bits 6,7 cleared */
+#define MLX4_BF_QP_SKIP_MASK 0xc0
+#define MLX4_MAX_BF_QP_RANGE 0x40
+
+void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
+{
+ struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+ struct mlx4_qp *qp;
+
+ spin_lock(&qp_table->lock);
+
+ qp = __mlx4_qp_lookup(dev, qpn);
+ if (qp)
+ refcount_inc(&qp->refcount);
+
+ spin_unlock(&qp_table->lock);
+
+ if (!qp) {
+ mlx4_dbg(dev, "Async event for none existent QP %08x\n", qpn);
+ return;
+ }
+
+ qp->event(qp, event_type);
+
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+/* used for INIT/CLOSE port logic */
+static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
+{
+ /* this procedure is called after we already know we are on the master */
+ /* qp0 is either the proxy qp0, or the real qp0 */
+ u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
+ *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
+
+ *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
+ qp->qpn <= dev->phys_caps.base_sqpn + 1;
+
+ return *real_qp0 || *proxy_qp0;
+}
+
+static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
+ struct mlx4_qp_context *context,
+ enum mlx4_qp_optpar optpar,
+ int sqd_event, struct mlx4_qp *qp, int native)
+{
+ static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
+ [MLX4_QP_STATE_RST] = {
+ [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
+ [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
+ [MLX4_QP_STATE_INIT] = MLX4_CMD_RST2INIT_QP,
+ },
+ [MLX4_QP_STATE_INIT] = {
+ [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
+ [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
+ [MLX4_QP_STATE_INIT] = MLX4_CMD_INIT2INIT_QP,
+ [MLX4_QP_STATE_RTR] = MLX4_CMD_INIT2RTR_QP,
+ },
+ [MLX4_QP_STATE_RTR] = {
+ [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
+ [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
+ [MLX4_QP_STATE_RTS] = MLX4_CMD_RTR2RTS_QP,
+ },
+ [MLX4_QP_STATE_RTS] = {
+ [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
+ [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
+ [MLX4_QP_STATE_RTS] = MLX4_CMD_RTS2RTS_QP,
+ [MLX4_QP_STATE_SQD] = MLX4_CMD_RTS2SQD_QP,
+ },
+ [MLX4_QP_STATE_SQD] = {
+ [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
+ [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
+ [MLX4_QP_STATE_RTS] = MLX4_CMD_SQD2RTS_QP,
+ [MLX4_QP_STATE_SQD] = MLX4_CMD_SQD2SQD_QP,
+ },
+ [MLX4_QP_STATE_SQER] = {
+ [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
+ [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
+ [MLX4_QP_STATE_RTS] = MLX4_CMD_SQERR2RTS_QP,
+ },
+ [MLX4_QP_STATE_ERR] = {
+ [MLX4_QP_STATE_RST] = MLX4_CMD_2RST_QP,
+ [MLX4_QP_STATE_ERR] = MLX4_CMD_2ERR_QP,
+ }
+ };
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_cmd_mailbox *mailbox;
+ int ret = 0;
+ int real_qp0 = 0;
+ int proxy_qp0 = 0;
+ u8 port;
+
+ if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
+ !op[cur_state][new_state])
+ return -EINVAL;
+
+ if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) {
+ ret = mlx4_cmd(dev, 0, qp->qpn, 2,
+ MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
+ if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
+ cur_state != MLX4_QP_STATE_RST &&
+ is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+ port = (qp->qpn & 1) + 1;
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 0;
+ }
+ return ret;
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ if (cur_state == MLX4_QP_STATE_RST && new_state == MLX4_QP_STATE_INIT) {
+ u64 mtt_addr = mlx4_mtt_addr(dev, mtt);
+ context->mtt_base_addr_h = mtt_addr >> 32;
+ context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+ context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
+ }
+
+ if ((cur_state == MLX4_QP_STATE_RTR) &&
+ (new_state == MLX4_QP_STATE_RTS) &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ context->roce_entropy =
+ cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
+
+ *(__be32 *) mailbox->buf = cpu_to_be32(optpar);
+ memcpy(mailbox->buf + 8, context, sizeof(*context));
+
+ ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
+ cpu_to_be32(qp->qpn);
+
+ ret = mlx4_cmd(dev, mailbox->dma,
+ qp->qpn | (!!sqd_event << 31),
+ new_state == MLX4_QP_STATE_RST ? 2 : 0,
+ op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
+
+ if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
+ port = (qp->qpn & 1) + 1;
+ if (cur_state != MLX4_QP_STATE_ERR &&
+ cur_state != MLX4_QP_STATE_RST &&
+ new_state == MLX4_QP_STATE_ERR) {
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 0;
+ } else if (new_state == MLX4_QP_STATE_RTR) {
+ if (proxy_qp0)
+ priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
+ else
+ priv->mfunc.master.qp0_state[port].qp0_active = 1;
+ }
+ }
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+
+int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
+ struct mlx4_qp_context *context,
+ enum mlx4_qp_optpar optpar,
+ int sqd_event, struct mlx4_qp *qp)
+{
+ return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context,
+ optpar, sqd_event, qp, 0);
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_modify);
+
+int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 flags)
+{
+ u32 uid;
+ int bf_qp = !!(flags & (u8)MLX4_RESERVE_ETH_BF_QP);
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+
+ if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp)
+ return -ENOMEM;
+
+ uid = MLX4_QP_TABLE_ZONE_GENERAL;
+ if (flags & (u8)MLX4_RESERVE_A0_QP) {
+ if (bf_qp)
+ uid = MLX4_QP_TABLE_ZONE_RAW_ETH;
+ else
+ uid = MLX4_QP_TABLE_ZONE_RSS;
+ }
+
+ *base = mlx4_zone_alloc_entries(qp_table->zones, uid, cnt, align,
+ bf_qp ? MLX4_BF_QP_SKIP_MASK : 0, NULL);
+ if (*base == -1)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
+ int *base, u8 flags, u8 usage)
+{
+ u32 in_modifier = RES_QP | (((u32)usage & 3) << 30);
+ u64 in_param = 0;
+ u64 out_param;
+ int err;
+
+ /* Turn off all unsupported QP allocation flags */
+ flags &= dev->caps.alloc_res_qp_mask;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt);
+ set_param_h(&in_param, align);
+ err = mlx4_cmd_imm(dev, in_param, &out_param,
+ in_modifier, RES_OP_RESERVE,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err)
+ return err;
+
+ *base = get_param_l(&out_param);
+ return 0;
+ }
+ return __mlx4_qp_reserve_range(dev, cnt, align, base, flags);
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
+
+void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+
+ if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
+ return;
+ mlx4_zone_free_entries_unique(qp_table->zones, base_qpn, cnt);
+}
+
+void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
+{
+ u64 in_param = 0;
+ int err;
+
+ if (!cnt)
+ return;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, base_qpn);
+ set_param_h(&in_param, cnt);
+ err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (err) {
+ mlx4_warn(dev, "Failed to release qp range base:%d cnt:%d\n",
+ base_qpn, cnt);
+ }
+ } else
+ __mlx4_qp_release_range(dev, base_qpn, cnt);
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
+
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+ int err;
+
+ err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
+ if (err)
+ goto err_out;
+
+ err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
+ if (err)
+ goto err_put_qp;
+
+ err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
+ if (err)
+ goto err_put_auxc;
+
+ err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
+ if (err)
+ goto err_put_altc;
+
+ err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
+ if (err)
+ goto err_put_rdmarc;
+
+ return 0;
+
+err_put_rdmarc:
+ mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
+
+err_put_altc:
+ mlx4_table_put(dev, &qp_table->altc_table, qpn);
+
+err_put_auxc:
+ mlx4_table_put(dev, &qp_table->auxc_table, qpn);
+
+err_put_qp:
+ mlx4_table_put(dev, &qp_table->qp_table, qpn);
+
+err_out:
+ return err;
+}
+
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+{
+ u64 param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&param, qpn);
+ return mlx4_cmd_imm(dev, param, &param, RES_QP, RES_OP_MAP_ICM,
+ MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ }
+ return __mlx4_qp_alloc_icm(dev, qpn);
+}
+
+void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+
+ mlx4_table_put(dev, &qp_table->cmpt_table, qpn);
+ mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
+ mlx4_table_put(dev, &qp_table->altc_table, qpn);
+ mlx4_table_put(dev, &qp_table->auxc_table, qpn);
+ mlx4_table_put(dev, &qp_table->qp_table, qpn);
+}
+
+static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, qpn);
+ if (mlx4_cmd(dev, in_param, RES_QP, RES_OP_MAP_ICM,
+ MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn);
+ } else
+ __mlx4_qp_free_icm(dev, qpn);
+}
+
+struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
+{
+ struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+ struct mlx4_qp *qp;
+
+ spin_lock_irq(&qp_table->lock);
+
+ qp = __mlx4_qp_lookup(dev, qpn);
+
+ spin_unlock_irq(&qp_table->lock);
+ return qp;
+}
+
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_qp_table *qp_table = &priv->qp_table;
+ int err;
+
+ if (!qpn)
+ return -EINVAL;
+
+ qp->qpn = qpn;
+
+ err = mlx4_qp_alloc_icm(dev, qpn);
+ if (err)
+ return err;
+
+ spin_lock_irq(&qp_table->lock);
+ err = radix_tree_insert(&dev->qp_table_tree, qp->qpn &
+ (dev->caps.num_qps - 1), qp);
+ spin_unlock_irq(&qp_table->lock);
+ if (err)
+ goto err_icm;
+
+ refcount_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+
+ return 0;
+
+err_icm:
+ mlx4_qp_free_icm(dev, qpn);
+ return err;
+}
+
+EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
+
+int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
+ enum mlx4_update_qp_attr attr,
+ struct mlx4_update_qp_params *params)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_update_qp_context *cmd;
+ u64 pri_addr_path_mask = 0;
+ u64 qp_mask = 0;
+ int err = 0;
+
+ if (!attr || (attr & ~MLX4_UPDATE_QP_SUPPORTED_ATTRS))
+ return -EINVAL;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ cmd = (struct mlx4_update_qp_context *)mailbox->buf;
+
+ if (attr & MLX4_UPDATE_QP_SMAC) {
+ pri_addr_path_mask |= 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX;
+ cmd->qp_context.pri_path.grh_mylmc = params->smac_index;
+ }
+
+ if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) {
+ if (!(dev->caps.flags2
+ & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+ mlx4_warn(dev,
+ "Trying to set src check LB, but it isn't supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ pri_addr_path_mask |=
+ 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB;
+ if (params->flags &
+ MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) {
+ cmd->qp_context.pri_path.fl |=
+ MLX4_FL_ETH_SRC_CHECK_MC_LB;
+ }
+ }
+
+ if (attr & MLX4_UPDATE_QP_VSD) {
+ qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD;
+ if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE)
+ cmd->qp_context.param3 |= cpu_to_be32(MLX4_STRIP_VLAN);
+ }
+
+ if (attr & MLX4_UPDATE_QP_RATE_LIMIT) {
+ qp_mask |= 1ULL << MLX4_UPD_QP_MASK_RATE_LIMIT;
+ cmd->qp_context.rate_limit_params = cpu_to_be16((params->rate_unit << 14) | params->rate_val);
+ }
+
+ if (attr & MLX4_UPDATE_QP_QOS_VPORT) {
+ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) {
+ mlx4_warn(dev, "Granular QoS per VF is not enabled\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP;
+ cmd->qp_context.qos_vport = params->qos_vport;
+ }
+
+ cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask);
+ cmd->qp_mask = cpu_to_be64(qp_mask);
+
+ err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0,
+ MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_update_qp);
+
+void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
+{
+ struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp_table->lock, flags);
+ radix_tree_delete(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1));
+ spin_unlock_irqrestore(&qp_table->lock, flags);
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_remove);
+
+void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
+{
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+ wait_for_completion(&qp->free);
+
+ mlx4_qp_free_icm(dev, qp->qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_free);
+
+static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
+{
+ return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+}
+
+#define MLX4_QP_TABLE_RSS_ETH_PRIORITY 2
+#define MLX4_QP_TABLE_RAW_ETH_PRIORITY 1
+#define MLX4_QP_TABLE_RAW_ETH_SIZE 256
+
+static int mlx4_create_zones(struct mlx4_dev *dev,
+ u32 reserved_bottom_general,
+ u32 reserved_top_general,
+ u32 reserved_bottom_rss,
+ u32 start_offset_rss,
+ u32 max_table_offset)
+{
+ struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+ struct mlx4_bitmap (*bitmap)[MLX4_QP_TABLE_ZONE_NUM] = NULL;
+ int bitmap_initialized = 0;
+ u32 last_offset;
+ int k;
+ int err;
+
+ qp_table->zones = mlx4_zone_allocator_create(MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP);
+
+ if (NULL == qp_table->zones)
+ return -ENOMEM;
+
+ bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
+
+ if (NULL == bitmap) {
+ err = -ENOMEM;
+ goto free_zone;
+ }
+
+ err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_GENERAL, dev->caps.num_qps,
+ (1 << 23) - 1, reserved_bottom_general,
+ reserved_top_general);
+
+ if (err)
+ goto free_bitmap;
+
+ ++bitmap_initialized;
+
+ err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_GENERAL,
+ MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO |
+ MLX4_ZONE_USE_RR, 0,
+ 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_GENERAL);
+
+ if (err)
+ goto free_bitmap;
+
+ err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_RSS,
+ reserved_bottom_rss,
+ reserved_bottom_rss - 1,
+ dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
+ reserved_bottom_rss - start_offset_rss);
+
+ if (err)
+ goto free_bitmap;
+
+ ++bitmap_initialized;
+
+ err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_RSS,
+ MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO |
+ MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO |
+ MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RSS_ETH_PRIORITY,
+ 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_RSS);
+
+ if (err)
+ goto free_bitmap;
+
+ last_offset = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
+ /* We have a single zone for the A0 steering QPs area of the FW. This area
+ * needs to be split into subareas. One set of subareas is for RSS QPs
+ * (in which qp number bits 6 and/or 7 are set); the other set of subareas
+ * is for RAW_ETH QPs, which require that both bits 6 and 7 are zero.
+ * Currently, the values returned by the FW (A0 steering area starting qp number
+ * and A0 steering area size) are such that there are only two subareas -- one
+ * for RSS and one for RAW_ETH.
+ */
+ for (k = MLX4_QP_TABLE_ZONE_RSS + 1; k < sizeof(*bitmap)/sizeof((*bitmap)[0]);
+ k++) {
+ int size;
+ u32 offset = start_offset_rss;
+ u32 bf_mask;
+ u32 requested_size;
+
+ /* Assuming MLX4_BF_QP_SKIP_MASK is consecutive ones, this calculates
+ * a mask of all LSB bits set until (and not including) the first
+ * set bit of MLX4_BF_QP_SKIP_MASK. For example, if MLX4_BF_QP_SKIP_MASK
+ * is 0xc0, bf_mask will be 0x3f.
+ */
+ bf_mask = (MLX4_BF_QP_SKIP_MASK & ~(MLX4_BF_QP_SKIP_MASK - 1)) - 1;
+ requested_size = min((u32)MLX4_QP_TABLE_RAW_ETH_SIZE, bf_mask + 1);
+
+ if (((last_offset & MLX4_BF_QP_SKIP_MASK) &&
+ ((int)(max_table_offset - last_offset)) >=
+ roundup_pow_of_two(MLX4_BF_QP_SKIP_MASK)) ||
+ (!(last_offset & MLX4_BF_QP_SKIP_MASK) &&
+ !((last_offset + requested_size - 1) &
+ MLX4_BF_QP_SKIP_MASK)))
+ size = requested_size;
+ else {
+ u32 candidate_offset =
+ (last_offset | MLX4_BF_QP_SKIP_MASK | bf_mask) + 1;
+
+ if (last_offset & MLX4_BF_QP_SKIP_MASK)
+ last_offset = candidate_offset;
+
+ /* From this point, the BF bits are 0 */
+
+ if (last_offset > max_table_offset) {
+ /* need to skip */
+ size = -1;
+ } else {
+ size = min3(max_table_offset - last_offset,
+ bf_mask - (last_offset & bf_mask),
+ requested_size);
+ if (size < requested_size) {
+ int candidate_size;
+
+ candidate_size = min3(
+ max_table_offset - candidate_offset,
+ bf_mask - (last_offset & bf_mask),
+ requested_size);
+
+ /* We will not take this path if last_offset was
+ * already set above to candidate_offset
+ */
+ if (candidate_size > size) {
+ last_offset = candidate_offset;
+ size = candidate_size;
+ }
+ }
+ }
+ }
+
+ if (size > 0) {
+ /* mlx4_bitmap_alloc_range will find a contiguous range of "size"
+ * QPs in which both bits 6 and 7 are zero, because we pass it the
+ * MLX4_BF_SKIP_MASK).
+ */
+ offset = mlx4_bitmap_alloc_range(
+ *bitmap + MLX4_QP_TABLE_ZONE_RSS,
+ size, 1,
+ MLX4_BF_QP_SKIP_MASK);
+
+ if (offset == (u32)-1) {
+ err = -ENOMEM;
+ break;
+ }
+
+ last_offset = offset + size;
+
+ err = mlx4_bitmap_init(*bitmap + k, roundup_pow_of_two(size),
+ roundup_pow_of_two(size) - 1, 0,
+ roundup_pow_of_two(size) - size);
+ } else {
+ /* Add an empty bitmap, we'll allocate from different zones (since
+ * at least one is reserved)
+ */
+ err = mlx4_bitmap_init(*bitmap + k, 1,
+ MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0,
+ 0);
+ mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0);
+ }
+
+ if (err)
+ break;
+
+ ++bitmap_initialized;
+
+ err = mlx4_zone_add_one(qp_table->zones, *bitmap + k,
+ MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO |
+ MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO |
+ MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RAW_ETH_PRIORITY,
+ offset, qp_table->zones_uids + k);
+
+ if (err)
+ break;
+ }
+
+ if (err)
+ goto free_bitmap;
+
+ qp_table->bitmap_gen = *bitmap;
+
+ return err;
+
+free_bitmap:
+ for (k = 0; k < bitmap_initialized; k++)
+ mlx4_bitmap_cleanup(*bitmap + k);
+ kfree(bitmap);
+free_zone:
+ mlx4_zone_allocator_destroy(qp_table->zones);
+ return err;
+}
+
+static void mlx4_cleanup_qp_zones(struct mlx4_dev *dev)
+{
+ struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+
+ if (qp_table->zones) {
+ int i;
+
+ for (i = 0;
+ i < sizeof(qp_table->zones_uids)/sizeof(qp_table->zones_uids[0]);
+ i++) {
+ struct mlx4_bitmap *bitmap =
+ mlx4_zone_get_bitmap(qp_table->zones,
+ qp_table->zones_uids[i]);
+
+ mlx4_zone_remove_one(qp_table->zones, qp_table->zones_uids[i]);
+ if (NULL == bitmap)
+ continue;
+
+ mlx4_bitmap_cleanup(bitmap);
+ }
+ mlx4_zone_allocator_destroy(qp_table->zones);
+ kfree(qp_table->bitmap_gen);
+ qp_table->bitmap_gen = NULL;
+ qp_table->zones = NULL;
+ }
+}
+
+int mlx4_init_qp_table(struct mlx4_dev *dev)
+{
+ struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
+ int err;
+ int reserved_from_top = 0;
+ int reserved_from_bot;
+ int k;
+ int fixed_reserved_from_bot_rv = 0;
+ int bottom_reserved_for_rss_bitmap;
+ u32 max_table_offset = dev->caps.dmfs_high_rate_qpn_base +
+ dev->caps.dmfs_high_rate_qpn_range;
+
+ spin_lock_init(&qp_table->lock);
+ INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
+
+ /* We reserve 2 extra QPs per port for the special QPs. The
+ * block of special QPs must be aligned to a multiple of 8, so
+ * round up.
+ *
+ * We also reserve the MSB of the 24-bit QP number to indicate
+ * that a QP is an XRC QP.
+ */
+ for (k = 0; k <= MLX4_QP_REGION_BOTTOM; k++)
+ fixed_reserved_from_bot_rv += dev->caps.reserved_qps_cnt[k];
+
+ if (fixed_reserved_from_bot_rv < max_table_offset)
+ fixed_reserved_from_bot_rv = max_table_offset;
+
+ /* We reserve at least 1 extra for bitmaps that we don't have enough space for*/
+ bottom_reserved_for_rss_bitmap =
+ roundup_pow_of_two(fixed_reserved_from_bot_rv + 1);
+ dev->phys_caps.base_sqpn = ALIGN(bottom_reserved_for_rss_bitmap, 8);
+
+ {
+ int sort[MLX4_NUM_QP_REGION];
+ int i, j;
+ int last_base = dev->caps.num_qps;
+
+ for (i = 1; i < MLX4_NUM_QP_REGION; ++i)
+ sort[i] = i;
+
+ for (i = MLX4_NUM_QP_REGION; i > MLX4_QP_REGION_BOTTOM; --i) {
+ for (j = MLX4_QP_REGION_BOTTOM + 2; j < i; ++j) {
+ if (dev->caps.reserved_qps_cnt[sort[j]] >
+ dev->caps.reserved_qps_cnt[sort[j - 1]])
+ swap(sort[j], sort[j - 1]);
+ }
+ }
+
+ for (i = MLX4_QP_REGION_BOTTOM + 1; i < MLX4_NUM_QP_REGION; ++i) {
+ last_base -= dev->caps.reserved_qps_cnt[sort[i]];
+ dev->caps.reserved_qps_base[sort[i]] = last_base;
+ reserved_from_top +=
+ dev->caps.reserved_qps_cnt[sort[i]];
+ }
+ }
+
+ /* Reserve 8 real SQPs in both native and SRIOV modes.
+ * In addition, in SRIOV mode, reserve 8 proxy SQPs per function
+ * (for all PFs and VFs), and 8 corresponding tunnel QPs.
+ * Each proxy SQP works opposite its own tunnel QP.
+ *
+ * The QPs are arranged as follows:
+ * a. 8 real SQPs
+ * b. All the proxy SQPs (8 per function)
+ * c. All the tunnel QPs (8 per function)
+ */
+ reserved_from_bot = mlx4_num_reserved_sqps(dev);
+ if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) {
+ mlx4_err(dev, "Number of reserved QPs is higher than number of QPs\n");
+ return -EINVAL;
+ }
+
+ err = mlx4_create_zones(dev, reserved_from_bot, reserved_from_bot,
+ bottom_reserved_for_rss_bitmap,
+ fixed_reserved_from_bot_rv,
+ max_table_offset);
+
+ if (err)
+ return err;
+
+ if (mlx4_is_mfunc(dev)) {
+ /* for PPF use */
+ dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
+ dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
+
+ /* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
+ * since the PF does not call mlx4_slave_caps */
+ dev->caps.spec_qps = kcalloc(dev->caps.num_ports,
+ sizeof(*dev->caps.spec_qps),
+ GFP_KERNEL);
+ if (!dev->caps.spec_qps) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (k = 0; k < dev->caps.num_ports; k++) {
+ dev->caps.spec_qps[k].qp0_proxy = dev->phys_caps.base_proxy_sqpn +
+ 8 * mlx4_master_func_num(dev) + k;
+ dev->caps.spec_qps[k].qp0_tunnel = dev->caps.spec_qps[k].qp0_proxy + 8 * MLX4_MFUNC_MAX;
+ dev->caps.spec_qps[k].qp1_proxy = dev->phys_caps.base_proxy_sqpn +
+ 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
+ dev->caps.spec_qps[k].qp1_tunnel = dev->caps.spec_qps[k].qp1_proxy + 8 * MLX4_MFUNC_MAX;
+ }
+ }
+
+
+ err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
+ if (err)
+ goto err_mem;
+
+ return err;
+
+err_mem:
+ kfree(dev->caps.spec_qps);
+ dev->caps.spec_qps = NULL;
+ mlx4_cleanup_qp_zones(dev);
+ return err;
+}
+
+void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
+{
+ if (mlx4_is_slave(dev))
+ return;
+
+ mlx4_CONF_SPECIAL_QP(dev, 0);
+
+ mlx4_cleanup_qp_zones(dev);
+}
+
+int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ struct mlx4_qp_context *context)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
+ MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+ if (!err)
+ memcpy(context, mailbox->buf + 8, sizeof(*context));
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_query);
+
+int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+ struct mlx4_qp_context *context,
+ struct mlx4_qp *qp, enum mlx4_qp_state *qp_state)
+{
+ int err;
+ int i;
+ enum mlx4_qp_state states[] = {
+ MLX4_QP_STATE_RST,
+ MLX4_QP_STATE_INIT,
+ MLX4_QP_STATE_RTR,
+ MLX4_QP_STATE_RTS
+ };
+
+ for (i = 0; i < ARRAY_SIZE(states) - 1; i++) {
+ context->flags &= cpu_to_be32(~(0xf << 28));
+ context->flags |= cpu_to_be32(states[i + 1] << 28);
+ if (states[i + 1] != MLX4_QP_STATE_RTR)
+ context->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP);
+ err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
+ context, 0, 0, qp);
+ if (err) {
+ mlx4_err(dev, "Failed to bring QP to state: %d with error: %d\n",
+ states[i + 1], err);
+ return err;
+ }
+
+ *qp_state = states[i + 1];
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
+{
+ struct mlx4_qp_context context;
+ struct mlx4_qp qp;
+ int err;
+
+ qp.qpn = qpn;
+ err = mlx4_qp_query(dev, &qp, &context);
+ if (!err) {
+ u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
+ u16 folded_dst = folded_qp(dest_qpn);
+ u16 folded_src = folded_qp(qpn);
+
+ return (dest_qpn != qpn) ?
+ ((folded_dst ^ folded_src) | 0xC000) :
+ folded_src | 0xC000;
+ }
+ return 0xdead;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c
new file mode 100644
index 000000000..0076d8858
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/reset.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+
+#include "mlx4.h"
+
+int mlx4_reset(struct mlx4_dev *dev)
+{
+ void __iomem *reset;
+ u32 *hca_header = NULL;
+ int pcie_cap;
+ u16 devctl;
+ u16 linkctl;
+ u16 vendor;
+ unsigned long end;
+ u32 sem;
+ int i;
+ int err = 0;
+
+#define MLX4_RESET_BASE 0xf0000
+#define MLX4_RESET_SIZE 0x400
+#define MLX4_SEM_OFFSET 0x3fc
+#define MLX4_RESET_OFFSET 0x10
+#define MLX4_RESET_VALUE swab32(1)
+
+#define MLX4_SEM_TIMEOUT_JIFFIES (10 * HZ)
+#define MLX4_RESET_TIMEOUT_JIFFIES (2 * HZ)
+
+ /*
+ * Reset the chip. This is somewhat ugly because we have to
+ * save off the PCI header before reset and then restore it
+ * after the chip reboots. We skip config space offsets 22
+ * and 23 since those have a special meaning.
+ */
+
+ /* Do we need to save off the full 4K PCI Express header?? */
+ hca_header = kmalloc(256, GFP_KERNEL);
+ if (!hca_header) {
+ err = -ENOMEM;
+ mlx4_err(dev, "Couldn't allocate memory to save HCA PCI header, aborting\n");
+ goto out;
+ }
+
+ pcie_cap = pci_pcie_cap(dev->persist->pdev);
+
+ for (i = 0; i < 64; ++i) {
+ if (i == 22 || i == 23)
+ continue;
+ if (pci_read_config_dword(dev->persist->pdev, i * 4,
+ hca_header + i)) {
+ err = -ENODEV;
+ mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n");
+ goto out;
+ }
+ }
+
+ reset = ioremap(pci_resource_start(dev->persist->pdev, 0) +
+ MLX4_RESET_BASE,
+ MLX4_RESET_SIZE);
+ if (!reset) {
+ err = -ENOMEM;
+ mlx4_err(dev, "Couldn't map HCA reset register, aborting\n");
+ goto out;
+ }
+
+ /* grab HW semaphore to lock out flash updates */
+ end = jiffies + MLX4_SEM_TIMEOUT_JIFFIES;
+ do {
+ sem = readl(reset + MLX4_SEM_OFFSET);
+ if (!sem)
+ break;
+
+ msleep(1);
+ } while (time_before(jiffies, end));
+
+ if (sem) {
+ mlx4_err(dev, "Failed to obtain HW semaphore, aborting\n");
+ err = -EAGAIN;
+ iounmap(reset);
+ goto out;
+ }
+
+ /* actually hit reset */
+ writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET);
+ iounmap(reset);
+
+ /* Docs say to wait one second before accessing device */
+ msleep(1000);
+
+ end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
+ do {
+ if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID,
+ &vendor) && vendor != 0xffff)
+ break;
+
+ msleep(1);
+ } while (time_before(jiffies, end));
+
+ if (vendor == 0xffff) {
+ err = -ENODEV;
+ mlx4_err(dev, "PCI device did not come back after reset, aborting\n");
+ goto out;
+ }
+
+ /* Now restore the PCI headers */
+ if (pcie_cap) {
+ devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
+ if (pcie_capability_write_word(dev->persist->pdev,
+ PCI_EXP_DEVCTL,
+ devctl)) {
+ err = -ENODEV;
+ mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n");
+ goto out;
+ }
+ linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
+ if (pcie_capability_write_word(dev->persist->pdev,
+ PCI_EXP_LNKCTL,
+ linkctl)) {
+ err = -ENODEV;
+ mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n");
+ goto out;
+ }
+ }
+
+ for (i = 0; i < 16; ++i) {
+ if (i * 4 == PCI_COMMAND)
+ continue;
+
+ if (pci_write_config_dword(dev->persist->pdev, i * 4,
+ hca_header[i])) {
+ err = -ENODEV;
+ mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n",
+ i);
+ goto out;
+ }
+ }
+
+ if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND,
+ hca_header[PCI_COMMAND / 4])) {
+ err = -ENODEV;
+ mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n");
+ goto out;
+ }
+
+out:
+ kfree(hca_header);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
new file mode 100644
index 000000000..550e48932
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -0,0 +1,5427 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+
+#include "mlx4.h"
+#include "fw.h"
+#include "mlx4_stats.h"
+
+#define MLX4_MAC_VALID (1ull << 63)
+#define MLX4_PF_COUNTERS_PER_PORT 2
+#define MLX4_VF_COUNTERS_PER_PORT 1
+
+struct mac_res {
+ struct list_head list;
+ u64 mac;
+ int ref_count;
+ u8 smac_index;
+ u8 port;
+};
+
+struct vlan_res {
+ struct list_head list;
+ u16 vlan;
+ int ref_count;
+ int vlan_index;
+ u8 port;
+};
+
+struct res_common {
+ struct list_head list;
+ struct rb_node node;
+ u64 res_id;
+ int owner;
+ int state;
+ int from_state;
+ int to_state;
+ int removing;
+ const char *func_name;
+};
+
+enum {
+ RES_ANY_BUSY = 1
+};
+
+struct res_gid {
+ struct list_head list;
+ u8 gid[16];
+ enum mlx4_protocol prot;
+ enum mlx4_steer_type steer;
+ u64 reg_id;
+};
+
+enum res_qp_states {
+ RES_QP_BUSY = RES_ANY_BUSY,
+
+ /* QP number was allocated */
+ RES_QP_RESERVED,
+
+ /* ICM memory for QP context was mapped */
+ RES_QP_MAPPED,
+
+ /* QP is in hw ownership */
+ RES_QP_HW
+};
+
+struct res_qp {
+ struct res_common com;
+ struct res_mtt *mtt;
+ struct res_cq *rcq;
+ struct res_cq *scq;
+ struct res_srq *srq;
+ struct list_head mcg_list;
+ spinlock_t mcg_spl;
+ int local_qpn;
+ atomic_t ref_count;
+ u32 qpc_flags;
+ /* saved qp params before VST enforcement in order to restore on VGT */
+ u8 sched_queue;
+ __be32 param3;
+ u8 vlan_control;
+ u8 fvl_rx;
+ u8 pri_path_fl;
+ u8 vlan_index;
+ u8 feup;
+};
+
+enum res_mtt_states {
+ RES_MTT_BUSY = RES_ANY_BUSY,
+ RES_MTT_ALLOCATED,
+};
+
+static inline const char *mtt_states_str(enum res_mtt_states state)
+{
+ switch (state) {
+ case RES_MTT_BUSY: return "RES_MTT_BUSY";
+ case RES_MTT_ALLOCATED: return "RES_MTT_ALLOCATED";
+ default: return "Unknown";
+ }
+}
+
+struct res_mtt {
+ struct res_common com;
+ int order;
+ atomic_t ref_count;
+};
+
+enum res_mpt_states {
+ RES_MPT_BUSY = RES_ANY_BUSY,
+ RES_MPT_RESERVED,
+ RES_MPT_MAPPED,
+ RES_MPT_HW,
+};
+
+struct res_mpt {
+ struct res_common com;
+ struct res_mtt *mtt;
+ int key;
+};
+
+enum res_eq_states {
+ RES_EQ_BUSY = RES_ANY_BUSY,
+ RES_EQ_RESERVED,
+ RES_EQ_HW,
+};
+
+struct res_eq {
+ struct res_common com;
+ struct res_mtt *mtt;
+};
+
+enum res_cq_states {
+ RES_CQ_BUSY = RES_ANY_BUSY,
+ RES_CQ_ALLOCATED,
+ RES_CQ_HW,
+};
+
+struct res_cq {
+ struct res_common com;
+ struct res_mtt *mtt;
+ atomic_t ref_count;
+};
+
+enum res_srq_states {
+ RES_SRQ_BUSY = RES_ANY_BUSY,
+ RES_SRQ_ALLOCATED,
+ RES_SRQ_HW,
+};
+
+struct res_srq {
+ struct res_common com;
+ struct res_mtt *mtt;
+ struct res_cq *cq;
+ atomic_t ref_count;
+};
+
+enum res_counter_states {
+ RES_COUNTER_BUSY = RES_ANY_BUSY,
+ RES_COUNTER_ALLOCATED,
+};
+
+struct res_counter {
+ struct res_common com;
+ int port;
+};
+
+enum res_xrcdn_states {
+ RES_XRCD_BUSY = RES_ANY_BUSY,
+ RES_XRCD_ALLOCATED,
+};
+
+struct res_xrcdn {
+ struct res_common com;
+ int port;
+};
+
+enum res_fs_rule_states {
+ RES_FS_RULE_BUSY = RES_ANY_BUSY,
+ RES_FS_RULE_ALLOCATED,
+};
+
+struct res_fs_rule {
+ struct res_common com;
+ int qpn;
+ /* VF DMFS mbox with port flipped */
+ void *mirr_mbox;
+ /* > 0 --> apply mirror when getting into HA mode */
+ /* = 0 --> un-apply mirror when getting out of HA mode */
+ u32 mirr_mbox_size;
+ struct list_head mirr_list;
+ u64 mirr_rule_id;
+};
+
+static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct res_common *res = rb_entry(node, struct res_common,
+ node);
+
+ if (res_id < res->res_id)
+ node = node->rb_left;
+ else if (res_id > res->res_id)
+ node = node->rb_right;
+ else
+ return res;
+ }
+ return NULL;
+}
+
+static int res_tracker_insert(struct rb_root *root, struct res_common *res)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct res_common *this = rb_entry(*new, struct res_common,
+ node);
+
+ parent = *new;
+ if (res->res_id < this->res_id)
+ new = &((*new)->rb_left);
+ else if (res->res_id > this->res_id)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&res->node, parent, new);
+ rb_insert_color(&res->node, root);
+
+ return 0;
+}
+
+enum qp_transition {
+ QP_TRANS_INIT2RTR,
+ QP_TRANS_RTR2RTS,
+ QP_TRANS_RTS2RTS,
+ QP_TRANS_SQERR2RTS,
+ QP_TRANS_SQD2SQD,
+ QP_TRANS_SQD2RTS
+};
+
+/* For Debug uses */
+static const char *resource_str(enum mlx4_resource rt)
+{
+ switch (rt) {
+ case RES_QP: return "RES_QP";
+ case RES_CQ: return "RES_CQ";
+ case RES_SRQ: return "RES_SRQ";
+ case RES_MPT: return "RES_MPT";
+ case RES_MTT: return "RES_MTT";
+ case RES_MAC: return "RES_MAC";
+ case RES_VLAN: return "RES_VLAN";
+ case RES_EQ: return "RES_EQ";
+ case RES_COUNTER: return "RES_COUNTER";
+ case RES_FS_RULE: return "RES_FS_RULE";
+ case RES_XRCD: return "RES_XRCD";
+ default: return "Unknown resource type !!!";
+ };
+}
+
+static void rem_slave_vlans(struct mlx4_dev *dev, int slave);
+static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+ int err = -EDQUOT;
+ int allocated, free, reserved, guaranteed, from_free;
+ int from_rsvd;
+
+ if (slave > dev->persist->num_vfs)
+ return -EINVAL;
+
+ spin_lock(&res_alloc->alloc_lock);
+ allocated = (port > 0) ?
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] :
+ res_alloc->allocated[slave];
+ free = (port > 0) ? res_alloc->res_port_free[port - 1] :
+ res_alloc->res_free;
+ reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] :
+ res_alloc->res_reserved;
+ guaranteed = res_alloc->guaranteed[slave];
+
+ if (allocated + count > res_alloc->quota[slave]) {
+ mlx4_warn(dev, "VF %d port %d res %s: quota exceeded, count %d alloc %d quota %d\n",
+ slave, port, resource_str(res_type), count,
+ allocated, res_alloc->quota[slave]);
+ goto out;
+ }
+
+ if (allocated + count <= guaranteed) {
+ err = 0;
+ from_rsvd = count;
+ } else {
+ /* portion may need to be obtained from free area */
+ if (guaranteed - allocated > 0)
+ from_free = count - (guaranteed - allocated);
+ else
+ from_free = count;
+
+ from_rsvd = count - from_free;
+
+ if (free - from_free >= reserved)
+ err = 0;
+ else
+ mlx4_warn(dev, "VF %d port %d res %s: free pool empty, free %d from_free %d rsvd %d\n",
+ slave, port, resource_str(res_type), free,
+ from_free, reserved);
+ }
+
+ if (!err) {
+ /* grant the request */
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] += count;
+ res_alloc->res_port_free[port - 1] -= count;
+ res_alloc->res_port_rsvd[port - 1] -= from_rsvd;
+ } else {
+ res_alloc->allocated[slave] += count;
+ res_alloc->res_free -= count;
+ res_alloc->res_reserved -= from_rsvd;
+ }
+ }
+
+out:
+ spin_unlock(&res_alloc->alloc_lock);
+ return err;
+}
+
+static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+ int allocated, guaranteed, from_rsvd;
+
+ if (slave > dev->persist->num_vfs)
+ return;
+
+ spin_lock(&res_alloc->alloc_lock);
+
+ allocated = (port > 0) ?
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] :
+ res_alloc->allocated[slave];
+ guaranteed = res_alloc->guaranteed[slave];
+
+ if (allocated - count >= guaranteed) {
+ from_rsvd = 0;
+ } else {
+ /* portion may need to be returned to reserved area */
+ if (allocated - guaranteed > 0)
+ from_rsvd = count - (allocated - guaranteed);
+ else
+ from_rsvd = count;
+ }
+
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) *
+ (dev->persist->num_vfs + 1) + slave] -= count;
+ res_alloc->res_port_free[port - 1] += count;
+ res_alloc->res_port_rsvd[port - 1] += from_rsvd;
+ } else {
+ res_alloc->allocated[slave] -= count;
+ res_alloc->res_free += count;
+ res_alloc->res_reserved += from_rsvd;
+ }
+
+ spin_unlock(&res_alloc->alloc_lock);
+ return;
+}
+
+static inline void initialize_res_quotas(struct mlx4_dev *dev,
+ struct resource_allocator *res_alloc,
+ enum mlx4_resource res_type,
+ int vf, int num_instances)
+{
+ res_alloc->guaranteed[vf] = num_instances /
+ (2 * (dev->persist->num_vfs + 1));
+ res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf];
+ if (vf == mlx4_master_func_num(dev)) {
+ res_alloc->res_free = num_instances;
+ if (res_type == RES_MTT) {
+ /* reserved mtts will be taken out of the PF allocation */
+ res_alloc->res_free += dev->caps.reserved_mtts;
+ res_alloc->guaranteed[vf] += dev->caps.reserved_mtts;
+ res_alloc->quota[vf] += dev->caps.reserved_mtts;
+ }
+ }
+}
+
+void mlx4_init_quotas(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pf;
+
+ /* quotas for VFs are initialized in mlx4_slave_cap */
+ if (mlx4_is_slave(dev))
+ return;
+
+ if (!mlx4_is_mfunc(dev)) {
+ dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps -
+ mlx4_num_reserved_sqps(dev);
+ dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs;
+ dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs;
+ dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts;
+ dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws;
+ return;
+ }
+
+ pf = mlx4_master_func_num(dev);
+ dev->quotas.qp =
+ priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf];
+ dev->quotas.cq =
+ priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf];
+ dev->quotas.srq =
+ priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf];
+ dev->quotas.mtt =
+ priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf];
+ dev->quotas.mpt =
+ priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
+}
+
+static int
+mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
+ struct resource_allocator *res_alloc,
+ int vf)
+{
+ struct mlx4_active_ports actv_ports;
+ int ports, counters_guaranteed;
+
+ /* For master, only allocate according to the number of phys ports */
+ if (vf == mlx4_master_func_num(dev))
+ return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
+
+ /* calculate real number of ports for the VF */
+ actv_ports = mlx4_get_active_ports(dev, vf);
+ ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+ counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
+
+ /* If we do not have enough counters for this VF, do not
+ * allocate any for it. '-1' to reduce the sink counter.
+ */
+ if ((res_alloc->res_reserved + counters_guaranteed) >
+ (dev->caps.max_counters - 1))
+ return 0;
+
+ return counters_guaranteed;
+}
+
+int mlx4_init_resource_tracker(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i, j;
+ int t;
+
+ priv->mfunc.master.res_tracker.slave_list =
+ kcalloc(dev->num_slaves, sizeof(struct slave_list),
+ GFP_KERNEL);
+ if (!priv->mfunc.master.res_tracker.slave_list)
+ return -ENOMEM;
+
+ for (i = 0 ; i < dev->num_slaves; i++) {
+ for (t = 0; t < MLX4_NUM_OF_RESOURCE_TYPE; ++t)
+ INIT_LIST_HEAD(&priv->mfunc.master.res_tracker.
+ slave_list[i].res_list[t]);
+ mutex_init(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ }
+
+ mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n",
+ dev->num_slaves);
+ for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++)
+ priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT;
+
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[i];
+ res_alloc->quota = kmalloc_array(dev->persist->num_vfs + 1,
+ sizeof(int),
+ GFP_KERNEL);
+ res_alloc->guaranteed = kmalloc_array(dev->persist->num_vfs + 1,
+ sizeof(int),
+ GFP_KERNEL);
+ if (i == RES_MAC || i == RES_VLAN)
+ res_alloc->allocated =
+ kcalloc(MLX4_MAX_PORTS *
+ (dev->persist->num_vfs + 1),
+ sizeof(int), GFP_KERNEL);
+ else
+ res_alloc->allocated =
+ kcalloc(dev->persist->num_vfs + 1,
+ sizeof(int), GFP_KERNEL);
+ /* Reduce the sink counter */
+ if (i == RES_COUNTER)
+ res_alloc->res_free = dev->caps.max_counters - 1;
+
+ if (!res_alloc->quota || !res_alloc->guaranteed ||
+ !res_alloc->allocated)
+ goto no_mem_err;
+
+ spin_lock_init(&res_alloc->alloc_lock);
+ for (t = 0; t < dev->persist->num_vfs + 1; t++) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev, t);
+ switch (i) {
+ case RES_QP:
+ initialize_res_quotas(dev, res_alloc, RES_QP,
+ t, dev->caps.num_qps -
+ dev->caps.reserved_qps -
+ mlx4_num_reserved_sqps(dev));
+ break;
+ case RES_CQ:
+ initialize_res_quotas(dev, res_alloc, RES_CQ,
+ t, dev->caps.num_cqs -
+ dev->caps.reserved_cqs);
+ break;
+ case RES_SRQ:
+ initialize_res_quotas(dev, res_alloc, RES_SRQ,
+ t, dev->caps.num_srqs -
+ dev->caps.reserved_srqs);
+ break;
+ case RES_MPT:
+ initialize_res_quotas(dev, res_alloc, RES_MPT,
+ t, dev->caps.num_mpts -
+ dev->caps.reserved_mrws);
+ break;
+ case RES_MTT:
+ initialize_res_quotas(dev, res_alloc, RES_MTT,
+ t, dev->caps.num_mtts -
+ dev->caps.reserved_mtts);
+ break;
+ case RES_MAC:
+ if (t == mlx4_master_func_num(dev)) {
+ int max_vfs_pport = 0;
+ /* Calculate the max vfs per port for */
+ /* both ports. */
+ for (j = 0; j < dev->caps.num_ports;
+ j++) {
+ struct mlx4_slaves_pport slaves_pport =
+ mlx4_phys_to_slaves_pport(dev, j + 1);
+ unsigned current_slaves =
+ bitmap_weight(slaves_pport.slaves,
+ dev->caps.num_ports) - 1;
+ if (max_vfs_pport < current_slaves)
+ max_vfs_pport =
+ current_slaves;
+ }
+ res_alloc->quota[t] =
+ MLX4_MAX_MAC_NUM -
+ 2 * max_vfs_pport;
+ res_alloc->guaranteed[t] = 2;
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_free[j] =
+ MLX4_MAX_MAC_NUM;
+ } else {
+ res_alloc->quota[t] = MLX4_MAX_MAC_NUM;
+ res_alloc->guaranteed[t] = 2;
+ }
+ break;
+ case RES_VLAN:
+ if (t == mlx4_master_func_num(dev)) {
+ res_alloc->quota[t] = MLX4_MAX_VLAN_NUM;
+ res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2;
+ for (j = 0; j < MLX4_MAX_PORTS; j++)
+ res_alloc->res_port_free[j] =
+ res_alloc->quota[t];
+ } else {
+ res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2;
+ res_alloc->guaranteed[t] = 0;
+ }
+ break;
+ case RES_COUNTER:
+ res_alloc->quota[t] = dev->caps.max_counters;
+ res_alloc->guaranteed[t] =
+ mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
+ break;
+ default:
+ break;
+ }
+ if (i == RES_MAC || i == RES_VLAN) {
+ for (j = 0; j < dev->caps.num_ports; j++)
+ if (test_bit(j, actv_ports.ports))
+ res_alloc->res_port_rsvd[j] +=
+ res_alloc->guaranteed[t];
+ } else {
+ res_alloc->res_reserved += res_alloc->guaranteed[t];
+ }
+ }
+ }
+ spin_lock_init(&priv->mfunc.master.res_tracker.lock);
+ return 0;
+
+no_mem_err:
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+ priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+ priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+ priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+ }
+ return -ENOMEM;
+}
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev,
+ enum mlx4_res_tracker_free_type type)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int i;
+
+ if (priv->mfunc.master.res_tracker.slave_list) {
+ if (type != RES_TR_FREE_STRUCTS_ONLY) {
+ for (i = 0; i < dev->num_slaves; i++) {
+ if (type == RES_TR_FREE_ALL ||
+ dev->caps.function != i)
+ mlx4_delete_all_resources_for_slave(dev, i);
+ }
+ /* free master's vlans */
+ i = dev->caps.function;
+ mlx4_reset_roce_gids(dev, i);
+ mutex_lock(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ rem_slave_vlans(dev, i);
+ mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[i].mutex);
+ }
+
+ if (type != RES_TR_FREE_SLAVES_ONLY) {
+ for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated);
+ priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed);
+ priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL;
+ kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota);
+ priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL;
+ }
+ kfree(priv->mfunc.master.res_tracker.slave_list);
+ priv->mfunc.master.res_tracker.slave_list = NULL;
+ }
+ }
+}
+
+static void update_pkey_index(struct mlx4_dev *dev, int slave,
+ struct mlx4_cmd_mailbox *inbox)
+{
+ u8 sched = *(u8 *)(inbox->buf + 64);
+ u8 orig_index = *(u8 *)(inbox->buf + 35);
+ u8 new_index;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int port;
+
+ port = (sched >> 6 & 1) + 1;
+
+ new_index = priv->virt2phys_pkey[slave][port - 1][orig_index];
+ *(u8 *)(inbox->buf + 35) = new_index;
+}
+
+static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
+ u8 slave)
+{
+ struct mlx4_qp_context *qp_ctx = inbox->buf + 8;
+ enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+ u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
+ int port;
+
+ if (MLX4_QP_ST_UD == ts) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port))
+ qp_ctx->pri_path.mgid_index =
+ mlx4_get_base_gid_ix(dev, slave, port) | 0x80;
+ else
+ qp_ctx->pri_path.mgid_index = slave | 0x80;
+
+ } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_XRC == ts || MLX4_QP_ST_UC == ts) {
+ if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port)) {
+ qp_ctx->pri_path.mgid_index +=
+ mlx4_get_base_gid_ix(dev, slave, port);
+ qp_ctx->pri_path.mgid_index &= 0x7f;
+ } else {
+ qp_ctx->pri_path.mgid_index = slave & 0x7F;
+ }
+ }
+ if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+ port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port)) {
+ qp_ctx->alt_path.mgid_index +=
+ mlx4_get_base_gid_ix(dev, slave, port);
+ qp_ctx->alt_path.mgid_index &= 0x7f;
+ } else {
+ qp_ctx->alt_path.mgid_index = slave & 0x7F;
+ }
+ }
+ }
+}
+
+static int handle_counter(struct mlx4_dev *dev, struct mlx4_qp_context *qpc,
+ u8 slave, int port);
+
+static int update_vport_qp_param(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *inbox,
+ u8 slave, u32 qpn)
+{
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ struct mlx4_vport_oper_state *vp_oper;
+ struct mlx4_priv *priv;
+ u32 qp_type;
+ int port, err = 0;
+
+ port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1;
+ priv = mlx4_priv(dev);
+ vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+ qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+
+ err = handle_counter(dev, qpc, slave, port);
+ if (err)
+ goto out;
+
+ if (MLX4_VGT != vp_oper->state.default_vlan) {
+ /* the reserved QPs (special, proxy, tunnel)
+ * do not operate over vlans
+ */
+ if (mlx4_is_qp_reserved(dev, qpn))
+ return 0;
+
+ /* force strip vlan by clear vsd, MLX QP refers to Raw Ethernet */
+ if (qp_type == MLX4_QP_ST_UD ||
+ (qp_type == MLX4_QP_ST_MLX && mlx4_is_eth(dev, port))) {
+ if (dev->caps.bmme_flags & MLX4_BMME_FLAG_VSD_INIT2RTR) {
+ *(__be32 *)inbox->buf =
+ cpu_to_be32(be32_to_cpu(*(__be32 *)inbox->buf) |
+ MLX4_QP_OPTPAR_VLAN_STRIPPING);
+ qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN);
+ } else {
+ struct mlx4_update_qp_params params = {.flags = 0};
+
+ err = mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, &params);
+ if (err)
+ goto out;
+ }
+ }
+
+ /* preserve IF_COUNTER flag */
+ qpc->pri_path.vlan_control &=
+ MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+ if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) {
+ qpc->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
+ } else if (0 != vp_oper->state.default_vlan) {
+ if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) {
+ /* vst QinQ should block untagged on TX,
+ * but cvlan is in payload and phv is set so
+ * hw see it as untagged. Block tagged instead.
+ */
+ qpc->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+ } else { /* vst 802.1Q */
+ qpc->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+ }
+ } else { /* priority tagged */
+ qpc->pri_path.vlan_control |=
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
+ }
+
+ qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN;
+ qpc->pri_path.vlan_index = vp_oper->vlan_idx;
+ qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN;
+ if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD))
+ qpc->pri_path.fl |= MLX4_FL_SV;
+ else
+ qpc->pri_path.fl |= MLX4_FL_CV;
+ qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
+ qpc->pri_path.sched_queue &= 0xC7;
+ qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3;
+ qpc->qos_vport = vp_oper->state.qos_vport;
+ }
+ if (vp_oper->state.spoofchk) {
+ qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC;
+ qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx;
+ }
+out:
+ return err;
+}
+
+static int mpt_mask(struct mlx4_dev *dev)
+{
+ return dev->caps.num_mpts - 1;
+}
+
+static const char *mlx4_resource_type_to_str(enum mlx4_resource t)
+{
+ switch (t) {
+ case RES_QP:
+ return "QP";
+ case RES_CQ:
+ return "CQ";
+ case RES_SRQ:
+ return "SRQ";
+ case RES_XRCD:
+ return "XRCD";
+ case RES_MPT:
+ return "MPT";
+ case RES_MTT:
+ return "MTT";
+ case RES_MAC:
+ return "MAC";
+ case RES_VLAN:
+ return "VLAN";
+ case RES_COUNTER:
+ return "COUNTER";
+ case RES_FS_RULE:
+ return "FS_RULE";
+ case RES_EQ:
+ return "EQ";
+ default:
+ return "INVALID RESOURCE";
+ }
+}
+
+static void *find_res(struct mlx4_dev *dev, u64 res_id,
+ enum mlx4_resource type)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+
+ return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type],
+ res_id);
+}
+
+static int _get_res(struct mlx4_dev *dev, int slave, u64 res_id,
+ enum mlx4_resource type,
+ void *res, const char *func_name)
+{
+ struct res_common *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = find_res(dev, res_id, type);
+ if (!r) {
+ err = -ENONET;
+ goto exit;
+ }
+
+ if (r->state == RES_ANY_BUSY) {
+ mlx4_warn(dev,
+ "%s(%d) trying to get resource %llx of type %s, but it's already taken by %s\n",
+ func_name, slave, res_id, mlx4_resource_type_to_str(type),
+ r->func_name);
+ err = -EBUSY;
+ goto exit;
+ }
+
+ if (r->owner != slave) {
+ err = -EPERM;
+ goto exit;
+ }
+
+ r->from_state = r->state;
+ r->state = RES_ANY_BUSY;
+ r->func_name = func_name;
+
+ if (res)
+ *((struct res_common **)res) = r;
+
+exit:
+ spin_unlock_irq(mlx4_tlock(dev));
+ return err;
+}
+
+#define get_res(dev, slave, res_id, type, res) \
+ _get_res((dev), (slave), (res_id), (type), (res), __func__)
+
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+ enum mlx4_resource type,
+ u64 res_id, int *slave)
+{
+
+ struct res_common *r;
+ int err = -ENOENT;
+ int id = res_id;
+
+ if (type == RES_QP)
+ id &= 0x7fffff;
+ spin_lock(mlx4_tlock(dev));
+
+ r = find_res(dev, id, type);
+ if (r) {
+ *slave = r->owner;
+ err = 0;
+ }
+ spin_unlock(mlx4_tlock(dev));
+
+ return err;
+}
+
+static void put_res(struct mlx4_dev *dev, int slave, u64 res_id,
+ enum mlx4_resource type)
+{
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = find_res(dev, res_id, type);
+ if (r) {
+ r->state = r->from_state;
+ r->func_name = "";
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port);
+
+static int handle_existing_counter(struct mlx4_dev *dev, u8 slave, int port,
+ int counter_index)
+{
+ struct res_common *r;
+ struct res_counter *counter;
+ int ret = 0;
+
+ if (counter_index == MLX4_SINK_COUNTER_INDEX(dev))
+ return ret;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = find_res(dev, counter_index, RES_COUNTER);
+ if (!r || r->owner != slave) {
+ ret = -EINVAL;
+ } else {
+ counter = container_of(r, struct res_counter, com);
+ if (!counter->port)
+ counter->port = port;
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+ return ret;
+}
+
+static int handle_unexisting_counter(struct mlx4_dev *dev,
+ struct mlx4_qp_context *qpc, u8 slave,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *tmp;
+ struct res_counter *counter;
+ u64 counter_idx = MLX4_SINK_COUNTER_INDEX(dev);
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry(tmp,
+ &tracker->slave_list[slave].res_list[RES_COUNTER],
+ list) {
+ counter = container_of(tmp, struct res_counter, com);
+ if (port == counter->port) {
+ qpc->pri_path.counter_index = counter->com.res_id;
+ spin_unlock_irq(mlx4_tlock(dev));
+ return 0;
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ /* No existing counter, need to allocate a new counter */
+ err = counter_alloc_res(dev, slave, RES_OP_RESERVE, 0, 0, &counter_idx,
+ port);
+ if (err == -ENOENT) {
+ err = 0;
+ } else if (err && err != -ENOSPC) {
+ mlx4_err(dev, "%s: failed to create new counter for slave %d err %d\n",
+ __func__, slave, err);
+ } else {
+ qpc->pri_path.counter_index = counter_idx;
+ mlx4_dbg(dev, "%s: alloc new counter for slave %d index %d\n",
+ __func__, slave, qpc->pri_path.counter_index);
+ err = 0;
+ }
+
+ return err;
+}
+
+static int handle_counter(struct mlx4_dev *dev, struct mlx4_qp_context *qpc,
+ u8 slave, int port)
+{
+ if (qpc->pri_path.counter_index != MLX4_SINK_COUNTER_INDEX(dev))
+ return handle_existing_counter(dev, slave, port,
+ qpc->pri_path.counter_index);
+
+ return handle_unexisting_counter(dev, qpc, slave, port);
+}
+
+static struct res_common *alloc_qp_tr(int id)
+{
+ struct res_qp *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_QP_RESERVED;
+ ret->local_qpn = id;
+ INIT_LIST_HEAD(&ret->mcg_list);
+ spin_lock_init(&ret->mcg_spl);
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_mtt_tr(int id, int order)
+{
+ struct res_mtt *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->order = order;
+ ret->com.state = RES_MTT_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_mpt_tr(int id, int key)
+{
+ struct res_mpt *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_MPT_RESERVED;
+ ret->key = key;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_eq_tr(int id)
+{
+ struct res_eq *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_EQ_RESERVED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_cq_tr(int id)
+{
+ struct res_cq *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_CQ_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_srq_tr(int id)
+{
+ struct res_srq *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_SRQ_ALLOCATED;
+ atomic_set(&ret->ref_count, 0);
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_counter_tr(int id, int port)
+{
+ struct res_counter *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_COUNTER_ALLOCATED;
+ ret->port = port;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_xrcdn_tr(int id)
+{
+ struct res_xrcdn *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_XRCD_ALLOCATED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_fs_rule_tr(u64 id, int qpn)
+{
+ struct res_fs_rule *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_FS_RULE_ALLOCATED;
+ ret->qpn = qpn;
+ return &ret->com;
+}
+
+static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave,
+ int extra)
+{
+ struct res_common *ret;
+
+ switch (type) {
+ case RES_QP:
+ ret = alloc_qp_tr(id);
+ break;
+ case RES_MPT:
+ ret = alloc_mpt_tr(id, extra);
+ break;
+ case RES_MTT:
+ ret = alloc_mtt_tr(id, extra);
+ break;
+ case RES_EQ:
+ ret = alloc_eq_tr(id);
+ break;
+ case RES_CQ:
+ ret = alloc_cq_tr(id);
+ break;
+ case RES_SRQ:
+ ret = alloc_srq_tr(id);
+ break;
+ case RES_MAC:
+ pr_err("implementation missing\n");
+ return NULL;
+ case RES_COUNTER:
+ ret = alloc_counter_tr(id, extra);
+ break;
+ case RES_XRCD:
+ ret = alloc_xrcdn_tr(id);
+ break;
+ case RES_FS_RULE:
+ ret = alloc_fs_rule_tr(id, extra);
+ break;
+ default:
+ return NULL;
+ }
+ if (ret)
+ ret->owner = slave;
+
+ return ret;
+}
+
+int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port,
+ struct mlx4_counter *data)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *tmp;
+ struct res_counter *counter;
+ int *counters_arr;
+ int i = 0, err = 0;
+
+ memset(data, 0, sizeof(*data));
+
+ counters_arr = kmalloc_array(dev->caps.max_counters,
+ sizeof(*counters_arr), GFP_KERNEL);
+ if (!counters_arr)
+ return -ENOMEM;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry(tmp,
+ &tracker->slave_list[slave].res_list[RES_COUNTER],
+ list) {
+ counter = container_of(tmp, struct res_counter, com);
+ if (counter->port == port) {
+ counters_arr[i] = (int)tmp->res_id;
+ i++;
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+ counters_arr[i] = -1;
+
+ i = 0;
+
+ while (counters_arr[i] != -1) {
+ err = mlx4_get_counter_stats(dev, counters_arr[i], data,
+ 0);
+ if (err) {
+ memset(data, 0, sizeof(*data));
+ goto table_changed;
+ }
+ i++;
+ }
+
+table_changed:
+ kfree(counters_arr);
+ return 0;
+}
+
+static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
+ enum mlx4_resource type, int extra)
+{
+ int i;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct res_common **res_arr;
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct rb_root *root = &tracker->res_tree[type];
+
+ res_arr = kcalloc(count, sizeof(*res_arr), GFP_KERNEL);
+ if (!res_arr)
+ return -ENOMEM;
+
+ for (i = 0; i < count; ++i) {
+ res_arr[i] = alloc_tr(base + i, type, slave, extra);
+ if (!res_arr[i]) {
+ for (--i; i >= 0; --i)
+ kfree(res_arr[i]);
+
+ kfree(res_arr);
+ return -ENOMEM;
+ }
+ }
+
+ spin_lock_irq(mlx4_tlock(dev));
+ for (i = 0; i < count; ++i) {
+ if (find_res(dev, base + i, type)) {
+ err = -EEXIST;
+ goto undo;
+ }
+ err = res_tracker_insert(root, res_arr[i]);
+ if (err)
+ goto undo;
+ list_add_tail(&res_arr[i]->list,
+ &tracker->slave_list[slave].res_list[type]);
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(res_arr);
+
+ return 0;
+
+undo:
+ for (--i; i >= 0; --i) {
+ rb_erase(&res_arr[i]->node, root);
+ list_del_init(&res_arr[i]->list);
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ for (i = 0; i < count; ++i)
+ kfree(res_arr[i]);
+
+ kfree(res_arr);
+
+ return err;
+}
+
+static int remove_qp_ok(struct res_qp *res)
+{
+ if (res->com.state == RES_QP_BUSY || atomic_read(&res->ref_count) ||
+ !list_empty(&res->mcg_list)) {
+ pr_err("resource tracker: fail to remove qp, state %d, ref_count %d\n",
+ res->com.state, atomic_read(&res->ref_count));
+ return -EBUSY;
+ } else if (res->com.state != RES_QP_RESERVED) {
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+static int remove_mtt_ok(struct res_mtt *res, int order)
+{
+ if (res->com.state == RES_MTT_BUSY ||
+ atomic_read(&res->ref_count)) {
+ pr_devel("%s-%d: state %s, ref_count %d\n",
+ __func__, __LINE__,
+ mtt_states_str(res->com.state),
+ atomic_read(&res->ref_count));
+ return -EBUSY;
+ } else if (res->com.state != RES_MTT_ALLOCATED)
+ return -EPERM;
+ else if (res->order != order)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int remove_mpt_ok(struct res_mpt *res)
+{
+ if (res->com.state == RES_MPT_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_MPT_RESERVED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_eq_ok(struct res_eq *res)
+{
+ if (res->com.state == RES_MPT_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_MPT_RESERVED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_counter_ok(struct res_counter *res)
+{
+ if (res->com.state == RES_COUNTER_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_COUNTER_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_xrcdn_ok(struct res_xrcdn *res)
+{
+ if (res->com.state == RES_XRCD_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_XRCD_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_fs_rule_ok(struct res_fs_rule *res)
+{
+ if (res->com.state == RES_FS_RULE_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_FS_RULE_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_cq_ok(struct res_cq *res)
+{
+ if (res->com.state == RES_CQ_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_CQ_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_srq_ok(struct res_srq *res)
+{
+ if (res->com.state == RES_SRQ_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_SRQ_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
+static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra)
+{
+ switch (type) {
+ case RES_QP:
+ return remove_qp_ok((struct res_qp *)res);
+ case RES_CQ:
+ return remove_cq_ok((struct res_cq *)res);
+ case RES_SRQ:
+ return remove_srq_ok((struct res_srq *)res);
+ case RES_MPT:
+ return remove_mpt_ok((struct res_mpt *)res);
+ case RES_MTT:
+ return remove_mtt_ok((struct res_mtt *)res, extra);
+ case RES_MAC:
+ return -EOPNOTSUPP;
+ case RES_EQ:
+ return remove_eq_ok((struct res_eq *)res);
+ case RES_COUNTER:
+ return remove_counter_ok((struct res_counter *)res);
+ case RES_XRCD:
+ return remove_xrcdn_ok((struct res_xrcdn *)res);
+ case RES_FS_RULE:
+ return remove_fs_rule_ok((struct res_fs_rule *)res);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
+ enum mlx4_resource type, int extra)
+{
+ u64 i;
+ int err;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ for (i = base; i < base + count; ++i) {
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
+ if (!r) {
+ err = -ENOENT;
+ goto out;
+ }
+ if (r->owner != slave) {
+ err = -EPERM;
+ goto out;
+ }
+ err = remove_ok(r, type, extra);
+ if (err)
+ goto out;
+ }
+
+ for (i = base; i < base + count; ++i) {
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
+ rb_erase(&r->node, &tracker->res_tree[type]);
+ list_del(&r->list);
+ kfree(r);
+ }
+ err = 0;
+
+out:
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn,
+ enum res_qp_states state, struct res_qp **qp,
+ int alloc)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_qp *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_QP_BUSY:
+ mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n",
+ __func__, r->com.res_id);
+ err = -EBUSY;
+ break;
+
+ case RES_QP_RESERVED:
+ if (r->com.state == RES_QP_MAPPED && !alloc)
+ break;
+
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id);
+ err = -EINVAL;
+ break;
+
+ case RES_QP_MAPPED:
+ if ((r->com.state == RES_QP_RESERVED && alloc) ||
+ r->com.state == RES_QP_HW)
+ break;
+ else {
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n",
+ r->com.res_id);
+ err = -EINVAL;
+ }
+
+ break;
+
+ case RES_QP_HW:
+ if (r->com.state != RES_QP_MAPPED)
+ err = -EINVAL;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_QP_BUSY;
+ if (qp)
+ *qp = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_mpt_states state, struct res_mpt **mpt)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_mpt *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_MPT_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_RESERVED:
+ if (r->com.state != RES_MPT_MAPPED)
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_MAPPED:
+ if (r->com.state != RES_MPT_RESERVED &&
+ r->com.state != RES_MPT_HW)
+ err = -EINVAL;
+ break;
+
+ case RES_MPT_HW:
+ if (r->com.state != RES_MPT_MAPPED)
+ err = -EINVAL;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_MPT_BUSY;
+ if (mpt)
+ *mpt = r;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_eq_states state, struct res_eq **eq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_eq *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index);
+ if (!r)
+ err = -ENOENT;
+ else if (r->com.owner != slave)
+ err = -EPERM;
+ else {
+ switch (state) {
+ case RES_EQ_BUSY:
+ err = -EINVAL;
+ break;
+
+ case RES_EQ_RESERVED:
+ if (r->com.state != RES_EQ_HW)
+ err = -EINVAL;
+ break;
+
+ case RES_EQ_HW:
+ if (r->com.state != RES_EQ_RESERVED)
+ err = -EINVAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_EQ_BUSY;
+ }
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ if (!err && eq)
+ *eq = r;
+
+ return err;
+}
+
+static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
+ enum res_cq_states state, struct res_cq **cq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_cq *r;
+ int err;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn);
+ if (!r) {
+ err = -ENOENT;
+ } else if (r->com.owner != slave) {
+ err = -EPERM;
+ } else if (state == RES_CQ_ALLOCATED) {
+ if (r->com.state != RES_CQ_HW)
+ err = -EINVAL;
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ else
+ err = 0;
+ } else if (state != RES_CQ_HW || r->com.state != RES_CQ_ALLOCATED) {
+ err = -EINVAL;
+ } else {
+ err = 0;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_CQ_BUSY;
+ if (cq)
+ *cq = r;
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
+ enum res_srq_states state, struct res_srq **srq)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_srq *r;
+ int err = 0;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index);
+ if (!r) {
+ err = -ENOENT;
+ } else if (r->com.owner != slave) {
+ err = -EPERM;
+ } else if (state == RES_SRQ_ALLOCATED) {
+ if (r->com.state != RES_SRQ_HW)
+ err = -EINVAL;
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ } else if (state != RES_SRQ_HW || r->com.state != RES_SRQ_ALLOCATED) {
+ err = -EINVAL;
+ }
+
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_SRQ_BUSY;
+ if (srq)
+ *srq = r;
+ }
+
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static void res_abort_move(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
+ if (r && (r->owner == slave))
+ r->state = r->from_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void res_end_move(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int id)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_common *r;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
+ if (r && (r->owner == slave))
+ r->state = r->to_state;
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn)
+{
+ return mlx4_is_qp_reserved(dev, qpn) &&
+ (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn));
+}
+
+static int fw_reserved(struct mlx4_dev *dev, int qpn)
+{
+ return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
+}
+
+static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err;
+ int count;
+ int align;
+ int base;
+ int qpn;
+ u8 flags;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ count = get_param_l(&in_param) & 0xffffff;
+ /* Turn off all unsupported QP allocation flags that the
+ * slave tries to set.
+ */
+ flags = (get_param_l(&in_param) >> 24) & dev->caps.alloc_res_qp_mask;
+ align = get_param_h(&in_param);
+ err = mlx4_grant_resource(dev, slave, RES_QP, count, 0);
+ if (err)
+ return err;
+
+ err = __mlx4_qp_reserve_range(dev, count, align, &base, flags);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ return err;
+ }
+
+ err = add_res_range(dev, slave, base, count, RES_QP, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ __mlx4_qp_release_range(dev, base, count);
+ return err;
+ }
+ set_param_l(out_param, base);
+ break;
+ case RES_OP_MAP_ICM:
+ qpn = get_param_l(&in_param) & 0x7fffff;
+ if (valid_reserved(dev, slave, qpn)) {
+ err = add_res_range(dev, slave, qpn, 1, RES_QP, 0);
+ if (err)
+ return err;
+ }
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED,
+ NULL, 1);
+ if (err)
+ return err;
+
+ if (!fw_reserved(dev, qpn)) {
+ err = __mlx4_qp_alloc_icm(dev, qpn);
+ if (err) {
+ res_abort_move(dev, slave, RES_QP, qpn);
+ return err;
+ }
+ }
+
+ res_end_move(dev, slave, RES_QP, qpn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int mtt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int base;
+ int order;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ order = get_param_l(&in_param);
+
+ err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0);
+ if (err)
+ return err;
+
+ base = __mlx4_alloc_mtt_range(dev, order);
+ if (base == -1) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ return -ENOMEM;
+ }
+
+ err = add_res_range(dev, slave, base, 1, RES_MTT, order);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ __mlx4_free_mtt_range(dev, base, order);
+ } else {
+ set_param_l(out_param, base);
+ }
+
+ return err;
+}
+
+static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int index;
+ int id;
+ struct res_mpt *mpt;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0);
+ if (err)
+ break;
+
+ index = __mlx4_mpt_reserve(dev);
+ if (index == -1) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ break;
+ }
+ id = index & mpt_mask(dev);
+
+ err = add_res_range(dev, slave, id, 1, RES_MPT, index);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ __mlx4_mpt_release(dev, index);
+ break;
+ }
+ set_param_l(out_param, index);
+ break;
+ case RES_OP_MAP_ICM:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id,
+ RES_MPT_MAPPED, &mpt);
+ if (err)
+ return err;
+
+ err = __mlx4_mpt_alloc_icm(dev, mpt->key);
+ if (err) {
+ res_abort_move(dev, slave, RES_MPT, id);
+ return err;
+ }
+
+ res_end_move(dev, slave, RES_MPT, id);
+ break;
+ }
+ return err;
+}
+
+static int cq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int cqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0);
+ if (err)
+ break;
+
+ err = __mlx4_cq_alloc_icm(dev, &cqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ break;
+ }
+
+ err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ __mlx4_cq_free_icm(dev, cqn);
+ break;
+ }
+
+ set_param_l(out_param, cqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int srqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0);
+ if (err)
+ break;
+
+ err = __mlx4_srq_alloc_icm(dev, &srqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ break;
+ }
+
+ err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ __mlx4_srq_free_icm(dev, srqn);
+ break;
+ }
+
+ set_param_l(out_param, srqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port,
+ u8 smac_index, u64 *mac)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->smac_index == smac_index && res->port == (u8) port) {
+ *mac = res->mac;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->mac == mac && res->port == (u8) port) {
+ /* mac found. update ref count */
+ ++res->ref_count;
+ return 0;
+ }
+ }
+
+ if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port))
+ return -EINVAL;
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
+ return -ENOMEM;
+ }
+ res->mac = mac;
+ res->port = (u8) port;
+ res->smac_index = smac_index;
+ res->ref_count = 1;
+ list_add_tail(&res->list,
+ &tracker->slave_list[slave].res_list[RES_MAC]);
+ return 0;
+}
+
+static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ if (res->mac == mac && res->port == (u8) port) {
+ if (!--res->ref_count) {
+ list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
+ kfree(res);
+ }
+ break;
+ }
+ }
+}
+
+static void rem_slave_macs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mac_list =
+ &tracker->slave_list[slave].res_list[RES_MAC];
+ struct mac_res *res, *tmp;
+ int i;
+
+ list_for_each_entry_safe(res, tmp, mac_list, list) {
+ list_del(&res->list);
+ /* dereference the mac the num times the slave referenced it */
+ for (i = 0; i < res->ref_count; i++)
+ __mlx4_unregister_mac(dev, res->port, res->mac);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, res->port);
+ kfree(res);
+ }
+}
+
+static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ int err = -EINVAL;
+ int port;
+ u64 mac;
+ u8 smac_index;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ port = !in_port ? get_param_l(out_param) : in_port;
+ port = mlx4_slave_convert_port(
+ dev, slave, port);
+
+ if (port < 0)
+ return -EINVAL;
+ mac = in_param;
+
+ err = __mlx4_register_mac(dev, port, mac);
+ if (err >= 0) {
+ smac_index = err;
+ set_param_l(out_param, err);
+ err = 0;
+ }
+
+ if (!err) {
+ err = mac_add_to_slave(dev, slave, mac, port, smac_index);
+ if (err)
+ __mlx4_unregister_mac(dev, port, mac);
+ }
+ return err;
+}
+
+static int vlan_add_to_slave(struct mlx4_dev *dev, int slave, u16 vlan,
+ int port, int vlan_index)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ if (res->vlan == vlan && res->port == (u8) port) {
+ /* vlan found. update ref count */
+ ++res->ref_count;
+ return 0;
+ }
+ }
+
+ if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port))
+ return -EINVAL;
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, port);
+ return -ENOMEM;
+ }
+ res->vlan = vlan;
+ res->port = (u8) port;
+ res->vlan_index = vlan_index;
+ res->ref_count = 1;
+ list_add_tail(&res->list,
+ &tracker->slave_list[slave].res_list[RES_VLAN]);
+ return 0;
+}
+
+
+static void vlan_del_from_slave(struct mlx4_dev *dev, int slave, u16 vlan,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ if (res->vlan == vlan && res->port == (u8) port) {
+ if (!--res->ref_count) {
+ list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_VLAN,
+ 1, port);
+ kfree(res);
+ }
+ break;
+ }
+ }
+}
+
+static void rem_slave_vlans(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *vlan_list =
+ &tracker->slave_list[slave].res_list[RES_VLAN];
+ struct vlan_res *res, *tmp;
+ int i;
+
+ list_for_each_entry_safe(res, tmp, vlan_list, list) {
+ list_del(&res->list);
+ /* dereference the vlan the num times the slave referenced it */
+ for (i = 0; i < res->ref_count; i++)
+ __mlx4_unregister_vlan(dev, res->port, res->vlan);
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port);
+ kfree(res);
+ }
+}
+
+static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ int err;
+ u16 vlan;
+ int vlan_index;
+ int port;
+
+ port = !in_port ? get_param_l(out_param) : in_port;
+
+ if (!port || op != RES_OP_RESERVE_AND_MAP)
+ return -EINVAL;
+
+ port = mlx4_slave_convert_port(
+ dev, slave, port);
+
+ if (port < 0)
+ return -EINVAL;
+ /* upstream kernels had NOP for reg/unreg vlan. Continue this. */
+ if (!in_port && port > 0 && port <= dev->caps.num_ports) {
+ slave_state[slave].old_vlan_api = true;
+ return 0;
+ }
+
+ vlan = (u16) in_param;
+
+ err = __mlx4_register_vlan(dev, port, vlan, &vlan_index);
+ if (!err) {
+ set_param_l(out_param, (u32) vlan_index);
+ err = vlan_add_to_slave(dev, slave, vlan, port, vlan_index);
+ if (err)
+ __mlx4_unregister_vlan(dev, port, vlan);
+ }
+ return err;
+}
+
+static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port)
+{
+ u32 index;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ err = mlx4_grant_resource(dev, slave, RES_COUNTER, 1, 0);
+ if (err)
+ return err;
+
+ err = __mlx4_counter_alloc(dev, &index);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+ return err;
+ }
+
+ err = add_res_range(dev, slave, index, 1, RES_COUNTER, port);
+ if (err) {
+ __mlx4_counter_free(dev, index);
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+ } else {
+ set_param_l(out_param, index);
+ }
+
+ return err;
+}
+
+static int xrcdn_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ u32 xrcdn;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ err = __mlx4_xrcd_alloc(dev, &xrcdn);
+ if (err)
+ return err;
+
+ err = add_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0);
+ if (err)
+ __mlx4_xrcd_free(dev, xrcdn);
+ else
+ set_param_l(out_param, xrcdn);
+
+ return err;
+}
+
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int alop = vhcr->op_modifier;
+
+ switch (vhcr->in_modifier & 0xFF) {
+ case RES_QP:
+ err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MTT:
+ err = mtt_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MPT:
+ err = mpt_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_CQ:
+ err = cq_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_SRQ:
+ err = srq_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MAC:
+ err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_VLAN:
+ err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_COUNTER:
+ err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param, 0);
+ break;
+
+ case RES_XRCD:
+ err = xrcdn_alloc_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param)
+{
+ int err;
+ int count;
+ int base;
+ int qpn;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ base = get_param_l(&in_param) & 0x7fffff;
+ count = get_param_h(&in_param);
+ err = rem_res_range(dev, slave, base, count, RES_QP, 0);
+ if (err)
+ break;
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ __mlx4_qp_release_range(dev, base, count);
+ break;
+ case RES_OP_MAP_ICM:
+ qpn = get_param_l(&in_param) & 0x7fffff;
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_RESERVED,
+ NULL, 0);
+ if (err)
+ return err;
+
+ if (!fw_reserved(dev, qpn))
+ __mlx4_qp_free_icm(dev, qpn);
+
+ res_end_move(dev, slave, RES_QP, qpn);
+
+ if (valid_reserved(dev, slave, qpn))
+ err = rem_res_range(dev, slave, qpn, 1, RES_QP, 0);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int mtt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int err = -EINVAL;
+ int base;
+ int order;
+
+ if (op != RES_OP_RESERVE_AND_MAP)
+ return err;
+
+ base = get_param_l(&in_param);
+ order = get_param_h(&in_param);
+ err = rem_res_range(dev, slave, base, 1, RES_MTT, order);
+ if (!err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
+ __mlx4_free_mtt_range(dev, base, order);
+ }
+ return err;
+}
+
+static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param)
+{
+ int err = -EINVAL;
+ int index;
+ int id;
+ struct res_mpt *mpt;
+
+ switch (op) {
+ case RES_OP_RESERVE:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = get_res(dev, slave, id, RES_MPT, &mpt);
+ if (err)
+ break;
+ index = mpt->key;
+ put_res(dev, slave, id, RES_MPT);
+
+ err = rem_res_range(dev, slave, id, 1, RES_MPT, 0);
+ if (err)
+ break;
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ __mlx4_mpt_release(dev, index);
+ break;
+ case RES_OP_MAP_ICM:
+ index = get_param_l(&in_param);
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id,
+ RES_MPT_RESERVED, &mpt);
+ if (err)
+ return err;
+
+ __mlx4_mpt_free_icm(dev, mpt->key);
+ res_end_move(dev, slave, RES_MPT, id);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+static int cq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int cqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ cqn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, cqn, 1, RES_CQ, 0);
+ if (err)
+ break;
+
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ __mlx4_cq_free_icm(dev, cqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int srqn;
+ int err;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ srqn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, srqn, 1, RES_SRQ, 0);
+ if (err)
+ break;
+
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ __mlx4_srq_free_icm(dev, srqn);
+ break;
+
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int in_port)
+{
+ int port;
+ int err = 0;
+
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ port = !in_port ? get_param_l(out_param) : in_port;
+ port = mlx4_slave_convert_port(
+ dev, slave, port);
+
+ if (port < 0)
+ return -EINVAL;
+ mac_del_from_slave(dev, slave, in_param, port);
+ __mlx4_unregister_mac(dev, port, in_param);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+
+}
+
+static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param, int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
+ int err = 0;
+
+ port = mlx4_slave_convert_port(
+ dev, slave, port);
+
+ if (port < 0)
+ return -EINVAL;
+ switch (op) {
+ case RES_OP_RESERVE_AND_MAP:
+ if (slave_state[slave].old_vlan_api)
+ return 0;
+ if (!port)
+ return -EINVAL;
+ vlan_del_from_slave(dev, slave, in_param, port);
+ __mlx4_unregister_vlan(dev, port, in_param);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int index;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ index = get_param_l(&in_param);
+ if (index == MLX4_SINK_COUNTER_INDEX(dev))
+ return 0;
+
+ err = rem_res_range(dev, slave, index, 1, RES_COUNTER, 0);
+ if (err)
+ return err;
+
+ __mlx4_counter_free(dev, index);
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+
+ return err;
+}
+
+static int xrcdn_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+ u64 in_param, u64 *out_param)
+{
+ int xrcdn;
+ int err;
+
+ if (op != RES_OP_RESERVE)
+ return -EINVAL;
+
+ xrcdn = get_param_l(&in_param);
+ err = rem_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0);
+ if (err)
+ return err;
+
+ __mlx4_xrcd_free(dev, xrcdn);
+
+ return err;
+}
+
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err = -EINVAL;
+ int alop = vhcr->op_modifier;
+
+ switch (vhcr->in_modifier & 0xFF) {
+ case RES_QP:
+ err = qp_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param);
+ break;
+
+ case RES_MTT:
+ err = mtt_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MPT:
+ err = mpt_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param);
+ break;
+
+ case RES_CQ:
+ err = cq_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_SRQ:
+ err = srq_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_MAC:
+ err = mac_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_VLAN:
+ err = vlan_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param,
+ (vhcr->in_modifier >> 8) & 0xFF);
+ break;
+
+ case RES_COUNTER:
+ err = counter_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+ break;
+
+ case RES_XRCD:
+ err = xrcdn_free_res(dev, slave, vhcr->op_modifier, alop,
+ vhcr->in_param, &vhcr->out_param);
+
+ default:
+ break;
+ }
+ return err;
+}
+
+/* ugly but other choices are uglier */
+static int mr_phys_mpt(struct mlx4_mpt_entry *mpt)
+{
+ return (be32_to_cpu(mpt->flags) >> 9) & 1;
+}
+
+static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt)
+{
+ return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8;
+}
+
+static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->mtt_sz);
+}
+
+static u32 mr_get_pd(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->pd_flags) & 0x00ffffff;
+}
+
+static int mr_is_fmr(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->pd_flags) & MLX4_MPT_PD_FLAG_FAST_REG;
+}
+
+static int mr_is_bind_enabled(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_BIND_ENABLE;
+}
+
+static int mr_is_region(struct mlx4_mpt_entry *mpt)
+{
+ return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_REGION;
+}
+
+static int qp_get_mtt_addr(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int srq_get_mtt_addr(struct mlx4_srq_context *srqc)
+{
+ return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int qp_get_mtt_size(struct mlx4_qp_context *qpc)
+{
+ int page_shift = (qpc->log_page_size & 0x3f) + 12;
+ int log_sq_size = (qpc->sq_size_stride >> 3) & 0xf;
+ int log_sq_sride = qpc->sq_size_stride & 7;
+ int log_rq_size = (qpc->rq_size_stride >> 3) & 0xf;
+ int log_rq_stride = qpc->rq_size_stride & 7;
+ int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1;
+ int rss = (be32_to_cpu(qpc->flags) >> 13) & 1;
+ u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0;
+ int sq_size;
+ int rq_size;
+ int total_pages;
+ int total_mem;
+ int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f;
+ int tot;
+
+ sq_size = 1 << (log_sq_size + log_sq_sride + 4);
+ rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4));
+ total_mem = sq_size + rq_size;
+ tot = (total_mem + (page_offset << 6)) >> page_shift;
+ total_pages = !tot ? 1 : roundup_pow_of_two(tot);
+
+ return total_pages;
+}
+
+static int check_mtt_range(struct mlx4_dev *dev, int slave, int start,
+ int size, struct res_mtt *mtt)
+{
+ int res_start = mtt->com.res_id;
+ int res_size = (1 << mtt->order);
+
+ if (start < res_start || start + size > res_start + res_size)
+ return -EPERM;
+ return 0;
+}
+
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mtt *mtt;
+ struct res_mpt *mpt = NULL;
+ int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz;
+ int phys;
+ int id;
+ u32 pd;
+ int pd_slave;
+
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt);
+ if (err)
+ return err;
+
+ /* Disable memory windows for VFs. */
+ if (!mr_is_region(inbox->buf)) {
+ err = -EPERM;
+ goto ex_abort;
+ }
+
+ /* Make sure that the PD bits related to the slave id are zeros. */
+ pd = mr_get_pd(inbox->buf);
+ pd_slave = (pd >> 17) & 0x7f;
+ if (pd_slave != 0 && --pd_slave != slave) {
+ err = -EPERM;
+ goto ex_abort;
+ }
+
+ if (mr_is_fmr(inbox->buf)) {
+ /* FMR and Bind Enable are forbidden in slave devices. */
+ if (mr_is_bind_enabled(inbox->buf)) {
+ err = -EPERM;
+ goto ex_abort;
+ }
+ /* FMR and Memory Windows are also forbidden. */
+ if (!mr_is_region(inbox->buf)) {
+ err = -EPERM;
+ goto ex_abort;
+ }
+ }
+
+ phys = mr_phys_mpt(inbox->buf);
+ if (!phys) {
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+
+ err = check_mtt_range(dev, slave, mtt_base,
+ mr_get_mtt_size(inbox->buf), mtt);
+ if (err)
+ goto ex_put;
+
+ mpt->mtt = mtt;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put;
+
+ if (!phys) {
+ atomic_inc(&mtt->ref_count);
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ }
+
+ res_end_move(dev, slave, RES_MPT, id);
+ return 0;
+
+ex_put:
+ if (!phys)
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_MPT, id);
+
+ return err;
+}
+
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mpt *mpt;
+ int id;
+
+ id = index & mpt_mask(dev);
+ err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt);
+ if (err)
+ return err;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+
+ if (mpt->mtt)
+ atomic_dec(&mpt->mtt->ref_count);
+
+ res_end_move(dev, slave, RES_MPT, id);
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_MPT, id);
+
+ return err;
+}
+
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier;
+ struct res_mpt *mpt;
+ int id;
+
+ id = index & mpt_mask(dev);
+ err = get_res(dev, slave, id, RES_MPT, &mpt);
+ if (err)
+ return err;
+
+ if (mpt->com.from_state == RES_MPT_MAPPED) {
+ /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do
+ * that, the VF must read the MPT. But since the MPT entry memory is not
+ * in the VF's virtual memory space, it must use QUERY_MPT to obtain the
+ * entry contents. To guarantee that the MPT cannot be changed, the driver
+ * must perform HW2SW_MPT before this query and return the MPT entry to HW
+ * ownership fofollowing the change. The change here allows the VF to
+ * perform QUERY_MPT also when the entry is in SW ownership.
+ */
+ struct mlx4_mpt_entry *mpt_entry = mlx4_table_find(
+ &mlx4_priv(dev)->mr_table.dmpt_table,
+ mpt->key, NULL);
+
+ if (NULL == mpt_entry || NULL == outbox->buf) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry));
+
+ err = 0;
+ } else if (mpt->com.from_state == RES_MPT_HW) {
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ } else {
+ err = -EBUSY;
+ goto out;
+ }
+
+
+out:
+ put_res(dev, slave, id, RES_MPT);
+ return err;
+}
+
+static int qp_get_rcqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->cqn_recv) & 0xffffff;
+}
+
+static int qp_get_scqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->cqn_send) & 0xffffff;
+}
+
+static u32 qp_get_srqn(struct mlx4_qp_context *qpc)
+{
+ return be32_to_cpu(qpc->srqn) & 0x1ffffff;
+}
+
+static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr,
+ struct mlx4_qp_context *context)
+{
+ u32 qpn = vhcr->in_modifier & 0xffffff;
+ u32 qkey = 0;
+
+ if (mlx4_get_parav_qkey(dev, qpn, &qkey))
+ return;
+
+ /* adjust qkey in qp context */
+ context->qkey = cpu_to_be32(qkey);
+}
+
+static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave,
+ struct mlx4_qp_context *qpc,
+ struct mlx4_cmd_mailbox *inbox);
+
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_mtt *mtt;
+ struct res_qp *qp;
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz;
+ int mtt_size = qp_get_mtt_size(qpc);
+ struct res_cq *rcq;
+ struct res_cq *scq;
+ int rcqn = qp_get_rcqn(qpc);
+ int scqn = qp_get_scqn(qpc);
+ u32 srqn = qp_get_srqn(qpc) & 0xffffff;
+ int use_srq = (qp_get_srqn(qpc) >> 24) & 1;
+ struct res_srq *srq;
+ int local_qpn = vhcr->in_modifier & 0xffffff;
+
+ err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
+ if (err)
+ return err;
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_HW, &qp, 0);
+ if (err)
+ return err;
+ qp->local_qpn = local_qpn;
+ qp->sched_queue = 0;
+ qp->param3 = 0;
+ qp->vlan_control = 0;
+ qp->fvl_rx = 0;
+ qp->pri_path_fl = 0;
+ qp->vlan_index = 0;
+ qp->feup = 0;
+ qp->qpc_flags = be32_to_cpu(qpc->flags);
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+
+ err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt);
+ if (err)
+ goto ex_put_mtt;
+
+ err = get_res(dev, slave, rcqn, RES_CQ, &rcq);
+ if (err)
+ goto ex_put_mtt;
+
+ if (scqn != rcqn) {
+ err = get_res(dev, slave, scqn, RES_CQ, &scq);
+ if (err)
+ goto ex_put_rcq;
+ } else
+ scq = rcq;
+
+ if (use_srq) {
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ goto ex_put_scq;
+ }
+
+ adjust_proxy_tun_qkey(dev, vhcr, qpc);
+ update_pkey_index(dev, slave, inbox);
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put_srq;
+ atomic_inc(&mtt->ref_count);
+ qp->mtt = mtt;
+ atomic_inc(&rcq->ref_count);
+ qp->rcq = rcq;
+ atomic_inc(&scq->ref_count);
+ qp->scq = scq;
+
+ if (scqn != rcqn)
+ put_res(dev, slave, scqn, RES_CQ);
+
+ if (use_srq) {
+ atomic_inc(&srq->ref_count);
+ put_res(dev, slave, srqn, RES_SRQ);
+ qp->srq = srq;
+ }
+
+ /* Save param3 for dynamic changes from VST back to VGT */
+ qp->param3 = qpc->param3;
+ put_res(dev, slave, rcqn, RES_CQ);
+ put_res(dev, slave, mtt_base, RES_MTT);
+ res_end_move(dev, slave, RES_QP, qpn);
+
+ return 0;
+
+ex_put_srq:
+ if (use_srq)
+ put_res(dev, slave, srqn, RES_SRQ);
+ex_put_scq:
+ if (scqn != rcqn)
+ put_res(dev, slave, scqn, RES_CQ);
+ex_put_rcq:
+ put_res(dev, slave, rcqn, RES_CQ);
+ex_put_mtt:
+ put_res(dev, slave, mtt_base, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_QP, qpn);
+
+ return err;
+}
+
+static int eq_get_mtt_addr(struct mlx4_eq_context *eqc)
+{
+ return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int eq_get_mtt_size(struct mlx4_eq_context *eqc)
+{
+ int log_eq_size = eqc->log_eq_size & 0x1f;
+ int page_shift = (eqc->log_page_size & 0x3f) + 12;
+
+ if (log_eq_size + 5 < page_shift)
+ return 1;
+
+ return 1 << (log_eq_size + 5 - page_shift);
+}
+
+static int cq_get_mtt_addr(struct mlx4_cq_context *cqc)
+{
+ return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8;
+}
+
+static int cq_get_mtt_size(struct mlx4_cq_context *cqc)
+{
+ int log_cq_size = (be32_to_cpu(cqc->logsize_usrpage) >> 24) & 0x1f;
+ int page_shift = (cqc->log_page_size & 0x3f) + 12;
+
+ if (log_cq_size + 5 < page_shift)
+ return 1;
+
+ return 1 << (log_cq_size + 5 - page_shift);
+}
+
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int eqn = vhcr->in_modifier;
+ int res_id = (slave << 10) | eqn;
+ struct mlx4_eq_context *eqc = inbox->buf;
+ int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz;
+ int mtt_size = eq_get_mtt_size(eqc);
+ struct res_eq *eq;
+ struct res_mtt *mtt;
+
+ err = add_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+ if (err)
+ return err;
+ err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_HW, &eq);
+ if (err)
+ goto out_add;
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto out_move;
+
+ err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt);
+ if (err)
+ goto out_put;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_put;
+
+ atomic_inc(&mtt->ref_count);
+ eq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_EQ, res_id);
+ return 0;
+
+out_put:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+out_move:
+ res_abort_move(dev, slave, RES_EQ, res_id);
+out_add:
+ rem_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+ return err;
+}
+
+int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ u8 get = vhcr->op_modifier;
+
+ if (get != 1)
+ return -EPERM;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+
+ return err;
+}
+
+static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
+ int len, struct res_mtt **res)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct res_mtt *mtt;
+ int err = -EINVAL;
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry(mtt, &tracker->slave_list[slave].res_list[RES_MTT],
+ com.list) {
+ if (!check_mtt_range(dev, slave, start, len, mtt)) {
+ *res = mtt;
+ mtt->com.from_state = mtt->com.state;
+ mtt->com.state = RES_MTT_BUSY;
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return err;
+}
+
+static int verify_qp_parameters(struct mlx4_dev *dev,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ enum qp_transition transition, u8 slave)
+{
+ u32 qp_type;
+ u32 qpn;
+ struct mlx4_qp_context *qp_ctx;
+ enum mlx4_qp_optpar optpar;
+ int port;
+ int num_gids;
+
+ qp_ctx = inbox->buf + 8;
+ qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
+ optpar = be32_to_cpu(*(__be32 *) inbox->buf);
+
+ if (slave != mlx4_master_func_num(dev)) {
+ qp_ctx->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP);
+ /* setting QP rate-limit is disallowed for VFs */
+ if (qp_ctx->rate_limit_params)
+ return -EPERM;
+ }
+
+ switch (qp_type) {
+ case MLX4_QP_ST_RC:
+ case MLX4_QP_ST_XRC:
+ case MLX4_QP_ST_UC:
+ switch (transition) {
+ case QP_TRANS_INIT2RTR:
+ case QP_TRANS_RTR2RTS:
+ case QP_TRANS_RTS2RTS:
+ case QP_TRANS_SQD2SQD:
+ case QP_TRANS_SQD2RTS:
+ if (slave != mlx4_master_func_num(dev)) {
+ if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) {
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+ num_gids = mlx4_get_slave_num_gids(dev, slave, port);
+ else
+ num_gids = 1;
+ if (qp_ctx->pri_path.mgid_index >= num_gids)
+ return -EINVAL;
+ }
+ if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+ port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1;
+ if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB)
+ num_gids = mlx4_get_slave_num_gids(dev, slave, port);
+ else
+ num_gids = 1;
+ if (qp_ctx->alt_path.mgid_index >= num_gids)
+ return -EINVAL;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case MLX4_QP_ST_MLX:
+ qpn = vhcr->in_modifier & 0x7fffff;
+ port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+ if (transition == QP_TRANS_INIT2RTR &&
+ slave != mlx4_master_func_num(dev) &&
+ mlx4_is_qp_reserved(dev, qpn) &&
+ !mlx4_vf_smi_enabled(dev, slave, port)) {
+ /* only enabled VFs may create MLX proxy QPs */
+ mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n",
+ __func__, slave, port);
+ return -EPERM;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_mtt mtt;
+ __be64 *page_list = inbox->buf;
+ u64 *pg_list = (u64 *)page_list;
+ int i;
+ struct res_mtt *rmtt = NULL;
+ int start = be64_to_cpu(page_list[0]);
+ int npages = vhcr->in_modifier;
+ int err;
+
+ err = get_containing_mtt(dev, slave, start, npages, &rmtt);
+ if (err)
+ return err;
+
+ /* Call the SW implementation of write_mtt:
+ * - Prepare a dummy mtt struct
+ * - Translate inbox contents to simple addresses in host endianness */
+ mtt.offset = 0; /* TBD this is broken but I don't handle it since
+ we don't really use it */
+ mtt.order = 0;
+ mtt.page_shift = 0;
+ for (i = 0; i < npages; ++i)
+ pg_list[i + 2] = (be64_to_cpu(page_list[i + 2]) & ~1ULL);
+
+ err = __mlx4_write_mtt(dev, &mtt, be64_to_cpu(page_list[0]), npages,
+ ((u64 *)page_list + 2));
+
+ if (rmtt)
+ put_res(dev, slave, rmtt->com.res_id, RES_MTT);
+
+ return err;
+}
+
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int eqn = vhcr->in_modifier;
+ int res_id = eqn | (slave << 10);
+ struct res_eq *eq;
+ int err;
+
+ err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_RESERVED, &eq);
+ if (err)
+ return err;
+
+ err = get_res(dev, slave, eq->mtt->com.res_id, RES_MTT, NULL);
+ if (err)
+ goto ex_abort;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put;
+
+ atomic_dec(&eq->mtt->ref_count);
+ put_res(dev, slave, eq->mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_EQ, res_id);
+ rem_res_range(dev, slave, res_id, 1, RES_EQ, 0);
+
+ return 0;
+
+ex_put:
+ put_res(dev, slave, eq->mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_EQ, res_id);
+
+ return err;
+}
+
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_slave_event_eq_info *event_eq;
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 in_modifier = 0;
+ int err;
+ int res_id;
+ struct res_eq *req;
+
+ if (!priv->mfunc.master.slave_state)
+ return -EINVAL;
+
+ /* check for slave valid, slave not PF, and slave active */
+ if (slave < 0 || slave > dev->persist->num_vfs ||
+ slave == dev->caps.function ||
+ !priv->mfunc.master.slave_state[slave].active)
+ return 0;
+
+ event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type];
+
+ /* Create the event only if the slave is registered */
+ if (event_eq->eqn < 0)
+ return 0;
+
+ mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ res_id = (slave << 10) | event_eq->eqn;
+ err = get_res(dev, slave, res_id, RES_EQ, &req);
+ if (err)
+ goto unlock;
+
+ if (req->com.from_state != RES_EQ_HW) {
+ err = -EINVAL;
+ goto put;
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto put;
+ }
+
+ if (eqe->type == MLX4_EVENT_TYPE_CMD) {
+ ++event_eq->token;
+ eqe->event.cmd.token = cpu_to_be16(event_eq->token);
+ }
+
+ memcpy(mailbox->buf, (u8 *) eqe, 28);
+
+ in_modifier = (slave & 0xff) | ((event_eq->eqn & 0x3ff) << 16);
+
+ err = mlx4_cmd(dev, mailbox->dma, in_modifier, 0,
+ MLX4_CMD_GEN_EQE, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ put_res(dev, slave, res_id, RES_EQ);
+ mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+
+put:
+ put_res(dev, slave, res_id, RES_EQ);
+
+unlock:
+ mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]);
+ return err;
+}
+
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int eqn = vhcr->in_modifier;
+ int res_id = eqn | (slave << 10);
+ struct res_eq *eq;
+ int err;
+
+ err = get_res(dev, slave, res_id, RES_EQ, &eq);
+ if (err)
+ return err;
+
+ if (eq->com.from_state != RES_EQ_HW) {
+ err = -EINVAL;
+ goto ex_put;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+
+ex_put:
+ put_res(dev, slave, res_id, RES_EQ);
+ return err;
+}
+
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int cqn = vhcr->in_modifier;
+ struct mlx4_cq_context *cqc = inbox->buf;
+ int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
+ struct res_cq *cq = NULL;
+ struct res_mtt *mtt;
+
+ err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq);
+ if (err)
+ return err;
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto out_move;
+ err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
+ if (err)
+ goto out_put;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_put;
+ atomic_inc(&mtt->ref_count);
+ cq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_CQ, cqn);
+ return 0;
+
+out_put:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+out_move:
+ res_abort_move(dev, slave, RES_CQ, cqn);
+ return err;
+}
+
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq = NULL;
+
+ err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto out_move;
+ atomic_dec(&cq->mtt->ref_count);
+ res_end_move(dev, slave, RES_CQ, cqn);
+ return 0;
+
+out_move:
+ res_abort_move(dev, slave, RES_CQ, cqn);
+ return err;
+}
+
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+ int err;
+
+ err = get_res(dev, slave, cqn, RES_CQ, &cq);
+ if (err)
+ return err;
+
+ if (cq->com.from_state != RES_CQ_HW)
+ goto ex_put;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ex_put:
+ put_res(dev, slave, cqn, RES_CQ);
+
+ return err;
+}
+
+static int handle_resize(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd,
+ struct res_cq *cq)
+{
+ int err;
+ struct res_mtt *orig_mtt;
+ struct res_mtt *mtt;
+ struct mlx4_cq_context *cqc = inbox->buf;
+ int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
+
+ err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt);
+ if (err)
+ return err;
+
+ if (orig_mtt != cq->mtt) {
+ err = -EINVAL;
+ goto ex_put;
+ }
+
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_put;
+
+ err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
+ if (err)
+ goto ex_put1;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put1;
+ atomic_dec(&orig_mtt->ref_count);
+ put_res(dev, slave, orig_mtt->com.res_id, RES_MTT);
+ atomic_inc(&mtt->ref_count);
+ cq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ return 0;
+
+ex_put1:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_put:
+ put_res(dev, slave, orig_mtt->com.res_id, RES_MTT);
+
+ return err;
+
+}
+
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int cqn = vhcr->in_modifier;
+ struct res_cq *cq;
+ int err;
+
+ err = get_res(dev, slave, cqn, RES_CQ, &cq);
+ if (err)
+ return err;
+
+ if (cq->com.from_state != RES_CQ_HW)
+ goto ex_put;
+
+ if (vhcr->op_modifier == 0) {
+ err = handle_resize(dev, slave, vhcr, inbox, outbox, cmd, cq);
+ goto ex_put;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ex_put:
+ put_res(dev, slave, cqn, RES_CQ);
+
+ return err;
+}
+
+static int srq_get_mtt_size(struct mlx4_srq_context *srqc)
+{
+ int log_srq_size = (be32_to_cpu(srqc->state_logsize_srqn) >> 24) & 0xf;
+ int log_rq_stride = srqc->logstride & 7;
+ int page_shift = (srqc->log_page_size & 0x3f) + 12;
+
+ if (log_srq_size + log_rq_stride + 4 < page_shift)
+ return 1;
+
+ return 1 << (log_srq_size + log_rq_stride + 4 - page_shift);
+}
+
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_mtt *mtt;
+ struct res_srq *srq = NULL;
+ struct mlx4_srq_context *srqc = inbox->buf;
+ int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz;
+
+ if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff))
+ return -EINVAL;
+
+ err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq);
+ if (err)
+ return err;
+ err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
+ if (err)
+ goto ex_abort;
+ err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc),
+ mtt);
+ if (err)
+ goto ex_put_mtt;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_put_mtt;
+
+ atomic_inc(&mtt->ref_count);
+ srq->mtt = mtt;
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ res_end_move(dev, slave, RES_SRQ, srqn);
+ return 0;
+
+ex_put_mtt:
+ put_res(dev, slave, mtt->com.res_id, RES_MTT);
+ex_abort:
+ res_abort_move(dev, slave, RES_SRQ, srqn);
+
+ return err;
+}
+
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq = NULL;
+
+ err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+ atomic_dec(&srq->mtt->ref_count);
+ if (srq->cq)
+ atomic_dec(&srq->cq->ref_count);
+ res_end_move(dev, slave, RES_SRQ, srqn);
+
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_SRQ, srqn);
+
+ return err;
+}
+
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ return err;
+ if (srq->com.from_state != RES_SRQ_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, srqn, RES_SRQ);
+ return err;
+}
+
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int srqn = vhcr->in_modifier;
+ struct res_srq *srq;
+
+ err = get_res(dev, slave, srqn, RES_SRQ, &srq);
+ if (err)
+ return err;
+
+ if (srq->com.from_state != RES_SRQ_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, srqn, RES_SRQ);
+ return err;
+}
+
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+
+ err = get_res(dev, slave, qpn, RES_QP, &qp);
+ if (err)
+ return err;
+ if (qp->com.from_state != RES_QP_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp_context *context = inbox->buf + 8;
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+static int adjust_qp_sched_queue(struct mlx4_dev *dev, int slave,
+ struct mlx4_qp_context *qpc,
+ struct mlx4_cmd_mailbox *inbox)
+{
+ enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *)inbox->buf);
+ u8 pri_sched_queue;
+ int port = mlx4_slave_convert_port(
+ dev, slave, (qpc->pri_path.sched_queue >> 6 & 1) + 1) - 1;
+
+ if (port < 0)
+ return -EINVAL;
+
+ pri_sched_queue = (qpc->pri_path.sched_queue & ~(1 << 6)) |
+ ((port & 1) << 6);
+
+ if (optpar & (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE) ||
+ qpc->pri_path.sched_queue || mlx4_is_eth(dev, port + 1)) {
+ qpc->pri_path.sched_queue = pri_sched_queue;
+ }
+
+ if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) {
+ port = mlx4_slave_convert_port(
+ dev, slave, (qpc->alt_path.sched_queue >> 6 & 1)
+ + 1) - 1;
+ if (port < 0)
+ return -EINVAL;
+ qpc->alt_path.sched_queue =
+ (qpc->alt_path.sched_queue & ~(1 << 6)) |
+ (port & 1) << 6;
+ }
+ return 0;
+}
+
+static int roce_verify_mac(struct mlx4_dev *dev, int slave,
+ struct mlx4_qp_context *qpc,
+ struct mlx4_cmd_mailbox *inbox)
+{
+ u64 mac;
+ int port;
+ u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff;
+ u8 sched = *(u8 *)(inbox->buf + 64);
+ u8 smac_ix;
+
+ port = (sched >> 6 & 1) + 1;
+ if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) {
+ smac_ix = qpc->pri_path.grh_mylmc & 0x7f;
+ if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac))
+ return -ENOENT;
+ }
+ return 0;
+}
+
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *qpc = inbox->buf + 8;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+ u8 orig_sched_queue;
+ u8 orig_vlan_control = qpc->pri_path.vlan_control;
+ u8 orig_fvl_rx = qpc->pri_path.fvl_rx;
+ u8 orig_pri_path_fl = qpc->pri_path.fl;
+ u8 orig_vlan_index = qpc->pri_path.vlan_index;
+ u8 orig_feup = qpc->pri_path.feup;
+
+ err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
+ if (err)
+ return err;
+ err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave);
+ if (err)
+ return err;
+
+ if (roce_verify_mac(dev, slave, qpc, inbox))
+ return -EINVAL;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, qpc);
+ orig_sched_queue = qpc->pri_path.sched_queue;
+
+ err = get_res(dev, slave, qpn, RES_QP, &qp);
+ if (err)
+ return err;
+ if (qp->com.from_state != RES_QP_HW) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = update_vport_qp_param(dev, inbox, slave, qpn);
+ if (err)
+ goto out;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+out:
+ /* if no error, save sched queue value passed in by VF. This is
+ * essentially the QOS value provided by the VF. This will be useful
+ * if we allow dynamic changes from VST back to VGT
+ */
+ if (!err) {
+ qp->sched_queue = orig_sched_queue;
+ qp->vlan_control = orig_vlan_control;
+ qp->fvl_rx = orig_fvl_rx;
+ qp->pri_path_fl = orig_pri_path_fl;
+ qp->vlan_index = orig_vlan_index;
+ qp->feup = orig_feup;
+ }
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = adjust_qp_sched_queue(dev, slave, context, inbox);
+ if (err)
+ return err;
+ err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave);
+ if (err)
+ return err;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = adjust_qp_sched_queue(dev, slave, context, inbox);
+ if (err)
+ return err;
+ err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave);
+ if (err)
+ return err;
+
+ update_pkey_index(dev, slave, inbox);
+ update_gid(dev, inbox, (u8)slave);
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+
+int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp_context *context = inbox->buf + 8;
+ int err = adjust_qp_sched_queue(dev, slave, context, inbox);
+ if (err)
+ return err;
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = adjust_qp_sched_queue(dev, slave, context, inbox);
+ if (err)
+ return err;
+ err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave);
+ if (err)
+ return err;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_gid(dev, inbox, (u8)slave);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct mlx4_qp_context *context = inbox->buf + 8;
+
+ err = adjust_qp_sched_queue(dev, slave, context, inbox);
+ if (err)
+ return err;
+ err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave);
+ if (err)
+ return err;
+
+ adjust_proxy_tun_qkey(dev, vhcr, context);
+ update_gid(dev, inbox, (u8)slave);
+ update_pkey_index(dev, slave, inbox);
+ return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+}
+
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int qpn = vhcr->in_modifier & 0x7fffff;
+ struct res_qp *qp;
+
+ err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, &qp, 0);
+ if (err)
+ return err;
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ if (err)
+ goto ex_abort;
+
+ atomic_dec(&qp->mtt->ref_count);
+ atomic_dec(&qp->rcq->ref_count);
+ atomic_dec(&qp->scq->ref_count);
+ if (qp->srq)
+ atomic_dec(&qp->srq->ref_count);
+ res_end_move(dev, slave, RES_QP, qpn);
+ return 0;
+
+ex_abort:
+ res_abort_move(dev, slave, RES_QP, qpn);
+
+ return err;
+}
+
+static struct res_gid *find_gid(struct mlx4_dev *dev, int slave,
+ struct res_qp *rqp, u8 *gid)
+{
+ struct res_gid *res;
+
+ list_for_each_entry(res, &rqp->mcg_list, list) {
+ if (!memcmp(res->gid, gid, 16))
+ return res;
+ }
+ return NULL;
+}
+
+static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp,
+ u8 *gid, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer, u64 reg_id)
+{
+ struct res_gid *res;
+ int err;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ spin_lock_irq(&rqp->mcg_spl);
+ if (find_gid(dev, slave, rqp, gid)) {
+ kfree(res);
+ err = -EEXIST;
+ } else {
+ memcpy(res->gid, gid, 16);
+ res->prot = prot;
+ res->steer = steer;
+ res->reg_id = reg_id;
+ list_add_tail(&res->list, &rqp->mcg_list);
+ err = 0;
+ }
+ spin_unlock_irq(&rqp->mcg_spl);
+
+ return err;
+}
+
+static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp,
+ u8 *gid, enum mlx4_protocol prot,
+ enum mlx4_steer_type steer, u64 *reg_id)
+{
+ struct res_gid *res;
+ int err;
+
+ spin_lock_irq(&rqp->mcg_spl);
+ res = find_gid(dev, slave, rqp, gid);
+ if (!res || res->prot != prot || res->steer != steer)
+ err = -EINVAL;
+ else {
+ *reg_id = res->reg_id;
+ list_del(&res->list);
+ kfree(res);
+ err = 0;
+ }
+ spin_unlock_irq(&rqp->mcg_spl);
+
+ return err;
+}
+
+static int qp_attach(struct mlx4_dev *dev, int slave, struct mlx4_qp *qp,
+ u8 gid[16], int block_loopback, enum mlx4_protocol prot,
+ enum mlx4_steer_type type, u64 *reg_id)
+{
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ int port = mlx4_slave_convert_port(dev, slave, gid[5]);
+ if (port < 0)
+ return port;
+ return mlx4_trans_to_dmfs_attach(dev, qp, gid, port,
+ block_loopback, prot,
+ reg_id);
+ }
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH) {
+ int port = mlx4_slave_convert_port(dev, slave, gid[5]);
+ if (port < 0)
+ return port;
+ gid[5] = port;
+ }
+ return mlx4_qp_attach_common(dev, qp, gid,
+ block_loopback, prot, type);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+ u8 gid[16], enum mlx4_protocol prot,
+ enum mlx4_steer_type type, u64 reg_id)
+{
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_flow_detach(dev, reg_id);
+ case MLX4_STEERING_MODE_B0:
+ return mlx4_qp_detach_common(dev, qp, gid, prot, type);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int mlx4_adjust_port(struct mlx4_dev *dev, int slave,
+ u8 *gid, enum mlx4_protocol prot)
+{
+ int real_port;
+
+ if (prot != MLX4_PROT_ETH)
+ return 0;
+
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0 ||
+ dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ real_port = mlx4_slave_convert_port(dev, slave, gid[5]);
+ if (real_port < 0)
+ return -EINVAL;
+ gid[5] = real_port;
+ }
+
+ return 0;
+}
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ struct mlx4_qp qp; /* dummy for calling attach/detach */
+ u8 *gid = inbox->buf;
+ enum mlx4_protocol prot = (vhcr->in_modifier >> 28) & 0x7;
+ int err;
+ int qpn;
+ struct res_qp *rqp;
+ u64 reg_id = 0;
+ int attach = vhcr->op_modifier;
+ int block_loopback = vhcr->in_modifier >> 31;
+ u8 steer_type_mask = 2;
+ enum mlx4_steer_type type = (gid[7] & steer_type_mask) >> 1;
+
+ qpn = vhcr->in_modifier & 0xffffff;
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
+ if (err)
+ return err;
+
+ qp.qpn = qpn;
+ if (attach) {
+ err = qp_attach(dev, slave, &qp, gid, block_loopback, prot,
+ type, &reg_id);
+ if (err) {
+ pr_err("Fail to attach rule to qp 0x%x\n", qpn);
+ goto ex_put;
+ }
+ err = add_mcg_res(dev, slave, rqp, gid, prot, type, reg_id);
+ if (err)
+ goto ex_detach;
+ } else {
+ err = mlx4_adjust_port(dev, slave, gid, prot);
+ if (err)
+ goto ex_put;
+
+ err = rem_mcg_res(dev, slave, rqp, gid, prot, type, &reg_id);
+ if (err)
+ goto ex_put;
+
+ err = qp_detach(dev, &qp, gid, prot, type, reg_id);
+ if (err)
+ pr_err("Fail to detach rule from qp 0x%x reg_id = 0x%llx\n",
+ qpn, reg_id);
+ }
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+
+ex_detach:
+ qp_detach(dev, &qp, gid, prot, type, reg_id);
+ex_put:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+/*
+ * MAC validation for Flow Steering rules.
+ * VF can attach rules only with a mac address which is assigned to it.
+ */
+static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
+ struct list_head *rlist)
+{
+ struct mac_res *res, *tmp;
+ __be64 be_mac;
+
+ /* make sure it isn't multicast or broadcast mac*/
+ if (!is_multicast_ether_addr(eth_header->eth.dst_mac) &&
+ !is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
+ list_for_each_entry_safe(res, tmp, rlist, list) {
+ be_mac = cpu_to_be64(res->mac << 16);
+ if (ether_addr_equal((u8 *)&be_mac, eth_header->eth.dst_mac))
+ return 0;
+ }
+ pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n",
+ eth_header->eth.dst_mac, slave);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * In case of missing eth header, append eth header with a MAC address
+ * assigned to the VF.
+ */
+static int add_eth_header(struct mlx4_dev *dev, int slave,
+ struct mlx4_cmd_mailbox *inbox,
+ struct list_head *rlist, int header_id)
+{
+ struct mac_res *res, *tmp;
+ u8 port;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ struct mlx4_net_trans_rule_hw_eth *eth_header;
+ struct mlx4_net_trans_rule_hw_ipv4 *ip_header;
+ struct mlx4_net_trans_rule_hw_tcp_udp *l4_header;
+ __be64 be_mac = 0;
+ __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+ port = ctrl->port;
+ eth_header = (struct mlx4_net_trans_rule_hw_eth *)(ctrl + 1);
+
+ /* Clear a space in the inbox for eth header */
+ switch (header_id) {
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ ip_header =
+ (struct mlx4_net_trans_rule_hw_ipv4 *)(eth_header + 1);
+ memmove(ip_header, eth_header,
+ sizeof(*ip_header) + sizeof(*l4_header));
+ break;
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ l4_header = (struct mlx4_net_trans_rule_hw_tcp_udp *)
+ (eth_header + 1);
+ memmove(l4_header, eth_header, sizeof(*l4_header));
+ break;
+ default:
+ return -EINVAL;
+ }
+ list_for_each_entry_safe(res, tmp, rlist, list) {
+ if (port == res->port) {
+ be_mac = cpu_to_be64(res->mac << 16);
+ break;
+ }
+ }
+ if (!be_mac) {
+ pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d\n",
+ port);
+ return -EINVAL;
+ }
+
+ memset(eth_header, 0, sizeof(*eth_header));
+ eth_header->size = sizeof(*eth_header) >> 2;
+ eth_header->id = cpu_to_be16(__sw_id_hw[MLX4_NET_TRANS_RULE_ID_ETH]);
+ memcpy(eth_header->dst_mac, &be_mac, ETH_ALEN);
+ memcpy(eth_header->dst_mac_msk, &mac_msk, ETH_ALEN);
+
+ return 0;
+
+}
+
+#define MLX4_UPD_QP_PATH_MASK_SUPPORTED ( \
+ 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX |\
+ 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)
+int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd_info)
+{
+ int err;
+ u32 qpn = vhcr->in_modifier & 0xffffff;
+ struct res_qp *rqp;
+ u64 mac;
+ unsigned port;
+ u64 pri_addr_path_mask;
+ struct mlx4_update_qp_context *cmd;
+ int smac_index;
+
+ cmd = (struct mlx4_update_qp_context *)inbox->buf;
+
+ pri_addr_path_mask = be64_to_cpu(cmd->primary_addr_path_mask);
+ if (cmd->qp_mask || cmd->secondary_addr_path_mask ||
+ (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED))
+ return -EPERM;
+
+ if ((pri_addr_path_mask &
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) &&
+ !(dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+ mlx4_warn(dev, "Src check LB for slave %d isn't supported\n",
+ slave);
+ return -EOPNOTSUPP;
+ }
+
+ /* Just change the smac for the QP */
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
+ if (err) {
+ mlx4_err(dev, "Updating qpn 0x%x for slave %d rejected\n", qpn, slave);
+ return err;
+ }
+
+ port = (rqp->sched_queue >> 6 & 1) + 1;
+
+ if (pri_addr_path_mask & (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX)) {
+ smac_index = cmd->qp_context.pri_path.grh_mylmc;
+ err = mac_find_smac_ix_in_slave(dev, slave, port,
+ smac_index, &mac);
+
+ if (err) {
+ mlx4_err(dev, "Failed to update qpn 0x%x, MAC is invalid. smac_ix: %d\n",
+ qpn, smac_index);
+ goto err_mac;
+ }
+ }
+
+ err = mlx4_cmd(dev, inbox->dma,
+ vhcr->in_modifier, 0,
+ MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Failed to update qpn on qpn 0x%x, command failed\n", qpn);
+ goto err_mac;
+ }
+
+err_mac:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+static u32 qp_attach_mbox_size(void *mbox)
+{
+ u32 size = sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ struct _rule_hw *rule_header;
+
+ rule_header = (struct _rule_hw *)(mbox + size);
+
+ while (rule_header->size) {
+ size += rule_header->size * sizeof(u32);
+ rule_header += 1;
+ }
+ return size;
+}
+
+static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule);
+
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC];
+ int err;
+ int qpn;
+ struct res_qp *rqp;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ struct _rule_hw *rule_header;
+ int header_id;
+ struct res_fs_rule *rrule;
+ u32 mbox_size;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+ err = mlx4_slave_convert_port(dev, slave, ctrl->port);
+ if (err <= 0)
+ return -EINVAL;
+ ctrl->port = err;
+ qpn = be32_to_cpu(ctrl->qpn) & 0xffffff;
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
+ if (err) {
+ pr_err("Steering rule with qpn 0x%x rejected\n", qpn);
+ return err;
+ }
+ rule_header = (struct _rule_hw *)(ctrl + 1);
+ header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id));
+
+ if (header_id == MLX4_NET_TRANS_RULE_ID_ETH)
+ mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
+
+ switch (header_id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ if (validate_eth_header_mac(slave, rule_header, rlist)) {
+ err = -EINVAL;
+ goto err_put_qp;
+ }
+ break;
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ break;
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ pr_warn("Can't attach FS rule without L2 headers, adding L2 header\n");
+ if (add_eth_header(dev, slave, inbox, rlist, header_id)) {
+ err = -EINVAL;
+ goto err_put_qp;
+ }
+ vhcr->in_modifier +=
+ sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2;
+ break;
+ default:
+ pr_err("Corrupted mailbox\n");
+ err = -EINVAL;
+ goto err_put_qp;
+ }
+
+ err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param,
+ vhcr->in_modifier, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ goto err_put_qp;
+
+
+ err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn);
+ if (err) {
+ mlx4_err(dev, "Fail to add flow steering resources\n");
+ goto err_detach;
+ }
+
+ err = get_res(dev, slave, vhcr->out_param, RES_FS_RULE, &rrule);
+ if (err)
+ goto err_detach;
+
+ mbox_size = qp_attach_mbox_size(inbox->buf);
+ rrule->mirr_mbox = kmalloc(mbox_size, GFP_KERNEL);
+ if (!rrule->mirr_mbox) {
+ err = -ENOMEM;
+ goto err_put_rule;
+ }
+ rrule->mirr_mbox_size = mbox_size;
+ rrule->mirr_rule_id = 0;
+ memcpy(rrule->mirr_mbox, inbox->buf, mbox_size);
+
+ /* set different port */
+ ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)rrule->mirr_mbox;
+ if (ctrl->port == 1)
+ ctrl->port = 2;
+ else
+ ctrl->port = 1;
+
+ if (mlx4_is_bonded(dev))
+ mlx4_do_mirror_rule(dev, rrule);
+
+ atomic_inc(&rqp->ref_count);
+
+err_put_rule:
+ put_res(dev, slave, vhcr->out_param, RES_FS_RULE);
+err_detach:
+ /* detach rule on error */
+ if (err)
+ mlx4_cmd(dev, vhcr->out_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+err_put_qp:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+static int mlx4_undo_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule)
+{
+ int err;
+
+ err = rem_res_range(dev, fs_rule->com.owner, fs_rule->com.res_id, 1, RES_FS_RULE, 0);
+ if (err) {
+ mlx4_err(dev, "Fail to remove flow steering resources\n");
+ return err;
+ }
+
+ mlx4_cmd(dev, fs_rule->com.res_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+ return 0;
+}
+
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ struct res_qp *rqp;
+ struct res_fs_rule *rrule;
+ u64 mirr_reg_id;
+ int qpn;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule);
+ if (err)
+ return err;
+
+ if (!rrule->mirr_mbox) {
+ mlx4_err(dev, "Mirror rules cannot be removed explicitly\n");
+ put_res(dev, slave, vhcr->in_param, RES_FS_RULE);
+ return -EINVAL;
+ }
+ mirr_reg_id = rrule->mirr_rule_id;
+ kfree(rrule->mirr_mbox);
+ qpn = rrule->qpn;
+
+ /* Release the rule form busy state before removal */
+ put_res(dev, slave, vhcr->in_param, RES_FS_RULE);
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
+ if (err)
+ return err;
+
+ if (mirr_reg_id && mlx4_is_bonded(dev)) {
+ err = get_res(dev, slave, mirr_reg_id, RES_FS_RULE, &rrule);
+ if (err) {
+ mlx4_err(dev, "Fail to get resource of mirror rule\n");
+ } else {
+ put_res(dev, slave, mirr_reg_id, RES_FS_RULE);
+ mlx4_undo_mirror_rule(dev, rrule);
+ }
+ }
+ err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0);
+ if (err) {
+ mlx4_err(dev, "Fail to remove flow steering resources\n");
+ goto out;
+ }
+
+ err = mlx4_cmd(dev, vhcr->in_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (!err)
+ atomic_dec(&rqp->ref_count);
+out:
+ put_res(dev, slave, qpn, RES_QP);
+ return err;
+}
+
+enum {
+ BUSY_MAX_RETRIES = 10
+};
+
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+ int index = vhcr->in_modifier & 0xffff;
+
+ err = get_res(dev, slave, index, RES_COUNTER, NULL);
+ if (err)
+ return err;
+
+ err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+ put_res(dev, slave, index, RES_COUNTER);
+ return err;
+}
+
+static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp)
+{
+ struct res_gid *rgid;
+ struct res_gid *tmp;
+ struct mlx4_qp qp; /* dummy for calling attach/detach */
+
+ list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) {
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ mlx4_flow_detach(dev, rgid->reg_id);
+ break;
+ case MLX4_STEERING_MODE_B0:
+ qp.qpn = rqp->local_qpn;
+ (void) mlx4_qp_detach_common(dev, &qp, rgid->gid,
+ rgid->prot, rgid->steer);
+ break;
+ }
+ list_del(&rgid->list);
+ kfree(rgid);
+ }
+}
+
+static int _move_all_busy(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type, int print)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *rlist = &tracker->slave_list[slave].res_list[type];
+ struct res_common *r;
+ struct res_common *tmp;
+ int busy;
+
+ busy = 0;
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(r, tmp, rlist, list) {
+ if (r->owner == slave) {
+ if (!r->removing) {
+ if (r->state == RES_ANY_BUSY) {
+ if (print)
+ mlx4_dbg(dev,
+ "%s id 0x%llx is busy\n",
+ resource_str(type),
+ r->res_id);
+ ++busy;
+ } else {
+ r->from_state = r->state;
+ r->state = RES_ANY_BUSY;
+ r->removing = 1;
+ }
+ }
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ return busy;
+}
+
+static int move_all_busy(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource type)
+{
+ unsigned long begin;
+ int busy;
+
+ begin = jiffies;
+ do {
+ busy = _move_all_busy(dev, slave, type, 0);
+ if (time_after(jiffies, begin + 5 * HZ))
+ break;
+ if (busy)
+ cond_resched();
+ } while (busy);
+
+ if (busy)
+ busy = _move_all_busy(dev, slave, type, 1);
+
+ return busy;
+}
+static void rem_slave_qps(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *qp_list =
+ &tracker->slave_list[slave].res_list[RES_QP];
+ struct res_qp *qp;
+ struct res_qp *tmp;
+ int state;
+ u64 in_param;
+ int qpn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_QP);
+ if (err)
+ mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(qp, tmp, qp_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (qp->com.owner == slave) {
+ qpn = qp->com.res_id;
+ detach_qp(dev, slave, qp);
+ state = qp->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_QP_RESERVED:
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&qp->com.node,
+ &tracker->res_tree[RES_QP]);
+ list_del(&qp->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (!valid_reserved(dev, slave, qpn)) {
+ __mlx4_qp_release_range(dev, qpn, 1);
+ mlx4_release_resource(dev, slave,
+ RES_QP, 1, 0);
+ }
+ kfree(qp);
+ state = 0;
+ break;
+ case RES_QP_MAPPED:
+ if (!valid_reserved(dev, slave, qpn))
+ __mlx4_qp_free_icm(dev, qpn);
+ state = RES_QP_RESERVED;
+ break;
+ case RES_QP_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param,
+ qp->local_qpn, 2,
+ MLX4_CMD_2RST_QP,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_qps: failed to move slave %d qpn %d to reset\n",
+ slave, qp->local_qpn);
+ atomic_dec(&qp->rcq->ref_count);
+ atomic_dec(&qp->scq->ref_count);
+ atomic_dec(&qp->mtt->ref_count);
+ if (qp->srq)
+ atomic_dec(&qp->srq->ref_count);
+ state = RES_QP_MAPPED;
+ break;
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_srqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *srq_list =
+ &tracker->slave_list[slave].res_list[RES_SRQ];
+ struct res_srq *srq;
+ struct res_srq *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int srqn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_SRQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs - too busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(srq, tmp, srq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (srq->com.owner == slave) {
+ srqn = srq->com.res_id;
+ state = srq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_SRQ_ALLOCATED:
+ __mlx4_srq_free_icm(dev, srqn);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&srq->com.node,
+ &tracker->res_tree[RES_SRQ]);
+ list_del(&srq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_SRQ, 1, 0);
+ kfree(srq);
+ state = 0;
+ break;
+
+ case RES_SRQ_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, srqn, 1,
+ MLX4_CMD_HW2SW_SRQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_srqs: failed to move slave %d srq %d to SW ownership\n",
+ slave, srqn);
+
+ atomic_dec(&srq->mtt->ref_count);
+ if (srq->cq)
+ atomic_dec(&srq->cq->ref_count);
+ state = RES_SRQ_ALLOCATED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_cqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *cq_list =
+ &tracker->slave_list[slave].res_list[RES_CQ];
+ struct res_cq *cq;
+ struct res_cq *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int cqn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_CQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs - too busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(cq, tmp, cq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (cq->com.owner == slave && !atomic_read(&cq->ref_count)) {
+ cqn = cq->com.res_id;
+ state = cq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_CQ_ALLOCATED:
+ __mlx4_cq_free_icm(dev, cqn);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&cq->com.node,
+ &tracker->res_tree[RES_CQ]);
+ list_del(&cq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_CQ, 1, 0);
+ kfree(cq);
+ state = 0;
+ break;
+
+ case RES_CQ_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, cqn, 1,
+ MLX4_CMD_HW2SW_CQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_cqs: failed to move slave %d cq %d to SW ownership\n",
+ slave, cqn);
+ atomic_dec(&cq->mtt->ref_count);
+ state = RES_CQ_ALLOCATED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_mrs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *mpt_list =
+ &tracker->slave_list[slave].res_list[RES_MPT];
+ struct res_mpt *mpt;
+ struct res_mpt *tmp;
+ int state;
+ u64 in_param;
+ LIST_HEAD(tlist);
+ int mptn;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_MPT);
+ if (err)
+ mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts - too busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(mpt, tmp, mpt_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (mpt->com.owner == slave) {
+ mptn = mpt->com.res_id;
+ state = mpt->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_MPT_RESERVED:
+ __mlx4_mpt_release(dev, mpt->key);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&mpt->com.node,
+ &tracker->res_tree[RES_MPT]);
+ list_del(&mpt->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_MPT, 1, 0);
+ kfree(mpt);
+ state = 0;
+ break;
+
+ case RES_MPT_MAPPED:
+ __mlx4_mpt_free_icm(dev, mpt->key);
+ state = RES_MPT_RESERVED;
+ break;
+
+ case RES_MPT_HW:
+ in_param = slave;
+ err = mlx4_cmd(dev, in_param, mptn, 0,
+ MLX4_CMD_HW2SW_MPT,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_mrs: failed to move slave %d mpt %d to SW ownership\n",
+ slave, mptn);
+ if (mpt->mtt)
+ atomic_dec(&mpt->mtt->ref_count);
+ state = RES_MPT_MAPPED;
+ break;
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_mtts(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *mtt_list =
+ &tracker->slave_list[slave].res_list[RES_MTT];
+ struct res_mtt *mtt;
+ struct res_mtt *tmp;
+ int state;
+ LIST_HEAD(tlist);
+ int base;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_MTT);
+ if (err)
+ mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts - too busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(mtt, tmp, mtt_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (mtt->com.owner == slave) {
+ base = mtt->com.res_id;
+ state = mtt->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_MTT_ALLOCATED:
+ __mlx4_free_mtt_range(dev, base,
+ mtt->order);
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&mtt->com.node,
+ &tracker->res_tree[RES_MTT]);
+ list_del(&mtt->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave, RES_MTT,
+ 1 << mtt->order, 0);
+ kfree(mtt);
+ state = 0;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+ struct res_fs_rule *mirr_rule;
+ u64 reg_id;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ if (!fs_rule->mirr_mbox) {
+ mlx4_err(dev, "rule mirroring mailbox is null\n");
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return -EINVAL;
+ }
+ memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size);
+ err = mlx4_cmd_imm(dev, mailbox->dma, &reg_id, fs_rule->mirr_mbox_size >> 2, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ if (err)
+ goto err;
+
+ err = add_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, fs_rule->qpn);
+ if (err)
+ goto err_detach;
+
+ err = get_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE, &mirr_rule);
+ if (err)
+ goto err_rem;
+
+ fs_rule->mirr_rule_id = reg_id;
+ mirr_rule->mirr_rule_id = 0;
+ mirr_rule->mirr_mbox_size = 0;
+ mirr_rule->mirr_mbox = NULL;
+ put_res(dev, fs_rule->com.owner, reg_id, RES_FS_RULE);
+
+ return 0;
+err_rem:
+ rem_res_range(dev, fs_rule->com.owner, reg_id, 1, RES_FS_RULE, 0);
+err_detach:
+ mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+err:
+ return err;
+}
+
+static int mlx4_mirror_fs_rules(struct mlx4_dev *dev, bool bond)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct rb_root *root = &tracker->res_tree[RES_FS_RULE];
+ struct rb_node *p;
+ struct res_fs_rule *fs_rule;
+ int err = 0;
+ LIST_HEAD(mirr_list);
+
+ for (p = rb_first(root); p; p = rb_next(p)) {
+ fs_rule = rb_entry(p, struct res_fs_rule, com.node);
+ if ((bond && fs_rule->mirr_mbox_size) ||
+ (!bond && !fs_rule->mirr_mbox_size))
+ list_add_tail(&fs_rule->mirr_list, &mirr_list);
+ }
+
+ list_for_each_entry(fs_rule, &mirr_list, mirr_list) {
+ if (bond)
+ err += mlx4_do_mirror_rule(dev, fs_rule);
+ else
+ err += mlx4_undo_mirror_rule(dev, fs_rule);
+ }
+ return err;
+}
+
+int mlx4_bond_fs_rules(struct mlx4_dev *dev)
+{
+ return mlx4_mirror_fs_rules(dev, true);
+}
+
+int mlx4_unbond_fs_rules(struct mlx4_dev *dev)
+{
+ return mlx4_mirror_fs_rules(dev, false);
+}
+
+static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *fs_rule_list =
+ &tracker->slave_list[slave].res_list[RES_FS_RULE];
+ struct res_fs_rule *fs_rule;
+ struct res_fs_rule *tmp;
+ int state;
+ u64 base;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_FS_RULE);
+ if (err)
+ mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (fs_rule->com.owner == slave) {
+ base = fs_rule->com.res_id;
+ state = fs_rule->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_FS_RULE_ALLOCATED:
+ /* detach rule */
+ err = mlx4_cmd(dev, base, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&fs_rule->com.node,
+ &tracker->res_tree[RES_FS_RULE]);
+ list_del(&fs_rule->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(fs_rule->mirr_mbox);
+ kfree(fs_rule);
+ state = 0;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_eqs(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *eq_list =
+ &tracker->slave_list[slave].res_list[RES_EQ];
+ struct res_eq *eq;
+ struct res_eq *tmp;
+ int err;
+ int state;
+ LIST_HEAD(tlist);
+ int eqn;
+
+ err = move_all_busy(dev, slave, RES_EQ);
+ if (err)
+ mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs - too busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(eq, tmp, eq_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (eq->com.owner == slave) {
+ eqn = eq->com.res_id;
+ state = eq->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_EQ_RESERVED:
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&eq->com.node,
+ &tracker->res_tree[RES_EQ]);
+ list_del(&eq->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(eq);
+ state = 0;
+ break;
+
+ case RES_EQ_HW:
+ err = mlx4_cmd(dev, slave, eqn & 0x3ff,
+ 1, MLX4_CMD_HW2SW_EQ,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ mlx4_dbg(dev, "rem_slave_eqs: failed to move slave %d eqs %d to SW ownership\n",
+ slave, eqn & 0x3ff);
+ atomic_dec(&eq->mtt->ref_count);
+ state = RES_EQ_RESERVED;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+static void rem_slave_counters(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *counter_list =
+ &tracker->slave_list[slave].res_list[RES_COUNTER];
+ struct res_counter *counter;
+ struct res_counter *tmp;
+ int err;
+ int *counters_arr = NULL;
+ int i, j;
+
+ err = move_all_busy(dev, slave, RES_COUNTER);
+ if (err)
+ mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n",
+ slave);
+
+ counters_arr = kmalloc_array(dev->caps.max_counters,
+ sizeof(*counters_arr), GFP_KERNEL);
+ if (!counters_arr)
+ return;
+
+ do {
+ i = 0;
+ j = 0;
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
+ if (counter->com.owner == slave) {
+ counters_arr[i++] = counter->com.res_id;
+ rb_erase(&counter->com.node,
+ &tracker->res_tree[RES_COUNTER]);
+ list_del(&counter->com.list);
+ kfree(counter);
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ while (j < i) {
+ __mlx4_counter_free(dev, counters_arr[j++]);
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+ }
+ } while (i);
+
+ kfree(counters_arr);
+}
+
+static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+ struct list_head *xrcdn_list =
+ &tracker->slave_list[slave].res_list[RES_XRCD];
+ struct res_xrcdn *xrcd;
+ struct res_xrcdn *tmp;
+ int err;
+ int xrcdn;
+
+ err = move_all_busy(dev, slave, RES_XRCD);
+ if (err)
+ mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns - too busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) {
+ if (xrcd->com.owner == slave) {
+ xrcdn = xrcd->com.res_id;
+ rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]);
+ list_del(&xrcd->com.list);
+ kfree(xrcd);
+ __mlx4_xrcd_free(dev, xrcdn);
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ mlx4_reset_roce_gids(dev, slave);
+ mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
+ rem_slave_vlans(dev, slave);
+ rem_slave_macs(dev, slave);
+ rem_slave_fs_rule(dev, slave);
+ rem_slave_qps(dev, slave);
+ rem_slave_srqs(dev, slave);
+ rem_slave_cqs(dev, slave);
+ rem_slave_mrs(dev, slave);
+ rem_slave_eqs(dev, slave);
+ rem_slave_mtts(dev, slave);
+ rem_slave_counters(dev, slave);
+ rem_slave_xrcdns(dev, slave);
+ mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
+}
+
+static void update_qos_vpp(struct mlx4_update_qp_context *ctx,
+ struct mlx4_vf_immed_vlan_work *work)
+{
+ ctx->qp_mask |= cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_QOS_VPP);
+ ctx->qp_context.qos_vport = work->qos_vport;
+}
+
+void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
+{
+ struct mlx4_vf_immed_vlan_work *work =
+ container_of(_work, struct mlx4_vf_immed_vlan_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_update_qp_context *upd_context;
+ struct mlx4_dev *dev = &work->priv->dev;
+ struct mlx4_resource_tracker *tracker =
+ &work->priv->mfunc.master.res_tracker;
+ struct list_head *qp_list =
+ &tracker->slave_list[work->slave].res_list[RES_QP];
+ struct res_qp *qp;
+ struct res_qp *tmp;
+ u64 qp_path_mask_vlan_ctrl =
+ ((1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED));
+
+ u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_CV) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_SV) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) |
+ (1ULL << MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE));
+
+ int err;
+ int port, errors = 0;
+ u8 vlan_control;
+
+ if (mlx4_is_slave(dev)) {
+ mlx4_warn(dev, "Trying to update-qp in slave %d\n",
+ work->slave);
+ goto out;
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ goto out;
+ if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE) /* block all */
+ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
+ else if (!work->vlan_id)
+ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
+ else if (work->vlan_proto == htons(ETH_P_8021AD))
+ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+ else /* vst 802.1Q */
+ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+
+ upd_context = mailbox->buf;
+ upd_context->qp_mask = cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_VSD);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(qp, tmp, qp_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (qp->com.owner == work->slave) {
+ if (qp->com.from_state != RES_QP_HW ||
+ !qp->sched_queue || /* no INIT2RTR trans yet */
+ mlx4_is_qp_reserved(dev, qp->local_qpn) ||
+ qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET)) {
+ spin_lock_irq(mlx4_tlock(dev));
+ continue;
+ }
+ port = (qp->sched_queue >> 6 & 1) + 1;
+ if (port != work->port) {
+ spin_lock_irq(mlx4_tlock(dev));
+ continue;
+ }
+ if (MLX4_QP_ST_RC == ((qp->qpc_flags >> 16) & 0xff))
+ upd_context->primary_addr_path_mask = cpu_to_be64(qp_path_mask);
+ else
+ upd_context->primary_addr_path_mask =
+ cpu_to_be64(qp_path_mask | qp_path_mask_vlan_ctrl);
+ if (work->vlan_id == MLX4_VGT) {
+ upd_context->qp_context.param3 = qp->param3;
+ upd_context->qp_context.pri_path.vlan_control = qp->vlan_control;
+ upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx;
+ upd_context->qp_context.pri_path.vlan_index = qp->vlan_index;
+ upd_context->qp_context.pri_path.fl = qp->pri_path_fl;
+ upd_context->qp_context.pri_path.feup = qp->feup;
+ upd_context->qp_context.pri_path.sched_queue =
+ qp->sched_queue;
+ } else {
+ upd_context->qp_context.param3 = qp->param3 & ~cpu_to_be32(MLX4_STRIP_VLAN);
+ upd_context->qp_context.pri_path.vlan_control = vlan_control;
+ upd_context->qp_context.pri_path.vlan_index = work->vlan_ix;
+ upd_context->qp_context.pri_path.fvl_rx =
+ qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN;
+ upd_context->qp_context.pri_path.fl =
+ qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN;
+ if (work->vlan_proto == htons(ETH_P_8021AD))
+ upd_context->qp_context.pri_path.fl |= MLX4_FL_SV;
+ else
+ upd_context->qp_context.pri_path.fl |= MLX4_FL_CV;
+ upd_context->qp_context.pri_path.feup =
+ qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
+ upd_context->qp_context.pri_path.sched_queue =
+ qp->sched_queue & 0xC7;
+ upd_context->qp_context.pri_path.sched_queue |=
+ ((work->qos & 0x7) << 3);
+
+ if (dev->caps.flags2 &
+ MLX4_DEV_CAP_FLAG2_QOS_VPP)
+ update_qos_vpp(upd_context, work);
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ qp->local_qpn & 0xffffff,
+ 0, MLX4_CMD_UPDATE_QP,
+ MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_info(dev, "UPDATE_QP failed for slave %d, port %d, qpn %d (%d)\n",
+ work->slave, port, qp->local_qpn, err);
+ errors++;
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ if (errors)
+ mlx4_err(dev, "%d UPDATE_QP failures for slave %d, port %d\n",
+ errors, work->slave, work->port);
+
+ /* unregister previous vlan_id if needed and we had no errors
+ * while updating the QPs
+ */
+ if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors &&
+ NO_INDX != work->orig_vlan_ix)
+ __mlx4_unregister_vlan(&work->priv->dev, work->port,
+ work->orig_vlan_id);
+out:
+ kfree(work);
+ return;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/sense.c b/drivers/net/ethernet/mellanox/mlx4/sense.c
new file mode 100644
index 000000000..094773d88
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/sense.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+
+int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
+ enum mlx4_port_type *type)
+{
+ u64 out_param;
+ int err = 0;
+
+ err = mlx4_cmd_imm(dev, 0, &out_param, port, 0,
+ MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err) {
+ mlx4_err(dev, "Sense command failed for port: %d\n", port);
+ return err;
+ }
+
+ if (out_param > 2) {
+ mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param);
+ return -EINVAL;
+ }
+
+ *type = out_param;
+ return 0;
+}
+
+void mlx4_do_sense_ports(struct mlx4_dev *dev,
+ enum mlx4_port_type *stype,
+ enum mlx4_port_type *defaults)
+{
+ struct mlx4_sense *sense = &mlx4_priv(dev)->sense;
+ int err;
+ int i;
+
+ for (i = 1; i <= dev->caps.num_ports; i++) {
+ stype[i - 1] = 0;
+ if (sense->do_sense_port[i] && sense->sense_allowed[i] &&
+ dev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
+ err = mlx4_SENSE_PORT(dev, i, &stype[i - 1]);
+ if (err)
+ stype[i - 1] = defaults[i - 1];
+ } else
+ stype[i - 1] = defaults[i - 1];
+ }
+
+ /*
+ * If sensed nothing, remain in current configuration.
+ */
+ for (i = 0; i < dev->caps.num_ports; i++)
+ stype[i] = stype[i] ? stype[i] : defaults[i];
+
+}
+
+static void mlx4_sense_port(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct mlx4_sense *sense = container_of(delay, struct mlx4_sense,
+ sense_poll);
+ struct mlx4_dev *dev = sense->dev;
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ enum mlx4_port_type stype[MLX4_MAX_PORTS];
+
+ mutex_lock(&priv->port_mutex);
+ mlx4_do_sense_ports(dev, stype, &dev->caps.port_type[1]);
+
+ if (mlx4_check_port_params(dev, stype))
+ goto sense_again;
+
+ if (mlx4_change_port_types(dev, stype))
+ mlx4_err(dev, "Failed to change port_types\n");
+
+sense_again:
+ mutex_unlock(&priv->port_mutex);
+ queue_delayed_work(mlx4_wq , &sense->sense_poll,
+ round_jiffies_relative(MLX4_SENSE_RANGE));
+}
+
+void mlx4_start_sense(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_sense *sense = &priv->sense;
+
+ if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP))
+ return;
+
+ queue_delayed_work(mlx4_wq , &sense->sense_poll,
+ round_jiffies_relative(MLX4_SENSE_RANGE));
+}
+
+void mlx4_stop_sense(struct mlx4_dev *dev)
+{
+ cancel_delayed_work_sync(&mlx4_priv(dev)->sense.sense_poll);
+}
+
+void mlx4_sense_init(struct mlx4_dev *dev)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_sense *sense = &priv->sense;
+ int port;
+
+ sense->dev = dev;
+ for (port = 1; port <= dev->caps.num_ports; port++)
+ sense->do_sense_port[port] = 1;
+
+ INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
new file mode 100644
index 000000000..cbe4d9746
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/srq.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+
+#include "mlx4.h"
+#include "icm.h"
+
+void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ struct mlx4_srq *srq;
+
+ rcu_read_lock();
+ srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
+ rcu_read_unlock();
+ if (srq)
+ refcount_inc(&srq->refcount);
+ else {
+ mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (refcount_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+
+static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int srq_num)
+{
+ return mlx4_cmd(dev, mailbox->dma, srq_num, 0,
+ MLX4_CMD_SW2HW_SRQ, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int srq_num)
+{
+ return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
+ mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
+{
+ return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+}
+
+static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+ int srq_num)
+{
+ return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+}
+
+int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ int err;
+
+
+ *srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
+ if (*srqn == -1)
+ return -ENOMEM;
+
+ err = mlx4_table_get(dev, &srq_table->table, *srqn);
+ if (err)
+ goto err_out;
+
+ err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
+ if (err)
+ goto err_put;
+ return 0;
+
+err_put:
+ mlx4_table_put(dev, &srq_table->table, *srqn);
+
+err_out:
+ mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_NO_RR);
+ return err;
+}
+
+static int mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
+{
+ u64 out_param;
+ int err;
+
+ if (mlx4_is_mfunc(dev)) {
+ err = mlx4_cmd_imm(dev, 0, &out_param, RES_SRQ,
+ RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_ALLOC_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+ if (!err)
+ *srqn = get_param_l(&out_param);
+
+ return err;
+ }
+ return __mlx4_srq_alloc_icm(dev, srqn);
+}
+
+void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+
+ mlx4_table_put(dev, &srq_table->cmpt_table, srqn);
+ mlx4_table_put(dev, &srq_table->table, srqn);
+ mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_NO_RR);
+}
+
+static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
+{
+ u64 in_param = 0;
+
+ if (mlx4_is_mfunc(dev)) {
+ set_param_l(&in_param, srqn);
+ if (mlx4_cmd(dev, in_param, RES_SRQ, RES_OP_RESERVE_AND_MAP,
+ MLX4_CMD_FREE_RES,
+ MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
+ mlx4_warn(dev, "Failed freeing cq:%d\n", srqn);
+ return;
+ }
+ __mlx4_srq_free_icm(dev, srqn);
+}
+
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
+ struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_srq_context *srq_context;
+ u64 mtt_addr;
+ int err;
+
+ err = mlx4_srq_alloc_icm(dev, &srq->srqn);
+ if (err)
+ return err;
+
+ spin_lock_irq(&srq_table->lock);
+ err = radix_tree_insert(&srq_table->tree, srq->srqn, srq);
+ spin_unlock_irq(&srq_table->lock);
+ if (err)
+ goto err_icm;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_radix;
+ }
+
+ srq_context = mailbox->buf;
+ srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
+ srq->srqn);
+ srq_context->logstride = srq->wqe_shift - 4;
+ srq_context->xrcd = cpu_to_be16(xrcd);
+ srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff);
+ srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
+
+ mtt_addr = mlx4_mtt_addr(dev, mtt);
+ srq_context->mtt_base_addr_h = mtt_addr >> 32;
+ srq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+ srq_context->pd = cpu_to_be32(pdn);
+ srq_context->db_rec_addr = cpu_to_be64(db_rec);
+
+ err = mlx4_SW2HW_SRQ(dev, mailbox, srq->srqn);
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ if (err)
+ goto err_radix;
+
+ refcount_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
+ return 0;
+
+err_radix:
+ spin_lock_irq(&srq_table->lock);
+ radix_tree_delete(&srq_table->tree, srq->srqn);
+ spin_unlock_irq(&srq_table->lock);
+
+err_icm:
+ mlx4_srq_free_icm(dev, srq->srqn);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_alloc);
+
+void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ int err;
+
+ err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn);
+ if (err)
+ mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn);
+
+ spin_lock_irq(&srq_table->lock);
+ radix_tree_delete(&srq_table->tree, srq->srqn);
+ spin_unlock_irq(&srq_table->lock);
+
+ if (refcount_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ mlx4_srq_free_icm(dev, srq->srqn);
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_free);
+
+int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark)
+{
+ return mlx4_ARM_SRQ(dev, srq->srqn, limit_watermark);
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_arm);
+
+int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_srq_context *srq_context;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+
+ err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn);
+ if (err)
+ goto err_out;
+ *limit_watermark = be16_to_cpu(srq_context->limit_watermark);
+
+err_out:
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_query);
+
+int mlx4_init_srq_table(struct mlx4_dev *dev)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ int err;
+
+ spin_lock_init(&srq_table->lock);
+ INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
+ if (mlx4_is_slave(dev))
+ return 0;
+
+ err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
+ dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx4_cleanup_srq_table(struct mlx4_dev *dev)
+{
+ if (mlx4_is_slave(dev))
+ return;
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap);
+}
+
+struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn)
+{
+ struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
+ struct mlx4_srq *srq;
+
+ rcu_read_lock();
+ srq = radix_tree_lookup(&srq_table->tree,
+ srqn & (dev->caps.num_srqs - 1));
+ rcu_read_unlock();
+
+ return srq;
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_lookup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
new file mode 100644
index 000000000..b7e3b8902
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -0,0 +1,120 @@
+#
+# Mellanox driver configuration
+#
+
+config MLX5_CORE
+ tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
+ depends on MAY_USE_DEVLINK
+ depends on PCI
+ imply PTP_1588_CLOCK
+ imply VXLAN
+ imply MLXFW
+ default n
+ ---help---
+ Core driver for low level functionality of the ConnectX-4 and
+ Connect-IB cards by Mellanox Technologies.
+
+config MLX5_ACCEL
+ bool
+
+config MLX5_FPGA
+ bool "Mellanox Technologies Innova support"
+ depends on MLX5_CORE
+ select MLX5_ACCEL
+ ---help---
+ Build support for the Innova family of network cards by Mellanox
+ Technologies. Innova network cards are comprised of a ConnectX chip
+ and an FPGA chip on one board. If you select this option, the
+ mlx5_core driver will include the Innova FPGA core and allow building
+ sandbox-specific client drivers.
+
+config MLX5_CORE_EN
+ bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support"
+ depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
+ depends on IPV6=y || IPV6=n || MLX5_CORE=m
+ select PAGE_POOL
+ default n
+ ---help---
+ Ethernet support in Mellanox Technologies ConnectX-4 NIC.
+
+config MLX5_EN_ARFS
+ bool "Mellanox MLX5 ethernet accelerated receive flow steering (ARFS) support"
+ depends on MLX5_CORE_EN && RFS_ACCEL
+ default y
+ ---help---
+ Mellanox MLX5 ethernet hardware-accelerated receive flow steering support,
+ Enables ethernet netdevice arfs support and ntuple filtering.
+
+config MLX5_EN_RXNFC
+ bool "Mellanox MLX5 ethernet rx nfc flow steering support"
+ depends on MLX5_CORE_EN
+ default y
+ ---help---
+ Mellanox MLX5 ethernet rx nfc flow steering support
+ Enables ethtool receive network flow classification, which allows user defined
+ flow rules to direct traffic into arbitrary rx queue via ethtool set/get_rxnfc
+ API.
+
+config MLX5_MPFS
+ bool "Mellanox Technologies MLX5 MPFS support"
+ depends on MLX5_CORE_EN
+ default y
+ ---help---
+ Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
+ support in ConnectX NIC. MPFs is required for when multi-PF configuration
+ is enabled to allow passing user configured unicast MAC addresses to the
+ requesting PF.
+
+config MLX5_ESWITCH
+ bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
+ depends on MLX5_CORE_EN && NET_SWITCHDEV
+ default y
+ ---help---
+ Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC.
+ E-Switch provides internal SRIOV packet steering and switching for the
+ enabled VFs and PF in two available modes:
+ Legacy SRIOV mode (L2 mac vlan steering based).
+ Switchdev mode (eswitch offloads).
+
+config MLX5_CORE_EN_DCB
+ bool "Data Center Bridging (DCB) Support"
+ default y
+ depends on MLX5_CORE_EN && DCB
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
+ If set to N, will not be able to configure QoS and ratelimit attributes.
+ This flag is depended on the kernel's DCB support.
+
+ If unsure, set to Y
+
+config MLX5_CORE_IPOIB
+ bool "Mellanox 5th generation network adapters (connectX series) IPoIB offloads support"
+ depends on MLX5_CORE_EN
+ default n
+ ---help---
+ MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_IPSEC
+ bool "IPSec XFRM cryptography-offload accelaration"
+ depends on MLX5_ACCEL
+ depends on MLX5_CORE_EN
+ depends on XFRM_OFFLOAD
+ depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+ default n
+ ---help---
+ Build support for IPsec cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
+
+config MLX5_EN_TLS
+ bool "TLS cryptography-offload accelaration"
+ depends on MLX5_CORE_EN
+ depends on TLS_DEVICE
+ depends on TLS=y || MLX5_CORE=m
+ depends on MLX5_ACCEL
+ default n
+ ---help---
+ Build support for TLS cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
new file mode 100644
index 000000000..d324a3884
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox 5th generation network adapters
+# (ConnectX series) core & netdev driver
+#
+
+subdir-ccflags-y += -I$(src)
+
+obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
+
+#
+# mlx5 core basic
+#
+mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
+ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
+ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+ fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
+ diag/fs_tracepoint.o diag/fw_tracer.o
+
+#
+# Netdev basic
+#
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
+ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
+ en_selftest.o en/port.o
+
+#
+# Netdev extra
+#
+mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
+mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o
+
+#
+# Core extra
+#
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o
+mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
+mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
+mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+
+#
+# Ipoib netdev
+#
+mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o
+
+#
+# Accelerations & FPGA
+#
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o
+
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
+ fpga/ipsec.o fpga/tls.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
+ en_accel/ipsec_stats.o
+
+mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o
+
+CFLAGS_tracepoint.o := -I$(src)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
new file mode 100644
index 000000000..c13260467
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
@@ -0,0 +1,37 @@
+#ifndef __MLX5E_ACCEL_H__
+#define __MLX5E_ACCEL_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en.h"
+
+static inline bool is_metadata_hdr_valid(struct sk_buff *skb)
+{
+ __be16 *ethtype;
+
+ if (unlikely(skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN))
+ return false;
+ ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
+ if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
+ return false;
+ return true;
+}
+
+static inline void remove_metadata_hdr(struct sk_buff *skb)
+{
+ struct ethhdr *old_eth;
+ struct ethhdr *new_eth;
+
+ /* Remove the metadata from the buffer */
+ old_eth = (struct ethhdr *)skb->data;
+ new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
+ memmove(new_eth, old_eth, 2 * ETH_ALEN);
+ /* Ethertype is already in its new place */
+ skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
+}
+
+#endif /* CONFIG_MLX5_ACCEL */
+
+#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
new file mode 100644
index 000000000..9f1b19397
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/ipsec.h"
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_device_caps(mdev);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_counters_count(mdev);
+}
+
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int count)
+{
+ return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
+}
+
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
+ spi, is_ipv6);
+}
+
+void mlx5_accel_esp_free_hw_context(void *context)
+{
+ mlx5_fpga_ipsec_delete_sa_ctx(context);
+}
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_ipsec_init(mdev);
+}
+
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+ mlx5_fpga_ipsec_cleanup(mdev);
+}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_accel_esp_xfrm *xfrm;
+
+ xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags);
+ if (IS_ERR(xfrm))
+ return xfrm;
+
+ xfrm->mdev = mdev;
+ return xfrm;
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
+
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ mlx5_fpga_esp_destroy_xfrm(xfrm);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
+
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
new file mode 100644
index 000000000..024dbd22a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_IPSEC_H__
+#define __MLX5_ACCEL_IPSEC_H__
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/accel.h>
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int count);
+
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_accel_esp_free_hw_context(void *context);
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+#define MLX5_IPSEC_DEV(mdev) false
+
+static inline void *
+mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_accel_esp_free_hw_context(void *context)
+{
+}
+
+static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif
+
+#endif /* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
new file mode 100644
index 000000000..da7bd2636
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/tls.h"
+#include "mlx5_core.h"
+#include "fpga/tls.h"
+
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx)
+{
+ return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
+ start_offload_tcp_sn, p_swid,
+ direction_sx);
+}
+
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ bool direction_sx)
+{
+ mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
+}
+
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn)
+{
+ return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
+}
+
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_is_tls_device(mdev);
+}
+
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_tls_device_caps(mdev);
+}
+
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev)
+{
+ return mlx5_fpga_tls_init(mdev);
+}
+
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+ mlx5_fpga_tls_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
new file mode 100644
index 000000000..def4093eb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_TLS_H__
+#define __MLX5_ACCEL_TLS_H__
+
+#include <linux/mlx5/driver.h>
+#include <linux/tls.h>
+
+#ifdef CONFIG_MLX5_ACCEL
+
+enum {
+ MLX5_ACCEL_TLS_TX = BIT(0),
+ MLX5_ACCEL_TLS_RX = BIT(1),
+ MLX5_ACCEL_TLS_V12 = BIT(2),
+ MLX5_ACCEL_TLS_V13 = BIT(3),
+ MLX5_ACCEL_TLS_LRO = BIT(4),
+ MLX5_ACCEL_TLS_IPV6 = BIT(5),
+ MLX5_ACCEL_TLS_AES_GCM128 = BIT(30),
+ MLX5_ACCEL_TLS_AES_GCM256 = BIT(31),
+};
+
+struct mlx5_ifc_tls_flow_bits {
+ u8 src_port[0x10];
+ u8 dst_port[0x10];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+ u8 ipv6[0x1];
+ u8 direction_sx[0x1];
+ u8 reserved_at_2[0x1e];
+};
+
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx);
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ bool direction_sx);
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn);
+bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
+u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
+int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline int
+mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx) { return -ENOTSUPP; }
+static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ bool direction_sx) { }
+static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
+ u32 seq, u64 rcd_sn) { return 0; }
+static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
+static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
+static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { }
+
+#endif
+
+#endif /* __MLX5_ACCEL_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
new file mode 100644
index 000000000..456f30007
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+
+struct mlx5_db_pgdir {
+ struct list_head list;
+ unsigned long *bitmap;
+ __be32 *db_page;
+ dma_addr_t db_dma;
+};
+
+/* Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0.
+ */
+
+static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+ size_t size, dma_addr_t *dma_handle,
+ int node)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int original_node;
+ void *cpu_handle;
+
+ mutex_lock(&priv->alloc_mutex);
+ original_node = dev_to_node(&dev->pdev->dev);
+ set_dev_node(&dev->pdev->dev, node);
+ cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size,
+ dma_handle, GFP_KERNEL);
+ set_dev_node(&dev->pdev->dev, original_node);
+ mutex_unlock(&priv->alloc_mutex);
+ return cpu_handle;
+}
+
+int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+ struct mlx5_frag_buf *buf, int node)
+{
+ dma_addr_t t;
+
+ buf->size = size;
+ buf->npages = 1;
+ buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
+
+ buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
+ if (!buf->frags)
+ return -ENOMEM;
+
+ buf->frags->buf = mlx5_dma_zalloc_coherent_node(dev, size,
+ &t, node);
+ if (!buf->frags->buf)
+ goto err_out;
+
+ buf->frags->map = t;
+
+ while (t & ((1 << buf->page_shift) - 1)) {
+ --buf->page_shift;
+ buf->npages *= 2;
+ }
+
+ return 0;
+err_out:
+ kfree(buf->frags);
+ return -ENOMEM;
+}
+
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+ int size, struct mlx5_frag_buf *buf)
+{
+ return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
+}
+EXPORT_SYMBOL(mlx5_buf_alloc);
+
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+ dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+ buf->frags->map);
+
+ kfree(buf->frags);
+}
+EXPORT_SYMBOL_GPL(mlx5_buf_free);
+
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+ struct mlx5_frag_buf *buf, int node)
+{
+ int i;
+
+ buf->size = size;
+ buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
+ buf->page_shift = PAGE_SHIFT;
+ buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
+ GFP_KERNEL);
+ if (!buf->frags)
+ goto err_out;
+
+ for (i = 0; i < buf->npages; i++) {
+ struct mlx5_buf_list *frag = &buf->frags[i];
+ int frag_sz = min_t(int, size, PAGE_SIZE);
+
+ frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
+ &frag->map, node);
+ if (!frag->buf)
+ goto err_free_buf;
+ if (frag->map & ((1 << buf->page_shift) - 1)) {
+ dma_free_coherent(&dev->pdev->dev, frag_sz,
+ buf->frags[i].buf, buf->frags[i].map);
+ mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
+ &frag->map, buf->page_shift);
+ goto err_free_buf;
+ }
+ size -= frag_sz;
+ }
+
+ return 0;
+
+err_free_buf:
+ while (i--)
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf,
+ buf->frags[i].map);
+ kfree(buf->frags);
+err_out:
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
+
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+ int size = buf->size;
+ int i;
+
+ for (i = 0; i < buf->npages; i++) {
+ int frag_sz = min_t(int, size, PAGE_SIZE);
+
+ dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf,
+ buf->frags[i].map);
+ size -= frag_sz;
+ }
+ kfree(buf->frags);
+}
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
+
+static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
+ int node)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+ struct mlx5_db_pgdir *pgdir;
+
+ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ if (!pgdir)
+ return NULL;
+
+ pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+
+ if (!pgdir->bitmap) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ bitmap_fill(pgdir->bitmap, db_per_page);
+
+ pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
+ &pgdir->db_dma, node);
+ if (!pgdir->db_page) {
+ kfree(pgdir->bitmap);
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir,
+ struct mlx5_db *db)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+ int offset;
+ int i;
+
+ i = find_first_bit(pgdir->bitmap, db_per_page);
+ if (i >= db_per_page)
+ return -ENOMEM;
+
+ __clear_bit(i, pgdir->bitmap);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ offset = db->index * cache_line_size();
+ db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page);
+ db->dma = pgdir->db_dma + offset;
+
+ db->db[0] = 0;
+ db->db[1] = 0;
+
+ return 0;
+}
+
+int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node)
+{
+ struct mlx5_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&dev->priv.pgdir_mutex);
+
+ list_for_each_entry(pgdir, &dev->priv.pgdir_list, list)
+ if (!mlx5_alloc_db_from_pgdir(pgdir, db))
+ goto out;
+
+ pgdir = mlx5_alloc_db_pgdir(dev, node);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &dev->priv.pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db));
+
+out:
+ mutex_unlock(&dev->priv.pgdir_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_db_alloc_node);
+
+int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+ return mlx5_db_alloc_node(dev, db, dev->priv.numa_node);
+}
+EXPORT_SYMBOL_GPL(mlx5_db_alloc);
+
+void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
+{
+ u32 db_per_page = PAGE_SIZE / cache_line_size();
+
+ mutex_lock(&dev->priv.pgdir_mutex);
+
+ __set_bit(db->index, db->u.pgdir->bitmap);
+
+ if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) {
+ dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+ db->u.pgdir->db_page, db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ kfree(db->u.pgdir->bitmap);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&dev->priv.pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx5_db_free);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+ u64 addr;
+ int i;
+
+ for (i = 0; i < buf->npages; i++) {
+ addr = buf->frags->map + (i << buf->page_shift);
+
+ pas[i] = cpu_to_be64(addr);
+ }
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+ int i;
+
+ for (i = 0; i < buf->npages; i++)
+ pas[i] = cpu_to_be64(buf->frags[i].map);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
new file mode 100644
index 000000000..a68608276
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -0,0 +1,1919 @@
+/*
+ * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/debugfs.h>
+
+#include "mlx5_core.h"
+
+enum {
+ CMD_IF_REV = 5,
+};
+
+enum {
+ CMD_MODE_POLLING,
+ CMD_MODE_EVENTS
+};
+
+enum {
+ MLX5_CMD_DELIVERY_STAT_OK = 0x0,
+ MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1,
+ MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2,
+ MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3,
+ MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4,
+ MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5,
+ MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6,
+ MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7,
+ MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8,
+ MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9,
+ MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
+};
+
+static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
+ struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out,
+ void *uout, int uout_size,
+ mlx5_cmd_cbk_t cbk,
+ void *context, int page_queue)
+{
+ gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
+ struct mlx5_cmd_work_ent *ent;
+
+ ent = kzalloc(sizeof(*ent), alloc_flags);
+ if (!ent)
+ return ERR_PTR(-ENOMEM);
+
+ ent->in = in;
+ ent->out = out;
+ ent->uout = uout;
+ ent->uout_size = uout_size;
+ ent->callback = cbk;
+ ent->context = context;
+ ent->cmd = cmd;
+ ent->page_queue = page_queue;
+
+ return ent;
+}
+
+static u8 alloc_token(struct mlx5_cmd *cmd)
+{
+ u8 token;
+
+ spin_lock(&cmd->token_lock);
+ cmd->token++;
+ if (cmd->token == 0)
+ cmd->token++;
+ token = cmd->token;
+ spin_unlock(&cmd->token_lock);
+
+ return token;
+}
+
+static int alloc_ent(struct mlx5_cmd *cmd)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds);
+ if (ret < cmd->max_reg_cmds)
+ clear_bit(ret, &cmd->bitmask);
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+
+ return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
+}
+
+static void free_ent(struct mlx5_cmd *cmd, int idx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ set_bit(idx, &cmd->bitmask);
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+}
+
+static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
+{
+ return cmd->cmd_buf + (idx << cmd->log_stride);
+}
+
+static int mlx5_calc_cmd_blocks(struct mlx5_cmd_msg *msg)
+{
+ int size = msg->len;
+ int blen = size - min_t(int, sizeof(msg->first.data), size);
+
+ return DIV_ROUND_UP(blen, MLX5_CMD_DATA_BLOCK_SIZE);
+}
+
+static u8 xor8_buf(void *buf, size_t offset, int len)
+{
+ u8 *ptr = buf;
+ u8 sum = 0;
+ int i;
+ int end = len + offset;
+
+ for (i = offset; i < end; i++)
+ sum ^= ptr[i];
+
+ return sum;
+}
+
+static int verify_block_sig(struct mlx5_cmd_prot_block *block)
+{
+ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+ int xor_len = sizeof(*block) - sizeof(block->data) - 1;
+
+ if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
+ return -EINVAL;
+
+ if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void calc_block_sig(struct mlx5_cmd_prot_block *block)
+{
+ int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
+ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+
+ block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
+ block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
+}
+
+static void calc_chain_sig(struct mlx5_cmd_msg *msg)
+{
+ struct mlx5_cmd_mailbox *next = msg->next;
+ int n = mlx5_calc_cmd_blocks(msg);
+ int i = 0;
+
+ for (i = 0; i < n && next; i++) {
+ calc_block_sig(next->buf);
+ next = next->next;
+ }
+}
+
+static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
+{
+ ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+ if (csum) {
+ calc_chain_sig(ent->in);
+ calc_chain_sig(ent->out);
+ }
+}
+
+static void poll_timeout(struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000);
+ u8 own;
+
+ do {
+ own = READ_ONCE(ent->lay->status_own);
+ if (!(own & CMD_OWNER_HW)) {
+ ent->ret = 0;
+ return;
+ }
+ cond_resched();
+ } while (time_before(jiffies, poll_end));
+
+ ent->ret = -ETIMEDOUT;
+}
+
+static void free_cmd(struct mlx5_cmd_work_ent *ent)
+{
+ kfree(ent);
+}
+
+static int verify_signature(struct mlx5_cmd_work_ent *ent)
+{
+ struct mlx5_cmd_mailbox *next = ent->out->next;
+ int n = mlx5_calc_cmd_blocks(ent->out);
+ int err;
+ u8 sig;
+ int i = 0;
+
+ sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+ if (sig != 0xff)
+ return -EINVAL;
+
+ for (i = 0; i < n && next; i++) {
+ err = verify_block_sig(next->buf);
+ if (err)
+ return err;
+
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static void dump_buf(void *buf, int size, int data_only, int offset)
+{
+ __be32 *p = buf;
+ int i;
+
+ for (i = 0; i < size; i += 16) {
+ pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]),
+ be32_to_cpu(p[1]), be32_to_cpu(p[2]),
+ be32_to_cpu(p[3]));
+ p += 4;
+ offset += 16;
+ }
+ if (!data_only)
+ pr_debug("\n");
+}
+
+static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
+ u32 *synd, u8 *status)
+{
+ *synd = 0;
+ *status = 0;
+
+ switch (op) {
+ case MLX5_CMD_OP_TEARDOWN_HCA:
+ case MLX5_CMD_OP_DISABLE_HCA:
+ case MLX5_CMD_OP_MANAGE_PAGES:
+ case MLX5_CMD_OP_DESTROY_MKEY:
+ case MLX5_CMD_OP_DESTROY_EQ:
+ case MLX5_CMD_OP_DESTROY_CQ:
+ case MLX5_CMD_OP_DESTROY_QP:
+ case MLX5_CMD_OP_DESTROY_PSV:
+ case MLX5_CMD_OP_DESTROY_SRQ:
+ case MLX5_CMD_OP_DESTROY_XRC_SRQ:
+ case MLX5_CMD_OP_DESTROY_XRQ:
+ case MLX5_CMD_OP_DESTROY_DCT:
+ case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
+ case MLX5_CMD_OP_DEALLOC_PD:
+ case MLX5_CMD_OP_DEALLOC_UAR:
+ case MLX5_CMD_OP_DETACH_FROM_MCG:
+ case MLX5_CMD_OP_DEALLOC_XRCD:
+ case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_DESTROY_LAG:
+ case MLX5_CMD_OP_DESTROY_VPORT_LAG:
+ case MLX5_CMD_OP_DESTROY_TIR:
+ case MLX5_CMD_OP_DESTROY_SQ:
+ case MLX5_CMD_OP_DESTROY_RQ:
+ case MLX5_CMD_OP_DESTROY_RMP:
+ case MLX5_CMD_OP_DESTROY_TIS:
+ case MLX5_CMD_OP_DESTROY_RQT:
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ case MLX5_CMD_OP_DESTROY_FLOW_GROUP:
+ case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_2ERR_QP:
+ case MLX5_CMD_OP_2RST_QP:
+ case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
+ case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_DESTROY_QP:
+ case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
+ return MLX5_CMD_STAT_OK;
+
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_INIT_HCA:
+ case MLX5_CMD_OP_ENABLE_HCA:
+ case MLX5_CMD_OP_QUERY_PAGES:
+ case MLX5_CMD_OP_SET_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_SET_ISSI:
+ case MLX5_CMD_OP_CREATE_MKEY:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
+ case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+ case MLX5_CMD_OP_CREATE_EQ:
+ case MLX5_CMD_OP_QUERY_EQ:
+ case MLX5_CMD_OP_GEN_EQE:
+ case MLX5_CMD_OP_CREATE_CQ:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_MODIFY_CQ:
+ case MLX5_CMD_OP_CREATE_QP:
+ case MLX5_CMD_OP_RST2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_SQD_RTS_QP:
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_CREATE_PSV:
+ case MLX5_CMD_OP_CREATE_SRQ:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_ARM_RQ:
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ case MLX5_CMD_OP_CREATE_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_CREATE_DCT:
+ case MLX5_CMD_OP_DRAIN_DCT:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_MODIFY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OP_SET_ROCE_ADDRESS:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
+ case MLX5_CMD_OP_QUERY_RATE_LIMIT:
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
+ case MLX5_CMD_OP_ALLOC_PD:
+ case MLX5_CMD_OP_ALLOC_UAR:
+ case MLX5_CMD_OP_CONFIG_INT_MODERATION:
+ case MLX5_CMD_OP_ACCESS_REG:
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+ case MLX5_CMD_OP_MAD_IFC:
+ case MLX5_CMD_OP_QUERY_MAD_DEMUX:
+ case MLX5_CMD_OP_SET_MAD_DEMUX:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_MODIFY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_MODIFY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_CREATE_LAG:
+ case MLX5_CMD_OP_MODIFY_LAG:
+ case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_CREATE_VPORT_LAG:
+ case MLX5_CMD_OP_CREATE_TIR:
+ case MLX5_CMD_OP_MODIFY_TIR:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_CREATE_SQ:
+ case MLX5_CMD_OP_MODIFY_SQ:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_MODIFY_RQ:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_CREATE_RMP:
+ case MLX5_CMD_OP_MODIFY_RMP:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_CREATE_TIS:
+ case MLX5_CMD_OP_MODIFY_TIS:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_CREATE_RQT:
+ case MLX5_CMD_OP_MODIFY_RQT:
+ case MLX5_CMD_OP_QUERY_RQT:
+
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_FPGA_CREATE_QP:
+ case MLX5_CMD_OP_FPGA_MODIFY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP:
+ case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ *status = MLX5_DRIVER_STATUS_ABORTED;
+ *synd = MLX5_DRIVER_SYND;
+ return -EIO;
+ default:
+ mlx5_core_err(dev, "Unknown FW command (%d)\n", op);
+ return -EINVAL;
+ }
+}
+
+const char *mlx5_command_str(int command)
+{
+#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd
+
+ switch (command) {
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP);
+ MLX5_COMMAND_STR_CASE(QUERY_ADAPTER);
+ MLX5_COMMAND_STR_CASE(INIT_HCA);
+ MLX5_COMMAND_STR_CASE(TEARDOWN_HCA);
+ MLX5_COMMAND_STR_CASE(ENABLE_HCA);
+ MLX5_COMMAND_STR_CASE(DISABLE_HCA);
+ MLX5_COMMAND_STR_CASE(QUERY_PAGES);
+ MLX5_COMMAND_STR_CASE(MANAGE_PAGES);
+ MLX5_COMMAND_STR_CASE(SET_HCA_CAP);
+ MLX5_COMMAND_STR_CASE(QUERY_ISSI);
+ MLX5_COMMAND_STR_CASE(SET_ISSI);
+ MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION);
+ MLX5_COMMAND_STR_CASE(CREATE_MKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_MKEY);
+ MLX5_COMMAND_STR_CASE(DESTROY_MKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS);
+ MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME);
+ MLX5_COMMAND_STR_CASE(CREATE_EQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_EQ);
+ MLX5_COMMAND_STR_CASE(QUERY_EQ);
+ MLX5_COMMAND_STR_CASE(GEN_EQE);
+ MLX5_COMMAND_STR_CASE(CREATE_CQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_CQ);
+ MLX5_COMMAND_STR_CASE(QUERY_CQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_CQ);
+ MLX5_COMMAND_STR_CASE(CREATE_QP);
+ MLX5_COMMAND_STR_CASE(DESTROY_QP);
+ MLX5_COMMAND_STR_CASE(RST2INIT_QP);
+ MLX5_COMMAND_STR_CASE(INIT2RTR_QP);
+ MLX5_COMMAND_STR_CASE(RTR2RTS_QP);
+ MLX5_COMMAND_STR_CASE(RTS2RTS_QP);
+ MLX5_COMMAND_STR_CASE(SQERR2RTS_QP);
+ MLX5_COMMAND_STR_CASE(2ERR_QP);
+ MLX5_COMMAND_STR_CASE(2RST_QP);
+ MLX5_COMMAND_STR_CASE(QUERY_QP);
+ MLX5_COMMAND_STR_CASE(SQD_RTS_QP);
+ MLX5_COMMAND_STR_CASE(INIT2INIT_QP);
+ MLX5_COMMAND_STR_CASE(CREATE_PSV);
+ MLX5_COMMAND_STR_CASE(DESTROY_PSV);
+ MLX5_COMMAND_STR_CASE(CREATE_SRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_SRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_SRQ);
+ MLX5_COMMAND_STR_CASE(ARM_RQ);
+ MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ);
+ MLX5_COMMAND_STR_CASE(CREATE_DCT);
+ MLX5_COMMAND_STR_CASE(DESTROY_DCT);
+ MLX5_COMMAND_STR_CASE(DRAIN_DCT);
+ MLX5_COMMAND_STR_CASE(QUERY_DCT);
+ MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION);
+ MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE);
+ MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE);
+ MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS);
+ MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID);
+ MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY);
+ MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV);
+ MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER);
+ MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
+ MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
+ MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
+ MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
+ MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
+ MLX5_COMMAND_STR_CASE(ALLOC_PD);
+ MLX5_COMMAND_STR_CASE(DEALLOC_PD);
+ MLX5_COMMAND_STR_CASE(ALLOC_UAR);
+ MLX5_COMMAND_STR_CASE(DEALLOC_UAR);
+ MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
+ MLX5_COMMAND_STR_CASE(ACCESS_REG);
+ MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
+ MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG);
+ MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
+ MLX5_COMMAND_STR_CASE(MAD_IFC);
+ MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
+ MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX);
+ MLX5_COMMAND_STR_CASE(NOP);
+ MLX5_COMMAND_STR_CASE(ALLOC_XRCD);
+ MLX5_COMMAND_STR_CASE(DEALLOC_XRCD);
+ MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN);
+ MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS);
+ MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS);
+ MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS);
+ MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS);
+ MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT);
+ MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT);
+ MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(SET_WOL_ROL);
+ MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL);
+ MLX5_COMMAND_STR_CASE(CREATE_LAG);
+ MLX5_COMMAND_STR_CASE(MODIFY_LAG);
+ MLX5_COMMAND_STR_CASE(QUERY_LAG);
+ MLX5_COMMAND_STR_CASE(DESTROY_LAG);
+ MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG);
+ MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG);
+ MLX5_COMMAND_STR_CASE(CREATE_TIR);
+ MLX5_COMMAND_STR_CASE(MODIFY_TIR);
+ MLX5_COMMAND_STR_CASE(DESTROY_TIR);
+ MLX5_COMMAND_STR_CASE(QUERY_TIR);
+ MLX5_COMMAND_STR_CASE(CREATE_SQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_SQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_SQ);
+ MLX5_COMMAND_STR_CASE(QUERY_SQ);
+ MLX5_COMMAND_STR_CASE(CREATE_RQ);
+ MLX5_COMMAND_STR_CASE(MODIFY_RQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_RQ);
+ MLX5_COMMAND_STR_CASE(QUERY_RQ);
+ MLX5_COMMAND_STR_CASE(CREATE_RMP);
+ MLX5_COMMAND_STR_CASE(MODIFY_RMP);
+ MLX5_COMMAND_STR_CASE(DESTROY_RMP);
+ MLX5_COMMAND_STR_CASE(QUERY_RMP);
+ MLX5_COMMAND_STR_CASE(CREATE_TIS);
+ MLX5_COMMAND_STR_CASE(MODIFY_TIS);
+ MLX5_COMMAND_STR_CASE(DESTROY_TIS);
+ MLX5_COMMAND_STR_CASE(QUERY_TIS);
+ MLX5_COMMAND_STR_CASE(CREATE_RQT);
+ MLX5_COMMAND_STR_CASE(MODIFY_RQT);
+ MLX5_COMMAND_STR_CASE(DESTROY_RQT);
+ MLX5_COMMAND_STR_CASE(QUERY_RQT);
+ MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT);
+ MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP);
+ MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY);
+ MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
+ MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
+ MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
+ MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+ MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+ MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+ MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
+ MLX5_COMMAND_STR_CASE(CREATE_XRQ);
+ MLX5_COMMAND_STR_CASE(DESTROY_XRQ);
+ MLX5_COMMAND_STR_CASE(QUERY_XRQ);
+ MLX5_COMMAND_STR_CASE(ARM_XRQ);
+ MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
+ default: return "unknown command opcode";
+ }
+}
+
+static const char *cmd_status_str(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_STAT_OK:
+ return "OK";
+ case MLX5_CMD_STAT_INT_ERR:
+ return "internal error";
+ case MLX5_CMD_STAT_BAD_OP_ERR:
+ return "bad operation";
+ case MLX5_CMD_STAT_BAD_PARAM_ERR:
+ return "bad parameter";
+ case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+ return "bad system state";
+ case MLX5_CMD_STAT_BAD_RES_ERR:
+ return "bad resource";
+ case MLX5_CMD_STAT_RES_BUSY:
+ return "resource busy";
+ case MLX5_CMD_STAT_LIM_ERR:
+ return "limits exceeded";
+ case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+ return "bad resource state";
+ case MLX5_CMD_STAT_IX_ERR:
+ return "bad index";
+ case MLX5_CMD_STAT_NO_RES_ERR:
+ return "no resources";
+ case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+ return "bad input length";
+ case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+ return "bad output length";
+ case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+ return "bad QP state";
+ case MLX5_CMD_STAT_BAD_PKT_ERR:
+ return "bad packet (discarded)";
+ case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+ return "bad size too many outstanding CQEs";
+ default:
+ return "unknown status";
+ }
+}
+
+static int cmd_status_to_err(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_STAT_OK: return 0;
+ case MLX5_CMD_STAT_INT_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_RES_BUSY: return -EBUSY;
+ case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM;
+ case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_IX_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN;
+ case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO;
+ case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL;
+ case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL;
+ default: return -EIO;
+ }
+}
+
+struct mlx5_ifc_mbox_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome)
+{
+ *status = MLX5_GET(mbox_out, out, status);
+ *syndrome = MLX5_GET(mbox_out, out, syndrome);
+}
+
+static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
+{
+ u32 syndrome;
+ u8 status;
+ u16 opcode;
+ u16 op_mod;
+ u16 uid;
+
+ mlx5_cmd_mbox_status(out, &status, &syndrome);
+ if (!status)
+ return 0;
+
+ opcode = MLX5_GET(mbox_in, in, opcode);
+ op_mod = MLX5_GET(mbox_in, in, op_mod);
+ uid = MLX5_GET(mbox_in, in, uid);
+
+ if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
+ mlx5_core_err_rl(dev,
+ "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
+ mlx5_command_str(opcode), opcode, op_mod,
+ cmd_status_str(status), status, syndrome);
+ else
+ mlx5_core_dbg(dev,
+ "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
+ mlx5_command_str(opcode),
+ opcode, op_mod,
+ cmd_status_str(status),
+ status,
+ syndrome);
+
+ return cmd_status_to_err(status);
+}
+
+static void dump_command(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_work_ent *ent, int input)
+{
+ struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
+ u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode);
+ struct mlx5_cmd_mailbox *next = msg->next;
+ int n = mlx5_calc_cmd_blocks(msg);
+ int data_only;
+ u32 offset = 0;
+ int dump_len;
+ int i;
+
+ data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
+
+ if (data_only)
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
+ "dump command data %s(0x%x) %s\n",
+ mlx5_command_str(op), op,
+ input ? "INPUT" : "OUTPUT");
+ else
+ mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
+ mlx5_command_str(op), op,
+ input ? "INPUT" : "OUTPUT");
+
+ if (data_only) {
+ if (input) {
+ dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
+ offset += sizeof(ent->lay->in);
+ } else {
+ dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
+ offset += sizeof(ent->lay->out);
+ }
+ } else {
+ dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
+ offset += sizeof(*ent->lay);
+ }
+
+ for (i = 0; i < n && next; i++) {
+ if (data_only) {
+ dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
+ dump_buf(next->buf, dump_len, 1, offset);
+ offset += MLX5_CMD_DATA_BLOCK_SIZE;
+ } else {
+ mlx5_core_dbg(dev, "command block:\n");
+ dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
+ offset += sizeof(struct mlx5_cmd_prot_block);
+ }
+ next = next->next;
+ }
+
+ if (data_only)
+ pr_debug("\n");
+}
+
+static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
+{
+ return MLX5_GET(mbox_in, in->first.data, opcode);
+}
+
+static void cb_timeout_handler(struct work_struct *work)
+{
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
+ work);
+ struct mlx5_cmd_work_ent *ent = container_of(dwork,
+ struct mlx5_cmd_work_ent,
+ cb_timeout_work);
+ struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+ cmd);
+
+ ent->ret = -ETIMEDOUT;
+ mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+}
+
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_msg *msg);
+
+static void cmd_work_handler(struct work_struct *work)
+{
+ struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+ struct mlx5_cmd *cmd = ent->cmd;
+ struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
+ unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+ struct mlx5_cmd_layout *lay;
+ struct semaphore *sem;
+ unsigned long flags;
+ bool poll_cmd = ent->polling;
+ int alloc_ret;
+ int cmd_mode;
+
+ complete(&ent->handling);
+ sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+ down(sem);
+ if (!ent->page_queue) {
+ alloc_ret = alloc_ent(cmd);
+ if (alloc_ret < 0) {
+ mlx5_core_err(dev, "failed to allocate command entry\n");
+ if (ent->callback) {
+ ent->callback(-EAGAIN, ent->context);
+ mlx5_free_cmd_msg(dev, ent->out);
+ free_msg(dev, ent->in);
+ free_cmd(ent);
+ } else {
+ ent->ret = -EAGAIN;
+ complete(&ent->done);
+ }
+ up(sem);
+ return;
+ }
+ ent->idx = alloc_ret;
+ } else {
+ ent->idx = cmd->max_reg_cmds;
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+ clear_bit(ent->idx, &cmd->bitmask);
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+ }
+
+ cmd->ent_arr[ent->idx] = ent;
+ lay = get_inst(cmd, ent->idx);
+ ent->lay = lay;
+ memset(lay, 0, sizeof(*lay));
+ memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
+ ent->op = be32_to_cpu(lay->in[0]) >> 16;
+ if (ent->in->next)
+ lay->in_ptr = cpu_to_be64(ent->in->next->dma);
+ lay->inlen = cpu_to_be32(ent->in->len);
+ if (ent->out->next)
+ lay->out_ptr = cpu_to_be64(ent->out->next->dma);
+ lay->outlen = cpu_to_be32(ent->out->len);
+ lay->type = MLX5_PCI_CMD_XPORT;
+ lay->token = ent->token;
+ lay->status_own = CMD_OWNER_HW;
+ set_signature(ent, !cmd->checksum_disabled);
+ dump_command(dev, ent, 1);
+ ent->ts1 = ktime_get_ns();
+ cmd_mode = cmd->mode;
+
+ if (ent->callback)
+ schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+ set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
+
+ /* Skip sending command to fw if internal error */
+ if (pci_channel_offline(dev->pdev) ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ u8 status = 0;
+ u32 drv_synd;
+
+ ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status);
+ MLX5_SET(mbox_out, ent->out, status, status);
+ MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
+
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ /* no doorbell, no need to keep the entry */
+ free_ent(cmd, ent->idx);
+ if (ent->callback)
+ free_cmd(ent);
+ return;
+ }
+
+ /* ring doorbell after the descriptor is valid */
+ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
+ wmb();
+ iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
+ mmiowb();
+ /* if not in polling don't use ent after this point */
+ if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
+ poll_timeout(ent);
+ /* make sure we read the descriptor after ownership is SW */
+ rmb();
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
+ }
+}
+
+static const char *deliv_status_to_str(u8 status)
+{
+ switch (status) {
+ case MLX5_CMD_DELIVERY_STAT_OK:
+ return "no errors";
+ case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
+ return "signature error";
+ case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
+ return "token error";
+ case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
+ return "bad block number";
+ case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
+ return "output pointer not aligned to block size";
+ case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
+ return "input pointer not aligned to block size";
+ case MLX5_CMD_DELIVERY_STAT_FW_ERR:
+ return "firmware internal error";
+ case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
+ return "command input length error";
+ case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
+ return "command output length error";
+ case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
+ return "reserved fields not cleared";
+ case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
+ return "bad command descriptor type";
+ default:
+ return "unknown status code";
+ }
+}
+
+static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+{
+ unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int err;
+
+ if (!wait_for_completion_timeout(&ent->handling, timeout) &&
+ cancel_work_sync(&ent->work)) {
+ ent->ret = -ECANCELED;
+ goto out_err;
+ }
+ if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
+ wait_for_completion(&ent->done);
+ } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+ ent->ret = -ETIMEDOUT;
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ }
+
+out_err:
+ err = ent->ret;
+
+ if (err == -ETIMEDOUT) {
+ mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
+ } else if (err == -ECANCELED) {
+ mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
+ mlx5_command_str(msg_to_opcode(ent->in)),
+ msg_to_opcode(ent->in));
+ }
+ mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
+ err, deliv_status_to_str(ent->status), ent->status);
+
+ return err;
+}
+
+/* Notes:
+ * 1. Callback functions may not sleep
+ * 2. page queue commands do not support asynchrous completion
+ */
+static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
+ struct mlx5_cmd_msg *out, void *uout, int uout_size,
+ mlx5_cmd_cbk_t callback,
+ void *context, int page_queue, u8 *status,
+ u8 token, bool force_polling)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct mlx5_cmd_work_ent *ent;
+ struct mlx5_cmd_stats *stats;
+ int err = 0;
+ s64 ds;
+ u16 op;
+
+ if (callback && page_queue)
+ return -EINVAL;
+
+ ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
+ page_queue);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ ent->token = token;
+ ent->polling = force_polling;
+
+ init_completion(&ent->handling);
+ if (!callback)
+ init_completion(&ent->done);
+
+ INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler);
+ INIT_WORK(&ent->work, cmd_work_handler);
+ if (page_queue) {
+ cmd_work_handler(&ent->work);
+ } else if (!queue_work(cmd->wq, &ent->work)) {
+ mlx5_core_warn(dev, "failed to queue work\n");
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ if (callback)
+ goto out;
+
+ err = wait_func(dev, ent);
+ if (err == -ETIMEDOUT)
+ goto out;
+ if (err == -ECANCELED)
+ goto out_free;
+
+ ds = ent->ts2 - ent->ts1;
+ op = MLX5_GET(mbox_in, in->first.data, opcode);
+ if (op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[op];
+ spin_lock_irq(&stats->lock);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irq(&stats->lock);
+ }
+ mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
+ "fw exec time for %s is %lld nsec\n",
+ mlx5_command_str(op), ds);
+ *status = ent->status;
+
+out_free:
+ free_cmd(ent);
+out:
+ return err;
+}
+
+static ssize_t dbg_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char lbuf[3];
+ int err;
+
+ if (!dbg->in_msg || !dbg->out_msg)
+ return -ENOMEM;
+
+ if (count < sizeof(lbuf) - 1)
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = 0;
+
+ if (strcmp(lbuf, "go"))
+ return -EINVAL;
+
+ err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen);
+
+ return err ? err : count;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = dbg_write,
+};
+
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
+ u8 token)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_mailbox *next;
+ int copy;
+
+ if (!to || !from)
+ return -ENOMEM;
+
+ copy = min_t(int, size, sizeof(to->first.data));
+ memcpy(to->first.data, from, copy);
+ size -= copy;
+ from += copy;
+
+ next = to->next;
+ while (size) {
+ if (!next) {
+ /* this is a BUG */
+ return -ENOMEM;
+ }
+
+ copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+ block = next->buf;
+ memcpy(block->data, from, copy);
+ from += copy;
+ size -= copy;
+ block->token = token;
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_mailbox *next;
+ int copy;
+
+ if (!to || !from)
+ return -ENOMEM;
+
+ copy = min_t(int, size, sizeof(from->first.data));
+ memcpy(to, from->first.data, copy);
+ size -= copy;
+ to += copy;
+
+ next = from->next;
+ while (size) {
+ if (!next) {
+ /* this is a BUG */
+ return -ENOMEM;
+ }
+
+ copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
+ block = next->buf;
+
+ memcpy(to, block->data, copy);
+ to += copy;
+ size -= copy;
+ next = next->next;
+ }
+
+ return 0;
+}
+
+static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
+ gfp_t flags)
+{
+ struct mlx5_cmd_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof(*mailbox), flags);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = dma_pool_zalloc(dev->cmd.pool, flags,
+ &mailbox->dma);
+ if (!mailbox->buf) {
+ mlx5_core_dbg(dev, "failed allocation\n");
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+ mailbox->next = NULL;
+
+ return mailbox;
+}
+
+static void free_cmd_box(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_mailbox *mailbox)
+{
+ dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
+
+static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
+ gfp_t flags, int size,
+ u8 token)
+{
+ struct mlx5_cmd_mailbox *tmp, *head = NULL;
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_msg *msg;
+ int err;
+ int n;
+ int i;
+
+ msg = kzalloc(sizeof(*msg), flags);
+ if (!msg)
+ return ERR_PTR(-ENOMEM);
+
+ msg->len = size;
+ n = mlx5_calc_cmd_blocks(msg);
+
+ for (i = 0; i < n; i++) {
+ tmp = alloc_cmd_box(dev, flags);
+ if (IS_ERR(tmp)) {
+ mlx5_core_warn(dev, "failed allocating block\n");
+ err = PTR_ERR(tmp);
+ goto err_alloc;
+ }
+
+ block = tmp->buf;
+ tmp->next = head;
+ block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
+ block->block_num = cpu_to_be32(n - i - 1);
+ block->token = token;
+ head = tmp;
+ }
+ msg->next = head;
+ return msg;
+
+err_alloc:
+ while (head) {
+ tmp = head->next;
+ free_cmd_box(dev, head);
+ head = tmp;
+ }
+ kfree(msg);
+
+ return ERR_PTR(err);
+}
+
+static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_msg *msg)
+{
+ struct mlx5_cmd_mailbox *head = msg->next;
+ struct mlx5_cmd_mailbox *next;
+
+ while (head) {
+ next = head->next;
+ free_cmd_box(dev, head);
+ head = next;
+ }
+ kfree(msg);
+}
+
+static ssize_t data_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ void *ptr;
+
+ if (*pos != 0)
+ return -EINVAL;
+
+ kfree(dbg->in_msg);
+ dbg->in_msg = NULL;
+ dbg->inlen = 0;
+ ptr = memdup_user(buf, count);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ dbg->in_msg = ptr;
+ dbg->inlen = count;
+
+ *pos = count;
+
+ return count;
+}
+
+static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+ if (!dbg->out_msg)
+ return -ENOMEM;
+
+ return simple_read_from_buffer(buf, count, pos, dbg->out_msg,
+ dbg->outlen);
+}
+
+static const struct file_operations dfops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = data_write,
+ .read = data_read,
+};
+
+static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char outlen[8];
+ int err;
+
+ err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
+ if (err < 0)
+ return err;
+
+ return simple_read_from_buffer(buf, count, pos, outlen, err);
+}
+
+static ssize_t outlen_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_core_dev *dev = filp->private_data;
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ char outlen_str[8] = {0};
+ int outlen;
+ void *ptr;
+ int err;
+
+ if (*pos != 0 || count > 6)
+ return -EINVAL;
+
+ kfree(dbg->out_msg);
+ dbg->out_msg = NULL;
+ dbg->outlen = 0;
+
+ if (copy_from_user(outlen_str, buf, count))
+ return -EFAULT;
+
+ err = sscanf(outlen_str, "%d", &outlen);
+ if (err < 0)
+ return err;
+
+ ptr = kzalloc(outlen, GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ dbg->out_msg = ptr;
+ dbg->outlen = outlen;
+
+ *pos = count;
+
+ return count;
+}
+
+static const struct file_operations olfops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = outlen_write,
+ .read = outlen_read,
+};
+
+static void set_wqname(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+
+ snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
+ dev_name(&dev->pdev->dev));
+}
+
+static void clean_debug_files(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+
+ if (!mlx5_debugfs_root)
+ return;
+
+ mlx5_cmdif_debugfs_cleanup(dev);
+ debugfs_remove_recursive(dbg->dbg_root);
+}
+
+static int create_debugfs_files(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
+ int err = -ENOMEM;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
+ if (!dbg->dbg_root)
+ return err;
+
+ dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root,
+ dev, &dfops);
+ if (!dbg->dbg_in)
+ goto err_dbg;
+
+ dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root,
+ dev, &dfops);
+ if (!dbg->dbg_out)
+ goto err_dbg;
+
+ dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
+ dev, &olfops);
+ if (!dbg->dbg_outlen)
+ goto err_dbg;
+
+ dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
+ &dbg->status);
+ if (!dbg->dbg_status)
+ goto err_dbg;
+
+ dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
+ if (!dbg->dbg_run)
+ goto err_dbg;
+
+ mlx5_cmdif_debugfs_init(dev);
+
+ return 0;
+
+err_dbg:
+ clean_debug_files(dev);
+ return err;
+}
+
+static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ down(&cmd->sem);
+ down(&cmd->pages_sem);
+
+ cmd->mode = mode;
+
+ up(&cmd->pages_sem);
+ for (i = 0; i < cmd->max_reg_cmds; i++)
+ up(&cmd->sem);
+}
+
+void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
+{
+ mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS);
+}
+
+void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
+{
+ mlx5_cmd_change_mod(dev, CMD_MODE_POLLING);
+}
+
+static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
+{
+ unsigned long flags;
+
+ if (msg->parent) {
+ spin_lock_irqsave(&msg->parent->lock, flags);
+ list_add_tail(&msg->list, &msg->parent->head);
+ spin_unlock_irqrestore(&msg->parent->lock, flags);
+ } else {
+ mlx5_free_cmd_msg(dev, msg);
+ }
+}
+
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct mlx5_cmd_work_ent *ent;
+ mlx5_cmd_cbk_t callback;
+ void *context;
+ int err;
+ int i;
+ s64 ds;
+ struct mlx5_cmd_stats *stats;
+ unsigned long flags;
+ unsigned long vector;
+
+ /* there can be at most 32 command queues */
+ vector = vec & 0xffffffff;
+ for (i = 0; i < (1 << cmd->log_sz); i++) {
+ if (test_bit(i, &vector)) {
+ struct semaphore *sem;
+
+ ent = cmd->ent_arr[i];
+
+ /* if we already completed the command, ignore it */
+ if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
+ &ent->state)) {
+ /* only real completion can free the cmd slot */
+ if (!forced) {
+ mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+ ent->idx);
+ free_ent(cmd, ent->idx);
+ free_cmd(ent);
+ }
+ continue;
+ }
+
+ if (ent->callback)
+ cancel_delayed_work(&ent->cb_timeout_work);
+ if (ent->page_queue)
+ sem = &cmd->pages_sem;
+ else
+ sem = &cmd->sem;
+ ent->ts2 = ktime_get_ns();
+ memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
+ dump_command(dev, ent, 0);
+ if (!ent->ret) {
+ if (!cmd->checksum_disabled)
+ ent->ret = verify_signature(ent);
+ else
+ ent->ret = 0;
+ if (vec & MLX5_TRIGGERED_CMD_COMP)
+ ent->status = MLX5_DRIVER_STATUS_ABORTED;
+ else
+ ent->status = ent->lay->status_own >> 1;
+
+ mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
+ ent->ret, deliv_status_to_str(ent->status), ent->status);
+ }
+
+ /* only real completion will free the entry slot */
+ if (!forced)
+ free_ent(cmd, ent->idx);
+
+ if (ent->callback) {
+ ds = ent->ts2 - ent->ts1;
+ if (ent->op < ARRAY_SIZE(cmd->stats)) {
+ stats = &cmd->stats[ent->op];
+ spin_lock_irqsave(&stats->lock, flags);
+ stats->sum += ds;
+ ++stats->n;
+ spin_unlock_irqrestore(&stats->lock, flags);
+ }
+
+ callback = ent->callback;
+ context = ent->context;
+ err = ent->ret;
+ if (!err) {
+ err = mlx5_copy_from_msg(ent->uout,
+ ent->out,
+ ent->uout_size);
+
+ err = err ? err : mlx5_cmd_check(dev,
+ ent->in->first.data,
+ ent->uout);
+ }
+
+ mlx5_free_cmd_msg(dev, ent->out);
+ free_msg(dev, ent->in);
+
+ err = err ? err : ent->status;
+ if (!forced)
+ free_cmd(ent);
+ callback(err, context);
+ } else {
+ complete(&ent->done);
+ }
+ up(sem);
+ }
+ }
+}
+EXPORT_SYMBOL(mlx5_cmd_comp_handler);
+
+static int status_to_err(u8 status)
+{
+ return status ? -1 : 0; /* TBD more meaningful codes */
+}
+
+static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
+ gfp_t gfp)
+{
+ struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+ struct cmd_msg_cache *ch = NULL;
+ struct mlx5_cmd *cmd = &dev->cmd;
+ int i;
+
+ if (in_size <= 16)
+ goto cache_miss;
+
+ for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ ch = &cmd->cache[i];
+ if (in_size > ch->max_inbox_size)
+ continue;
+ spin_lock_irq(&ch->lock);
+ if (list_empty(&ch->head)) {
+ spin_unlock_irq(&ch->lock);
+ continue;
+ }
+ msg = list_entry(ch->head.next, typeof(*msg), list);
+ /* For cached lists, we must explicitly state what is
+ * the real size
+ */
+ msg->len = in_size;
+ list_del(&msg->list);
+ spin_unlock_irq(&ch->lock);
+ break;
+ }
+
+ if (!IS_ERR(msg))
+ return msg;
+
+cache_miss:
+ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+ return msg;
+}
+
+static int is_manage_pages(void *in)
+{
+ return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES;
+}
+
+static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size, mlx5_cmd_cbk_t callback, void *context,
+ bool force_polling)
+{
+ struct mlx5_cmd_msg *inb;
+ struct mlx5_cmd_msg *outb;
+ int pages_queue;
+ gfp_t gfp;
+ int err;
+ u8 status = 0;
+ u32 drv_synd;
+ u8 token;
+
+ if (pci_channel_offline(dev->pdev) ||
+ dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+ err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
+ MLX5_SET(mbox_out, out, status, status);
+ MLX5_SET(mbox_out, out, syndrome, drv_synd);
+ return err;
+ }
+
+ pages_queue = is_manage_pages(in);
+ gfp = callback ? GFP_ATOMIC : GFP_KERNEL;
+
+ inb = alloc_msg(dev, in_size, gfp);
+ if (IS_ERR(inb)) {
+ err = PTR_ERR(inb);
+ return err;
+ }
+
+ token = alloc_token(&dev->cmd);
+
+ err = mlx5_copy_to_msg(inb, in, in_size, token);
+ if (err) {
+ mlx5_core_warn(dev, "err %d\n", err);
+ goto out_in;
+ }
+
+ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
+ if (IS_ERR(outb)) {
+ err = PTR_ERR(outb);
+ goto out_in;
+ }
+
+ err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
+ pages_queue, &status, token, force_polling);
+ if (err)
+ goto out_out;
+
+ mlx5_core_dbg(dev, "err %d, status %d\n", err, status);
+ if (status) {
+ err = status_to_err(status);
+ goto out_out;
+ }
+
+ if (!callback)
+ err = mlx5_copy_from_msg(out, outb, out_size);
+
+out_out:
+ if (!callback)
+ mlx5_free_cmd_msg(dev, outb);
+
+out_in:
+ if (!callback)
+ free_msg(dev, inb);
+ return err;
+}
+
+int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
+ int out_size)
+{
+ int err;
+
+ err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
+ return err ? : mlx5_cmd_check(dev, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec);
+
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size, mlx5_cmd_cbk_t callback,
+ void *context)
+{
+ return cmd_exec(dev, in, in_size, out, out_size, callback, context,
+ false);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_cb);
+
+int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
+ void *out, int out_size)
+{
+ int err;
+
+ err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
+
+ return err ? : mlx5_cmd_check(dev, in, out);
+}
+EXPORT_SYMBOL(mlx5_cmd_exec_polling);
+
+static void destroy_msg_cache(struct mlx5_core_dev *dev)
+{
+ struct cmd_msg_cache *ch;
+ struct mlx5_cmd_msg *msg;
+ struct mlx5_cmd_msg *n;
+ int i;
+
+ for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+ ch = &dev->cmd.cache[i];
+ list_for_each_entry_safe(msg, n, &ch->head, list) {
+ list_del(&msg->list);
+ mlx5_free_cmd_msg(dev, msg);
+ }
+ }
+}
+
+static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = {
+ 512, 32, 16, 8, 2
+};
+
+static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = {
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 2,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 16,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 256,
+ 16 + MLX5_CMD_DATA_BLOCK_SIZE * 512,
+};
+
+static void create_msg_cache(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ struct cmd_msg_cache *ch;
+ struct mlx5_cmd_msg *msg;
+ int i;
+ int k;
+
+ /* Initialize and fill the caches with initial entries */
+ for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+ ch = &cmd->cache[k];
+ spin_lock_init(&ch->lock);
+ INIT_LIST_HEAD(&ch->head);
+ ch->num_ent = cmd_cache_num_ent[k];
+ ch->max_inbox_size = cmd_cache_ent_size[k];
+ for (i = 0; i < ch->num_ent; i++) {
+ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN,
+ ch->max_inbox_size, 0);
+ if (IS_ERR(msg))
+ break;
+ msg->parent = ch;
+ list_add_tail(&msg->list, &ch->head);
+ }
+ }
+}
+
+static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+ struct device *ddev = &dev->pdev->dev;
+
+ cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE,
+ &cmd->alloc_dma, GFP_KERNEL);
+ if (!cmd->cmd_alloc_buf)
+ return -ENOMEM;
+
+ /* make sure it is aligned to 4K */
+ if (!((uintptr_t)cmd->cmd_alloc_buf & (MLX5_ADAPTER_PAGE_SIZE - 1))) {
+ cmd->cmd_buf = cmd->cmd_alloc_buf;
+ cmd->dma = cmd->alloc_dma;
+ cmd->alloc_size = MLX5_ADAPTER_PAGE_SIZE;
+ return 0;
+ }
+
+ dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf,
+ cmd->alloc_dma);
+ cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev,
+ 2 * MLX5_ADAPTER_PAGE_SIZE - 1,
+ &cmd->alloc_dma, GFP_KERNEL);
+ if (!cmd->cmd_alloc_buf)
+ return -ENOMEM;
+
+ cmd->cmd_buf = PTR_ALIGN(cmd->cmd_alloc_buf, MLX5_ADAPTER_PAGE_SIZE);
+ cmd->dma = ALIGN(cmd->alloc_dma, MLX5_ADAPTER_PAGE_SIZE);
+ cmd->alloc_size = 2 * MLX5_ADAPTER_PAGE_SIZE - 1;
+ return 0;
+}
+
+static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
+{
+ struct device *ddev = &dev->pdev->dev;
+
+ dma_free_coherent(ddev, cmd->alloc_size, cmd->cmd_alloc_buf,
+ cmd->alloc_dma);
+}
+
+int mlx5_cmd_init(struct mlx5_core_dev *dev)
+{
+ int size = sizeof(struct mlx5_cmd_prot_block);
+ int align = roundup_pow_of_two(size);
+ struct mlx5_cmd *cmd = &dev->cmd;
+ u32 cmd_h, cmd_l;
+ u16 cmd_if_rev;
+ int err;
+ int i;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd_if_rev = cmdif_rev(dev);
+ if (cmd_if_rev != CMD_IF_REV) {
+ dev_err(&dev->pdev->dev,
+ "Driver cmdif rev(%d) differs from firmware's(%d)\n",
+ CMD_IF_REV, cmd_if_rev);
+ return -EINVAL;
+ }
+
+ cmd->pool = dma_pool_create("mlx5_cmd", &dev->pdev->dev, size, align,
+ 0);
+ if (!cmd->pool)
+ return -ENOMEM;
+
+ err = alloc_cmd_page(dev, cmd);
+ if (err)
+ goto err_free_pool;
+
+ cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
+ cmd->log_sz = cmd_l >> 4 & 0xf;
+ cmd->log_stride = cmd_l & 0xf;
+ if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
+ dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
+ 1 << cmd->log_sz);
+ err = -EINVAL;
+ goto err_free_page;
+ }
+
+ if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) {
+ dev_err(&dev->pdev->dev, "command queue size overflow\n");
+ err = -EINVAL;
+ goto err_free_page;
+ }
+
+ cmd->checksum_disabled = 1;
+ cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
+ cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
+
+ cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+ if (cmd->cmdif_rev > CMD_IF_REV) {
+ dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
+ CMD_IF_REV, cmd->cmdif_rev);
+ err = -EOPNOTSUPP;
+ goto err_free_page;
+ }
+
+ spin_lock_init(&cmd->alloc_lock);
+ spin_lock_init(&cmd->token_lock);
+ for (i = 0; i < ARRAY_SIZE(cmd->stats); i++)
+ spin_lock_init(&cmd->stats[i].lock);
+
+ sema_init(&cmd->sem, cmd->max_reg_cmds);
+ sema_init(&cmd->pages_sem, 1);
+
+ cmd_h = (u32)((u64)(cmd->dma) >> 32);
+ cmd_l = (u32)(cmd->dma);
+ if (cmd_l & 0xfff) {
+ dev_err(&dev->pdev->dev, "invalid command queue address\n");
+ err = -ENOMEM;
+ goto err_free_page;
+ }
+
+ iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
+ iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
+
+ /* Make sure firmware sees the complete address before we proceed */
+ wmb();
+
+ mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
+
+ cmd->mode = CMD_MODE_POLLING;
+
+ create_msg_cache(dev);
+
+ set_wqname(dev);
+ cmd->wq = create_singlethread_workqueue(cmd->wq_name);
+ if (!cmd->wq) {
+ dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
+ err = -ENOMEM;
+ goto err_cache;
+ }
+
+ err = create_debugfs_files(dev);
+ if (err) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ return 0;
+
+err_wq:
+ destroy_workqueue(cmd->wq);
+
+err_cache:
+ destroy_msg_cache(dev);
+
+err_free_page:
+ free_cmd_page(dev, cmd);
+
+err_free_pool:
+ dma_pool_destroy(cmd->pool);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_init);
+
+void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+
+ clean_debug_files(dev);
+ destroy_workqueue(cmd->wq);
+ destroy_msg_cache(dev);
+ free_cmd_page(dev, cmd);
+ dma_pool_destroy(cmd->pool);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
new file mode 100644
index 000000000..a4179122a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/cq.h>
+#include "mlx5_core.h"
+
+#define TASKLET_MAX_TIME 2
+#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)
+
+void mlx5_cq_tasklet_cb(unsigned long data)
+{
+ unsigned long flags;
+ unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
+ struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data;
+ struct mlx5_core_cq *mcq;
+ struct mlx5_core_cq *temp;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ list_splice_tail_init(&ctx->list, &ctx->process_list);
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ list_for_each_entry_safe(mcq, temp, &ctx->process_list,
+ tasklet_ctx.list) {
+ list_del_init(&mcq->tasklet_ctx.list);
+ mcq->tasklet_ctx.comp(mcq);
+ mlx5_cq_put(mcq);
+ if (time_after(jiffies, end))
+ break;
+ }
+
+ if (!list_empty(&ctx->process_list))
+ tasklet_schedule(&ctx->task);
+}
+
+static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
+{
+ unsigned long flags;
+ struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
+
+ spin_lock_irqsave(&tasklet_ctx->lock, flags);
+ /* When migrating CQs between EQs will be implemented, please note
+ * that you need to sync this point. It is possible that
+ * while migrating a CQ, completions on the old EQs could
+ * still arrive.
+ */
+ if (list_empty_careful(&cq->tasklet_ctx.list)) {
+ mlx5_cq_hold(cq);
+ list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
+ }
+ spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
+}
+
+int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen)
+{
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
+ struct mlx5_eq *eq;
+ int err;
+
+ eq = mlx5_eqn2eq(dev, eqn);
+ if (IS_ERR(eq))
+ return PTR_ERR(eq);
+
+ memset(out, 0, sizeof(out));
+ MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err)
+ return err;
+
+ cq->cqn = MLX5_GET(create_cq_out, out, cqn);
+ cq->cons_index = 0;
+ cq->arm_sn = 0;
+ cq->eq = eq;
+ refcount_set(&cq->refcount, 1);
+ init_completion(&cq->free);
+ if (!cq->comp)
+ cq->comp = mlx5_add_cq_to_tasklet;
+ /* assuming CQ will be deleted before the EQ */
+ cq->tasklet_ctx.priv = &eq->tasklet_ctx;
+ INIT_LIST_HEAD(&cq->tasklet_ctx.list);
+
+ /* Add to comp EQ CQ tree to recv comp events */
+ err = mlx5_eq_add_cq(eq, cq);
+ if (err)
+ goto err_cmd;
+
+ /* Add to async EQ CQ tree to recv async events */
+ err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ goto err_cq_add;
+
+ cq->pid = current->pid;
+ err = mlx5_debug_cq_add(dev, cq);
+ if (err)
+ mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n",
+ cq->cqn);
+
+ cq->uar = dev->priv.uar;
+
+ return 0;
+
+err_cq_add:
+ mlx5_eq_del_cq(eq, cq);
+err_cmd:
+ memset(din, 0, sizeof(din));
+ memset(dout, 0, sizeof(dout));
+ MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+ mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_cq);
+
+int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
+ int err;
+
+ err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+ if (err)
+ return err;
+
+ err = mlx5_eq_del_cq(cq->eq, cq);
+ if (err)
+ return err;
+
+ MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ synchronize_irq(cq->irqn);
+
+ mlx5_debug_cq_remove(dev, cq);
+ mlx5_cq_put(cq);
+ wait_for_completion(&cq->free);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_cq);
+
+int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0};
+
+ MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ);
+ MLX5_SET(query_cq_in, in, cqn, cq->cqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_cq);
+
+int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
+
+ MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_cq);
+
+int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
+ struct mlx5_core_cq *cq,
+ u16 cq_period,
+ u16 cq_max_count)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0};
+ void *cqc;
+
+ MLX5_SET(modify_cq_in, in, cqn, cq->cqn);
+ cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, cq_period, cq_period);
+ MLX5_SET(cqc, cqc, cq_max_count, cq_max_count);
+ MLX5_SET(modify_cq_in, in,
+ modify_field_select_resize_field_select.modify_field_select.modify_field_select,
+ MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+
+ return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
+}
+EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
new file mode 100644
index 000000000..90fabd612
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+enum {
+ QP_PID,
+ QP_STATE,
+ QP_XPORT,
+ QP_MTU,
+ QP_N_RECV,
+ QP_RECV_SZ,
+ QP_N_SEND,
+ QP_LOG_PG_SZ,
+ QP_RQPN,
+};
+
+static char *qp_fields[] = {
+ [QP_PID] = "pid",
+ [QP_STATE] = "state",
+ [QP_XPORT] = "transport",
+ [QP_MTU] = "mtu",
+ [QP_N_RECV] = "num_recv",
+ [QP_RECV_SZ] = "rcv_wqe_sz",
+ [QP_N_SEND] = "num_send",
+ [QP_LOG_PG_SZ] = "log2_page_sz",
+ [QP_RQPN] = "remote_qpn",
+};
+
+enum {
+ EQ_NUM_EQES,
+ EQ_INTR,
+ EQ_LOG_PG_SZ,
+};
+
+static char *eq_fields[] = {
+ [EQ_NUM_EQES] = "num_eqes",
+ [EQ_INTR] = "intr",
+ [EQ_LOG_PG_SZ] = "log_page_size",
+};
+
+enum {
+ CQ_PID,
+ CQ_NUM_CQES,
+ CQ_LOG_PG_SZ,
+};
+
+static char *cq_fields[] = {
+ [CQ_PID] = "pid",
+ [CQ_NUM_CQES] = "num_cqes",
+ [CQ_LOG_PG_SZ] = "log_page_size",
+};
+
+struct dentry *mlx5_debugfs_root;
+EXPORT_SYMBOL(mlx5_debugfs_root);
+
+void mlx5_register_debugfs(void)
+{
+ mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
+ if (IS_ERR_OR_NULL(mlx5_debugfs_root))
+ mlx5_debugfs_root = NULL;
+}
+
+void mlx5_unregister_debugfs(void)
+{
+ debugfs_remove(mlx5_debugfs_root);
+}
+
+int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ atomic_set(&dev->num_qps, 0);
+
+ dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root);
+ if (!dev->priv.qp_debugfs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.qp_debugfs);
+}
+
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root);
+ if (!dev->priv.eq_debugfs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.eq_debugfs);
+}
+
+static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cmd_stats *stats;
+ u64 field = 0;
+ int ret;
+ char tbuf[22];
+
+ stats = filp->private_data;
+ spin_lock_irq(&stats->lock);
+ if (stats->n)
+ field = div64_u64(stats->sum, stats->n);
+ spin_unlock_irq(&stats->lock);
+ ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static ssize_t average_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_cmd_stats *stats;
+
+ stats = filp->private_data;
+ spin_lock_irq(&stats->lock);
+ stats->sum = 0;
+ stats->n = 0;
+ spin_unlock_irq(&stats->lock);
+
+ *pos += count;
+
+ return count;
+}
+
+static const struct file_operations stats_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = average_read,
+ .write = average_write,
+};
+
+int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd_stats *stats;
+ struct dentry **cmd;
+ const char *namep;
+ int err;
+ int i;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ cmd = &dev->priv.cmdif_debugfs;
+ *cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
+ if (!*cmd)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
+ stats = &dev->cmd.stats[i];
+ namep = mlx5_command_str(i);
+ if (strcmp(namep, "unknown command opcode")) {
+ stats->root = debugfs_create_dir(namep, *cmd);
+ if (!stats->root) {
+ mlx5_core_warn(dev, "failed adding command %d\n",
+ i);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ stats->avg = debugfs_create_file("average", 0400,
+ stats->root, stats,
+ &stats_fops);
+ if (!stats->avg) {
+ mlx5_core_warn(dev, "failed creating debugfs file\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ stats->count = debugfs_create_u64("n", 0400,
+ stats->root,
+ &stats->n);
+ if (!stats->count) {
+ mlx5_core_warn(dev, "failed creating debugfs file\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+
+ return 0;
+out:
+ debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+ return err;
+}
+
+void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.cmdif_debugfs);
+}
+
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root);
+ if (!dev->priv.cq_debugfs)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->priv.cq_debugfs);
+}
+
+static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+ int index, int *is_str)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
+ struct mlx5_qp_context *ctx;
+ u64 param = 0;
+ u32 *out;
+ int err;
+ int no_sq;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ err = mlx5_core_qp_query(dev, qp, out, outlen);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query qp err=%d\n", err);
+ goto out;
+ }
+
+ *is_str = 0;
+
+ /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
+ ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc);
+
+ switch (index) {
+ case QP_PID:
+ param = qp->pid;
+ break;
+ case QP_STATE:
+ param = (unsigned long)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
+ *is_str = 1;
+ break;
+ case QP_XPORT:
+ param = (unsigned long)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
+ *is_str = 1;
+ break;
+ case QP_MTU:
+ switch (ctx->mtu_msgmax >> 5) {
+ case IB_MTU_256:
+ param = 256;
+ break;
+ case IB_MTU_512:
+ param = 512;
+ break;
+ case IB_MTU_1024:
+ param = 1024;
+ break;
+ case IB_MTU_2048:
+ param = 2048;
+ break;
+ case IB_MTU_4096:
+ param = 4096;
+ break;
+ default:
+ param = 0;
+ }
+ break;
+ case QP_N_RECV:
+ param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+ break;
+ case QP_RECV_SZ:
+ param = 1 << ((ctx->rq_size_stride & 7) + 4);
+ break;
+ case QP_N_SEND:
+ no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15;
+ if (!no_sq)
+ param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11);
+ else
+ param = 0;
+ break;
+ case QP_LOG_PG_SZ:
+ param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f;
+ param += 12;
+ break;
+ case QP_RQPN:
+ param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff;
+ break;
+ }
+
+out:
+ kfree(out);
+ return param;
+}
+
+static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ int index)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
+ u64 param = 0;
+ void *ctx;
+ u32 *out;
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ err = mlx5_core_eq_query(dev, eq, out, outlen);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query eq\n");
+ goto out;
+ }
+ ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);
+
+ switch (index) {
+ case EQ_NUM_EQES:
+ param = 1 << MLX5_GET(eqc, ctx, log_eq_size);
+ break;
+ case EQ_INTR:
+ param = MLX5_GET(eqc, ctx, intr);
+ break;
+ case EQ_LOG_PG_SZ:
+ param = MLX5_GET(eqc, ctx, log_page_size) + 12;
+ break;
+ }
+
+out:
+ kfree(out);
+ return param;
+}
+
+static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
+ int index)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cq_out);
+ u64 param = 0;
+ void *ctx;
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return param;
+
+ err = mlx5_core_query_cq(dev, cq, out, outlen);
+ if (err) {
+ mlx5_core_warn(dev, "failed to query cq\n");
+ goto out;
+ }
+ ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context);
+
+ switch (index) {
+ case CQ_PID:
+ param = cq->pid;
+ break;
+ case CQ_NUM_CQES:
+ param = 1 << MLX5_GET(cqc, ctx, log_cq_size);
+ break;
+ case CQ_LOG_PG_SZ:
+ param = MLX5_GET(cqc, ctx, log_page_size);
+ break;
+ }
+
+out:
+ kvfree(out);
+ return param;
+}
+
+static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_field_desc *desc;
+ struct mlx5_rsc_debug *d;
+ char tbuf[18];
+ int is_str = 0;
+ u64 field;
+ int ret;
+
+ desc = filp->private_data;
+ d = (void *)(desc - desc->i) - sizeof(*d);
+ switch (d->type) {
+ case MLX5_DBG_RSC_QP:
+ field = qp_read_field(d->dev, d->object, desc->i, &is_str);
+ break;
+
+ case MLX5_DBG_RSC_EQ:
+ field = eq_read_field(d->dev, d->object, desc->i);
+ break;
+
+ case MLX5_DBG_RSC_CQ:
+ field = cq_read_field(d->dev, d->object, desc->i);
+ break;
+
+ default:
+ mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type);
+ return -EINVAL;
+ }
+
+ if (is_str)
+ ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)(unsigned long)field);
+ else
+ ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
+
+ return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = dbg_read,
+};
+
+static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
+ struct dentry *root, struct mlx5_rsc_debug **dbg,
+ int rsn, char **field, int nfile, void *data)
+{
+ struct mlx5_rsc_debug *d;
+ char resn[32];
+ int err;
+ int i;
+
+ d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->dev = dev;
+ d->object = data;
+ d->type = type;
+ sprintf(resn, "0x%x", rsn);
+ d->root = debugfs_create_dir(resn, root);
+ if (!d->root) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ for (i = 0; i < nfile; i++) {
+ d->fields[i].i = i;
+ d->fields[i].dent = debugfs_create_file(field[i], 0400,
+ d->root, &d->fields[i],
+ &fops);
+ if (!d->fields[i].dent) {
+ err = -ENOMEM;
+ goto out_rem;
+ }
+ }
+ *dbg = d;
+
+ return 0;
+out_rem:
+ debugfs_remove_recursive(d->root);
+
+out_free:
+ kfree(d);
+ return err;
+}
+
+static void rem_res_tree(struct mlx5_rsc_debug *d)
+{
+ debugfs_remove_recursive(d->root);
+ kfree(d);
+}
+
+int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs,
+ &qp->dbg, qp->qpn, qp_fields,
+ ARRAY_SIZE(qp_fields), qp);
+ if (err)
+ qp->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (qp->dbg)
+ rem_res_tree(qp->dbg);
+}
+
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs,
+ &eq->dbg, eq->eqn, eq_fields,
+ ARRAY_SIZE(eq_fields), eq);
+ if (err)
+ eq->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (eq->dbg)
+ rem_res_tree(eq->dbg);
+}
+
+int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ int err;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs,
+ &cq->dbg, cq->cqn, cq_fields,
+ ARRAY_SIZE(cq_fields), cq);
+ if (err)
+ cq->dbg = NULL;
+
+ return err;
+}
+
+void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ if (cq->dbg)
+ rem_res_tree(cq->dbg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
new file mode 100644
index 000000000..3692d6a1c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+static LIST_HEAD(intf_list);
+static LIST_HEAD(mlx5_dev_list);
+/* intf dev list mutex */
+static DEFINE_MUTEX(mlx5_intf_mutex);
+
+struct mlx5_device_context {
+ struct list_head list;
+ struct mlx5_interface *intf;
+ void *context;
+ unsigned long state;
+};
+
+struct mlx5_delayed_event {
+ struct list_head list;
+ struct mlx5_core_dev *dev;
+ enum mlx5_dev_event event;
+ unsigned long param;
+};
+
+enum {
+ MLX5_INTERFACE_ADDED,
+ MLX5_INTERFACE_ATTACHED,
+};
+
+static void add_delayed_event(struct mlx5_priv *priv,
+ struct mlx5_core_dev *dev,
+ enum mlx5_dev_event event,
+ unsigned long param)
+{
+ struct mlx5_delayed_event *delayed_event;
+
+ delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
+ if (!delayed_event) {
+ mlx5_core_err(dev, "event %d is missed\n", event);
+ return;
+ }
+
+ mlx5_core_dbg(dev, "Accumulating event %d\n", event);
+ delayed_event->dev = dev;
+ delayed_event->event = event;
+ delayed_event->param = param;
+ list_add_tail(&delayed_event->list, &priv->waiting_events_list);
+}
+
+static void delayed_event_release(struct mlx5_device_context *dev_ctx,
+ struct mlx5_priv *priv)
+{
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+ struct mlx5_delayed_event *de;
+ struct mlx5_delayed_event *n;
+ struct list_head temp;
+
+ INIT_LIST_HEAD(&temp);
+
+ spin_lock_irq(&priv->ctx_lock);
+
+ priv->is_accum_events = false;
+ list_splice_init(&priv->waiting_events_list, &temp);
+ if (!dev_ctx->context)
+ goto out;
+ list_for_each_entry_safe(de, n, &temp, list)
+ dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
+
+out:
+ spin_unlock_irq(&priv->ctx_lock);
+
+ list_for_each_entry_safe(de, n, &temp, list) {
+ list_del(&de->list);
+ kfree(de);
+ }
+}
+
+/* accumulating events that can come after mlx5_ib calls to
+ * ib_register_device, till adding that interface to the events list.
+ */
+static void delayed_event_start(struct mlx5_priv *priv)
+{
+ spin_lock_irq(&priv->ctx_lock);
+ priv->is_accum_events = true;
+ spin_unlock_irq(&priv->ctx_lock);
+}
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ if (!mlx5_lag_intf_add(intf, priv))
+ return;
+
+ dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
+ if (!dev_ctx)
+ return;
+
+ dev_ctx->intf = intf;
+
+ delayed_event_start(priv);
+
+ dev_ctx->context = intf->add(dev);
+ if (dev_ctx->context) {
+ set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+ if (intf->attach)
+ set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+
+ spin_lock_irq(&priv->ctx_lock);
+ list_add_tail(&dev_ctx->list, &priv->ctx_list);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (dev_ctx->intf->pfault) {
+ if (priv->pfault) {
+ mlx5_core_err(dev, "multiple page fault handlers not supported");
+ } else {
+ priv->pfault_ctx = dev_ctx->context;
+ priv->pfault = dev_ctx->intf->pfault;
+ }
+ }
+#endif
+ spin_unlock_irq(&priv->ctx_lock);
+ }
+
+ delayed_event_release(dev_ctx, priv);
+
+ if (!dev_ctx->context)
+ kfree(dev_ctx);
+}
+
+static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
+ struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+
+ list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+ if (dev_ctx->intf == intf)
+ return dev_ctx;
+ return NULL;
+}
+
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ dev_ctx = mlx5_get_device(intf, priv);
+ if (!dev_ctx)
+ return;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ spin_lock_irq(&priv->ctx_lock);
+ if (priv->pfault == dev_ctx->intf->pfault)
+ priv->pfault = NULL;
+ spin_unlock_irq(&priv->ctx_lock);
+
+ synchronize_srcu(&priv->pfault_srcu);
+#endif
+
+ spin_lock_irq(&priv->ctx_lock);
+ list_del(&dev_ctx->list);
+ spin_unlock_irq(&priv->ctx_lock);
+
+ if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ intf->remove(dev, dev_ctx->context);
+
+ kfree(dev_ctx);
+}
+
+static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ dev_ctx = mlx5_get_device(intf, priv);
+ if (!dev_ctx)
+ return;
+
+ delayed_event_start(priv);
+ if (intf->attach) {
+ if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+ goto out;
+ if (intf->attach(dev, dev_ctx->context))
+ goto out;
+
+ set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+ } else {
+ if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ goto out;
+ dev_ctx->context = intf->add(dev);
+ if (!dev_ctx->context)
+ goto out;
+
+ set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+ }
+
+out:
+ delayed_event_release(dev_ctx, priv);
+}
+
+void mlx5_attach_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(intf, &intf_list, list)
+ mlx5_attach_interface(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ dev_ctx = mlx5_get_device(intf, priv);
+ if (!dev_ctx)
+ return;
+
+ if (intf->detach) {
+ if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+ return;
+ intf->detach(dev, dev_ctx->context);
+ clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+ } else {
+ if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ return;
+ intf->remove(dev, dev_ctx->context);
+ clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+ }
+}
+
+void mlx5_detach_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(intf, &intf_list, list)
+ mlx5_detach_interface(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+bool mlx5_device_registered(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv;
+ bool found = false;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+ if (priv == &dev->priv)
+ found = true;
+ mutex_unlock(&mlx5_intf_mutex);
+
+ return found;
+}
+
+int mlx5_register_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_add_tail(&priv->dev_list, &mlx5_dev_list);
+ list_for_each_entry(intf, &intf_list, list)
+ mlx5_add_device(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+
+ return 0;
+}
+
+void mlx5_unregister_device(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_interface *intf;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry_reverse(intf, &intf_list, list)
+ mlx5_remove_device(intf, priv);
+ list_del(&priv->dev_list);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_register_interface(struct mlx5_interface *intf)
+{
+ struct mlx5_priv *priv;
+
+ if (!intf->add || !intf->remove)
+ return -EINVAL;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_add_tail(&intf->list, &intf_list);
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+ mlx5_add_device(intf, priv);
+ mutex_unlock(&mlx5_intf_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_register_interface);
+
+void mlx5_unregister_interface(struct mlx5_interface *intf)
+{
+ struct mlx5_priv *priv;
+
+ mutex_lock(&mlx5_intf_mutex);
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+ mlx5_remove_device(intf, priv);
+ list_del(&intf->list);
+ mutex_unlock(&mlx5_intf_mutex);
+}
+EXPORT_SYMBOL(mlx5_unregister_interface);
+
+/* Must be called with intf_mutex held */
+static bool mlx5_has_added_dev_by_protocol(struct mlx5_core_dev *mdev, int protocol)
+{
+ struct mlx5_device_context *dev_ctx;
+ struct mlx5_interface *intf;
+ bool found = false;
+
+ list_for_each_entry(intf, &intf_list, list) {
+ if (intf->protocol == protocol) {
+ dev_ctx = mlx5_get_device(intf, &mdev->priv);
+ if (dev_ctx && test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+ found = true;
+ break;
+ }
+ }
+
+ return found;
+}
+
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+ mutex_lock(&mlx5_intf_mutex);
+ if (mlx5_has_added_dev_by_protocol(mdev, protocol)) {
+ mlx5_remove_dev_by_protocol(mdev, protocol);
+ mlx5_add_dev_by_protocol(mdev, protocol);
+ }
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ struct mlx5_device_context *dev_ctx;
+ unsigned long flags;
+ void *result = NULL;
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+
+ list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
+ if ((dev_ctx->intf->protocol == protocol) &&
+ dev_ctx->intf->get_dev) {
+ result = dev_ctx->intf->get_dev(dev_ctx->context);
+ break;
+ }
+
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+ return result;
+}
+EXPORT_SYMBOL(mlx5_get_protocol_dev);
+
+/* Must be called with intf_mutex held */
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+ struct mlx5_interface *intf;
+
+ list_for_each_entry(intf, &intf_list, list)
+ if (intf->protocol == protocol) {
+ mlx5_add_device(intf, &dev->priv);
+ break;
+ }
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+ struct mlx5_interface *intf;
+
+ list_for_each_entry(intf, &intf_list, list)
+ if (intf->protocol == protocol) {
+ mlx5_remove_device(intf, &dev->priv);
+ break;
+ }
+}
+
+static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
+{
+ return (u32)((pci_domain_nr(dev->pdev->bus) << 16) |
+ (dev->pdev->bus->number << 8) |
+ PCI_SLOT(dev->pdev->devfn));
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+{
+ u32 pci_id = mlx5_gen_pci_id(dev);
+ struct mlx5_core_dev *res = NULL;
+ struct mlx5_core_dev *tmp_dev;
+ struct mlx5_priv *priv;
+
+ list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
+ tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+ if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
+ res = tmp_dev;
+ break;
+ }
+ }
+
+ return res;
+}
+
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+ unsigned long param)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_device_context *dev_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->ctx_lock, flags);
+
+ if (priv->is_accum_events)
+ add_delayed_event(priv, dev, event, param);
+
+ /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
+ * still in priv->ctx_list. In this case, only notify the dev_ctx if its
+ * ADDED or ATTACHED bit are set.
+ */
+ list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+ if (dev_ctx->intf->event &&
+ (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
+ test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
+ dev_ctx->intf->event(dev, dev_ctx->context, event, param);
+
+ spin_unlock_irqrestore(&priv->ctx_lock, flags);
+}
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_core_page_fault(struct mlx5_core_dev *dev,
+ struct mlx5_pagefault *pfault)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&priv->pfault_srcu);
+ if (priv->pfault)
+ priv->pfault(dev, priv->pfault_ctx, pfault);
+ srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
+}
+#endif
+
+void mlx5_dev_list_lock(void)
+{
+ mutex_lock(&mlx5_intf_mutex);
+}
+
+void mlx5_dev_list_unlock(void)
+{
+ mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_dev_list_trylock(void)
+{
+ return mutex_trylock(&mlx5_intf_mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
new file mode 100644
index 000000000..0f11fff32
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include "fs_tracepoint.h"
+#include <linux/stringify.h>
+
+#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name
+#define MASK_VAL(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET(spec, mask, fld),\
+ .v = MLX5_GET(spec, val, fld)}
+#define MASK_VAL_BE(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET_BE(type, spec, mask, fld),\
+ .v = MLX5_GET_BE(type, spec, val, fld)}
+#define GET_MASKED_VAL(name) (name.m & name.v)
+
+#define GET_MASK_VAL(name, type, mask, val, fld) \
+ (name.m = MLX5_GET(type, mask, fld), \
+ name.v = MLX5_GET(type, val, fld), \
+ name.m & name.v)
+#define PRINT_MASKED_VAL(name, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \
+ }
+#define PRINT_MASKED_VALP(name, cast, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", \
+ (cast)&name.v);\
+ }
+
+static void print_lyr_2_4_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_L2(type, name, fld) \
+ MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ DECLARE_MASK_VAL(u64, smac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)};
+ DECLARE_MASK_VAL(u64, dmac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)};
+ MASK_VAL_L2(u16, ethertype, ethertype);
+
+ PRINT_MASKED_VALP(smac, u8 *, p, "%pM");
+ PRINT_MASKED_VALP(dmac, u8 *, p, "%pM");
+ PRINT_MASKED_VAL(ethertype, p, "%04x");
+
+ if (ethertype.m == 0xffff) {
+ if (ethertype.v == ETH_P_IP) {
+#define MASK_VAL_L2_BE(type, name, fld) \
+ MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ MASK_VAL_L2_BE(u32, src_ipv4,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, dst_ipv4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
+ "%pI4");
+ PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
+ "%pI4");
+ } else if (ethertype.v == ETH_P_IPV6) {
+ static const struct in6_addr full_ones = {
+ .in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff),
+ __constant_htonl(0xffffffff)},
+ };
+ DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
+ DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
+
+ memcpy(src_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.m));
+ memcpy(dst_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.m));
+ memcpy(src_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.v));
+ memcpy(dst_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.v));
+
+ if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "src_ipv6=%pI6 ",
+ &src_ipv6.v);
+ if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "dst_ipv6=%pI6 ",
+ &dst_ipv6.v);
+ }
+ }
+
+#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\
+ MASK_VAL_L2(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+
+ PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x");
+ PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x");
+ PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x");
+ PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x");
+ PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d");
+}
+
+static void print_misc_parameters_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_MISC(type, name, fld) \
+ MASK_VAL(type, fte_match_set_misc, name, mask, value, fld)
+#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\
+ MASK_VAL_MISC(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+ DECLARE_MASK_VAL(u64, gre_key) = {
+ .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
+ MLX5_GET(fte_match_set_misc, mask, gre_key_l),
+ .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
+ MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+
+ PRINT_MASKED_VAL(gre_key, p, "%llu");
+ PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag,
+ outer_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag,
+ inner_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag,
+ outer_second_svlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag,
+ inner_second_svlan_tag, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u");
+ PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label,
+ p, "%x");
+ PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label,
+ p, "%x");
+}
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
+ trace_seq_printf(p, "[outer] ");
+ print_lyr_2_4_hdrs(p, mask_outer, value_outer);
+ }
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
+ trace_seq_printf(p, "[misc] ");
+ print_misc_parameters_hdrs(p, mask_misc, value_misc);
+ }
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
+ trace_seq_printf(p, "[inner] ");
+ print_lyr_2_4_hdrs(p, mask_inner, value_inner);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ switch (dst->type) {
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ trace_seq_printf(p, "vport=%u\n", dst->vport.num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ trace_seq_printf(p, "ft=%p\n", dst->ft);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ trace_seq_printf(p, "ft_num=%u\n", dst->ft_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
+ trace_seq_printf(p, "tir=%u\n", dst->tir_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
+ trace_seq_printf(p, "counter_id=%u\n", counter_id);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
+ trace_seq_printf(p, "port\n");
+ break;
+ }
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule);
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
new file mode 100644
index 000000000..0240aee91
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_FS_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include "../fs_core.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \
+ vinner, vmisc) \
+ parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\
+ vinner, vmisc)
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner);
+
+#define __parse_fs_dst(dst, counter_id) \
+ parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id)
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id);
+
+TRACE_EVENT(mlx5_fs_add_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, start_index)
+ __field(u32, end_index)
+ __field(u32, id)
+ __field(u8, mask_enable)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ fs_get_obj(__entry->ft, fg->node.parent);
+ __entry->start_index = fg->start_index;
+ __entry->end_index = fg->start_index + fg->max_ftes;
+ __entry->id = fg->id;
+ __entry->mask_enable = fg->mask.match_criteria_enable;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+
+ ),
+ TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n",
+ __entry->fg, __entry->ft, __entry->id,
+ __entry->start_index, __entry->end_index,
+ __entry->mask_enable,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, id)
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ __entry->id = fg->id;
+
+ ),
+ TP_printk("fg=%p id=%u\n",
+ __entry->fg, __entry->id)
+ );
+
+#define ACTION_FLAGS \
+ {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\
+ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\
+ {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP, "VLAN_POP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2, "VLAN_PUSH_2"},\
+ {MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2, "VLAN_POP_2"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
+
+TRACE_EVENT(mlx5_fs_set_fte,
+ TP_PROTO(const struct fs_fte *fte, int new_fte),
+ TP_ARGS(fte, new_fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, group_index)
+ __field(u32, index)
+ __field(u32, action)
+ __field(u32, flow_tag)
+ __field(u8, mask_enable)
+ __field(int, new_fte)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->new_fte = new_fte;
+ fs_get_obj(__entry->fg, fte->node.parent);
+ __entry->group_index = __entry->fg->id;
+ __entry->index = fte->index;
+ __entry->action = fte->action.action;
+ __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
+ __entry->flow_tag = fte->action.flow_tag;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+ memcpy(__entry->value_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ outer_headers),
+ sizeof(__entry->value_outer));
+ memcpy(__entry->value_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ inner_headers),
+ sizeof(__entry->value_inner));
+ memcpy(__entry->value_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ misc_parameters),
+ sizeof(__entry->value_misc));
+
+ ),
+ TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n",
+ __entry->new_fte ? "add" : "set",
+ __entry->fte, __entry->fg, __entry->index,
+ __entry->group_index, __print_flags(__entry->action, "|",
+ ACTION_FLAGS),
+ __entry->flow_tag,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->value_outer,
+ __entry->value_misc,
+ __entry->value_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fte,
+ TP_PROTO(const struct fs_fte *fte),
+ TP_ARGS(fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(u32, index)
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->index = fte->index;
+
+ ),
+ TP_printk("fte=%p index=%u\n",
+ __entry->fte, __entry->index)
+ );
+
+TRACE_EVENT(mlx5_fs_add_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ __field(u32, sw_action)
+ __field(u32, index)
+ __field(u32, counter_id)
+ __array(u8, destination, sizeof(struct mlx5_flow_destination))
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ __entry->index = __entry->fte->dests_size - 1;
+ __entry->sw_action = rule->sw_action;
+ memcpy(__entry->destination,
+ &rule->dest_attr,
+ sizeof(__entry->destination));
+ if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
+ rule->dest_attr.counter)
+ __entry->counter_id =
+ rule->dest_attr.counter->id;
+ ),
+ TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
+ __entry->rule, __entry->fte, __entry->index,
+ __print_flags(__entry->sw_action, "|", ACTION_FLAGS),
+ __parse_fs_dst(__entry->destination, __entry->counter_id))
+ );
+
+TRACE_EVENT(mlx5_fs_del_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ ),
+ TP_printk("rule=%p fte=%p\n",
+ __entry->rule, __entry->fte)
+ );
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE fs_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
new file mode 100644
index 000000000..a22e932a0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -0,0 +1,950 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#define CREATE_TRACE_POINTS
+#include "fw_tracer.h"
+#include "fw_tracer_tracepoint.h"
+
+static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
+{
+ u32 *string_db_base_address_out = tracer->str_db.base_address_out;
+ u32 *string_db_size_out = tracer->str_db.size_out;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ void *mtrc_cap_sp;
+ int err, i;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CAP, 0, 0);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n",
+ err);
+ return err;
+ }
+
+ if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
+ mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
+ return -ENOTSUPP;
+ }
+
+ tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
+ tracer->str_db.first_string_trace =
+ MLX5_GET(mtrc_cap, out, first_string_trace);
+ tracer->str_db.num_string_trace =
+ MLX5_GET(mtrc_cap, out, num_string_trace);
+ tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
+ tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+ for (i = 0; i < tracer->str_db.num_string_db; i++) {
+ mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
+ string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param,
+ mtrc_cap_sp,
+ string_db_base_address);
+ string_db_size_out[i] = MLX5_GET(mtrc_string_db_param,
+ mtrc_cap_sp, string_db_size);
+ }
+
+ return err;
+}
+
+static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer,
+ u32 *out, u32 out_size,
+ u8 trace_owner)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+ MLX5_SET(mtrc_cap, in, trace_owner, trace_owner);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size,
+ MLX5_REG_MTRC_CAP, 0, 1);
+}
+
+static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ int err;
+
+ err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+ MLX5_FW_TRACER_ACQUIRE_OWNERSHIP);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n",
+ err);
+ return err;
+ }
+
+ tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+ if (!tracer->owner)
+ return -EBUSY;
+
+ return 0;
+}
+
+static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
+{
+ u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+ mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+ MLX5_FW_TRACER_RELEASE_OWNERSHIP);
+ tracer->owner = false;
+}
+
+static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct device *ddev = &dev->pdev->dev;
+ dma_addr_t dma;
+ void *buff;
+ gfp_t gfp;
+ int err;
+
+ tracer->buff.size = TRACE_BUFFER_SIZE_BYTE;
+
+ gfp = GFP_KERNEL | __GFP_ZERO;
+ buff = (void *)__get_free_pages(gfp,
+ get_order(tracer->buff.size));
+ if (!buff) {
+ err = -ENOMEM;
+ mlx5_core_warn(dev, "FWTracer: Failed to allocate pages, %d\n", err);
+ return err;
+ }
+ tracer->buff.log_buf = buff;
+
+ dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
+ dma_mapping_error(ddev, dma));
+ err = -ENOMEM;
+ goto free_pages;
+ }
+ tracer->buff.dma = dma;
+
+ return 0;
+
+free_pages:
+ free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+
+ return err;
+}
+
+static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct device *ddev = &dev->pdev->dev;
+
+ if (!tracer->buff.log_buf)
+ return;
+
+ dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
+ free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
+}
+
+static int mlx5_fw_tracer_create_mkey(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ int err, inlen, i;
+ __be64 *mtt;
+ void *mkc;
+ u32 *in;
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+ sizeof(*mtt) * round_up(TRACER_BUFFER_PAGE_NUM, 2);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+ mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++)
+ mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE);
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, pd, tracer->buff.pdn);
+ MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
+ MLX5_SET64(mkc, mkc, start_addr, tracer->buff.dma);
+ MLX5_SET64(mkc, mkc, len, tracer->buff.size);
+ err = mlx5_core_create_mkey(dev, &tracer->buff.mkey, in, inlen);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to create mkey, %d\n", err);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5_fw_tracer_free_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ u32 num_string_db = tracer->str_db.num_string_db;
+ int i;
+
+ for (i = 0; i < num_string_db; i++) {
+ kfree(tracer->str_db.buffer[i]);
+ tracer->str_db.buffer[i] = NULL;
+ }
+}
+
+static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer)
+{
+ u32 *string_db_size_out = tracer->str_db.size_out;
+ u32 num_string_db = tracer->str_db.num_string_db;
+ int i;
+
+ for (i = 0; i < num_string_db; i++) {
+ tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL);
+ if (!tracer->str_db.buffer[i])
+ goto free_strings_db;
+ }
+
+ return 0;
+
+free_strings_db:
+ mlx5_fw_tracer_free_strings_db(tracer);
+ return -ENOMEM;
+}
+
+static void mlx5_tracer_read_strings_db(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
+ read_fw_strings_work);
+ u32 num_of_reads, num_string_db = tracer->str_db.num_string_db;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+ u32 leftovers, offset;
+ int err = 0, i, j;
+ u32 *out, outlen;
+ void *out_value;
+
+ outlen = MLX5_ST_SZ_BYTES(mtrc_stdb) + STRINGS_DB_READ_SIZE_BYTES;
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < num_string_db; i++) {
+ offset = 0;
+ MLX5_SET(mtrc_stdb, in, string_db_index, i);
+ num_of_reads = tracer->str_db.size_out[i] /
+ STRINGS_DB_READ_SIZE_BYTES;
+ leftovers = (tracer->str_db.size_out[i] %
+ STRINGS_DB_READ_SIZE_BYTES) /
+ STRINGS_DB_LEFTOVER_SIZE_BYTES;
+
+ MLX5_SET(mtrc_stdb, in, read_size, STRINGS_DB_READ_SIZE_BYTES);
+ for (j = 0; j < num_of_reads; j++) {
+ MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ outlen, MLX5_REG_MTRC_STDB,
+ 0, 1);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+ err);
+ goto out_free;
+ }
+
+ out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+ memcpy(tracer->str_db.buffer[i] + offset, out_value,
+ STRINGS_DB_READ_SIZE_BYTES);
+ offset += STRINGS_DB_READ_SIZE_BYTES;
+ }
+
+ /* Strings database is aligned to 64, need to read leftovers*/
+ MLX5_SET(mtrc_stdb, in, read_size,
+ STRINGS_DB_LEFTOVER_SIZE_BYTES);
+ for (j = 0; j < leftovers; j++) {
+ MLX5_SET(mtrc_stdb, in, start_offset, offset);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ outlen, MLX5_REG_MTRC_STDB,
+ 0, 1);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to read strings DB %d\n",
+ err);
+ goto out_free;
+ }
+
+ out_value = MLX5_ADDR_OF(mtrc_stdb, out, string_db_data);
+ memcpy(tracer->str_db.buffer[i] + offset, out_value,
+ STRINGS_DB_LEFTOVER_SIZE_BYTES);
+ offset += STRINGS_DB_LEFTOVER_SIZE_BYTES;
+ }
+ }
+
+ tracer->str_db.loaded = true;
+
+out_free:
+ kfree(out);
+out:
+ return;
+}
+
+static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_ctrl, in, arm_event, 1);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CTRL, 0, 1);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err);
+}
+
+static const char *VAL_PARM = "%llx";
+static const char *REPLACE_64_VAL_PARM = "%x%x";
+static const char *PARAM_CHAR = "%";
+
+static int mlx5_tracer_message_hash(u32 message_id)
+{
+ return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1);
+}
+
+static struct tracer_string_format *mlx5_tracer_message_insert(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct hlist_head *head =
+ &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+ struct tracer_string_format *cur_string;
+
+ cur_string = kzalloc(sizeof(*cur_string), GFP_KERNEL);
+ if (!cur_string)
+ return NULL;
+
+ hlist_add_head(&cur_string->hlist, head);
+
+ return cur_string;
+}
+
+static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_string_format *cur_string;
+ u32 str_ptr, offset;
+ int i;
+
+ str_ptr = tracer_event->string_event.string_param;
+
+ for (i = 0; i < tracer->str_db.num_string_db; i++) {
+ if (str_ptr > tracer->str_db.base_address_out[i] &&
+ str_ptr < tracer->str_db.base_address_out[i] +
+ tracer->str_db.size_out[i]) {
+ offset = str_ptr - tracer->str_db.base_address_out[i];
+ /* add it to the hash */
+ cur_string = mlx5_tracer_message_insert(tracer, tracer_event);
+ if (!cur_string)
+ return NULL;
+ cur_string->string = (char *)(tracer->str_db.buffer[i] +
+ offset);
+ return cur_string;
+ }
+ }
+
+ return NULL;
+}
+
+static void mlx5_tracer_clean_message(struct tracer_string_format *str_frmt)
+{
+ hlist_del(&str_frmt->hlist);
+ kfree(str_frmt);
+}
+
+static int mlx5_tracer_get_num_of_params(char *str)
+{
+ char *substr, *pstr = str;
+ int num_of_params = 0;
+
+ /* replace %llx with %x%x */
+ substr = strstr(pstr, VAL_PARM);
+ while (substr) {
+ memcpy(substr, REPLACE_64_VAL_PARM, 4);
+ pstr = substr;
+ substr = strstr(pstr, VAL_PARM);
+ }
+
+ /* count all the % characters */
+ substr = strstr(str, PARAM_CHAR);
+ while (substr) {
+ num_of_params += 1;
+ str = substr + 1;
+ substr = strstr(str, PARAM_CHAR);
+ }
+
+ return num_of_params;
+}
+
+static struct tracer_string_format *mlx5_tracer_message_find(struct hlist_head *head,
+ u8 event_id, u32 tmsn)
+{
+ struct tracer_string_format *message;
+
+ hlist_for_each_entry(message, head, hlist)
+ if (message->event_id == event_id && message->tmsn == tmsn)
+ return message;
+
+ return NULL;
+}
+
+static struct tracer_string_format *mlx5_tracer_message_get(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct hlist_head *head =
+ &tracer->hash[mlx5_tracer_message_hash(tracer_event->string_event.tmsn)];
+
+ return mlx5_tracer_message_find(head, tracer_event->event_id, tracer_event->string_event.tmsn);
+}
+
+static void poll_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event, u64 *trace)
+{
+ u32 timestamp_low, timestamp_mid, timestamp_high, urts;
+
+ tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id);
+ tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost);
+
+ switch (tracer_event->event_id) {
+ case TRACER_EVENT_TYPE_TIMESTAMP:
+ tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP;
+ urts = MLX5_GET(tracer_timestamp_event, trace, urts);
+ if (tracer->trc_ver == 0)
+ tracer_event->timestamp_event.unreliable = !!(urts >> 2);
+ else
+ tracer_event->timestamp_event.unreliable = !!(urts & 1);
+
+ timestamp_low = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp7_0);
+ timestamp_mid = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp39_8);
+ timestamp_high = MLX5_GET(tracer_timestamp_event,
+ trace, timestamp52_40);
+
+ tracer_event->timestamp_event.timestamp =
+ ((u64)timestamp_high << 40) |
+ ((u64)timestamp_mid << 8) |
+ (u64)timestamp_low;
+ break;
+ default:
+ if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
+ tracer_event->event_id <= tracer->str_db.first_string_trace +
+ tracer->str_db.num_string_trace) {
+ tracer_event->type = TRACER_EVENT_TYPE_STRING;
+ tracer_event->string_event.timestamp =
+ MLX5_GET(tracer_string_event, trace, timestamp);
+ tracer_event->string_event.string_param =
+ MLX5_GET(tracer_string_event, trace, string_param);
+ tracer_event->string_event.tmsn =
+ MLX5_GET(tracer_string_event, trace, tmsn);
+ tracer_event->string_event.tdsn =
+ MLX5_GET(tracer_string_event, trace, tdsn);
+ } else {
+ tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED;
+ }
+ break;
+ }
+}
+
+static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event)
+{
+ struct tracer_event tracer_event;
+ u8 event_id;
+
+ event_id = MLX5_GET(tracer_event, ts_event, event_id);
+
+ if (event_id == TRACER_EVENT_TYPE_TIMESTAMP)
+ poll_trace(tracer, &tracer_event, ts_event);
+ else
+ tracer_event.timestamp_event.timestamp = 0;
+
+ return tracer_event.timestamp_event.timestamp;
+}
+
+static void mlx5_fw_tracer_clean_print_hash(struct mlx5_fw_tracer *tracer)
+{
+ struct tracer_string_format *str_frmt;
+ struct hlist_node *n;
+ int i;
+
+ for (i = 0; i < MESSAGE_HASH_SIZE; i++) {
+ hlist_for_each_entry_safe(str_frmt, n, &tracer->hash[i], hlist)
+ mlx5_tracer_clean_message(str_frmt);
+ }
+}
+
+static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
+{
+ struct tracer_string_format *str_frmt, *tmp_str;
+
+ list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list,
+ list)
+ list_del(&str_frmt->list);
+}
+
+static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
+ struct mlx5_core_dev *dev,
+ u64 trace_timestamp)
+{
+ char tmp[512];
+
+ snprintf(tmp, sizeof(tmp), str_frmt->string,
+ str_frmt->params[0],
+ str_frmt->params[1],
+ str_frmt->params[2],
+ str_frmt->params[3],
+ str_frmt->params[4],
+ str_frmt->params[5],
+ str_frmt->params[6]);
+
+ trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
+ str_frmt->event_id, tmp);
+
+ /* remove it from hash */
+ mlx5_tracer_clean_message(str_frmt);
+}
+
+static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_string_format *cur_string;
+
+ if (tracer_event->string_event.tdsn == 0) {
+ cur_string = mlx5_tracer_get_string(tracer, tracer_event);
+ if (!cur_string)
+ return -1;
+
+ cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string);
+ cur_string->last_param_num = 0;
+ cur_string->event_id = tracer_event->event_id;
+ cur_string->tmsn = tracer_event->string_event.tmsn;
+ cur_string->timestamp = tracer_event->string_event.timestamp;
+ cur_string->lost = tracer_event->lost_event;
+ if (cur_string->num_of_params == 0) /* trace with no params */
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ } else {
+ cur_string = mlx5_tracer_message_get(tracer, tracer_event);
+ if (!cur_string) {
+ pr_debug("%s Got string event for unknown string tdsm: %d\n",
+ __func__, tracer_event->string_event.tmsn);
+ return -1;
+ }
+ cur_string->last_param_num += 1;
+ if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
+ pr_debug("%s Number of params exceeds the max (%d)\n",
+ __func__, TRACER_MAX_PARAMS);
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ return 0;
+ }
+ /* keep the new parameter */
+ cur_string->params[cur_string->last_param_num - 1] =
+ tracer_event->string_event.string_param;
+ if (cur_string->last_param_num == cur_string->num_of_params)
+ list_add_tail(&cur_string->list, &tracer->ready_strings_list);
+ }
+
+ return 0;
+}
+
+static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ struct tracer_timestamp_event timestamp_event =
+ tracer_event->timestamp_event;
+ struct tracer_string_format *str_frmt, *tmp_str;
+ struct mlx5_core_dev *dev = tracer->dev;
+ u64 trace_timestamp;
+
+ list_for_each_entry_safe(str_frmt, tmp_str, &tracer->ready_strings_list, list) {
+ list_del(&str_frmt->list);
+ if (str_frmt->timestamp < (timestamp_event.timestamp & MASK_6_0))
+ trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
+ (str_frmt->timestamp & MASK_6_0);
+ else
+ trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
+ (str_frmt->timestamp & MASK_6_0);
+
+ mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
+ }
+}
+
+static int mlx5_tracer_handle_trace(struct mlx5_fw_tracer *tracer,
+ struct tracer_event *tracer_event)
+{
+ if (tracer_event->type == TRACER_EVENT_TYPE_STRING) {
+ mlx5_tracer_handle_string_trace(tracer, tracer_event);
+ } else if (tracer_event->type == TRACER_EVENT_TYPE_TIMESTAMP) {
+ if (!tracer_event->timestamp_event.unreliable)
+ mlx5_tracer_handle_timestamp_trace(tracer, tracer_event);
+ } else {
+ pr_debug("%s Got unrecognised type %d for parsing, exiting..\n",
+ __func__, tracer_event->type);
+ }
+ return 0;
+}
+
+static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer =
+ container_of(work, struct mlx5_fw_tracer, handle_traces_work);
+ u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK];
+ u32 block_count, start_offset, prev_start_offset, prev_consumer_index;
+ u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event);
+ struct mlx5_core_dev *dev = tracer->dev;
+ struct tracer_event tracer_event;
+ int i;
+
+ mlx5_core_dbg(dev, "FWTracer: Handle Trace event, owner=(%d)\n", tracer->owner);
+ if (!tracer->owner)
+ return;
+
+ if (unlikely(!tracer->str_db.loaded))
+ goto arm;
+
+ block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
+ start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+ /* Copy the block to local buffer to avoid HW override while being processed*/
+ memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+ TRACER_BLOCK_SIZE_BYTE);
+
+ block_timestamp =
+ get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+
+ while (block_timestamp > tracer->last_timestamp) {
+ /* Check block override if its not the first block */
+ if (!tracer->last_timestamp) {
+ u64 *ts_event;
+ /* To avoid block override be the HW in case of buffer
+ * wraparound, the time stamp of the previous block
+ * should be compared to the last timestamp handled
+ * by the driver.
+ */
+ prev_consumer_index =
+ (tracer->buff.consumer_index - 1) & (block_count - 1);
+ prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+ ts_event = tracer->buff.log_buf + prev_start_offset +
+ (TRACES_PER_BLOCK - 1) * trace_event_size;
+ last_block_timestamp = get_block_timestamp(tracer, ts_event);
+ /* If previous timestamp different from last stored
+ * timestamp then there is a good chance that the
+ * current buffer is overwritten and therefore should
+ * not be parsed.
+ */
+ if (tracer->last_timestamp != last_block_timestamp) {
+ mlx5_core_warn(dev, "FWTracer: Events were lost\n");
+ tracer->last_timestamp = block_timestamp;
+ tracer->buff.consumer_index =
+ (tracer->buff.consumer_index + 1) & (block_count - 1);
+ break;
+ }
+ }
+
+ /* Parse events */
+ for (i = 0; i < TRACES_PER_BLOCK ; i++) {
+ poll_trace(tracer, &tracer_event, &tmp_trace_block[i]);
+ mlx5_tracer_handle_trace(tracer, &tracer_event);
+ }
+
+ tracer->buff.consumer_index =
+ (tracer->buff.consumer_index + 1) & (block_count - 1);
+
+ tracer->last_timestamp = block_timestamp;
+ start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+ memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+ TRACER_BLOCK_SIZE_BYTE);
+ block_timestamp = get_block_timestamp(tracer,
+ &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+ }
+
+arm:
+ mlx5_fw_tracer_arm(dev);
+}
+
+static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
+ MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
+ ilog2(TRACER_BUFFER_PAGE_NUM));
+ MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CONF, 0, 1);
+ if (err)
+ mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+
+ return err;
+}
+
+static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+ int err;
+
+ MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS);
+ MLX5_SET(mtrc_ctrl, in, trace_status, status);
+ MLX5_SET(mtrc_ctrl, in, arm_event, arm);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_MTRC_CTRL, 0, 1);
+
+ if (!err && status)
+ tracer->last_timestamp = 0;
+
+ return err;
+}
+
+static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev = tracer->dev;
+ int err;
+
+ err = mlx5_fw_tracer_ownership_acquire(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+ /* Don't fail since ownership can be acquired on a later FW event */
+ return 0;
+ }
+
+ err = mlx5_fw_tracer_set_mtrc_conf(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err);
+ goto release_ownership;
+ }
+
+ /* enable tracer & trace events */
+ err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err);
+ goto release_ownership;
+ }
+
+ mlx5_core_dbg(dev, "FWTracer: Ownership granted and active\n");
+ return 0;
+
+release_ownership:
+ mlx5_fw_tracer_ownership_release(tracer);
+ return err;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+ struct mlx5_fw_tracer *tracer =
+ container_of(work, struct mlx5_fw_tracer, ownership_change_work);
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
+ if (tracer->owner) {
+ tracer->owner = false;
+ tracer->buff.consumer_index = 0;
+ return;
+ }
+
+ mlx5_fw_tracer_start(tracer);
+}
+
+/* Create software resources (Buffers, etc ..) */
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_tracer *tracer = NULL;
+ int err;
+
+ if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) {
+ mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n");
+ return NULL;
+ }
+
+ tracer = kvzalloc(sizeof(*tracer), GFP_KERNEL);
+ if (!tracer)
+ return ERR_PTR(-ENOMEM);
+
+ tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer");
+ if (!tracer->work_queue) {
+ err = -ENOMEM;
+ goto free_tracer;
+ }
+
+ tracer->dev = dev;
+
+ INIT_LIST_HEAD(&tracer->ready_strings_list);
+ INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
+ INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db);
+ INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces);
+
+
+ err = mlx5_query_mtrc_caps(tracer);
+ if (err) {
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+ goto destroy_workqueue;
+ }
+
+ err = mlx5_fw_tracer_create_log_buf(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Create log buffer failed %d\n", err);
+ goto destroy_workqueue;
+ }
+
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Allocate strings database failed %d\n", err);
+ goto free_log_buf;
+ }
+
+ mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
+
+ return tracer;
+
+free_log_buf:
+ mlx5_fw_tracer_destroy_log_buf(tracer);
+destroy_workqueue:
+ tracer->dev = NULL;
+ destroy_workqueue(tracer->work_queue);
+free_tracer:
+ kvfree(tracer);
+ return ERR_PTR(err);
+}
+
+/* Create HW resources + start tracer
+ * must be called before Async EQ is created
+ */
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
+{
+ struct mlx5_core_dev *dev;
+ int err;
+
+ if (IS_ERR_OR_NULL(tracer))
+ return 0;
+
+ dev = tracer->dev;
+
+ if (!tracer->str_db.loaded)
+ queue_work(tracer->work_queue, &tracer->read_fw_strings_work);
+
+ err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err);
+ return err;
+ }
+
+ err = mlx5_fw_tracer_create_mkey(tracer);
+ if (err) {
+ mlx5_core_warn(dev, "FWTracer: Failed to create mkey %d\n", err);
+ goto err_dealloc_pd;
+ }
+
+ mlx5_fw_tracer_start(tracer);
+
+ return 0;
+
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
+ return err;
+}
+
+/* Stop tracer + Cleanup HW resources
+ * must be called after Async EQ is destroyed
+ */
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
+{
+ if (IS_ERR_OR_NULL(tracer))
+ return;
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
+ tracer->owner);
+
+ cancel_work_sync(&tracer->ownership_change_work);
+ cancel_work_sync(&tracer->handle_traces_work);
+
+ if (tracer->owner)
+ mlx5_fw_tracer_ownership_release(tracer);
+
+ mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey);
+ mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
+}
+
+/* Free software resources (Buffers, etc ..) */
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
+{
+ if (IS_ERR_OR_NULL(tracer))
+ return;
+
+ mlx5_core_dbg(tracer->dev, "FWTracer: Destroy\n");
+
+ cancel_work_sync(&tracer->read_fw_strings_work);
+ mlx5_fw_tracer_clean_ready_list(tracer);
+ mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_free_strings_db(tracer);
+ mlx5_fw_tracer_destroy_log_buf(tracer);
+ flush_workqueue(tracer->work_queue);
+ destroy_workqueue(tracer->work_queue);
+ kvfree(tracer);
+}
+
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+ struct mlx5_fw_tracer *tracer = dev->tracer;
+
+ if (!tracer)
+ return;
+
+ switch (eqe->sub_type) {
+ case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
+ queue_work(tracer->work_queue, &tracer->ownership_change_work);
+ break;
+ case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ break;
+ default:
+ mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
+ eqe->sub_type);
+ }
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
new file mode 100644
index 000000000..0347f2dd5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_TRACER_H__
+#define __LIB_TRACER_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define STRINGS_DB_SECTIONS_NUM 8
+#define STRINGS_DB_READ_SIZE_BYTES 256
+#define STRINGS_DB_LEFTOVER_SIZE_BYTES 64
+#define TRACER_BUFFER_PAGE_NUM 64
+#define TRACER_BUFFER_CHUNK 4096
+#define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK)
+
+#define TRACER_BLOCK_SIZE_BYTE 256
+#define TRACES_PER_BLOCK 32
+
+#define TRACER_MAX_PARAMS 7
+#define MESSAGE_HASH_BITS 6
+#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
+
+#define MASK_52_7 (0x1FFFFFFFFFFF80)
+#define MASK_6_0 (0x7F)
+
+struct mlx5_fw_tracer {
+ struct mlx5_core_dev *dev;
+ bool owner;
+ u8 trc_ver;
+ struct workqueue_struct *work_queue;
+ struct work_struct ownership_change_work;
+ struct work_struct read_fw_strings_work;
+
+ /* Strings DB */
+ struct {
+ u8 first_string_trace;
+ u8 num_string_trace;
+ u32 num_string_db;
+ u32 base_address_out[STRINGS_DB_SECTIONS_NUM];
+ u32 size_out[STRINGS_DB_SECTIONS_NUM];
+ void *buffer[STRINGS_DB_SECTIONS_NUM];
+ bool loaded;
+ } str_db;
+
+ /* Log Buffer */
+ struct {
+ u32 pdn;
+ void *log_buf;
+ dma_addr_t dma;
+ u32 size;
+ struct mlx5_core_mkey mkey;
+ u32 consumer_index;
+ } buff;
+
+ u64 last_timestamp;
+ struct work_struct handle_traces_work;
+ struct hlist_head hash[MESSAGE_HASH_SIZE];
+ struct list_head ready_strings_list;
+};
+
+struct tracer_string_format {
+ char *string;
+ int params[TRACER_MAX_PARAMS];
+ int num_of_params;
+ int last_param_num;
+ u8 event_id;
+ u32 tmsn;
+ struct hlist_node hlist;
+ struct list_head list;
+ u32 timestamp;
+ bool lost;
+};
+
+enum mlx5_fw_tracer_ownership_state {
+ MLX5_FW_TRACER_RELEASE_OWNERSHIP,
+ MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
+};
+
+enum tracer_ctrl_fields_select {
+ TRACE_STATUS = 1 << 0,
+};
+
+enum tracer_event_type {
+ TRACER_EVENT_TYPE_STRING,
+ TRACER_EVENT_TYPE_TIMESTAMP = 0xFF,
+ TRACER_EVENT_TYPE_UNRECOGNIZED,
+};
+
+enum tracing_mode {
+ TRACE_TO_MEMORY = 1 << 0,
+};
+
+struct tracer_timestamp_event {
+ u64 timestamp;
+ u8 unreliable;
+};
+
+struct tracer_string_event {
+ u32 timestamp;
+ u32 tmsn;
+ u32 tdsn;
+ u32 string_param;
+};
+
+struct tracer_event {
+ bool lost_event;
+ u32 type;
+ u8 event_id;
+ union {
+ struct tracer_string_event string_event;
+ struct tracer_timestamp_event timestamp_event;
+ };
+};
+
+struct mlx5_ifc_tracer_event_bits {
+ u8 lost[0x1];
+ u8 timestamp[0x7];
+ u8 event_id[0x8];
+ u8 event_data[0x30];
+};
+
+struct mlx5_ifc_tracer_string_event_bits {
+ u8 lost[0x1];
+ u8 timestamp[0x7];
+ u8 event_id[0x8];
+ u8 tmsn[0xd];
+ u8 tdsn[0x3];
+ u8 string_param[0x20];
+};
+
+struct mlx5_ifc_tracer_timestamp_event_bits {
+ u8 timestamp7_0[0x8];
+ u8 event_id[0x8];
+ u8 urts[0x3];
+ u8 timestamp52_40[0xd];
+ u8 timestamp39_8[0x20];
+};
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
new file mode 100644
index 000000000..83f90e9af
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer_tracepoint.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(__LIB_TRACER_TRACEPOINT_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __LIB_TRACER_TRACEPOINT_H__
+
+#include <linux/tracepoint.h>
+#include "fw_tracer.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+/* Tracepoint for FWTracer messages: */
+TRACE_EVENT(mlx5_fw,
+ TP_PROTO(const struct mlx5_fw_tracer *tracer, u64 trace_timestamp,
+ bool lost, u8 event_id, const char *msg),
+
+ TP_ARGS(tracer, trace_timestamp, lost, event_id, msg),
+
+ TP_STRUCT__entry(
+ __string(dev_name, dev_name(&tracer->dev->pdev->dev))
+ __field(u64, trace_timestamp)
+ __field(bool, lost)
+ __field(u8, event_id)
+ __string(msg, msg)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name(&tracer->dev->pdev->dev));
+ __entry->trace_timestamp = trace_timestamp;
+ __entry->lost = lost;
+ __entry->event_id = event_id;
+ __assign_str(msg, msg);
+ ),
+
+ TP_printk("%s [0x%llx] %d [0x%x] %s",
+ __get_str(dev_name),
+ __entry->trace_timestamp,
+ __entry->lost, __entry->event_id,
+ __get_str(msg))
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ./diag
+#define TRACE_INCLUDE_FILE fw_tracer_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
new file mode 100644
index 000000000..d79e177f8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -0,0 +1,979 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_H__
+#define __MLX5_EN_H__
+
+#include <linux/if_vlan.h>
+#include <linux/etherdevice.h>
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/crash_dump.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rhashtable.h>
+#include <net/switchdev.h>
+#include <net/xdp.h>
+#include <linux/net_dim.h>
+#include "wq.h"
+#include "mlx5_core.h"
+#include "en_stats.h"
+#include "en/fs.h"
+
+extern const struct net_device_ops mlx5e_netdev_ops;
+struct page_pool;
+
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
+
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
+#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+
+#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
+#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
+
+#define MLX5E_MAX_PRIORITY 8
+#define MLX5E_MAX_DSCP 64
+#define MLX5E_MAX_NUM_TC 8
+
+#define MLX5_RX_HEADROOM NET_SKB_PAD
+#define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
+ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
+#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
+ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
+#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
+#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+ (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+ MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
+
+#define MLX5_MPWRQ_LOG_WQE_SZ 18
+#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
+ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
+#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
+
+#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
+#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS)
+#define MLX5E_MAX_RQ_NUM_MTTS \
+ ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \
+ (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
+ (MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
+ (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))
+
+#define MLX5E_MIN_SKB_FRAG_SZ (MLX5_SKB_FRAG_SZ(MLX5_RX_HEADROOM))
+#define MLX5E_LOG_MAX_RX_WQE_BULK \
+ (ilog2(PAGE_SIZE / roundup_pow_of_two(MLX5E_MIN_SKB_FRAG_SZ)))
+
+#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6
+#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE (1 + MLX5E_LOG_MAX_RX_WQE_BULK)
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd, \
+ MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2
+
+#define MLX5E_RX_MAX_HEAD (256)
+
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
+#define MLX5E_DEFAULT_LRO_TIMEOUT 32
+#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4
+
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2
+
+#define MLX5E_LOG_INDIR_RQT_SIZE 0x7
+#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE)
+#define MLX5E_MIN_NUM_CHANNELS 0x1
+#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1)
+#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
+#define MLX5E_TX_CQ_POLL_BUDGET 128
+#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
+
+#define MLX5E_UMR_WQE_INLINE_SZ \
+ (sizeof(struct mlx5e_umr_wqe) + \
+ ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
+ MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5E_UMR_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
+
+#define MLX5E_NUM_MAIN_GROUPS 9
+
+#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
+
+#define mlx5e_dbg(mlevel, priv, format, ...) \
+do { \
+ if (NETIF_MSG_##mlevel & (priv)->msglevel) \
+ netdev_warn(priv->netdev, format, \
+ ##__VA_ARGS__); \
+} while (0)
+
+
+static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
+{
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW,
+ wq_size / 2);
+ default:
+ return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES,
+ wq_size / 2);
+ }
+}
+
+static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
+{
+ return is_kdump_kernel() ?
+ MLX5E_MIN_NUM_CHANNELS :
+ min_t(int, mdev->priv.eq_table.num_comp_vectors,
+ MLX5E_MAX_NUM_CHANNELS);
+}
+
+struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[0];
+};
+
+struct mlx5e_rx_wqe_ll {
+ struct mlx5_wqe_srq_next_seg next;
+ struct mlx5_wqe_data_seg data[0];
+};
+
+struct mlx5e_rx_wqe_cyc {
+ struct mlx5_wqe_data_seg data[0];
+};
+
+struct mlx5e_umr_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_umr_ctrl_seg uctrl;
+ struct mlx5_mkey_seg mkc;
+ struct mlx5_mtt inline_mtts[0];
+};
+
+extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
+
+static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
+ "rx_cqe_moder",
+ "tx_cqe_moder",
+ "rx_cqe_compress",
+ "rx_striding_rq",
+ "rx_no_csum_complete",
+};
+
+enum mlx5e_priv_flag {
+ MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
+ MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
+ MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
+ MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4),
+};
+
+#define MLX5E_SET_PFLAG(params, pflag, enable) \
+ do { \
+ if (enable) \
+ (params)->pflags |= (pflag); \
+ else \
+ (params)->pflags &= ~(pflag); \
+ } while (0)
+
+#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag)))
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
+#endif
+
+struct mlx5e_params {
+ u8 log_sq_size;
+ u8 rq_wq_type;
+ u8 log_rq_mtu_frames;
+ u16 num_channels;
+ u8 num_tc;
+ bool rx_cqe_compress_def;
+ struct net_dim_cq_moder rx_cq_moderation;
+ struct net_dim_cq_moder tx_cq_moderation;
+ bool lro_en;
+ u32 lro_wqe_sz;
+ u8 tx_min_inline_mode;
+ u8 rss_hfunc;
+ u8 toeplitz_hash_key[40];
+ u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
+ bool vlan_strip_disable;
+ bool scatter_fcs_en;
+ bool rx_dim_enabled;
+ bool tx_dim_enabled;
+ u32 lro_timeout;
+ u32 pflags;
+ struct bpf_prog *xdp_prog;
+ unsigned int sw_mtu;
+ int hard_mtu;
+};
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+struct mlx5e_cee_config {
+ /* bw pct for priority group */
+ u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
+ u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+ bool pfc_setting[CEE_DCBX_MAX_PRIO];
+ bool pfc_enable;
+};
+
+enum {
+ MLX5_DCB_CHG_RESET,
+ MLX5_DCB_NO_CHG,
+ MLX5_DCB_CHG_NO_RESET,
+};
+
+struct mlx5e_dcbx {
+ enum mlx5_dcbx_oper_mode mode;
+ struct mlx5e_cee_config cee_cfg; /* pending configuration */
+ u8 dscp_app_cnt;
+
+ /* The only setting that cannot be read from FW */
+ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
+ u8 cap;
+
+ /* Buffer configuration */
+ bool manual_buffer;
+ u32 cable_len;
+ u32 xoff;
+};
+
+struct mlx5e_dcbx_dp {
+ u8 dscp2prio[MLX5E_MAX_DSCP];
+ u8 trust_state;
+};
+#endif
+
+enum {
+ MLX5E_RQ_STATE_ENABLED,
+ MLX5E_RQ_STATE_AM,
+ MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
+};
+
+struct mlx5e_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per napi poll */
+ u16 event_ctr;
+ struct napi_struct *napi;
+ struct mlx5_core_cq mcq;
+ struct mlx5e_channel *channel;
+
+ /* cqe decompression */
+ struct mlx5_cqe64 title;
+ struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
+ u8 mini_arr_idx;
+ u16 decmprs_left;
+ u16 decmprs_wqe_counter;
+
+ /* control */
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_ctrl wq_ctrl;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_tx_wqe_info {
+ struct sk_buff *skb;
+ u32 num_bytes;
+ u8 num_wqebbs;
+ u8 num_dma;
+};
+
+enum mlx5e_dma_map_type {
+ MLX5E_DMA_MAP_SINGLE,
+ MLX5E_DMA_MAP_PAGE
+};
+
+struct mlx5e_sq_dma {
+ dma_addr_t addr;
+ u32 size;
+ enum mlx5e_dma_map_type type;
+};
+
+enum {
+ MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_RECOVERING,
+ MLX5E_SQ_STATE_IPSEC,
+ MLX5E_SQ_STATE_AM,
+ MLX5E_SQ_STATE_TLS,
+ MLX5E_SQ_STATE_REDIRECT,
+};
+
+struct mlx5e_sq_wqe_info {
+ u8 opcode;
+};
+
+struct mlx5e_txqsq {
+ /* data path */
+
+ /* dirtied @completion */
+ u16 cc;
+ u32 dma_fifo_cc;
+ struct net_dim dim; /* Adaptive Moderation */
+
+ /* dirtied @xmit */
+ u16 pc ____cacheline_aligned_in_smp;
+ u32 dma_fifo_pc;
+
+ struct mlx5e_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ u32 dma_fifo_mask;
+ struct mlx5e_sq_stats *stats;
+ struct {
+ struct mlx5e_sq_dma *dma_fifo;
+ struct mlx5e_tx_wqe_info *wqe_info;
+ } db;
+ void __iomem *uar_map;
+ struct netdev_queue *txq;
+ u32 sqn;
+ u8 min_inline_mode;
+ struct device *pdev;
+ __be32 mkey_be;
+ unsigned long state;
+ struct hwtstamp_config *tstamp;
+ struct mlx5_clock *clock;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+ int txq_ix;
+ u32 rate_limit;
+ struct mlx5e_txqsq_recover {
+ struct work_struct recover_work;
+ u64 last_recover;
+ } recover;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_dma_info {
+ struct page *page;
+ dma_addr_t addr;
+};
+
+struct mlx5e_xdp_info {
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
+ struct mlx5e_dma_info di;
+};
+
+struct mlx5e_xdpsq {
+ /* data path */
+
+ /* dirtied @completion */
+ u16 cc;
+ bool redirect_flush;
+
+ /* dirtied @xmit */
+ u16 pc ____cacheline_aligned_in_smp;
+ bool doorbell;
+
+ struct mlx5e_cq cq;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ struct mlx5e_xdpsq_stats *stats;
+ struct {
+ struct mlx5e_xdp_info *xdpi;
+ } db;
+ void __iomem *uar_map;
+ u32 sqn;
+ struct device *pdev;
+ __be32 mkey_be;
+ u8 min_inline_mode;
+ unsigned long state;
+ unsigned int hw_mtu;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_icosq {
+ /* data path */
+
+ /* dirtied @xmit */
+ u16 pc ____cacheline_aligned_in_smp;
+
+ struct mlx5e_cq cq;
+
+ /* write@xmit, read@completion */
+ struct {
+ struct mlx5e_sq_wqe_info *ico_wqe;
+ } db;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ void __iomem *uar_map;
+ u32 sqn;
+ unsigned long state;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5e_channel *channel;
+} ____cacheline_aligned_in_smp;
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+ return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
+
+struct mlx5e_wqe_frag_info {
+ struct mlx5e_dma_info *di;
+ u32 offset;
+ bool last_in_page;
+};
+
+struct mlx5e_umr_dma_info {
+ struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
+};
+
+struct mlx5e_mpw_info {
+ struct mlx5e_umr_dma_info umr;
+ u16 consumed_strides;
+ DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+};
+
+#define MLX5E_MAX_RX_FRAGS 4
+
+/* a single cache unit is capable to serve one napi call (for non-striding rq)
+ * or a MPWQE (for striding rq).
+ */
+#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
+ MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
+#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
+struct mlx5e_page_cache {
+ u32 head;
+ u32 tail;
+ struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
+};
+
+struct mlx5e_rq;
+typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
+typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
+
+enum mlx5e_rq_flag {
+ MLX5E_RQ_FLAG_XDP_XMIT = BIT(0),
+};
+
+struct mlx5e_rq_frag_info {
+ int frag_size;
+ int frag_stride;
+};
+
+struct mlx5e_rq_frags_info {
+ struct mlx5e_rq_frag_info arr[MLX5E_MAX_RX_FRAGS];
+ u8 num_frags;
+ u8 log_num_frags;
+ u8 wqe_bulk;
+};
+
+struct mlx5e_rq {
+ /* data path */
+ union {
+ struct {
+ struct mlx5_wq_cyc wq;
+ struct mlx5e_wqe_frag_info *frags;
+ struct mlx5e_dma_info *di;
+ struct mlx5e_rq_frags_info info;
+ mlx5e_fp_skb_from_cqe skb_from_cqe;
+ } wqe;
+ struct {
+ struct mlx5_wq_ll wq;
+ struct mlx5e_umr_wqe umr_wqe;
+ struct mlx5e_mpw_info *info;
+ mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
+ u16 num_strides;
+ u8 log_stride_sz;
+ bool umr_in_progress;
+ } mpwqe;
+ };
+ struct {
+ u16 headroom;
+ u8 map_dir; /* dma map direction */
+ } buff;
+
+ struct mlx5e_channel *channel;
+ struct device *pdev;
+ struct net_device *netdev;
+ struct mlx5e_rq_stats *stats;
+ struct mlx5e_cq cq;
+ struct mlx5e_page_cache page_cache;
+ struct hwtstamp_config *tstamp;
+ struct mlx5_clock *clock;
+
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe;
+ mlx5e_fp_post_rx_wqes post_wqes;
+ mlx5e_fp_dealloc_wqe dealloc_wqe;
+
+ unsigned long state;
+ int ix;
+ unsigned int hw_mtu;
+
+ struct net_dim dim; /* Dynamic Interrupt Moderation */
+
+ /* XDP */
+ struct bpf_prog *xdp_prog;
+ struct mlx5e_xdpsq xdpsq;
+ DECLARE_BITMAP(flags, 8);
+ struct page_pool *page_pool;
+
+ /* control */
+ struct mlx5_wq_ctrl wq_ctrl;
+ __be32 mkey_be;
+ u8 wq_type;
+ u32 rqn;
+ struct mlx5_core_dev *mdev;
+ struct mlx5_core_mkey umr_mkey;
+
+ /* XDP read-mostly */
+ struct xdp_rxq_info xdp_rxq;
+} ____cacheline_aligned_in_smp;
+
+struct mlx5e_channel {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_icosq icosq; /* internal control operations */
+ bool xdp;
+ struct napi_struct napi;
+ struct device *pdev;
+ struct net_device *netdev;
+ __be32 mkey_be;
+ u8 num_tc;
+
+ /* XDP_REDIRECT */
+ struct mlx5e_xdpsq xdpsq;
+
+ /* data path - accessed per napi poll */
+ struct irq_desc *irq_desc;
+ struct mlx5e_ch_stats *stats;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ struct mlx5_core_dev *mdev;
+ struct hwtstamp_config *tstamp;
+ int ix;
+ int cpu;
+};
+
+struct mlx5e_channels {
+ struct mlx5e_channel **c;
+ unsigned int num;
+ struct mlx5e_params params;
+};
+
+struct mlx5e_channel_stats {
+ struct mlx5e_ch_stats ch;
+ struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rq_stats rq;
+ struct mlx5e_xdpsq_stats rq_xdpsq;
+ struct mlx5e_xdpsq_stats xdpsq;
+} ____cacheline_aligned_in_smp;
+
+enum {
+ MLX5E_STATE_ASYNC_EVENTS_ENABLED,
+ MLX5E_STATE_OPENED,
+ MLX5E_STATE_DESTROYING,
+ MLX5E_STATE_XDP_TX_ENABLED,
+};
+
+struct mlx5e_rqt {
+ u32 rqtn;
+ bool enabled;
+};
+
+struct mlx5e_tir {
+ u32 tirn;
+ struct mlx5e_rqt rqt;
+ struct list_head list;
+};
+
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
+};
+
+struct mlx5e_priv {
+ /* priv data path fields - start */
+ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
+ int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ struct mlx5e_dcbx_dp dcbx_dp;
+#endif
+ /* priv data path fields - end */
+
+ u32 msglevel;
+ unsigned long state;
+ struct mutex state_lock; /* Protects Interface state */
+ struct mlx5e_rq drop_rq;
+
+ struct mlx5e_channels channels;
+ u32 tisn[MLX5E_MAX_NUM_TC];
+ struct mlx5e_rqt indir_rqt;
+ struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
+ u32 tx_rates[MLX5E_MAX_NUM_SQS];
+
+ struct mlx5e_flow_steering fs;
+
+ struct workqueue_struct *wq;
+ struct work_struct update_carrier_work;
+ struct work_struct set_rx_mode_work;
+ struct work_struct tx_timeout_work;
+ struct delayed_work update_stats_work;
+
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_stats stats;
+ struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
+ u8 max_opened_tc;
+ struct hwtstamp_config tstamp;
+ u16 q_counter;
+ u16 drop_rq_q_counter;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ struct mlx5e_dcbx dcbx;
+#endif
+
+ const struct mlx5e_profile *profile;
+ void *ppriv;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5e_ipsec *ipsec;
+#endif
+#ifdef CONFIG_MLX5_EN_TLS
+ struct mlx5e_tls *tls;
+#endif
+};
+
+struct mlx5e_profile {
+ void (*init)(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile, void *ppriv);
+ void (*cleanup)(struct mlx5e_priv *priv);
+ int (*init_rx)(struct mlx5e_priv *priv);
+ void (*cleanup_rx)(struct mlx5e_priv *priv);
+ int (*init_tx)(struct mlx5e_priv *priv);
+ void (*cleanup_tx)(struct mlx5e_priv *priv);
+ void (*enable)(struct mlx5e_priv *priv);
+ void (*disable)(struct mlx5e_priv *priv);
+ void (*update_stats)(struct mlx5e_priv *priv);
+ void (*update_carrier)(struct mlx5e_priv *priv);
+ int (*max_nch)(struct mlx5_core_dev *mdev);
+ struct {
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe;
+ mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
+ } rx_handlers;
+ int max_tc;
+};
+
+void mlx5e_build_ptys2ethtool_map(void);
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev,
+ select_queue_fallback_t fallback);
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe *wqe, u16 pi);
+
+void mlx5e_completion_event(struct mlx5_core_cq *mcq);
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
+int mlx5e_napi_poll(struct napi_struct *napi, int budget);
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
+
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+ bool recycle);
+void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
+bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+struct sk_buff *
+mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+struct sk_buff *
+mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt);
+
+void mlx5e_update_stats(struct mlx5e_priv *priv);
+
+void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
+int mlx5e_self_test_num(struct mlx5e_priv *priv);
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf);
+void mlx5e_set_rx_mode_work(struct work_struct *work);
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val);
+
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
+ u16 vid);
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
+ u16 vid);
+void mlx5e_timestamp_init(struct mlx5e_priv *priv);
+
+struct mlx5e_redirect_rqt_param {
+ bool is_rss;
+ union {
+ u32 rqn; /* Direct RQN (Non-RSS) */
+ struct {
+ u8 hfunc;
+ struct mlx5e_channels *channels;
+ } rss; /* RSS data */
+ };
+};
+
+int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
+ struct mlx5e_redirect_rqt_param rrp);
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
+ enum mlx5e_traffic_types tt,
+ void *tirc, bool inner);
+
+int mlx5e_open_locked(struct net_device *netdev);
+int mlx5e_close_locked(struct net_device *netdev);
+
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs);
+void mlx5e_close_channels(struct mlx5e_channels *chs);
+
+/* Function pointer to be used to modify WH settings while
+ * switching channels
+ */
+typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
+void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_hw_modify hw_modify);
+void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
+void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
+
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+ int num_channels);
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
+ u8 cq_period_mode);
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
+ u8 cq_period_mode);
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+
+static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
+}
+
+static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
+
+static inline
+struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline
+void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc,
+ void __iomem *uar_map,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ *wq->db = cpu_to_be32(pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, uar_map, NULL);
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+extern const struct ethtool_ops mlx5e_ethtool_ops;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
+int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
+#endif
+
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir, u32 *in, int inlen);
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
+
+/* common netdev helpers */
+int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
+
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
+
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
+ u32 underlay_qpn, u32 *tisn);
+void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
+
+int mlx5e_create_tises(struct mlx5e_priv *priv);
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
+int mlx5e_close(struct net_device *netdev);
+int mlx5e_open(struct net_device *netdev);
+void mlx5e_update_stats_work(struct work_struct *work);
+
+int mlx5e_bits_invert(unsigned long a, int size);
+
+typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
+int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+ change_hw_mtu_cb set_mtu_cb);
+
+/* ethtool helpers */
+void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
+ struct ethtool_drvinfo *drvinfo);
+void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv,
+ uint32_t stringset, uint8_t *data);
+int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset);
+void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
+ struct ethtool_stats *stats, u64 *data);
+void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param);
+int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param);
+void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch);
+int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch);
+int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal);
+int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal);
+int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
+ struct ethtool_ts_info *info);
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash);
+
+/* mlx5e generic netdev management API */
+struct net_device*
+mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
+ void *ppriv);
+int mlx5e_attach_netdev(struct mlx5e_priv *priv);
+void mlx5e_detach_netdev(struct mlx5e_priv *priv);
+void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
+void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 max_channels, u16 mtu);
+u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
+void mlx5e_rx_dim_work(struct work_struct *work);
+void mlx5e_tx_dim_work(struct work_struct *work);
+#endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
new file mode 100644
index 000000000..1431232c9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#ifndef __MLX5E_FLOW_STEER_H__
+#define __MLX5E_FLOW_STEER_H__
+
+enum {
+ MLX5E_TC_FT_LEVEL = 0,
+ MLX5E_TC_TTC_FT_LEVEL,
+};
+
+struct mlx5e_tc_table {
+ struct mlx5_flow_table *t;
+
+ struct rhashtable ht;
+
+ DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+ DECLARE_HASHTABLE(hairpin_tbl, 8);
+
+ struct notifier_block netdevice_nb;
+};
+
+struct mlx5e_flow_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+};
+
+struct mlx5e_l2_rule {
+ u8 addr[ETH_ALEN + 2];
+ struct mlx5_flow_handle *rule;
+};
+
+#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct mlx5e_vlan_table {
+ struct mlx5e_flow_table ft;
+ DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
+ DECLARE_BITMAP(active_svlans, VLAN_N_VID);
+ struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID];
+ struct mlx5_flow_handle *untagged_rule;
+ struct mlx5_flow_handle *any_cvlan_rule;
+ struct mlx5_flow_handle *any_svlan_rule;
+ bool cvlan_filter_disabled;
+};
+
+struct mlx5e_l2_table {
+ struct mlx5e_flow_table ft;
+ struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE];
+ struct mlx5e_l2_rule broadcast;
+ struct mlx5e_l2_rule allmulti;
+ struct mlx5e_l2_rule promisc;
+ bool broadcast_enabled;
+ bool allmulti_enabled;
+ bool promisc_enabled;
+};
+
+enum mlx5e_traffic_types {
+ MLX5E_TT_IPV4_TCP,
+ MLX5E_TT_IPV6_TCP,
+ MLX5E_TT_IPV4_UDP,
+ MLX5E_TT_IPV6_UDP,
+ MLX5E_TT_IPV4_IPSEC_AH,
+ MLX5E_TT_IPV6_IPSEC_AH,
+ MLX5E_TT_IPV4_IPSEC_ESP,
+ MLX5E_TT_IPV6_IPSEC_ESP,
+ MLX5E_TT_IPV4,
+ MLX5E_TT_IPV6,
+ MLX5E_TT_ANY,
+ MLX5E_NUM_TT,
+ MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
+};
+
+enum mlx5e_tunnel_types {
+ MLX5E_TT_IPV4_GRE,
+ MLX5E_TT_IPV6_GRE,
+ MLX5E_NUM_TUNNEL_TT,
+};
+
+/* L3/L4 traffic type classifier */
+struct mlx5e_ttc_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *rules[MLX5E_NUM_TT];
+ struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
+};
+
+/* NIC prio FTS */
+enum {
+ MLX5E_VLAN_FT_LEVEL = 0,
+ MLX5E_L2_FT_LEVEL,
+ MLX5E_TTC_FT_LEVEL,
+ MLX5E_INNER_TTC_FT_LEVEL,
+#ifdef CONFIG_MLX5_EN_ARFS
+ MLX5E_ARFS_FT_LEVEL
+#endif
+};
+
+#ifdef CONFIG_MLX5_EN_RXNFC
+
+struct mlx5e_ethtool_table {
+ struct mlx5_flow_table *ft;
+ int num_rules;
+};
+
+#define ETHTOOL_NUM_L3_L4_FTS 7
+#define ETHTOOL_NUM_L2_FTS 4
+
+struct mlx5e_ethtool_steering {
+ struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
+ struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
+ struct list_head rules;
+ int tot_num_rules;
+};
+
+void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd);
+int mlx5e_get_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
+#endif /* CONFIG_MLX5_EN_RXNFC */
+
+#ifdef CONFIG_MLX5_EN_ARFS
+#define ARFS_HASH_SHIFT BITS_PER_BYTE
+#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct arfs_table {
+ struct mlx5e_flow_table ft;
+ struct mlx5_flow_handle *default_rule;
+ struct hlist_head rules_hash[ARFS_HASH_SIZE];
+};
+
+enum arfs_type {
+ ARFS_IPV4_TCP,
+ ARFS_IPV6_TCP,
+ ARFS_IPV4_UDP,
+ ARFS_IPV6_UDP,
+ ARFS_NUM_TYPES,
+};
+
+struct mlx5e_arfs_tables {
+ struct arfs_table arfs_tables[ARFS_NUM_TYPES];
+ /* Protect aRFS rules list */
+ spinlock_t arfs_lock;
+ struct list_head rules;
+ int last_filter_id;
+ struct workqueue_struct *wq;
+};
+
+int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
+void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
+int mlx5e_arfs_enable(struct mlx5e_priv *priv);
+int mlx5e_arfs_disable(struct mlx5e_priv *priv);
+int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id);
+#else
+static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+#endif
+
+struct mlx5e_flow_steering {
+ struct mlx5_flow_namespace *ns;
+#ifdef CONFIG_MLX5_EN_RXNFC
+ struct mlx5e_ethtool_steering ethtool;
+#endif
+ struct mlx5e_tc_table tc;
+ struct mlx5e_vlan_table vlan;
+ struct mlx5e_l2_table l2;
+ struct mlx5e_ttc_table ttc;
+ struct mlx5e_ttc_table inner_ttc;
+#ifdef CONFIG_MLX5_EN_ARFS
+ struct mlx5e_arfs_tables arfs;
+#endif
+};
+
+struct ttc_params {
+ struct mlx5_flow_table_attr ft_attr;
+ u32 any_tt_tirn;
+ u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_ttc_table *inner_ttc;
+};
+
+void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
+void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
+void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params);
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc);
+
+int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc);
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
+void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
+void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
+
+#endif /* __MLX5E_FLOW_STEER_H__ */
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
new file mode 100644
index 000000000..12e1682f9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "port.h"
+
+/* speed in units of 1Mb */
+static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
+ [MLX5E_1000BASE_CX_SGMII] = 1000,
+ [MLX5E_1000BASE_KX] = 1000,
+ [MLX5E_10GBASE_CX4] = 10000,
+ [MLX5E_10GBASE_KX4] = 10000,
+ [MLX5E_10GBASE_KR] = 10000,
+ [MLX5E_20GBASE_KR2] = 20000,
+ [MLX5E_40GBASE_CR4] = 40000,
+ [MLX5E_40GBASE_KR4] = 40000,
+ [MLX5E_56GBASE_R4] = 56000,
+ [MLX5E_10GBASE_CR] = 10000,
+ [MLX5E_10GBASE_SR] = 10000,
+ [MLX5E_10GBASE_ER] = 10000,
+ [MLX5E_40GBASE_SR4] = 40000,
+ [MLX5E_40GBASE_LR4] = 40000,
+ [MLX5E_50GBASE_SR2] = 50000,
+ [MLX5E_100GBASE_CR4] = 100000,
+ [MLX5E_100GBASE_SR4] = 100000,
+ [MLX5E_100GBASE_KR4] = 100000,
+ [MLX5E_100GBASE_LR4] = 100000,
+ [MLX5E_100BASE_TX] = 100,
+ [MLX5E_1000BASE_T] = 1000,
+ [MLX5E_10GBASE_T] = 10000,
+ [MLX5E_25GBASE_CR] = 25000,
+ [MLX5E_25GBASE_KR] = 25000,
+ [MLX5E_25GBASE_SR] = 25000,
+ [MLX5E_50GBASE_CR2] = 50000,
+ [MLX5E_50GBASE_KR2] = 50000,
+};
+
+u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
+{
+ unsigned long temp = eth_proto_oper;
+ u32 speed = 0;
+ int i;
+
+ i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER);
+ if (i < MLX5E_LINK_MODES_NUMBER)
+ speed = mlx5e_link_speed[i];
+
+ return speed;
+}
+
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
+ u32 eth_proto_oper;
+ int err;
+
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+ if (err)
+ return err;
+
+ eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+ *speed = mlx5e_port_ptys2speed(eth_proto_oper);
+ if (!(*speed))
+ err = -EINVAL;
+
+ return err;
+}
+
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ u32 max_speed = 0;
+ u32 proto_cap;
+ int err;
+ int i;
+
+ err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
+ if (proto_cap & MLX5E_PROT_MASK(i))
+ max_speed = max(max_speed, mlx5e_link_speed[i]);
+
+ *speed = max_speed;
+ return 0;
+}
+
+u32 mlx5e_port_speed2linkmodes(u32 speed)
+{
+ u32 link_modes = 0;
+ int i;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (mlx5e_link_speed[i] == speed)
+ link_modes |= MLX5E_PROT_MASK(i);
+ }
+
+ return link_modes;
+}
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *in;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0);
+
+ kfree(in);
+ return err;
+}
+
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in)
+{
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *out;
+ int err;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(pbmc_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1);
+
+ kfree(out);
+ return err;
+}
+
+/* buffer[i]: buffer that priority i mapped to */
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
+ prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff);
+ for (prio = 0; prio < 8; prio++) {
+ buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF;
+ mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]);
+ }
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer)
+{
+ int sz = MLX5_ST_SZ_BYTES(pptb_reg);
+ u32 prio_x_buff;
+ void *out;
+ void *in;
+ int prio;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* First query the pptb register */
+ MLX5_SET(pptb_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0);
+ if (err)
+ goto out;
+
+ memcpy(in, out, sz);
+ MLX5_SET(pptb_reg, in, local_port, 1);
+
+ /* Update the pm and prio_x_buff */
+ MLX5_SET(pptb_reg, in, pm, 0xFF);
+
+ prio_x_buff = 0;
+ for (prio = 0; prio < 8; prio++)
+ prio_x_buff |= (buffer[prio] << (4 * prio));
+ MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff);
+
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
new file mode 100644
index 000000000..f8cbd8194
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_EN_PORT_H
+#define __MLX5E_EN_PORT_H
+
+#include <linux/mlx5/driver.h>
+#include "en.h"
+
+u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
+int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+u32 mlx5e_port_speed2linkmodes(u32 speed);
+
+int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
+int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
+int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
new file mode 100644
index 000000000..28d56e44e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "port_buffer.h"
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ u32 total_used = 0;
+ void *buffer;
+ void *out;
+ int err;
+ int i;
+
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, out);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
+ port_buffer->buffer[i].lossy =
+ MLX5_GET(bufferx_reg, buffer, lossy);
+ port_buffer->buffer[i].epsb =
+ MLX5_GET(bufferx_reg, buffer, epsb);
+ port_buffer->buffer[i].size =
+ MLX5_GET(bufferx_reg, buffer, size) << MLX5E_BUFFER_CELL_SHIFT;
+ port_buffer->buffer[i].xon =
+ MLX5_GET(bufferx_reg, buffer, xon_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+ port_buffer->buffer[i].xoff =
+ MLX5_GET(bufferx_reg, buffer, xoff_threshold) << MLX5E_BUFFER_CELL_SHIFT;
+ total_used += port_buffer->buffer[i].size;
+
+ mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i,
+ port_buffer->buffer[i].size,
+ port_buffer->buffer[i].xon,
+ port_buffer->buffer[i].xoff,
+ port_buffer->buffer[i].epsb,
+ port_buffer->buffer[i].lossy);
+ }
+
+ port_buffer->port_buffer_size =
+ MLX5_GET(pbmc_reg, out, port_buffer_size) << MLX5E_BUFFER_CELL_SHIFT;
+ port_buffer->spare_buffer_size =
+ port_buffer->port_buffer_size - total_used;
+
+ mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
+ port_buffer->port_buffer_size,
+ port_buffer->spare_buffer_size);
+out:
+ kfree(out);
+ return err;
+}
+
+static int port_set_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
+ void *buffer;
+ void *in;
+ int err;
+ int i;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5e_port_query_pbmc(mdev, in);
+ if (err)
+ goto out;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
+
+ MLX5_SET(bufferx_reg, buffer, size,
+ port_buffer->buffer[i].size >> MLX5E_BUFFER_CELL_SHIFT);
+ MLX5_SET(bufferx_reg, buffer, lossy,
+ port_buffer->buffer[i].lossy);
+ MLX5_SET(bufferx_reg, buffer, xoff_threshold,
+ port_buffer->buffer[i].xoff >> MLX5E_BUFFER_CELL_SHIFT);
+ MLX5_SET(bufferx_reg, buffer, xon_threshold,
+ port_buffer->buffer[i].xon >> MLX5E_BUFFER_CELL_SHIFT);
+ }
+
+ err = mlx5e_port_set_pbmc(mdev, in);
+out:
+ kfree(in);
+ return err;
+}
+
+/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B])
+ * minimum speed value is 40Gbps
+ */
+static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
+{
+ u32 speed;
+ u32 xoff;
+ int err;
+
+ err = mlx5e_port_linkspeed(priv->mdev, &speed);
+ if (err)
+ speed = SPEED_40000;
+ speed = max_t(u32, speed, SPEED_40000);
+
+ xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
+
+ mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff);
+ return xoff;
+}
+
+static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
+ u32 xoff, unsigned int max_mtu)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ if (port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].xoff = 0;
+ port_buffer->buffer[i].xon = 0;
+ continue;
+ }
+
+ if (port_buffer->buffer[i].size <
+ (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) {
+ pr_err("buffer_size[%d]=%d is not enough for lossless buffer\n",
+ i, port_buffer->buffer[i].size);
+ return -ENOMEM;
+ }
+
+ port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff;
+ port_buffer->buffer[i].xon =
+ port_buffer->buffer[i].xoff - max_mtu;
+ }
+
+ return 0;
+}
+
+/**
+ * update_buffer_lossy()
+ * max_mtu: netdev's max_mtu
+ * pfc_en: <input> current pfc configuration
+ * buffer: <input> current prio to buffer mapping
+ * xoff: <input> xoff value
+ * port_buffer: <output> port receive buffer configuration
+ * change: <output>
+ *
+ * Update buffer configuration based on pfc configuraiton and priority
+ * to buffer mapping.
+ * Buffer's lossy bit is changed to:
+ * lossless if there is at least one PFC enabled priority mapped to this buffer
+ * lossy if all priorities mapped to this buffer are PFC disabled
+ *
+ * Return:
+ * Return 0 if no error.
+ * Set change to true if buffer configuration is modified.
+ */
+static int update_buffer_lossy(unsigned int max_mtu,
+ u8 pfc_en, u8 *buffer, u32 xoff,
+ struct mlx5e_port_buffer *port_buffer,
+ bool *change)
+{
+ bool changed = false;
+ u8 lossy_count;
+ u8 prio_count;
+ u8 lossy;
+ int prio;
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ prio_count = 0;
+ lossy_count = 0;
+
+ for (prio = 0; prio < MLX5E_MAX_PRIORITY; prio++) {
+ if (buffer[prio] != i)
+ continue;
+
+ prio_count++;
+ lossy_count += !(pfc_en & (1 << prio));
+ }
+
+ if (lossy_count == prio_count)
+ lossy = 1;
+ else /* lossy_count < prio_count */
+ lossy = 0;
+
+ if (lossy != port_buffer->buffer[i].lossy) {
+ port_buffer->buffer[i].lossy = lossy;
+ changed = true;
+ }
+ }
+
+ if (changed) {
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+
+ *change = true;
+ }
+
+ return 0;
+}
+
+static int fill_pfc_en(struct mlx5_core_dev *mdev, u8 *pfc_en)
+{
+ u32 g_rx_pause, g_tx_pause;
+ int err;
+
+ err = mlx5_query_port_pause(mdev, &g_rx_pause, &g_tx_pause);
+ if (err)
+ return err;
+
+ /* If global pause enabled, set all active buffers to lossless.
+ * Otherwise, check PFC setting.
+ */
+ if (g_rx_pause || g_tx_pause)
+ *pfc_en = 0xff;
+ else
+ err = mlx5_query_port_pfc(mdev, pfc_en, NULL);
+
+ return err;
+}
+
+#define MINIMUM_MAX_MTU 9216
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer)
+{
+ struct mlx5e_port_buffer port_buffer;
+ u32 xoff = calculate_xoff(priv, mtu);
+ bool update_prio2buffer = false;
+ u8 buffer[MLX5E_MAX_PRIORITY];
+ bool update_buffer = false;
+ unsigned int max_mtu;
+ u32 total_used = 0;
+ u8 curr_pfc_en;
+ int err;
+ int i;
+
+ mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change);
+ max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU);
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ if (change & MLX5E_PORT_BUFFER_CABLE_LEN) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PFC) {
+ err = mlx5e_port_query_priority2buffer(priv->mdev, buffer);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff,
+ &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
+ update_prio2buffer = true;
+ err = fill_pfc_en(priv->mdev, &curr_pfc_en);
+ if (err)
+ return err;
+
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer,
+ xoff, &port_buffer, &update_buffer);
+ if (err)
+ return err;
+ }
+
+ if (change & MLX5E_PORT_BUFFER_SIZE) {
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
+ if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
+ mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
+ __func__, i);
+ return -EINVAL;
+ }
+
+ port_buffer.buffer[i].size = buffer_size[i];
+ total_used += buffer_size[i];
+ }
+
+ mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);
+
+ if (total_used > port_buffer.port_buffer_size)
+ return -EINVAL;
+
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+ }
+
+ /* Need to update buffer configuration if xoff value is changed */
+ if (!update_buffer && xoff != priv->dcbx.xoff) {
+ update_buffer = true;
+ err = update_xoff_threshold(&port_buffer, xoff, max_mtu);
+ if (err)
+ return err;
+ }
+ priv->dcbx.xoff = xoff;
+
+ /* Apply the settings */
+ if (update_buffer) {
+ err = port_set_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+ }
+
+ if (update_prio2buffer)
+ err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);
+
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
new file mode 100644
index 000000000..34f55b81a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_PORT_BUFFER_H__
+#define __MLX5_EN_PORT_BUFFER_H__
+
+#include "en.h"
+#include "port.h"
+
+#define MLX5E_MAX_BUFFER 8
+#define MLX5E_BUFFER_CELL_SHIFT 7
+#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
+
+#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
+ MLX5_CAP_PCAM_REG(mdev, pbmc) && \
+ MLX5_CAP_PCAM_REG(mdev, pptb))
+
+enum {
+ MLX5E_PORT_BUFFER_CABLE_LEN = BIT(0),
+ MLX5E_PORT_BUFFER_PFC = BIT(1),
+ MLX5E_PORT_BUFFER_PRIO2BUFFER = BIT(2),
+ MLX5E_PORT_BUFFER_SIZE = BIT(3),
+};
+
+struct mlx5e_bufferx_reg {
+ u8 lossy;
+ u8 epsb;
+ u32 size;
+ u32 xoff;
+ u32 xon;
+};
+
+struct mlx5e_port_buffer {
+ u32 port_buffer_size;
+ u32 spare_buffer_size;
+ struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER];
+};
+
+int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
+ u32 change, unsigned int mtu,
+ struct ieee_pfc *pfc,
+ u32 *buffer_size,
+ u8 *prio2buffer);
+
+int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
+ struct mlx5e_port_buffer *port_buffer);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
new file mode 100644
index 000000000..599114ab7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bpf_trace.h>
+#include "en/xdp.h"
+
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+ int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+ /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+ * The condition checked in mlx5e_rx_is_linear_skb is:
+ * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+ * (Note that hw_mtu == sw_mtu + hard_mtu.)
+ * What is returned from this function is:
+ * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+ * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+ * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+ * because both PAGE_SIZE and S are already aligned. Any number greater
+ * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+ * so max_mtu is the maximum MTU allowed.
+ */
+
+ return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
+static inline bool
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
+ struct xdp_buff *xdp)
+{
+ struct mlx5e_xdp_info xdpi;
+
+ xdpi.xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpi.xdpf))
+ return false;
+ xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
+ dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
+ xdpi.xdpf->len, PCI_DMA_TODEVICE);
+ xdpi.di = *di;
+
+ return mlx5e_xmit_xdp_frame(sq, &xdpi);
+}
+
+/* returns true if packet was consumed by xdp */
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+ void *va, u16 *rx_headroom, u32 *len)
+{
+ struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+ struct xdp_buff xdp;
+ u32 act;
+ int err;
+
+ if (!prog)
+ return false;
+
+ xdp.data = va + *rx_headroom;
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + *len;
+ xdp.data_hard_start = va;
+ xdp.rxq = &rq->xdp_rxq;
+
+ act = bpf_prog_run_xdp(prog, &xdp);
+ switch (act) {
+ case XDP_PASS:
+ *rx_headroom = xdp.data - xdp.data_hard_start;
+ *len = xdp.data_end - xdp.data;
+ return false;
+ case XDP_TX:
+ if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+ return true;
+ case XDP_REDIRECT:
+ /* When XDP enabled then page-refcnt==1 here */
+ err = xdp_do_redirect(rq->netdev, &xdp, prog);
+ if (unlikely(err))
+ goto xdp_abort;
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
+ rq->xdpsq.redirect_flush = true;
+ mlx5e_page_dma_unmap(rq, di);
+ rq->stats->xdp_redirect++;
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+xdp_abort:
+ trace_xdp_exception(rq->netdev, prog, act);
+ /* fall through */
+ case XDP_DROP:
+ rq->stats->xdp_drop++;
+ return true;
+ }
+}
+
+bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg = wqe->data;
+
+ struct xdp_frame *xdpf = xdpi->xdpf;
+ dma_addr_t dma_addr = xdpi->dma_addr;
+ unsigned int dma_len = xdpf->len;
+
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+
+ prefetchw(wqe);
+
+ if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
+ stats->err++;
+ return false;
+ }
+
+ if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
+ if (sq->doorbell) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->doorbell = false;
+ }
+ stats->full++;
+ return false;
+ }
+
+ cseg->fm_ce_se = 0;
+
+ /* copy the inline part if required */
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
+ eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+ dma_len -= MLX5E_XDP_MIN_INLINE;
+ dma_addr += MLX5E_XDP_MIN_INLINE;
+ dseg++;
+ }
+
+ /* write the dma part */
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+
+ /* move page to reference to sq responsibility,
+ * and mark so it's not put back in page-cache.
+ */
+ sq->db.xdpi[pi] = *xdpi;
+ sq->pc++;
+
+ sq->doorbell = true;
+
+ stats->xmit++;
+ return true;
+}
+
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
+{
+ struct mlx5e_xdpsq *sq;
+ struct mlx5_cqe64 *cqe;
+ struct mlx5e_rq *rq;
+ bool is_redirect;
+ u16 sqcc;
+ int i;
+
+ sq = container_of(cq, struct mlx5e_xdpsq, cq);
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return false;
+
+ is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+ rq = container_of(sq, struct mlx5e_rq, xdpsq);
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ i = 0;
+ do {
+ u16 wqe_counter;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ do {
+ u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+
+ last_wqe = (sqcc == wqe_counter);
+ sqcc++;
+
+ if (is_redirect) {
+ dma_unmap_single(sq->pdev, xdpi->dma_addr,
+ xdpi->xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi->xdpf);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi->di, true);
+ }
+ } while (!last_wqe);
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ sq->stats->cqes += i;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->cc = sqcc;
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_rq *rq;
+ bool is_redirect;
+
+ is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+ rq = is_redirect ? NULL : container_of(sq, struct mlx5e_rq, xdpsq);
+
+ while (sq->cc != sq->pc) {
+ u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci];
+
+ sq->cc++;
+
+ if (is_redirect) {
+ dma_unmap_single(sq->pdev, xdpi->dma_addr,
+ xdpi->xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi->xdpf);
+ } else {
+ /* Recycle RX page */
+ mlx5e_page_release(rq, &xdpi->di, false);
+ }
+ }
+}
+
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_xdpsq *sq;
+ int drops = 0;
+ int sq_num;
+ int i;
+
+ /* this flag is sufficient, no need to test internal sq state */
+ if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ sq_num = smp_processor_id();
+
+ if (unlikely(sq_num >= priv->channels.num))
+ return -ENXIO;
+
+ sq = &priv->channels.c[sq_num]->xdpsq;
+
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+ struct mlx5e_xdp_info xdpi;
+
+ xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ continue;
+ }
+
+ xdpi.xdpf = xdpf;
+
+ if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
+ dma_unmap_single(sq->pdev, xdpi.dma_addr,
+ xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ mlx5e_xmit_xdp_doorbell(sq);
+
+ return n - drops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
new file mode 100644
index 000000000..827ceef5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_XDP_H__
+#define __MLX5_EN_XDP_H__
+
+#include "en.h"
+
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+#define MLX5E_XDP_TX_DS_COUNT \
+ ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+ void *va, u16 *rx_headroom, u32 *len);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
+
+bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
+int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+
+static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+ /* let other device's napi(s) see our new state */
+ synchronize_rcu();
+}
+
+static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+}
+
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
new file mode 100644
index 000000000..1dd225380
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_EN_ACCEL_H__
+#define __MLX5E_EN_ACCEL_H__
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls_rxtx.h"
+#include "en.h"
+
+static inline void
+mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
+{
+ int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
+
+ udp_hdr(skb)->len = htons(payload_len);
+}
+
+static inline struct sk_buff *
+mlx5e_accel_handle_tx(struct sk_buff *skb,
+ struct mlx5e_txqsq *sq,
+ struct net_device *dev,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+#ifdef CONFIG_MLX5_EN_TLS
+ if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
+ skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi);
+ if (unlikely(!skb))
+ return NULL;
+ }
+#endif
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
+ skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb);
+ if (unlikely(!skb))
+ return NULL;
+ }
+#endif
+
+ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ mlx5e_udp_gso_handle_tx_skb(skb);
+
+ return skb;
+}
+
+#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
new file mode 100644
index 000000000..c467f5e98
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,547 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/aead.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+
+#include "en.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+
+
+static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa;
+
+ if (!x)
+ return NULL;
+
+ sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ if (!sa)
+ return NULL;
+
+ WARN_ON(sa->x != x);
+ return sa;
+}
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
+ unsigned int handle)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_state *ret = NULL;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle)
+ if (sa_entry->handle == handle) {
+ ret = sa_entry->x;
+ xfrm_state_hold(ret);
+ break;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ unsigned long flags;
+ int ret;
+
+ ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ sa_entry->handle = ret;
+ hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+
+ return 0;
+}
+
+static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+ hash_del_rcu(&sa_entry->hlist);
+ spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+}
+
+static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+
+ /* xfrm already doing sync rcu between del and free callbacks */
+
+ ida_simple_remove(&ipsec->halloc, sa_entry->handle);
+}
+
+static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+ struct xfrm_replay_state_esn *replay_esn;
+ u32 seq_bottom;
+ u8 overlap;
+ u32 *esn;
+
+ if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+ sa_entry->esn_state.trigger = 0;
+ return false;
+ }
+
+ replay_esn = sa_entry->x->replay_esn;
+ seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+ overlap = sa_entry->esn_state.overlap;
+
+ sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+ htonl(seq_bottom));
+ esn = &sa_entry->esn_state.esn;
+
+ sa_entry->esn_state.trigger = 1;
+ if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+ ++(*esn);
+ sa_entry->esn_state.overlap = 0;
+ return true;
+ } else if (unlikely(!overlap &&
+ (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
+ sa_entry->esn_state.overlap = 1;
+ return true;
+ }
+
+ return false;
+}
+
+static void
+mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+ struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct xfrm_state *x = sa_entry->x;
+ struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
+ struct aead_geniv_ctx *geniv_ctx;
+ struct crypto_aead *aead;
+ unsigned int crypto_data_len, key_len;
+ int ivsize;
+
+ memset(attrs, 0, sizeof(*attrs));
+
+ /* key */
+ crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+ key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+
+ memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
+ aes_gcm->key_len = key_len * 8;
+
+ /* salt and seq_iv */
+ aead = x->data;
+ geniv_ctx = crypto_aead_ctx(aead);
+ ivsize = crypto_aead_ivsize(aead);
+ memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
+ memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
+ sizeof(aes_gcm->salt));
+
+ /* iv len */
+ aes_gcm->icv_len = x->aead->alg_icv_len;
+
+ /* esn */
+ if (sa_entry->esn_state.trigger) {
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ attrs->esn = sa_entry->esn_state.esn;
+ if (sa_entry->esn_state.overlap)
+ attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ }
+
+ /* rx handle */
+ attrs->sa_handle = sa_entry->handle;
+
+ /* algo type */
+ attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ /* action */
+ attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
+ MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+ MLX5_ACCEL_ESP_ACTION_DECRYPT;
+ /* flags */
+ attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
+ MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
+ MLX5_ACCEL_ESP_FLAGS_TUNNEL;
+}
+
+static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
+{
+ struct net_device *netdev = x->xso.dev;
+ struct mlx5e_priv *priv;
+
+ priv = netdev_priv(netdev);
+
+ if (x->props.aalgo != SADB_AALG_NONE) {
+ netdev_info(netdev, "Cannot offload authenticated xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+ netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->props.calgo != SADB_X_CALG_NONE) {
+ netdev_info(netdev, "Cannot offload compressed xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_ESN)) {
+ netdev_info(netdev, "Cannot offload ESN xfrm states\n");
+ return -EINVAL;
+ }
+ if (x->props.family != AF_INET &&
+ x->props.family != AF_INET6) {
+ netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->props.mode != XFRM_MODE_TRANSPORT &&
+ x->props.mode != XFRM_MODE_TUNNEL) {
+ dev_info(&netdev->dev, "Only transport and tunnel xfrm states may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->id.proto != IPPROTO_ESP) {
+ netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
+ return -EINVAL;
+ }
+ if (x->encap) {
+ netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n");
+ return -EINVAL;
+ }
+ if (!x->aead) {
+ netdev_info(netdev, "Cannot offload xfrm states without aead\n");
+ return -EINVAL;
+ }
+ if (x->aead->alg_icv_len != 128) {
+ netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+ return -EINVAL;
+ }
+ if ((x->aead->alg_key_len != 128 + 32) &&
+ (x->aead->alg_key_len != 256 + 32)) {
+ netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EINVAL;
+ }
+ if (x->tfcpad) {
+ netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n");
+ return -EINVAL;
+ }
+ if (!x->geniv) {
+ netdev_info(netdev, "Cannot offload xfrm states without geniv\n");
+ return -EINVAL;
+ }
+ if (strcmp(x->geniv, "seqiv")) {
+ netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
+ return -EINVAL;
+ }
+ if (x->props.family == AF_INET6 &&
+ !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_IPV6)) {
+ netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int mlx5e_xfrm_add_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
+ struct net_device *netdev = x->xso.dev;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+ struct mlx5e_priv *priv;
+ __be32 saddr[4] = {0}, daddr[4] = {0}, spi;
+ bool is_ipv6 = false;
+ int err;
+
+ priv = netdev_priv(netdev);
+
+ err = mlx5e_xfrm_validate_state(x);
+ if (err)
+ return err;
+
+ sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+ if (!sa_entry) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sa_entry->x = x;
+ sa_entry->ipsec = priv->ipsec;
+
+ /* Add the SA to handle processed incoming packets before the add SA
+ * completion was received
+ */
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ err = mlx5e_ipsec_sadb_rx_add(sa_entry);
+ if (err) {
+ netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
+ goto err_entry;
+ }
+ } else {
+ sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+ mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
+ }
+
+ /* check esn */
+ mlx5e_ipsec_update_esn_state(sa_entry);
+
+ /* create xfrm */
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
+ sa_entry->xfrm =
+ mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
+ MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
+ if (IS_ERR(sa_entry->xfrm)) {
+ err = PTR_ERR(sa_entry->xfrm);
+ goto err_sadb_rx;
+ }
+
+ /* create hw context */
+ if (x->props.family == AF_INET) {
+ saddr[3] = x->props.saddr.a4;
+ daddr[3] = x->id.daddr.a4;
+ } else {
+ memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
+ memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
+ is_ipv6 = true;
+ }
+ spi = x->id.spi;
+ sa_entry->hw_context =
+ mlx5_accel_esp_create_hw_context(priv->mdev,
+ sa_entry->xfrm,
+ saddr, daddr, spi,
+ is_ipv6);
+ if (IS_ERR(sa_entry->hw_context)) {
+ err = PTR_ERR(sa_entry->hw_context);
+ goto err_xfrm;
+ }
+
+ x->xso.offload_handle = (unsigned long)sa_entry;
+ goto out;
+
+err_xfrm:
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
+err_sadb_rx:
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ mlx5e_ipsec_sadb_rx_del(sa_entry);
+ mlx5e_ipsec_sadb_rx_free(sa_entry);
+ }
+err_entry:
+ kfree(sa_entry);
+out:
+ return err;
+}
+
+static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+
+ if (!sa_entry)
+ return;
+
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ mlx5e_ipsec_sadb_rx_del(sa_entry);
+}
+
+static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+
+ if (!sa_entry)
+ return;
+
+ if (sa_entry->hw_context) {
+ flush_workqueue(sa_entry->ipsec->wq);
+ mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
+ mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
+ }
+
+ if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ mlx5e_ipsec_sadb_rx_free(sa_entry);
+
+ kfree(sa_entry);
+}
+
+int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = NULL;
+
+ if (!MLX5_IPSEC_DEV(priv->mdev)) {
+ netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
+ return 0;
+ }
+
+ ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+ if (!ipsec)
+ return -ENOMEM;
+
+ hash_init(ipsec->sadb_rx);
+ spin_lock_init(&ipsec->sadb_rx_lock);
+ ida_init(&ipsec->halloc);
+ ipsec->en_priv = priv;
+ ipsec->en_priv->ipsec = ipsec;
+ ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+ ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
+ priv->netdev->name);
+ if (!ipsec->wq) {
+ kfree(ipsec);
+ return -ENOMEM;
+ }
+ netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
+ return 0;
+}
+
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+ if (!ipsec)
+ return;
+
+ drain_workqueue(ipsec->wq);
+ destroy_workqueue(ipsec->wq);
+
+ ida_destroy(&ipsec->halloc);
+ kfree(ipsec);
+ priv->ipsec = NULL;
+}
+
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+ if (x->props.family == AF_INET) {
+ /* Offload with IPv4 options is not supported yet */
+ if (ip_hdr(skb)->ihl > 5)
+ return false;
+ } else {
+ /* Offload with IPv6 extension headers is not support yet */
+ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+ return false;
+ }
+
+ return true;
+}
+
+struct mlx5e_ipsec_modify_state_work {
+ struct work_struct work;
+ struct mlx5_accel_esp_xfrm_attrs attrs;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+};
+
+static void _update_xfrm_state(struct work_struct *work)
+{
+ int ret;
+ struct mlx5e_ipsec_modify_state_work *modify_work =
+ container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+ struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
+
+ ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
+ &modify_work->attrs);
+ if (ret)
+ netdev_warn(sa_entry->ipsec->en_priv->netdev,
+ "Not an IPSec offload device\n");
+
+ kfree(modify_work);
+}
+
+static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
+{
+ struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+ struct mlx5e_ipsec_modify_state_work *modify_work;
+ bool need_update;
+
+ if (!sa_entry)
+ return;
+
+ need_update = mlx5e_ipsec_update_esn_state(sa_entry);
+ if (!need_update)
+ return;
+
+ modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC);
+ if (!modify_work)
+ return;
+
+ mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
+ modify_work->sa_entry = sa_entry;
+
+ INIT_WORK(&modify_work->work, _update_xfrm_state);
+ WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work));
+}
+
+static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+ .xdo_dev_state_add = mlx5e_xfrm_add_state,
+ .xdo_dev_state_delete = mlx5e_xfrm_del_state,
+ .xdo_dev_state_free = mlx5e_xfrm_free_state,
+ .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
+ .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
+};
+
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *netdev = priv->netdev;
+
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
+ !MLX5_CAP_ETH(mdev, swp)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+ return;
+ }
+
+ mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+ netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+ netdev->features |= NETIF_F_HW_ESP;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+ if (!MLX5_CAP_ETH(mdev, swp_csum)) {
+ mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
+ return;
+ }
+
+ netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
+ netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+ if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
+ !MLX5_CAP_ETH(mdev, swp_lso)) {
+ mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+ return;
+ }
+
+ mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+ netdev->features |= NETIF_F_GSO_ESP;
+ netdev->hw_features |= NETIF_F_GSO_ESP;
+ netdev->hw_enc_features |= NETIF_F_GSO_ESP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
new file mode 100644
index 000000000..93bf10e65
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_H__
+#define __MLX5E_IPSEC_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/mlx5/device.h>
+#include <net/xfrm.h>
+#include <linux/idr.h>
+
+#include "accel/ipsec.h"
+
+#define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+
+struct mlx5e_priv;
+
+struct mlx5e_ipsec_sw_stats {
+ atomic64_t ipsec_rx_drop_sp_alloc;
+ atomic64_t ipsec_rx_drop_sadb_miss;
+ atomic64_t ipsec_rx_drop_syndrome;
+ atomic64_t ipsec_tx_drop_bundle;
+ atomic64_t ipsec_tx_drop_no_state;
+ atomic64_t ipsec_tx_drop_not_ip;
+ atomic64_t ipsec_tx_drop_trailer;
+ atomic64_t ipsec_tx_drop_metadata;
+};
+
+struct mlx5e_ipsec_stats {
+ u64 ipsec_dec_in_packets;
+ u64 ipsec_dec_out_packets;
+ u64 ipsec_dec_bypass_packets;
+ u64 ipsec_enc_in_packets;
+ u64 ipsec_enc_out_packets;
+ u64 ipsec_enc_bypass_packets;
+ u64 ipsec_dec_drop_packets;
+ u64 ipsec_dec_auth_fail_packets;
+ u64 ipsec_enc_drop_packets;
+ u64 ipsec_add_sa_success;
+ u64 ipsec_add_sa_fail;
+ u64 ipsec_del_sa_success;
+ u64 ipsec_del_sa_fail;
+ u64 ipsec_cmd_drop;
+};
+
+struct mlx5e_ipsec {
+ struct mlx5e_priv *en_priv;
+ DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+ bool no_trailer;
+ spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
+ struct ida halloc;
+ struct mlx5e_ipsec_sw_stats sw_stats;
+ struct mlx5e_ipsec_stats stats;
+ struct workqueue_struct *wq;
+};
+
+struct mlx5e_ipsec_esn_state {
+ u32 esn;
+ u8 trigger: 1;
+ u8 overlap: 1;
+};
+
+struct mlx5e_ipsec_sa_entry {
+ struct hlist_node hlist; /* Item in SADB_RX hashtable */
+ struct mlx5e_ipsec_esn_state esn_state;
+ unsigned int handle; /* Handle in SADB_RX */
+ struct xfrm_state *x;
+ struct mlx5e_ipsec *ipsec;
+ struct mlx5_accel_esp_xfrm *xfrm;
+ void *hw_context;
+ void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+};
+
+void mlx5e_ipsec_build_inverse_table(void);
+int mlx5e_ipsec_init(struct mlx5e_priv *priv);
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
+
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
+ unsigned int handle);
+
+#else
+
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+}
+
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
+ uint8_t *data)
+{
+ return 0;
+}
+
+static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ return 0;
+}
+
+#endif
+
+#endif /* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
new file mode 100644
index 000000000..128a82b1d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/aead.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/ipsec.h"
+#include "accel/accel.h"
+#include "en.h"
+
+enum {
+ MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
+ MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+ MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
+};
+
+struct mlx5e_ipsec_rx_metadata {
+ unsigned char nexthdr;
+ __be32 sa_handle;
+} __packed;
+
+enum {
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+ MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+struct mlx5e_ipsec_tx_metadata {
+ __be16 mss_inv; /* 1/MSS in 16bit fixed point, only for LSO */
+ __be16 seq; /* LSBs of the first TCP seq, only for LSO */
+ u8 esp_next_proto; /* Next protocol of ESP */
+} __packed;
+
+struct mlx5e_ipsec_metadata {
+ unsigned char syndrome;
+ union {
+ unsigned char raw[5];
+ /* from FPGA to host, on successful decrypt */
+ struct mlx5e_ipsec_rx_metadata rx;
+ /* from host to FPGA */
+ struct mlx5e_ipsec_tx_metadata tx;
+ } __packed content;
+ /* packet type ID field */
+ __be16 ethertype;
+} __packed;
+
+#define MAX_LSO_MSS 2048
+
+/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
+static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
+
+static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
+{
+ return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
+}
+
+static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
+{
+ struct mlx5e_ipsec_metadata *mdata;
+ struct ethhdr *eth;
+
+ if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
+ return ERR_PTR(-ENOMEM);
+
+ eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
+ skb->mac_header -= sizeof(*mdata);
+ mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
+
+ memmove(skb->data, skb->data + sizeof(*mdata),
+ 2 * ETH_ALEN);
+
+ eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+
+ memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
+ return mdata;
+}
+
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+ unsigned int alen = crypto_aead_authsize(x->data);
+ struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+ struct iphdr *ipv4hdr = ip_hdr(skb);
+ unsigned int trailer_len;
+ u8 plen;
+ int ret;
+
+ ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+ if (unlikely(ret))
+ return ret;
+
+ trailer_len = alen + plen + 2;
+
+ pskb_trim(skb, skb->len - trailer_len);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+ ip_send_check(ipv4hdr);
+ } else {
+ ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+ trailer_len);
+ }
+ return 0;
+}
+
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg, u8 mode,
+ struct xfrm_offload *xo)
+{
+ u8 proto;
+
+ /* Tunnel Mode:
+ * SWP: OutL3 InL3 InL4
+ * Pkt: MAC IP ESP IP L4
+ *
+ * Transport Mode:
+ * SWP: OutL3 InL4
+ * InL3
+ * Pkt: MAC IP ESP L4
+ *
+ * Offsets are in 2-byte words, counting from start of frame
+ */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (skb->protocol == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+ if (mode == XFRM_MODE_TUNNEL) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (xo->proto == IPPROTO_IPV6) {
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ proto = inner_ipv6_hdr(skb)->nexthdr;
+ } else {
+ proto = inner_ip_hdr(skb)->protocol;
+ }
+ } else {
+ eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+ if (skb->protocol == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ proto = xo->proto;
+ }
+ switch (proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ /* Fall through */
+ case IPPROTO_TCP:
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ }
+}
+
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ __u32 oseq = replay_esn->oseq;
+ int iv_offset;
+ __be64 seqno;
+ u32 seq_hi;
+
+ if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID &&
+ MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) {
+ seq_hi = xo->seq.hi - 1;
+ } else {
+ seq_hi = xo->seq.hi;
+ }
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo)
+{
+ int iv_offset;
+ __be64 seqno;
+
+ /* Place the SN in the IV field */
+ seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+ iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+ skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
+ struct mlx5e_ipsec_metadata *mdata,
+ struct xfrm_offload *xo)
+{
+ struct ip_esp_hdr *esph;
+ struct tcphdr *tcph;
+
+ if (skb_is_gso(skb)) {
+ /* Add LSO metadata indication */
+ esph = ip_esp_hdr(skb);
+ tcph = inner_tcp_hdr(skb);
+ netdev_dbg(skb->dev, " Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
+ skb->network_header,
+ skb->transport_header,
+ skb->inner_network_header,
+ skb->inner_transport_header);
+ netdev_dbg(skb->dev, " Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
+ skb->len, skb_shinfo(skb)->gso_size,
+ ntohs(tcph->source), ntohs(tcph->dest),
+ ntohl(tcph->seq), ntohl(esph->seq_no));
+ mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
+ mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
+ mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
+ } else {
+ mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
+ }
+ mdata->content.tx.esp_next_proto = xo->proto;
+
+ netdev_dbg(skb->dev, " TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
+ mdata->syndrome, mdata->content.tx.esp_next_proto,
+ ntohs(mdata->content.tx.mss_inv),
+ ntohs(mdata->content.tx.seq));
+}
+
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_tx_wqe *wqe,
+ struct sk_buff *skb)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct mlx5e_ipsec_metadata *mdata;
+ struct mlx5e_ipsec_sa_entry *sa_entry;
+ struct xfrm_state *x;
+
+ if (!xo)
+ return skb;
+
+ if (unlikely(skb->sp->len != 1)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
+ goto drop;
+ }
+
+ x = xfrm_input_state(skb);
+ if (unlikely(!x)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state);
+ goto drop;
+ }
+
+ if (unlikely(!x->xso.offload_handle ||
+ (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6)))) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
+ goto drop;
+ }
+
+ if (!skb_is_gso(skb))
+ if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
+ goto drop;
+ }
+ mdata = mlx5e_ipsec_add_metadata(skb);
+ if (IS_ERR(mdata)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+ goto drop;
+ }
+ mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+ sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+ sa_entry->set_iv_op(skb, x, xo);
+ mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+ return skb;
+
+drop:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static inline struct xfrm_state *
+mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
+ struct mlx5e_ipsec_metadata *mdata)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct xfrm_offload *xo;
+ struct xfrm_state *xs;
+ u32 sa_handle;
+
+ skb->sp = secpath_dup(skb->sp);
+ if (unlikely(!skb->sp)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
+ return NULL;
+ }
+
+ sa_handle = be32_to_cpu(mdata->content.rx.sa_handle);
+ xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
+ if (unlikely(!xs)) {
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
+ return NULL;
+ }
+
+ skb->sp->xvec[skb->sp->len++] = xs;
+ skb->sp->olen++;
+
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ switch (mdata->syndrome) {
+ case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
+ xo->status = CRYPTO_SUCCESS;
+ if (likely(priv->ipsec->no_trailer)) {
+ xo->flags |= XFRM_ESP_NO_TRAILER;
+ xo->proto = mdata->content.rx.nexthdr;
+ }
+ break;
+ case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
+ xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
+ break;
+ case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
+ xo->status = CRYPTO_INVALID_PROTOCOL;
+ break;
+ default:
+ atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
+ return NULL;
+ }
+ return xs;
+}
+
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb, u32 *cqe_bcnt)
+{
+ struct mlx5e_ipsec_metadata *mdata;
+ struct xfrm_state *xs;
+
+ if (!is_metadata_hdr_valid(skb))
+ return skb;
+
+ /* Use the metadata */
+ mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN);
+ xs = mlx5e_ipsec_build_sp(netdev, skb, mdata);
+ if (unlikely(!xs)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ remove_metadata_hdr(skb);
+ *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
+
+ return skb;
+}
+
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct xfrm_state *x;
+
+ if (skb->sp && skb->sp->len) {
+ x = skb->sp->xvec[0];
+ if (x && x->xso.offload_handle)
+ return true;
+ }
+ return false;
+}
+
+void mlx5e_ipsec_build_inverse_table(void)
+{
+ u16 mss_inv;
+ u32 mss;
+
+ /* Calculate 1/x inverse table for use in GSO data path.
+ * Using this table, we provide the IPSec accelerator with the value of
+ * 1/gso_size so that it can infer the position of each segment inside
+ * the GSO, and increment the ESP sequence number, and generate the IV.
+ * The HW needs this value in Q0.16 fixed-point number format
+ */
+ mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
+ for (mss = 2; mss < MAX_LSO_MSS; mss++) {
+ mss_inv = div_u64(1ULL << 32, mss) >> 16;
+ mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
new file mode 100644
index 000000000..ca47c0540
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_RXTX_H__
+#define __MLX5E_IPSEC_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/skbuff.h>
+#include <net/xfrm.h>
+#include "en.h"
+
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+ struct sk_buff *skb, u32 *cqe_bcnt);
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+void mlx5e_ipsec_inverse_table_init(void);
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+ netdev_features_t features);
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+ struct xfrm_offload *xo);
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_tx_wqe *wqe,
+ struct sk_buff *skb);
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
new file mode 100644
index 000000000..6fea59223
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "fpga/sdk.h"
+#include "en_accel/ipsec.h"
+
+static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
+};
+
+static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
+#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
+
+#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS)
+
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+ if (!priv->ipsec)
+ return 0;
+
+ return NUM_IPSEC_COUNTERS;
+}
+
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ unsigned int i, idx = 0;
+
+ if (!priv->ipsec)
+ return 0;
+
+ for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_ipsec_hw_stats_desc[i].format);
+
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_ipsec_sw_stats_desc[i].format);
+
+ return NUM_IPSEC_COUNTERS;
+}
+
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+ int ret;
+
+ if (!priv->ipsec)
+ return;
+
+ ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
+ NUM_IPSEC_HW_COUNTERS);
+ if (ret)
+ memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
+}
+
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ int i, idx = 0;
+
+ if (!priv->ipsec)
+ return 0;
+
+ for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
+ mlx5e_ipsec_hw_stats_desc, i);
+
+ for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+ mlx5e_ipsec_sw_stats_desc, i);
+
+ return NUM_IPSEC_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
new file mode 100644
index 000000000..e88340e19
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include "en_accel/tls.h"
+#include "accel/tls.h"
+
+static void mlx5e_tls_set_ipv4_flow(void *flow, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ MLX5_SET(tls_flow, flow, ipv6, 0);
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &inet->inet_daddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &inet->inet_rcv_saddr, MLX5_FLD_SZ_BYTES(ipv4_layout, ipv4));
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlx5e_tls_set_ipv6_flow(void *flow, struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ MLX5_SET(tls_flow, flow, ipv6, 1);
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &sk->sk_v6_daddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &np->saddr, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+}
+#endif
+
+static void mlx5e_tls_set_flow_tcp_ports(void *flow, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, src_port), &inet->inet_sport,
+ MLX5_FLD_SZ_BYTES(tls_flow, src_port));
+ memcpy(MLX5_ADDR_OF(tls_flow, flow, dst_port), &inet->inet_dport,
+ MLX5_FLD_SZ_BYTES(tls_flow, dst_port));
+}
+
+static int mlx5e_tls_set_flow(void *flow, struct sock *sk, u32 caps)
+{
+ switch (sk->sk_family) {
+ case AF_INET:
+ mlx5e_tls_set_ipv4_flow(flow, sk);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (!sk->sk_ipv6only &&
+ ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
+ mlx5e_tls_set_ipv4_flow(flow, sk);
+ break;
+ }
+ if (!(caps & MLX5_ACCEL_TLS_IPV6))
+ goto error_out;
+
+ mlx5e_tls_set_ipv6_flow(flow, sk);
+ break;
+#endif
+ default:
+ goto error_out;
+ }
+
+ mlx5e_tls_set_flow_tcp_ports(flow, sk);
+ return 0;
+error_out:
+ return -EINVAL;
+}
+
+static int mlx5e_tls_add(struct net_device *netdev, struct sock *sk,
+ enum tls_offload_ctx_dir direction,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 caps = mlx5_accel_tls_device_caps(mdev);
+ int ret = -ENOMEM;
+ void *flow;
+ u32 swid;
+
+ flow = kzalloc(MLX5_ST_SZ_BYTES(tls_flow), GFP_KERNEL);
+ if (!flow)
+ return ret;
+
+ ret = mlx5e_tls_set_flow(flow, sk, caps);
+ if (ret)
+ goto free_flow;
+
+ ret = mlx5_accel_tls_add_flow(mdev, flow, crypto_info,
+ start_offload_tcp_sn, &swid,
+ direction == TLS_OFFLOAD_CTX_DIR_TX);
+ if (ret < 0)
+ goto free_flow;
+
+ if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
+ struct mlx5e_tls_offload_context_tx *tx_ctx =
+ mlx5e_get_tls_tx_context(tls_ctx);
+
+ tx_ctx->swid = htonl(swid);
+ tx_ctx->expected_seq = start_offload_tcp_sn;
+ } else {
+ struct mlx5e_tls_offload_context_rx *rx_ctx =
+ mlx5e_get_tls_rx_context(tls_ctx);
+
+ rx_ctx->handle = htonl(swid);
+ }
+
+ return 0;
+free_flow:
+ kfree(flow);
+ return ret;
+}
+
+static void mlx5e_tls_del(struct net_device *netdev,
+ struct tls_context *tls_ctx,
+ enum tls_offload_ctx_dir direction)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ unsigned int handle;
+
+ handle = ntohl((direction == TLS_OFFLOAD_CTX_DIR_TX) ?
+ mlx5e_get_tls_tx_context(tls_ctx)->swid :
+ mlx5e_get_tls_rx_context(tls_ctx)->handle);
+
+ mlx5_accel_tls_del_flow(priv->mdev, handle,
+ direction == TLS_OFFLOAD_CTX_DIR_TX);
+}
+
+static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk,
+ u32 seq, u64 rcd_sn)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_tls_offload_context_rx *rx_ctx;
+
+ rx_ctx = mlx5e_get_tls_rx_context(tls_ctx);
+
+ netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq,
+ be64_to_cpu(rcd_sn));
+ mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn);
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply);
+}
+
+static const struct tlsdev_ops mlx5e_tls_ops = {
+ .tls_dev_add = mlx5e_tls_add,
+ .tls_dev_del = mlx5e_tls_del,
+ .tls_dev_resync_rx = mlx5e_tls_resync_rx,
+};
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ u32 caps;
+
+ if (!mlx5_accel_is_tls_device(priv->mdev))
+ return;
+
+ caps = mlx5_accel_tls_device_caps(priv->mdev);
+ if (caps & MLX5_ACCEL_TLS_TX) {
+ netdev->features |= NETIF_F_HW_TLS_TX;
+ netdev->hw_features |= NETIF_F_HW_TLS_TX;
+ }
+
+ if (caps & MLX5_ACCEL_TLS_RX) {
+ netdev->features |= NETIF_F_HW_TLS_RX;
+ netdev->hw_features |= NETIF_F_HW_TLS_RX;
+ }
+
+ if (!(caps & MLX5_ACCEL_TLS_LRO)) {
+ netdev->features &= ~NETIF_F_LRO;
+ netdev->hw_features &= ~NETIF_F_LRO;
+ }
+
+ netdev->tlsdev_ops = &mlx5e_tls_ops;
+}
+
+int mlx5e_tls_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+
+ if (!tls)
+ return -ENOMEM;
+
+ priv->tls = tls;
+ return 0;
+}
+
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tls *tls = priv->tls;
+
+ if (!tls)
+ return;
+
+ kfree(tls);
+ priv->tls = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
new file mode 100644
index 000000000..3f5d72163
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef __MLX5E_TLS_H__
+#define __MLX5E_TLS_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <net/tls.h>
+#include "en.h"
+
+struct mlx5e_tls_sw_stats {
+ atomic64_t tx_tls_drop_metadata;
+ atomic64_t tx_tls_drop_resync_alloc;
+ atomic64_t tx_tls_drop_no_sync_data;
+ atomic64_t tx_tls_drop_bypass_required;
+ atomic64_t rx_tls_drop_resync_request;
+ atomic64_t rx_tls_resync_request;
+ atomic64_t rx_tls_resync_reply;
+ atomic64_t rx_tls_auth_fail;
+};
+
+struct mlx5e_tls {
+ struct mlx5e_tls_sw_stats sw_stats;
+};
+
+struct mlx5e_tls_offload_context_tx {
+ struct tls_offload_context_tx base;
+ u32 expected_seq;
+ __be32 swid;
+};
+
+static inline struct mlx5e_tls_offload_context_tx *
+mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_tx) >
+ TLS_OFFLOAD_CONTEXT_SIZE_TX);
+ return container_of(tls_offload_ctx_tx(tls_ctx),
+ struct mlx5e_tls_offload_context_tx,
+ base);
+}
+
+struct mlx5e_tls_offload_context_rx {
+ struct tls_offload_context_rx base;
+ __be32 handle;
+};
+
+static inline struct mlx5e_tls_offload_context_rx *
+mlx5e_get_tls_rx_context(struct tls_context *tls_ctx)
+{
+ BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context_rx) >
+ TLS_OFFLOAD_CONTEXT_SIZE_RX);
+ return container_of(tls_offload_ctx_rx(tls_ctx),
+ struct mlx5e_tls_offload_context_rx,
+ base);
+}
+
+void mlx5e_tls_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_tls_init(struct mlx5e_priv *priv);
+void mlx5e_tls_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv);
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+#else
+
+static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { }
+static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
+static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
+static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
+
+#endif
+
+#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
new file mode 100644
index 000000000..be137d4a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "en_accel/tls.h"
+#include "en_accel/tls_rxtx.h"
+#include "accel/accel.h"
+
+#include <net/inet6_hashtables.h>
+#include <linux/ipv6.h>
+
+#define SYNDROM_DECRYPTED 0x30
+#define SYNDROM_RESYNC_REQUEST 0x31
+#define SYNDROM_AUTH_FAILED 0x32
+
+#define SYNDROME_OFFLOAD_REQUIRED 32
+#define SYNDROME_SYNC 33
+
+struct sync_info {
+ u64 rcd_sn;
+ s32 sync_len;
+ int nr_frags;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct recv_metadata_content {
+ u8 syndrome;
+ u8 reserved;
+ __be32 sync_seq;
+} __packed;
+
+struct send_metadata_content {
+ /* One byte of syndrome followed by 3 bytes of swid */
+ __be32 syndrome_swid;
+ __be16 first_seq;
+} __packed;
+
+struct mlx5e_tls_metadata {
+ union {
+ /* from fpga to host */
+ struct recv_metadata_content recv;
+ /* from host to fpga */
+ struct send_metadata_content send;
+ unsigned char raw[6];
+ } __packed content;
+ /* packet type ID field */
+ __be16 ethertype;
+} __packed;
+
+static int mlx5e_tls_add_metadata(struct sk_buff *skb, __be32 swid)
+{
+ struct mlx5e_tls_metadata *pet;
+ struct ethhdr *eth;
+
+ if (skb_cow_head(skb, sizeof(struct mlx5e_tls_metadata)))
+ return -ENOMEM;
+
+ eth = (struct ethhdr *)skb_push(skb, sizeof(struct mlx5e_tls_metadata));
+ skb->mac_header -= sizeof(struct mlx5e_tls_metadata);
+ pet = (struct mlx5e_tls_metadata *)(eth + 1);
+
+ memmove(skb->data, skb->data + sizeof(struct mlx5e_tls_metadata),
+ 2 * ETH_ALEN);
+
+ eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+ pet->content.send.syndrome_swid =
+ htonl(SYNDROME_OFFLOAD_REQUIRED << 24) | swid;
+
+ return 0;
+}
+
+static int mlx5e_tls_get_sync_data(struct mlx5e_tls_offload_context_tx *context,
+ u32 tcp_seq, struct sync_info *info)
+{
+ int remaining, i = 0, ret = -EINVAL;
+ struct tls_record_info *record;
+ unsigned long flags;
+ s32 sync_size;
+
+ spin_lock_irqsave(&context->base.lock, flags);
+ record = tls_get_record(&context->base, tcp_seq, &info->rcd_sn);
+
+ if (unlikely(!record))
+ goto out;
+
+ sync_size = tcp_seq - tls_record_start_seq(record);
+ info->sync_len = sync_size;
+ if (unlikely(sync_size < 0)) {
+ if (tls_record_is_start_marker(record))
+ goto done;
+
+ goto out;
+ }
+
+ remaining = sync_size;
+ while (remaining > 0) {
+ info->frags[i] = record->frags[i];
+ __skb_frag_ref(&info->frags[i]);
+ remaining -= skb_frag_size(&info->frags[i]);
+
+ if (remaining < 0)
+ skb_frag_size_add(&info->frags[i], remaining);
+
+ i++;
+ }
+ info->nr_frags = i;
+done:
+ ret = 0;
+out:
+ spin_unlock_irqrestore(&context->base.lock, flags);
+ return ret;
+}
+
+static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
+ struct sk_buff *nskb, u32 tcp_seq,
+ int headln, __be64 rcd_sn)
+{
+ struct mlx5e_tls_metadata *pet;
+ u8 syndrome = SYNDROME_SYNC;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ int data_len, mss;
+
+ nskb->dev = skb->dev;
+ skb_reset_mac_header(nskb);
+ skb_set_network_header(nskb, skb_network_offset(skb));
+ skb_set_transport_header(nskb, skb_transport_offset(skb));
+ memcpy(nskb->data, skb->data, headln);
+ memcpy(nskb->data + headln, &rcd_sn, sizeof(rcd_sn));
+
+ iph = ip_hdr(nskb);
+ iph->tot_len = htons(nskb->len - skb_network_offset(nskb));
+ th = tcp_hdr(nskb);
+ data_len = nskb->len - headln;
+ tcp_seq -= data_len;
+ th->seq = htonl(tcp_seq);
+
+ mss = nskb->dev->mtu - (headln - skb_network_offset(nskb));
+ skb_shinfo(nskb)->gso_size = 0;
+ if (data_len > mss) {
+ skb_shinfo(nskb)->gso_size = mss;
+ skb_shinfo(nskb)->gso_segs = DIV_ROUND_UP(data_len, mss);
+ }
+ skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+
+ pet = (struct mlx5e_tls_metadata *)(nskb->data + sizeof(struct ethhdr));
+ memcpy(pet, &syndrome, sizeof(syndrome));
+ pet->content.send.first_seq = htons(tcp_seq);
+
+ /* MLX5 devices don't care about the checksum partial start, offset
+ * and pseudo header
+ */
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+
+ nskb->xmit_more = 1;
+ nskb->queue_mapping = skb->queue_mapping;
+}
+
+static struct sk_buff *
+mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
+ struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi,
+ struct mlx5e_tls *tls)
+{
+ u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ struct sync_info info;
+ struct sk_buff *nskb;
+ int linear_len = 0;
+ int headln;
+ int i;
+
+ sq->stats->tls_ooo++;
+
+ if (mlx5e_tls_get_sync_data(context, tcp_seq, &info)) {
+ /* We might get here if a retransmission reaches the driver
+ * after the relevant record is acked.
+ * It should be safe to drop the packet in this case
+ */
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_no_sync_data);
+ goto err_out;
+ }
+
+ if (unlikely(info.sync_len < 0)) {
+ u32 payload;
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ payload = skb->len - headln;
+ if (likely(payload <= -info.sync_len))
+ /* SKB payload doesn't require offload
+ */
+ return skb;
+
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
+ goto err_out;
+ }
+
+ if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_metadata);
+ goto err_out;
+ }
+
+ headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ linear_len += headln + sizeof(info.rcd_sn);
+ nskb = alloc_skb(linear_len, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic64_inc(&tls->sw_stats.tx_tls_drop_resync_alloc);
+ goto err_out;
+ }
+
+ context->expected_seq = tcp_seq + skb->len - headln;
+ skb_put(nskb, linear_len);
+ for (i = 0; i < info.nr_frags; i++)
+ skb_shinfo(nskb)->frags[i] = info.frags[i];
+
+ skb_shinfo(nskb)->nr_frags = info.nr_frags;
+ nskb->data_len = info.sync_len;
+ nskb->len += info.sync_len;
+ sq->stats->tls_resync_bytes += nskb->len;
+ mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
+ cpu_to_be64(info.rcd_sn));
+ mlx5e_sq_xmit(sq, nskb, *wqe, *pi);
+ mlx5e_sq_fetch_wqe(sq, wqe, pi);
+ return skb;
+
+err_out:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_tls_offload_context_tx *context;
+ struct tls_context *tls_ctx;
+ u32 expected_seq;
+ int datalen;
+ u32 skb_seq;
+
+ if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
+ goto out;
+
+ datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
+ if (!datalen)
+ goto out;
+
+ tls_ctx = tls_get_ctx(skb->sk);
+ if (unlikely(tls_ctx->netdev != netdev))
+ goto out;
+
+ skb_seq = ntohl(tcp_hdr(skb)->seq);
+ context = mlx5e_get_tls_tx_context(tls_ctx);
+ expected_seq = context->expected_seq;
+
+ if (unlikely(expected_seq != skb_seq)) {
+ skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
+ goto out;
+ }
+
+ if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
+ atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ goto out;
+ }
+
+ context->expected_seq = skb_seq + datalen;
+out:
+ return skb;
+}
+
+static int tls_update_resync_sn(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct mlx5e_tls_metadata *mdata)
+{
+ struct sock *sk = NULL;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ __be32 seq;
+
+ if (mdata->ethertype != htons(ETH_P_IP))
+ return -EINVAL;
+
+ iph = (struct iphdr *)(mdata + 1);
+
+ th = ((void *)iph) + iph->ihl * 4;
+
+ if (iph->version == 4) {
+ sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ iph->saddr, th->source, iph->daddr,
+ th->dest, netdev->ifindex);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph;
+
+ sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ &ipv6h->saddr, th->source,
+ &ipv6h->daddr, ntohs(th->dest),
+ netdev->ifindex, 0);
+#endif
+ }
+ if (!sk || sk->sk_state == TCP_TIME_WAIT) {
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_drop_resync_request);
+ goto out;
+ }
+
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+
+ memcpy(&seq, &mdata->content.recv.sync_seq, sizeof(seq));
+ tls_offload_rx_resync_request(sk, seq);
+out:
+ return 0;
+}
+
+void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+ u32 *cqe_bcnt)
+{
+ struct mlx5e_tls_metadata *mdata;
+ struct mlx5e_priv *priv;
+
+ if (!is_metadata_hdr_valid(skb))
+ return;
+
+ /* Use the metadata */
+ mdata = (struct mlx5e_tls_metadata *)(skb->data + ETH_HLEN);
+ switch (mdata->content.recv.syndrome) {
+ case SYNDROM_DECRYPTED:
+ skb->decrypted = 1;
+ break;
+ case SYNDROM_RESYNC_REQUEST:
+ tls_update_resync_sn(netdev, skb, mdata);
+ priv = netdev_priv(netdev);
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_request);
+ break;
+ case SYNDROM_AUTH_FAILED:
+ /* Authentication failure will be observed and verified by kTLS */
+ priv = netdev_priv(netdev);
+ atomic64_inc(&priv->tls->sw_stats.rx_tls_auth_fail);
+ break;
+ default:
+ /* Bypass the metadata header to others */
+ return;
+ }
+
+ remove_metadata_hdr(skb);
+ *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
new file mode 100644
index 000000000..311667ec7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_TLS_RXTX_H__
+#define __MLX5E_TLS_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_TLS
+
+#include <linux/skbuff.h>
+#include "en.h"
+
+struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe,
+ u16 *pi);
+
+void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
+ u32 *cqe_bcnt);
+
+#endif /* CONFIG_MLX5_EN_TLS */
+
+#endif /* __MLX5E_TLS_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
new file mode 100644
index 000000000..01468ec27
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/tls.h"
+#include "fpga/sdk.h"
+#include "en_accel/tls.h"
+
+static const struct counter_desc mlx5e_tls_sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_metadata) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_resync_alloc) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) },
+};
+
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc)
+
+int mlx5e_tls_get_count(struct mlx5e_priv *priv)
+{
+ if (!priv->tls)
+ return 0;
+
+ return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+ unsigned int i, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ mlx5e_tls_sw_stats_desc[i].format);
+
+ return NUM_TLS_SW_COUNTERS;
+}
+
+int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+ int i, idx = 0;
+
+ if (!priv->tls)
+ return 0;
+
+ for (i = 0; i < NUM_TLS_SW_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats,
+ mlx5e_tls_sw_stats_desc, i);
+
+ return NUM_TLS_SW_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
new file mode 100644
index 000000000..a4be04deb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -0,0 +1,710 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include <linux/mlx5/fs.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "en.h"
+
+struct arfs_tuple {
+ __be16 etype;
+ u8 ip_proto;
+ union {
+ __be32 src_ipv4;
+ struct in6_addr src_ipv6;
+ };
+ union {
+ __be32 dst_ipv4;
+ struct in6_addr dst_ipv6;
+ };
+ __be16 src_port;
+ __be16 dst_port;
+};
+
+struct arfs_rule {
+ struct mlx5e_priv *priv;
+ struct work_struct arfs_work;
+ struct mlx5_flow_handle *rule;
+ struct hlist_node hlist;
+ int rxq;
+ /* Flow ID passed to ndo_rx_flow_steer */
+ int flow_id;
+ /* Filter ID returned by ndo_rx_flow_steer */
+ int filter_id;
+ struct arfs_tuple tuple;
+};
+
+#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
+ for (i = 0; i < ARFS_NUM_TYPES; i++) \
+ mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)
+
+#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
+ for (j = 0; j < ARFS_HASH_SIZE; j++) \
+ hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
+
+static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+{
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ return MLX5E_TT_IPV4_TCP;
+ case ARFS_IPV4_UDP:
+ return MLX5E_TT_IPV4_UDP;
+ case ARFS_IPV6_TCP:
+ return MLX5E_TT_IPV6_TCP;
+ case ARFS_IPV6_UDP:
+ return MLX5E_TT_IPV6_UDP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int arfs_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5e_tir *tir = priv->indir_tir;
+ int err = 0;
+ int tt;
+ int i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ dest.tir_num = tir[i].tirn;
+ tt = arfs_get_tt(i);
+ /* Modify ttc rules destination to bypass the aRFS tables*/
+ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
+ &dest, NULL);
+ if (err) {
+ netdev_err(priv->netdev,
+ "%s: modify ttc destination failed\n",
+ __func__);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void arfs_del_rules(struct mlx5e_priv *priv);
+
+int mlx5e_arfs_disable(struct mlx5e_priv *priv)
+{
+ arfs_del_rules(priv);
+
+ return arfs_disable(priv);
+}
+
+int mlx5e_arfs_enable(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_destination dest = {};
+ int err = 0;
+ int tt;
+ int i;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ dest.ft = priv->fs.arfs.arfs_tables[i].ft.t;
+ tt = arfs_get_tt(i);
+ /* Modify ttc rules destination to point on the aRFS FTs */
+ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
+ &dest, NULL);
+ if (err) {
+ netdev_err(priv->netdev,
+ "%s: modify ttc destination failed err=%d\n",
+ __func__, err);
+ arfs_disable(priv);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static void arfs_destroy_table(struct arfs_table *arfs_t)
+{
+ mlx5_del_flow_rules(arfs_t->default_rule);
+ mlx5e_destroy_flow_table(&arfs_t->ft);
+}
+
+void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
+{
+ int i;
+
+ if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ return;
+
+ arfs_del_rules(priv);
+ destroy_workqueue(priv->fs.arfs.wq);
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
+ arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
+ }
+}
+
+static int arfs_add_default_rule(struct mlx5e_priv *priv,
+ enum arfs_type type)
+{
+ struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
+ struct mlx5e_tir *tir = priv->indir_tir;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ enum mlx5e_traffic_types tt;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ tt = arfs_get_tt(type);
+ if (tt == -EINVAL) {
+ netdev_err(priv->netdev, "%s: bad arfs_type: %d\n",
+ __func__, type);
+ err = -EINVAL;
+ goto out;
+ }
+
+ dest.tir_num = tir[tt].tirn;
+
+ arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
+ &flow_act,
+ &dest, 1);
+ if (IS_ERR(arfs_t->default_rule)) {
+ err = PTR_ERR(arfs_t->default_rule);
+ arfs_t->default_rule = NULL;
+ netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
+ __func__, type);
+ }
+out:
+ kvfree(spec);
+ return err;
+}
+
+#define MLX5E_ARFS_NUM_GROUPS 2
+#define MLX5E_ARFS_GROUP1_SIZE (BIT(16) - 1)
+#define MLX5E_ARFS_GROUP2_SIZE BIT(0)
+#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\
+ MLX5E_ARFS_GROUP2_SIZE)
+static int arfs_create_groups(struct mlx5e_flow_table *ft,
+ enum arfs_type type)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *outer_headers_c;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
+ sizeof(*ft->g), GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in || !ft->g) {
+ kvfree(ft->g);
+ kvfree(in);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
+ outer_headers);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ case ARFS_IPV6_TCP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
+ break;
+ case ARFS_IPV4_UDP:
+ case ARFS_IPV6_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ switch (type) {
+ case ARFS_IPV4_TCP:
+ case ARFS_IPV4_UDP:
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ break;
+ case ARFS_IPV6_TCP:
+ case ARFS_IPV6_UDP:
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff, 16);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ARFS_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_ARFS_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+out:
+ kvfree(in);
+
+ return err;
+}
+
+static int arfs_create_table(struct mlx5e_priv *priv,
+ enum arfs_type type)
+{
+ struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE;
+ ft_attr.level = MLX5E_ARFS_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = arfs_create_groups(ft, type);
+ if (err)
+ goto err;
+
+ err = arfs_add_default_rule(priv, type);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
+{
+ int err = 0;
+ int i;
+
+ if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+ return 0;
+
+ spin_lock_init(&priv->fs.arfs.arfs_lock);
+ INIT_LIST_HEAD(&priv->fs.arfs.rules);
+ priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
+ if (!priv->fs.arfs.wq)
+ return -ENOMEM;
+
+ for (i = 0; i < ARFS_NUM_TYPES; i++) {
+ err = arfs_create_table(priv, i);
+ if (err)
+ goto err;
+ }
+ return 0;
+err:
+ mlx5e_arfs_destroy_tables(priv);
+ return err;
+}
+
+#define MLX5E_ARFS_EXPIRY_QUOTA 60
+
+static void arfs_may_expire_flow(struct mlx5e_priv *priv)
+{
+ struct arfs_rule *arfs_rule;
+ struct hlist_node *htmp;
+ int quota = 0;
+ int i;
+ int j;
+
+ HLIST_HEAD(del_list);
+ spin_lock_bh(&priv->fs.arfs.arfs_lock);
+ mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+ if (!work_pending(&arfs_rule->arfs_work) &&
+ rps_may_expire_flow(priv->netdev,
+ arfs_rule->rxq, arfs_rule->flow_id,
+ arfs_rule->filter_id)) {
+ hlist_del_init(&arfs_rule->hlist);
+ hlist_add_head(&arfs_rule->hlist, &del_list);
+ if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
+ break;
+ }
+ }
+ spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+ hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
+ if (arfs_rule->rule)
+ mlx5_del_flow_rules(arfs_rule->rule);
+ hlist_del(&arfs_rule->hlist);
+ kfree(arfs_rule);
+ }
+}
+
+static void arfs_del_rules(struct mlx5e_priv *priv)
+{
+ struct hlist_node *htmp;
+ struct arfs_rule *rule;
+ int i;
+ int j;
+
+ HLIST_HEAD(del_list);
+ spin_lock_bh(&priv->fs.arfs.arfs_lock);
+ mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+ hlist_del_init(&rule->hlist);
+ hlist_add_head(&rule->hlist, &del_list);
+ }
+ spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+
+ hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
+ cancel_work_sync(&rule->arfs_work);
+ if (rule->rule)
+ mlx5_del_flow_rules(rule->rule);
+ hlist_del(&rule->hlist);
+ kfree(rule);
+ }
+}
+
+static struct hlist_head *
+arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
+ __be16 dst_port)
+{
+ unsigned long l;
+ int bucket_idx;
+
+ l = (__force unsigned long)src_port |
+ ((__force unsigned long)dst_port << 2);
+
+ bucket_idx = hash_long(l, ARFS_HASH_SHIFT);
+
+ return &arfs_t->rules_hash[bucket_idx];
+}
+
+static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
+ u8 ip_proto, __be16 etype)
+{
+ if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
+ return &arfs->arfs_tables[ARFS_IPV4_TCP];
+ if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
+ return &arfs->arfs_tables[ARFS_IPV4_UDP];
+ if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
+ return &arfs->arfs_tables[ARFS_IPV6_TCP];
+ if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
+ return &arfs->arfs_tables[ARFS_IPV6_UDP];
+
+ return NULL;
+}
+
+static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
+ struct arfs_rule *arfs_rule)
+{
+ struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct arfs_tuple *tuple = &arfs_rule->tuple;
+ struct mlx5_flow_handle *rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct arfs_table *arfs_table;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
+ ntohs(tuple->etype));
+ arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
+ if (!arfs_table) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ ft = arfs_table->ft.t;
+ if (tuple->ip_proto == IPPROTO_TCP) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.tcp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
+ ntohs(tuple->dst_port));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
+ ntohs(tuple->src_port));
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.udp_dport);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.udp_sport);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
+ ntohs(tuple->dst_port));
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
+ ntohs(tuple->src_port));
+ }
+ if (tuple->etype == htons(ETH_P_IP)) {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &tuple->src_ipv4,
+ 4);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &tuple->dst_ipv4,
+ 4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &tuple->src_ipv6,
+ 16);
+ memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &tuple->dst_ipv6,
+ 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xff,
+ 16);
+ memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xff,
+ 16);
+ }
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
+ __func__, arfs_rule->filter_id, arfs_rule->rxq, err);
+ }
+
+out:
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule, u16 rxq)
+{
+ struct mlx5_flow_destination dst = {};
+ int err = 0;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst.tir_num = priv->direct_tir[rxq].tirn;
+ err = mlx5_modify_rule_destination(rule, &dst, NULL);
+ if (err)
+ netdev_warn(priv->netdev,
+ "Failed to modify aRFS rule destination to rq=%d\n", rxq);
+}
+
+static void arfs_handle_work(struct work_struct *work)
+{
+ struct arfs_rule *arfs_rule = container_of(work,
+ struct arfs_rule,
+ arfs_work);
+ struct mlx5e_priv *priv = arfs_rule->priv;
+ struct mlx5_flow_handle *rule;
+
+ mutex_lock(&priv->state_lock);
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ spin_lock_bh(&priv->fs.arfs.arfs_lock);
+ hlist_del(&arfs_rule->hlist);
+ spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+
+ mutex_unlock(&priv->state_lock);
+ kfree(arfs_rule);
+ goto out;
+ }
+ mutex_unlock(&priv->state_lock);
+
+ if (!arfs_rule->rule) {
+ rule = arfs_add_rule(priv, arfs_rule);
+ if (IS_ERR(rule))
+ goto out;
+ arfs_rule->rule = rule;
+ } else {
+ arfs_modify_rule_rq(priv, arfs_rule->rule,
+ arfs_rule->rxq);
+ }
+out:
+ arfs_may_expire_flow(priv);
+}
+
+static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
+ struct arfs_table *arfs_t,
+ const struct flow_keys *fk,
+ u16 rxq, u32 flow_id)
+{
+ struct arfs_rule *rule;
+ struct arfs_tuple *tuple;
+
+ rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
+ if (!rule)
+ return NULL;
+
+ rule->priv = priv;
+ rule->rxq = rxq;
+ INIT_WORK(&rule->arfs_work, arfs_handle_work);
+
+ tuple = &rule->tuple;
+ tuple->etype = fk->basic.n_proto;
+ tuple->ip_proto = fk->basic.ip_proto;
+ if (tuple->etype == htons(ETH_P_IP)) {
+ tuple->src_ipv4 = fk->addrs.v4addrs.src;
+ tuple->dst_ipv4 = fk->addrs.v4addrs.dst;
+ } else {
+ memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr));
+ memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ }
+ tuple->src_port = fk->ports.src;
+ tuple->dst_port = fk->ports.dst;
+
+ rule->flow_id = flow_id;
+ rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
+
+ hlist_add_head(&rule->hlist,
+ arfs_hash_bucket(arfs_t, tuple->src_port,
+ tuple->dst_port));
+ return rule;
+}
+
+static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk)
+{
+ if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst)
+ return false;
+ if (tuple->etype != fk->basic.n_proto)
+ return false;
+ if (tuple->etype == htons(ETH_P_IP))
+ return tuple->src_ipv4 == fk->addrs.v4addrs.src &&
+ tuple->dst_ipv4 == fk->addrs.v4addrs.dst;
+ if (tuple->etype == htons(ETH_P_IPV6))
+ return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
+ sizeof(struct in6_addr)) &&
+ !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
+ sizeof(struct in6_addr));
+ return false;
+}
+
+static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
+ const struct flow_keys *fk)
+{
+ struct arfs_rule *arfs_rule;
+ struct hlist_head *head;
+
+ head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst);
+ hlist_for_each_entry(arfs_rule, head, hlist) {
+ if (arfs_cmp(&arfs_rule->tuple, fk))
+ return arfs_rule;
+ }
+
+ return NULL;
+}
+
+int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+ struct arfs_table *arfs_t;
+ struct arfs_rule *arfs_rule;
+ struct flow_keys fk;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+ return -EPROTONOSUPPORT;
+
+ if (fk.basic.n_proto != htons(ETH_P_IP) &&
+ fk.basic.n_proto != htons(ETH_P_IPV6))
+ return -EPROTONOSUPPORT;
+
+ if (skb->encapsulation)
+ return -EPROTONOSUPPORT;
+
+ arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto);
+ if (!arfs_t)
+ return -EPROTONOSUPPORT;
+
+ spin_lock_bh(&arfs->arfs_lock);
+ arfs_rule = arfs_find_rule(arfs_t, &fk);
+ if (arfs_rule) {
+ if (arfs_rule->rxq == rxq_index) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return arfs_rule->filter_id;
+ }
+ arfs_rule->rxq = rxq_index;
+ } else {
+ arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id);
+ if (!arfs_rule) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return -ENOMEM;
+ }
+ }
+ queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
+ spin_unlock_bh(&arfs->arfs_lock);
+ return arfs_rule->filter_id;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
new file mode 100644
index 000000000..124e4567a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+
+/* mlx5e global resources should be placed in this file.
+ * Global resources are common to all the netdevices crated on the same nic.
+ */
+
+int mlx5e_create_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir, u32 *in, int inlen)
+{
+ int err;
+
+ err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn);
+ if (err)
+ return err;
+
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
+ list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+
+ return 0;
+}
+
+void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
+ struct mlx5e_tir *tir)
+{
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
+ mlx5_core_destroy_tir(mdev, tir->tirn);
+ list_del(&tir->list);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+}
+
+static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ struct mlx5_core_mkey *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_resources *res = &mdev->mlx5e_res;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &res->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "alloc pd failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn);
+ if (err) {
+ mlx5_core_err(mdev, "alloc td failed, %d\n", err);
+ goto err_dealloc_pd;
+ }
+
+ err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey);
+ if (err) {
+ mlx5_core_err(mdev, "create mkey failed, %d\n", err);
+ goto err_dealloc_transport_domain;
+ }
+
+ err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false);
+ if (err) {
+ mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err);
+ goto err_destroy_mkey;
+ }
+
+ INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
+ mutex_init(&mdev->mlx5e_res.td.list_lock);
+
+ return 0;
+
+err_destroy_mkey:
+ mlx5_core_destroy_mkey(mdev, &res->mkey);
+err_dealloc_transport_domain:
+ mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(mdev, res->pdn);
+ return err;
+}
+
+void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
+{
+ struct mlx5e_resources *res = &mdev->mlx5e_res;
+
+ mlx5_free_bfreg(mdev, &res->bfreg);
+ mlx5_core_destroy_mkey(mdev, &res->mkey);
+ mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
+ mlx5_core_dealloc_pd(mdev, res->pdn);
+ memset(res, 0, sizeof(*res));
+}
+
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_tir *tir;
+ int err = 0;
+ u32 tirn = 0;
+ int inlen;
+ void *in;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (enable_uc_lb)
+ MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
+ MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+ mutex_lock(&mdev->mlx5e_res.td.list_lock);
+ list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
+ tirn = tir->tirn;
+ err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+ if (err)
+ goto out;
+ }
+
+out:
+ kvfree(in);
+ if (err)
+ netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+ mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+
+ return err;
+}
+
+u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
+{
+ u8 min_inline_mode;
+
+ mlx5_query_min_inline(mdev, &min_inline_mode);
+ if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
+ !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+ min_inline_mode = MLX5_INLINE_MODE_L2;
+
+ return min_inline_mode;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
new file mode 100644
index 000000000..722998d68
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -0,0 +1,1206 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include "en.h"
+#include "en/port.h"
+#include "en/port_buffer.h"
+
+#define MLX5E_100MB (100000)
+#define MLX5E_1GB (1000000)
+
+#define MLX5E_CEE_STATE_UP 1
+#define MLX5E_CEE_STATE_DOWN 0
+
+/* Max supported cable length is 1000 meters */
+#define MLX5E_MAX_CABLE_LENGTH 1000
+
+enum {
+ MLX5E_VENDOR_TC_GROUP_NUM = 7,
+ MLX5E_LOWEST_PRIO_GROUP = 0,
+};
+
+#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \
+ MLX5_CAP_QCAM_REG(mdev, qpts) && \
+ MLX5_CAP_QCAM_REG(mdev, qpdpm))
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state);
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio);
+
+/* If dcbx mode is non-host set the dcbx mode to host.
+ */
+static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
+ enum mlx5_dcbx_oper_mode mode)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 param[MLX5_ST_SZ_DW(dcbx_param)];
+ int err;
+
+ err = mlx5_query_port_dcbx_param(mdev, param);
+ if (err)
+ return err;
+
+ MLX5_SET(dcbx_param, param, version_admin, mode);
+ if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ MLX5_SET(dcbx_param, param, willing_admin, 1);
+
+ return mlx5_set_port_dcbx_param(mdev, param);
+}
+
+static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv)
+{
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, dcbx))
+ return 0;
+
+ if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ return 0;
+
+ err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST);
+ if (err)
+ return err;
+
+ dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
+ struct ieee_ets *ets)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 tc_group[IEEE_8021QAZ_MAX_TCS];
+ bool is_tc_group_6_exist = false;
+ bool is_zero_bw_ets_tc = false;
+ int err = 0;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return -EOPNOTSUPP;
+
+ ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
+ for (i = 0; i < ets->ets_cap; i++) {
+ err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
+ if (err)
+ return err;
+
+ err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]);
+ if (err)
+ return err;
+
+ err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]);
+ if (err)
+ return err;
+
+ if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC &&
+ tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1))
+ is_zero_bw_ets_tc = true;
+
+ if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1))
+ is_tc_group_6_exist = true;
+ }
+
+ /* Report 0% ets tc if exits*/
+ if (is_zero_bw_ets_tc) {
+ for (i = 0; i < ets->ets_cap; i++)
+ if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP)
+ ets->tc_tx_bw[i] = 0;
+ }
+
+ /* Update tc_tsa based on fw setting*/
+ for (i = 0; i < ets->ets_cap; i++) {
+ if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC)
+ priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM &&
+ !is_tc_group_6_exist)
+ priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+ }
+ memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa));
+
+ return err;
+}
+
+static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc)
+{
+ bool any_tc_mapped_to_ets = false;
+ bool ets_zero_bw = false;
+ int strict_group;
+ int i;
+
+ for (i = 0; i <= max_tc; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+ any_tc_mapped_to_ets = true;
+ if (!ets->tc_tx_bw[i])
+ ets_zero_bw = true;
+ }
+ }
+
+ /* strict group has higher priority than ets group */
+ strict_group = MLX5E_LOWEST_PRIO_GROUP;
+ if (any_tc_mapped_to_ets)
+ strict_group++;
+ if (ets_zero_bw)
+ strict_group++;
+
+ for (i = 0; i <= max_tc; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ tc_group[i] = MLX5E_VENDOR_TC_GROUP_NUM;
+ break;
+ case IEEE_8021QAZ_TSA_STRICT:
+ tc_group[i] = strict_group++;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ tc_group[i] = MLX5E_LOWEST_PRIO_GROUP;
+ if (ets->tc_tx_bw[i] && ets_zero_bw)
+ tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1;
+ break;
+ }
+ }
+}
+
+static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
+ u8 *tc_group, int max_tc)
+{
+ int bw_for_ets_zero_bw_tc = 0;
+ int last_ets_zero_bw_tc = -1;
+ int num_ets_zero_bw = 0;
+ int i;
+
+ for (i = 0; i <= max_tc; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS &&
+ !ets->tc_tx_bw[i]) {
+ num_ets_zero_bw++;
+ last_ets_zero_bw_tc = i;
+ }
+ }
+
+ if (num_ets_zero_bw)
+ bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw;
+
+ for (i = 0; i <= max_tc; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_VENDOR:
+ tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ break;
+ case IEEE_8021QAZ_TSA_STRICT:
+ tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ tc_tx_bw[i] = ets->tc_tx_bw[i] ?
+ ets->tc_tx_bw[i] :
+ bw_for_ets_zero_bw_tc;
+ break;
+ }
+ }
+
+ /* Make sure the total bw for ets zero bw group is 100% */
+ if (last_ets_zero_bw_tc != -1)
+ tc_tx_bw[last_ets_zero_bw_tc] +=
+ MLX5E_MAX_BW_ALLOC % num_ets_zero_bw;
+}
+
+/* If there are ETS BW 0,
+ * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%.
+ * Set group #0 to all the ETS BW 0 tcs and
+ * equally splits the 100% BW between them
+ * Report both group #0 and #1 as ETS type.
+ * All the tcs in group #0 will be reported with 0% BW.
+ */
+int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
+ u8 tc_group[IEEE_8021QAZ_MAX_TCS];
+ int max_tc = mlx5_max_tc(mdev);
+ int err, i;
+
+ mlx5e_build_tc_group(ets, tc_group, max_tc);
+ mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc);
+
+ err = mlx5_set_port_prio_tc(mdev, ets->prio_tc);
+ if (err)
+ return err;
+
+ err = mlx5_set_port_tc_group(mdev, tc_group);
+ if (err)
+ return err;
+
+ err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+
+ if (err)
+ return err;
+
+ memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n",
+ __func__, i, ets->prio_tc[i]);
+ mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n",
+ __func__, i, tc_tx_bw[i], tc_group[i]);
+ }
+
+ return err;
+}
+
+static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
+ struct ieee_ets *ets,
+ bool zero_sum_allowed)
+{
+ bool have_ets_tc = false;
+ int bw_sum = 0;
+ int i;
+
+ /* Validate Priority */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) {
+ netdev_err(netdev,
+ "Failed to validate ETS: priority value greater than max(%d)\n",
+ MLX5E_MAX_PRIORITY);
+ return -EINVAL;
+ }
+ }
+
+ /* Validate Bandwidth Sum */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+ have_ets_tc = true;
+ bw_sum += ets->tc_tx_bw[i];
+ }
+ }
+
+ if (have_ets_tc && bw_sum != 100) {
+ if (bw_sum || (!bw_sum && !zero_sum_allowed))
+ netdev_err(netdev,
+ "Failed to validate ETS: BW sum is illegal\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
+ struct ieee_ets *ets)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return -EOPNOTSUPP;
+
+ err = mlx5e_dbcnl_validate_ets(netdev, ets, false);
+ if (err)
+ return err;
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, ets);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ int i;
+
+ pfc->pfc_cap = mlx5_max_tc(mdev) + 1;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause);
+ pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause);
+ }
+
+ if (MLX5_BUFFER_SUPPORTED(mdev))
+ pfc->delay = priv->dcbx.cable_len;
+
+ return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL);
+}
+
+static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 old_cable_len = priv->dcbx.cable_len;
+ struct ieee_pfc pfc_new;
+ u32 changed = 0;
+ u8 curr_pfc_en;
+ int ret = 0;
+
+ /* pfc_en */
+ mlx5_query_port_pfc(mdev, &curr_pfc_en, NULL);
+ if (pfc->pfc_en != curr_pfc_en) {
+ ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
+ if (ret)
+ return ret;
+ mlx5_toggle_port_link(mdev);
+ changed |= MLX5E_PORT_BUFFER_PFC;
+ }
+
+ if (pfc->delay &&
+ pfc->delay < MLX5E_MAX_CABLE_LENGTH &&
+ pfc->delay != priv->dcbx.cable_len) {
+ priv->dcbx.cable_len = pfc->delay;
+ changed |= MLX5E_PORT_BUFFER_CABLE_LEN;
+ }
+
+ if (MLX5_BUFFER_SUPPORTED(mdev)) {
+ pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en;
+ if (priv->dcbx.manual_buffer)
+ ret = mlx5e_port_manual_buffer_config(priv, changed,
+ dev->mtu, &pfc_new,
+ NULL, NULL);
+
+ if (ret && (changed & MLX5E_PORT_BUFFER_CABLE_LEN))
+ priv->dcbx.cable_len = old_cable_len;
+ }
+
+ if (!ret) {
+ mlx5e_dbg(HW, priv,
+ "%s: PFC per priority bit mask: 0x%x\n",
+ __func__, pfc->pfc_en);
+ }
+ return ret;
+}
+
+static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return priv->dcbx.cap;
+}
+
+static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+ if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+ return 1;
+
+ if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
+ if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
+ return 0;
+
+ /* set dcbx to fw controlled */
+ if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) {
+ dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+ dcbx->cap &= ~DCB_CAP_DCBX_HOST;
+ return 0;
+ }
+
+ return 1;
+ }
+
+ if (!(mode & DCB_CAP_DCBX_HOST))
+ return 1;
+
+ if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+ return 1;
+
+ dcbx->cap = mode;
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct dcb_app temp;
+ bool is_new;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
+ !MLX5_DSCP_SUPPORTED(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
+ (app->protocol >= MLX5E_MAX_DSCP))
+ return -EINVAL;
+
+ /* Save the old entry info */
+ temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+ temp.protocol = app->protocol;
+ temp.priority = priv->dcbx_dp.dscp2prio[app->protocol];
+
+ /* Check if need to switch to dscp trust state */
+ if (!priv->dcbx.dscp_app_cnt) {
+ err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP);
+ if (err)
+ return err;
+ }
+
+ /* Skip the fw command if new and old mapping are the same */
+ if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) {
+ err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority);
+ if (err)
+ goto fw_err;
+ }
+
+ /* Delete the old entry if exists */
+ is_new = false;
+ err = dcb_ieee_delapp(dev, &temp);
+ if (err)
+ is_new = true;
+
+ /* Add new entry and update counter */
+ err = dcb_ieee_setapp(dev, app);
+ if (err)
+ return err;
+
+ if (is_new)
+ priv->dcbx.dscp_app_cnt++;
+
+ return err;
+
+fw_err:
+ mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+ return err;
+}
+
+static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager) ||
+ !MLX5_DSCP_SUPPORTED(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if ((app->selector != IEEE_8021QAZ_APP_SEL_DSCP) ||
+ (app->protocol >= MLX5E_MAX_DSCP))
+ return -EINVAL;
+
+ /* Skip if no dscp app entry */
+ if (!priv->dcbx.dscp_app_cnt)
+ return -ENOENT;
+
+ /* Check if the entry matches fw setting */
+ if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol])
+ return -ENOENT;
+
+ /* Delete the app entry */
+ err = dcb_ieee_delapp(dev, app);
+ if (err)
+ return err;
+
+ /* Reset the priority mapping back to zero */
+ err = mlx5e_set_dscp2prio(priv, app->protocol, 0);
+ if (err)
+ goto fw_err;
+
+ priv->dcbx.dscp_app_cnt--;
+
+ /* Check if need to switch to pcp trust state */
+ if (!priv->dcbx.dscp_app_cnt)
+ err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+
+ return err;
+
+fw_err:
+ mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+ return err;
+}
+
+static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ int err;
+ int i;
+
+ err = mlx5_query_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
+ if (err)
+ return err;
+
+ memset(maxrate->tc_maxrate, 0, sizeof(maxrate->tc_maxrate));
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ switch (max_bw_unit[i]) {
+ case MLX5_100_MBPS_UNIT:
+ maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_100MB;
+ break;
+ case MLX5_GBPS_UNIT:
+ maxrate->tc_maxrate[i] = max_bw_value[i] * MLX5E_1GB;
+ break;
+ case MLX5_BW_NO_LIMIT:
+ break;
+ default:
+ WARN(true, "non-supported BW unit");
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+ u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
+ __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB);
+ int i;
+
+ memset(max_bw_value, 0, sizeof(max_bw_value));
+ memset(max_bw_unit, 0, sizeof(max_bw_unit));
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ if (!maxrate->tc_maxrate[i]) {
+ max_bw_unit[i] = MLX5_BW_NO_LIMIT;
+ continue;
+ }
+ if (maxrate->tc_maxrate[i] < upper_limit_mbps) {
+ max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
+ MLX5E_100MB);
+ max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1;
+ max_bw_unit[i] = MLX5_100_MBPS_UNIT;
+ } else {
+ max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
+ MLX5E_1GB);
+ max_bw_unit[i] = MLX5_GBPS_UNIT;
+ }
+ }
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n",
+ __func__, i, max_bw_value[i]);
+ }
+
+ return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
+}
+
+static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct ieee_ets ets;
+ struct ieee_pfc pfc;
+ int err = -EOPNOTSUPP;
+ int i;
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ goto out;
+
+ memset(&ets, 0, sizeof(ets));
+ memset(&pfc, 0, sizeof(pfc));
+
+ ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+ for (i = 0; i < CEE_DCBX_MAX_PGS; i++) {
+ ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i];
+ ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
+ ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i];
+ mlx5e_dbg(HW, priv,
+ "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n",
+ __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i],
+ ets.prio_tc[i]);
+ }
+
+ err = mlx5e_dbcnl_validate_ets(netdev, &ets, true);
+ if (err)
+ goto out;
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+ if (err) {
+ netdev_err(netdev,
+ "%s, Failed to set ETS: %d\n", __func__, err);
+ goto out;
+ }
+
+ /* Set PFC */
+ pfc.pfc_cap = mlx5_max_tc(mdev) + 1;
+ if (!cee_cfg->pfc_enable)
+ pfc.pfc_en = 0;
+ else
+ for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+ pfc.pfc_en |= cee_cfg->pfc_setting[i] << i;
+
+ err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc);
+ if (err) {
+ netdev_err(netdev,
+ "%s, Failed to set PFC: %d\n", __func__, err);
+ goto out;
+ }
+out:
+ return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET;
+}
+
+static u8 mlx5e_dcbnl_getstate(struct net_device *netdev)
+{
+ return MLX5E_CEE_STATE_UP;
+}
+
+static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
+ u8 *perm_addr)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (!perm_addr)
+ return;
+
+ memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+ mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+}
+
+static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
+ int priority, u8 prio_type,
+ u8 pgid, u8 bw_pct, u8 up_map)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ cee_cfg->prio_to_pg_map[priority] = pgid;
+}
+
+static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev,
+ int pgid, u8 bw_pct)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ cee_cfg->pg_bw_pct[pgid] = bw_pct;
+}
+
+static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
+ int priority, u8 *prio_type,
+ u8 *pgid, u8 *bw_pct, u8 *up_map)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets)) {
+ netdev_err(netdev, "%s, ets is not supported\n", __func__);
+ return;
+ }
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ *prio_type = 0;
+ *bw_pct = 0;
+ *up_map = 0;
+
+ if (mlx5_query_port_prio_tc(mdev, priority, pgid))
+ *pgid = 0;
+}
+
+static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
+ int pgid, u8 *bw_pct)
+{
+ struct ieee_ets ets;
+
+ if (pgid >= CEE_DCBX_MAX_PGS) {
+ netdev_err(netdev,
+ "%s, priority group is out of range\n", __func__);
+ return;
+ }
+
+ mlx5e_dcbnl_ieee_getets(netdev, &ets);
+ *bw_pct = ets.tc_tx_bw[pgid];
+}
+
+static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev,
+ int priority, u8 setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (setting > 1)
+ return;
+
+ cee_cfg->pfc_setting[priority] = setting;
+}
+
+static int
+mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev,
+ int priority, u8 *setting)
+{
+ struct ieee_pfc pfc;
+ int err;
+
+ err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc);
+
+ if (err)
+ *setting = 0;
+ else
+ *setting = (pfc.pfc_en >> priority) & 0x01;
+
+ return err;
+}
+
+static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev,
+ int priority, u8 *setting)
+{
+ if (priority >= CEE_DCBX_MAX_PRIO) {
+ netdev_err(netdev,
+ "%s, priority is out of range\n", __func__);
+ return;
+ }
+
+ if (!setting)
+ return;
+
+ mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting);
+}
+
+static u8 mlx5e_dcbnl_getcap(struct net_device *netdev,
+ int capid, u8 *cap)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 rval = 0;
+
+ switch (capid) {
+ case DCB_CAP_ATTR_PG:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_PFC:
+ *cap = true;
+ break;
+ case DCB_CAP_ATTR_UP2TC:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_PG_TCS:
+ *cap = 1 << mlx5_max_tc(mdev);
+ break;
+ case DCB_CAP_ATTR_PFC_TCS:
+ *cap = 1 << mlx5_max_tc(mdev);
+ break;
+ case DCB_CAP_ATTR_GSP:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_BCN:
+ *cap = false;
+ break;
+ case DCB_CAP_ATTR_DCBX:
+ *cap = priv->dcbx.cap |
+ DCB_CAP_DCBX_VER_CEE |
+ DCB_CAP_DCBX_VER_IEEE;
+ break;
+ default:
+ *cap = 0;
+ rval = 1;
+ break;
+ }
+
+ return rval;
+}
+
+static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev,
+ int tcs_id, u8 *num)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ switch (tcs_id) {
+ case DCB_NUMTCS_ATTR_PG:
+ case DCB_NUMTCS_ATTR_PFC:
+ *num = mlx5_max_tc(mdev) + 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev)
+{
+ struct ieee_pfc pfc;
+
+ if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc))
+ return MLX5E_CEE_STATE_DOWN;
+
+ return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN;
+}
+
+static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+ if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN))
+ return;
+
+ cee_cfg->pfc_enable = state;
+}
+
+static int mlx5e_dcbnl_getbuffer(struct net_device *dev,
+ struct dcbnl_buffer *dcb_buffer)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_port_buffer port_buffer;
+ u8 buffer[MLX5E_MAX_PRIORITY];
+ int i, err;
+
+ if (!MLX5_BUFFER_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ err = mlx5e_port_query_priority2buffer(mdev, buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+ dcb_buffer->prio2buffer[i] = buffer[i];
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+ dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size;
+ dcb_buffer->total_size = port_buffer.port_buffer_size;
+
+ return 0;
+}
+
+static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
+ struct dcbnl_buffer *dcb_buffer)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_port_buffer port_buffer;
+ u8 old_prio2buffer[MLX5E_MAX_PRIORITY];
+ u32 *buffer_size = NULL;
+ u8 *prio2buffer = NULL;
+ u32 changed = 0;
+ int i, err;
+
+ if (!MLX5_BUFFER_SUPPORTED(mdev))
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < DCBX_MAX_BUFFERS; i++)
+ mlx5_core_dbg(mdev, "buffer[%d]=%d\n", i, dcb_buffer->buffer_size[i]);
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++)
+ mlx5_core_dbg(mdev, "priority %d buffer%d\n", i, dcb_buffer->prio2buffer[i]);
+
+ err = mlx5e_port_query_priority2buffer(mdev, old_prio2buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_PRIORITY; i++) {
+ if (dcb_buffer->prio2buffer[i] != old_prio2buffer[i]) {
+ changed |= MLX5E_PORT_BUFFER_PRIO2BUFFER;
+ prio2buffer = dcb_buffer->prio2buffer;
+ break;
+ }
+ }
+
+ err = mlx5e_port_query_buffer(priv, &port_buffer);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) {
+ changed |= MLX5E_PORT_BUFFER_SIZE;
+ buffer_size = dcb_buffer->buffer_size;
+ break;
+ }
+ }
+
+ if (!changed)
+ return 0;
+
+ priv->dcbx.manual_buffer = true;
+ err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL,
+ buffer_size, prio2buffer);
+ return err;
+}
+
+const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
+ .ieee_getets = mlx5e_dcbnl_ieee_getets,
+ .ieee_setets = mlx5e_dcbnl_ieee_setets,
+ .ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate,
+ .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate,
+ .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc,
+ .ieee_setapp = mlx5e_dcbnl_ieee_setapp,
+ .ieee_delapp = mlx5e_dcbnl_ieee_delapp,
+ .getdcbx = mlx5e_dcbnl_getdcbx,
+ .setdcbx = mlx5e_dcbnl_setdcbx,
+ .dcbnl_getbuffer = mlx5e_dcbnl_getbuffer,
+ .dcbnl_setbuffer = mlx5e_dcbnl_setbuffer,
+
+/* CEE interfaces */
+ .setall = mlx5e_dcbnl_setall,
+ .getstate = mlx5e_dcbnl_getstate,
+ .getpermhwaddr = mlx5e_dcbnl_getpermhwaddr,
+
+ .setpgtccfgtx = mlx5e_dcbnl_setpgtccfgtx,
+ .setpgbwgcfgtx = mlx5e_dcbnl_setpgbwgcfgtx,
+ .getpgtccfgtx = mlx5e_dcbnl_getpgtccfgtx,
+ .getpgbwgcfgtx = mlx5e_dcbnl_getpgbwgcfgtx,
+
+ .setpfccfg = mlx5e_dcbnl_setpfccfg,
+ .getpfccfg = mlx5e_dcbnl_getpfccfg,
+ .getcap = mlx5e_dcbnl_getcap,
+ .getnumtcs = mlx5e_dcbnl_getnumtcs,
+ .getpfcstate = mlx5e_dcbnl_getpfcstate,
+ .setpfcstate = mlx5e_dcbnl_setpfcstate,
+};
+
+static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
+ enum mlx5_dcbx_oper_mode *mode)
+{
+ u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+ *mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+
+ if (!mlx5_query_port_dcbx_param(priv->mdev, out))
+ *mode = MLX5_GET(dcbx_param, out, version_oper);
+
+ /* From driver's point of view, we only care if the mode
+ * is host (HOST) or non-host (AUTO)
+ */
+ if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ *mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+}
+
+static void mlx5e_ets_init(struct mlx5e_priv *priv)
+{
+ struct ieee_ets ets;
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, ets))
+ return;
+
+ memset(&ets, 0, sizeof(ets));
+ ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+ for (i = 0; i < ets.ets_cap; i++) {
+ ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+ ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+ ets.prio_tc[i] = i;
+ }
+
+ if (ets.ets_cap > 1) {
+ /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+ ets.prio_tc[0] = 1;
+ ets.prio_tc[1] = 0;
+ }
+
+ err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+ if (err)
+ netdev_err(priv->netdev,
+ "%s, Failed to init ETS: %d\n", __func__, err);
+}
+
+enum {
+ INIT,
+ DELETE,
+};
+
+static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action)
+{
+ struct dcb_app temp;
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+ return;
+
+ if (!MLX5_DSCP_SUPPORTED(priv->mdev))
+ return;
+
+ /* No SEL_DSCP entry in non DSCP state */
+ if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP)
+ return;
+
+ temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+ for (i = 0; i < MLX5E_MAX_DSCP; i++) {
+ temp.protocol = i;
+ temp.priority = priv->dcbx_dp.dscp2prio[i];
+ if (action == INIT)
+ dcb_ieee_setapp(priv->netdev, &temp);
+ else
+ dcb_ieee_delapp(priv->netdev, &temp);
+ }
+
+ priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0;
+}
+
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv)
+{
+ mlx5e_dcbnl_dscp_app(priv, INIT);
+}
+
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv)
+{
+ mlx5e_dcbnl_dscp_app(priv, DELETE);
+}
+
+static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv,
+ struct mlx5e_params *params)
+{
+ params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev);
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP &&
+ params->tx_min_inline_mode == MLX5_INLINE_MODE_L2)
+ params->tx_min_inline_mode = MLX5_INLINE_MODE_IP;
+}
+
+static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channels new_channels = {};
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ /* Skip if tx_min_inline is the same */
+ if (new_channels.params.tx_min_inline_mode ==
+ priv->channels.params.tx_min_inline_mode)
+ goto out;
+
+ if (mlx5e_open_channels(priv, &new_channels))
+ goto out;
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+out:
+ mutex_unlock(&priv->state_lock);
+}
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
+{
+ int err;
+
+ err = mlx5_set_trust_state(priv->mdev, trust_state);
+ if (err)
+ return err;
+ priv->dcbx_dp.trust_state = trust_state;
+ mlx5e_trust_update_sq_inline_mode(priv);
+
+ return err;
+}
+
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
+{
+ int err;
+
+ err = mlx5_set_dscp2prio(priv->mdev, dscp, prio);
+ if (err)
+ return err;
+
+ priv->dcbx_dp.dscp2prio[dscp] = prio;
+ return err;
+}
+
+static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
+
+ if (!MLX5_DSCP_SUPPORTED(mdev))
+ return 0;
+
+ err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state);
+ if (err)
+ return err;
+
+ mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params);
+
+ err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
+{
+ struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+ mlx5e_trust_initialize(priv);
+
+ if (!MLX5_CAP_GEN(priv->mdev, qos))
+ return;
+
+ if (MLX5_CAP_GEN(priv->mdev, dcbx))
+ mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode);
+
+ priv->dcbx.cap = DCB_CAP_DCBX_VER_CEE |
+ DCB_CAP_DCBX_VER_IEEE;
+ if (priv->dcbx.mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+ priv->dcbx.cap |= DCB_CAP_DCBX_HOST;
+
+ priv->dcbx.manual_buffer = false;
+ priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN;
+
+ mlx5e_ets_init(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
new file mode 100644
index 000000000..d67adf70a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/net_dim.h>
+#include "en.h"
+
+static void
+mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+ struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
+{
+ mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
+ dim->state = NET_DIM_START_MEASURE;
+}
+
+void mlx5e_rx_dim_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
+ struct net_dim_cq_moder cur_moder =
+ net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+
+ mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
+}
+
+void mlx5e_tx_dim_work(struct work_struct *work)
+{
+ struct net_dim *dim = container_of(work, struct net_dim, work);
+ struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
+ struct net_dim_cq_moder cur_moder =
+ net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+
+ mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
new file mode 100644
index 000000000..a5c4e4f0f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -0,0 +1,1701 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "en/port.h"
+#include "lib/clock.h"
+
+void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ strlcpy(drvinfo->driver, DRIVER_NAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, DRIVER_VERSION,
+ sizeof(drvinfo->version));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d (%.16s)",
+ fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
+ mdev->board_id);
+ strlcpy(drvinfo->bus_info, pci_name(mdev->pdev),
+ sizeof(drvinfo->bus_info));
+}
+
+static void mlx5e_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_drvinfo(priv, drvinfo);
+}
+
+struct ptys2ethtool_config {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised);
+};
+
+static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER];
+
+#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...) \
+ ({ \
+ struct ptys2ethtool_config *cfg; \
+ const unsigned int modes[] = { __VA_ARGS__ }; \
+ unsigned int i; \
+ cfg = &ptys2ethtool_table[reg_]; \
+ bitmap_zero(cfg->supported, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ bitmap_zero(cfg->advertised, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS); \
+ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \
+ __set_bit(modes[i], cfg->supported); \
+ __set_bit(modes[i], cfg->advertised); \
+ } \
+ })
+
+void mlx5e_build_ptys2ethtool_map(void)
+{
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX,
+ ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4,
+ ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2,
+ ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4,
+ ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4,
+ ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4,
+ ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER,
+ ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4,
+ ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4,
+ ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2,
+ ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4,
+ ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4,
+ ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4,
+ ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4,
+ ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T,
+ ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR,
+ ETHTOOL_LINK_MODE_25000baseCR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR,
+ ETHTOOL_LINK_MODE_25000baseKR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR,
+ ETHTOOL_LINK_MODE_25000baseSR_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2,
+ ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2,
+ ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
+}
+
+int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
+{
+ int i, num_stats = 0;
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < mlx5e_num_stats_grps; i++)
+ num_stats += mlx5e_stats_grps[i].get_num_stats(priv);
+ return num_stats;
+ case ETH_SS_PRIV_FLAGS:
+ return ARRAY_SIZE(mlx5e_priv_flags);
+ case ETH_SS_TEST:
+ return mlx5e_self_test_num(priv);
+ /* fallthrough */
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_get_sset_count(priv, sset);
+}
+
+static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data)
+{
+ int i, idx = 0;
+
+ for (i = 0; i < mlx5e_num_stats_grps; i++)
+ idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx);
+}
+
+void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
+{
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_PRIV_FLAGS:
+ for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++)
+ strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]);
+ break;
+
+ case ETH_SS_TEST:
+ for (i = 0; i < mlx5e_self_test_num(priv); i++)
+ strcpy(data + i * ETH_GSTRING_LEN,
+ mlx5e_self_tests[i]);
+ break;
+
+ case ETH_SS_STATS:
+ mlx5e_fill_stats_strings(priv, data);
+ break;
+ }
+}
+
+static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_strings(priv, stringset, data);
+}
+
+void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
+ struct ethtool_stats *stats, u64 *data)
+{
+ int i, idx = 0;
+
+ mutex_lock(&priv->state_lock);
+ mlx5e_update_stats(priv);
+ mutex_unlock(&priv->state_lock);
+
+ for (i = 0; i < mlx5e_num_stats_grps; i++)
+ idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx);
+}
+
+static void mlx5e_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
+}
+
+void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param)
+{
+ param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
+ param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
+ param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames;
+ param->tx_pending = 1 << priv->channels.params.log_sq_size;
+}
+
+static void mlx5e_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param);
+}
+
+int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_channels new_channels = {};
+ u8 log_rq_size;
+ u8 log_sq_size;
+ int err = 0;
+
+ if (param->rx_jumbo_pending) {
+ netdev_info(priv->netdev, "%s: rx_jumbo_pending not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+ if (param->rx_mini_pending) {
+ netdev_info(priv->netdev, "%s: rx_mini_pending not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
+ netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n",
+ __func__, param->rx_pending,
+ 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
+ return -EINVAL;
+ }
+
+ if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
+ netdev_info(priv->netdev, "%s: tx_pending (%d) < min (%d)\n",
+ __func__, param->tx_pending,
+ 1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ return -EINVAL;
+ }
+
+ log_rq_size = order_base_2(param->rx_pending);
+ log_sq_size = order_base_2(param->tx_pending);
+
+ if (log_rq_size == priv->channels.params.log_rq_mtu_frames &&
+ log_sq_size == priv->channels.params.log_sq_size)
+ return 0;
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.log_rq_mtu_frames = log_rq_size;
+ new_channels.params.log_sq_size = log_sq_size;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto unlock;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto unlock;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch)
+{
+ ch->max_combined = priv->profile->max_nch(priv->mdev);
+ ch->combined_count = priv->channels.params.num_channels;
+}
+
+static void mlx5e_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
+ struct ethtool_channels *ch)
+{
+ unsigned int count = ch->combined_count;
+ struct mlx5e_channels new_channels = {};
+ bool arfs_enabled;
+ int err = 0;
+
+ if (!count) {
+ netdev_info(priv->netdev, "%s: combined_count=0 not supported\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (priv->channels.params.num_channels == count)
+ return 0;
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.num_channels = count;
+ if (!netif_is_rxfh_configured(priv->netdev))
+ mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, count);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ /* Create fresh channels with new parameters */
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE;
+ if (arfs_enabled)
+ mlx5e_arfs_disable(priv);
+
+ /* Switch to new channels, set new parameters and close old ones */
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+ if (arfs_enabled) {
+ err = mlx5e_arfs_enable(priv);
+ if (err)
+ netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n",
+ __func__, err);
+ }
+
+out:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_channels(priv, ch);
+}
+
+int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal)
+{
+ struct net_dim_cq_moder *rx_moder, *tx_moder;
+
+ if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
+ return -EOPNOTSUPP;
+
+ rx_moder = &priv->channels.params.rx_cq_moderation;
+ coal->rx_coalesce_usecs = rx_moder->usec;
+ coal->rx_max_coalesced_frames = rx_moder->pkts;
+ coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled;
+
+ tx_moder = &priv->channels.params.tx_cq_moderation;
+ coal->tx_coalesce_usecs = tx_moder->usec;
+ coal->tx_max_coalesced_frames = tx_moder->pkts;
+ coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled;
+
+ return 0;
+}
+
+static int mlx5e_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+#define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD
+#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT
+
+static void
+mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int tc;
+ int i;
+
+ for (i = 0; i < priv->channels.num; ++i) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ mlx5_core_modify_cq_moderation(mdev,
+ &c->sq[tc].cq.mcq,
+ coal->tx_coalesce_usecs,
+ coal->tx_max_coalesced_frames);
+ }
+
+ mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
+ coal->rx_coalesce_usecs,
+ coal->rx_max_coalesced_frames);
+ }
+}
+
+int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
+ struct ethtool_coalesce *coal)
+{
+ struct net_dim_cq_moder *rx_moder, *tx_moder;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ int err = 0;
+ bool reset;
+
+ if (!MLX5_CAP_GEN(mdev, cq_moderation))
+ return -EOPNOTSUPP;
+
+ if (coal->tx_coalesce_usecs > MLX5E_MAX_COAL_TIME ||
+ coal->rx_coalesce_usecs > MLX5E_MAX_COAL_TIME) {
+ netdev_info(priv->netdev, "%s: maximum coalesce time supported is %lu usecs\n",
+ __func__, MLX5E_MAX_COAL_TIME);
+ return -ERANGE;
+ }
+
+ if (coal->tx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES ||
+ coal->rx_max_coalesced_frames > MLX5E_MAX_COAL_FRAMES) {
+ netdev_info(priv->netdev, "%s: maximum coalesced frames supported is %lu\n",
+ __func__, MLX5E_MAX_COAL_FRAMES);
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ new_channels.params = priv->channels.params;
+
+ rx_moder = &new_channels.params.rx_cq_moderation;
+ rx_moder->usec = coal->rx_coalesce_usecs;
+ rx_moder->pkts = coal->rx_max_coalesced_frames;
+ new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+
+ tx_moder = &new_channels.params.tx_cq_moderation;
+ tx_moder->usec = coal->tx_coalesce_usecs;
+ tx_moder->pkts = coal->tx_max_coalesced_frames;
+ new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+ /* we are opened */
+
+ reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) ||
+ (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled);
+
+ if (!reset) {
+ mlx5e_set_priv_channels_coalesce(priv, coal);
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ /* open fresh channels with new coal parameters */
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
+static void ptys2ethtool_supported_link(unsigned long *supported_modes,
+ u32 eth_proto_cap)
+{
+ unsigned long proto_cap = eth_proto_cap;
+ int proto;
+
+ for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+ bitmap_or(supported_modes, supported_modes,
+ ptys2ethtool_table[proto].supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
+ u32 eth_proto_cap)
+{
+ unsigned long proto_cap = eth_proto_cap;
+ int proto;
+
+ for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
+ bitmap_or(advertising_modes, advertising_modes,
+ ptys2ethtool_table[proto].advertised,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
+ u32 eth_proto_cap,
+ u8 connector_type)
+{
+ if (!connector_type || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
+ if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4)
+ | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4)
+ | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported,
+ FIBRE);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ FIBRE);
+ }
+
+ if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4)
+ | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_KR)
+ | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4)
+ | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported,
+ Backplane);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Backplane);
+ }
+ return;
+ }
+
+ switch (connector_type) {
+ case MLX5E_PORT_TP:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, TP);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, TP);
+ break;
+ case MLX5E_PORT_AUI:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, AUI);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, AUI);
+ break;
+ case MLX5E_PORT_BNC:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, BNC);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, BNC);
+ break;
+ case MLX5E_PORT_MII:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, MII);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, MII);
+ break;
+ case MLX5E_PORT_FIBRE:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, FIBRE);
+ break;
+ case MLX5E_PORT_DA:
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ supported, Backplane);
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Backplane);
+ break;
+ case MLX5E_PORT_NONE:
+ case MLX5E_PORT_OTHER:
+ default:
+ break;
+ }
+}
+
+static void get_speed_duplex(struct net_device *netdev,
+ u32 eth_proto_oper,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ u32 speed = SPEED_UNKNOWN;
+ u8 duplex = DUPLEX_UNKNOWN;
+
+ if (!netif_carrier_ok(netdev))
+ goto out;
+
+ speed = mlx5e_port_ptys2speed(eth_proto_oper);
+ if (!speed) {
+ speed = SPEED_UNKNOWN;
+ goto out;
+ }
+
+ duplex = DUPLEX_FULL;
+
+out:
+ link_ksettings->base.speed = speed;
+ link_ksettings->base.duplex = duplex;
+}
+
+static void get_supported(u32 eth_proto_cap,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *supported = link_ksettings->link_modes.supported;
+
+ ptys2ethtool_supported_link(supported, eth_proto_cap);
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause);
+}
+
+static void get_advertising(u32 eth_proto_cap, u8 tx_pause,
+ u8 rx_pause,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *advertising = link_ksettings->link_modes.advertising;
+
+ ptys2ethtool_adver_link(advertising, eth_proto_cap);
+ if (rx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause);
+ if (tx_pause ^ rx_pause)
+ ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause);
+}
+
+static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = {
+ [MLX5E_PORT_UNKNOWN] = PORT_OTHER,
+ [MLX5E_PORT_NONE] = PORT_NONE,
+ [MLX5E_PORT_TP] = PORT_TP,
+ [MLX5E_PORT_AUI] = PORT_AUI,
+ [MLX5E_PORT_BNC] = PORT_BNC,
+ [MLX5E_PORT_MII] = PORT_MII,
+ [MLX5E_PORT_FIBRE] = PORT_FIBRE,
+ [MLX5E_PORT_DA] = PORT_DA,
+ [MLX5E_PORT_OTHER] = PORT_OTHER,
+ };
+
+static u8 get_connector_port(u32 eth_proto, u8 connector_type)
+{
+ if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
+ return ptys2connector_type[connector_type];
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
+ return PORT_FIBRE;
+ }
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_CR) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) {
+ return PORT_DA;
+ }
+
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_KR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) {
+ return PORT_NONE;
+ }
+
+ return PORT_OTHER;
+}
+
+static void get_lp_advertising(u32 eth_proto_lp,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
+
+ ptys2ethtool_adver_link(lp_advertising, eth_proto_lp);
+}
+
+static int mlx5e_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+ u32 rx_pause = 0;
+ u32 tx_pause = 0;
+ u32 eth_proto_cap;
+ u32 eth_proto_admin;
+ u32 eth_proto_lp;
+ u32 eth_proto_oper;
+ u8 an_disable_admin;
+ u8 an_status;
+ u8 connector_type;
+ int err;
+
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+ if (err) {
+ netdev_err(netdev, "%s: query port ptys failed: %d\n",
+ __func__, err);
+ goto err_query_ptys;
+ }
+
+ eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
+ eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
+ eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+ eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise);
+ an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+ an_status = MLX5_GET(ptys_reg, out, an_status);
+ connector_type = MLX5_GET(ptys_reg, out, connector_type);
+
+ mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ get_supported(eth_proto_cap, link_ksettings);
+ get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings);
+ get_speed_duplex(netdev, eth_proto_oper, link_ksettings);
+
+ eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
+
+ link_ksettings->base.port = get_connector_port(eth_proto_oper,
+ connector_type);
+ ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin,
+ connector_type);
+ get_lp_advertising(eth_proto_lp, link_ksettings);
+
+ if (an_status == MLX5_AN_COMPLETE)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ lp_advertising, Autoneg);
+
+ link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE :
+ AUTONEG_ENABLE;
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+ Autoneg);
+ if (!an_disable_admin)
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Autoneg);
+
+err_query_ptys:
+ return err;
+}
+
+static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes)
+{
+ u32 i, ptys_modes = 0;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (bitmap_intersects(ptys2ethtool_table[i].advertised,
+ link_modes,
+ __ETHTOOL_LINK_MODE_MASK_NBITS))
+ ptys_modes |= MLX5E_PROT_MASK(i);
+ }
+
+ return ptys_modes;
+}
+
+static int mlx5e_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *link_ksettings)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 eth_proto_cap, eth_proto_admin;
+ bool an_changes = false;
+ u8 an_disable_admin;
+ u8 an_disable_cap;
+ bool an_disable;
+ u32 link_modes;
+ u8 an_status;
+ u32 speed;
+ int err;
+
+ speed = link_ksettings->base.speed;
+
+ link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
+ mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
+ mlx5e_port_speed2linkmodes(speed);
+
+ err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+ if (err) {
+ netdev_err(netdev, "%s: query port eth proto cap failed: %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ link_modes = link_modes & eth_proto_cap;
+ if (!link_modes) {
+ netdev_err(netdev, "%s: Not supported link mode(s) requested",
+ __func__);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
+ if (err) {
+ netdev_err(netdev, "%s: query port eth proto admin failed: %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status,
+ &an_disable_cap, &an_disable_admin);
+
+ an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE;
+ an_changes = ((!an_disable && an_disable_admin) ||
+ (an_disable && !an_disable_admin));
+
+ if (!an_changes && link_modes == eth_proto_admin)
+ goto out;
+
+ mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN);
+ mlx5_toggle_port_link(mdev);
+
+out:
+ return err;
+}
+
+static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return sizeof(priv->channels.params.toeplitz_hash_key);
+}
+
+static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
+{
+ return MLX5E_INDIR_RQT_SIZE;
+}
+
+static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (indir)
+ memcpy(indir, priv->channels.params.indirection_rqt,
+ sizeof(priv->channels.params.indirection_rqt));
+
+ if (key)
+ memcpy(key, priv->channels.params.toeplitz_hash_key,
+ sizeof(priv->channels.params.toeplitz_hash_key));
+
+ if (hfunc)
+ *hfunc = priv->channels.params.rss_hfunc;
+
+ return 0;
+}
+
+static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+ void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int ctxlen = MLX5_ST_SZ_BYTES(tirc);
+ int tt;
+
+ MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+ mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
+ }
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+ mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
+ }
+}
+
+static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ bool hash_changed = false;
+ void *in;
+
+ if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
+ (hfunc != ETH_RSS_HASH_XOR) &&
+ (hfunc != ETH_RSS_HASH_TOP))
+ return -EINVAL;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mutex_lock(&priv->state_lock);
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
+ hfunc != priv->channels.params.rss_hfunc) {
+ priv->channels.params.rss_hfunc = hfunc;
+ hash_changed = true;
+ }
+
+ if (indir) {
+ memcpy(priv->channels.params.indirection_rqt, indir,
+ sizeof(priv->channels.params.indirection_rqt));
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ u32 rqtn = priv->indir_rqt.rqtn;
+ struct mlx5e_redirect_rqt_param rrp = {
+ .is_rss = true,
+ {
+ .rss = {
+ .hfunc = priv->channels.params.rss_hfunc,
+ .channels = &priv->channels,
+ },
+ },
+ };
+
+ mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
+ }
+ }
+
+ if (key) {
+ memcpy(priv->channels.params.toeplitz_hash_key, key,
+ sizeof(priv->channels.params.toeplitz_hash_key));
+ hash_changed = hash_changed ||
+ priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP;
+ }
+
+ if (hash_changed)
+ mlx5e_modify_tirs_hash(priv, in, inlen);
+
+ mutex_unlock(&priv->state_lock);
+
+ kvfree(in);
+
+ return 0;
+}
+
+#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100
+#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000
+#define MLX5E_PFC_PREVEN_MINOR_PRECENT 85
+#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC 80
+#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
+ max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
+ (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
+
+static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
+ u16 *pfc_prevention_tout)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);
+}
+
+static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev,
+ u16 pfc_preven)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 critical_tout;
+ u16 minor;
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+ !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+ return -EOPNOTSUPP;
+
+ critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ?
+ MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC :
+ pfc_preven;
+
+ if (critical_tout != PFC_STORM_PREVENTION_DISABLE &&
+ (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC ||
+ critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) {
+ netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n",
+ __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC,
+ MLX5E_PFC_PREVEN_TOUT_MAX_MSEC);
+ return -EINVAL;
+ }
+
+ minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout);
+ return mlx5_set_port_stall_watermark(mdev, critical_tout,
+ minor);
+}
+
+static int mlx5e_get_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ void *data)
+{
+ int err;
+
+ switch (tuna->id) {
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_get_pfc_prevention_tout(dev, data);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+static int mlx5e_set_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+ switch (tuna->id) {
+ case ETHTOOL_PFC_PREVENTION_TOUT:
+ err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static void mlx5e_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause,
+ &pauseparam->tx_pause);
+ if (err) {
+ netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n",
+ __func__, err);
+ }
+}
+
+static int mlx5e_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *pauseparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EOPNOTSUPP;
+
+ if (pauseparam->autoneg)
+ return -EINVAL;
+
+ err = mlx5_set_port_pause(mdev,
+ pauseparam->rx_pause ? 1 : 0,
+ pauseparam->tx_pause ? 1 : 0);
+ if (err) {
+ netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n",
+ __func__, err);
+ }
+
+ return err;
+}
+
+int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ info->phc_index = mlx5_clock_get_ptp_index(mdev);
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
+ info->phc_index == -1)
+ return 0;
+
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
+static int mlx5e_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_get_ts_info(priv, info);
+}
+
+static __u32 mlx5e_get_wol_supported(struct mlx5_core_dev *mdev)
+{
+ __u32 ret = 0;
+
+ if (MLX5_CAP_GEN(mdev, wol_g))
+ ret |= WAKE_MAGIC;
+
+ if (MLX5_CAP_GEN(mdev, wol_s))
+ ret |= WAKE_MAGICSECURE;
+
+ if (MLX5_CAP_GEN(mdev, wol_a))
+ ret |= WAKE_ARP;
+
+ if (MLX5_CAP_GEN(mdev, wol_b))
+ ret |= WAKE_BCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_m))
+ ret |= WAKE_MCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_u))
+ ret |= WAKE_UCAST;
+
+ if (MLX5_CAP_GEN(mdev, wol_p))
+ ret |= WAKE_PHY;
+
+ return ret;
+}
+
+static __u32 mlx5e_refomrat_wol_mode_mlx5_to_linux(u8 mode)
+{
+ __u32 ret = 0;
+
+ if (mode & MLX5_WOL_MAGIC)
+ ret |= WAKE_MAGIC;
+
+ if (mode & MLX5_WOL_SECURED_MAGIC)
+ ret |= WAKE_MAGICSECURE;
+
+ if (mode & MLX5_WOL_ARP)
+ ret |= WAKE_ARP;
+
+ if (mode & MLX5_WOL_BROADCAST)
+ ret |= WAKE_BCAST;
+
+ if (mode & MLX5_WOL_MULTICAST)
+ ret |= WAKE_MCAST;
+
+ if (mode & MLX5_WOL_UNICAST)
+ ret |= WAKE_UCAST;
+
+ if (mode & MLX5_WOL_PHY_ACTIVITY)
+ ret |= WAKE_PHY;
+
+ return ret;
+}
+
+static u8 mlx5e_refomrat_wol_mode_linux_to_mlx5(__u32 mode)
+{
+ u8 ret = 0;
+
+ if (mode & WAKE_MAGIC)
+ ret |= MLX5_WOL_MAGIC;
+
+ if (mode & WAKE_MAGICSECURE)
+ ret |= MLX5_WOL_SECURED_MAGIC;
+
+ if (mode & WAKE_ARP)
+ ret |= MLX5_WOL_ARP;
+
+ if (mode & WAKE_BCAST)
+ ret |= MLX5_WOL_BROADCAST;
+
+ if (mode & WAKE_MCAST)
+ ret |= MLX5_WOL_MULTICAST;
+
+ if (mode & WAKE_UCAST)
+ ret |= MLX5_WOL_UNICAST;
+
+ if (mode & WAKE_PHY)
+ ret |= MLX5_WOL_PHY_ACTIVITY;
+
+ return ret;
+}
+
+static void mlx5e_get_wol(struct net_device *netdev,
+ struct ethtool_wolinfo *wol)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 mlx5_wol_mode;
+ int err;
+
+ memset(wol, 0, sizeof(*wol));
+
+ wol->supported = mlx5e_get_wol_supported(mdev);
+ if (!wol->supported)
+ return;
+
+ err = mlx5_query_port_wol(mdev, &mlx5_wol_mode);
+ if (err)
+ return;
+
+ wol->wolopts = mlx5e_refomrat_wol_mode_mlx5_to_linux(mlx5_wol_mode);
+}
+
+static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ __u32 wol_supported = mlx5e_get_wol_supported(mdev);
+ u32 mlx5_wol_mode;
+
+ if (!wol_supported)
+ return -EOPNOTSUPP;
+
+ if (wol->wolopts & ~wol_supported)
+ return -EINVAL;
+
+ mlx5_wol_mode = mlx5e_refomrat_wol_mode_linux_to_mlx5(wol->wolopts);
+
+ return mlx5_set_port_wol(mdev, mlx5_wol_mode);
+}
+
+static u32 mlx5e_get_msglevel(struct net_device *dev)
+{
+ return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel;
+}
+
+static void mlx5e_set_msglevel(struct net_device *dev, u32 val)
+{
+ ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val;
+}
+
+static int mlx5e_set_phys_id(struct net_device *dev,
+ enum ethtool_phys_id_state state)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 beacon_duration;
+
+ if (!MLX5_CAP_GEN(mdev, beacon_led))
+ return -EOPNOTSUPP;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ beacon_duration = MLX5_BEACON_DURATION_INF;
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ beacon_duration = MLX5_BEACON_DURATION_OFF;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return mlx5_set_port_beacon(mdev, beacon_duration);
+}
+
+static int mlx5e_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *dev = priv->mdev;
+ int size_read = 0;
+ u8 data[4];
+
+ size_read = mlx5_query_module_eeprom(dev, 0, 2, data);
+ if (size_read < 2)
+ return -EIO;
+
+ /* data[0] = identifier byte */
+ switch (data[0]) {
+ case MLX5_MODULE_ID_QSFP:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+ case MLX5_MODULE_ID_QSFP_PLUS:
+ case MLX5_MODULE_ID_QSFP28:
+ /* data[1] = revision id */
+ if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) {
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ }
+ break;
+ case MLX5_MODULE_ID_SFP:
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
+ __func__, data[0]);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5e_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int offset = ee->offset;
+ int size_read;
+ int i = 0;
+
+ if (!ee->len)
+ return -EINVAL;
+
+ memset(data, 0, ee->len);
+
+ while (i < ee->len) {
+ size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i,
+ data + i);
+
+ if (!size_read)
+ /* Done reading */
+ return 0;
+
+ if (size_read < 0) {
+ netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n",
+ __func__, size_read);
+ return size_read;
+ }
+
+ i += size_read;
+ offset += size_read;
+ }
+
+ return 0;
+}
+
+typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
+
+static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
+ bool is_rx_cq)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ bool mode_changed;
+ u8 cq_period_mode, current_cq_period_mode;
+ int err = 0;
+
+ cq_period_mode = enable ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ current_cq_period_mode = is_rx_cq ?
+ priv->channels.params.rx_cq_moderation.cq_period_mode :
+ priv->channels.params.tx_cq_moderation.cq_period_mode;
+ mode_changed = cq_period_mode != current_cq_period_mode;
+
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
+ !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
+ return -EOPNOTSUPP;
+
+ if (!mode_changed)
+ return 0;
+
+ new_channels.params = priv->channels.params;
+ if (is_rx_cq)
+ mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode);
+ else
+ mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ return 0;
+}
+
+static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+ return set_pflag_cqe_based_moder(netdev, enable, false);
+}
+
+static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+ return set_pflag_cqe_based_moder(netdev, enable, true);
+}
+
+int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val)
+{
+ bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS);
+ struct mlx5e_channels new_channels = {};
+ int err = 0;
+
+ if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
+ return new_val ? -EOPNOTSUPP : 0;
+
+ if (curr_val == new_val)
+ return 0;
+
+ new_channels.params = priv->channels.params;
+ MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+ MLX5E_GET_PFLAG(&priv->channels.params,
+ MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
+ return 0;
+}
+
+static int set_pflag_rx_cqe_compress(struct net_device *netdev,
+ bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, cqe_compression))
+ return -EOPNOTSUPP;
+
+ if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
+ netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
+ return -EINVAL;
+ }
+
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, enable);
+ if (err)
+ return err;
+
+ priv->channels.params.rx_cqe_compress_def = enable;
+
+ return 0;
+}
+
+static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ int err;
+
+ if (enable) {
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
+ return -EOPNOTSUPP;
+ if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
+ return -EINVAL;
+ } else if (priv->channels.params.lro_en) {
+ netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
+ return -EINVAL;
+ }
+
+ new_channels.params = priv->channels.params;
+
+ MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
+ mlx5e_set_rq_type(mdev, &new_channels.params);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ return 0;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ return err;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ return 0;
+}
+
+static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels *channels = &priv->channels;
+ struct mlx5e_channel *c;
+ int i;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
+ priv->channels.params.xdp_prog)
+ return 0;
+
+ for (i = 0; i < channels->num; i++) {
+ c = channels->c[i];
+ if (enable)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ else
+ __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ }
+
+ return 0;
+}
+
+static int mlx5e_handle_pflag(struct net_device *netdev,
+ u32 wanted_flags,
+ enum mlx5e_priv_flag flag,
+ mlx5e_pflag_handler pflag_handler)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ bool enable = !!(wanted_flags & flag);
+ u32 changes = wanted_flags ^ priv->channels.params.pflags;
+ int err;
+
+ if (!(changes & flag))
+ return 0;
+
+ err = pflag_handler(netdev, enable);
+ if (err) {
+ netdev_err(netdev, "%s private flag 0x%x failed err %d\n",
+ enable ? "Enable" : "Disable", flag, err);
+ return err;
+ }
+
+ MLX5E_SET_PFLAG(&priv->channels.params, flag, enable);
+ return 0;
+}
+
+static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ set_pflag_rx_cqe_based_moder);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ set_pflag_tx_cqe_based_moder);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_CQE_COMPRESS,
+ set_pflag_rx_cqe_compress);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_STRIDING_RQ,
+ set_pflag_rx_striding_rq);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+ set_pflag_rx_no_csum_complete);
+
+out:
+ mutex_unlock(&priv->state_lock);
+
+ /* Need to fix some features.. */
+ netdev_update_features(netdev);
+
+ return err;
+}
+
+static u32 mlx5e_get_priv_flags(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return priv->channels.params.pflags;
+}
+
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
+ struct ethtool_flash *flash)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct net_device *dev = priv->netdev;
+ const struct firmware *fw;
+ int err;
+
+ if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+ return -EOPNOTSUPP;
+
+ err = request_firmware_direct(&fw, flash->data, &dev->dev);
+ if (err)
+ return err;
+
+ dev_hold(dev);
+ rtnl_unlock();
+
+ err = mlx5_firmware_flash(mdev, fw);
+ release_firmware(fw);
+
+ rtnl_lock();
+ dev_put(dev);
+ return err;
+}
+
+static int mlx5e_flash_device(struct net_device *dev,
+ struct ethtool_flash *flash)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_flash_device(priv, flash);
+}
+
+#ifndef CONFIG_MLX5_EN_RXNFC
+/* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS
+ * otherwise this function will be defined from en_fs_ethtool.c
+ */
+static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (info->cmd != ETHTOOL_GRXRINGS)
+ return -EOPNOTSUPP;
+ /* ring_count is needed by ethtool -x */
+ info->data = priv->channels.params.num_channels;
+ return 0;
+}
+#endif
+
+const struct ethtool_ops mlx5e_ethtool_ops = {
+ .get_drvinfo = mlx5e_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_get_strings,
+ .get_sset_count = mlx5e_get_sset_count,
+ .get_ethtool_stats = mlx5e_get_ethtool_stats,
+ .get_ringparam = mlx5e_get_ringparam,
+ .set_ringparam = mlx5e_set_ringparam,
+ .get_channels = mlx5e_get_channels,
+ .set_channels = mlx5e_set_channels,
+ .get_coalesce = mlx5e_get_coalesce,
+ .set_coalesce = mlx5e_set_coalesce,
+ .get_link_ksettings = mlx5e_get_link_ksettings,
+ .set_link_ksettings = mlx5e_set_link_ksettings,
+ .get_rxfh_key_size = mlx5e_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size,
+ .get_rxfh = mlx5e_get_rxfh,
+ .set_rxfh = mlx5e_set_rxfh,
+ .get_rxnfc = mlx5e_get_rxnfc,
+#ifdef CONFIG_MLX5_EN_RXNFC
+ .set_rxnfc = mlx5e_set_rxnfc,
+#endif
+ .flash_device = mlx5e_flash_device,
+ .get_tunable = mlx5e_get_tunable,
+ .set_tunable = mlx5e_set_tunable,
+ .get_pauseparam = mlx5e_get_pauseparam,
+ .set_pauseparam = mlx5e_set_pauseparam,
+ .get_ts_info = mlx5e_get_ts_info,
+ .set_phys_id = mlx5e_set_phys_id,
+ .get_wol = mlx5e_get_wol,
+ .set_wol = mlx5e_set_wol,
+ .get_module_info = mlx5e_get_module_info,
+ .get_module_eeprom = mlx5e_get_module_eeprom,
+ .get_priv_flags = mlx5e_get_priv_flags,
+ .set_priv_flags = mlx5e_set_priv_flags,
+ .self_test = mlx5e_self_test,
+ .get_msglevel = mlx5e_get_msglevel,
+ .set_msglevel = mlx5e_set_msglevel,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
new file mode 100644
index 000000000..c6eea6b6b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -0,0 +1,1576 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/list.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include "en.h"
+#include "lib/mpfs.h"
+
+static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai, int type);
+static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai);
+
+enum {
+ MLX5E_FULLMATCH = 0,
+ MLX5E_ALLMULTI = 1,
+ MLX5E_PROMISC = 2,
+};
+
+enum {
+ MLX5E_UC = 0,
+ MLX5E_MC_IPV4 = 1,
+ MLX5E_MC_IPV6 = 2,
+ MLX5E_MC_OTHER = 3,
+};
+
+enum {
+ MLX5E_ACTION_NONE = 0,
+ MLX5E_ACTION_ADD = 1,
+ MLX5E_ACTION_DEL = 2,
+};
+
+struct mlx5e_l2_hash_node {
+ struct hlist_node hlist;
+ u8 action;
+ struct mlx5e_l2_rule ai;
+ bool mpfs;
+};
+
+static inline int mlx5e_hash_l2(u8 *addr)
+{
+ return addr[5];
+}
+
+static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr)
+{
+ struct mlx5e_l2_hash_node *hn;
+ int ix = mlx5e_hash_l2(addr);
+ int found = 0;
+
+ hlist_for_each_entry(hn, &hash[ix], hlist)
+ if (ether_addr_equal_64bits(hn->ai.addr, addr)) {
+ found = 1;
+ break;
+ }
+
+ if (found) {
+ hn->action = MLX5E_ACTION_NONE;
+ return;
+ }
+
+ hn = kzalloc(sizeof(*hn), GFP_ATOMIC);
+ if (!hn)
+ return;
+
+ ether_addr_copy(hn->ai.addr, addr);
+ hn->action = MLX5E_ACTION_ADD;
+
+ hlist_add_head(&hn->hlist, &hash[ix]);
+}
+
+static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn)
+{
+ hlist_del(&hn->hlist);
+ kfree(hn);
+}
+
+static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
+{
+ struct net_device *ndev = priv->netdev;
+ int max_list_size;
+ int list_size;
+ u16 *vlans;
+ int vlan;
+ int err;
+ int i;
+
+ list_size = 0;
+ for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID)
+ list_size++;
+
+ max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
+
+ if (list_size > max_list_size) {
+ netdev_warn(ndev,
+ "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n",
+ list_size, max_list_size);
+ list_size = max_list_size;
+ }
+
+ vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL);
+ if (!vlans)
+ return -ENOMEM;
+
+ i = 0;
+ for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+ if (i >= list_size)
+ break;
+ vlans[i++] = vlan;
+ }
+
+ err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size);
+ if (err)
+ netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n",
+ err);
+
+ kfree(vlans);
+ return err;
+}
+
+enum mlx5e_vlan_rule_type {
+ MLX5E_VLAN_RULE_TYPE_UNTAGGED,
+ MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID,
+ MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID,
+};
+
+static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type,
+ u16 vid, struct mlx5_flow_spec *spec)
+{
+ struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rule_p;
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ int err = 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = priv->fs.l2.ft.t;
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ /* cvlan_tag enabled in match criteria and
+ * disabled in match value means both S & C tags
+ * don't exist (untagged of both)
+ */
+ rule_p = &priv->fs.vlan.untagged_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ rule_p = &priv->fs.vlan.any_cvlan_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ rule_p = &priv->fs.vlan.any_svlan_rule;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+ rule_p = &priv->fs.vlan.active_svlans_rule[vid];
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.svlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+ vid);
+ break;
+ default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */
+ rule_p = &priv->fs.vlan.active_cvlans_rule[vid];
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.cvlan_tag);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+ vid);
+ break;
+ }
+
+ if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
+ return 0;
+
+ *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+
+ if (IS_ERR(*rule_p)) {
+ err = PTR_ERR(*rule_p);
+ *rule_p = NULL;
+ netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ }
+
+ return err;
+}
+
+static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID)
+ mlx5e_vport_context_update_vlans(priv);
+
+ err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec);
+
+ kvfree(spec);
+
+ return err;
+}
+
+static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ if (priv->fs.vlan.untagged_rule) {
+ mlx5_del_flow_rules(priv->fs.vlan.untagged_rule);
+ priv->fs.vlan.untagged_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
+ if (priv->fs.vlan.any_cvlan_rule) {
+ mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule);
+ priv->fs.vlan.any_cvlan_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
+ if (priv->fs.vlan.any_svlan_rule) {
+ mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule);
+ priv->fs.vlan.any_svlan_rule = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+ if (priv->fs.vlan.active_svlans_rule[vid]) {
+ mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]);
+ priv->fs.vlan.active_svlans_rule[vid] = NULL;
+ }
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
+ if (priv->fs.vlan.active_cvlans_rule[vid]) {
+ mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]);
+ priv->fs.vlan.active_cvlans_rule[vid] = NULL;
+ }
+ mlx5e_vport_context_update_vlans(priv);
+ break;
+ }
+}
+
+static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv)
+{
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+ if (err)
+ return err;
+
+ return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
+}
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
+{
+ if (!priv->fs.vlan.cvlan_filter_disabled)
+ return;
+
+ priv->fs.vlan.cvlan_filter_disabled = false;
+ if (priv->netdev->flags & IFF_PROMISC)
+ return;
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+}
+
+void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv)
+{
+ if (priv->fs.vlan.cvlan_filter_disabled)
+ return;
+
+ priv->fs.vlan.cvlan_filter_disabled = true;
+ if (priv->netdev->flags & IFF_PROMISC)
+ return;
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
+}
+
+static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid)
+{
+ int err;
+
+ set_bit(vid, priv->fs.vlan.active_cvlans);
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ if (err)
+ clear_bit(vid, priv->fs.vlan.active_cvlans);
+
+ return err;
+}
+
+static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
+{
+ struct net_device *netdev = priv->netdev;
+ int err;
+
+ set_bit(vid, priv->fs.vlan.active_svlans);
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ if (err) {
+ clear_bit(vid, priv->fs.vlan.active_svlans);
+ return err;
+ }
+
+ /* Need to fix some features.. */
+ netdev_update_features(netdev);
+ return err;
+}
+
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (be16_to_cpu(proto) == ETH_P_8021Q)
+ return mlx5e_vlan_rx_add_cvid(priv, vid);
+ else if (be16_to_cpu(proto) == ETH_P_8021AD)
+ return mlx5e_vlan_rx_add_svid(priv, vid);
+
+ return -EOPNOTSUPP;
+}
+
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (be16_to_cpu(proto) == ETH_P_8021Q) {
+ clear_bit(vid, priv->fs.vlan.active_cvlans);
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+ } else if (be16_to_cpu(proto) == ETH_P_8021AD) {
+ clear_bit(vid, priv->fs.vlan.active_svlans);
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+ netdev_update_features(dev);
+ }
+
+ return 0;
+}
+
+static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
+{
+ int i;
+
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ }
+
+ for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+ if (priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_add_any_vid_rules(priv);
+}
+
+static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+{
+ int i;
+
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
+ }
+
+ for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+ WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
+
+ /* must be called after DESTROY bit is set and
+ * set_rx_mode is called and flushed
+ */
+ if (priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_del_any_vid_rules(priv);
+}
+
+#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
+
+static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
+ struct mlx5e_l2_hash_node *hn)
+{
+ u8 action = hn->action;
+ u8 mac_addr[ETH_ALEN];
+ int l2_err = 0;
+
+ ether_addr_copy(mac_addr, hn->ai.addr);
+
+ switch (action) {
+ case MLX5E_ACTION_ADD:
+ mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
+ if (!is_multicast_ether_addr(mac_addr)) {
+ l2_err = mlx5_mpfs_add_mac(priv->mdev, mac_addr);
+ hn->mpfs = !l2_err;
+ }
+ hn->action = MLX5E_ACTION_NONE;
+ break;
+
+ case MLX5E_ACTION_DEL:
+ if (!is_multicast_ether_addr(mac_addr) && hn->mpfs)
+ l2_err = mlx5_mpfs_del_mac(priv->mdev, mac_addr);
+ mlx5e_del_l2_flow_rule(priv, &hn->ai);
+ mlx5e_del_l2_from_hash(hn);
+ break;
+ }
+
+ if (l2_err)
+ netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
+ action == MLX5E_ACTION_ADD ? "add" : "del", mac_addr, l2_err);
+}
+
+static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct netdev_hw_addr *ha;
+
+ netif_addr_lock_bh(netdev);
+
+ mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc,
+ priv->netdev->dev_addr);
+
+ netdev_for_each_uc_addr(ha, netdev)
+ mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr);
+
+ netdev_for_each_mc_addr(ha, netdev)
+ mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr);
+
+ netif_addr_unlock_bh(netdev);
+}
+
+static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type,
+ u8 addr_array[][ETH_ALEN], int size)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct net_device *ndev = priv->netdev;
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int i = 0;
+ int hi;
+
+ addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+
+ if (is_uc) /* Make sure our own address is pushed first */
+ ether_addr_copy(addr_array[i++], ndev->dev_addr);
+ else if (priv->fs.l2.broadcast_enabled)
+ ether_addr_copy(addr_array[i++], ndev->broadcast);
+
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) {
+ if (ether_addr_equal(ndev->dev_addr, hn->ai.addr))
+ continue;
+ if (i >= size)
+ break;
+ ether_addr_copy(addr_array[i++], hn->ai.addr);
+ }
+}
+
+static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv,
+ int list_type)
+{
+ bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC);
+ struct mlx5e_l2_hash_node *hn;
+ u8 (*addr_array)[ETH_ALEN] = NULL;
+ struct hlist_head *addr_list;
+ struct hlist_node *tmp;
+ int max_size;
+ int size;
+ int err;
+ int hi;
+
+ size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0);
+ max_size = is_uc ?
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list);
+
+ addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc;
+ mlx5e_for_each_hash_node(hn, tmp, addr_list, hi)
+ size++;
+
+ if (size > max_size) {
+ netdev_warn(priv->netdev,
+ "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n",
+ is_uc ? "UC" : "MC", size, max_size);
+ size = max_size;
+ }
+
+ if (size) {
+ addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!addr_array) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mlx5e_fill_addr_array(priv, list_type, addr_array, size);
+ }
+
+ err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size);
+out:
+ if (err)
+ netdev_err(priv->netdev,
+ "Failed to modify vport %s list err(%d)\n",
+ is_uc ? "UC" : "MC", err);
+ kfree(addr_array);
+}
+
+static void mlx5e_vport_context_update(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_table *ea = &priv->fs.l2;
+
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC);
+ mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC);
+ mlx5_modify_nic_vport_promisc(priv->mdev, 0,
+ ea->allmulti_enabled,
+ ea->promisc_enabled);
+}
+
+static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
+ mlx5e_execute_l2_action(priv, hn);
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
+ mlx5e_execute_l2_action(priv, hn);
+}
+
+static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_hash_node *hn;
+ struct hlist_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i)
+ hn->action = MLX5E_ACTION_DEL;
+ mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i)
+ hn->action = MLX5E_ACTION_DEL;
+
+ if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state))
+ mlx5e_sync_netdev_addr(priv);
+
+ mlx5e_apply_netdev_addr(priv);
+}
+
+void mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ set_rx_mode_work);
+
+ struct mlx5e_l2_table *ea = &priv->fs.l2;
+ struct net_device *ndev = priv->netdev;
+
+ bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ bool promisc_enabled = rx_mode_enable && (ndev->flags & IFF_PROMISC);
+ bool allmulti_enabled = rx_mode_enable && (ndev->flags & IFF_ALLMULTI);
+ bool broadcast_enabled = rx_mode_enable;
+
+ bool enable_promisc = !ea->promisc_enabled && promisc_enabled;
+ bool disable_promisc = ea->promisc_enabled && !promisc_enabled;
+ bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled;
+ bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled;
+ bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled;
+ bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled;
+
+ if (enable_promisc) {
+ if (!priv->channels.params.vlan_strip_disable)
+ netdev_warn_once(ndev,
+ "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
+ mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC);
+ if (!priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_add_any_vid_rules(priv);
+ }
+ if (enable_allmulti)
+ mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
+ if (enable_broadcast)
+ mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
+
+ mlx5e_handle_netdev_addr(priv);
+
+ if (disable_broadcast)
+ mlx5e_del_l2_flow_rule(priv, &ea->broadcast);
+ if (disable_allmulti)
+ mlx5e_del_l2_flow_rule(priv, &ea->allmulti);
+ if (disable_promisc) {
+ if (!priv->fs.vlan.cvlan_filter_disabled)
+ mlx5e_del_any_vid_rules(priv);
+ mlx5e_del_l2_flow_rule(priv, &ea->promisc);
+ }
+
+ ea->promisc_enabled = promisc_enabled;
+ ea->allmulti_enabled = allmulti_enabled;
+ ea->broadcast_enabled = broadcast_enabled;
+
+ mlx5e_vport_context_update(priv);
+}
+
+static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft)
+{
+ int i;
+
+ for (i = ft->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ft->g[i]))
+ mlx5_destroy_flow_group(ft->g[i]);
+ ft->g[i] = NULL;
+ }
+ ft->num_groups = 0;
+}
+
+void mlx5e_init_l2_addr(struct mlx5e_priv *priv)
+{
+ ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast);
+}
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
+{
+ mlx5e_destroy_groups(ft);
+ kfree(ft->g);
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+}
+
+static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->rules[i])) {
+ mlx5_del_flow_rules(ttc->rules[i]);
+ ttc->rules[i] = NULL;
+ }
+ }
+
+ for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+ mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+ ttc->tunnel_rules[i] = NULL;
+ }
+ }
+}
+
+struct mlx5e_etype_proto {
+ u16 etype;
+ u8 proto;
+};
+
+static struct mlx5e_etype_proto ttc_rules[] = {
+ [MLX5E_TT_IPV4_TCP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5E_TT_IPV6_TCP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5E_TT_IPV4_UDP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5E_TT_IPV6_UDP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5E_TT_IPV4_IPSEC_AH] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5E_TT_IPV6_IPSEC_AH] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5E_TT_IPV4_IPSEC_ESP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5E_TT_IPV6_IPSEC_ESP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5E_TT_IPV4] = {
+ .etype = ETH_P_IP,
+ .proto = 0,
+ },
+ [MLX5E_TT_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = 0,
+ },
+ [MLX5E_TT_ANY] = {
+ .etype = 0,
+ .proto = 0,
+ },
+};
+
+static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
+ [MLX5E_TT_IPV4_GRE] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5E_TT_IPV6_GRE] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_GRE,
+ },
+};
+
+static u8 mlx5e_etype_to_ipv(u16 ethertype)
+{
+ if (ethertype == ETH_P_IP)
+ return 4;
+
+ if (ethertype == ETH_P_IPV6)
+ return 6;
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype,
+ u8 proto)
+{
+ int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+ }
+
+ ipv = mlx5e_etype_to_ipv(etype);
+ if (match_ipv_outer && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+ } else if (etype) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
+ struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rules;
+ struct mlx5_flow_table *ft;
+ int tt;
+ int err;
+
+ ft = ttc->ft.t;
+ rules = ttc->rules;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+ if (tt == MLX5E_TT_ANY)
+ dest.tir_num = params->any_tt_tirn;
+ else
+ dest.tir_num = params->indir_tirn[tt];
+ rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
+ if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ rules = ttc->tunnel_rules;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = params->inner_ttc->ft.t;
+ for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
+ rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
+ ttc_tunnel_rules[tt].etype,
+ ttc_tunnel_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ err = PTR_ERR(rules[tt]);
+ rules[tt] = NULL;
+ mlx5e_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+#define MLX5E_TTC_NUM_GROUPS 3
+#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
+#define MLX5E_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
+ MLX5E_TTC_GROUP2_SIZE +\
+ MLX5E_TTC_GROUP3_SIZE)
+
+#define MLX5E_INNER_TTC_NUM_GROUPS 3
+#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
+ MLX5E_INNER_TTC_GROUP2_SIZE +\
+ MLX5E_INNER_TTC_GROUP3_SIZE)
+
+static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
+ bool use_ipv)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
+ sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ ft->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ if (use_ipv)
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype, u8 proto)
+{
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ipv = mlx5e_etype_to_ipv(etype);
+ if (etype && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+ }
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
+ struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle **rules;
+ struct mlx5_flow_table *ft;
+ int err;
+ int tt;
+
+ ft = ttc->ft.t;
+ rules = ttc->rules;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+ if (tt == MLX5E_TT_ANY)
+ dest.tir_num = params->any_tt_tirn;
+ else
+ dest.tir_num = params->indir_tirn[tt];
+
+ rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ err = PTR_ERR(rules[tt]);
+ rules[tt] = NULL;
+ mlx5e_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ ft->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params)
+{
+ ttc_params->any_tt_tirn = priv->direct_tir[0].tirn;
+ ttc_params->inner_ttc = &priv->fs.inner_ttc;
+}
+
+void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+
+ ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+ ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_NIC_PRIO;
+}
+
+void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+
+ ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+ ft_attr->level = MLX5E_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_NIC_PRIO;
+}
+
+int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int err;
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_inner_ttc_table_groups(ttc);
+ if (err)
+ goto err;
+
+ err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc)
+{
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ mlx5e_cleanup_ttc_rules(ttc);
+ mlx5e_destroy_flow_table(&ttc->ft);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
+ struct mlx5e_ttc_table *ttc)
+{
+ mlx5e_cleanup_ttc_rules(ttc);
+ mlx5e_destroy_flow_table(&ttc->ft);
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
+ struct mlx5e_ttc_table *ttc)
+{
+ bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int err;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
+ if (err)
+ goto err;
+
+ err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai)
+{
+ if (!IS_ERR_OR_NULL(ai->rule)) {
+ mlx5_del_flow_rules(ai->rule);
+ ai->rule = NULL;
+ }
+}
+
+static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5e_l2_rule *ai, int type)
+{
+ struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
+ struct mlx5_flow_destination dest = {};
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 *mc_dmac;
+ u8 *mv_dmac;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dmac_47_16);
+ mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dmac_47_16);
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = priv->fs.ttc.ft.t;
+
+ switch (type) {
+ case MLX5E_FULLMATCH:
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ eth_broadcast_addr(mc_dmac);
+ ether_addr_copy(mv_dmac, ai->addr);
+ break;
+
+ case MLX5E_ALLMULTI:
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ mc_dmac[0] = 0x01;
+ mv_dmac[0] = 0x01;
+ break;
+
+ case MLX5E_PROMISC:
+ break;
+ }
+
+ ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+ if (IS_ERR(ai->rule)) {
+ netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
+ __func__, mv_dmac);
+ err = PTR_ERR(ai->rule);
+ ai->rule = NULL;
+ }
+
+ kvfree(spec);
+
+ return err;
+}
+
+#define MLX5E_NUM_L2_GROUPS 3
+#define MLX5E_L2_GROUP1_SIZE BIT(0)
+#define MLX5E_L2_GROUP2_SIZE BIT(15)
+#define MLX5E_L2_GROUP3_SIZE BIT(0)
+#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\
+ MLX5E_L2_GROUP2_SIZE +\
+ MLX5E_L2_GROUP3_SIZE)
+static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &l2_table->ft;
+ int ix = 0;
+ u8 *mc_dmac;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ mc_dmac = MLX5_ADDR_OF(fte_match_param, mc,
+ outer_headers.dmac_47_16);
+ /* Flow Group for promiscuous */
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ /* Flow Group for full match */
+ eth_broadcast_addr(mc_dmac);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ /* Flow Group for allmulti */
+ eth_zero_addr(mc_dmac);
+ mc_dmac[0] = 0x01;
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_L2_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err_destroy_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+ kvfree(in);
+ kfree(ft->g);
+
+ return err;
+}
+
+static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_flow_table(&priv->fs.l2.ft);
+}
+
+static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_l2_table *l2_table = &priv->fs.l2;
+ struct mlx5e_flow_table *ft = &l2_table->ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_L2_TABLE_SIZE;
+ ft_attr.level = MLX5E_L2_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_l2_table_groups(l2_table);
+ if (err)
+ goto err_destroy_flow_table;
+
+ return 0;
+
+err_destroy_flow_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return err;
+}
+
+#define MLX5E_NUM_VLAN_GROUPS 4
+#define MLX5E_VLAN_GROUP0_SIZE BIT(12)
+#define MLX5E_VLAN_GROUP1_SIZE BIT(12)
+#define MLX5E_VLAN_GROUP2_SIZE BIT(1)
+#define MLX5E_VLAN_GROUP3_SIZE BIT(0)
+#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\
+ MLX5E_VLAN_GROUP1_SIZE +\
+ MLX5E_VLAN_GROUP2_SIZE +\
+ MLX5E_VLAN_GROUP3_SIZE)
+
+static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in,
+ int inlen)
+{
+ int err;
+ int ix = 0;
+ u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP0_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_VLAN_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err_destroy_groups;
+ ft->num_groups++;
+
+ return 0;
+
+err_destroy_groups:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ mlx5e_destroy_groups(ft);
+
+ return err;
+}
+
+static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft)
+{
+ u32 *in;
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = __mlx5e_create_vlan_table_groups(ft, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_flow_table *ft = &priv->fs.vlan.ft;
+ struct mlx5_flow_table_attr ft_attr = {};
+ int err;
+
+ ft->num_groups = 0;
+
+ ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
+ ft_attr.level = MLX5E_VLAN_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+ ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g) {
+ err = -ENOMEM;
+ goto err_destroy_vlan_table;
+ }
+
+ err = mlx5e_create_vlan_table_groups(ft);
+ if (err)
+ goto err_free_g;
+
+ mlx5e_add_vlan_rules(priv);
+
+ return 0;
+
+err_free_g:
+ kfree(ft->g);
+err_destroy_vlan_table:
+ mlx5_destroy_flow_table(ft->t);
+ ft->t = NULL;
+
+ return err;
+}
+
+static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
+{
+ mlx5e_del_vlan_rules(priv);
+ mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
+}
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+{
+ struct ttc_params ttc_params = {};
+ int tt, err;
+
+ priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+
+ if (!priv->fs.ns)
+ return -EOPNOTSUPP;
+
+ err = mlx5e_arfs_create_tables(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
+ err);
+ priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ }
+
+ mlx5e_set_ttc_basic_params(priv, &ttc_params);
+ mlx5e_set_inner_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
+
+ err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+ err);
+ goto err_destroy_arfs_tables;
+ }
+
+ mlx5e_set_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
+
+ err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
+ err);
+ goto err_destroy_inner_ttc_table;
+ }
+
+ err = mlx5e_create_l2_table(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n",
+ err);
+ goto err_destroy_ttc_table;
+ }
+
+ err = mlx5e_create_vlan_table(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n",
+ err);
+ goto err_destroy_l2_table;
+ }
+
+ mlx5e_ethtool_init_steering(priv);
+
+ return 0;
+
+err_destroy_l2_table:
+ mlx5e_destroy_l2_table(priv);
+err_destroy_ttc_table:
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+err_destroy_inner_ttc_table:
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+err_destroy_arfs_tables:
+ mlx5e_arfs_destroy_tables(priv);
+
+ return err;
+}
+
+void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_vlan_table(priv);
+ mlx5e_destroy_l2_table(priv);
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_arfs_destroy_tables(priv);
+ mlx5e_ethtool_cleanup_steering(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
new file mode 100644
index 000000000..41cde926c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -0,0 +1,844 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+
+struct mlx5e_ethtool_rule {
+ struct list_head list;
+ struct ethtool_rx_flow_spec flow_spec;
+ struct mlx5_flow_handle *rule;
+ struct mlx5e_ethtool_table *eth_ft;
+};
+
+static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
+{
+ if (!--eth_ft->num_rules) {
+ mlx5_destroy_flow_table(eth_ft->ft);
+ eth_ft->ft = NULL;
+ }
+}
+
+#define MLX5E_ETHTOOL_L3_L4_PRIO 0
+#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS)
+#define MLX5E_ETHTOOL_NUM_ENTRIES 64000
+#define MLX5E_ETHTOOL_NUM_GROUPS 10
+static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs,
+ int num_tuples)
+{
+ struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+ int max_tuples;
+ int table_size;
+ int prio;
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+ prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+ eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ break;
+ case IP_USER_FLOW:
+ case IPV6_USER_FLOW:
+ max_tuples = ETHTOOL_NUM_L3_L4_FTS;
+ prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples);
+ eth_ft = &priv->fs.ethtool.l3_l4_ft[prio];
+ break;
+ case ETHER_FLOW:
+ max_tuples = ETHTOOL_NUM_L2_FTS;
+ prio = max_tuples - num_tuples;
+ eth_ft = &priv->fs.ethtool.l2_ft[prio];
+ prio += MLX5E_ETHTOOL_L2_PRIO;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ eth_ft->num_rules++;
+ if (eth_ft->ft)
+ return eth_ft;
+
+ ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_ETHTOOL);
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev,
+ flow_table_properties_nic_receive.log_max_ft_size)),
+ MLX5E_ETHTOOL_NUM_ENTRIES);
+ ft = mlx5_create_auto_grouped_flow_table(ns, prio,
+ table_size,
+ MLX5E_ETHTOOL_NUM_GROUPS, 0, 0);
+ if (IS_ERR(ft))
+ return (void *)ft;
+
+ eth_ft->ft = ft;
+ return eth_ft;
+}
+
+static void mask_spec(u8 *mask, u8 *val, size_t size)
+{
+ unsigned int i;
+
+ for (i = 0; i < size; i++, mask++, val++)
+ *((u8 *)val) = *((u8 *)mask) & *((u8 *)val);
+}
+
+#define MLX5E_FTE_SET(header_p, fld, v) \
+ MLX5_SET(fte_match_set_lyr_2_4, header_p, fld, v)
+
+#define MLX5E_FTE_ADDR_OF(header_p, fld) \
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, header_p, fld)
+
+static void
+set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m,
+ __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v)
+{
+ if (ip4src_m) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ip4src_v, sizeof(ip4src_v));
+ memset(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ 0xff, sizeof(ip4src_m));
+ }
+ if (ip4dst_m) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ip4dst_v, sizeof(ip4dst_v));
+ memset(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ 0xff, sizeof(ip4dst_m));
+ }
+
+ MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
+ MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IP);
+}
+
+static void
+set_ip6(void *headers_c, void *headers_v, __be32 ip6src_m[4],
+ __be32 ip6src_v[4], __be32 ip6dst_m[4], __be32 ip6dst_v[4])
+{
+ u8 ip6_sz = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
+
+ if (!ipv6_addr_any((struct in6_addr *)ip6src_m)) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ ip6src_v, ip6_sz);
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ ip6src_m, ip6_sz);
+ }
+ if (!ipv6_addr_any((struct in6_addr *)ip6dst_m)) {
+ memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ ip6dst_v, ip6_sz);
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ ip6dst_m, ip6_sz);
+ }
+
+ MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
+ MLX5E_FTE_SET(headers_v, ethertype, ETH_P_IPV6);
+}
+
+static void
+set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
+ __be16 pdst_m, __be16 pdst_v)
+{
+ if (psrc_m) {
+ MLX5E_FTE_SET(headers_c, tcp_sport, 0xffff);
+ MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v));
+ }
+ if (pdst_m) {
+ MLX5E_FTE_SET(headers_c, tcp_dport, 0xffff);
+ MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v));
+ }
+
+ MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff);
+ MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_TCP);
+}
+
+static void
+set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
+ __be16 pdst_m, __be16 pdst_v)
+{
+ if (psrc_m) {
+ MLX5E_FTE_SET(headers_c, udp_sport, 0xffff);
+ MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v));
+ }
+
+ if (pdst_m) {
+ MLX5E_FTE_SET(headers_c, udp_dport, 0xffff);
+ MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v));
+ }
+
+ MLX5E_FTE_SET(headers_c, ip_protocol, 0xffff);
+ MLX5E_FTE_SET(headers_v, ip_protocol, IPPROTO_UDP);
+}
+
+static void
+parse_tcp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec;
+ struct ethtool_tcpip4_spec *l4_val = &fs->h_u.tcp_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src,
+ l4_mask->ip4dst, l4_val->ip4dst);
+
+ set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_udp4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.udp_ip4_spec;
+ struct ethtool_tcpip4_spec *l4_val = &fs->h_u.udp_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l4_mask->ip4src, l4_val->ip4src,
+ l4_mask->ip4dst, l4_val->ip4dst);
+
+ set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_ip4(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec;
+ struct ethtool_usrip4_spec *l3_val = &fs->h_u.usr_ip4_spec;
+
+ set_ip4(headers_c, headers_v, l3_mask->ip4src, l3_val->ip4src,
+ l3_mask->ip4dst, l3_val->ip4dst);
+
+ if (l3_mask->proto) {
+ MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->proto);
+ MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->proto);
+ }
+}
+
+static void
+parse_ip6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec;
+ struct ethtool_usrip6_spec *l3_val = &fs->h_u.usr_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l3_mask->ip6src,
+ l3_val->ip6src, l3_mask->ip6dst, l3_val->ip6dst);
+
+ if (l3_mask->l4_proto) {
+ MLX5E_FTE_SET(headers_c, ip_protocol, l3_mask->l4_proto);
+ MLX5E_FTE_SET(headers_v, ip_protocol, l3_val->l4_proto);
+ }
+}
+
+static void
+parse_tcp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec;
+ struct ethtool_tcpip6_spec *l4_val = &fs->h_u.tcp_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l4_mask->ip6src,
+ l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst);
+
+ set_tcp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_udp6(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.udp_ip6_spec;
+ struct ethtool_tcpip6_spec *l4_val = &fs->h_u.udp_ip6_spec;
+
+ set_ip6(headers_c, headers_v, l4_mask->ip6src,
+ l4_val->ip6src, l4_mask->ip6dst, l4_val->ip6dst);
+
+ set_udp(headers_c, headers_v, l4_mask->psrc, l4_val->psrc,
+ l4_mask->pdst, l4_val->pdst);
+}
+
+static void
+parse_ether(void *headers_c, void *headers_v, struct ethtool_rx_flow_spec *fs)
+{
+ struct ethhdr *eth_mask = &fs->m_u.ether_spec;
+ struct ethhdr *eth_val = &fs->h_u.ether_spec;
+
+ mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask));
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, smac_47_16), eth_mask->h_source);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, smac_47_16), eth_val->h_source);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), eth_mask->h_dest);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), eth_val->h_dest);
+ MLX5E_FTE_SET(headers_c, ethertype, ntohs(eth_mask->h_proto));
+ MLX5E_FTE_SET(headers_v, ethertype, ntohs(eth_val->h_proto));
+}
+
+static void
+set_cvlan(void *headers_c, void *headers_v, __be16 vlan_tci)
+{
+ MLX5E_FTE_SET(headers_c, cvlan_tag, 1);
+ MLX5E_FTE_SET(headers_v, cvlan_tag, 1);
+ MLX5E_FTE_SET(headers_c, first_vid, 0xfff);
+ MLX5E_FTE_SET(headers_v, first_vid, ntohs(vlan_tci));
+}
+
+static void
+set_dmac(void *headers_c, void *headers_v,
+ unsigned char m_dest[ETH_ALEN], unsigned char v_dest[ETH_ALEN])
+{
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_c, dmac_47_16), m_dest);
+ ether_addr_copy(MLX5E_FTE_ADDR_OF(headers_v, dmac_47_16), v_dest);
+}
+
+static int set_flow_attrs(u32 *match_c, u32 *match_v,
+ struct ethtool_rx_flow_spec *fs)
+{
+ void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ parse_tcp4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case UDP_V4_FLOW:
+ parse_udp4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case IP_USER_FLOW:
+ parse_ip4(outer_headers_c, outer_headers_v, fs);
+ break;
+ case TCP_V6_FLOW:
+ parse_tcp6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case UDP_V6_FLOW:
+ parse_udp6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case IPV6_USER_FLOW:
+ parse_ip6(outer_headers_c, outer_headers_v, fs);
+ break;
+ case ETHER_FLOW:
+ parse_ether(outer_headers_c, outer_headers_v, fs);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((fs->flow_type & FLOW_EXT) &&
+ (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK)))
+ set_cvlan(outer_headers_c, outer_headers_v, fs->h_ext.vlan_tci);
+
+ if (fs->flow_type & FLOW_MAC_EXT &&
+ !is_zero_ether_addr(fs->m_ext.h_dest)) {
+ mask_spec(fs->m_ext.h_dest, fs->h_ext.h_dest, ETH_ALEN);
+ set_dmac(outer_headers_c, outer_headers_v, fs->m_ext.h_dest,
+ fs->h_ext.h_dest);
+ }
+
+ return 0;
+}
+
+static void add_rule_to_list(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *rule)
+{
+ struct mlx5e_ethtool_rule *iter;
+ struct list_head *head = &priv->fs.ethtool.rules;
+
+ list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ if (iter->flow_spec.location > rule->flow_spec.location)
+ break;
+ head = &iter->list;
+ }
+ priv->fs.ethtool.tot_num_rules++;
+ list_add(&rule->list, head);
+}
+
+static bool outer_header_zero(u32 *match_criteria)
+{
+ int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
+ return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
+ outer_headers_c + 1,
+ size - 1);
+}
+
+static struct mlx5_flow_handle *
+add_ethtool_flow_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_table *ft,
+ struct ethtool_rx_flow_spec *fs)
+{
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_handle *rule;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+ err = set_flow_attrs(spec->match_criteria, spec->match_value,
+ fs);
+ if (err)
+ goto free;
+
+ if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+ } else {
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
+ flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
+ __func__, err);
+ goto free;
+ }
+free:
+ kvfree(spec);
+ kfree(dst);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static void del_ethtool_rule(struct mlx5e_priv *priv,
+ struct mlx5e_ethtool_rule *eth_rule)
+{
+ if (eth_rule->rule)
+ mlx5_del_flow_rules(eth_rule->rule);
+ list_del(&eth_rule->list);
+ priv->fs.ethtool.tot_num_rules--;
+ put_flow_table(eth_rule->eth_ft);
+ kfree(eth_rule);
+}
+
+static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv,
+ int location)
+{
+ struct mlx5e_ethtool_rule *iter;
+
+ list_for_each_entry(iter, &priv->fs.ethtool.rules, list) {
+ if (iter->flow_spec.location == location)
+ return iter;
+ }
+ return NULL;
+}
+
+static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv,
+ int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+
+ eth_rule = find_ethtool_rule(priv, location);
+ if (eth_rule)
+ del_ethtool_rule(priv, eth_rule);
+
+ eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
+ if (!eth_rule)
+ return ERR_PTR(-ENOMEM);
+
+ add_rule_to_list(priv, eth_rule);
+ return eth_rule;
+}
+
+#define MAX_NUM_OF_ETHTOOL_RULES BIT(10)
+
+#define all_ones(field) (field == (__force typeof(field))-1)
+#define all_zeros_or_all_ones(field) \
+ ((field) == 0 || (field) == (__force typeof(field))-1)
+
+static int validate_ethter(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethhdr *eth_mask = &fs->m_u.ether_spec;
+ int ntuples = 0;
+
+ if (!is_zero_ether_addr(eth_mask->h_dest))
+ ntuples++;
+ if (!is_zero_ether_addr(eth_mask->h_source))
+ ntuples++;
+ if (eth_mask->h_proto)
+ ntuples++;
+ return ntuples;
+}
+
+static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *l4_mask = &fs->m_u.tcp_ip4_spec;
+ int ntuples = 0;
+
+ if (l4_mask->tos)
+ return -EINVAL;
+
+ if (l4_mask->ip4src) {
+ if (!all_ones(l4_mask->ip4src))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->ip4dst) {
+ if (!all_ones(l4_mask->ip4dst))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->psrc) {
+ if (!all_ones(l4_mask->psrc))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->pdst) {
+ if (!all_ones(l4_mask->pdst))
+ return -EINVAL;
+ ntuples++;
+ }
+ /* Flow is TCP/UDP */
+ return ++ntuples;
+}
+
+static int validate_ip4(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip4_spec *l3_mask = &fs->m_u.usr_ip4_spec;
+ int ntuples = 0;
+
+ if (l3_mask->l4_4_bytes || l3_mask->tos ||
+ fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4)
+ return -EINVAL;
+ if (l3_mask->ip4src) {
+ if (!all_ones(l3_mask->ip4src))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l3_mask->ip4dst) {
+ if (!all_ones(l3_mask->ip4dst))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l3_mask->proto)
+ ntuples++;
+ /* Flow is IPv4 */
+ return ++ntuples;
+}
+
+static int validate_ip6(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_usrip6_spec *l3_mask = &fs->m_u.usr_ip6_spec;
+ int ntuples = 0;
+
+ if (l3_mask->l4_4_bytes || l3_mask->tclass)
+ return -EINVAL;
+ if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6src))
+ ntuples++;
+
+ if (!ipv6_addr_any((struct in6_addr *)l3_mask->ip6dst))
+ ntuples++;
+ if (l3_mask->l4_proto)
+ ntuples++;
+ /* Flow is IPv6 */
+ return ++ntuples;
+}
+
+static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs)
+{
+ struct ethtool_tcpip6_spec *l4_mask = &fs->m_u.tcp_ip6_spec;
+ int ntuples = 0;
+
+ if (l4_mask->tclass)
+ return -EINVAL;
+
+ if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6src))
+ ntuples++;
+
+ if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst))
+ ntuples++;
+
+ if (l4_mask->psrc) {
+ if (!all_ones(l4_mask->psrc))
+ return -EINVAL;
+ ntuples++;
+ }
+ if (l4_mask->pdst) {
+ if (!all_ones(l4_mask->pdst))
+ return -EINVAL;
+ ntuples++;
+ }
+ /* Flow is TCP/UDP */
+ return ++ntuples;
+}
+
+static int validate_vlan(struct ethtool_rx_flow_spec *fs)
+{
+ if (fs->m_ext.vlan_etype ||
+ fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))
+ return -EINVAL;
+
+ if (fs->m_ext.vlan_tci &&
+ (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID))
+ return -EINVAL;
+
+ return 1;
+}
+
+static int validate_flow(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs)
+{
+ int num_tuples = 0;
+ int ret = 0;
+
+ if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -ENOSPC;
+
+ if (fs->ring_cookie >= priv->channels.params.num_channels &&
+ fs->ring_cookie != RX_CLS_FLOW_DISC)
+ return -EINVAL;
+
+ switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+ case ETHER_FLOW:
+ num_tuples += validate_ethter(fs);
+ break;
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ ret = validate_tcpudp4(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case IP_USER_FLOW:
+ ret = validate_ip4(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ ret = validate_tcpudp6(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ case IPV6_USER_FLOW:
+ ret = validate_ip6(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+ if ((fs->flow_type & FLOW_EXT)) {
+ ret = validate_vlan(fs);
+ if (ret < 0)
+ return ret;
+ num_tuples += ret;
+ }
+
+ if (fs->flow_type & FLOW_MAC_EXT &&
+ !is_zero_ether_addr(fs->m_ext.h_dest))
+ num_tuples++;
+
+ return num_tuples;
+}
+
+static int
+mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
+ struct ethtool_rx_flow_spec *fs)
+{
+ struct mlx5e_ethtool_table *eth_ft;
+ struct mlx5e_ethtool_rule *eth_rule;
+ struct mlx5_flow_handle *rule;
+ int num_tuples;
+ int err;
+
+ num_tuples = validate_flow(priv, fs);
+ if (num_tuples <= 0) {
+ netdev_warn(priv->netdev, "%s: flow is not valid %d\n",
+ __func__, num_tuples);
+ return num_tuples;
+ }
+
+ eth_ft = get_flow_table(priv, fs, num_tuples);
+ if (IS_ERR(eth_ft))
+ return PTR_ERR(eth_ft);
+
+ eth_rule = get_ethtool_rule(priv, fs->location);
+ if (IS_ERR(eth_rule)) {
+ put_flow_table(eth_ft);
+ return PTR_ERR(eth_rule);
+ }
+
+ eth_rule->flow_spec = *fs;
+ eth_rule->eth_ft = eth_ft;
+ if (!eth_ft->ft) {
+ err = -EINVAL;
+ goto del_ethtool_rule;
+ }
+ rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto del_ethtool_rule;
+ }
+
+ eth_rule->rule = rule;
+
+ return 0;
+
+del_ethtool_rule:
+ del_ethtool_rule(priv, eth_rule);
+
+ return err;
+}
+
+static int
+mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+ int err = 0;
+
+ if (location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -ENOSPC;
+
+ eth_rule = find_ethtool_rule(priv, location);
+ if (!eth_rule) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ del_ethtool_rule(priv, eth_rule);
+out:
+ return err;
+}
+
+static int
+mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, int location)
+{
+ struct mlx5e_ethtool_rule *eth_rule;
+
+ if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES)
+ return -EINVAL;
+
+ list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
+ if (eth_rule->flow_spec.location == location) {
+ info->fs = eth_rule->flow_spec;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int
+mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ int location = 0;
+ int idx = 0;
+ int err = 0;
+
+ info->data = MAX_NUM_OF_ETHTOOL_RULES;
+ while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
+ err = mlx5e_ethtool_get_flow(priv, info, location);
+ if (!err)
+ rule_locs[idx++] = location;
+ location++;
+ }
+ return err;
+}
+
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ethtool_rule *iter;
+ struct mlx5e_ethtool_rule *temp;
+
+ list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list)
+ del_ethtool_rule(priv, iter);
+}
+
+void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
+{
+ INIT_LIST_HEAD(&priv->fs.ethtool.rules);
+}
+
+int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ int err = 0;
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ err = mlx5e_ethtool_flow_replace(priv, &cmd->fs);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+int mlx5e_get_rxnfc(struct net_device *dev,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int err = 0;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXRINGS:
+ info->data = priv->channels.params.num_channels;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ info->rule_cnt = priv->fs.ethtool.tot_num_rules;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = mlx5e_ethtool_get_flow(priv, info, info->fs.location);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
new file mode 100644
index 000000000..6ecb92f55
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -0,0 +1,5221 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_gact.h>
+#include <net/pkt_cls.h>
+#include <linux/mlx5/fs.h>
+#include <net/vxlan.h>
+#include <linux/bpf.h>
+#include <net/page_pool.h>
+#include "eswitch.h"
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls.h"
+#include "accel/ipsec.h"
+#include "accel/tls.h"
+#include "lib/vxlan.h"
+#include "lib/clock.h"
+#include "en/port.h"
+#include "en/xdp.h"
+
+struct mlx5e_rq_param {
+ u32 rqc[MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+ struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+ u32 sqc[MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param sq;
+ struct mlx5e_sq_param xdp_sq;
+ struct mlx5e_sq_param icosq;
+ struct mlx5e_cq_param rx_cq;
+ struct mlx5e_cq_param tx_cq;
+ struct mlx5e_cq_param icosq_cq;
+};
+
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+{
+ bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
+ MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+ MLX5_CAP_ETH(mdev, reg_umr_sq);
+ u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
+ bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
+
+ if (!striding_rq_umr)
+ return false;
+ if (!inline_umr) {
+ mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
+ (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
+ return false;
+ }
+ return true;
+}
+
+static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
+{
+ u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ u16 linear_rq_headroom = params->xdp_prog ?
+ XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+ u32 frag_sz;
+
+ linear_rq_headroom += NET_IP_ALIGN;
+
+ frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
+
+ if (params->xdp_prog && frag_sz < PAGE_SIZE)
+ frag_sz = PAGE_SIZE;
+
+ return frag_sz;
+}
+
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+{
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+
+ return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+}
+
+static bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+
+ return !params->lro_en && frag_sz <= PAGE_SIZE;
+}
+
+#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
+ MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
+static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ s8 signed_log_num_strides_param;
+ u8 log_num_strides;
+
+ if (!mlx5e_rx_is_linear_skb(mdev, params))
+ return false;
+
+ if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ return false;
+
+ if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+ return true;
+
+ log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+ signed_log_num_strides_param =
+ (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+
+ return signed_log_num_strides_param >= 0;
+}
+
+static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+{
+ if (params->log_rq_mtu_frames <
+ mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
+ return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+
+ return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params);
+}
+
+static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+ return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
+
+ return MLX5E_MPWQE_STRIDE_SZ(mdev,
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return MLX5_MPWRQ_LOG_WQE_SZ -
+ mlx5e_mpwqe_get_log_stride_size(mdev, params);
+}
+
+static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ u16 linear_rq_headroom = params->xdp_prog ?
+ XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+ bool is_linear_skb;
+
+ linear_rq_headroom += NET_IP_ALIGN;
+
+ is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
+ mlx5e_rx_is_linear_skb(mdev, params) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
+
+ return is_linear_skb ? linear_rq_headroom : 0;
+}
+
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+ params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+ BIT(mlx5e_mpwqe_get_log_rq_size(params)) :
+ BIT(params->log_rq_mtu_frames),
+ BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
+ !MLX5_IPSEC_DEV(mdev) &&
+ !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params));
+}
+
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
+ MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+ MLX5_WQ_TYPE_CYCLIC;
+}
+
+static void mlx5e_update_carrier(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 port_state;
+
+ port_state = mlx5_query_vport_state(mdev,
+ MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
+ 0);
+
+ if (port_state == VPORT_STATE_UP) {
+ netdev_info(priv->netdev, "Link up\n");
+ netif_carrier_on(priv->netdev);
+ } else {
+ netdev_info(priv->netdev, "Link down\n");
+ netif_carrier_off(priv->netdev);
+ }
+}
+
+static void mlx5e_update_carrier_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_carrier_work);
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ if (priv->profile->update_carrier)
+ priv->profile->update_carrier(priv);
+ mutex_unlock(&priv->state_lock);
+}
+
+void mlx5e_update_stats(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = mlx5e_num_stats_grps - 1; i >= 0; i--)
+ if (mlx5e_stats_grps[i].update_stats)
+ mlx5e_stats_grps[i].update_stats(priv);
+}
+
+static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = mlx5e_num_stats_grps - 1; i >= 0; i--)
+ if (mlx5e_stats_grps[i].update_stats_mask &
+ MLX5E_NDO_UPDATE_STATS)
+ mlx5e_stats_grps[i].update_stats(priv);
+}
+
+void mlx5e_update_stats_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv,
+ update_stats_work);
+
+ mutex_lock(&priv->state_lock);
+ priv->profile->update_stats(priv);
+ mutex_unlock(&priv->state_lock);
+}
+
+static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
+ enum mlx5_dev_event event, unsigned long param)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
+ return;
+
+ switch (event) {
+ case MLX5_DEV_EVENT_PORT_UP:
+ case MLX5_DEV_EVENT_PORT_DOWN:
+ queue_work(priv->wq, &priv->update_carrier_work);
+ break;
+ default:
+ break;
+ }
+}
+
+static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+}
+
+static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
+ synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
+}
+
+static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_icosq *sq,
+ struct mlx5e_umr_wqe *wqe)
+{
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+ u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ ds_cnt);
+ cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ cseg->imm = rq->mkey_be;
+
+ ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
+ ucseg->xlt_octowords =
+ cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
+ ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+}
+
+static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+ default:
+ return mlx5_wq_cyc_get_size(&rq->wqe.wq);
+ }
+}
+
+static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
+{
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return rq->mpwqe.wq.cur_sz;
+ default:
+ return rq->wqe.wq.cur_sz;
+ }
+}
+
+static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
+ struct mlx5e_channel *c)
+{
+ int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+
+ rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
+ sizeof(*rq->mpwqe.info)),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->mpwqe.info)
+ return -ENOMEM;
+
+ mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
+
+ return 0;
+}
+
+static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
+ u64 npages, u8 page_shift,
+ struct mlx5_core_mkey *umr_mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+ MLX5_SET64(mkc, mkc, len, npages << page_shift);
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ MLX5_MTT_OCTW(npages));
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+
+ err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
+{
+ u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
+
+ return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+}
+
+static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
+{
+ return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
+}
+
+static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
+{
+ struct mlx5e_wqe_frag_info next_frag = {};
+ struct mlx5e_wqe_frag_info *prev = NULL;
+ int i;
+
+ next_frag.di = &rq->wqe.di[0];
+
+ for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
+ struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+ struct mlx5e_wqe_frag_info *frag =
+ &rq->wqe.frags[i << rq->wqe.info.log_num_frags];
+ int f;
+
+ for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
+ if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
+ next_frag.di++;
+ next_frag.offset = 0;
+ if (prev)
+ prev->last_in_page = true;
+ }
+ *frag = next_frag;
+
+ /* prepare next */
+ next_frag.offset += frag_info[f].frag_stride;
+ prev = frag;
+ }
+ }
+
+ if (prev)
+ prev->last_in_page = true;
+}
+
+static int mlx5e_init_di_list(struct mlx5e_rq *rq,
+ struct mlx5e_params *params,
+ int wq_sz, int cpu)
+{
+ int len = wq_sz << rq->wqe.info.log_num_frags;
+
+ rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!rq->wqe.di)
+ return -ENOMEM;
+
+ mlx5e_init_frags_partition(rq);
+
+ return 0;
+}
+
+static void mlx5e_free_di_list(struct mlx5e_rq *rq)
+{
+ kvfree(rq->wqe.di);
+}
+
+static int mlx5e_alloc_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *rqp,
+ struct mlx5e_rq *rq)
+{
+ struct page_pool_params pp_params = { 0 };
+ struct mlx5_core_dev *mdev = c->mdev;
+ void *rqc = rqp->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ u32 pool_size;
+ int wq_sz;
+ int err;
+ int i;
+
+ rqp->wq.db_numa_node = cpu_to_node(c->cpu);
+
+ rq->wq_type = params->rq_wq_type;
+ rq->pdev = c->pdev;
+ rq->netdev = c->netdev;
+ rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
+ rq->channel = c;
+ rq->ix = c->ix;
+ rq->mdev = mdev;
+ rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ rq->stats = &c->priv->channel_stats[c->ix].rq;
+
+ rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
+ if (IS_ERR(rq->xdp_prog)) {
+ err = PTR_ERR(rq->xdp_prog);
+ rq->xdp_prog = NULL;
+ goto err_rq_wq_destroy;
+ }
+
+ err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix);
+ if (err < 0)
+ goto err_rq_wq_destroy;
+
+ rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+ rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+ pool_size = 1 << params->log_rq_mtu_frames;
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_rq_wq_destroy;
+
+ rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
+
+ wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
+
+ pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
+
+ rq->post_wqes = mlx5e_post_rx_mpwqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
+
+ rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (MLX5_IPSEC_DEV(mdev)) {
+ err = -EINVAL;
+ netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
+ goto err_rq_wq_destroy;
+ }
+#endif
+ if (!rq->handle_rx_cqe) {
+ err = -EINVAL;
+ netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
+ goto err_rq_wq_destroy;
+ }
+
+ rq->mpwqe.skb_from_cqe_mpwrq =
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ?
+ mlx5e_skb_from_cqe_mpwrq_linear :
+ mlx5e_skb_from_cqe_mpwrq_nonlinear;
+ rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
+ rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
+
+ err = mlx5e_create_rq_umr_mkey(mdev, rq);
+ if (err)
+ goto err_rq_wq_destroy;
+ rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+
+ err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+ if (err)
+ goto err_free;
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_rq_wq_destroy;
+
+ rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
+
+ wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
+
+ rq->wqe.info = rqp->frags_info;
+ rq->wqe.frags =
+ kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
+ (wq_sz << rq->wqe.info.log_num_frags)),
+ GFP_KERNEL, cpu_to_node(c->cpu));
+ if (!rq->wqe.frags) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ err = mlx5e_init_di_list(rq, params, wq_sz, c->cpu);
+ if (err)
+ goto err_free;
+ rq->post_wqes = mlx5e_post_rx_wqes;
+ rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (c->priv->ipsec)
+ rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
+ else
+#endif
+ rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
+ if (!rq->handle_rx_cqe) {
+ err = -EINVAL;
+ netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err);
+ goto err_free;
+ }
+
+ rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(mdev, params) ?
+ mlx5e_skb_from_cqe_linear :
+ mlx5e_skb_from_cqe_nonlinear;
+ rq->mkey_be = c->mkey_be;
+ }
+
+ /* Create a page_pool and register it with rxq */
+ pp_params.order = 0;
+ pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+ pp_params.pool_size = pool_size;
+ pp_params.nid = cpu_to_node(c->cpu);
+ pp_params.dev = c->pdev;
+ pp_params.dma_dir = rq->buff.map_dir;
+
+ /* page_pool can be used even when there is no rq->xdp_prog,
+ * given page_pool does not handle DMA mapping there is no
+ * required state to clear. And page_pool gracefully handle
+ * elevated refcnt.
+ */
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ err = PTR_ERR(rq->page_pool);
+ rq->page_pool = NULL;
+ goto err_free;
+ }
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, rq->page_pool);
+ if (err)
+ goto err_free;
+
+ for (i = 0; i < wq_sz; i++) {
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ struct mlx5e_rx_wqe_ll *wqe =
+ mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
+ u32 byte_count =
+ rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
+ u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
+
+ wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
+ wqe->data[0].byte_count = cpu_to_be32(byte_count);
+ wqe->data[0].lkey = rq->mkey_be;
+ } else {
+ struct mlx5e_rx_wqe_cyc *wqe =
+ mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
+ int f;
+
+ for (f = 0; f < rq->wqe.info.num_frags; f++) {
+ u32 frag_size = rq->wqe.info.arr[f].frag_size |
+ MLX5_HW_START_PADDING;
+
+ wqe->data[f].byte_count = cpu_to_be32(frag_size);
+ wqe->data[f].lkey = rq->mkey_be;
+ }
+ /* check if num_frags is not a pow of two */
+ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
+ wqe->data[f].byte_count = 0;
+ wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ wqe->data[f].addr = 0;
+ }
+ }
+ }
+
+ INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work);
+
+ switch (params->rx_cq_moderation.cq_period_mode) {
+ case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
+ rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+ break;
+ case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
+ default:
+ rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ }
+
+ rq->page_cache.head = 0;
+ rq->page_cache.tail = 0;
+
+ return 0;
+
+err_free:
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ kvfree(rq->mpwqe.info);
+ mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ kvfree(rq->wqe.frags);
+ mlx5e_free_di_list(rq);
+ }
+
+err_rq_wq_destroy:
+ if (rq->xdp_prog)
+ bpf_prog_put(rq->xdp_prog);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (rq->page_pool)
+ page_pool_destroy(rq->page_pool);
+ mlx5_wq_destroy(&rq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_rq(struct mlx5e_rq *rq)
+{
+ int i;
+
+ if (rq->xdp_prog)
+ bpf_prog_put(rq->xdp_prog);
+
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (rq->page_pool)
+ page_pool_destroy(rq->page_pool);
+
+ switch (rq->wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ kvfree(rq->mpwqe.info);
+ mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ kvfree(rq->wqe.frags);
+ mlx5e_free_di_list(rq);
+ }
+
+ for (i = rq->page_cache.head; i != rq->page_cache.tail;
+ i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
+ struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
+
+ mlx5e_page_release(rq, dma_info, false);
+ }
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
+static int mlx5e_create_rq(struct mlx5e_rq *rq,
+ struct mlx5e_rq_param *param)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+
+ void *in;
+ void *rqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * rq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ memcpy(rqc, param->rqc, sizeof(param->rqc));
+
+ MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
+ int next_state)
+{
+ struct mlx5_core_dev *mdev = rq->mdev;
+
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS);
+ MLX5_SET(rqc, rqc, scatter_fcs, enable);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5_core_dev *mdev = c->mdev;
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+ MLX5_SET(rqc, rqc, vsd, vsd);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
+{
+ mlx5_core_destroy_rq(rq->mdev, rq->rqn);
+}
+
+static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
+{
+ unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
+ struct mlx5e_channel *c = rq->channel;
+
+ u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
+
+ do {
+ if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes)
+ return 0;
+
+ msleep(20);
+ } while (time_before(jiffies, exp_time));
+
+ netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
+ c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
+
+ return -ETIMEDOUT;
+}
+
+static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+ __be16 wqe_ix_be;
+ u16 wqe_ix;
+
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+
+ /* UMR WQE (if in progress) is always at wq->head */
+ if (rq->mpwqe.umr_in_progress)
+ rq->dealloc_wqe(rq, wq->head);
+
+ while (!mlx5_wq_ll_is_empty(wq)) {
+ struct mlx5e_rx_wqe_ll *wqe;
+
+ wqe_ix_be = *wq->tail_next;
+ wqe_ix = be16_to_cpu(wqe_ix_be);
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_ix);
+ rq->dealloc_wqe(rq, wqe_ix);
+ mlx5_wq_ll_pop(wq, wqe_ix_be,
+ &wqe->next.next_wqe_index);
+ }
+ } else {
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+
+ while (!mlx5_wq_cyc_is_empty(wq)) {
+ wqe_ix = mlx5_wq_cyc_get_tail(wq);
+ rq->dealloc_wqe(rq, wqe_ix);
+ mlx5_wq_cyc_pop(wq);
+ }
+ }
+
+}
+
+static int mlx5e_open_rq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *param,
+ struct mlx5e_rq *rq)
+{
+ int err;
+
+ err = mlx5e_alloc_rq(c, params, param, rq);
+ if (err)
+ return err;
+
+ err = mlx5e_create_rq(rq, param);
+ if (err)
+ goto err_free_rq;
+
+ err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ goto err_destroy_rq;
+
+ if (params->rx_dim_enabled)
+ __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+
+ /* We disable csum_complete when XDP is enabled since
+ * XDP programs might manipulate packets which will render
+ * skb->checksum incorrect.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+
+ return 0;
+
+err_destroy_rq:
+ mlx5e_destroy_rq(rq);
+err_free_rq:
+ mlx5e_free_rq(rq);
+
+ return err;
+}
+
+static void mlx5e_activate_rq(struct mlx5e_rq *rq)
+{
+ struct mlx5e_icosq *sq = &rq->channel->icosq;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_tx_wqe *nopwqe;
+
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+ sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+ nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
+}
+
+static void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
+{
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
+ napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
+}
+
+static void mlx5e_close_rq(struct mlx5e_rq *rq)
+{
+ cancel_work_sync(&rq->dim.work);
+ mlx5e_destroy_rq(rq);
+ mlx5e_free_rx_descs(rq);
+ mlx5e_free_rq(rq);
+}
+
+static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
+{
+ kvfree(sq->db.xdpi);
+}
+
+static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+ sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)),
+ GFP_KERNEL, numa);
+ if (!sq->db.xdpi) {
+ mlx5e_free_xdpsq_db(sq);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_xdpsq *sq,
+ bool is_redirect)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->pdev = c->pdev;
+ sq->mkey_be = c->mkey_be;
+ sq->channel = c;
+ sq->uar_map = mdev->mlx5e_res.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ sq->stats = is_redirect ?
+ &c->priv->channel_stats[c->ix].xdpsq :
+ &c->priv->channel_stats[c->ix].rq_xdpsq;
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
+{
+ mlx5e_free_xdpsq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
+{
+ kvfree(sq->db.ico_wqe);
+}
+
+static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
+{
+ u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+ sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
+ sizeof(*sq->db.ico_wqe)),
+ GFP_KERNEL, numa);
+ if (!sq->db.ico_wqe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_icosq *sq)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->channel = c;
+ sq->uar_map = mdev->mlx5e_res.bfreg.map;
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
+{
+ mlx5e_free_icosq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
+{
+ kvfree(sq->db.wqe_info);
+ kvfree(sq->db.dma_fifo);
+}
+
+static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
+
+ sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
+ sizeof(*sq->db.dma_fifo)),
+ GFP_KERNEL, numa);
+ sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
+ sizeof(*sq->db.wqe_info)),
+ GFP_KERNEL, numa);
+ if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+ mlx5e_free_txqsq_db(sq);
+ return -ENOMEM;
+ }
+
+ sq->dma_fifo_mask = df_sz - 1;
+
+ return 0;
+}
+
+static void mlx5e_sq_recover(struct work_struct *work);
+static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
+ int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq,
+ int tc)
+{
+ void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq);
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ int err;
+
+ sq->pdev = c->pdev;
+ sq->tstamp = c->tstamp;
+ sq->clock = &mdev->clock;
+ sq->mkey_be = c->mkey_be;
+ sq->channel = c;
+ sq->txq_ix = txq_ix;
+ sq->uar_map = mdev->mlx5e_res.bfreg.map;
+ sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
+ INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
+ if (MLX5_IPSEC_DEV(c->priv->mdev))
+ set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+ if (mlx5_accel_is_tls_device(c->priv->mdev))
+ set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
+ if (err)
+ return err;
+ wq->db = &wq->db[MLX5_SND_DBR];
+
+ err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
+ if (err)
+ goto err_sq_wq_destroy;
+
+ INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
+ sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
+
+ return 0;
+
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+ return err;
+}
+
+static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
+{
+ mlx5e_free_txqsq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+struct mlx5e_create_sq_param {
+ struct mlx5_wq_ctrl *wq_ctrl;
+ u32 cqn;
+ u32 tisn;
+ u8 tis_lst_sz;
+ u8 min_inline_mode;
+};
+
+static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u32 *sqn)
+{
+ void *in;
+ void *sqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * csp->wq_ctrl->buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, param->sqc, sizeof(param->sqc));
+ MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz);
+ MLX5_SET(sqc, sqc, tis_num_0, csp->tisn);
+ MLX5_SET(sqc, sqc, cqn, csp->cqn);
+
+ if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+ MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode);
+
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma);
+
+ mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, sqn);
+
+ kvfree(in);
+
+ return err;
+}
+
+struct mlx5e_modify_sq_param {
+ int curr_state;
+ int next_state;
+ bool rl_update;
+ int rl_index;
+};
+
+static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+ struct mlx5e_modify_sq_param *p)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
+ MLX5_SET(sqc, sqc, state, p->next_state);
+ if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
+ }
+
+ err = mlx5_core_modify_sq(mdev, sqn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
+{
+ mlx5_core_destroy_sq(mdev, sqn);
+}
+
+static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_create_sq_param *csp,
+ u32 *sqn)
+{
+ struct mlx5e_modify_sq_param msp = {0};
+ int err;
+
+ err = mlx5e_create_sq(mdev, param, csp, sqn);
+ if (err)
+ return err;
+
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+ err = mlx5e_modify_sq(mdev, *sqn, &msp);
+ if (err)
+ mlx5e_destroy_sq(mdev, *sqn);
+
+ return err;
+}
+
+static int mlx5e_set_sq_maxrate(struct net_device *dev,
+ struct mlx5e_txqsq *sq, u32 rate);
+
+static int mlx5e_open_txqsq(struct mlx5e_channel *c,
+ u32 tisn,
+ int txq_ix,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_txqsq *sq,
+ int tc)
+{
+ struct mlx5e_create_sq_param csp = {};
+ u32 tx_rate;
+ int err;
+
+ err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
+ if (err)
+ return err;
+
+ csp.tisn = tisn;
+ csp.tis_lst_sz = 1;
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = sq->min_inline_mode;
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ if (err)
+ goto err_free_txqsq;
+
+ tx_rate = c->priv->tx_rates[sq->txq_ix];
+ if (tx_rate)
+ mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
+
+ if (params->tx_dim_enabled)
+ sq->state |= BIT(MLX5E_SQ_STATE_AM);
+
+ return 0;
+
+err_free_txqsq:
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ mlx5e_free_txqsq(sq);
+
+ return err;
+}
+
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+ WARN_ONCE(sq->cc != sq->pc,
+ "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+ sq->cc = 0;
+ sq->dma_fifo_cc = 0;
+ sq->pc = 0;
+}
+
+static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+{
+ sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ netdev_tx_reset_queue(sq->txq);
+ netif_tx_start_queue(sq->txq);
+}
+
+static inline void netif_tx_disable_queue(struct netdev_queue *txq)
+{
+ __netif_tx_lock_bh(txq);
+ netif_tx_stop_queue(txq);
+ __netif_tx_unlock_bh(txq);
+}
+
+static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ /* prevent netif_tx_wake_queue */
+ napi_synchronize(&c->napi);
+
+ netif_tx_disable_queue(sq->txq);
+
+ /* last doorbell out, godspeed .. */
+ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_tx_wqe *nop;
+
+ sq->db.wqe_info[pi].skb = NULL;
+ nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
+ }
+}
+
+static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_rate_limit rl = {0};
+
+ cancel_work_sync(&sq->dim.work);
+ mlx5e_destroy_sq(mdev, sq->sqn);
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
+ mlx5e_free_txqsq_descs(sq);
+ mlx5e_free_txqsq(sq);
+}
+
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+ unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+ while (time_before(jiffies, exp_time)) {
+ if (sq->cc == sq->pc)
+ return 0;
+
+ msleep(20);
+ }
+
+ netdev_err(sq->channel->netdev,
+ "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+ sq->sqn, sq->cc, sq->pc);
+
+ return -ETIMEDOUT;
+}
+
+static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
+{
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ struct mlx5e_modify_sq_param msp = {0};
+ int err;
+
+ msp.curr_state = curr_state;
+ msp.next_state = MLX5_SQC_STATE_RST;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
+ return err;
+ }
+
+ memset(&msp, 0, sizeof(msp));
+ msp.curr_state = MLX5_SQC_STATE_RST;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5e_sq_recover(struct work_struct *work)
+{
+ struct mlx5e_txqsq_recover *recover =
+ container_of(work, struct mlx5e_txqsq_recover,
+ recover_work);
+ struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
+ recover);
+ struct mlx5_core_dev *mdev = sq->channel->mdev;
+ struct net_device *dev = sq->channel->netdev;
+ u8 state;
+ int err;
+
+ err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+ if (err) {
+ netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+ sq->sqn, err);
+ return;
+ }
+
+ if (state != MLX5_RQC_STATE_ERR) {
+ netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
+ return;
+ }
+
+ netif_tx_disable_queue(sq->txq);
+
+ if (mlx5e_wait_for_sq_flush(sq))
+ return;
+
+ /* If the interval between two consecutive recovers per SQ is too
+ * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
+ * If we reached this state, there is probably a bug that needs to be
+ * fixed. let's keep the queue close and let tx timeout cleanup.
+ */
+ if (jiffies_to_msecs(jiffies - recover->last_recover) <
+ MLX5E_SQ_RECOVER_MIN_INTERVAL) {
+ netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
+ sq->sqn);
+ return;
+ }
+
+ /* At this point, no new packets will arrive from the stack as TXQ is
+ * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+ * pending WQEs. SQ can safely reset the SQ.
+ */
+ if (mlx5e_sq_to_ready(sq, state))
+ return;
+
+ mlx5e_reset_txqsq_cc_pc(sq);
+ sq->stats->recover++;
+ recover->last_recover = jiffies;
+ mlx5e_activate_txqsq(sq);
+}
+
+static int mlx5e_open_icosq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_icosq *sq)
+{
+ struct mlx5e_create_sq_param csp = {};
+ int err;
+
+ err = mlx5e_alloc_icosq(c, param, sq);
+ if (err)
+ return err;
+
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = params->tx_min_inline_mode;
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ if (err)
+ goto err_free_icosq;
+
+ return 0;
+
+err_free_icosq:
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ mlx5e_free_icosq(sq);
+
+ return err;
+}
+
+static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
+ mlx5e_free_icosq(sq);
+}
+
+static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_xdpsq *sq,
+ bool is_redirect)
+{
+ unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
+ struct mlx5e_create_sq_param csp = {};
+ unsigned int inline_hdr_sz = 0;
+ int err;
+ int i;
+
+ err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
+ if (err)
+ return err;
+
+ csp.tis_lst_sz = 1;
+ csp.tisn = c->priv->tisn[0]; /* tc = 0 */
+ csp.cqn = sq->cq.mcq.cqn;
+ csp.wq_ctrl = &sq->wq_ctrl;
+ csp.min_inline_mode = sq->min_inline_mode;
+ if (is_redirect)
+ set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state);
+ set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+ if (err)
+ goto err_free_xdpsq;
+
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
+ ds_cnt++;
+ }
+
+ /* Pre initialize fixed WQE fields */
+ for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
+ struct mlx5_wqe_data_seg *dseg;
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+
+ dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
+ dseg->lkey = sq->mkey_be;
+ }
+
+ return 0;
+
+err_free_xdpsq:
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ mlx5e_free_xdpsq(sq);
+
+ return err;
+}
+
+static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_destroy_sq(c->mdev, sq->sqn);
+ mlx5e_free_xdpsq_descs(sq);
+ mlx5e_free_xdpsq(sq);
+}
+
+static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ int eqn_not_used;
+ unsigned int irqn;
+ int err;
+ u32 i;
+
+ err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
+ if (err)
+ return err;
+
+ err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
+ &cq->wq_ctrl);
+ if (err)
+ return err;
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+ *mcq->set_ci_db = 0;
+ *mcq->arm_db = 0;
+ mcq->vector = param->eq_ix;
+ mcq->comp = mlx5e_completion_event;
+ mcq->event = mlx5e_cq_error_event;
+ mcq->irqn = irqn;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->mdev = mdev;
+
+ return 0;
+}
+
+static int mlx5e_alloc_cq(struct mlx5e_channel *c,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_dev *mdev = c->priv->mdev;
+ int err;
+
+ param->wq.buf_numa_node = cpu_to_node(c->cpu);
+ param->wq.db_numa_node = cpu_to_node(c->cpu);
+ param->eq_ix = c->ix;
+
+ err = mlx5e_alloc_cq_common(mdev, param, cq);
+
+ cq->napi = &c->napi;
+ cq->channel = c;
+
+ return err;
+}
+
+static void mlx5e_free_cq(struct mlx5e_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
+{
+ struct mlx5_core_dev *mdev = cq->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+
+ void *in;
+ void *cqc;
+ int inlen;
+ unsigned int irqn_not_used;
+ int eqn;
+ int err;
+
+ err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, param->cqc, sizeof(param->cqc));
+
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen);
+
+ kvfree(in);
+
+ if (err)
+ return err;
+
+ mlx5e_cq_arm(cq);
+
+ return 0;
+}
+
+static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
+{
+ mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+}
+
+static int mlx5e_open_cq(struct mlx5e_channel *c,
+ struct net_dim_cq_moder moder,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq)
+{
+ struct mlx5_core_dev *mdev = c->mdev;
+ int err;
+
+ err = mlx5e_alloc_cq(c, param, cq);
+ if (err)
+ return err;
+
+ err = mlx5e_create_cq(cq, param);
+ if (err)
+ goto err_free_cq;
+
+ if (MLX5_CAP_GEN(mdev, cq_moderation))
+ mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
+ return 0;
+
+err_free_cq:
+ mlx5e_free_cq(cq);
+
+ return err;
+}
+
+static void mlx5e_close_cq(struct mlx5e_cq *cq)
+{
+ mlx5e_destroy_cq(cq);
+ mlx5e_free_cq(cq);
+}
+
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+ return cpumask_first(priv->mdev->priv.irq_info[ix + MLX5_EQ_VEC_COMP_BASE].mask);
+}
+
+static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ err = mlx5e_open_cq(c, params->tx_cq_moderation,
+ &cparam->tx_cq, &c->sq[tc].cq);
+ if (err)
+ goto err_close_tx_cqs;
+ }
+
+ return 0;
+
+err_close_tx_cqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_cq(&c->sq[tc].cq);
+
+ return err;
+}
+
+static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->sq[tc].cq);
+}
+
+static int mlx5e_open_sqs(struct mlx5e_channel *c,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ struct mlx5e_priv *priv = c->priv;
+ int err, tc, max_nch = priv->profile->max_nch(priv->mdev);
+
+ for (tc = 0; tc < params->num_tc; tc++) {
+ int txq_ix = c->ix + tc * max_nch;
+
+ err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix,
+ params, &cparam->sq, &c->sq[tc], tc);
+ if (err)
+ goto err_close_sqs;
+ }
+
+ return 0;
+
+err_close_sqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_txqsq(&c->sq[tc]);
+
+ return err;
+}
+
+static void mlx5e_close_sqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_txqsq(&c->sq[tc]);
+}
+
+static int mlx5e_set_sq_maxrate(struct net_device *dev,
+ struct mlx5e_txqsq *sq, u32 rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_modify_sq_param msp = {0};
+ struct mlx5_rate_limit rl = {0};
+ u16 rl_index = 0;
+ int err;
+
+ if (rate == sq->rate_limit)
+ /* nothing to do */
+ return 0;
+
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
+ /* remove current rl index to free space to next ones */
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
+
+ sq->rate_limit = 0;
+
+ if (rate) {
+ rl.rate = rate;
+ err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
+ if (err) {
+ netdev_err(dev, "Failed configuring rate %u: %d\n",
+ rate, err);
+ return err;
+ }
+ }
+
+ msp.curr_state = MLX5_SQC_STATE_RDY;
+ msp.next_state = MLX5_SQC_STATE_RDY;
+ msp.rl_index = rl_index;
+ msp.rl_update = true;
+ err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+ if (err) {
+ netdev_err(dev, "Failed configuring rate %u: %d\n",
+ rate, err);
+ /* remove the rate from the table */
+ if (rate)
+ mlx5_rl_remove_rate(mdev, &rl);
+ return err;
+ }
+
+ sq->rate_limit = rate;
+ return 0;
+}
+
+static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_txqsq *sq = priv->txq2sq[index];
+ int err = 0;
+
+ if (!mlx5_rl_is_supported(mdev)) {
+ netdev_err(dev, "Rate limiting is not supported on this device\n");
+ return -EINVAL;
+ }
+
+ /* rate is given in Mb/sec, HW config is in Kb/sec */
+ rate = rate << 10;
+
+ /* Check whether rate in valid range, 0 is always valid */
+ if (rate && !mlx5_rl_is_in_range(mdev, rate)) {
+ netdev_err(dev, "TX rate %u, is not in range\n", rate);
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ err = mlx5e_set_sq_maxrate(dev, sq, rate);
+ if (!err)
+ priv->tx_rates[index] = rate;
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam,
+ struct mlx5e_channel **cp)
+{
+ struct net_dim_cq_moder icocq_moder = {0, 0};
+ struct net_device *netdev = priv->netdev;
+ int cpu = mlx5e_get_cpu(priv, ix);
+ struct mlx5e_channel *c;
+ unsigned int irq;
+ int err;
+ int eqn;
+
+ err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+ if (err)
+ return err;
+
+ c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
+ if (!c)
+ return -ENOMEM;
+
+ c->priv = priv;
+ c->mdev = priv->mdev;
+ c->tstamp = &priv->tstamp;
+ c->ix = ix;
+ c->cpu = cpu;
+ c->pdev = &priv->mdev->pdev->dev;
+ c->netdev = priv->netdev;
+ c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+ c->num_tc = params->num_tc;
+ c->xdp = !!params->xdp_prog;
+ c->stats = &priv->channel_stats[ix].ch;
+
+ c->irq_desc = irq_to_desc(irq);
+
+ netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
+
+ err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
+ if (err)
+ goto err_napi_del;
+
+ err = mlx5e_open_tx_cqs(c, params, cparam);
+ if (err)
+ goto err_close_icosq_cq;
+
+ err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xdpsq.cq);
+ if (err)
+ goto err_close_tx_cqs;
+
+ err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq);
+ if (err)
+ goto err_close_xdp_tx_cqs;
+
+ /* XDP SQ CQ params are same as normal TXQ sq CQ params */
+ err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation,
+ &cparam->tx_cq, &c->rq.xdpsq.cq) : 0;
+ if (err)
+ goto err_close_rx_cq;
+
+ napi_enable(&c->napi);
+
+ err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
+ if (err)
+ goto err_disable_napi;
+
+ err = mlx5e_open_sqs(c, params, cparam);
+ if (err)
+ goto err_close_icosq;
+
+ err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0;
+ if (err)
+ goto err_close_sqs;
+
+ err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq);
+ if (err)
+ goto err_close_xdp_sq;
+
+ err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true);
+ if (err)
+ goto err_close_rq;
+
+ *cp = c;
+
+ return 0;
+
+err_close_rq:
+ mlx5e_close_rq(&c->rq);
+
+err_close_xdp_sq:
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq.xdpsq);
+
+err_close_sqs:
+ mlx5e_close_sqs(c);
+
+err_close_icosq:
+ mlx5e_close_icosq(&c->icosq);
+
+err_disable_napi:
+ napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq.xdpsq.cq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->rq.cq);
+
+err_close_xdp_tx_cqs:
+ mlx5e_close_cq(&c->xdpsq.cq);
+
+err_close_tx_cqs:
+ mlx5e_close_tx_cqs(c);
+
+err_close_icosq_cq:
+ mlx5e_close_cq(&c->icosq.cq);
+
+err_napi_del:
+ netif_napi_del(&c->napi);
+ kvfree(c);
+
+ return err;
+}
+
+static void mlx5e_activate_channel(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_activate_txqsq(&c->sq[tc]);
+ mlx5e_activate_rq(&c->rq);
+ netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
+}
+
+static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
+{
+ int tc;
+
+ mlx5e_deactivate_rq(&c->rq);
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_deactivate_txqsq(&c->sq[tc]);
+}
+
+static void mlx5e_close_channel(struct mlx5e_channel *c)
+{
+ mlx5e_close_xdpsq(&c->xdpsq);
+ mlx5e_close_rq(&c->rq);
+ if (c->xdp)
+ mlx5e_close_xdpsq(&c->rq.xdpsq);
+ mlx5e_close_sqs(c);
+ mlx5e_close_icosq(&c->icosq);
+ napi_disable(&c->napi);
+ if (c->xdp)
+ mlx5e_close_cq(&c->rq.xdpsq.cq);
+ mlx5e_close_cq(&c->rq.cq);
+ mlx5e_close_cq(&c->xdpsq.cq);
+ mlx5e_close_tx_cqs(c);
+ mlx5e_close_cq(&c->icosq.cq);
+ netif_napi_del(&c->napi);
+
+ kvfree(c);
+}
+
+#define DEFAULT_FRAG_SIZE (2048)
+
+static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_frags_info *info)
+{
+ u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ int frag_size_max = DEFAULT_FRAG_SIZE;
+ u32 buf_size = 0;
+ int i;
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (MLX5_IPSEC_DEV(mdev))
+ byte_count += MLX5E_METADATA_ETHER_LEN;
+#endif
+
+ if (mlx5e_rx_is_linear_skb(mdev, params)) {
+ int frag_stride;
+
+ frag_stride = mlx5e_rx_get_linear_frag_sz(params);
+ frag_stride = roundup_pow_of_two(frag_stride);
+
+ info->arr[0].frag_size = byte_count;
+ info->arr[0].frag_stride = frag_stride;
+ info->num_frags = 1;
+ info->wqe_bulk = PAGE_SIZE / frag_stride;
+ goto out;
+ }
+
+ if (byte_count > PAGE_SIZE +
+ (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
+ frag_size_max = PAGE_SIZE;
+
+ i = 0;
+ while (buf_size < byte_count) {
+ int frag_size = byte_count - buf_size;
+
+ if (i < MLX5E_MAX_RX_FRAGS - 1)
+ frag_size = min(frag_size, frag_size_max);
+
+ info->arr[i].frag_size = frag_size;
+ info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+
+ buf_size += frag_size;
+ i++;
+ }
+ info->num_frags = i;
+ /* number of different wqes sharing a page */
+ info->wqe_bulk = 1 + (info->num_frags % 2);
+
+out:
+ info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
+ info->log_num_frags = order_base_2(info->num_frags);
+}
+
+static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
+{
+ int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
+
+ switch (wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ sz += sizeof(struct mlx5e_rx_wqe_ll);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ sz += sizeof(struct mlx5e_rx_wqe_cyc);
+ }
+
+ return order_base_2(sz);
+}
+
+static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_rq_param *param)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int ndsegs = 1;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ mlx5e_mpwqe_get_log_num_strides(mdev, params) -
+ MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ mlx5e_mpwqe_get_log_stride_size(mdev, params) -
+ MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
+ mlx5e_build_rq_frags_info(mdev, params, &param->frags_info);
+ ndsegs = param->frags_info.num_frags;
+ }
+
+ MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn);
+ MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
+ MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable);
+ MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en);
+
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+}
+
+static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_rq_param *param)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, log_wq_stride,
+ mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
+ MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
+
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+}
+
+static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn);
+
+ param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
+}
+
+static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+ MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
+}
+
+static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+}
+
+static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *cqc = param->cqc;
+ u8 log_cq_size;
+
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) +
+ mlx5e_mpwqe_get_log_num_strides(mdev, params);
+ break;
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ log_cq_size = params->log_rq_mtu_frames;
+ }
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
+ MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+ }
+
+ mlx5e_build_common_cq_param(priv, param);
+ param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
+}
+
+static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
+
+ mlx5e_build_common_cq_param(priv, param);
+ param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
+}
+
+static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
+
+ mlx5e_build_common_cq_param(priv, param);
+
+ param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+ MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
+}
+
+static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+ MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+}
+
+static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam)
+{
+ u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ mlx5e_build_rq_param(priv, params, &cparam->rq);
+ mlx5e_build_sq_param(priv, params, &cparam->sq);
+ mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+ mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
+ mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq);
+ mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
+ mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq);
+}
+
+int mlx5e_open_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs)
+{
+ struct mlx5e_channel_param *cparam;
+ int err = -ENOMEM;
+ int i;
+
+ chs->num = chs->params.num_channels;
+
+ chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
+ cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
+ if (!chs->c || !cparam)
+ goto err_free;
+
+ mlx5e_build_channel_param(priv, &chs->params, cparam);
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]);
+ if (err)
+ goto err_close_channels;
+ }
+
+ kvfree(cparam);
+ return 0;
+
+err_close_channels:
+ for (i--; i >= 0; i--)
+ mlx5e_close_channel(chs->c[i]);
+
+err_free:
+ kfree(chs->c);
+ kvfree(cparam);
+ chs->num = 0;
+ return err;
+}
+
+static void mlx5e_activate_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_activate_channel(chs->c[i]);
+}
+
+static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq,
+ err ? 0 : 20000);
+
+ return err ? -ETIMEDOUT : 0;
+}
+
+static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_deactivate_channel(chs->c[i]);
+}
+
+void mlx5e_close_channels(struct mlx5e_channels *chs)
+{
+ int i;
+
+ for (i = 0; i < chs->num; i++)
+ mlx5e_close_channel(chs->c[i]);
+
+ kfree(chs->c);
+ chs->num = 0;
+}
+
+static int
+mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqtc;
+ int inlen;
+ int err;
+ u32 *in;
+ int i;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+ for (i = 0; i < sz; i++)
+ MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
+
+ err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
+ if (!err)
+ rqt->enabled = true;
+
+ kvfree(in);
+ return err;
+}
+
+void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
+{
+ rqt->enabled = false;
+ mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
+}
+
+int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rqt *rqt = &priv->indir_rqt;
+ int err;
+
+ err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt);
+ if (err)
+ mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
+ return err;
+}
+
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rqt *rqt;
+ int err;
+ int ix;
+
+ for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ rqt = &priv->direct_tir[ix].rqt;
+ err = mlx5e_create_rqt(priv, 1 /*size */, rqt);
+ if (err)
+ goto err_destroy_rqts;
+ }
+
+ return 0;
+
+err_destroy_rqts:
+ mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
+ for (ix--; ix >= 0; ix--)
+ mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt);
+
+ return err;
+}
+
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->profile->max_nch(priv->mdev); i++)
+ mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
+}
+
+static int mlx5e_rx_hash_fn(int hfunc)
+{
+ return (hfunc == ETH_RSS_HASH_TOP) ?
+ MLX5_RX_HASH_FN_TOEPLITZ :
+ MLX5_RX_HASH_FN_INVERTED_XOR8;
+}
+
+int mlx5e_bits_invert(unsigned long a, int size)
+{
+ int inv = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+ return inv;
+}
+
+static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
+ struct mlx5e_redirect_rqt_param rrp, void *rqtc)
+{
+ int i;
+
+ for (i = 0; i < sz; i++) {
+ u32 rqn;
+
+ if (rrp.is_rss) {
+ int ix = i;
+
+ if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(i, ilog2(sz));
+
+ ix = priv->channels.params.indirection_rqt[ix];
+ rqn = rrp.rss.channels->c[ix]->rq.rqn;
+ } else {
+ rqn = rrp.rqn;
+ }
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
+ }
+}
+
+int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
+ struct mlx5e_redirect_rqt_param rrp)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqtc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
+ mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc);
+ err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_redirect_rqt_param rrp)
+{
+ if (!rrp.is_rss)
+ return rrp.rqn;
+
+ if (ix >= rrp.rss.channels->num)
+ return priv->drop_rq.rqn;
+
+ return rrp.rss.channels->c[ix]->rq.rqn;
+}
+
+static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
+ struct mlx5e_redirect_rqt_param rrp)
+{
+ u32 rqtn;
+ int ix;
+
+ if (priv->indir_rqt.enabled) {
+ /* RSS RQ table */
+ rqtn = priv->indir_rqt.rqtn;
+ mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
+ }
+
+ for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ struct mlx5e_redirect_rqt_param direct_rrp = {
+ .is_rss = false,
+ {
+ .rqn = mlx5e_get_direct_rqn(priv, ix, rrp)
+ },
+ };
+
+ /* Direct RQ Tables */
+ if (!priv->direct_tir[ix].rqt.enabled)
+ continue;
+
+ rqtn = priv->direct_tir[ix].rqt.rqtn;
+ mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
+ }
+}
+
+static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *chs)
+{
+ struct mlx5e_redirect_rqt_param rrp = {
+ .is_rss = true,
+ {
+ .rss = {
+ .channels = chs,
+ .hfunc = chs->params.rss_hfunc,
+ }
+ },
+ };
+
+ mlx5e_redirect_rqts(priv, rrp);
+}
+
+static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
+{
+ struct mlx5e_redirect_rqt_param drop_rrp = {
+ .is_rss = false,
+ {
+ .rqn = priv->drop_rq.rqn,
+ },
+ };
+
+ mlx5e_redirect_rqts(priv, drop_rrp);
+}
+
+static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
+{
+ if (!params->lro_en)
+ return;
+
+#define ROUGH_MAX_L2_L3_HDR_SZ 256
+
+ MLX5_SET(tirc, tirc, lro_enable_mask,
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+ (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
+}
+
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
+ enum mlx5e_traffic_types tt,
+ void *tirc, bool inner)
+{
+ void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
+ MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+
+#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
+ MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc));
+ if (params->rss_hfunc == ETH_RSS_HASH_TOP) {
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc,
+ rx_hash_toeplitz_key);
+ size_t len = MLX5_FLD_SZ_BYTES(tirc,
+ rx_hash_toeplitz_key);
+
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, params->toeplitz_hash_key, len);
+ }
+
+ switch (tt) {
+ case MLX5E_TT_IPV4_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV6_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV4_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV6_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_L4PORTS);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+
+ case MLX5E_TT_IPV6:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+ default:
+ WARN_ONCE(true, "%s: bad traffic type!\n", __func__);
+ }
+}
+
+static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *tirc;
+ int inlen;
+ int err;
+ int tt;
+ int ix;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tir_in, in, bitmask.lro, 1);
+ tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in,
+ inlen);
+ if (err)
+ goto free_in;
+ }
+
+ for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
+ in, inlen);
+ if (err)
+ goto free_in;
+ }
+
+free_in:
+ kvfree(in);
+
+ return err;
+}
+
+static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
+ enum mlx5e_traffic_types tt,
+ u32 *tirc)
+{
+ MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
+ MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
+
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+}
+
+static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 mtu)
+{
+ u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
+ int err;
+
+ err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
+ if (err)
+ return err;
+
+ /* Update vport context MTU */
+ mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
+ return 0;
+}
+
+static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 *mtu)
+{
+ u16 hw_mtu = 0;
+ int err;
+
+ err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
+ if (err || !hw_mtu) /* fallback to port oper mtu */
+ mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
+
+ *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
+}
+
+static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 mtu;
+ int err;
+
+ err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
+ if (err)
+ return err;
+
+ mlx5e_query_mtu(mdev, params, &mtu);
+ if (mtu != params->sw_mtu)
+ netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
+ __func__, mtu, params->sw_mtu);
+
+ params->sw_mtu = mtu;
+ return 0;
+}
+
+static void mlx5e_netdev_set_tcs(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int nch = priv->channels.params.num_channels;
+ int ntc = priv->channels.params.num_tc;
+ int tc;
+
+ netdev_reset_tc(netdev);
+
+ if (ntc == 1)
+ return;
+
+ netdev_set_num_tc(netdev, ntc);
+
+ /* Map netdev TCs to offset 0
+ * We have our own UP to TXQ mapping for QoS
+ */
+ for (tc = 0; tc < ntc; tc++)
+ netdev_set_tc_queue(netdev, tc, nch, 0);
+}
+
+static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+ int i, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ priv->channel_tc2txq[i][tc] = i + tc * max_nch;
+}
+
+static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channel *c;
+ struct mlx5e_txqsq *sq;
+ int i, tc;
+
+ for (i = 0; i < priv->channels.num; i++) {
+ c = priv->channels.c[i];
+ for (tc = 0; tc < c->num_tc; tc++) {
+ sq = &c->sq[tc];
+ priv->txq2sq[sq->txq_ix] = sq;
+ }
+ }
+}
+
+void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
+{
+ int num_txqs = priv->channels.num * priv->channels.params.num_tc;
+ struct net_device *netdev = priv->netdev;
+
+ mlx5e_netdev_set_tcs(netdev);
+ netif_set_real_num_tx_queues(netdev, num_txqs);
+ netif_set_real_num_rx_queues(netdev, priv->channels.num);
+
+ mlx5e_build_tx2sq_maps(priv);
+ mlx5e_activate_channels(&priv->channels);
+ mlx5e_xdp_tx_enable(priv);
+ netif_tx_start_all_queues(priv->netdev);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_add_sqs_fwd_rules(priv);
+
+ mlx5e_wait_channels_min_rx_wqes(&priv->channels);
+ mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
+}
+
+void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
+{
+ mlx5e_redirect_rqts_to_drop(priv);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_remove_sqs_fwd_rules(priv);
+
+ /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
+ * polling for inactive tx queues.
+ */
+ netif_tx_stop_all_queues(priv->netdev);
+ netif_tx_disable(priv->netdev);
+ mlx5e_xdp_tx_disable(priv);
+ mlx5e_deactivate_channels(&priv->channels);
+}
+
+void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
+ struct mlx5e_channels *new_chs,
+ mlx5e_fp_hw_modify hw_modify)
+{
+ struct net_device *netdev = priv->netdev;
+ int new_num_txqs;
+ int carrier_ok;
+ new_num_txqs = new_chs->num * new_chs->params.num_tc;
+
+ carrier_ok = netif_carrier_ok(netdev);
+ netif_carrier_off(netdev);
+
+ if (new_num_txqs < netdev->real_num_tx_queues)
+ netif_set_real_num_tx_queues(netdev, new_num_txqs);
+
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+
+ priv->channels = *new_chs;
+
+ /* New channels are ready to roll, modify HW settings if needed */
+ if (hw_modify)
+ hw_modify(priv);
+
+ mlx5e_refresh_tirs(priv, false);
+ mlx5e_activate_priv_channels(priv);
+
+ /* return carrier back if needed */
+ if (carrier_ok)
+ netif_carrier_on(netdev);
+}
+
+void mlx5e_timestamp_init(struct mlx5e_priv *priv)
+{
+ priv->tstamp.tx_type = HWTSTAMP_TX_OFF;
+ priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
+}
+
+int mlx5e_open_locked(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ set_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ err = mlx5e_open_channels(priv, &priv->channels);
+ if (err)
+ goto err_clear_state_opened_flag;
+
+ mlx5e_refresh_tirs(priv, false);
+ mlx5e_activate_priv_channels(priv);
+ if (priv->profile->update_carrier)
+ priv->profile->update_carrier(priv);
+
+ if (priv->profile->update_stats)
+ queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
+ return 0;
+
+err_clear_state_opened_flag:
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ return err;
+}
+
+int mlx5e_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_open_locked(netdev);
+ if (!err)
+ mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
+ mutex_unlock(&priv->state_lock);
+
+ if (mlx5_vxlan_allowed(priv->mdev->vxlan))
+ udp_tunnel_get_rx_info(netdev);
+
+ return err;
+}
+
+int mlx5e_close_locked(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ /* May already be CLOSED in case a previous configuration operation
+ * (e.g RX/TX queue size change) that involves close&open failed.
+ */
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ netif_carrier_off(priv->netdev);
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+
+ return 0;
+}
+
+int mlx5e_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!netif_device_present(netdev))
+ return -ENODEV;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
+ err = mlx5e_close_locked(netdev);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
+ struct mlx5e_rq *rq,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int err;
+
+ param->wq.db_numa_node = param->wq.buf_numa_node;
+
+ err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
+ &rq->wq_ctrl);
+ if (err)
+ return err;
+
+ /* Mark as unused given "Drop-RQ" packets never reach XDP */
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+
+ rq->mdev = mdev;
+
+ return 0;
+}
+
+static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
+ struct mlx5e_cq *cq,
+ struct mlx5e_cq_param *param)
+{
+ param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
+ param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev);
+
+ return mlx5e_alloc_cq_common(mdev, param, cq);
+}
+
+static int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_cq_param cq_param = {};
+ struct mlx5e_rq_param rq_param = {};
+ struct mlx5e_cq *cq = &drop_rq->cq;
+ int err;
+
+ mlx5e_build_drop_rq_param(priv, &rq_param);
+
+ err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
+ if (err)
+ return err;
+
+ err = mlx5e_create_cq(cq, &cq_param);
+ if (err)
+ goto err_free_cq;
+
+ err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
+ if (err)
+ goto err_destroy_cq;
+
+ err = mlx5e_create_rq(drop_rq, &rq_param);
+ if (err)
+ goto err_free_rq;
+
+ err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
+
+ return 0;
+
+err_free_rq:
+ mlx5e_free_rq(drop_rq);
+
+err_destroy_cq:
+ mlx5e_destroy_cq(cq);
+
+err_free_cq:
+ mlx5e_free_cq(cq);
+
+ return err;
+}
+
+static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
+{
+ mlx5e_destroy_rq(drop_rq);
+ mlx5e_free_rq(drop_rq);
+ mlx5e_destroy_cq(&drop_rq->cq);
+ mlx5e_free_cq(&drop_rq->cq);
+}
+
+int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
+ u32 underlay_qpn, u32 *tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ MLX5_SET(tisc, tisc, prio, tc << 1);
+ MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn);
+ MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
+
+ if (mlx5_lag_is_lacp_owner(mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
+
+ return mlx5_core_create_tis(mdev, in, sizeof(in), tisn);
+}
+
+void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
+{
+ mlx5_core_destroy_tis(mdev, tisn);
+}
+
+int mlx5e_create_tises(struct mlx5e_priv *priv)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < priv->profile->max_tc; tc++) {
+ err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]);
+ if (err)
+ goto err_close_tises;
+ }
+
+ return 0;
+
+err_close_tises:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+
+ return err;
+}
+
+void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
+{
+ int tc;
+
+ for (tc = 0; tc < priv->profile->max_tc; tc++)
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
+}
+
+static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
+ enum mlx5e_traffic_types tt,
+ u32 *tirc)
+{
+ MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+}
+
+static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
+{
+ MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, rqtn);
+ MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+}
+
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tir *tir;
+ void *tirc;
+ int inlen;
+ int i = 0;
+ int err;
+ u32 *in;
+ int tt;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(in, 0, inlen);
+ tir = &priv->indir_tir[tt];
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ mlx5e_build_indir_tir_ctx(priv, tt, tirc);
+ err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
+ goto err_destroy_inner_tirs;
+ }
+ }
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ goto out;
+
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
+ memset(in, 0, inlen);
+ tir = &priv->inner_indir_tir[i];
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
+ err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
+ goto err_destroy_inner_tirs;
+ }
+ }
+
+out:
+ kvfree(in);
+
+ return 0;
+
+err_destroy_inner_tirs:
+ for (i--; i >= 0; i--)
+ mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
+
+ for (tt--; tt >= 0; tt--)
+ mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
+{
+ int nch = priv->profile->max_nch(priv->mdev);
+ struct mlx5e_tir *tir;
+ void *tirc;
+ int inlen;
+ int err;
+ u32 *in;
+ int ix;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ for (ix = 0; ix < nch; ix++) {
+ memset(in, 0, inlen);
+ tir = &priv->direct_tir[ix];
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc);
+ err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+ if (err)
+ goto err_destroy_ch_tirs;
+ }
+
+ kvfree(in);
+
+ return 0;
+
+err_destroy_ch_tirs:
+ mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err);
+ for (ix--; ix >= 0; ix--)
+ mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]);
+
+ kvfree(in);
+
+ return err;
+}
+
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
+ mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
+ mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
+}
+
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
+{
+ int nch = priv->profile->max_nch(priv->mdev);
+ int i;
+
+ for (i = 0; i < nch; i++)
+ mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]);
+}
+
+static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < chs->num; i++) {
+ err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
+ struct tc_mqprio_qopt *mqprio)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels new_channels = {};
+ u8 tc = mqprio->num_tc;
+ int err = 0;
+
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ if (tc && tc != MLX5E_MAX_NUM_TC)
+ return -EINVAL;
+
+ mutex_lock(&priv->state_lock);
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.num_tc = tc ? tc : 1;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
+ new_channels.params.num_tc);
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *cls_flower,
+ int flags)
+{
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlx5e_configure_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_DESTROY:
+ return mlx5e_delete_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_STATS:
+ return mlx5e_stats_flower(priv, cls_flower, flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
+ priv, priv, f->extack);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
+ priv);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+#endif
+
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+#ifdef CONFIG_MLX5_ESWITCH
+ case TC_SETUP_BLOCK:
+ return mlx5e_setup_tc_block(dev, type_data);
+#endif
+ case TC_SETUP_QDISC_MQPRIO:
+ return mlx5e_setup_tc_mqprio(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void
+mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+ struct mlx5e_vport_stats *vstats = &priv->stats.vport;
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+
+ /* update HW stats in background for next time */
+ queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
+ if (mlx5e_is_uplink_rep(priv)) {
+ stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+ stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok);
+ stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+ stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+ } else {
+ mlx5e_grp_sw_update_stats(priv);
+ stats->rx_packets = sstats->rx_packets;
+ stats->rx_bytes = sstats->rx_bytes;
+ stats->tx_packets = sstats->tx_packets;
+ stats->tx_bytes = sstats->tx_bytes;
+ stats->tx_dropped = sstats->tx_queue_dropped;
+ }
+
+ stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
+
+ stats->rx_length_errors =
+ PPORT_802_3_GET(pstats, a_in_range_length_errors) +
+ PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
+ PPORT_802_3_GET(pstats, a_frame_too_long_errors);
+ stats->rx_crc_errors =
+ PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
+ stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
+ stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
+ stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
+ stats->rx_frame_errors;
+ stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
+
+ /* vport multicast also counts packets that are dropped due to steering
+ * or rx out of buffer
+ */
+ stats->multicast =
+ VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
+}
+
+static void mlx5e_set_rx_mode(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+}
+
+static int mlx5e_set_mac(struct net_device *netdev, void *addr)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct sockaddr *saddr = addr;
+
+ if (!is_valid_ether_addr(saddr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ netif_addr_lock_bh(netdev);
+ ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+ netif_addr_unlock_bh(netdev);
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+
+ return 0;
+}
+
+#define MLX5E_SET_FEATURE(features, feature, enable) \
+ do { \
+ if (enable) \
+ *features |= feature; \
+ else \
+ *features &= ~feature; \
+ } while (0)
+
+typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
+
+static int set_feature_lro(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_channels new_channels = {};
+ struct mlx5e_params *old_params;
+ int err = 0;
+ bool reset;
+
+ mutex_lock(&priv->state_lock);
+
+ old_params = &priv->channels.params;
+ if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ netdev_warn(netdev, "can't set LRO with legacy RQ\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ new_channels.params = *old_params;
+ new_channels.params.lro_en = enable;
+
+ if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) ==
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params))
+ reset = false;
+ }
+
+ if (!reset) {
+ *old_params = new_channels.params;
+ err = mlx5e_modify_tirs_lro(priv);
+ goto out;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro);
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (enable)
+ mlx5e_enable_cvlan_filter(priv);
+ else
+ mlx5e_disable_cvlan_filter(priv);
+
+ return 0;
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (!enable && mlx5e_tc_num_filters(priv)) {
+ netdev_err(netdev,
+ "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
+
+static int set_feature_rx_all(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_set_port_fcs(mdev, !enable);
+}
+
+static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mutex_lock(&priv->state_lock);
+
+ priv->channels.params.scatter_fcs_en = enable;
+ err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable);
+ if (err)
+ priv->channels.params.scatter_fcs_en = !enable;
+
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ priv->channels.params.vlan_strip_disable = !enable;
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
+ if (err)
+ priv->channels.params.vlan_strip_disable = enable;
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+#ifdef CONFIG_MLX5_EN_ARFS
+static int set_feature_arfs(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (enable)
+ err = mlx5e_arfs_enable(priv);
+ else
+ err = mlx5e_arfs_disable(priv);
+
+ return err;
+}
+#endif
+
+static int mlx5e_handle_feature(struct net_device *netdev,
+ netdev_features_t *features,
+ netdev_features_t feature,
+ mlx5e_feature_handler feature_handler)
+{
+ netdev_features_t changes = *features ^ netdev->features;
+ bool enable = !!(*features & feature);
+ int err;
+
+ if (!(changes & feature))
+ return 0;
+
+ err = feature_handler(netdev, enable);
+ if (err) {
+ MLX5E_SET_FEATURE(features, feature, !enable);
+ netdev_err(netdev, "%s feature %pNF failed, err %d\n",
+ enable ? "Enable" : "Disable", &feature, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5e_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ netdev_features_t oper_features = features;
+ int err = 0;
+
+#define MLX5E_HANDLE_FEATURE(feature, handler) \
+ mlx5e_handle_feature(netdev, &oper_features, feature, handler)
+
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
+ set_feature_cvlan_filter);
+#ifdef CONFIG_MLX5_ESWITCH
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
+#endif
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
+#ifdef CONFIG_MLX5_EN_ARFS
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
+#endif
+
+ if (err) {
+ netdev->features = oper_features;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_params *params;
+
+ mutex_lock(&priv->state_lock);
+ params = &priv->channels.params;
+ if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) {
+ /* HW strips the outer C-tag header, this is a problem
+ * for S-tag traffic.
+ */
+ features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+ if (!params->vlan_strip_disable)
+ netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
+ }
+ if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
+ features &= ~NETIF_F_LRO;
+ if (params->lro_en)
+ netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
+ }
+
+ if (params->xdp_prog) {
+ if (features & NETIF_F_LRO) {
+ netdev_warn(netdev, "LRO is incompatible with XDP\n");
+ features &= ~NETIF_F_LRO;
+ }
+ }
+
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+ features &= ~NETIF_F_RXHASH;
+ if (netdev->features & NETIF_F_RXHASH)
+ netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+ }
+
+ mutex_unlock(&priv->state_lock);
+
+ return features;
+}
+
+int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
+ change_hw_mtu_cb set_mtu_cb)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels new_channels = {};
+ struct mlx5e_params *params;
+ int err = 0;
+ bool reset;
+
+ mutex_lock(&priv->state_lock);
+
+ params = &priv->channels.params;
+
+ reset = !params->lro_en;
+ reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ new_channels.params = *params;
+ new_channels.params.sw_mtu = new_mtu;
+
+ if (params->xdp_prog &&
+ !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+ netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
+ new_mtu, mlx5e_xdp_max_mtu(params));
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params);
+ u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
+ u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
+
+ reset = reset && (is_linear || (ppw_old != ppw_new));
+ }
+
+ if (!reset) {
+ params->sw_mtu = new_mtu;
+ if (set_mtu_cb)
+ set_mtu_cb(priv);
+ netdev->mtu = params->sw_mtu;
+ goto out;
+ }
+
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb);
+ netdev->mtu = new_channels.params.sw_mtu;
+
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu);
+}
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
+ (mlx5_clock_get_ptp_index(priv->mdev) == -1))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* TX HW timestamp */
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ /* RX HW timestamp */
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ /* Reset CQE compression to Admin default */
+ mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ /* Disable CQE compression */
+ netdev_warn(priv->netdev, "Disabling cqe compression");
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
+ if (err) {
+ netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+ mutex_unlock(&priv->state_lock);
+ return err;
+ }
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ mutex_unlock(&priv->state_lock);
+ return -ERANGE;
+ }
+
+ memcpy(&priv->tstamp, &config, sizeof(config));
+ mutex_unlock(&priv->state_lock);
+
+ /* might need to fix some features */
+ netdev_update_features(priv->netdev);
+
+ return copy_to_user(ifr->ifr_data, &config,
+ sizeof(config)) ? -EFAULT : 0;
+}
+
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config *cfg = &priv->tstamp;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
+}
+
+static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlx5e_hwstamp_set(priv, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx5e_hwstamp_get(priv, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
+}
+
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+ __be16 vlan_proto)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (vlan_proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
+ vlan, qos);
+}
+
+static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
+}
+
+static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
+}
+
+static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
+ max_tx_rate, min_tx_rate);
+}
+
+static int mlx5_vport_link2ifla(u8 esw_link)
+{
+ switch (esw_link) {
+ case MLX5_VPORT_ADMIN_STATE_DOWN:
+ return IFLA_VF_LINK_STATE_DISABLE;
+ case MLX5_VPORT_ADMIN_STATE_UP:
+ return IFLA_VF_LINK_STATE_ENABLE;
+ }
+ return IFLA_VF_LINK_STATE_AUTO;
+}
+
+static int mlx5_ifla_link2vport(u8 ifla_link)
+{
+ switch (ifla_link) {
+ case IFLA_VF_LINK_STATE_DISABLE:
+ return MLX5_VPORT_ADMIN_STATE_DOWN;
+ case IFLA_VF_LINK_STATE_ENABLE:
+ return MLX5_VPORT_ADMIN_STATE_UP;
+ }
+ return MLX5_VPORT_ADMIN_STATE_AUTO;
+}
+
+static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
+ int link_state)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
+ mlx5_ifla_link2vport(link_state));
+}
+
+static int mlx5e_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
+ if (err)
+ return err;
+ ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
+ return 0;
+}
+
+static int mlx5e_get_vf_stats(struct net_device *dev,
+ int vf, struct ifla_vf_stats *vf_stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
+ vf_stats);
+}
+#endif
+
+struct mlx5e_vxlan_work {
+ struct work_struct work;
+ struct mlx5e_priv *priv;
+ u16 port;
+};
+
+static void mlx5e_vxlan_add_work(struct work_struct *work)
+{
+ struct mlx5e_vxlan_work *vxlan_work =
+ container_of(work, struct mlx5e_vxlan_work, work);
+ struct mlx5e_priv *priv = vxlan_work->priv;
+ u16 port = vxlan_work->port;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_vxlan_add_port(priv->mdev->vxlan, port);
+ mutex_unlock(&priv->state_lock);
+
+ kfree(vxlan_work);
+}
+
+static void mlx5e_vxlan_del_work(struct work_struct *work)
+{
+ struct mlx5e_vxlan_work *vxlan_work =
+ container_of(work, struct mlx5e_vxlan_work, work);
+ struct mlx5e_priv *priv = vxlan_work->priv;
+ u16 port = vxlan_work->port;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_vxlan_del_port(priv->mdev->vxlan, port);
+ mutex_unlock(&priv->state_lock);
+ kfree(vxlan_work);
+}
+
+static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
+{
+ struct mlx5e_vxlan_work *vxlan_work;
+
+ vxlan_work = kmalloc(sizeof(*vxlan_work), GFP_ATOMIC);
+ if (!vxlan_work)
+ return;
+
+ if (add)
+ INIT_WORK(&vxlan_work->work, mlx5e_vxlan_add_work);
+ else
+ INIT_WORK(&vxlan_work->work, mlx5e_vxlan_del_work);
+
+ vxlan_work->priv = priv;
+ vxlan_work->port = port;
+ queue_work(priv->wq, &vxlan_work->work);
+}
+
+static void mlx5e_add_vxlan_port(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+ return;
+
+ mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
+}
+
+static void mlx5e_del_vxlan_port(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
+ return;
+
+ mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
+}
+
+static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ unsigned int offset = 0;
+ struct udphdr *udph;
+ u8 proto;
+ u16 port;
+
+ switch (vlan_get_protocol(skb)) {
+ case htons(ETH_P_IP):
+ proto = ip_hdr(skb)->protocol;
+ break;
+ case htons(ETH_P_IPV6):
+ proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
+ break;
+ default:
+ goto out;
+ }
+
+ switch (proto) {
+ case IPPROTO_GRE:
+ return features;
+ case IPPROTO_UDP:
+ udph = udp_hdr(skb);
+ port = be16_to_cpu(udph->dest);
+
+ /* Verify if UDP port is being offloaded by HW */
+ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
+ return features;
+ }
+
+out:
+ /* Disable CSUM and GSO if the udp dport is not offloaded by HW */
+ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ features = vlan_features_check(skb, features);
+ features = vxlan_features_check(skb, features);
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+ if (mlx5e_ipsec_feature_check(skb, netdev, features))
+ return features;
+#endif
+
+ /* Validate if the tunneled packet is being offloaded by HW */
+ if (skb->encapsulation &&
+ (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
+ return mlx5e_tunnel_features_check(priv, skb, features);
+
+ return features;
+}
+
+static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
+ struct mlx5e_txqsq *sq)
+{
+ struct mlx5_eq *eq = sq->cq.mcq.eq;
+ u32 eqe_count;
+
+ netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+ eq->eqn, eq->cons_index, eq->irqn);
+
+ eqe_count = mlx5_eq_poll_irq_disabled(eq);
+ if (!eqe_count)
+ return false;
+
+ netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+ sq->channel->stats->eq_rearm++;
+ return true;
+}
+
+static void mlx5e_tx_timeout_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ tx_timeout_work);
+ struct net_device *dev = priv->netdev;
+ bool reopen_channels = false;
+ int i, err;
+
+ rtnl_lock();
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
+ struct mlx5e_txqsq *sq = priv->txq2sq[i];
+
+ if (!netif_xmit_stopped(dev_queue))
+ continue;
+
+ netdev_err(dev,
+ "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+ i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+ jiffies_to_usecs(jiffies - dev_queue->trans_start));
+
+ /* If we recover a lost interrupt, most likely TX timeout will
+ * be resolved, skip reopening channels
+ */
+ if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
+ clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+ reopen_channels = true;
+ }
+ }
+
+ if (!reopen_channels)
+ goto unlock;
+
+ mlx5e_close_locked(dev);
+ err = mlx5e_open_locked(dev);
+ if (err)
+ netdev_err(priv->netdev,
+ "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+ err);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ rtnl_unlock();
+}
+
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ netdev_err(dev, "TX timeout detected\n");
+ queue_work(priv->wq, &priv->tx_timeout_work);
+}
+
+static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5e_channels new_channels = {};
+
+ if (priv->channels.params.lro_en) {
+ netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+ return -EINVAL;
+ }
+
+ if (MLX5_IPSEC_DEV(priv->mdev)) {
+ netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+ return -EINVAL;
+ }
+
+ new_channels.params = priv->channels.params;
+ new_channels.params.xdp_prog = prog;
+
+ if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
+ netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
+ new_channels.params.sw_mtu,
+ mlx5e_xdp_max_mtu(&new_channels.params));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct bpf_prog *old_prog;
+ bool reset, was_opened;
+ int err = 0;
+ int i;
+
+ mutex_lock(&priv->state_lock);
+
+ if (prog) {
+ err = mlx5e_xdp_allowed(priv, prog);
+ if (err)
+ goto unlock;
+ }
+
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ /* no need for full reset when exchanging programs */
+ reset = (!priv->channels.params.xdp_prog || !prog);
+
+ if (was_opened && reset)
+ mlx5e_close_locked(netdev);
+ if (was_opened && !reset) {
+ /* num_channels is invariant here, so we can take the
+ * batched reference right upfront.
+ */
+ prog = bpf_prog_add(prog, priv->channels.num);
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto unlock;
+ }
+ }
+
+ /* exchange programs, extra prog reference we got from caller
+ * as long as we don't fail from this point onwards.
+ */
+ old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (reset) /* change RQ type according to priv->xdp_prog */
+ mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
+
+ if (was_opened && reset)
+ mlx5e_open_locked(netdev);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+ goto unlock;
+
+ /* exchanging programs w/o reset, we update ref counts on behalf
+ * of the channels RQs here.
+ */
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ napi_synchronize(&c->napi);
+ /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
+
+ old_prog = xchg(&c->rq.xdp_prog, prog);
+
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
+ /* napi_schedule in case we have missed anything */
+ napi_schedule(&c->napi);
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ }
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+
+ /* Need to fix some features. */
+ if (!err)
+ netdev_update_features(netdev);
+
+ return err;
+}
+
+static u32 mlx5e_xdp_query(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ const struct bpf_prog *xdp_prog;
+ u32 prog_id = 0;
+
+ mutex_lock(&priv->state_lock);
+ xdp_prog = priv->channels.params.xdp_prog;
+ if (xdp_prog)
+ prog_id = xdp_prog->aux->id;
+ mutex_unlock(&priv->state_lock);
+
+ return prog_id;
+}
+
+static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return mlx5e_xdp_set(dev, xdp->prog);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = mlx5e_xdp_query(dev);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+const struct net_device_ops mlx5e_netdev_ops = {
+ .ndo_open = mlx5e_open,
+ .ndo_stop = mlx5e_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_setup_tc = mlx5e_setup_tc,
+ .ndo_select_queue = mlx5e_select_queue,
+ .ndo_get_stats64 = mlx5e_get_stats,
+ .ndo_set_rx_mode = mlx5e_set_rx_mode,
+ .ndo_set_mac_address = mlx5e_set_mac,
+ .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid,
+ .ndo_set_features = mlx5e_set_features,
+ .ndo_fix_features = mlx5e_fix_features,
+ .ndo_change_mtu = mlx5e_change_nic_mtu,
+ .ndo_do_ioctl = mlx5e_ioctl,
+ .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
+ .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_features_check = mlx5e_features_check,
+ .ndo_tx_timeout = mlx5e_tx_timeout,
+ .ndo_bpf = mlx5e_xdp,
+ .ndo_xdp_xmit = mlx5e_xdp_xmit,
+#ifdef CONFIG_MLX5_EN_ARFS
+ .ndo_rx_flow_steer = mlx5e_rx_flow_steer,
+#endif
+#ifdef CONFIG_MLX5_ESWITCH
+ /* SRIOV E-Switch NDOs */
+ .ndo_set_vf_mac = mlx5e_set_vf_mac,
+ .ndo_set_vf_vlan = mlx5e_set_vf_vlan,
+ .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk,
+ .ndo_set_vf_trust = mlx5e_set_vf_trust,
+ .ndo_set_vf_rate = mlx5e_set_vf_rate,
+ .ndo_get_vf_config = mlx5e_get_vf_config,
+ .ndo_set_vf_link_state = mlx5e_set_vf_link_state,
+ .ndo_get_vf_stats = mlx5e_get_vf_stats,
+ .ndo_has_offload_stats = mlx5e_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_get_offload_stats,
+#endif
+};
+
+static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EOPNOTSUPP;
+ if (!MLX5_CAP_GEN(mdev, eth_net_offloads) ||
+ !MLX5_CAP_GEN(mdev, nic_flow_table) ||
+ !MLX5_CAP_ETH(mdev, csum_cap) ||
+ !MLX5_CAP_ETH(mdev, max_lso_cap) ||
+ !MLX5_CAP_ETH(mdev, vlan_cap) ||
+ !MLX5_CAP_ETH(mdev, rss_ind_tbl_cap) ||
+ MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.max_ft_level)
+ < 3) {
+ mlx5_core_warn(mdev,
+ "Not creating net device, some required device capabilities are missing\n");
+ return -EOPNOTSUPP;
+ }
+ if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+ mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+ if (!MLX5_CAP_GEN(mdev, cq_moderation))
+ mlx5_core_warn(mdev, "CQ moderation is not supported\n");
+
+ return 0;
+}
+
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+ int num_channels)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ indirection_rqt[i] = i % num_channels;
+}
+
+static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
+{
+ u32 link_speed = 0;
+ u32 pci_bw = 0;
+
+ mlx5e_port_max_linkspeed(mdev, &link_speed);
+ pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
+ mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
+ link_speed, pci_bw);
+
+#define MLX5E_SLOW_PCI_RATIO (2)
+
+ return link_speed && pci_bw &&
+ link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
+}
+
+static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+{
+ struct net_dim_cq_moder moder;
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+ struct net_dim_cq_moder moder;
+
+ moder.cq_period_mode = cq_period_mode;
+ moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+ if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+ moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+ return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+ return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+ NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+ NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->tx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+ } else {
+ params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+ }
+
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
+ params->tx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+ if (params->rx_dim_enabled) {
+ u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+ params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+ } else {
+ params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
+ }
+
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+ params->rx_cq_moderation.cq_period_mode ==
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
+{
+ int i;
+
+ /* The supported periods are organized in ascending order */
+ for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
+ if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
+ break;
+
+ return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
+}
+
+void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ u16 max_channels, u16 mtu)
+{
+ u8 rx_cq_period_mode;
+
+ params->sw_mtu = mtu;
+ params->hard_mtu = MLX5E_ETH_HARD_MTU;
+ params->num_channels = max_channels;
+ params->num_tc = 1;
+
+ /* SQ */
+ params->log_sq_size = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
+ MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+
+ /* set CQE compression */
+ params->rx_cqe_compress_def = false;
+ if (MLX5_CAP_GEN(mdev, cqe_compression) &&
+ MLX5_CAP_GEN(mdev, vport_group_manager))
+ params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
+
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
+
+ /* RQ */
+ /* Prefer Striding RQ, unless any of the following holds:
+ * - Striding RQ configuration is not possible/supported.
+ * - Slow PCI heuristic.
+ * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ */
+ if (!slow_pci_heuristic(mdev) &&
+ mlx5e_striding_rq_possible(mdev, params) &&
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
+ !mlx5e_rx_is_linear_skb(mdev, params)))
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+
+ /* HW LRO */
+
+ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
+ if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+ params->lro_en = !slow_pci_heuristic(mdev);
+ params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+
+ /* CQ moderation params */
+ rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
+ mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+
+ /* TX inline */
+ params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
+
+ /* RSS */
+ params->rss_hfunc = ETH_RSS_HASH_XOR;
+ netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
+ mlx5e_build_default_indir_rqt(params->indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, max_channels);
+}
+
+static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+ priv->msglevel = MLX5E_MSG_LEVEL;
+ priv->max_opened_tc = 1;
+
+ mlx5e_build_nic_params(mdev, &priv->channels.params,
+ profile->max_nch(mdev), netdev->mtu);
+
+ mutex_init(&priv->state_lock);
+
+ INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
+ INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+ INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
+ INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+ mlx5e_timestamp_init(priv);
+}
+
+static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr);
+ if (is_zero_ether_addr(netdev->dev_addr) &&
+ !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
+ eth_hw_addr_random(netdev);
+ mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
+ }
+}
+
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+static const struct switchdev_ops mlx5e_switchdev_ops = {
+ .switchdev_port_attr_get = mlx5e_attr_get,
+};
+#endif
+
+static void mlx5e_build_nic_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ bool fcs_supported;
+ bool fcs_enabled;
+
+ SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
+
+ netdev->netdev_ops = &mlx5e_netdev_ops;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+#endif
+
+ netdev->watchdog_timeo = 15 * HZ;
+
+ netdev->ethtool_ops = &mlx5e_ethtool_ops;
+
+ netdev->vlan_features |= NETIF_F_SG;
+ netdev->vlan_features |= NETIF_F_IP_CSUM;
+ netdev->vlan_features |= NETIF_F_IPV6_CSUM;
+ netdev->vlan_features |= NETIF_F_GRO;
+ netdev->vlan_features |= NETIF_F_TSO;
+ netdev->vlan_features |= NETIF_F_TSO6;
+ netdev->vlan_features |= NETIF_F_RXCSUM;
+ netdev->vlan_features |= NETIF_F_RXHASH;
+
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX;
+
+ if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
+ mlx5e_check_fragmented_striding_rq_cap(mdev))
+ netdev->vlan_features |= NETIF_F_LRO;
+
+ netdev->hw_features = netdev->vlan_features;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+
+ if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+ netdev->hw_enc_features |= NETIF_F_IP_CSUM;
+ netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_enc_features |= NETIF_F_TSO;
+ netdev->hw_enc_features |= NETIF_F_TSO6;
+ netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
+ }
+
+ if (mlx5_vxlan_allowed(mdev->vxlan)) {
+ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+ }
+
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+ netdev->hw_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->gso_partial_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ }
+
+ netdev->hw_features |= NETIF_F_GSO_PARTIAL;
+ netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4;
+ netdev->hw_features |= NETIF_F_GSO_UDP_L4;
+ netdev->features |= NETIF_F_GSO_UDP_L4;
+
+ mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
+
+ if (fcs_supported)
+ netdev->hw_features |= NETIF_F_RXALL;
+
+ if (MLX5_CAP_ETH(mdev, scatter_fcs))
+ netdev->hw_features |= NETIF_F_RXFCS;
+
+ netdev->features = netdev->hw_features;
+ if (!priv->channels.params.lro_en)
+ netdev->features &= ~NETIF_F_LRO;
+
+ if (fcs_enabled)
+ netdev->features &= ~NETIF_F_RXALL;
+
+ if (!priv->channels.params.scatter_fcs_en)
+ netdev->features &= ~NETIF_F_RXFCS;
+
+ /* prefere CQE compression over rxhash */
+ if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+ netdev->features &= ~NETIF_F_RXHASH;
+
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+ if (FT_CAP(flow_modify_en) &&
+ FT_CAP(modify_root) &&
+ FT_CAP(identified_miss_table_mode) &&
+ FT_CAP(flow_table_modify)) {
+#ifdef CONFIG_MLX5_ESWITCH
+ netdev->hw_features |= NETIF_F_HW_TC;
+#endif
+#ifdef CONFIG_MLX5_EN_ARFS
+ netdev->hw_features |= NETIF_F_NTUPLE;
+#endif
+ }
+
+ netdev->features |= NETIF_F_HIGHDMA;
+ netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
+
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ mlx5e_set_netdev_dev_addr(netdev);
+
+#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+ if (MLX5_ESWITCH_MANAGER(mdev))
+ netdev->switchdev_ops = &mlx5e_switchdev_ops;
+#endif
+
+ mlx5e_ipsec_build_netdev(priv);
+ mlx5e_tls_build_netdev(priv);
+}
+
+static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter);
+ if (err) {
+ mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err);
+ priv->q_counter = 0;
+ }
+
+ err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter);
+ if (err) {
+ mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err);
+ priv->drop_rq_q_counter = 0;
+ }
+}
+
+static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
+{
+ if (priv->q_counter)
+ mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
+
+ if (priv->drop_rq_q_counter)
+ mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
+}
+
+static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+ err = mlx5e_ipsec_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
+ err = mlx5e_tls_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
+ mlx5e_build_nic_netdev(netdev);
+ mlx5e_build_tc2txq_maps(priv);
+}
+
+static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_tls_cleanup(priv);
+ mlx5e_ipsec_cleanup(priv);
+}
+
+static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int err;
+
+ err = mlx5e_create_indirect_rqt(priv);
+ if (err)
+ return err;
+
+ err = mlx5e_create_direct_rqts(priv);
+ if (err)
+ goto err_destroy_indirect_rqts;
+
+ err = mlx5e_create_indirect_tirs(priv);
+ if (err)
+ goto err_destroy_direct_rqts;
+
+ err = mlx5e_create_direct_tirs(priv);
+ if (err)
+ goto err_destroy_indirect_tirs;
+
+ err = mlx5e_create_flow_steering(priv);
+ if (err) {
+ mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
+ goto err_destroy_direct_tirs;
+ }
+
+ err = mlx5e_tc_nic_init(priv);
+ if (err)
+ goto err_destroy_flow_steering;
+
+ return 0;
+
+err_destroy_flow_steering:
+ mlx5e_destroy_flow_steering(priv);
+err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+err_destroy_indirect_tirs:
+ mlx5e_destroy_indirect_tirs(priv);
+err_destroy_direct_rqts:
+ mlx5e_destroy_direct_rqts(priv);
+err_destroy_indirect_rqts:
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ return err;
+}
+
+static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
+{
+ mlx5e_tc_nic_cleanup(priv);
+ mlx5e_destroy_flow_steering(priv);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+}
+
+static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_tises(priv);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+ return err;
+ }
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_initialize(priv);
+#endif
+ return 0;
+}
+
+static void mlx5e_nic_enable(struct mlx5e_priv *priv)
+{
+ struct net_device *netdev = priv->netdev;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ mlx5e_init_l2_addr(priv);
+
+ /* Marking the link as currently not needed by the Driver */
+ if (!netif_running(netdev))
+ mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
+
+ /* MTU range: 68 - hw-specific max */
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+ mlx5e_set_dev_port_mtu(priv);
+
+ mlx5_lag_add(mdev, netdev);
+
+ mlx5e_enable_async_events(priv);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_register_vport_reps(priv);
+
+ if (netdev->reg_state != NETREG_REGISTERED)
+ return;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_init_app(priv);
+#endif
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+
+ rtnl_lock();
+ if (netif_running(netdev))
+ mlx5e_open(netdev);
+ netif_device_attach(netdev);
+ rtnl_unlock();
+}
+
+static void mlx5e_nic_disable(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (priv->netdev->reg_state == NETREG_REGISTERED)
+ mlx5e_dcbnl_delete_app(priv);
+#endif
+
+ rtnl_lock();
+ if (netif_running(priv->netdev))
+ mlx5e_close(priv->netdev);
+ netif_device_detach(priv->netdev);
+ rtnl_unlock();
+
+ queue_work(priv->wq, &priv->set_rx_mode_work);
+
+ if (MLX5_ESWITCH_MANAGER(priv->mdev))
+ mlx5e_unregister_vport_reps(priv);
+
+ mlx5e_disable_async_events(priv);
+ mlx5_lag_remove(mdev);
+}
+
+static const struct mlx5e_profile mlx5e_nic_profile = {
+ .init = mlx5e_nic_init,
+ .cleanup = mlx5e_nic_cleanup,
+ .init_rx = mlx5e_init_nic_rx,
+ .cleanup_rx = mlx5e_cleanup_nic_rx,
+ .init_tx = mlx5e_init_nic_tx,
+ .cleanup_tx = mlx5e_cleanup_nic_tx,
+ .enable = mlx5e_nic_enable,
+ .disable = mlx5e_nic_disable,
+ .update_stats = mlx5e_update_ndo_stats,
+ .max_nch = mlx5e_get_max_num_channels,
+ .update_carrier = mlx5e_update_carrier,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+ .max_tc = MLX5E_MAX_NUM_TC,
+};
+
+/* mlx5e generic netdev management API (move to en_common.c) */
+
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ int nch = profile->max_nch(mdev);
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+
+ netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
+ nch * profile->max_tc,
+ nch);
+ if (!netdev) {
+ mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
+ return NULL;
+ }
+
+#ifdef CONFIG_MLX5_EN_ARFS
+ netdev->rx_cpu_rmap = mdev->rmap;
+#endif
+
+ profile->init(mdev, netdev, profile, ppriv);
+
+ netif_carrier_off(netdev);
+
+ priv = netdev_priv(netdev);
+
+ priv->wq = create_singlethread_workqueue("mlx5e");
+ if (!priv->wq)
+ goto err_cleanup_nic;
+
+ return netdev;
+
+err_cleanup_nic:
+ if (profile->cleanup)
+ profile->cleanup(priv);
+ free_netdev(netdev);
+
+ return NULL;
+}
+
+int mlx5e_attach_netdev(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ const struct mlx5e_profile *profile;
+ int max_nch;
+ int err;
+
+ profile = priv->profile;
+ clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
+
+ /* max number of channels may have changed */
+ max_nch = mlx5e_get_max_num_channels(priv->mdev);
+ if (priv->channels.params.num_channels > max_nch) {
+ mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
+ priv->channels.params.num_channels = max_nch;
+ mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, max_nch);
+ }
+
+ err = profile->init_tx(priv);
+ if (err)
+ goto out;
+
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
+ err = profile->init_rx(priv);
+ if (err)
+ goto err_close_drop_rq;
+
+ if (profile->enable)
+ profile->enable(priv);
+
+ return 0;
+
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
+ profile->cleanup_tx(priv);
+
+out:
+ return err;
+}
+
+void mlx5e_detach_netdev(struct mlx5e_priv *priv)
+{
+ const struct mlx5e_profile *profile = priv->profile;
+
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+
+ if (profile->disable)
+ profile->disable(priv);
+ flush_workqueue(priv->wq);
+
+ profile->cleanup_rx(priv);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
+ profile->cleanup_tx(priv);
+ cancel_delayed_work_sync(&priv->update_stats_work);
+}
+
+void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
+{
+ const struct mlx5e_profile *profile = priv->profile;
+ struct net_device *netdev = priv->netdev;
+
+ destroy_workqueue(priv->wq);
+ if (profile->cleanup)
+ profile->cleanup(priv);
+ free_netdev(netdev);
+}
+
+/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying
+ * hardware contexts and to connect it to the current netdev.
+ */
+static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ struct net_device *netdev = priv->netdev;
+ int err;
+
+ if (netif_device_present(netdev))
+ return 0;
+
+ err = mlx5e_create_mdev_resources(mdev);
+ if (err)
+ return err;
+
+ err = mlx5e_attach_netdev(priv);
+ if (err) {
+ mlx5e_destroy_mdev_resources(mdev);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ struct net_device *netdev = priv->netdev;
+
+ if (!netif_device_present(netdev))
+ return;
+
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_mdev_resources(mdev);
+}
+
+static void *mlx5e_add(struct mlx5_core_dev *mdev)
+{
+ struct net_device *netdev;
+ void *rpriv = NULL;
+ void *priv;
+ int err;
+
+ err = mlx5e_check_required_hca_cap(mdev);
+ if (err)
+ return NULL;
+
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_ESWITCH_MANAGER(mdev)) {
+ rpriv = mlx5e_alloc_nic_rep_priv(mdev);
+ if (!rpriv) {
+ mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
+ return NULL;
+ }
+ }
+#endif
+
+ netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv);
+ if (!netdev) {
+ mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
+ goto err_free_rpriv;
+ }
+
+ priv = netdev_priv(netdev);
+
+ err = mlx5e_attach(mdev, priv);
+ if (err) {
+ mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
+ goto err_destroy_netdev;
+ }
+
+ err = register_netdev(netdev);
+ if (err) {
+ mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
+ goto err_detach;
+ }
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_init_app(priv);
+#endif
+ return priv;
+
+err_detach:
+ mlx5e_detach(mdev, priv);
+err_destroy_netdev:
+ mlx5e_destroy_netdev(priv);
+err_free_rpriv:
+ kfree(rpriv);
+ return NULL;
+}
+
+static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ void *ppriv = priv->ppriv;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ mlx5e_dcbnl_delete_app(priv);
+#endif
+ unregister_netdev(priv->netdev);
+ mlx5e_detach(mdev, vpriv);
+ mlx5e_destroy_netdev(priv);
+ kfree(ppriv);
+}
+
+static void *mlx5e_get_netdev(void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ return priv->netdev;
+}
+
+static struct mlx5_interface mlx5e_interface = {
+ .add = mlx5e_add,
+ .remove = mlx5e_remove,
+ .attach = mlx5e_attach,
+ .detach = mlx5e_detach,
+ .event = mlx5e_async_event,
+ .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
+ .get_dev = mlx5e_get_netdev,
+};
+
+void mlx5e_init(void)
+{
+ mlx5e_ipsec_build_inverse_table();
+ mlx5e_build_ptys2ethtool_map();
+ mlx5_register_interface(&mlx5e_interface);
+}
+
+void mlx5e_cleanup(void)
+{
+ mlx5_unregister_interface(&mlx5e_interface);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
new file mode 100644
index 000000000..1ab40d622
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -0,0 +1,1295 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/mlx5/fs.h>
+#include <net/switchdev.h>
+#include <net/pkt_cls.h>
+#include <net/act_api.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+
+#include "eswitch.h"
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+#include "fs_core.h"
+
+#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
+ max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+#define MLX5E_REP_PARAMS_LOG_RQ_SIZE \
+ max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)
+
+static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
+
+static void mlx5e_rep_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
+ sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
+}
+
+static const struct counter_desc sw_rep_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+};
+
+struct vport_stats {
+ u64 vport_rx_packets;
+ u64 vport_tx_packets;
+ u64 vport_rx_bytes;
+ u64 vport_tx_bytes;
+};
+
+static const struct counter_desc vport_rep_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) },
+ { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) },
+};
+
+#define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
+#define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc)
+
+static void mlx5e_rep_get_strings(struct net_device *dev,
+ u32 stringset, uint8_t *data)
+{
+ int i, j;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
+ strcpy(data + (i * ETH_GSTRING_LEN),
+ sw_rep_stats_desc[i].format);
+ for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++)
+ strcpy(data + (i * ETH_GSTRING_LEN),
+ vport_rep_stats_desc[j].format);
+ break;
+ }
+}
+
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct rtnl_link_stats64 *vport_stats;
+ struct ifla_vf_stats vf_stats;
+ int err;
+
+ err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
+ if (err) {
+ pr_warn("vport %d error %d reading stats\n", rep->vport, err);
+ return;
+ }
+
+ vport_stats = &priv->stats.vf_vport;
+ /* flip tx/rx as we are reporting the counters for the switch vport */
+ vport_stats->rx_packets = vf_stats.tx_packets;
+ vport_stats->rx_bytes = vf_stats.tx_bytes;
+ vport_stats->tx_packets = vf_stats.rx_packets;
+ vport_stats->tx_bytes = vf_stats.rx_bytes;
+}
+
+static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5e_sw_stats *s = &priv->stats.sw;
+ struct mlx5e_rq_stats *rq_stats;
+ struct mlx5e_sq_stats *sq_stats;
+ int i, j;
+
+ memset(s, 0, sizeof(*s));
+ for (i = 0; i < priv->channels.num; i++) {
+ struct mlx5e_channel *c = priv->channels.c[i];
+
+ rq_stats = c->rq.stats;
+
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+
+ for (j = 0; j < priv->channels.params.num_tc; j++) {
+ sq_stats = c->sq[j].stats;
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_queue_dropped += sq_stats->dropped;
+ }
+ }
+}
+
+static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int i, j;
+
+ if (!data)
+ return;
+
+ mutex_lock(&priv->state_lock);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_rep_update_sw_counters(priv);
+ mlx5e_rep_update_hw_counters(priv);
+ mutex_unlock(&priv->state_lock);
+
+ for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++)
+ data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
+ sw_rep_stats_desc, i);
+
+ for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++)
+ data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport,
+ vport_rep_stats_desc, j);
+}
+
+static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return NUM_VPORT_REP_SW_COUNTERS + NUM_VPORT_REP_HW_COUNTERS;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
+ .get_drvinfo = mlx5e_rep_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlx5e_rep_get_strings,
+ .get_sset_count = mlx5e_rep_get_sset_count,
+ .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+};
+
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return -EOPNOTSUPP;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+ attr->u.ppid.id_len = ETH_ALEN;
+ ether_addr_copy(attr->u.ppid.id, rep->hw_id);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_sq *rep_sq, *tmp;
+ struct mlx5e_rep_priv *rpriv;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ list_del(&rep_sq->list);
+ kfree(rep_sq);
+ }
+}
+
+static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ u32 *sqns_array, int sqns_num)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+ int err;
+ int i;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return 0;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ for (i = 0; i < sqns_num; i++) {
+ rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
+ if (!rep_sq) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ /* Add re-inject rule to the PF/representor sqs */
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
+ rep->vport,
+ sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule = flow_rule;
+ list_add(&rep_sq->list, &rpriv->vport_sqs_list);
+ }
+ return 0;
+
+out_err:
+ mlx5e_sqs2vport_stop(esw, rep);
+ return err;
+}
+
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5e_channel *c;
+ int n, tc, num_sqs = 0;
+ int err = -ENOMEM;
+ u32 *sqs;
+
+ sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL);
+ if (!sqs)
+ goto out;
+
+ for (n = 0; n < priv->channels.num; n++) {
+ c = priv->channels.c[n];
+ for (tc = 0; tc < c->num_tc; tc++)
+ sqs[num_sqs++] = c->sq[tc].sqn;
+ }
+
+ err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
+ kfree(sqs);
+
+out:
+ if (err)
+ netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err);
+ return err;
+}
+
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+ mlx5e_sqs2vport_stop(esw, rep);
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ unsigned long ipv6_interval = NEIGH_VAR(&nd_tbl.parms,
+ DELAY_PROBE_TIME);
+#else
+ unsigned long ipv6_interval = ~0UL;
+#endif
+ unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
+ DELAY_PROBE_TIME);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ mlx5_fc_queue_stats_work(priv->mdev,
+ &neigh_update->neigh_stats_work,
+ neigh_update->min_interval);
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+ neigh_update.neigh_stats_work.work);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ rtnl_lock();
+ if (!list_empty(&rpriv->neigh_update.neigh_list))
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list)
+ mlx5e_tc_update_neigh_used_value(nhe);
+
+ rtnl_unlock();
+}
+
+static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+ refcount_inc(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+ if (refcount_dec_and_test(&nhe->refcnt))
+ kfree(nhe);
+}
+
+static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ bool neigh_connected,
+ unsigned char ha[ETH_ALEN])
+{
+ struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+
+ ASSERT_RTNL();
+
+ if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_tc_encap_flows_del(priv, e);
+
+ if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+ ether_addr_copy(e->h_dest, ha);
+ ether_addr_copy(eth->h_dest, ha);
+
+ mlx5e_tc_encap_flows_add(priv, e);
+ }
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+ struct mlx5e_neigh_hash_entry *nhe =
+ container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
+ struct neighbour *n = nhe->n;
+ struct mlx5e_encap_entry *e;
+ unsigned char ha[ETH_ALEN];
+ struct mlx5e_priv *priv;
+ bool neigh_connected;
+ bool encap_connected;
+ u8 nud_state, dead;
+
+ rtnl_lock();
+
+ /* If these parameters are changed after we release the lock,
+ * we'll receive another event letting us know about it.
+ * We use this lock to avoid inconsistency between the neigh validity
+ * and it's hw address.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+
+ neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+ priv = netdev_priv(e->out_dev);
+
+ if (encap_connected != neigh_connected ||
+ !ether_addr_equal(e->h_dest, ha))
+ mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+ }
+ mlx5e_rep_neigh_entry_release(nhe);
+ rtnl_unlock();
+ neigh_release(n);
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh);
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+ neigh_update.netevent_nb);
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_neigh_hash_entry *nhe = NULL;
+ struct mlx5e_neigh m_neigh = {};
+ struct neigh_parms *p;
+ struct neighbour *n;
+ bool found = false;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (n->tbl != &nd_tbl && n->tbl != &arp_tbl)
+#else
+ if (n->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ m_neigh.dev = n->dev;
+ m_neigh.family = n->ops->family;
+ memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+ /* We are in atomic context and can't take RTNL mutex, so use
+ * spin_lock_bh to lookup the neigh table. bh is used since
+ * netevent can be called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+ if (!nhe) {
+ spin_unlock_bh(&neigh_update->encap_lock);
+ return NOTIFY_DONE;
+ }
+
+ /* This assignment is valid as long as the the neigh reference
+ * is taken
+ */
+ nhe->n = n;
+
+ /* Take a reference to ensure the neighbour and mlx5 encap
+ * entry won't be destructed until we drop the reference in
+ * delayed work.
+ */
+ neigh_hold(n);
+ mlx5e_rep_neigh_entry_hold(nhe);
+
+ if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+ mlx5e_rep_neigh_entry_release(nhe);
+ neigh_release(n);
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ break;
+
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We check the device is present since we don't care about
+ * changes in the default table, we only care about changes
+ * done per device delay prob time parameter.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!p->dev || (p->tbl != &nd_tbl && p->tbl != &arp_tbl))
+#else
+ if (!p->dev || p->tbl != &arp_tbl)
+#endif
+ return NOTIFY_DONE;
+
+ /* We are in atomic context and can't take RTNL mutex,
+ * so use spin_lock_bh to walk the neigh list and look for
+ * the relevant device. bh is used since netevent can be
+ * called from a softirq context.
+ */
+ spin_lock_bh(&neigh_update->encap_lock);
+ list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) {
+ if (p->dev == nhe->m_neigh.dev) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_bh(&neigh_update->encap_lock);
+ if (!found)
+ return NOTIFY_DONE;
+
+ neigh_update->min_interval = min_t(unsigned long,
+ NEIGH_VAR(p, DELAY_PROBE_TIME),
+ neigh_update->min_interval);
+ mlx5_fc_update_sampling_interval(priv->mdev,
+ neigh_update->min_interval);
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+ .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+ .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+ .key_len = sizeof(struct mlx5e_neigh),
+ .automatic_shrinking = true,
+};
+
+static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ int err;
+
+ err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&neigh_update->neigh_list);
+ spin_lock_init(&neigh_update->encap_lock);
+ INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+ mlx5e_rep_neigh_stats_work);
+ mlx5e_rep_neigh_update_init_interval(rpriv);
+
+ rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+ err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+ if (err)
+ goto out_err;
+ return 0;
+
+out_err:
+ rhashtable_destroy(&neigh_update->neigh_ht);
+ return err;
+}
+
+static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+ flush_workqueue(priv->wq); /* flush neigh update works */
+
+ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+ rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ int err;
+
+ err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ if (err)
+ return err;
+
+ list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ spin_lock_bh(&rpriv->neigh_update.encap_lock);
+
+ list_del(&nhe->neigh_list);
+
+ rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+ &nhe->rhash_node,
+ mlx5e_neigh_ht_params);
+ spin_unlock_bh(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under RTNL lock or under the
+ * representor's encap_lock in case RTNL mutex can't be held.
+ */
+static struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+ struct mlx5e_neigh *m_neigh)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+ return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+ mlx5e_neigh_ht_params);
+}
+
+static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct mlx5e_neigh_hash_entry **nhe)
+{
+ int err;
+
+ *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+ if (!*nhe)
+ return -ENOMEM;
+
+ memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+ INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+ INIT_LIST_HEAD(&(*nhe)->encap_list);
+ refcount_set(&(*nhe)->refcnt, 1);
+
+ err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+ if (err)
+ goto out_free;
+ return 0;
+
+out_free:
+ kfree(*nhe);
+ return err;
+}
+
+static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
+ struct mlx5e_neigh_hash_entry *nhe)
+{
+ /* The neigh hash entry must be removed from the hash table regardless
+ * of the reference count value, so it won't be found by the next
+ * neigh notification call. The neigh hash entry reference count is
+ * incremented only during creation and neigh notification calls and
+ * protects from freeing the nhe struct.
+ */
+ mlx5e_rep_neigh_entry_remove(priv, nhe);
+ mlx5e_rep_neigh_entry_release(nhe);
+}
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+ int err;
+
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+ if (!nhe) {
+ err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+ if (err)
+ return err;
+ }
+ list_add(&e->encap_list, &nhe->encap_list);
+ return 0;
+}
+
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5e_neigh_hash_entry *nhe;
+
+ list_del(&e->encap_list);
+ nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+
+ if (list_empty(&nhe->encap_list))
+ mlx5e_rep_neigh_entry_destroy(priv, nhe);
+}
+
+static int mlx5e_rep_open(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_open_locked(dev);
+ if (err)
+ goto unlock;
+
+ if (!mlx5_modify_vport_admin_state(priv->mdev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
+ netif_carrier_on(dev);
+
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+static int mlx5e_rep_close(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int ret;
+
+ mutex_lock(&priv->state_lock);
+ mlx5_modify_vport_admin_state(priv->mdev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
+ ret = mlx5e_close_locked(dev);
+ mutex_unlock(&priv->state_lock);
+ return ret;
+}
+
+static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
+ char *buf, size_t len)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ int ret;
+
+ ret = snprintf(buf, len, "%d", rep->vport - 1);
+ if (ret >= len)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *cls_flower, int flags)
+{
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlx5e_configure_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_DESTROY:
+ return mlx5e_delete_flower(priv, cls_flower, flags);
+ case TC_CLSFLOWER_STATS:
+ return mlx5e_stats_flower(priv, cls_flower, flags);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
+ priv, priv, f->extack);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_setup_tc_block(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+
+ if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ return false;
+
+ rep = rpriv->rep;
+ if (esw->mode == SRIOV_OFFLOADS &&
+ rep && rep->vport == FDB_UPLINK_VPORT)
+ return true;
+
+ return false;
+}
+
+static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+
+ if (!MLX5_ESWITCH_MANAGER(priv->mdev))
+ return false;
+
+ rep = rpriv->rep;
+ if (rep && rep->vport != FDB_UPLINK_VPORT)
+ return true;
+
+ return false;
+}
+
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
+ return true;
+ }
+
+ return false;
+}
+
+static int
+mlx5e_get_sw_stats64(const struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+ mlx5e_rep_update_sw_counters(priv);
+
+ stats->rx_packets = sstats->rx_packets;
+ stats->rx_bytes = sstats->rx_bytes;
+ stats->tx_packets = sstats->tx_packets;
+ stats->tx_bytes = sstats->tx_bytes;
+
+ stats->tx_dropped = sstats->tx_queue_dropped;
+
+ return 0;
+}
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
+{
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ return mlx5e_get_sw_stats64(dev, sp);
+ }
+
+ return -EINVAL;
+}
+
+static void
+mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ /* update HW stats in background for next time */
+ queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+
+ memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
+}
+
+static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
+ .switchdev_port_attr_get = mlx5e_attr_get,
+};
+
+static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu)
+{
+ return mlx5e_change_mtu(netdev, new_mtu, NULL);
+}
+
+static const struct net_device_ops mlx5e_netdev_ops_rep = {
+ .ndo_open = mlx5e_rep_open,
+ .ndo_stop = mlx5e_rep_close,
+ .ndo_start_xmit = mlx5e_xmit,
+ .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
+ .ndo_setup_tc = mlx5e_rep_setup_tc,
+ .ndo_get_stats64 = mlx5e_rep_get_stats,
+ .ndo_has_offload_stats = mlx5e_has_offload_stats,
+ .ndo_get_offload_stats = mlx5e_get_offload_stats,
+ .ndo_change_mtu = mlx5e_change_rep_mtu,
+};
+
+static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params, u16 mtu)
+{
+ u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
+ MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+
+ params->hard_mtu = MLX5E_ETH_HARD_MTU;
+ params->sw_mtu = mtu;
+ params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
+ params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+ params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
+
+ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+ mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
+
+ params->num_tc = 1;
+ params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+ mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+}
+
+static void mlx5e_build_rep_netdev(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u16 max_mtu;
+
+ netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+
+ netdev->watchdog_timeo = 15 * HZ;
+
+ netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
+
+ netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
+
+ netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
+ netdev->hw_features |= NETIF_F_HW_TC;
+
+ eth_hw_addr_random(netdev);
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
+}
+
+static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+
+ mutex_init(&priv->state_lock);
+
+ INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+ priv->channels.params.num_channels = profile->max_nch(mdev);
+
+ mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
+ mlx5e_build_rep_netdev(netdev);
+
+ mlx5e_timestamp_init(priv);
+}
+
+static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+ int err;
+
+ mlx5e_init_l2_addr(priv);
+
+ err = mlx5e_create_direct_rqts(priv);
+ if (err)
+ return err;
+
+ err = mlx5e_create_direct_tirs(priv);
+ if (err)
+ goto err_destroy_direct_rqts;
+
+ flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
+ rep->vport,
+ priv->direct_tir[0].tirn);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ goto err_destroy_direct_tirs;
+ }
+ rpriv->vport_rx_rule = flow_rule;
+
+ return 0;
+
+err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+err_destroy_direct_rqts:
+ mlx5e_destroy_direct_rqts(priv);
+ return err;
+}
+
+static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+ mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_direct_rqts(priv);
+}
+
+static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_tises(priv);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
+ return err;
+ }
+ return 0;
+}
+
+static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev)
+{
+#define MLX5E_PORT_REPRESENTOR_NCH 1
+ return MLX5E_PORT_REPRESENTOR_NCH;
+}
+
+static const struct mlx5e_profile mlx5e_rep_profile = {
+ .init = mlx5e_init_rep,
+ .init_rx = mlx5e_init_rep_rx,
+ .cleanup_rx = mlx5e_cleanup_rep_rx,
+ .init_tx = mlx5e_init_rep_tx,
+ .cleanup_tx = mlx5e_cleanup_nic_tx,
+ .update_stats = mlx5e_rep_update_hw_counters,
+ .max_nch = mlx5e_get_rep_max_num_channels,
+ .update_carrier = NULL,
+ .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */,
+ .max_tc = 1,
+};
+
+/* e-Switch vport representors */
+
+static int
+mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ int err;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_add_sqs_fwd_rules(priv);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err)
+ goto err_remove_sqs;
+
+ /* init shared tc flow table */
+ err = mlx5e_tc_esw_init(&rpriv->tc_ht);
+ if (err)
+ goto err_neigh_cleanup;
+
+ return 0;
+
+err_neigh_cleanup:
+ mlx5e_rep_neigh_cleanup(rpriv);
+err_remove_sqs:
+ mlx5e_remove_sqs_fwd_rules(priv);
+ return err;
+}
+
+static void
+mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_remove_sqs_fwd_rules(priv);
+
+ /* clean uplink offloaded TC rules, delete shared tc flow table */
+ mlx5e_tc_esw_cleanup(&rpriv->tc_ht);
+
+ mlx5e_rep_neigh_cleanup(rpriv);
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5e_rep_priv *rpriv;
+ struct net_device *netdev;
+ struct mlx5e_priv *upriv;
+ int err;
+
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return -ENOMEM;
+
+ netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, rpriv);
+ if (!netdev) {
+ pr_warn("Failed to create representor netdev for vport %d\n",
+ rep->vport);
+ kfree(rpriv);
+ return -EINVAL;
+ }
+
+ rpriv->netdev = netdev;
+ rpriv->rep = rep;
+ rep->rep_if[REP_ETH].priv = rpriv;
+ INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+
+ err = mlx5e_attach_netdev(netdev_priv(netdev));
+ if (err) {
+ pr_warn("Failed to attach representor netdev for vport %d\n",
+ rep->vport);
+ goto err_destroy_netdev;
+ }
+
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err) {
+ pr_warn("Failed to initialized neighbours handling for vport %d\n",
+ rep->vport);
+ goto err_detach_netdev;
+ }
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
+ upriv = netdev_priv(uplink_rpriv->netdev);
+ err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev,
+ upriv);
+ if (err)
+ goto err_neigh_cleanup;
+
+ err = register_netdev(netdev);
+ if (err) {
+ pr_warn("Failed to register representor netdev for vport %d\n",
+ rep->vport);
+ goto err_egdev_cleanup;
+ }
+
+ return 0;
+
+err_egdev_cleanup:
+ tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
+ upriv);
+
+err_neigh_cleanup:
+ mlx5e_rep_neigh_cleanup(rpriv);
+
+err_detach_netdev:
+ mlx5e_detach_netdev(netdev_priv(netdev));
+
+err_destroy_netdev:
+ mlx5e_destroy_netdev(netdev_priv(netdev));
+ kfree(rpriv);
+ return err;
+}
+
+static void
+mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct net_device *netdev = rpriv->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *uplink_rpriv;
+ void *ppriv = priv->ppriv;
+ struct mlx5e_priv *upriv;
+
+ unregister_netdev(netdev);
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
+ REP_ETH);
+ upriv = netdev_priv(uplink_rpriv->netdev);
+ tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev,
+ upriv);
+ mlx5e_rep_neigh_cleanup(rpriv);
+ mlx5e_detach_netdev(priv);
+ mlx5e_destroy_netdev(priv);
+ kfree(ppriv); /* mlx5e_rep_priv */
+}
+
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+
+ return rpriv->netdev;
+}
+
+static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++) {
+ struct mlx5_eswitch_rep_if rep_if = {};
+
+ rep_if.load = mlx5e_vport_rep_load;
+ rep_if.unload = mlx5e_vport_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
+ mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
+ }
+}
+
+static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+ int vport;
+
+ for (vport = 1; vport < total_vfs; vport++)
+ mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
+}
+
+void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5_eswitch_rep_if rep_if;
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = priv->ppriv;
+ rpriv->netdev = priv->netdev;
+
+ rep_if.load = mlx5e_nic_rep_load;
+ rep_if.unload = mlx5e_nic_rep_unload;
+ rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
+ rep_if.priv = rpriv;
+ INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+ mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
+
+ mlx5e_rep_register_vf_vports(priv); /* VFs vports */
+}
+
+void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
+ mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
+}
+
+void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return NULL;
+
+ rpriv->rep = &esw->offloads.vport_reps[0];
+ return rpriv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
new file mode 100644
index 000000000..844d32d5c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_REP_H__
+#define __MLX5E_REP_H__
+
+#include <net/ip_tunnels.h>
+#include <linux/rhashtable.h>
+#include "eswitch.h"
+#include "en.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+struct mlx5e_neigh_update_table {
+ struct rhashtable neigh_ht;
+ /* Save the neigh hash entries in a list in addition to the hash table
+ * (neigh_ht). In order to iterate easily over the neigh entries.
+ * Used for stats query.
+ */
+ struct list_head neigh_list;
+ /* protect lookup/remove operations */
+ spinlock_t encap_lock;
+ struct notifier_block netevent_nb;
+ struct delayed_work neigh_stats_work;
+ unsigned long min_interval; /* jiffies */
+};
+
+struct mlx5e_rep_priv {
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5e_neigh_update_table neigh_update;
+ struct net_device *netdev;
+ struct mlx5_flow_handle *vport_rx_rule;
+ struct list_head vport_sqs_list;
+ struct rhashtable tc_ht; /* valid for uplink rep */
+};
+
+static inline
+struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep)
+{
+ return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv;
+}
+
+struct mlx5e_neigh {
+ struct net_device *dev;
+ union {
+ __be32 v4;
+ struct in6_addr v6;
+ } dst_ip;
+ int family;
+};
+
+struct mlx5e_neigh_hash_entry {
+ struct rhash_head rhash_node;
+ struct mlx5e_neigh m_neigh;
+
+ /* Save the neigh hash entry in a list on the representor in
+ * addition to the hash table. In order to iterate easily over the
+ * neighbour entries. Used for stats query.
+ */
+ struct list_head neigh_list;
+
+ /* encap list sharing the same neigh */
+ struct list_head encap_list;
+
+ /* valid only when the neigh reference is taken during
+ * neigh_update_work workqueue callback.
+ */
+ struct neighbour *n;
+ struct work_struct neigh_update_work;
+
+ /* neigh hash entry can be deleted only when the refcount is zero.
+ * refcount is needed to avoid neigh hash entry removal by TC, while
+ * it's used by the neigh notification call.
+ */
+ refcount_t refcnt;
+
+ /* Save the last reported time offloaded trafic pass over one of the
+ * neigh hash entry flows. Use it to periodically update the neigh
+ * 'used' value and avoid neigh deleting by the kernel.
+ */
+ unsigned long reported_lastuse;
+};
+
+enum {
+ /* set when the encap entry is successfully offloaded into HW */
+ MLX5_ENCAP_ENTRY_VALID = BIT(0),
+};
+
+struct mlx5e_encap_entry {
+ /* neigh hash entry list of encaps sharing the same neigh */
+ struct list_head encap_list;
+ struct mlx5e_neigh m_neigh;
+ /* a node of the eswitch encap hash table which keeping all the encap
+ * entries
+ */
+ struct hlist_node encap_hlist;
+ struct list_head flows;
+ u32 encap_id;
+ struct ip_tunnel_info tun_info;
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+
+ struct net_device *out_dev;
+ int tunnel_type;
+ u8 flags;
+ char *encap_header;
+ int encap_size;
+};
+
+struct mlx5e_rep_sq {
+ struct mlx5_flow_handle *send_to_vport_rule;
+ struct list_head list;
+};
+
+void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
+void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
+void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
+int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
+void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp);
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
+
+int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {}
+static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {}
+static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
+static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
new file mode 100644
index 000000000..9d86e49a7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -0,0 +1,1470 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/prefetch.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <net/busy_poll.h>
+#include <net/ip6_checksum.h>
+#include <net/page_pool.h>
+#include <net/inet_ecn.h>
+#include "en.h"
+#include "en_tc.h"
+#include "eswitch.h"
+#include "en_rep.h"
+#include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/tls_rxtx.h"
+#include "lib/clock.h"
+#include "en/xdp.h"
+
+static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
+{
+ return config->rx_filter == HWTSTAMP_FILTER_ALL;
+}
+
+static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
+ void *data)
+{
+ u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc);
+
+ memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
+}
+
+static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq, u32 cqcc)
+{
+ mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
+ cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt);
+ cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
+ rq->stats->cqe_compress_blks++;
+}
+
+static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
+{
+ mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
+ cq->mini_arr_idx = 0;
+}
+
+static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
+{
+ struct mlx5_cqwq *wq = &cq->wq;
+
+ u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
+ u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
+ u32 wq_sz = mlx5_cqwq_get_size(wq);
+ u32 ci_top = min_t(u32, wq_sz, ci + n);
+
+ for (; ci < ci_top; ci++, n--) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+ cqe->op_own = op_own;
+ }
+
+ if (unlikely(ci == wq_sz)) {
+ op_own = !op_own;
+ for (ci = 0; ci < n; ci++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+ cqe->op_own = op_own;
+ }
+ }
+}
+
+static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq, u32 cqcc)
+{
+ cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
+ cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
+ cq->title.op_own &= 0xf0;
+ cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
+ cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
+
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+ cq->decmprs_wqe_counter +=
+ mpwrq_get_cqe_consumed_strides(&cq->title);
+ else
+ cq->decmprs_wqe_counter =
+ mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1);
+}
+
+static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq, u32 cqcc)
+{
+ mlx5e_decompress_cqe(rq, cq, cqcc);
+ cq->title.rss_hash_type = 0;
+ cq->title.rss_hash_result = 0;
+}
+
+static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq,
+ int update_owner_only,
+ int budget_rem)
+{
+ u32 cqcc = cq->wq.cc + update_owner_only;
+ u32 cqe_count;
+ u32 i;
+
+ cqe_count = min_t(u32, cq->decmprs_left, budget_rem);
+
+ for (i = update_owner_only; i < cqe_count;
+ i++, cq->mini_arr_idx++, cqcc++) {
+ if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
+ mlx5e_read_mini_arr_slot(cq, cqcc);
+
+ mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
+ rq->handle_rx_cqe(rq, &cq->title);
+ }
+ mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
+ cq->wq.cc = cqcc;
+ cq->decmprs_left -= cqe_count;
+ rq->stats->cqe_compress_pkts += cqe_count;
+
+ return cqe_count;
+}
+
+static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
+ struct mlx5e_cq *cq,
+ int budget_rem)
+{
+ mlx5e_read_title_slot(rq, cq, cq->wq.cc);
+ mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
+ mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
+ rq->handle_rx_cqe(rq, &cq->title);
+ cq->mini_arr_idx++;
+
+ return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
+}
+
+static inline bool mlx5e_page_is_reserved(struct page *page)
+{
+ return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id();
+}
+
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct mlx5e_page_cache *cache = &rq->page_cache;
+ u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ if (tail_next == cache->head) {
+ stats->cache_full++;
+ return false;
+ }
+
+ if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
+ stats->cache_waive++;
+ return false;
+ }
+
+ cache->page_cache[cache->tail] = *dma_info;
+ cache->tail = tail_next;
+ return true;
+}
+
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct mlx5e_page_cache *cache = &rq->page_cache;
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ if (unlikely(cache->head == cache->tail)) {
+ stats->cache_empty++;
+ return false;
+ }
+
+ if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+ stats->cache_busy++;
+ return false;
+ }
+
+ *dma_info = cache->page_cache[cache->head];
+ cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+ stats->cache_reuse++;
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ return true;
+}
+
+static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ if (mlx5e_rx_cache_get(rq, dma_info))
+ return 0;
+
+ dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
+ if (unlikely(!dma_info->page))
+ return -ENOMEM;
+
+ dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
+ PAGE_SIZE, rq->buff.map_dir);
+ if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
+ put_page(dma_info->page);
+ dma_info->page = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
+{
+ dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
+}
+
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+ bool recycle)
+{
+ if (likely(recycle)) {
+ if (mlx5e_rx_cache_put(rq, dma_info))
+ return;
+
+ mlx5e_page_dma_unmap(rq, dma_info);
+ page_pool_recycle_direct(rq->page_pool, dma_info->page);
+ } else {
+ mlx5e_page_dma_unmap(rq, dma_info);
+ put_page(dma_info->page);
+ }
+}
+
+static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *frag)
+{
+ int err = 0;
+
+ if (!frag->offset)
+ /* On first frag (offset == 0), replenish page (dma_info actually).
+ * Other frags that point to the same dma_info (with a different
+ * offset) should just use the new one without replenishing again
+ * by themselves.
+ */
+ err = mlx5e_page_alloc_mapped(rq, frag->di);
+
+ return err;
+}
+
+static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *frag,
+ bool recycle)
+{
+ if (frag->last_in_page)
+ mlx5e_page_release(rq, frag->di, recycle);
+}
+
+static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
+{
+ return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags];
+}
+
+static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
+ u16 ix)
+{
+ struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix);
+ int err;
+ int i;
+
+ for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
+ err = mlx5e_get_rx_frag(rq, frag);
+ if (unlikely(err))
+ goto free_frags;
+
+ wqe->data[i].addr = cpu_to_be64(frag->di->addr +
+ frag->offset + rq->buff.headroom);
+ }
+
+ return 0;
+
+free_frags:
+ while (--i >= 0)
+ mlx5e_put_rx_frag(rq, --frag, true);
+
+ return err;
+}
+
+static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_wqe_frag_info *wi,
+ bool recycle)
+{
+ int i;
+
+ for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
+ mlx5e_put_rx_frag(rq, wi, recycle);
+}
+
+void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
+
+ mlx5e_free_rx_wqe(rq, wi, false);
+}
+
+static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int err;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
+
+ err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i);
+ if (unlikely(err))
+ goto free_wqes;
+ }
+
+ return 0;
+
+free_wqes:
+ while (--i >= 0)
+ mlx5e_dealloc_rx_wqe(rq, ix + i);
+
+ return err;
+}
+
+static inline void
+mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
+ struct mlx5e_dma_info *di, u32 frag_offset, u32 len,
+ unsigned int truesize)
+{
+ dma_sync_single_for_cpu(rq->pdev,
+ di->addr + frag_offset,
+ len, DMA_FROM_DEVICE);
+ page_ref_inc(di->page);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ di->page, frag_offset, len, truesize);
+}
+
+static inline void
+mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
+ struct mlx5e_dma_info *dma_info,
+ int offset_from, int offset_to, u32 headlen)
+{
+ const void *from = page_address(dma_info->page) + offset_from;
+ /* Aligning len to sizeof(long) optimizes memcpy performance */
+ unsigned int len = ALIGN(headlen, sizeof(long));
+
+ dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
+ DMA_FROM_DEVICE);
+ skb_copy_to_linear_data_offset(skb, offset_to, from, len);
+}
+
+static inline void
+mlx5e_copy_skb_header_mpwqe(struct device *pdev,
+ struct sk_buff *skb,
+ struct mlx5e_dma_info *dma_info,
+ u32 offset, u32 headlen)
+{
+ u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
+
+ mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg);
+
+ if (unlikely(offset + headlen > PAGE_SIZE)) {
+ dma_info++;
+ mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg,
+ headlen - headlen_pg);
+ }
+}
+
+static void
+mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
+{
+ const bool no_xdp_xmit =
+ bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
+ int i;
+
+ for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
+ if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+ mlx5e_page_release(rq, &dma_info[i], recycle);
+}
+
+static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+ struct mlx5e_rx_wqe_ll *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
+
+ rq->mpwqe.umr_in_progress = false;
+
+ mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ dma_wmb();
+
+ mlx5_wq_ll_update_db_record(wq);
+}
+
+static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
+{
+ return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+}
+
+static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
+ struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
+
+ edge_wi = wi + nnops;
+
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->opcode = MLX5_OPCODE_NOP;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+}
+
+static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+ struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
+ struct mlx5e_icosq *sq = &rq->channel->icosq;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
+ u16 pi, contig_wqebbs_room;
+ int err;
+ int i;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+ mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2))
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe,
+ offsetof(struct mlx5e_umr_wqe, inline_mtts));
+
+ for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
+ err = mlx5e_page_alloc_mapped(rq, dma_info);
+ if (unlikely(err))
+ goto err_unmap;
+ umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
+ wi->consumed_strides = 0;
+
+ rq->mpwqe.umr_in_progress = true;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_UMR);
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
+
+ sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+ sq->pc += MLX5E_UMR_WQEBBS;
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
+
+ return 0;
+
+err_unmap:
+ while (--i >= 0) {
+ dma_info--;
+ mlx5e_page_release(rq, dma_info, true);
+ }
+ rq->stats->buff_alloc_err++;
+
+ return err;
+}
+
+void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
+{
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+ /* Don't recycle, this function is called on rq/netdev close */
+ mlx5e_free_rx_mpwqe(rq, wi, false);
+}
+
+bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ u8 wqe_bulk;
+ int err;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return false;
+
+ wqe_bulk = rq->wqe.info.wqe_bulk;
+
+ if (mlx5_wq_cyc_missing(wq) < wqe_bulk)
+ return false;
+
+ do {
+ u16 head = mlx5_wq_cyc_get_head(wq);
+
+ err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
+ if (unlikely(err)) {
+ rq->stats->buff_alloc_err++;
+ break;
+ }
+
+ mlx5_wq_cyc_push_n(wq, wqe_bulk);
+ } while (mlx5_wq_cyc_missing(wq) >= wqe_bulk);
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ dma_wmb();
+
+ mlx5_wq_cyc_update_db_record(wq);
+
+ return !!err;
+}
+
+static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
+ struct mlx5e_icosq *sq,
+ struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
+ netdev_WARN_ONCE(cq->channel->netdev,
+ "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own);
+ return;
+ }
+
+ if (likely(icowi->opcode == MLX5_OPCODE_UMR)) {
+ mlx5e_post_rx_mpwqe(rq);
+ return;
+ }
+
+ if (unlikely(icowi->opcode != MLX5_OPCODE_NOP))
+ netdev_WARN_ONCE(cq->channel->netdev,
+ "Bad OPCODE in ICOSQ WQE info: 0x%x\n", icowi->opcode);
+}
+
+static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+{
+ struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
+ struct mlx5_cqe64 *cqe;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (likely(!cqe))
+ return;
+
+ /* by design, there's only a single cqe */
+ mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe);
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+}
+
+bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return false;
+
+ mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq);
+
+ if (mlx5_wq_ll_is_full(wq))
+ return false;
+
+ if (!rq->mpwqe.umr_in_progress)
+ mlx5e_alloc_rx_mpwqe(rq, wq->head);
+ else
+ rq->stats->congst_umr += mlx5_wq_ll_missing(wq) > 2;
+
+ return false;
+}
+
+static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
+{
+ u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+ (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
+
+ tcp->check = 0;
+ tcp->psh = get_cqe_lro_tcppsh(cqe);
+
+ if (tcp_ack) {
+ tcp->ack = 1;
+ tcp->ack_seq = cqe->lro_ack_seq_num;
+ tcp->window = cqe->lro_tcp_win;
+ }
+}
+
+static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt)
+{
+ struct ethhdr *eth = (struct ethhdr *)(skb->data);
+ struct tcphdr *tcp;
+ int network_depth = 0;
+ __wsum check;
+ __be16 proto;
+ u16 tot_len;
+ void *ip_p;
+
+ proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
+
+ tot_len = cqe_bcnt - network_depth;
+ ip_p = skb->data + network_depth;
+
+ if (proto == htons(ETH_P_IP)) {
+ struct iphdr *ipv4 = ip_p;
+
+ tcp = ip_p + sizeof(struct iphdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ ipv4->ttl = cqe->lro_min_ttl;
+ ipv4->tot_len = cpu_to_be16(tot_len);
+ ipv4->check = 0;
+ ipv4->check = ip_fast_csum((unsigned char *)ipv4,
+ ipv4->ihl);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
+ tot_len - sizeof(struct iphdr),
+ IPPROTO_TCP, check);
+ } else {
+ u16 payload_len = tot_len - sizeof(struct ipv6hdr);
+ struct ipv6hdr *ipv6 = ip_p;
+
+ tcp = ip_p + sizeof(struct ipv6hdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
+ ipv6->hop_limit = cqe->lro_min_ttl;
+ ipv6->payload_len = cpu_to_be16(payload_len);
+
+ mlx5e_lro_update_tcp_hdr(cqe, tcp);
+ check = csum_partial(tcp, tcp->doff * 4,
+ csum_unfold((__force __sum16)cqe->check_sum));
+ /* Almost done, don't forget the pseudo header */
+ tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
+ IPPROTO_TCP, check);
+ }
+}
+
+static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
+ struct sk_buff *skb)
+{
+ u8 cht = cqe->rss_hash_type;
+ int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
+ (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
+ PKT_HASH_TYPE_NONE;
+ skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
+}
+
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+ __be16 *proto)
+{
+ *proto = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = __vlan_get_protocol(skb, *proto, network_depth);
+
+ if (*proto == htons(ETH_P_IP))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+ if (*proto == htons(ETH_P_IPV6))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+ return false;
+}
+
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+ int network_depth = 0;
+ __be16 proto;
+ void *ip;
+ int rc;
+
+ if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+ return;
+
+ ip = skb->data + network_depth;
+ rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+ IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
+
+ rq->stats->ecn_mark += !!rc;
+}
+
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+{
+ void *ip_p = skb->data + network_depth;
+
+ return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+ ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
+#define MAX_PADDING 8
+
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+ struct mlx5e_rq_stats *stats)
+{
+ stats->csum_complete_tail_slow++;
+ skb->csum = csum_block_add(skb->csum,
+ skb_checksum(skb, offset, len, 0),
+ offset);
+}
+
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+ struct mlx5e_rq_stats *stats)
+{
+ u8 tail_padding[MAX_PADDING];
+ int len = skb->len - offset;
+ void *tail;
+
+ if (unlikely(len > MAX_PADDING)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ tail = skb_header_pointer(skb, offset, len, tail_padding);
+ if (unlikely(!tail)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ stats->csum_complete_tail++;
+ skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
+static void
+mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
+ struct mlx5e_rq_stats *stats)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip4;
+ int pkt_len;
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ ip4 = (struct iphdr *)(skb->data + network_depth);
+ pkt_len = network_depth + ntohs(ip4->tot_len);
+ break;
+ case htons(ETH_P_IPV6):
+ ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+ pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+ break;
+ default:
+ return;
+ }
+
+ if (likely(pkt_len >= skb->len))
+ return;
+
+ tail_padding_csum(skb, pkt_len, stats);
+}
+
+static inline void mlx5e_handle_csum(struct net_device *netdev,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_rq *rq,
+ struct sk_buff *skb,
+ bool lro)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ int network_depth = 0;
+ __be16 proto;
+
+ if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
+ goto csum_none;
+
+ if (lro) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ stats->csum_unnecessary++;
+ return;
+ }
+
+ /* True when explicitly set via priv flag, or XDP prog is loaded */
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+ goto csum_unnecessary;
+
+ /* CQE csum doesn't cover padding octets in short ethernet
+ * frames. And the pad field is appended prior to calculating
+ * and appending the FCS field.
+ *
+ * Detecting these padded frames requires to verify and parse
+ * IP headers, so we simply force all those small frames to be
+ * CHECKSUM_UNNECESSARY even if they are not padded.
+ */
+ if (short_frame(skb->len))
+ goto csum_unnecessary;
+
+ if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+ if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+ goto csum_unnecessary;
+
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+ if (network_depth > ETH_HLEN)
+ /* CQE csum is calculated from the IP header and does
+ * not cover VLAN headers (if present). This will add
+ * the checksum manually.
+ */
+ skb->csum = csum_partial(skb->data + ETH_HLEN,
+ network_depth - ETH_HLEN,
+ skb->csum);
+
+ mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
+ stats->csum_complete++;
+ return;
+ }
+
+csum_unnecessary:
+ if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
+ (cqe->hds_ip_ext & CQE_L4_OK))) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (cqe_is_tunneled(cqe)) {
+ skb->csum_level = 1;
+ skb->encapsulation = 1;
+ stats->csum_unnecessary_inner++;
+ return;
+ }
+ stats->csum_unnecessary++;
+ return;
+ }
+csum_none:
+ skb->ip_summed = CHECKSUM_NONE;
+ stats->csum_none++;
+}
+
+#define MLX5E_CE_BIT_MASK 0x80
+
+static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct mlx5e_rq *rq,
+ struct sk_buff *skb)
+{
+ u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct net_device *netdev = rq->netdev;
+
+ skb->mac_len = ETH_HLEN;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ mlx5e_tls_handle_rx_skb(netdev, skb, &cqe_bcnt);
+#endif
+
+ if (lro_num_seg > 1) {
+ mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
+ skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
+ /* Subtract one since we already counted this as one
+ * "regular" packet in mlx5e_complete_rx_cqe()
+ */
+ stats->packets += lro_num_seg - 1;
+ stats->lro_packets++;
+ stats->lro_bytes += cqe_bcnt;
+ }
+
+ if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+ skb_hwtstamps(skb)->hwtstamp =
+ mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
+
+ skb_record_rx_queue(skb, rq->ix);
+
+ if (likely(netdev->features & NETIF_F_RXHASH))
+ mlx5e_skb_set_hash(cqe, skb);
+
+ if (cqe_has_vlan(cqe)) {
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ be16_to_cpu(cqe->vlan_info));
+ stats->removed_vlan_packets++;
+ }
+
+ skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
+
+ mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+ /* checking CE bit in cqe - MSB in ml_path field */
+ if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+ mlx5e_enable_ecn(rq, skb);
+
+ skb->protocol = eth_type_trans(skb, netdev);
+}
+
+static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->packets++;
+ stats->bytes += cqe_bcnt;
+ mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
+}
+
+static inline
+struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
+ u32 frag_size, u16 headroom,
+ u32 cqe_bcnt)
+{
+ struct sk_buff *skb = build_skb(va, frag_size);
+
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_reserve(skb, headroom);
+ skb_put(skb, cqe_bcnt);
+
+ return skb;
+}
+
+struct sk_buff *
+mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+{
+ struct mlx5e_dma_info *di = wi->di;
+ u16 rx_headroom = rq->buff.headroom;
+ struct sk_buff *skb;
+ void *va, *data;
+ bool consumed;
+ u32 frag_size;
+
+ va = page_address(di->page) + wi->offset;
+ data = va + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+
+ dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
+ frag_size, DMA_FROM_DEVICE);
+ prefetchw(va); /* xdp_frame data area */
+ prefetch(data);
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ return NULL;
+ }
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt);
+ rcu_read_unlock();
+ if (consumed)
+ return NULL; /* page/packet was consumed by XDP */
+
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(di->page);
+
+ return skb;
+}
+
+struct sk_buff *
+mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
+{
+ struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
+ struct mlx5e_wqe_frag_info *head_wi = wi;
+ u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt);
+ u16 frag_headlen = headlen;
+ u16 byte_cnt = cqe_bcnt - headlen;
+ struct sk_buff *skb;
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ return NULL;
+ }
+
+ /* XDP is not supported in this configuration, as incoming packets
+ * might spread among multiple pages.
+ */
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+
+ while (byte_cnt) {
+ u16 frag_consumed_bytes =
+ min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt);
+
+ mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen,
+ frag_consumed_bytes, frag_info->frag_stride);
+ byte_cnt -= frag_consumed_bytes;
+ frag_headlen = 0;
+ frag_info++;
+ wi++;
+ }
+
+ /* copy header */
+ mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset,
+ 0, headlen);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += headlen;
+ skb->len += headlen;
+
+ return skb;
+}
+
+void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ /* do not return page to cache,
+ * it will be returned on XDP_TX completion.
+ */
+ goto wq_cyc_pop;
+ }
+ goto free_wqe;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ napi_gro_receive(rq->cq.napi, skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+
+#ifdef CONFIG_MLX5_ESWITCH
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct net_device *netdev = rq->netdev;
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb) {
+ /* probably for XDP */
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+ /* do not return page to cache,
+ * it will be returned on XDP_TX completion.
+ */
+ goto wq_cyc_pop;
+ }
+ goto free_wqe;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+ if (rep->vlan && skb_vlan_tag_present(skb))
+ skb_vlan_pop(skb);
+
+ napi_gro_receive(rq->cq.napi, skb);
+
+free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+#endif
+
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx)
+{
+ u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u32 frag_offset = head_offset + headlen;
+ u32 byte_cnt = cqe_bcnt - headlen;
+ struct mlx5e_dma_info *head_di = di;
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi,
+ ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+
+ if (unlikely(frag_offset >= PAGE_SIZE)) {
+ di++;
+ frag_offset -= PAGE_SIZE;
+ }
+
+ while (byte_cnt) {
+ u32 pg_consumed_bytes =
+ min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
+ unsigned int truesize =
+ ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+ mlx5e_add_skb_frag(rq, skb, di, frag_offset,
+ pg_consumed_bytes, truesize);
+ byte_cnt -= pg_consumed_bytes;
+ frag_offset = 0;
+ di++;
+ }
+ /* copy header */
+ mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di,
+ head_offset, headlen);
+ /* skb linear part was allocated with headlen and aligned to long */
+ skb->tail += headlen;
+ skb->len += headlen;
+
+ return skb;
+}
+
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt, u32 head_offset, u32 page_idx)
+{
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u16 rx_headroom = rq->buff.headroom;
+ u32 cqe_bcnt32 = cqe_bcnt;
+ struct sk_buff *skb;
+ void *va, *data;
+ u32 frag_size;
+ bool consumed;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ va = page_address(di->page) + head_offset;
+ data = va + rx_headroom;
+ frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
+
+ dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
+ frag_size, DMA_FROM_DEVICE);
+ prefetchw(va); /* xdp_frame data area */
+ prefetch(data);
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32);
+ rcu_read_unlock();
+ if (consumed) {
+ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
+ if (unlikely(!skb))
+ return NULL;
+
+ /* queue up for recycling/reuse */
+ page_ref_inc(di->page);
+
+ return skb;
+}
+
+void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
+ u16 wqe_id = be16_to_cpu(cqe->wqe_id);
+ struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
+ u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
+ u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
+ u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
+ u32 page_idx = wqe_offset >> PAGE_SHIFT;
+ struct mlx5e_rx_wqe_ll *wqe;
+ struct mlx5_wq_ll *wq;
+ struct sk_buff *skb;
+ u16 cqe_bcnt;
+
+ wi->consumed_strides += cstrides;
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ goto mpwrq_cqe_out;
+ }
+
+ if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+ struct mlx5e_rq_stats *stats = rq->stats;
+
+ stats->mpwqe_filler_cqes++;
+ stats->mpwqe_filler_strides += cstrides;
+ goto mpwrq_cqe_out;
+ }
+
+ cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
+
+ skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset,
+ page_idx);
+ if (!skb)
+ goto mpwrq_cqe_out;
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ napi_gro_receive(rq->cq.napi, skb);
+
+mpwrq_cqe_out:
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+ return;
+
+ wq = &rq->mpwqe.wq;
+ wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+ mlx5e_free_rx_mpwqe(rq, wi, true);
+ mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
+int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
+{
+ struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
+ struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+ struct mlx5_cqe64 *cqe;
+ int work_done = 0;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ return 0;
+
+ if (cq->decmprs_left) {
+ work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
+ if (cq->decmprs_left || work_done >= budget)
+ goto out;
+ }
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe) {
+ if (unlikely(work_done))
+ goto out;
+ return 0;
+ }
+
+ do {
+ if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
+ work_done +=
+ mlx5e_decompress_cqes_start(rq, cq,
+ budget - work_done);
+ continue;
+ }
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ rq->handle_rx_cqe(rq, cqe);
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+out:
+ if (xdpsq->doorbell) {
+ mlx5e_xmit_xdp_doorbell(xdpsq);
+ xdpsq->doorbell = false;
+ }
+
+ if (xdpsq->redirect_flush) {
+ xdp_do_flush_map();
+ xdpsq->redirect_flush = false;
+ }
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ return work_done;
+}
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+
+#define MLX5_IB_GRH_SGID_OFFSET 8
+#define MLX5_IB_GRH_DGID_OFFSET 24
+#define MLX5_GID_SIZE 16
+
+static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ u32 cqe_bcnt,
+ struct sk_buff *skb)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct hwtstamp_config *tstamp;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ char *pseudo_header;
+ u32 flags_rqpn;
+ u32 qpn;
+ u8 *dgid;
+ u8 g;
+
+ qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
+ netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);
+
+ /* No mapping present, cannot process SKB. This might happen if a child
+ * interface is going down while having unprocessed CQEs on parent RQ
+ */
+ if (unlikely(!netdev)) {
+ /* TODO: add drop counters support */
+ skb->dev = NULL;
+ pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
+ return;
+ }
+
+ priv = mlx5i_epriv(netdev);
+ tstamp = &priv->tstamp;
+
+ flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
+ g = (flags_rqpn >> 28) & 3;
+ dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
+ if ((!g) || dgid[0] != 0xff)
+ skb->pkt_type = PACKET_HOST;
+ else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0)
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+
+ /* Drop packets that this interface sent, ie multicast packets
+ * that the HCA has replicated.
+ */
+ if (g && (qpn == (flags_rqpn & 0xffffff)) &&
+ (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
+ MLX5_GID_SIZE) == 0)) {
+ skb->dev = NULL;
+ return;
+ }
+
+ skb_pull(skb, MLX5_IB_GRH_BYTES);
+
+ skb->protocol = *((__be16 *)(skb->data));
+
+ if (netdev->features & NETIF_F_RXCSUM) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+ stats->csum_complete++;
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ stats->csum_none++;
+ }
+
+ if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
+ skb_hwtstamps(skb)->hwtstamp =
+ mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
+
+ skb_record_rx_queue(skb, rq->ix);
+
+ if (likely(netdev->features & NETIF_F_RXHASH))
+ mlx5e_skb_set_hash(cqe, skb);
+
+ /* 20 bytes of ipoib header and 4 for encap existing */
+ pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
+ memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
+ skb_reset_mac_header(skb);
+ skb_pull(skb, MLX5_IPOIB_HARD_LEN);
+
+ skb->dev = netdev;
+
+ stats->packets++;
+ stats->bytes += cqe_bcnt;
+}
+
+void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (!skb)
+ goto wq_free_wqe;
+
+ mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ if (unlikely(!skb->dev)) {
+ dev_kfree_skb_any(skb);
+ goto wq_free_wqe;
+ }
+ napi_gro_receive(rq->cq.napi, skb);
+
+wq_free_wqe:
+ mlx5e_free_rx_wqe(rq, wi, true);
+ mlx5_wq_cyc_pop(wq);
+}
+
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct mlx5e_wqe_frag_info *wi;
+ struct sk_buff *skb;
+ u32 cqe_bcnt;
+ u16 ci;
+
+ ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
+ wi = get_frag(rq, ci);
+ cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+
+ skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt);
+ if (unlikely(!skb)) {
+ /* a DROP, save the page-reuse checks */
+ mlx5e_free_rx_wqe(rq, wi, true);
+ goto wq_cyc_pop;
+ }
+ skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb, &cqe_bcnt);
+ if (unlikely(!skb)) {
+ mlx5e_free_rx_wqe(rq, wi, true);
+ goto wq_cyc_pop;
+ }
+
+ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ napi_gro_receive(rq->cq.napi, skb);
+
+ mlx5e_free_rx_wqe(rq, wi, true);
+wq_cyc_pop:
+ mlx5_wq_cyc_pop(wq);
+}
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
new file mode 100644
index 000000000..5fb088b54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/prefetch.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/udp.h>
+#include "en.h"
+#include "en/port.h"
+
+enum {
+ MLX5E_ST_LINK_STATE,
+ MLX5E_ST_LINK_SPEED,
+ MLX5E_ST_HEALTH_INFO,
+#ifdef CONFIG_INET
+ MLX5E_ST_LOOPBACK,
+#endif
+ MLX5E_ST_NUM,
+};
+
+const char mlx5e_self_tests[MLX5E_ST_NUM][ETH_GSTRING_LEN] = {
+ "Link Test",
+ "Speed Test",
+ "Health Test",
+#ifdef CONFIG_INET
+ "Loopback Test",
+#endif
+};
+
+int mlx5e_self_test_num(struct mlx5e_priv *priv)
+{
+ return ARRAY_SIZE(mlx5e_self_tests);
+}
+
+static int mlx5e_test_health_info(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_health *health = &priv->mdev->priv.health;
+
+ return health->sick ? 1 : 0;
+}
+
+static int mlx5e_test_link_state(struct mlx5e_priv *priv)
+{
+ u8 port_state;
+
+ if (!netif_carrier_ok(priv->netdev))
+ return 1;
+
+ port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0);
+ return port_state == VPORT_STATE_UP ? 0 : 1;
+}
+
+static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
+{
+ u32 speed;
+
+ if (!netif_carrier_ok(priv->netdev))
+ return 1;
+
+ return mlx5e_port_linkspeed(priv->mdev, &speed);
+}
+
+struct mlx5ehdr {
+ __be32 version;
+ __be64 magic;
+};
+
+#ifdef CONFIG_INET
+/* loopback test */
+#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\
+ sizeof(struct udphdr) + sizeof(struct mlx5ehdr))
+#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL
+
+static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
+{
+ struct sk_buff *skb = NULL;
+ struct mlx5ehdr *mlxh;
+ struct ethhdr *ethh;
+ struct udphdr *udph;
+ struct iphdr *iph;
+ int iplen;
+
+ skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE);
+ if (!skb) {
+ netdev_err(priv->netdev, "\tFailed to alloc loopback skb\n");
+ return NULL;
+ }
+
+ prefetchw(skb->data);
+ skb_reserve(skb, NET_IP_ALIGN);
+
+ /* Reserve for ethernet and IP header */
+ ethh = skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+
+ skb_set_network_header(skb, skb->len);
+ iph = skb_put(skb, sizeof(struct iphdr));
+
+ skb_set_transport_header(skb, skb->len);
+ udph = skb_put(skb, sizeof(struct udphdr));
+
+ /* Fill ETH header */
+ ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr);
+ eth_zero_addr(ethh->h_source);
+ ethh->h_proto = htons(ETH_P_IP);
+
+ /* Fill UDP header */
+ udph->source = htons(9);
+ udph->dest = htons(9); /* Discard Protocol */
+ udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr));
+ udph->check = 0;
+
+ /* Fill IP header */
+ iph->ihl = 5;
+ iph->ttl = 32;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iplen = sizeof(struct iphdr) + sizeof(struct udphdr) +
+ sizeof(struct mlx5ehdr);
+ iph->tot_len = htons(iplen);
+ iph->frag_off = 0;
+ iph->saddr = 0;
+ iph->daddr = 0;
+ iph->tos = 0;
+ iph->id = 0;
+ ip_send_check(iph);
+
+ /* Fill test header and data */
+ mlxh = skb_put(skb, sizeof(*mlxh));
+ mlxh->version = 0;
+ mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC);
+
+ skb->csum = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ udp4_hwcsum(skb, iph->saddr, iph->daddr);
+
+ skb->protocol = htons(ETH_P_IP);
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = priv->netdev;
+
+ return skb;
+}
+
+struct mlx5e_lbt_priv {
+ struct packet_type pt;
+ struct completion comp;
+ bool loopback_ok;
+ bool local_lb;
+};
+
+static int
+mlx5e_test_loopback_validate(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt,
+ struct net_device *orig_ndev)
+{
+ struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv;
+ struct mlx5ehdr *mlxh;
+ struct ethhdr *ethh;
+ struct udphdr *udph;
+ struct iphdr *iph;
+
+ /* We are only going to peek, no need to clone the SKB */
+ if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
+ goto out;
+
+ ethh = (struct ethhdr *)skb_mac_header(skb);
+ if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr))
+ goto out;
+
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_UDP)
+ goto out;
+
+ /* Don't assume skb_transport_header() was set */
+ udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
+ if (udph->dest != htons(9))
+ goto out;
+
+ mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph));
+ if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC))
+ goto out; /* so close ! */
+
+ /* bingo */
+ lbtp->loopback_ok = true;
+ complete(&lbtp->comp);
+out:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
+ struct mlx5e_lbt_priv *lbtp)
+{
+ int err = 0;
+
+ /* Temporarily enable local_lb */
+ err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb);
+ if (err)
+ return err;
+
+ if (!lbtp->local_lb) {
+ err = mlx5_nic_vport_update_local_lb(priv->mdev, true);
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_refresh_tirs(priv, true);
+ if (err)
+ goto out;
+
+ lbtp->loopback_ok = false;
+ init_completion(&lbtp->comp);
+
+ lbtp->pt.type = htons(ETH_P_IP);
+ lbtp->pt.func = mlx5e_test_loopback_validate;
+ lbtp->pt.dev = priv->netdev;
+ lbtp->pt.af_packet_priv = lbtp;
+ dev_add_pack(&lbtp->pt);
+
+ return 0;
+
+out:
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, false);
+
+ return err;
+}
+
+static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
+ struct mlx5e_lbt_priv *lbtp)
+{
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, false);
+
+ dev_remove_pack(&lbtp->pt);
+ mlx5e_refresh_tirs(priv, false);
+}
+
+#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
+static int mlx5e_test_loopback(struct mlx5e_priv *priv)
+{
+ struct mlx5e_lbt_priv *lbtp;
+ struct sk_buff *skb = NULL;
+ int err;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ netdev_err(priv->netdev,
+ "\tCan't perform loopback test while device is down\n");
+ return -ENODEV;
+ }
+
+ lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL);
+ if (!lbtp)
+ return -ENOMEM;
+ lbtp->loopback_ok = false;
+
+ err = mlx5e_test_loopback_setup(priv, lbtp);
+ if (err)
+ goto out;
+
+ skb = mlx5e_test_get_udp_skb(priv);
+ if (!skb) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ skb_set_queue_mapping(skb, 0);
+ err = dev_queue_xmit(skb);
+ if (err) {
+ netdev_err(priv->netdev,
+ "\tFailed to xmit loopback packet err(%d)\n",
+ err);
+ goto cleanup;
+ }
+
+ wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT);
+ err = !lbtp->loopback_ok;
+
+cleanup:
+ mlx5e_test_loopback_cleanup(priv, lbtp);
+out:
+ kfree(lbtp);
+ return err;
+}
+#endif
+
+static int (*mlx5e_st_func[MLX5E_ST_NUM])(struct mlx5e_priv *) = {
+ mlx5e_test_link_state,
+ mlx5e_test_link_speed,
+ mlx5e_test_health_info,
+#ifdef CONFIG_INET
+ mlx5e_test_loopback,
+#endif
+};
+
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+ u64 *buf)
+{
+ struct mlx5e_priv *priv = netdev_priv(ndev);
+ int i;
+
+ memset(buf, 0, sizeof(u64) * MLX5E_ST_NUM);
+
+ mutex_lock(&priv->state_lock);
+ netdev_info(ndev, "Self test begin..\n");
+
+ for (i = 0; i < MLX5E_ST_NUM; i++) {
+ netdev_info(ndev, "\t[%d] %s start..\n",
+ i, mlx5e_self_tests[i]);
+ buf[i] = mlx5e_st_func[i](priv);
+ netdev_info(ndev, "\t[%d] %s end: result(%lld)\n",
+ i, mlx5e_self_tests[i], buf[i]);
+ }
+
+ mutex_unlock(&priv->state_lock);
+
+ for (i = 0; i < MLX5E_ST_NUM; i++) {
+ if (buf[i]) {
+ etest->flags |= ETH_TEST_FL_FAILED;
+ break;
+ }
+ }
+ netdev_info(ndev, "Self test out: status flags(0x%x)\n",
+ etest->flags);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
new file mode 100644
index 000000000..9a68dee58
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -0,0 +1,1404 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/tls.h"
+
+static const struct counter_desc sw_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+
+#ifdef CONFIG_MLX5_EN_TLS
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+#endif
+
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) },
+};
+
+#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc)
+
+static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_SW_COUNTERS;
+}
+
+static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_SW_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_SW_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i);
+ return idx;
+}
+
+void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_sw_stats temp, *s = &temp;
+ int i;
+
+ memset(s, 0, sizeof(*s));
+
+ for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) {
+ struct mlx5e_channel_stats *channel_stats =
+ &priv->channel_stats[i];
+ struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
+ struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq;
+ struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
+ struct mlx5e_ch_stats *ch_stats = &channel_stats->ch;
+ int j;
+
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
+ s->rx_lro_packets += rq_stats->lro_packets;
+ s->rx_lro_bytes += rq_stats->lro_bytes;
+ s->rx_ecn_mark += rq_stats->ecn_mark;
+ s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
+ s->rx_csum_none += rq_stats->csum_none;
+ s->rx_csum_complete += rq_stats->csum_complete;
+ s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
+ s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
+ s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+ s->rx_xdp_drop += rq_stats->xdp_drop;
+ s->rx_xdp_redirect += rq_stats->xdp_redirect;
+ s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
+ s->rx_xdp_tx_full += xdpsq_stats->full;
+ s->rx_xdp_tx_err += xdpsq_stats->err;
+ s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
+ s->rx_wqe_err += rq_stats->wqe_err;
+ s->rx_mpwqe_filler_cqes += rq_stats->mpwqe_filler_cqes;
+ s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
+ s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop;
+ s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
+ s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+ s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+ s->rx_page_reuse += rq_stats->page_reuse;
+ s->rx_cache_reuse += rq_stats->cache_reuse;
+ s->rx_cache_full += rq_stats->cache_full;
+ s->rx_cache_empty += rq_stats->cache_empty;
+ s->rx_cache_busy += rq_stats->cache_busy;
+ s->rx_cache_waive += rq_stats->cache_waive;
+ s->rx_congst_umr += rq_stats->congst_umr;
+ s->ch_events += ch_stats->events;
+ s->ch_poll += ch_stats->poll;
+ s->ch_arm += ch_stats->arm;
+ s->ch_aff_change += ch_stats->aff_change;
+ s->ch_eq_rearm += ch_stats->eq_rearm;
+ /* xdp redirect */
+ s->tx_xdp_xmit += xdpsq_red_stats->xmit;
+ s->tx_xdp_full += xdpsq_red_stats->full;
+ s->tx_xdp_err += xdpsq_red_stats->err;
+ s->tx_xdp_cqes += xdpsq_red_stats->cqes;
+
+ for (j = 0; j < priv->max_opened_tc; j++) {
+ struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
+ s->tx_tso_packets += sq_stats->tso_packets;
+ s->tx_tso_bytes += sq_stats->tso_bytes;
+ s->tx_tso_inner_packets += sq_stats->tso_inner_packets;
+ s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
+ s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
+ s->tx_nop += sq_stats->nop;
+ s->tx_queue_stopped += sq_stats->stopped;
+ s->tx_queue_wake += sq_stats->wake;
+ s->tx_queue_dropped += sq_stats->dropped;
+ s->tx_cqe_err += sq_stats->cqe_err;
+ s->tx_recover += sq_stats->recover;
+ s->tx_xmit_more += sq_stats->xmit_more;
+ s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+ s->tx_csum_none += sq_stats->csum_none;
+ s->tx_csum_partial += sq_stats->csum_partial;
+#ifdef CONFIG_MLX5_EN_TLS
+ s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+#endif
+ s->tx_cqes += sq_stats->cqes;
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
+ }
+ }
+
+ memcpy(&priv->stats.sw, s, sizeof(*s));
+}
+
+static const struct counter_desc q_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
+};
+
+static const struct counter_desc drop_rq_stats_desc[] = {
+ { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) },
+};
+
+#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc)
+#define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc)
+
+static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv)
+{
+ int num_stats = 0;
+
+ if (priv->q_counter)
+ num_stats += NUM_Q_COUNTERS;
+
+ if (priv->drop_rq_q_counter)
+ num_stats += NUM_DROP_RQ_COUNTERS;
+
+ return num_stats;
+}
+
+static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ q_stats_desc[i].format);
+
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ drop_rq_stats_desc[i].format);
+
+ return idx;
+}
+
+static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ q_stats_desc, i);
+ for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+ data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+ drop_rq_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)];
+
+ if (priv->q_counter &&
+ !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out,
+ sizeof(out)))
+ qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
+ out, out_of_buffer);
+ if (priv->drop_rq_q_counter &&
+ !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0,
+ out, sizeof(out)))
+ qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out,
+ out_of_buffer);
+}
+
+#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
+static const struct counter_desc vnic_env_stats_desc[] = {
+ { "rx_steer_missed_packets",
+ VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) },
+};
+
+#define NUM_VNIC_ENV_COUNTERS ARRAY_SIZE(vnic_env_stats_desc)
+
+static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv)
+{
+ return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ?
+ NUM_VNIC_ENV_COUNTERS : 0;
+}
+
+static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return idx;
+
+ for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ vnic_env_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return idx;
+
+ for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out,
+ vnic_env_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv)
+{
+ u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out;
+ int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out);
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ return;
+
+ MLX5_SET(query_vnic_env_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+ MLX5_SET(query_vnic_env_in, in, other_vport, 0);
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
+static const struct counter_desc vport_stats_desc[] = {
+ { "rx_vport_unicast_packets",
+ VPORT_COUNTER_OFF(received_eth_unicast.packets) },
+ { "rx_vport_unicast_bytes",
+ VPORT_COUNTER_OFF(received_eth_unicast.octets) },
+ { "tx_vport_unicast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) },
+ { "tx_vport_unicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) },
+ { "rx_vport_multicast_packets",
+ VPORT_COUNTER_OFF(received_eth_multicast.packets) },
+ { "rx_vport_multicast_bytes",
+ VPORT_COUNTER_OFF(received_eth_multicast.octets) },
+ { "tx_vport_multicast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) },
+ { "tx_vport_multicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) },
+ { "rx_vport_broadcast_packets",
+ VPORT_COUNTER_OFF(received_eth_broadcast.packets) },
+ { "rx_vport_broadcast_bytes",
+ VPORT_COUNTER_OFF(received_eth_broadcast.octets) },
+ { "tx_vport_broadcast_packets",
+ VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
+ { "tx_vport_broadcast_bytes",
+ VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
+ { "rx_vport_rdma_unicast_packets",
+ VPORT_COUNTER_OFF(received_ib_unicast.packets) },
+ { "rx_vport_rdma_unicast_bytes",
+ VPORT_COUNTER_OFF(received_ib_unicast.octets) },
+ { "tx_vport_rdma_unicast_packets",
+ VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) },
+ { "tx_vport_rdma_unicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) },
+ { "rx_vport_rdma_multicast_packets",
+ VPORT_COUNTER_OFF(received_ib_multicast.packets) },
+ { "rx_vport_rdma_multicast_bytes",
+ VPORT_COUNTER_OFF(received_ib_multicast.octets) },
+ { "tx_vport_rdma_multicast_packets",
+ VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) },
+ { "tx_vport_rdma_multicast_bytes",
+ VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) },
+};
+
+#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc)
+
+static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_VPORT_COUNTERS;
+}
+
+static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out,
+ vport_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_vport_update_stats(struct mlx5e_priv *priv)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u32 *out = (u32 *)priv->stats.vport.query_vport_out;
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, other_vport, 0);
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+#define PPORT_802_3_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_802_3_stats_desc[] = {
+ { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+ { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
+ { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+ { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+ { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
+ { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+ { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+ { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+ { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+ { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
+ { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
+ { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
+ { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+ { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+ { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
+ { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+ { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+ { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+};
+
+#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc)
+
+static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PPORT_802_3_COUNTERS;
+}
+
+static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters,
+ pport_802_3_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->IEEE_802_3_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_2863_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2863_stats_desc[] = {
+ { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
+ { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
+ { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
+};
+
+#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc)
+
+static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PPORT_2863_COUNTERS;
+}
+
+static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters,
+ pport_2863_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_2863_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->RFC_2863_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_2819_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2819_stats_desc[] = {
+ { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+ { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
+ { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
+ { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
+ { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+ { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+ { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+ { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+ { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+ { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+ { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+ { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+ { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+};
+
+#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc)
+
+static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PPORT_2819_COUNTERS;
+}
+
+static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
+ pport_2819_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->RFC_2819_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_PHY_STATISTICAL_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.phys_layer_statistical_cntrs.c##_high)
+static const struct counter_desc pport_phy_statistical_stats_desc[] = {
+ { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) },
+ { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
+};
+
+#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc)
+
+static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv)
+{
+ /* "1" for link_down_events special counter */
+ return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ?
+ NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1;
+}
+
+static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy");
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ return idx;
+
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_phy_statistical_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ int i;
+
+ /* link_down_events_phy has special handling since it is not stored in __be64 format */
+ data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters,
+ counter_set.phys_layer_cntrs.link_down_events);
+
+ if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ return idx;
+
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+ pport_phy_statistical_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_phy_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->phy_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
+ return;
+
+ out = pstats->phy_statistical_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PPORT_ETH_EXT_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_eth_ext_stats_desc[] = {
+ { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) },
+};
+
+#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc)
+
+static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv)
+{
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ return NUM_PPORT_ETH_EXT_COUNTERS;
+
+ return 0;
+}
+
+static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_eth_ext_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters,
+ pport_eth_ext_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_eth_ext_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ void *out;
+
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters))
+ return;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ out = pstats->eth_ext_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+#define PCIE_PERF_OFF(c) \
+ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
+static const struct counter_desc pcie_perf_stats_desc[] = {
+ { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
+ { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
+};
+
+#define PCIE_PERF_OFF64(c) \
+ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pcie_perf_stats_desc64[] = {
+ { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) },
+};
+
+static const struct counter_desc pcie_perf_stall_stats_desc[] = {
+ { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) },
+ { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) },
+ { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) },
+ { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) },
+};
+
+#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc)
+#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64)
+#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc)
+
+static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv)
+{
+ int num_stats = 0;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ num_stats += NUM_PCIE_PERF_COUNTERS;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ num_stats += NUM_PCIE_PERF_COUNTERS64;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ num_stats += NUM_PCIE_PERF_STALL_COUNTERS;
+
+ return num_stats;
+}
+
+static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stats_desc[i].format);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stats_desc64[i].format);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stall_stats_desc[i].format);
+ return idx;
+}
+
+static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int i;
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stats_desc, i);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stats_desc64, i);
+
+ if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stall_stats_desc, i);
+ return idx;
+}
+
+static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(mpcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
+ void *out;
+
+ if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group))
+ return;
+
+ out = pcie_stats->pcie_perf_counters;
+ MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
+}
+
+#define PPORT_PER_PRIO_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_per_prio_grp_data_layout.c##_high)
+static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
+ { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
+ { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
+ { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
+ { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
+};
+
+#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
+
+static int mlx5e_grp_per_prio_traffic_get_num_stats(void)
+{
+ return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv,
+ u8 *data,
+ int idx)
+{
+ int i, prio;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_traffic_stats_desc[i].format, prio);
+ }
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv,
+ u64 *data,
+ int idx)
+{
+ int i, prio;
+
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+ pport_per_prio_traffic_stats_desc, i);
+ }
+
+ return idx;
+}
+
+static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
+ /* %s is "global" or "prio{i}" */
+ { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+ { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+ { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+ { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+ { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
+};
+
+static const struct counter_desc pport_pfc_stall_stats_desc[] = {
+ { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+ { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
+};
+
+#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
+#define NUM_PPORT_PFC_STALL_COUNTERS(priv) (ARRAY_SIZE(pport_pfc_stall_stats_desc) * \
+ MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \
+ MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+
+static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 pfc_en_tx;
+ u8 pfc_en_rx;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return 0;
+
+ err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx);
+
+ return err ? 0 : pfc_en_tx | pfc_en_rx;
+}
+
+static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 rx_pause;
+ u32 tx_pause;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return false;
+
+ err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+ return err ? false : rx_pause | tx_pause;
+}
+
+static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv)
+{
+ return (mlx5e_query_global_pause_combined(priv) +
+ hweight8(mlx5e_query_pfc_combined(priv))) *
+ NUM_PPORT_PER_PRIO_PFC_COUNTERS +
+ NUM_PPORT_PFC_STALL_COUNTERS(priv);
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
+ u8 *data,
+ int idx)
+{
+ unsigned long pfc_combined;
+ int i, prio;
+
+ pfc_combined = mlx5e_query_pfc_combined(priv);
+ for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ char pfc_string[ETH_GSTRING_LEN];
+
+ snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio);
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_pfc_stats_desc[i].format, pfc_string);
+ }
+ }
+
+ if (mlx5e_query_global_pause_combined(priv)) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ pport_per_prio_pfc_stats_desc[i].format, "global");
+ }
+ }
+
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_pfc_stall_stats_desc[i].format);
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv,
+ u64 *data,
+ int idx)
+{
+ unsigned long pfc_combined;
+ int i, prio;
+
+ pfc_combined = mlx5e_query_pfc_combined(priv);
+ for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+ pport_per_prio_pfc_stats_desc, i);
+ }
+ }
+
+ if (mlx5e_query_global_pause_combined(priv)) {
+ for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ pport_per_prio_pfc_stats_desc, i);
+ }
+ }
+
+ for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+ pport_pfc_stall_stats_desc, i);
+
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_get_num_stats(struct mlx5e_priv *priv)
+{
+ return mlx5e_grp_per_prio_traffic_get_num_stats() +
+ mlx5e_grp_per_prio_pfc_get_num_stats(priv);
+}
+
+static int mlx5e_grp_per_prio_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx);
+ idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx);
+ return idx;
+}
+
+static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx);
+ idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx);
+ return idx;
+}
+
+static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv)
+{
+ struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int prio;
+ void *out;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
+ for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+ out = pstats->per_prio_counters[prio];
+ MLX5_SET(ppcnt_reg, in, prio_tc, prio);
+ mlx5_core_access_reg(mdev, in, sz, out, sz,
+ MLX5_REG_PPCNT, 0, 0);
+ }
+}
+
+static const struct counter_desc mlx5e_pme_status_desc[] = {
+ { "module_unplug", 8 },
+};
+
+static const struct counter_desc mlx5e_pme_error_desc[] = {
+ { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */
+ { "module_high_temp", 48 }, /* high temperature */
+ { "module_bad_shorted", 56 }, /* bad or shorted cable/module */
+};
+
+#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc)
+#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc)
+
+static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv)
+{
+ return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS;
+}
+
+static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int i;
+
+ for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format);
+
+ for (i = 0; i < NUM_PME_ERR_STATS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
+
+ return idx;
+}
+
+static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+ int i;
+
+ for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+ mlx5e_pme_status_desc, i);
+
+ for (i = 0; i < NUM_PME_ERR_STATS; i++)
+ data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+ mlx5e_pme_error_desc, i);
+
+ return idx;
+}
+
+static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv)
+{
+ return mlx5e_ipsec_get_count(priv);
+}
+
+static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ return idx + mlx5e_ipsec_get_strings(priv,
+ data + idx * ETH_GSTRING_LEN);
+}
+
+static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ return idx + mlx5e_ipsec_get_stats(priv, data + idx);
+}
+
+static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+ mlx5e_ipsec_update_stats(priv);
+}
+
+static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv)
+{
+ return mlx5e_tls_get_count(priv);
+}
+
+static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+}
+
+static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+ return idx + mlx5e_tls_get_stats(priv, data + idx);
+}
+
+static const struct counter_desc rq_stats_desc[] = {
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
+};
+
+static const struct counter_desc sq_stats_desc[] = {
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
+static const struct counter_desc rq_xdpsq_stats_desc[] = {
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc xdpsq_stats_desc[] = {
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
+};
+
+static const struct counter_desc ch_stats_desc[] = {
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) },
+ { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) },
+};
+
+#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc)
+#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
+#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
+#define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc)
+#define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc)
+
+static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+
+ return (NUM_RQ_STATS * max_nch) +
+ (NUM_CH_STATS * max_nch) +
+ (NUM_SQ_STATS * max_nch * priv->max_opened_tc) +
+ (NUM_RQ_XDPSQ_STATS * max_nch) +
+ (NUM_XDPSQ_STATS * max_nch);
+}
+
+static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
+ int idx)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+ int i, j, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_CH_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ ch_stats_desc[j].format, i);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_RQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ rq_stats_desc[j].format, i);
+ for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ rq_xdpsq_stats_desc[j].format, i);
+ }
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_SQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ sq_stats_desc[j].format,
+ priv->channel_tc2txq[i][tc]);
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_XDPSQ_STATS; j++)
+ sprintf(data + (idx++) * ETH_GSTRING_LEN,
+ xdpsq_stats_desc[j].format, i);
+
+ return idx;
+}
+
+static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int idx)
+{
+ int max_nch = priv->profile->max_nch(priv->mdev);
+ int i, j, tc;
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_CH_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch,
+ ch_stats_desc, j);
+
+ for (i = 0; i < max_nch; i++) {
+ for (j = 0; j < NUM_RQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq,
+ rq_stats_desc, j);
+ for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq,
+ rq_xdpsq_stats_desc, j);
+ }
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++)
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_SQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc],
+ sq_stats_desc, j);
+
+ for (i = 0; i < max_nch; i++)
+ for (j = 0; j < NUM_XDPSQ_STATS; j++)
+ data[idx++] =
+ MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq,
+ xdpsq_stats_desc, j);
+
+ return idx;
+}
+
+/* The stats groups order is opposite to the update_stats() order calls */
+const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
+ {
+ .get_num_stats = mlx5e_grp_sw_get_num_stats,
+ .fill_strings = mlx5e_grp_sw_fill_strings,
+ .fill_stats = mlx5e_grp_sw_fill_stats,
+ .update_stats = mlx5e_grp_sw_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_q_get_num_stats,
+ .fill_strings = mlx5e_grp_q_fill_strings,
+ .fill_stats = mlx5e_grp_q_fill_stats,
+ .update_stats_mask = MLX5E_NDO_UPDATE_STATS,
+ .update_stats = mlx5e_grp_q_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_vnic_env_get_num_stats,
+ .fill_strings = mlx5e_grp_vnic_env_fill_strings,
+ .fill_stats = mlx5e_grp_vnic_env_fill_stats,
+ .update_stats = mlx5e_grp_vnic_env_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_vport_get_num_stats,
+ .fill_strings = mlx5e_grp_vport_fill_strings,
+ .fill_stats = mlx5e_grp_vport_fill_stats,
+ .update_stats_mask = MLX5E_NDO_UPDATE_STATS,
+ .update_stats = mlx5e_grp_vport_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_802_3_get_num_stats,
+ .fill_strings = mlx5e_grp_802_3_fill_strings,
+ .fill_stats = mlx5e_grp_802_3_fill_stats,
+ .update_stats_mask = MLX5E_NDO_UPDATE_STATS,
+ .update_stats = mlx5e_grp_802_3_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_2863_get_num_stats,
+ .fill_strings = mlx5e_grp_2863_fill_strings,
+ .fill_stats = mlx5e_grp_2863_fill_stats,
+ .update_stats = mlx5e_grp_2863_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_2819_get_num_stats,
+ .fill_strings = mlx5e_grp_2819_fill_strings,
+ .fill_stats = mlx5e_grp_2819_fill_stats,
+ .update_stats = mlx5e_grp_2819_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_phy_get_num_stats,
+ .fill_strings = mlx5e_grp_phy_fill_strings,
+ .fill_stats = mlx5e_grp_phy_fill_stats,
+ .update_stats = mlx5e_grp_phy_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_eth_ext_get_num_stats,
+ .fill_strings = mlx5e_grp_eth_ext_fill_strings,
+ .fill_stats = mlx5e_grp_eth_ext_fill_stats,
+ .update_stats = mlx5e_grp_eth_ext_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_pcie_get_num_stats,
+ .fill_strings = mlx5e_grp_pcie_fill_strings,
+ .fill_stats = mlx5e_grp_pcie_fill_stats,
+ .update_stats = mlx5e_grp_pcie_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_per_prio_get_num_stats,
+ .fill_strings = mlx5e_grp_per_prio_fill_strings,
+ .fill_stats = mlx5e_grp_per_prio_fill_stats,
+ .update_stats = mlx5e_grp_per_prio_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_pme_get_num_stats,
+ .fill_strings = mlx5e_grp_pme_fill_strings,
+ .fill_stats = mlx5e_grp_pme_fill_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_ipsec_get_num_stats,
+ .fill_strings = mlx5e_grp_ipsec_fill_strings,
+ .fill_stats = mlx5e_grp_ipsec_fill_stats,
+ .update_stats = mlx5e_grp_ipsec_update_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_tls_get_num_stats,
+ .fill_strings = mlx5e_grp_tls_fill_strings,
+ .fill_stats = mlx5e_grp_tls_fill_stats,
+ },
+ {
+ .get_num_stats = mlx5e_grp_channels_get_num_stats,
+ .fill_strings = mlx5e_grp_channels_fill_strings,
+ .fill_stats = mlx5e_grp_channels_fill_stats,
+ }
+};
+
+const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
new file mode 100644
index 000000000..3ea8033ed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_STATS_H__
+#define __MLX5_EN_STATS_H__
+
+#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \
+ (*(u64 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \
+ be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \
+ (*(u32 *)((char *)ptr + dsc[i].offset))
+#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
+ be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
+
+#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld)
+
+struct counter_desc {
+ char format[ETH_GSTRING_LEN];
+ size_t offset; /* Byte offset */
+};
+
+struct mlx5e_sw_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_tso_packets;
+ u64 tx_tso_bytes;
+ u64 tx_tso_inner_packets;
+ u64 tx_tso_inner_bytes;
+ u64 tx_added_vlan_packets;
+ u64 tx_nop;
+ u64 rx_lro_packets;
+ u64 rx_lro_bytes;
+ u64 rx_ecn_mark;
+ u64 rx_removed_vlan_packets;
+ u64 rx_csum_unnecessary;
+ u64 rx_csum_none;
+ u64 rx_csum_complete;
+ u64 rx_csum_complete_tail;
+ u64 rx_csum_complete_tail_slow;
+ u64 rx_csum_unnecessary_inner;
+ u64 rx_xdp_drop;
+ u64 rx_xdp_redirect;
+ u64 rx_xdp_tx_xmit;
+ u64 rx_xdp_tx_full;
+ u64 rx_xdp_tx_err;
+ u64 rx_xdp_tx_cqe;
+ u64 tx_csum_none;
+ u64 tx_csum_partial;
+ u64 tx_csum_partial_inner;
+ u64 tx_queue_stopped;
+ u64 tx_queue_dropped;
+ u64 tx_xmit_more;
+ u64 tx_recover;
+ u64 tx_cqes;
+ u64 tx_queue_wake;
+ u64 tx_cqe_err;
+ u64 tx_xdp_xmit;
+ u64 tx_xdp_full;
+ u64 tx_xdp_err;
+ u64 tx_xdp_cqes;
+ u64 rx_wqe_err;
+ u64 rx_mpwqe_filler_cqes;
+ u64 rx_mpwqe_filler_strides;
+ u64 rx_oversize_pkts_sw_drop;
+ u64 rx_buff_alloc_err;
+ u64 rx_cqe_compress_blks;
+ u64 rx_cqe_compress_pkts;
+ u64 rx_page_reuse;
+ u64 rx_cache_reuse;
+ u64 rx_cache_full;
+ u64 rx_cache_empty;
+ u64 rx_cache_busy;
+ u64 rx_cache_waive;
+ u64 rx_congst_umr;
+ u64 ch_events;
+ u64 ch_poll;
+ u64 ch_arm;
+ u64 ch_aff_change;
+ u64 ch_eq_rearm;
+
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tx_tls_ooo;
+ u64 tx_tls_resync_bytes;
+#endif
+};
+
+struct mlx5e_qcounter_stats {
+ u32 rx_out_of_buffer;
+ u32 rx_if_down_packets;
+};
+
+struct mlx5e_vnic_env_stats {
+ __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
+};
+
+#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \
+ vstats->query_vport_out, c)
+
+struct mlx5e_vport_stats {
+ __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)];
+};
+
+#define PPORT_802_3_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \
+ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+#define PPORT_2863_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \
+ counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+#define PPORT_2819_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \
+ counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+#define PPORT_PHY_STATISTICAL_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \
+ counter_set.phys_layer_statistical_cntrs.c##_high)
+#define PPORT_PER_PRIO_GET(pstats, prio, c) \
+ MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \
+ counter_set.eth_per_prio_grp_data_layout.c##_high)
+#define NUM_PPORT_PRIO 8
+#define PPORT_ETH_EXT_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+
+struct mlx5e_pport_stats {
+ __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+};
+
+#define PCIE_PERF_GET(pcie_stats, c) \
+ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
+ counter_set.pcie_perf_cntrs_grp_data_layout.c)
+
+#define PCIE_PERF_GET64(pcie_stats, c) \
+ MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
+ counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+
+struct mlx5e_pcie_stats {
+ __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
+};
+
+struct mlx5e_rq_stats {
+ u64 packets;
+ u64 bytes;
+ u64 csum_complete;
+ u64 csum_complete_tail;
+ u64 csum_complete_tail_slow;
+ u64 csum_unnecessary;
+ u64 csum_unnecessary_inner;
+ u64 csum_none;
+ u64 lro_packets;
+ u64 lro_bytes;
+ u64 ecn_mark;
+ u64 removed_vlan_packets;
+ u64 xdp_drop;
+ u64 xdp_redirect;
+ u64 wqe_err;
+ u64 mpwqe_filler_cqes;
+ u64 mpwqe_filler_strides;
+ u64 oversize_pkts_sw_drop;
+ u64 buff_alloc_err;
+ u64 cqe_compress_blks;
+ u64 cqe_compress_pkts;
+ u64 page_reuse;
+ u64 cache_reuse;
+ u64 cache_full;
+ u64 cache_empty;
+ u64 cache_busy;
+ u64 cache_waive;
+ u64 congst_umr;
+};
+
+struct mlx5e_sq_stats {
+ /* commonly accessed in data path */
+ u64 packets;
+ u64 bytes;
+ u64 xmit_more;
+ u64 tso_packets;
+ u64 tso_bytes;
+ u64 tso_inner_packets;
+ u64 tso_inner_bytes;
+ u64 csum_partial;
+ u64 csum_partial_inner;
+ u64 added_vlan_packets;
+ u64 nop;
+#ifdef CONFIG_MLX5_EN_TLS
+ u64 tls_ooo;
+ u64 tls_resync_bytes;
+#endif
+ /* less likely accessed in data path */
+ u64 csum_none;
+ u64 stopped;
+ u64 dropped;
+ u64 recover;
+ /* dirtied @completion */
+ u64 cqes ____cacheline_aligned_in_smp;
+ u64 wake;
+ u64 cqe_err;
+};
+
+struct mlx5e_xdpsq_stats {
+ u64 xmit;
+ u64 full;
+ u64 err;
+ /* dirtied @completion */
+ u64 cqes ____cacheline_aligned_in_smp;
+};
+
+struct mlx5e_ch_stats {
+ u64 events;
+ u64 poll;
+ u64 arm;
+ u64 aff_change;
+ u64 eq_rearm;
+};
+
+struct mlx5e_stats {
+ struct mlx5e_sw_stats sw;
+ struct mlx5e_qcounter_stats qcnt;
+ struct mlx5e_vnic_env_stats vnic;
+ struct mlx5e_vport_stats vport;
+ struct mlx5e_pport_stats pport;
+ struct rtnl_link_stats64 vf_vport;
+ struct mlx5e_pcie_stats pcie;
+};
+
+enum {
+ MLX5E_NDO_UPDATE_STATS = BIT(0x1),
+};
+
+struct mlx5e_priv;
+struct mlx5e_stats_grp {
+ u16 update_stats_mask;
+ int (*get_num_stats)(struct mlx5e_priv *priv);
+ int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx);
+ int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx);
+ void (*update_stats)(struct mlx5e_priv *priv);
+};
+
+extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
+extern const int mlx5e_num_stats_grps;
+
+void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
new file mode 100644
index 000000000..305085377
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -0,0 +1,3063 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/flow_dissector.h>
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/device.h>
+#include <linux/rhashtable.h>
+#include <net/switchdev.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/vxlan.h>
+#include <net/arp.h>
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+#include "eswitch.h"
+#include "lib/vxlan.h"
+#include "fs_core.h"
+#include "en/port.h"
+
+struct mlx5_nic_flow_attr {
+ u32 action;
+ u32 flow_tag;
+ u32 mod_hdr_id;
+ u32 hairpin_tirn;
+ u8 match_level;
+ struct mlx5_flow_table *hairpin_ft;
+};
+
+#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
+
+enum {
+ MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS,
+ MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS,
+ MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE),
+ MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1),
+ MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
+ MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
+ MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
+};
+
+#define MLX5E_TC_MAX_SPLITS 1
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ struct mlx5e_priv *priv;
+ u64 cookie;
+ u8 flags;
+ struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+ struct list_head encap; /* flows sharing the same encap ID */
+ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
+ struct list_head hairpin; /* flows sharing the same hairpin */
+ union {
+ struct mlx5_esw_flow_attr esw_attr[0];
+ struct mlx5_nic_flow_attr nic_attr[0];
+ };
+};
+
+struct mlx5e_tc_flow_parse_attr {
+ struct ip_tunnel_info tun_info;
+ struct mlx5_flow_spec spec;
+ int num_mod_hdr_actions;
+ int max_mod_hdr_actions;
+ void *mod_hdr_actions;
+ int mirred_ifindex;
+};
+
+enum {
+ MLX5_HEADER_TYPE_VXLAN = 0x0,
+ MLX5_HEADER_TYPE_NVGRE = 0x1,
+};
+
+#define MLX5E_TC_TABLE_NUM_GROUPS 4
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
+
+struct mlx5e_hairpin {
+ struct mlx5_hairpin *pair;
+
+ struct mlx5_core_dev *func_mdev;
+ struct mlx5e_priv *func_priv;
+ u32 tdn;
+ u32 tirn;
+
+ int num_channels;
+ struct mlx5e_rqt indir_rqt;
+ u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_ttc_table ttc;
+};
+
+struct mlx5e_hairpin_entry {
+ /* a node of a hash table which keeps all the hairpin entries */
+ struct hlist_node hairpin_hlist;
+
+ /* flows sharing the same hairpin */
+ struct list_head flows;
+
+ u16 peer_vhca_id;
+ u8 prio;
+ struct mlx5e_hairpin *hp;
+};
+
+struct mod_hdr_key {
+ int num_actions;
+ void *actions;
+};
+
+struct mlx5e_mod_hdr_entry {
+ /* a node of a hash table which keeps all the mod_hdr entries */
+ struct hlist_node mod_hdr_hlist;
+
+ /* flows sharing the same mod_hdr entry */
+ struct list_head flows;
+
+ struct mod_hdr_key key;
+
+ u32 mod_hdr_id;
+};
+
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+
+static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
+{
+ return jhash(key->actions,
+ key->num_actions * MLX5_MH_ACT_SZ, 0);
+}
+
+static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
+ struct mod_hdr_key *b)
+{
+ if (a->num_actions != b->num_actions)
+ return 1;
+
+ return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
+}
+
+static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int num_actions, actions_size, namespace, err;
+ struct mlx5e_mod_hdr_entry *mh;
+ struct mod_hdr_key key;
+ bool found = false;
+ u32 hash_key;
+
+ num_actions = parse_attr->num_mod_hdr_actions;
+ actions_size = MLX5_MH_ACT_SZ * num_actions;
+
+ key.actions = parse_attr->mod_hdr_actions;
+ key.num_actions = num_actions;
+
+ hash_key = hash_mod_hdr_info(&key);
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ namespace = MLX5_FLOW_NAMESPACE_FDB;
+ hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
+ mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, &key)) {
+ found = true;
+ break;
+ }
+ }
+ } else {
+ namespace = MLX5_FLOW_NAMESPACE_KERNEL;
+ hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
+ mod_hdr_hlist, hash_key) {
+ if (!cmp_mod_hdr_info(&mh->key, &key)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ if (found)
+ goto attach_flow;
+
+ mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
+ if (!mh)
+ return -ENOMEM;
+
+ mh->key.actions = (void *)mh + sizeof(*mh);
+ memcpy(mh->key.actions, key.actions, actions_size);
+ mh->key.num_actions = num_actions;
+ INIT_LIST_HEAD(&mh->flows);
+
+ err = mlx5_modify_header_alloc(priv->mdev, namespace,
+ mh->key.num_actions,
+ mh->key.actions,
+ &mh->mod_hdr_id);
+ if (err)
+ goto out_err;
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
+ else
+ hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
+
+attach_flow:
+ list_add(&flow->mod_hdr, &mh->flows);
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
+ else
+ flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
+
+ return 0;
+
+out_err:
+ kfree(mh);
+ return err;
+}
+
+static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct list_head *next = flow->mod_hdr.next;
+
+ list_del(&flow->mod_hdr);
+
+ if (list_empty(next)) {
+ struct mlx5e_mod_hdr_entry *mh;
+
+ mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
+
+ mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
+ hash_del(&mh->mod_hdr_hlist);
+ kfree(mh);
+ }
+}
+
+static
+struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
+{
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+
+ netdev = __dev_get_by_index(net, ifindex);
+ priv = netdev_priv(netdev);
+ return priv->mdev;
+}
+
+static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
+ void *tirc;
+ int err;
+
+ err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
+ if (err)
+ goto alloc_tdn_err;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
+ MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+
+ err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
+ if (err)
+ goto create_tir_err;
+
+ return 0;
+
+create_tir_err:
+ mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
+alloc_tdn_err:
+ return err;
+}
+
+static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
+{
+ mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
+ mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
+}
+
+static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
+{
+ u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
+ struct mlx5e_priv *priv = hp->func_priv;
+ int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
+
+ mlx5e_build_default_indir_rqt(indirection_rqt, sz,
+ hp->num_channels);
+
+ for (i = 0; i < sz; i++) {
+ ix = i;
+ if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR)
+ ix = mlx5e_bits_invert(i, ilog2(sz));
+ ix = indirection_rqt[ix];
+ rqn = hp->pair->rqn[ix];
+ MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
+ }
+}
+
+static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
+{
+ int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
+ struct mlx5e_priv *priv = hp->func_priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *rqtc;
+ u32 *in;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
+ mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
+
+ err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
+ if (!err)
+ hp->indir_rqt.enabled = true;
+
+ kvfree(in);
+ return err;
+}
+
+static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+ u32 in[MLX5_ST_SZ_DW(create_tir_in)];
+ int tt, i, err;
+ void *tirc;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+
+ MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
+
+ err = mlx5_core_create_tir(hp->func_mdev, in,
+ MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
+ if (err) {
+ mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
+ goto err_destroy_tirs;
+ }
+ }
+ return 0;
+
+err_destroy_tirs:
+ for (i = 0; i < tt; i++)
+ mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
+ return err;
+}
+
+static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
+}
+
+static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
+ struct ttc_params *ttc_params)
+{
+ struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
+
+ memset(ttc_params, 0, sizeof(*ttc_params));
+
+ ttc_params->any_tt_tirn = hp->tirn;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
+
+ ft_attr->max_fte = MLX5E_NUM_TT;
+ ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
+ ft_attr->prio = MLX5E_TC_PRIO;
+}
+
+static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+ struct ttc_params ttc_params;
+ int err;
+
+ err = mlx5e_hairpin_create_indirect_rqt(hp);
+ if (err)
+ return err;
+
+ err = mlx5e_hairpin_create_indirect_tirs(hp);
+ if (err)
+ goto err_create_indirect_tirs;
+
+ mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
+ err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
+ if (err)
+ goto err_create_ttc_table;
+
+ netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
+ hp->num_channels, hp->ttc.ft.t->id);
+
+ return 0;
+
+err_create_ttc_table:
+ mlx5e_hairpin_destroy_indirect_tirs(hp);
+err_create_indirect_tirs:
+ mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+
+ return err;
+}
+
+static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
+{
+ struct mlx5e_priv *priv = hp->func_priv;
+
+ mlx5e_destroy_ttc_table(priv, &hp->ttc);
+ mlx5e_hairpin_destroy_indirect_tirs(hp);
+ mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+}
+
+static struct mlx5e_hairpin *
+mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
+ int peer_ifindex)
+{
+ struct mlx5_core_dev *func_mdev, *peer_mdev;
+ struct mlx5e_hairpin *hp;
+ struct mlx5_hairpin *pair;
+ int err;
+
+ hp = kzalloc(sizeof(*hp), GFP_KERNEL);
+ if (!hp)
+ return ERR_PTR(-ENOMEM);
+
+ func_mdev = priv->mdev;
+ peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+
+ pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
+ if (IS_ERR(pair)) {
+ err = PTR_ERR(pair);
+ goto create_pair_err;
+ }
+ hp->pair = pair;
+ hp->func_mdev = func_mdev;
+ hp->func_priv = priv;
+ hp->num_channels = params->num_channels;
+
+ err = mlx5e_hairpin_create_transport(hp);
+ if (err)
+ goto create_transport_err;
+
+ if (hp->num_channels > 1) {
+ err = mlx5e_hairpin_rss_init(hp);
+ if (err)
+ goto rss_init_err;
+ }
+
+ return hp;
+
+rss_init_err:
+ mlx5e_hairpin_destroy_transport(hp);
+create_transport_err:
+ mlx5_core_hairpin_destroy(hp->pair);
+create_pair_err:
+ kfree(hp);
+ return ERR_PTR(err);
+}
+
+static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
+{
+ if (hp->num_channels > 1)
+ mlx5e_hairpin_rss_cleanup(hp);
+ mlx5e_hairpin_destroy_transport(hp);
+ mlx5_core_hairpin_destroy(hp->pair);
+ kvfree(hp);
+}
+
+static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
+{
+ return (peer_vhca_id << 16 | prio);
+}
+
+static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
+ u16 peer_vhca_id, u8 prio)
+{
+ struct mlx5e_hairpin_entry *hpe;
+ u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
+
+ hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
+ hairpin_hlist, hash_key) {
+ if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
+ return hpe;
+ }
+
+ return NULL;
+}
+
+#define UNKNOWN_MATCH_PRIO 8
+
+static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec, u8 *match_prio)
+{
+ void *headers_c, *headers_v;
+ u8 prio_val, prio_mask = 0;
+ bool vlan_present;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
+ netdev_warn(priv->netdev,
+ "only PCP trust state supported for hairpin\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+
+ vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
+ if (vlan_present) {
+ prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ }
+
+ if (!vlan_present || !prio_mask) {
+ prio_val = UNKNOWN_MATCH_PRIO;
+ } else if (prio_mask != 0x7) {
+ netdev_warn(priv->netdev,
+ "masked priority match not supported for hairpin\n");
+ return -EOPNOTSUPP;
+ }
+
+ *match_prio = prio_val;
+ return 0;
+}
+
+static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ int peer_ifindex = parse_attr->mirred_ifindex;
+ struct mlx5_hairpin_params params;
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5e_hairpin_entry *hpe;
+ struct mlx5e_hairpin *hp;
+ u64 link_speed64;
+ u32 link_speed;
+ u8 match_prio;
+ u16 peer_id;
+ int err;
+
+ peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
+ netdev_warn(priv->netdev, "hairpin is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
+ err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio);
+ if (err)
+ return err;
+ hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
+ if (hpe)
+ goto attach_flow;
+
+ hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
+ if (!hpe)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&hpe->flows);
+ hpe->peer_vhca_id = peer_id;
+ hpe->prio = match_prio;
+
+ params.log_data_size = 15;
+ params.log_data_size = min_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
+ params.log_data_size = max_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
+
+ params.log_num_packets = params.log_data_size -
+ MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
+ params.log_num_packets = min_t(u8, params.log_num_packets,
+ MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
+
+ params.q_counter = priv->q_counter;
+ /* set hairpin pair per each 50Gbs share of the link */
+ mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
+ link_speed = max_t(u32, link_speed, 50000);
+ link_speed64 = link_speed;
+ do_div(link_speed64, 50000);
+ params.num_channels = link_speed64;
+
+ hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
+ if (IS_ERR(hp)) {
+ err = PTR_ERR(hp);
+ goto create_hairpin_err;
+ }
+
+ netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
+ hp->tirn, hp->pair->rqn[0], hp->pair->peer_mdev->priv.name,
+ hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
+
+ hpe->hp = hp;
+ hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
+ hash_hairpin_info(peer_id, match_prio));
+
+attach_flow:
+ if (hpe->hp->num_channels > 1) {
+ flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
+ flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+ } else {
+ flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
+ }
+ list_add(&flow->hairpin, &hpe->flows);
+
+ return 0;
+
+create_hairpin_err:
+ kfree(hpe);
+ return err;
+}
+
+static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct list_head *next = flow->hairpin.next;
+
+ list_del(&flow->hairpin);
+
+ /* no more hairpin flows for us, release the hairpin pair */
+ if (list_empty(next)) {
+ struct mlx5e_hairpin_entry *hpe;
+
+ hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
+
+ netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
+ hpe->hp->pair->peer_mdev->priv.name);
+
+ mlx5e_hairpin_destroy(hpe->hp);
+ hash_del(&hpe->hairpin_hlist);
+ kfree(hpe);
+ }
+}
+
+static struct mlx5_flow_handle *
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_nic_flow_attr *attr = flow->nic_attr;
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_flow_act flow_act = {
+ .action = attr->action,
+ .has_flow_tag = true,
+ .flow_tag = attr->flow_tag,
+ .encap_id = 0,
+ };
+ struct mlx5_fc *counter = NULL;
+ struct mlx5_flow_handle *rule;
+ bool table_created = false;
+ int err, dest_ix = 0;
+
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_add_hairpin_flow;
+ }
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = attr->hairpin_ft;
+ } else {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest[dest_ix].tir_num = attr->hairpin_tirn;
+ }
+ dest_ix++;
+ } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = priv->fs.vlan.ft.t;
+ dest_ix++;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ counter = mlx5_fc_create(dev, true);
+ if (IS_ERR(counter)) {
+ rule = ERR_CAST(counter);
+ goto err_fc_create;
+ }
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[dest_ix].counter = counter;
+ dest_ix++;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ flow_act.modify_id = attr->mod_hdr_id;
+ kfree(parse_attr->mod_hdr_actions);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_create_mod_hdr_id;
+ }
+ }
+
+ if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
+ int tc_grp_size, tc_tbl_size;
+ u32 max_flow_counter;
+
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
+
+ tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
+ BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
+
+ priv->fs.tc.t =
+ mlx5_create_auto_grouped_flow_table(priv->fs.ns,
+ MLX5E_TC_PRIO,
+ tc_tbl_size,
+ MLX5E_TC_TABLE_NUM_GROUPS,
+ MLX5E_TC_FT_LEVEL, 0);
+ if (IS_ERR(priv->fs.tc.t)) {
+ netdev_err(priv->netdev,
+ "Failed to create tc offload table\n");
+ rule = ERR_CAST(priv->fs.tc.t);
+ goto err_create_ft;
+ }
+
+ table_created = true;
+ }
+
+ if (attr->match_level != MLX5_MATCH_NONE)
+ parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+ rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
+ &flow_act, dest, dest_ix);
+
+ if (IS_ERR(rule))
+ goto err_add_rule;
+
+ return rule;
+
+err_add_rule:
+ if (table_created) {
+ mlx5_destroy_flow_table(priv->fs.tc.t);
+ priv->fs.tc.t = NULL;
+ }
+err_create_ft:
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+err_create_mod_hdr_id:
+ mlx5_fc_destroy(dev, counter);
+err_fc_create:
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+ mlx5e_hairpin_flow_del(priv, flow);
+err_add_hairpin_flow:
+ return rule;
+}
+
+static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_nic_flow_attr *attr = flow->nic_attr;
+ struct mlx5_fc *counter = NULL;
+
+ counter = mlx5_flow_rule_counter(flow->rule[0]);
+ mlx5_del_flow_rules(flow->rule[0]);
+ mlx5_fc_destroy(priv->mdev, counter);
+
+ if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) {
+ mlx5_destroy_flow_table(priv->fs.tc.t);
+ priv->fs.tc.t = NULL;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+ mlx5e_hairpin_flow_del(priv, flow);
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow);
+
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct ip_tunnel_info *tun_info,
+ struct net_device *mirred_dev,
+ struct net_device **encap_dev,
+ struct mlx5e_tc_flow *flow);
+
+static struct mlx5_flow_handle *
+mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct net_device *out_dev, *encap_dev = NULL;
+ struct mlx5_flow_handle *rule = NULL;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *out_priv;
+ int err;
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ out_dev = __dev_get_by_index(dev_net(priv->netdev),
+ attr->parse_attr->mirred_ifindex);
+ err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
+ out_dev, &encap_dev, flow);
+ if (err) {
+ rule = ERR_PTR(err);
+ if (err != -EAGAIN)
+ goto err_attach_encap;
+ }
+ out_priv = netdev_priv(encap_dev);
+ rpriv = out_priv->ppriv;
+ attr->out_rep[attr->out_count] = rpriv->rep;
+ attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ }
+
+ err = mlx5_eswitch_add_vlan_action(esw, attr);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_add_vlan;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
+ kfree(parse_attr->mod_hdr_actions);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_mod_hdr;
+ }
+ }
+
+ /* we get here if (1) there's no error (rule being null) or when
+ * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
+ */
+ if (rule != ERR_PTR(-EAGAIN)) {
+ rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
+ if (IS_ERR(rule))
+ goto err_add_rule;
+
+ if (attr->mirror_count) {
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr);
+ if (IS_ERR(flow->rule[1]))
+ goto err_fwd_rule;
+ }
+ }
+ return rule;
+
+err_fwd_rule:
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+ rule = flow->rule[1];
+err_add_rule:
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+err_mod_hdr:
+ mlx5_eswitch_del_vlan_action(esw, attr);
+err_add_vlan:
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+ mlx5e_detach_encap(priv, flow);
+err_attach_encap:
+ return rule;
+}
+
+static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+
+ if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+ if (attr->mirror_count)
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ }
+
+ mlx5_eswitch_del_vlan_action(esw, attr);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ mlx5e_detach_encap(priv, flow);
+ kvfree(attr->parse_attr);
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ mlx5e_detach_mod_hdr(priv, flow);
+}
+
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+ e->encap_size, e->encap_header,
+ &e->encap_id);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
+ err);
+ return;
+ }
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(priv);
+
+ list_for_each_entry(flow, &e->flows, encap) {
+ esw_attr = flow->esw_attr;
+ esw_attr->encap_id = e->encap_id;
+ flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+ err);
+ continue;
+ }
+
+ if (esw_attr->mirror_count) {
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
+ if (IS_ERR(flow->rule[1])) {
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr);
+ err = PTR_ERR(flow->rule[1]);
+ mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n",
+ err);
+ continue;
+ }
+ }
+
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ }
+}
+
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow *flow;
+
+ list_for_each_entry(flow, &e->flows, encap) {
+ if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+
+ flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+ if (attr->mirror_count)
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ }
+ }
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
+ e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
+ mlx5_encap_dealloc(priv->mdev, e->encap_id);
+ }
+}
+
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
+{
+ struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5e_encap_entry *e;
+ struct mlx5_fc *counter;
+ struct neigh_table *tbl;
+ bool neigh_used = false;
+ struct neighbour *n;
+ u64 lastuse;
+
+ if (m_neigh->family == AF_INET)
+ tbl = &arp_tbl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (m_neigh->family == AF_INET6)
+ tbl = &nd_tbl;
+#endif
+ else
+ return;
+
+ list_for_each_entry(e, &nhe->encap_list, encap_list) {
+ if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
+ continue;
+ list_for_each_entry(flow, &e->flows, encap) {
+ if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+ counter = mlx5_flow_rule_counter(flow->rule[0]);
+ lastuse = mlx5_fc_query_lastuse(counter);
+ if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
+ neigh_used = true;
+ break;
+ }
+ }
+ }
+ if (neigh_used)
+ break;
+ }
+
+ if (neigh_used) {
+ nhe->reported_lastuse = jiffies;
+
+ /* find the relevant neigh according to the cached device and
+ * dst ip pair
+ */
+ n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
+ if (!n)
+ return;
+
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct list_head *next = flow->encap.next;
+
+ list_del(&flow->encap);
+ if (list_empty(next)) {
+ struct mlx5e_encap_entry *e;
+
+ e = list_entry(next, struct mlx5e_encap_entry, flows);
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ mlx5_encap_dealloc(priv->mdev, e->encap_id);
+
+ hash_del_rcu(&e->encap_hlist);
+ kfree(e->encap_header);
+ kfree(e);
+ }
+}
+
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ else
+ mlx5e_tc_del_nic_flow(priv, flow);
+}
+
+static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->key);
+ struct flow_dissector_key_keyid *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ f->mask);
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(key->keyid));
+ }
+}
+
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+
+ struct flow_dissector_key_control *enc_control =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ f->key);
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ f->mask);
+
+ /* Full udp dst port must be given */
+ if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
+ goto vxlan_match_offload_err;
+
+ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+ parse_vxlan_attr(spec, f);
+ else {
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(key->dst));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(key->src));
+ } else { /* udp dst port must be given */
+vxlan_match_offload_err:
+ netdev_warn(priv->netdev,
+ "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ f->mask);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4,
+ ntohl(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+ ntohl(key->dst));
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+ } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+ struct flow_dissector_key_ip *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IP,
+ f->key);
+ struct flow_dissector_key_ip *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IP,
+ f->mask);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+ }
+
+ /* Enforce DMAC when offloading incoming tunneled flows.
+ * Flow counters require a match on the DMAC.
+ */
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16), priv->netdev->dev_addr);
+
+ /* let software handle IP fragments */
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+
+ return 0;
+}
+
+static int __parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f,
+ u8 *match_level)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ *match_level = MLX5_MATCH_NONE;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_CVLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_TCP) |
+ BIT(FLOW_DISSECTOR_KEY_IP) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
+ netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
+ f->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if ((dissector_uses_key(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
+ dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
+ dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
+ dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ f->key);
+ switch (key->addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ if (parse_tunnel_attr(priv, spec, f))
+ return -EOPNOTSUPP;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* In decap flow, header pointers should point to the inner
+ * headers, outer header were already set by parse_tunnel_attr
+ */
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ inner_headers);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ ntohs(mask->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ntohs(key->n_proto));
+
+ if (mask->n_proto)
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+ if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
+ if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ svlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ svlan_tag, 1);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+ }
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
+
+ *match_level = MLX5_MATCH_L2;
+ }
+ } else if (*match_level != MLX5_MATCH_NONE) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CVLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CVLAN,
+ f->mask);
+ if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
+ if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_svlan_tag, 1);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ outer_second_svlan_tag, 1);
+ } else {
+ MLX5_SET(fte_match_set_misc, misc_c,
+ outer_second_cvlan_tag, 1);
+ MLX5_SET(fte_match_set_misc, misc_v,
+ outer_second_cvlan_tag, 1);
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
+ mask->vlan_id);
+ MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
+ key->vlan_id);
+ MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
+ mask->vlan_priority);
+ MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
+ key->vlan_priority);
+
+ *match_level = MLX5_MATCH_L2;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ mask->dst);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ key->dst);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ mask->src);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ key->src);
+
+ if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
+ *match_level = MLX5_MATCH_L2;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+
+ struct flow_dissector_key_control *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->mask);
+ addr_type = key->addr_type;
+
+ /* the HW doesn't support frag first/later */
+ if (mask->flags & FLOW_DIS_FIRST_FRAG)
+ return -EOPNOTSUPP;
+
+ if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+ key->flags & FLOW_DIS_IS_FRAGMENT);
+
+ /* the HW doesn't need L3 inline to match on frag=no */
+ if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
+ *match_level = MLX5_MATCH_L2;
+ /* *** L2 attributes parsing up to here *** */
+ else
+ *match_level = MLX5_MATCH_L3;
+ }
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ ip_proto = key->ip_proto;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ key->ip_proto);
+
+ if (mask->ip_proto)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &mask->src, sizeof(mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &key->src, sizeof(key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &mask->dst, sizeof(mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &key->dst, sizeof(key->dst));
+
+ if (mask->src || mask->dst)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &mask->src, sizeof(mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &key->src, sizeof(key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &mask->dst, sizeof(mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &key->dst, sizeof(key->dst));
+
+ if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
+ ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_dissector_key_ip *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->key);
+ struct flow_dissector_key_ip *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->mask);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+
+ if (mask->ttl &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_ipv4_ttl))
+ return -EOPNOTSUPP;
+
+ if (mask->tos || mask->ttl)
+ *match_level = MLX5_MATCH_L3;
+ }
+
+ /* *** L3 attributes parsing up to here *** */
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(key->dst));
+ break;
+ default:
+ netdev_err(priv->netdev,
+ "Only UDP and TCP transport are supported\n");
+ return -EINVAL;
+ }
+
+ if (mask->src || mask->dst)
+ *match_level = MLX5_MATCH_L4;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_dissector_key_tcp *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ f->key);
+ struct flow_dissector_key_tcp *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ f->mask);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(key->flags));
+
+ if (mask->flags)
+ *match_level = MLX5_MATCH_L4;
+ }
+
+ return 0;
+}
+
+static int parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlx5_core_dev *dev = priv->mdev;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep;
+ u8 match_level;
+ int err;
+
+ err = __parse_cls_flower(priv, spec, f, &match_level);
+
+ if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
+ rep = rpriv->rep;
+ if (rep->vport != FDB_UPLINK_VPORT &&
+ (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
+ esw->offloads.inline_mode < match_level)) {
+ netdev_warn(priv->netdev,
+ "Flow is not offloaded due to min inline setting, required %d actual %d\n",
+ match_level, esw->offloads.inline_mode);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ flow->esw_attr->match_level = match_level;
+ else
+ flow->nic_attr->match_level = match_level;
+
+ return err;
+}
+
+struct pedit_headers {
+ struct ethhdr eth;
+ struct iphdr ip4;
+ struct ipv6hdr ip6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+};
+
+static int pedit_header_offsets[] = {
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+ struct pedit_headers *masks,
+ struct pedit_headers *vals)
+{
+ u32 *curr_pmask, *curr_pval;
+
+ if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
+ goto out_err;
+
+ curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset);
+
+ if (*curr_pmask & mask) /* disallow acting twice on the same location */
+ goto out_err;
+
+ *curr_pmask |= mask;
+ *curr_pval |= (val & mask);
+
+ return 0;
+
+out_err:
+ return -EOPNOTSUPP;
+}
+
+struct mlx5_fields {
+ u8 field;
+ u8 size;
+ u32 offset;
+};
+
+#define OFFLOAD(fw_field, size, field, off) \
+ {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}
+
+static struct mlx5_fields fields[] = {
+ OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
+ OFFLOAD(DMAC_15_0, 2, eth.h_dest[4], 0),
+ OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
+ OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0),
+ OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0),
+
+ OFFLOAD(IP_TTL, 1, ip4.ttl, 0),
+ OFFLOAD(SIPV4, 4, ip4.saddr, 0),
+ OFFLOAD(DIPV4, 4, ip4.daddr, 0),
+
+ OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
+ OFFLOAD(SIPV6_95_64, 4, ip6.saddr.s6_addr32[1], 0),
+ OFFLOAD(SIPV6_63_32, 4, ip6.saddr.s6_addr32[2], 0),
+ OFFLOAD(SIPV6_31_0, 4, ip6.saddr.s6_addr32[3], 0),
+ OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
+ OFFLOAD(DIPV6_95_64, 4, ip6.daddr.s6_addr32[1], 0),
+ OFFLOAD(DIPV6_63_32, 4, ip6.daddr.s6_addr32[2], 0),
+ OFFLOAD(DIPV6_31_0, 4, ip6.daddr.s6_addr32[3], 0),
+ OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),
+
+ OFFLOAD(TCP_SPORT, 2, tcp.source, 0),
+ OFFLOAD(TCP_DPORT, 2, tcp.dest, 0),
+ OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),
+
+ OFFLOAD(UDP_SPORT, 2, udp.source, 0),
+ OFFLOAD(UDP_DPORT, 2, udp.dest, 0),
+};
+
+/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
+ * max from the SW pedit action. On success, attr->num_mod_hdr_actions
+ * says how many HW actions were actually parsed.
+ */
+static int offload_pedit_fields(struct pedit_headers *masks,
+ struct pedit_headers *vals,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
+ int i, action_size, nactions, max_actions, first, last, next_z;
+ void *s_masks_p, *a_masks_p, *vals_p;
+ struct mlx5_fields *f;
+ u8 cmd, field_bsize;
+ u32 s_mask, a_mask;
+ unsigned long mask;
+ __be32 mask_be32;
+ __be16 mask_be16;
+ void *action;
+
+ set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
+ add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
+ set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
+ add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];
+
+ action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+ action = parse_attr->mod_hdr_actions +
+ parse_attr->num_mod_hdr_actions * action_size;
+
+ max_actions = parse_attr->max_mod_hdr_actions;
+ nactions = parse_attr->num_mod_hdr_actions;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ /* avoid seeing bits set from previous iterations */
+ s_mask = 0;
+ a_mask = 0;
+
+ s_masks_p = (void *)set_masks + f->offset;
+ a_masks_p = (void *)add_masks + f->offset;
+
+ memcpy(&s_mask, s_masks_p, f->size);
+ memcpy(&a_mask, a_masks_p, f->size);
+
+ if (!s_mask && !a_mask) /* nothing to offload here */
+ continue;
+
+ if (s_mask && a_mask) {
+ printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
+ return -EOPNOTSUPP;
+ }
+
+ if (nactions == max_actions) {
+ printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
+ return -EOPNOTSUPP;
+ }
+
+ if (s_mask) {
+ cmd = MLX5_ACTION_TYPE_SET;
+ mask = s_mask;
+ vals_p = (void *)set_vals + f->offset;
+ /* clear to denote we consumed this field */
+ memset(s_masks_p, 0, f->size);
+ } else {
+ cmd = MLX5_ACTION_TYPE_ADD;
+ mask = a_mask;
+ vals_p = (void *)add_vals + f->offset;
+ /* clear to denote we consumed this field */
+ memset(a_masks_p, 0, f->size);
+ }
+
+ field_bsize = f->size * BITS_PER_BYTE;
+
+ if (field_bsize == 32) {
+ mask_be32 = *(__be32 *)&mask;
+ mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
+ } else if (field_bsize == 16) {
+ mask_be16 = *(__be16 *)&mask;
+ mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+ }
+
+ first = find_first_bit(&mask, field_bsize);
+ next_z = find_next_zero_bit(&mask, field_bsize, first);
+ last = find_last_bit(&mask, field_bsize);
+ if (first < next_z && next_z < last) {
+ printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
+ mask);
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(set_action_in, action, action_type, cmd);
+ MLX5_SET(set_action_in, action, field, f->field);
+
+ if (cmd == MLX5_ACTION_TYPE_SET) {
+ MLX5_SET(set_action_in, action, offset, first);
+ /* length is num of bits to be written, zero means length of 32 */
+ MLX5_SET(set_action_in, action, length, (last - first + 1));
+ }
+
+ if (field_bsize == 32)
+ MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
+ else if (field_bsize == 16)
+ MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
+ else if (field_bsize == 8)
+ MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
+
+ action += action_size;
+ nactions++;
+ }
+
+ parse_attr->num_mod_hdr_actions = nactions;
+ return 0;
+}
+
+static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
+ const struct tc_action *a, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ int nkeys, action_size, max_actions;
+
+ nkeys = tcf_pedit_nkeys(a);
+ action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);
+
+ /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
+ max_actions = min(max_actions, nkeys * 16);
+
+ parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
+ if (!parse_attr->mod_hdr_actions)
+ return -ENOMEM;
+
+ parse_attr->max_mod_hdr_actions = max_actions;
+ return 0;
+}
+
+static const struct pedit_headers zero_masks = {};
+
+static int parse_tc_pedit_action(struct mlx5e_priv *priv,
+ const struct tc_action *a, int namespace,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
+ int nkeys, i, err = -EOPNOTSUPP;
+ u32 mask, val, offset;
+ u8 cmd, htype;
+
+ nkeys = tcf_pedit_nkeys(a);
+
+ memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+ memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
+
+ for (i = 0; i < nkeys; i++) {
+ htype = tcf_pedit_htype(a, i);
+ cmd = tcf_pedit_cmd(a, i);
+ err = -EOPNOTSUPP; /* can't be all optimistic */
+
+ if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
+ netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n");
+ goto out_err;
+ }
+
+ if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
+ netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd);
+ goto out_err;
+ }
+
+ mask = tcf_pedit_mask(a, i);
+ val = tcf_pedit_val(a, i);
+ offset = tcf_pedit_offset(a, i);
+
+ err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
+ if (err)
+ goto out_err;
+ }
+
+ if (!parse_attr->mod_hdr_actions) {
+ err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
+ if (err)
+ goto out_err;
+ }
+
+ err = offload_pedit_fields(masks, vals, parse_attr);
+ if (err < 0)
+ goto out_dealloc_parsed_actions;
+
+ for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
+ cmd_masks = &masks[cmd];
+ if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+ netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
+ print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
+ 16, 1, cmd_masks, sizeof(zero_masks), true);
+ err = -EOPNOTSUPP;
+ goto out_dealloc_parsed_actions;
+ }
+ }
+
+ return 0;
+
+out_dealloc_parsed_actions:
+ kfree(parse_attr->mod_hdr_actions);
+out_err:
+ return err;
+}
+
+static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
+{
+ u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+ TCA_CSUM_UPDATE_FLAG_UDP;
+
+ /* The HW recalcs checksums only if re-writing headers */
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ netdev_warn(priv->netdev,
+ "TC csum action is only offloaded with pedit\n");
+ return false;
+ }
+
+ if (update_flags & ~prot_flags) {
+ netdev_warn(priv->netdev,
+ "can't offload TC csum action for some header/s - flags %#x\n",
+ update_flags);
+ return false;
+ }
+
+ return true;
+}
+
+static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
+ struct tcf_exts *exts)
+{
+ const struct tc_action *a;
+ bool modify_ip_header;
+ LIST_HEAD(actions);
+ u8 htype, ip_proto;
+ void *headers_v;
+ u16 ethertype;
+ int nkeys, i;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+ ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+
+ /* for non-IP we only re-write MACs, so we're okay */
+ if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
+ goto out_ok;
+
+ modify_ip_header = false;
+ tcf_exts_for_each_action(i, a, exts) {
+ int k;
+
+ if (!is_tcf_pedit(a))
+ continue;
+
+ nkeys = tcf_pedit_nkeys(a);
+ for (k = 0; k < nkeys; k++) {
+ htype = tcf_pedit_htype(a, k);
+ if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 ||
+ htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) {
+ modify_ip_header = true;
+ break;
+ }
+ }
+ }
+
+ ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
+ if (modify_ip_header && ip_proto != IPPROTO_TCP &&
+ ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
+ pr_info("can't offload re-write of ip proto %d\n", ip_proto);
+ return false;
+ }
+
+out_ok:
+ return true;
+}
+
+static bool actions_match_supported(struct mlx5e_priv *priv,
+ struct tcf_exts *exts,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ u32 actions;
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ actions = flow->esw_attr->action;
+ else
+ actions = flow->nic_attr->action;
+
+ if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
+ !(actions & MLX5_FLOW_CONTEXT_ACTION_DECAP))
+ return false;
+
+ if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return modify_header_match_supported(&parse_attr->spec, exts);
+
+ return true;
+}
+
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+{
+ struct mlx5_core_dev *fmdev, *pmdev;
+ u64 fsystem_guid, psystem_guid;
+
+ fmdev = priv->mdev;
+ pmdev = peer_priv->mdev;
+
+ mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
+ mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
+
+ return (fsystem_guid == psystem_guid);
+}
+
+static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_nic_flow_attr *attr = flow->nic_attr;
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+ u32 action = 0;
+ int err, i;
+
+ if (!tcf_exts_has_actions(exts))
+ return -EINVAL;
+
+ attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+
+ tcf_exts_for_each_action(i, a, exts) {
+ if (is_tcf_gact_shot(a)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ if (MLX5_CAP_FLOWTABLE(priv->mdev,
+ flow_table_properties_nic_receive.flow_counter))
+ action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ continue;
+ }
+
+ if (is_tcf_pedit(a)) {
+ err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
+ parse_attr);
+ if (err)
+ return err;
+
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ continue;
+ }
+
+ if (is_tcf_csum(a)) {
+ if (csum_offload_supported(priv, action,
+ tcf_csum_update_flags(a)))
+ continue;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (is_tcf_mirred_egress_redirect(a)) {
+ struct net_device *peer_dev = tcf_mirred_dev(a);
+
+ if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
+ same_hw_devs(priv, netdev_priv(peer_dev))) {
+ parse_attr->mirred_ifindex = peer_dev->ifindex;
+ flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ } else {
+ netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
+ peer_dev->name);
+ return -EINVAL;
+ }
+ continue;
+ }
+
+ if (is_tcf_skbedit_mark(a)) {
+ u32 mark = tcf_skbedit_mark(a);
+
+ if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
+ mark);
+ return -EINVAL;
+ }
+
+ attr->flow_tag = mark;
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ continue;
+ }
+
+ return -EINVAL;
+ }
+
+ attr->action = action;
+ if (!actions_match_supported(priv, exts, parse_attr, flow))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static inline int cmp_encap_info(struct ip_tunnel_key *a,
+ struct ip_tunnel_key *b)
+{
+ return memcmp(a, b, sizeof(*a));
+}
+
+static inline int hash_encap_info(struct ip_tunnel_key *key)
+{
+ return jhash(key, sizeof(*key), 0);
+}
+
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct flowi4 *fl4,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct rtable *rt;
+ struct neighbour *n = NULL;
+
+#if IS_ENABLED(CONFIG_INET)
+ int ret;
+
+ rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ /* if the egress device isn't on the same HW e-switch, we use the uplink */
+ if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
+ *out_dev = uplink_rpriv->netdev;
+ else
+ *out_dev = rt->dst.dev;
+
+ if (!(*out_ttl))
+ *out_ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+ ip_rt_put(rt);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
+ struct net_device *peer_netdev)
+{
+ struct mlx5e_priv *peer_priv;
+
+ peer_priv = netdev_priv(peer_netdev);
+
+ return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
+ (priv->netdev->netdev_ops == peer_netdev->netdev_ops) &&
+ same_hw_devs(priv, peer_priv) &&
+ MLX5_VPORT_MANAGER(peer_priv->mdev) &&
+ (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS));
+}
+
+static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct net_device **out_dev,
+ struct flowi6 *fl6,
+ struct neighbour **out_n,
+ u8 *out_ttl)
+{
+ struct neighbour *n = NULL;
+ struct dst_entry *dst;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ struct mlx5e_rep_priv *uplink_rpriv;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6,
+ NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
+
+ if (!(*out_ttl))
+ *out_ttl = ip6_dst_hoplimit(dst);
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ /* if the egress device isn't on the same HW e-switch, we use the uplink */
+ if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
+ *out_dev = uplink_rpriv->netdev;
+ else
+ *out_dev = dst->dev;
+#else
+ return -EOPNOTSUPP;
+#endif
+
+ n = dst_neigh_lookup(dst, &fl6->daddr);
+ dst_release(dst);
+ if (!n)
+ return -ENOMEM;
+
+ *out_n = n;
+ return 0;
+}
+
+static void gen_vxlan_header_ipv4(struct net_device *out_dev,
+ char buf[], int encap_size,
+ unsigned char h_dest[ETH_ALEN],
+ u8 tos, u8 ttl,
+ __be32 daddr,
+ __be32 saddr,
+ __be16 udp_dst_port,
+ __be32 vx_vni)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
+ struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
+ struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+ memset(buf, 0, encap_size);
+
+ ether_addr_copy(eth->h_dest, h_dest);
+ ether_addr_copy(eth->h_source, out_dev->dev_addr);
+ eth->h_proto = htons(ETH_P_IP);
+
+ ip->daddr = daddr;
+ ip->saddr = saddr;
+
+ ip->tos = tos;
+ ip->ttl = ttl;
+ ip->protocol = IPPROTO_UDP;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+
+ udp->dest = udp_dst_port;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(vx_vni);
+}
+
+static void gen_vxlan_header_ipv6(struct net_device *out_dev,
+ char buf[], int encap_size,
+ unsigned char h_dest[ETH_ALEN],
+ u8 tos, u8 ttl,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr,
+ __be16 udp_dst_port,
+ __be32 vx_vni)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
+ struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
+ struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+ memset(buf, 0, encap_size);
+
+ ether_addr_copy(eth->h_dest, h_dest);
+ ether_addr_copy(eth->h_source, out_dev->dev_addr);
+ eth->h_proto = htons(ETH_P_IPV6);
+
+ ip6_flow_hdr(ip6h, tos, 0);
+ /* the HW fills up ipv6 payload len */
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+
+ udp->dest = udp_dst_port;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(vx_vni);
+}
+
+static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev;
+ struct neighbour *n = NULL;
+ struct flowi4 fl4 = {};
+ u8 nud_state, tos, ttl;
+ char *encap_header;
+ int err;
+
+ if (max_encap_size < ipv4_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv4_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ fl4.flowi4_proto = IPPROTO_UDP;
+ fl4.fl4_dport = tun_key->tp_dst;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto free_encap;
+ }
+
+ tos = tun_key->tos;
+ ttl = tun_key->ttl;
+
+ fl4.flowi4_tos = tun_key->tos;
+ fl4.daddr = tun_key->u.ipv4.dst;
+ fl4.saddr = tun_key->u.ipv4.src;
+
+ err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
+ &fl4, &n, &ttl);
+ if (err)
+ goto free_encap;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's importent to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ gen_vxlan_header_ipv4(out_dev, encap_header,
+ ipv4_encap_size, e->h_dest, tos, ttl,
+ fl4.daddr,
+ fl4.saddr, tun_key->tp_dst,
+ tunnel_id_to_key32(tun_key->tun_id));
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto destroy_neigh_entry;
+ }
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+ ipv4_encap_size, encap_header, &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
+ struct net_device *mirred_dev,
+ struct mlx5e_encap_entry *e)
+{
+ int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+ int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
+ struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ struct net_device *out_dev = NULL;
+ struct neighbour *n = NULL;
+ struct flowi6 fl6 = {};
+ u8 nud_state, tos, ttl;
+ char *encap_header;
+ int err;
+
+ if (max_encap_size < ipv6_encap_size) {
+ mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+ ipv6_encap_size, max_encap_size);
+ return -EOPNOTSUPP;
+ }
+
+ encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+ if (!encap_header)
+ return -ENOMEM;
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ fl6.flowi6_proto = IPPROTO_UDP;
+ fl6.fl6_dport = tun_key->tp_dst;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto free_encap;
+ }
+
+ tos = tun_key->tos;
+ ttl = tun_key->ttl;
+
+ fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ fl6.daddr = tun_key->u.ipv6.dst;
+ fl6.saddr = tun_key->u.ipv6.src;
+
+ err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
+ &fl6, &n, &ttl);
+ if (err)
+ goto free_encap;
+
+ /* used by mlx5e_detach_encap to lookup a neigh hash table
+ * entry in the neigh hash table when a user deletes a rule
+ */
+ e->m_neigh.dev = n->dev;
+ e->m_neigh.family = n->ops->family;
+ memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+ e->out_dev = out_dev;
+
+ /* It's importent to add the neigh to the hash table before checking
+ * the neigh validity state. So if we'll get a notification, in case the
+ * neigh changes it's validity state, we would find the relevant neigh
+ * in the hash.
+ */
+ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
+ if (err)
+ goto free_encap;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(e->h_dest, n->ha);
+ read_unlock_bh(&n->lock);
+
+ switch (e->tunnel_type) {
+ case MLX5_HEADER_TYPE_VXLAN:
+ gen_vxlan_header_ipv6(out_dev, encap_header,
+ ipv6_encap_size, e->h_dest, tos, ttl,
+ &fl6.daddr,
+ &fl6.saddr, tun_key->tp_dst,
+ tunnel_id_to_key32(tun_key->tun_id));
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto destroy_neigh_entry;
+ }
+
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
+ err = -EAGAIN;
+ goto out;
+ }
+
+ err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+ ipv6_encap_size, encap_header, &e->encap_id);
+ if (err)
+ goto destroy_neigh_entry;
+
+ e->flags |= MLX5_ENCAP_ENTRY_VALID;
+ mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
+ neigh_release(n);
+ return err;
+
+destroy_neigh_entry:
+ mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
+free_encap:
+ kfree(encap_header);
+out:
+ if (n)
+ neigh_release(n);
+ return err;
+}
+
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+ struct ip_tunnel_info *tun_info,
+ struct net_device *mirred_dev,
+ struct net_device **encap_dev,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ unsigned short family = ip_tunnel_info_af(tun_info);
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct ip_tunnel_key *key = &tun_info->key;
+ struct mlx5e_encap_entry *e;
+ int tunnel_type, err = 0;
+ uintptr_t hash_key;
+ bool found = false;
+
+ /* udp dst port must be set */
+ if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
+ goto vxlan_encap_offload_err;
+
+ /* setting udp src port isn't supported */
+ if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
+vxlan_encap_offload_err:
+ netdev_warn(priv->netdev,
+ "must set udp dst port and not set udp src port\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
+ MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
+ tunnel_type = MLX5_HEADER_TYPE_VXLAN;
+ } else {
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
+ return -EOPNOTSUPP;
+ }
+
+ hash_key = hash_encap_info(key);
+
+ hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+ encap_hlist, hash_key) {
+ if (!cmp_encap_info(&e->tun_info.key, key)) {
+ found = true;
+ break;
+ }
+ }
+
+ /* must verify if encap is valid or not */
+ if (found)
+ goto attach_flow;
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ e->tun_info = *tun_info;
+ e->tunnel_type = tunnel_type;
+ INIT_LIST_HEAD(&e->flows);
+
+ if (family == AF_INET)
+ err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
+ else if (family == AF_INET6)
+ err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
+
+ if (err && err != -EAGAIN)
+ goto out_err;
+
+ hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+
+attach_flow:
+ list_add(&flow->encap, &e->flows);
+ *encap_dev = e->out_dev;
+ if (e->flags & MLX5_ENCAP_ENTRY_VALID)
+ attr->encap_id = e->encap_id;
+ else
+ err = -EAGAIN;
+
+ return err;
+
+out_err:
+ kfree(e);
+ return err;
+}
+
+static int parse_tc_vlan_action(struct mlx5e_priv *priv,
+ const struct tc_action *a,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action)
+{
+ u8 vlan_idx = attr->total_vlan;
+
+ if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
+ return -EOPNOTSUPP;
+
+ if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH))
+ return -EOPNOTSUPP;
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+ } else {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ }
+ } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
+ attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a);
+ attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a);
+ attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a);
+ if (!attr->vlan_proto[vlan_idx])
+ attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH))
+ return -EOPNOTSUPP;
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ } else {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+ (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
+ tcf_vlan_push_prio(a)))
+ return -EOPNOTSUPP;
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ }
+ } else { /* action is TCA_VLAN_ACT_MODIFY */
+ return -EOPNOTSUPP;
+ }
+
+ attr->total_vlan = vlan_idx + 1;
+
+ return 0;
+}
+
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct ip_tunnel_info *info = NULL;
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+ bool encap = false;
+ u32 action = 0;
+ int err, i;
+
+ if (!tcf_exts_has_actions(exts))
+ return -EINVAL;
+
+ attr->in_rep = rpriv->rep;
+ attr->in_mdev = priv->mdev;
+
+ tcf_exts_for_each_action(i, a, exts) {
+ if (is_tcf_gact_shot(a)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ continue;
+ }
+
+ if (is_tcf_pedit(a)) {
+ err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
+ parse_attr);
+ if (err)
+ return err;
+
+ action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ attr->mirror_count = attr->out_count;
+ continue;
+ }
+
+ if (is_tcf_csum(a)) {
+ if (csum_offload_supported(priv, action,
+ tcf_csum_update_flags(a)))
+ continue;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) {
+ struct mlx5e_priv *out_priv;
+ struct net_device *out_dev;
+
+ out_dev = tcf_mirred_dev(a);
+
+ if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ pr_err("can't support more than %d output ports, can't offload forwarding\n",
+ attr->out_count);
+ return -EOPNOTSUPP;
+ }
+
+ if (switchdev_port_same_parent_id(priv->netdev,
+ out_dev) ||
+ is_merged_eswitch_dev(priv, out_dev)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ out_priv = netdev_priv(out_dev);
+ rpriv = out_priv->ppriv;
+ attr->out_rep[attr->out_count] = rpriv->rep;
+ attr->out_mdev[attr->out_count++] = out_priv->mdev;
+ } else if (encap) {
+ parse_attr->mirred_ifindex = out_dev->ifindex;
+ parse_attr->tun_info = *info;
+ attr->parse_attr = parse_attr;
+ action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ /* attr->out_rep is resolved when we handle encap */
+ } else {
+ pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+ priv->netdev->name, out_dev->name);
+ return -EINVAL;
+ }
+ continue;
+ }
+
+ if (is_tcf_tunnel_set(a)) {
+ info = tcf_tunnel_info(a);
+ if (info)
+ encap = true;
+ else
+ return -EOPNOTSUPP;
+ attr->mirror_count = attr->out_count;
+ continue;
+ }
+
+ if (is_tcf_vlan(a)) {
+ err = parse_tc_vlan_action(priv, a, attr, &action);
+
+ if (err)
+ return err;
+
+ attr->mirror_count = attr->out_count;
+ continue;
+ }
+
+ if (is_tcf_tunnel_release(a)) {
+ action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ continue;
+ }
+
+ return -EINVAL;
+ }
+
+ attr->action = action;
+ if (!actions_match_supported(priv, exts, parse_attr, flow))
+ return -EOPNOTSUPP;
+
+ if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void get_flags(int flags, u8 *flow_flags)
+{
+ u8 __flow_flags = 0;
+
+ if (flags & MLX5E_TC_INGRESS)
+ __flow_flags |= MLX5E_TC_FLOW_INGRESS;
+ if (flags & MLX5E_TC_EGRESS)
+ __flow_flags |= MLX5E_TC_FLOW_EGRESS;
+
+ *flow_flags = __flow_flags;
+}
+
+static const struct rhashtable_params tc_ht_params = {
+ .head_offset = offsetof(struct mlx5e_tc_flow, node),
+ .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
+ .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
+ .automatic_shrinking = true,
+};
+
+static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) {
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ return &uplink_rpriv->tc_ht;
+ } else
+ return &priv->fs.tc.ht;
+}
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err = 0;
+ u8 flow_flags = 0;
+
+ get_flags(flags, &flow_flags);
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (flow) {
+ netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
+ return 0;
+ }
+
+ if (esw && esw->mode == SRIOV_OFFLOADS) {
+ flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
+ } else {
+ flow_flags |= MLX5E_TC_FLOW_NIC;
+ attr_size = sizeof(struct mlx5_nic_flow_attr);
+ }
+
+ flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
+ parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
+ if (!parse_attr || !flow) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ flow->cookie = f->cookie;
+ flow->flags = flow_flags;
+ flow->priv = priv;
+
+ err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
+ if (err < 0)
+ goto err_free;
+
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
+ if (err < 0)
+ goto err_free;
+ flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
+ } else {
+ err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
+ if (err < 0)
+ goto err_free;
+ flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
+ }
+
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ if (err != -EAGAIN)
+ goto err_free;
+ }
+
+ if (err != -EAGAIN)
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+ !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
+ kvfree(parse_attr);
+
+ err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
+ if (err) {
+ mlx5e_tc_del_flow(priv, flow);
+ kfree(flow);
+ }
+
+ return err;
+
+err_free:
+ kvfree(parse_attr);
+ kfree(flow);
+ return err;
+}
+
+#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
+#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)
+
+static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
+{
+ if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
+ return true;
+
+ return false;
+}
+
+int mlx5e_delete_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (!flow || !same_flow_direction(flow, flags))
+ return -EINVAL;
+
+ rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
+
+ mlx5e_tc_del_flow(priv, flow);
+
+ kfree(flow);
+
+ return 0;
+}
+
+int mlx5e_stats_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_fc *counter;
+ u64 bytes;
+ u64 packets;
+ u64 lastuse;
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (!flow || !same_flow_direction(flow, flags))
+ return -EINVAL;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
+ return 0;
+
+ counter = mlx5_flow_rule_counter(flow->rule[0]);
+ if (!counter)
+ return 0;
+
+ mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+
+ tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+
+ return 0;
+}
+
+static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
+ struct mlx5e_priv *peer_priv)
+{
+ struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
+ struct mlx5e_hairpin_entry *hpe;
+ u16 peer_vhca_id;
+ int bkt;
+
+ if (!same_hw_devs(priv, peer_priv))
+ return;
+
+ peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
+
+ hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
+ if (hpe->peer_vhca_id == peer_vhca_id)
+ hpe->hp->pair->peer_gone = true;
+ }
+}
+
+static int mlx5e_tc_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_flow_steering *fs;
+ struct mlx5e_priv *peer_priv;
+ struct mlx5e_tc_table *tc;
+ struct mlx5e_priv *priv;
+
+ if (ndev->netdev_ops != &mlx5e_netdev_ops ||
+ event != NETDEV_UNREGISTER ||
+ ndev->reg_state == NETREG_REGISTERED)
+ return NOTIFY_DONE;
+
+ tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
+ fs = container_of(tc, struct mlx5e_flow_steering, tc);
+ priv = container_of(fs, struct mlx5e_priv, fs);
+ peer_priv = netdev_priv(ndev);
+ if (priv == peer_priv ||
+ !(priv->netdev->features & NETIF_F_HW_TC))
+ return NOTIFY_DONE;
+
+ mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
+
+ return NOTIFY_DONE;
+}
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
+ int err;
+
+ hash_init(tc->mod_hdr_tbl);
+ hash_init(tc->hairpin_tbl);
+
+ err = rhashtable_init(&tc->ht, &tc_ht_params);
+ if (err)
+ return err;
+
+ tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
+ if (register_netdevice_notifier(&tc->netdevice_nb)) {
+ tc->netdevice_nb.notifier_call = NULL;
+ mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
+ }
+
+ return err;
+}
+
+static void _mlx5e_tc_del_flow(void *ptr, void *arg)
+{
+ struct mlx5e_tc_flow *flow = ptr;
+ struct mlx5e_priv *priv = flow->priv;
+
+ mlx5e_tc_del_flow(priv, flow);
+ kfree(flow);
+}
+
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc = &priv->fs.tc;
+
+ if (tc->netdevice_nb.notifier_call)
+ unregister_netdevice_notifier(&tc->netdevice_nb);
+
+ rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
+
+ if (!IS_ERR_OR_NULL(tc->t)) {
+ mlx5_destroy_flow_table(tc->t);
+ tc->t = NULL;
+ }
+}
+
+int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
+{
+ return rhashtable_init(tc_ht, &tc_ht_params);
+}
+
+void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
+{
+ rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+}
+
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
+{
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+
+ return atomic_read(&tc_ht->nelems);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
new file mode 100644
index 000000000..49436bf3b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_EN_TC_H__
+#define __MLX5_EN_TC_H__
+
+#include <net/pkt_cls.h>
+
+#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+ MLX5E_TC_INGRESS = BIT(0),
+ MLX5E_TC_EGRESS = BIT(1),
+ MLX5E_TC_LAST_EXPORTED_BIT = 1,
+};
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
+void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags);
+int mlx5e_delete_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags);
+
+int mlx5e_stats_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags);
+
+struct mlx5e_encap_entry;
+void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e);
+
+struct mlx5e_neigh_hash_entry;
+void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
+
+int mlx5e_tc_num_filters(struct mlx5e_priv *priv);
+
+#else /* CONFIG_MLX5_ESWITCH */
+static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
+#endif
+
+#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
new file mode 100644
index 000000000..52d3989bb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -0,0 +1,730 @@
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/dsfield.h>
+#include "en.h"
+#include "ipoib/ipoib.h"
+#include "en_accel/en_accel.h"
+#include "lib/clock.h"
+
+#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
+
+#ifndef CONFIG_MLX5_EN_TLS
+#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+#else
+/* TLS offload requires MLX5E_SQ_STOP_ROOM to have
+ * enough room for a resync SKB, a normal SKB and a NOP
+ */
+#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+#endif
+
+static inline void mlx5e_tx_dma_unmap(struct device *pdev,
+ struct mlx5e_sq_dma *dma)
+{
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+ }
+}
+
+static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+ return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq,
+ dma_addr_t addr,
+ u32 size,
+ enum mlx5e_dma_map_type map_type)
+{
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+ dma->addr = addr;
+ dma->size = size;
+ dma->type = map_type;
+}
+
+static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
+{
+ int i;
+
+ for (i = 0; i < num_dma; i++) {
+ struct mlx5e_sq_dma *last_pushed_dma =
+ mlx5e_dma_get(sq, --sq->dma_fifo_pc);
+
+ mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
+ }
+}
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev,
+ select_queue_fallback_t fallback)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int channel_ix = fallback(dev, skb, NULL);
+ u16 num_channels;
+ int up = 0;
+
+ if (!netdev_get_num_tc(dev))
+ return channel_ix;
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
+ up = mlx5e_get_dscp_up(priv, skb);
+ else
+#endif
+ if (skb_vlan_tag_present(skb))
+ up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+
+ /* channel_ix can be larger than num_channels since
+ * dev->num_real_tx_queues = num_channels * num_tc
+ */
+ num_channels = priv->channels.params.num_channels;
+ if (channel_ix >= num_channels)
+ channel_ix = reciprocal_scale(channel_ix, num_channels);
+
+ return priv->channel_tc2txq[channel_ix][up];
+}
+
+static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
+{
+#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+
+ return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
+}
+
+static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
+{
+ struct flow_keys keys;
+
+ if (skb_transport_header_was_set(skb))
+ return skb_transport_offset(skb);
+ else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
+ return keys.control.thoff;
+ else
+ return mlx5e_skb_l2_header_offset(skb);
+}
+
+static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
+ struct sk_buff *skb)
+{
+ u16 hlen;
+
+ switch (mode) {
+ case MLX5_INLINE_MODE_NONE:
+ return 0;
+ case MLX5_INLINE_MODE_TCP_UDP:
+ hlen = eth_get_headlen(skb->data, skb_headlen(skb));
+ if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
+ hlen += VLAN_HLEN;
+ break;
+ case MLX5_INLINE_MODE_IP:
+ /* When transport header is set to zero, it means no transport
+ * header. When transport header is set to 0xff's, it means
+ * transport header wasn't set.
+ */
+ if (skb_transport_offset(skb)) {
+ hlen = mlx5e_skb_l3_header_offset(skb);
+ break;
+ }
+ /* fall through */
+ case MLX5_INLINE_MODE_L2:
+ default:
+ hlen = mlx5e_skb_l2_header_offset(skb);
+ }
+ return min_t(u16, hlen, skb_headlen(skb));
+}
+
+static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
+{
+ struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
+ int cpy1_sz = 2 * ETH_ALEN;
+ int cpy2_sz = ihs - cpy1_sz;
+
+ memcpy(vhdr, skb->data, cpy1_sz);
+ vhdr->h_vlan_proto = skb->vlan_proto;
+ vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
+ memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
+}
+
+static inline void
+mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+{
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ if (skb->encapsulation) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ sq->stats->csum_partial_inner++;
+ } else {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+ }
+ } else
+ sq->stats->csum_none++;
+}
+
+static inline u16
+mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ u16 ihs;
+
+ if (skb->encapsulation) {
+ ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
+ stats->tso_inner_packets++;
+ stats->tso_inner_bytes += skb->len - ihs;
+ } else {
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+ ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
+ else
+ ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ stats->tso_packets++;
+ stats->tso_bytes += skb->len - ihs;
+ }
+
+ return ihs;
+}
+
+static inline int
+mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ unsigned char *skb_data, u16 headlen,
+ struct mlx5_wqe_data_seg *dseg)
+{
+ dma_addr_t dma_addr = 0;
+ u8 num_dma = 0;
+ int i;
+
+ if (headlen) {
+ dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ goto dma_unmap_wqe_err;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(headlen);
+
+ mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
+ num_dma++;
+ dseg++;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+ int fsz = skb_frag_size(frag);
+
+ dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
+ goto dma_unmap_wqe_err;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(fsz);
+
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
+ num_dma++;
+ dseg++;
+ }
+
+ return num_dma;
+
+dma_unmap_wqe_err:
+ mlx5e_dma_unmap_wqe_err(sq, num_dma);
+ return -ENOMEM;
+}
+
+static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
+ struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+ edge_wi = wi + nnops;
+
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->skb = NULL;
+ wi->num_wqebbs = 1;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += nnops;
+}
+
+static inline void
+mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
+ struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ wi->num_bytes = num_bytes;
+ wi->num_dma = num_dma;
+ wi->num_wqebbs = num_wqebbs;
+ wi->skb = skb;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+ netdev_tx_sent_queue(sq->txq, num_bytes);
+
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+ sq->pc += wi->num_wqebbs;
+ if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) {
+ netif_tx_stop_queue(sq->txq);
+ sq->stats->stopped++;
+ }
+
+ if (!skb->xmit_more || netif_xmit_stopped(sq->txq))
+ mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
+}
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5e_tx_wqe *wqe, u16 pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe_info *wi;
+
+ struct mlx5e_sq_stats *stats = sq->stats;
+ u16 headlen, ihs, contig_wqebbs_room;
+ u16 ds_cnt, ds_cnt_inl = 0;
+ u8 num_wqebbs, opcode;
+ u32 num_bytes;
+ int num_dma;
+ __be16 mss;
+
+ /* Calc ihs and ds cnt, no writes to wqe yet */
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ if (skb_is_gso(skb)) {
+ opcode = MLX5_OPCODE_LSO;
+ mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ ihs = mlx5e_tx_get_gso_ihs(sq, skb);
+ num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ opcode = MLX5_OPCODE_SEND;
+ mss = 0;
+ ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+ num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+ stats->packets++;
+ }
+
+ stats->bytes += num_bytes;
+ stats->xmit_more += skb->xmit_more;
+
+ headlen = skb->len - ihs - skb->data_len;
+ ds_cnt += !!headlen;
+ ds_cnt += skb_shinfo(skb)->nr_frags;
+
+ if (ihs) {
+ ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN;
+
+ ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+#ifdef CONFIG_MLX5_EN_IPSEC
+ struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
+#endif
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+#ifdef CONFIG_MLX5_EN_IPSEC
+ wqe->eth = cur_eth;
+#endif
+ }
+
+ /* fill wqe */
+ wi = &sq->db.wqe_info[pi];
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+
+ eseg->mss = mss;
+
+ if (ihs) {
+ eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ if (skb_vlan_tag_present(skb)) {
+ ihs -= VLAN_HLEN;
+ mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs);
+ stats->added_vlan_packets++;
+ } else {
+ memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ }
+ dseg += ds_cnt_inl;
+ } else if (skb_vlan_tag_present(skb)) {
+ eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
+ if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
+ eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
+ eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
+ stats->added_vlan_packets++;
+ }
+
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ if (unlikely(num_dma < 0))
+ goto err_drop;
+
+ mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
+ num_dma, wi, cseg);
+
+ return NETDEV_TX_OK;
+
+err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
+
+ return NETDEV_TX_OK;
+}
+
+netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_tx_wqe *wqe;
+ struct mlx5e_txqsq *sq;
+ u16 pi;
+
+ sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+ mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
+
+ /* might send skbs and update wqe and pi */
+ skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
+ if (unlikely(!skb))
+ return NETDEV_TX_OK;
+
+ return mlx5e_sq_xmit(sq, skb, wqe, pi);
+}
+
+static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
+ struct mlx5_err_cqe *err_cqe)
+{
+ struct mlx5_cqwq *wq = &sq->cq.wq;
+ u32 ci;
+
+ ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+
+ netdev_err(sq->channel->netdev,
+ "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+ sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
+ err_cqe->vendor_err_synd);
+ mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
+}
+
+bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
+{
+ struct mlx5e_sq_stats *stats;
+ struct mlx5e_txqsq *sq;
+ struct mlx5_cqe64 *cqe;
+ u32 dma_fifo_cc;
+ u32 nbytes;
+ u16 npkts;
+ u16 sqcc;
+ int i;
+
+ sq = container_of(cq, struct mlx5e_txqsq, cq);
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return false;
+
+ stats = sq->stats;
+
+ npkts = 0;
+ nbytes = 0;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ /* avoid dirtying sq cache line every cqe */
+ dma_fifo_cc = sq->dma_fifo_cc;
+
+ i = 0;
+ do {
+ u16 wqe_counter;
+ bool last_wqe;
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+ if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+ if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
+ &sq->state)) {
+ mlx5e_dump_error_cqe(sq,
+ (struct mlx5_err_cqe *)cqe);
+ queue_work(cq->channel->priv->wq,
+ &sq->recover.recover_work);
+ }
+ stats->cqe_err++;
+ }
+
+ do {
+ struct mlx5e_tx_wqe_info *wi;
+ struct sk_buff *skb;
+ u16 ci;
+ int j;
+
+ last_wqe = (sqcc == wqe_counter);
+
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
+ wi = &sq->db.wqe_info[ci];
+ skb = wi->skb;
+
+ if (unlikely(!skb)) { /* nop */
+ sqcc++;
+ continue;
+ }
+
+ if (unlikely(skb_shinfo(skb)->tx_flags &
+ SKBTX_HW_TSTAMP)) {
+ struct skb_shared_hwtstamps hwts = {};
+
+ hwts.hwtstamp =
+ mlx5_timecounter_cyc2time(sq->clock,
+ get_cqe_ts(cqe));
+ skb_tstamp_tx(skb, &hwts);
+ }
+
+ for (j = 0; j < wi->num_dma; j++) {
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, dma_fifo_cc++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+
+ npkts++;
+ nbytes += wi->num_bytes;
+ sqcc += wi->num_wqebbs;
+ napi_consume_skb(skb, napi_budget);
+ } while (!last_wqe);
+
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+
+ stats->cqes += i;
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->dma_fifo_cc = dma_fifo_cc;
+ sq->cc = sqcc;
+
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+
+ if (netif_tx_queue_stopped(sq->txq) &&
+ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5E_SQ_STOP_ROOM) &&
+ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+ netif_tx_wake_queue(sq->txq);
+ stats->wake++;
+ }
+
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
+void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ u32 nbytes = 0;
+ u16 ci, npkts = 0;
+ struct sk_buff *skb;
+ int i;
+
+ while (sq->cc != sq->pc) {
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ wi = &sq->db.wqe_info[ci];
+ skb = wi->skb;
+
+ if (!skb) { /* nop */
+ sq->cc++;
+ continue;
+ }
+
+ for (i = 0; i < wi->num_dma; i++) {
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
+ }
+
+ dev_kfree_skb_any(skb);
+ npkts++;
+ nbytes += wi->num_bytes;
+ sq->cc += wi->num_wqebbs;
+ }
+ netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+}
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+static inline void
+mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
+ struct mlx5_wqe_datagram_seg *dseg)
+{
+ memcpy(&dseg->av, av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
+}
+
+netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ struct mlx5i_tx_wqe *wqe;
+
+ struct mlx5_wqe_datagram_seg *datagram;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe_info *wi;
+
+ struct mlx5e_sq_stats *stats = sq->stats;
+ u16 headlen, ihs, pi, contig_wqebbs_room;
+ u16 ds_cnt, ds_cnt_inl = 0;
+ u8 num_wqebbs, opcode;
+ u32 num_bytes;
+ int num_dma;
+ __be16 mss;
+
+ /* Calc ihs and ds cnt, no writes to wqe yet */
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ if (skb_is_gso(skb)) {
+ opcode = MLX5_OPCODE_LSO;
+ mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ ihs = mlx5e_tx_get_gso_ihs(sq, skb);
+ num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
+ stats->packets += skb_shinfo(skb)->gso_segs;
+ } else {
+ opcode = MLX5_OPCODE_SEND;
+ mss = 0;
+ ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb);
+ num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+ stats->packets++;
+ }
+
+ stats->bytes += num_bytes;
+ stats->xmit_more += skb->xmit_more;
+
+ headlen = skb->len - ihs - skb->data_len;
+ ds_cnt += !!headlen;
+ ds_cnt += skb_shinfo(skb)->nr_frags;
+
+ if (ihs) {
+ ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
+ ds_cnt += ds_cnt_inl;
+ }
+
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ mlx5i_sq_fetch_wqe(sq, &wqe, pi);
+
+ /* fill wqe */
+ wi = &sq->db.wqe_info[pi];
+ cseg = &wqe->ctrl;
+ datagram = &wqe->datagram;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);
+
+ mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+
+ eseg->mss = mss;
+
+ if (ihs) {
+ memcpy(eseg->inline_hdr.start, skb->data, ihs);
+ eseg->inline_hdr.sz = cpu_to_be16(ihs);
+ dseg += ds_cnt_inl;
+ }
+
+ num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
+ if (unlikely(num_dma < 0))
+ goto err_drop;
+
+ mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
+ num_dma, wi, cseg);
+
+ return NETDEV_TX_OK;
+
+err_drop:
+ stats->dropped++;
+ dev_kfree_skb_any(skb);
+
+ return NETDEV_TX_OK;
+}
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
new file mode 100644
index 000000000..85d517360
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/irq.h>
+#include "en.h"
+#include "en/xdp.h"
+
+static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
+{
+ int current_cpu = smp_processor_id();
+ const struct cpumask *aff;
+ struct irq_data *idata;
+
+ idata = irq_desc_get_irq_data(c->irq_desc);
+ aff = irq_data_get_affinity_mask(idata);
+ return cpumask_test_cpu(current_cpu, aff);
+}
+
+static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_sq_stats *stats = sq->stats;
+ struct net_dim_sample dim_sample;
+
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
+ return;
+
+ net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes,
+ &dim_sample);
+ net_dim(&sq->dim, dim_sample);
+}
+
+static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
+{
+ struct mlx5e_rq_stats *stats = rq->stats;
+ struct net_dim_sample dim_sample;
+
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
+ return;
+
+ net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes,
+ &dim_sample);
+ net_dim(&rq->dim, dim_sample);
+}
+
+int mlx5e_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
+ napi);
+ struct mlx5e_ch_stats *ch_stats = c->stats;
+ bool busy = false;
+ int work_done = 0;
+ int i;
+
+ ch_stats->poll++;
+
+ for (i = 0; i < c->num_tc; i++)
+ busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
+
+ busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+
+ if (c->xdp)
+ busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq);
+
+ if (likely(budget)) { /* budget=0 means: don't poll rx rings */
+ work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
+ busy |= work_done == budget;
+ }
+
+ busy |= c->rq.post_wqes(&c->rq);
+
+ if (busy) {
+ if (likely(mlx5e_channel_no_affinity_change(c)))
+ return budget;
+ ch_stats->aff_change++;
+ if (budget && work_done == budget)
+ work_done--;
+ }
+
+ if (unlikely(!napi_complete_done(napi, work_done)))
+ return work_done;
+
+ ch_stats->arm++;
+
+ for (i = 0; i < c->num_tc; i++) {
+ mlx5e_handle_tx_dim(&c->sq[i]);
+ mlx5e_cq_arm(&c->sq[i].cq);
+ }
+
+ mlx5e_handle_rx_dim(&c->rq);
+
+ mlx5e_cq_arm(&c->rq.cq);
+ mlx5e_cq_arm(&c->icosq.cq);
+ mlx5e_cq_arm(&c->xdpsq.cq);
+
+ return work_done;
+}
+
+void mlx5e_completion_event(struct mlx5_core_cq *mcq)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+
+ napi_schedule(cq->napi);
+ cq->event_ctr++;
+ cq->channel->stats->events++;
+}
+
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+ struct mlx5e_channel *c = cq->channel;
+ struct net_device *netdev = c->netdev;
+
+ netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n",
+ __func__, mcq->cqn, event);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
new file mode 100644
index 000000000..c1e1a16a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -0,0 +1,991 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+#include "mlx5_core.h"
+#include "fpga/core.h"
+#include "eswitch.h"
+#include "lib/clock.h"
+#include "diag/fw_tracer.h"
+
+enum {
+ MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
+ MLX5_EQE_OWNER_INIT_VAL = 0x1,
+};
+
+enum {
+ MLX5_EQ_STATE_ARMED = 0x9,
+ MLX5_EQ_STATE_FIRED = 0xa,
+ MLX5_EQ_STATE_ALWAYS_ARMED = 0xb,
+};
+
+enum {
+ MLX5_NUM_SPARE_EQE = 0x80,
+ MLX5_NUM_ASYNC_EQE = 0x1000,
+ MLX5_NUM_CMD_EQE = 32,
+ MLX5_NUM_PF_DRAIN = 64,
+};
+
+enum {
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
+};
+
+#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
+ (1ull << MLX5_EVENT_TYPE_COMM_EST) | \
+ (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \
+ (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
+ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
+
+struct map_eq_in {
+ u64 mask;
+ u32 reserved;
+ u32 unmap_eqn;
+};
+
+struct cre_des_eq {
+ u8 reserved[15];
+ u8 eqn;
+};
+
+static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0};
+
+ MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+ MLX5_SET(destroy_eq_in, in, eq_number, eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
+{
+ return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+}
+
+static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+
+ return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+}
+
+static const char *eqe_type_str(u8 type)
+{
+ switch (type) {
+ case MLX5_EVENT_TYPE_COMP:
+ return "MLX5_EVENT_TYPE_COMP";
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ return "MLX5_EVENT_TYPE_PATH_MIG";
+ case MLX5_EVENT_TYPE_COMM_EST:
+ return "MLX5_EVENT_TYPE_COMM_EST";
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ return "MLX5_EVENT_TYPE_SQ_DRAINED";
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ return "MLX5_EVENT_TYPE_CQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
+ case MLX5_EVENT_TYPE_INTERNAL_ERROR:
+ return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return "MLX5_EVENT_TYPE_PORT_CHANGE";
+ case MLX5_EVENT_TYPE_GPIO_EVENT:
+ return "MLX5_EVENT_TYPE_GPIO_EVENT";
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
+ case MLX5_EVENT_TYPE_REMOTE_CONFIG:
+ return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
+ case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
+ return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
+ case MLX5_EVENT_TYPE_STALL_EVENT:
+ return "MLX5_EVENT_TYPE_STALL_EVENT";
+ case MLX5_EVENT_TYPE_CMD:
+ return "MLX5_EVENT_TYPE_CMD";
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+ case MLX5_EVENT_TYPE_PAGE_FAULT:
+ return "MLX5_EVENT_TYPE_PAGE_FAULT";
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ return "MLX5_EVENT_TYPE_PPS_EVENT";
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ return "MLX5_EVENT_TYPE_DEVICE_TRACER";
+ default:
+ return "Unrecognized event";
+ }
+}
+
+static enum mlx5_dev_event port_subtype_event(u8 subtype)
+{
+ switch (subtype) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ return MLX5_DEV_EVENT_PORT_DOWN;
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ return MLX5_DEV_EVENT_PORT_UP;
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+ return MLX5_DEV_EVENT_PORT_INITIALIZED;
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ return MLX5_DEV_EVENT_LID_CHANGE;
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ return MLX5_DEV_EVENT_PKEY_CHANGE;
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ return MLX5_DEV_EVENT_GUID_CHANGE;
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ return MLX5_DEV_EVENT_CLIENT_REREG;
+ }
+ return -1;
+}
+
+static void eq_update_ci(struct mlx5_eq *eq, int arm)
+{
+ __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
+ u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
+ __raw_writel((__force u32)cpu_to_be32(val), addr);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static void eqe_pf_action(struct work_struct *work)
+{
+ struct mlx5_pagefault *pfault = container_of(work,
+ struct mlx5_pagefault,
+ work);
+ struct mlx5_eq *eq = pfault->eq;
+
+ mlx5_core_page_fault(eq->dev, pfault);
+ mempool_free(pfault, eq->pf_ctx.pool);
+}
+
+static void eq_pf_process(struct mlx5_eq *eq)
+{
+ struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eqe_page_fault *pf_eqe;
+ struct mlx5_pagefault *pfault;
+ struct mlx5_eqe *eqe;
+ int set_ci = 0;
+
+ while ((eqe = next_eqe_sw(eq))) {
+ pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
+ if (!pfault) {
+ schedule_work(&eq->pf_ctx.work);
+ break;
+ }
+
+ dma_rmb();
+ pf_eqe = &eqe->data.page_fault;
+ pfault->event_subtype = eqe->sub_type;
+ pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
+
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
+ eqe->sub_type, pfault->bytes_committed);
+
+ switch (eqe->sub_type) {
+ case MLX5_PFAULT_SUBTYPE_RDMA:
+ /* RDMA based event */
+ pfault->type =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+ pfault->token =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) &
+ MLX5_24BIT_MASK;
+ pfault->rdma.r_key =
+ be32_to_cpu(pf_eqe->rdma.r_key);
+ pfault->rdma.packet_size =
+ be16_to_cpu(pf_eqe->rdma.packet_length);
+ pfault->rdma.rdma_op_len =
+ be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+ pfault->rdma.rdma_va =
+ be64_to_cpu(pf_eqe->rdma.rdma_va);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+ pfault->rdma.rdma_op_len,
+ pfault->rdma.rdma_va);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_WQE:
+ /* WQE based event */
+ pfault->type =
+ (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
+ pfault->token =
+ be32_to_cpu(pf_eqe->wqe.token);
+ pfault->wqe.wq_num =
+ be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+ MLX5_24BIT_MASK;
+ pfault->wqe.wqe_index =
+ be16_to_cpu(pf_eqe->wqe.wqe_index);
+ pfault->wqe.packet_size =
+ be16_to_cpu(pf_eqe->wqe.packet_length);
+ mlx5_core_dbg(dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ pfault->type, pfault->token,
+ pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ default:
+ mlx5_core_warn(dev,
+ "Unsupported page fault event sub-type: 0x%02hhx\n",
+ eqe->sub_type);
+ /* Unsupported page faults should still be
+ * resolved by the page fault handler
+ */
+ }
+
+ pfault->eq = eq;
+ INIT_WORK(&pfault->work, eqe_pf_action);
+ queue_work(eq->pf_ctx.wq, &pfault->work);
+
+ ++eq->cons_index;
+ ++set_ci;
+
+ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
+ eq_update_ci(eq, 0);
+ set_ci = 0;
+ }
+ }
+
+ eq_update_ci(eq, 1);
+}
+
+static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
+{
+ struct mlx5_eq *eq = eq_ptr;
+ unsigned long flags;
+
+ if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
+ eq_pf_process(eq);
+ spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
+ } else {
+ schedule_work(&eq->pf_ctx.work);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Chip workaround.
+ */
+static void mempool_refill(mempool_t *pool)
+{
+ while (pool->curr_nr < pool->min_nr)
+ mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
+static void eq_pf_action(struct work_struct *work)
+{
+ struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
+
+ mempool_refill(eq->pf_ctx.pool);
+
+ spin_lock_irq(&eq->pf_ctx.lock);
+ eq_pf_process(eq);
+ spin_unlock_irq(&eq->pf_ctx.lock);
+}
+
+static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
+{
+ spin_lock_init(&pf_ctx->lock);
+ INIT_WORK(&pf_ctx->work, eq_pf_action);
+
+ pf_ctx->wq = alloc_ordered_workqueue(name,
+ WQ_MEM_RECLAIM);
+ if (!pf_ctx->wq)
+ return -ENOMEM;
+
+ pf_ctx->pool = mempool_create_kmalloc_pool
+ (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
+ if (!pf_ctx->pool)
+ goto err_wq;
+
+ return 0;
+err_wq:
+ destroy_workqueue(pf_ctx->wq);
+ return -ENOMEM;
+}
+
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
+ u32 wq_num, u8 type, int error)
+{
+ u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
+
+ MLX5_SET(page_fault_resume_in, in, opcode,
+ MLX5_CMD_OP_PAGE_FAULT_RESUME);
+ MLX5_SET(page_fault_resume_in, in, error, !!error);
+ MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
+ MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+ MLX5_SET(page_fault_resume_in, in, token, token);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
+#endif
+
+static void general_event_handler(struct mlx5_core_dev *dev,
+ struct mlx5_eqe *eqe)
+{
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+ if (dev->event)
+ dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
+ break;
+ default:
+ mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
+ eqe->sub_type);
+ }
+}
+
+static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
+ struct mlx5_eqe *eqe)
+{
+ u64 value_lsb;
+ u64 value_msb;
+
+ value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+ value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+ mlx5_core_warn(dev,
+ "High temperature on sensors with bit set %llx %llx",
+ value_msb, value_lsb);
+}
+
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *cq = NULL;
+
+ spin_lock(&table->lock);
+ cq = radix_tree_lookup(&table->tree, cqn);
+ if (likely(cq))
+ mlx5_cq_hold(cq);
+ spin_unlock(&table->lock);
+
+ return cq;
+}
+
+static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ ++cq->arm_sn;
+
+ cq->comp(cq);
+
+ mlx5_cq_put(cq);
+}
+
+static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+{
+ struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+ if (unlikely(!cq)) {
+ mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+ return;
+ }
+
+ cq->event(cq, event_type);
+
+ mlx5_cq_put(cq);
+}
+
+static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
+{
+ struct mlx5_eq *eq = eq_ptr;
+ struct mlx5_core_dev *dev = eq->dev;
+ struct mlx5_eqe *eqe;
+ int set_ci = 0;
+ u32 cqn = -1;
+ u32 rsn;
+ u8 port;
+
+ while ((eqe = next_eqe_sw(eq))) {
+ /*
+ * Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ dma_rmb();
+
+ mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
+ eq->eqn, eqe_type_str(eqe->type));
+ switch (eqe->type) {
+ case MLX5_EVENT_TYPE_COMP:
+ cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
+ mlx5_eq_cq_completion(eq, cqn);
+ break;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+ mlx5_rsc_event(dev, rsn, eqe->type);
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
+ eqe_type_str(eqe->type), eqe->type, rsn);
+ mlx5_rsc_event(dev, rsn, eqe->type);
+ break;
+
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
+ eqe_type_str(eqe->type), eqe->type, rsn);
+ mlx5_srq_event(dev, rsn, eqe->type);
+ break;
+
+ case MLX5_EVENT_TYPE_CMD:
+ mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
+ break;
+
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ port = (eqe->data.port.port >> 4) & 0xf;
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+ if (dev->event)
+ dev->event(dev, port_subtype_event(eqe->sub_type),
+ (unsigned long)port);
+ break;
+ default:
+ mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
+ port, eqe->sub_type);
+ }
+ break;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
+ cqn, eqe->data.cq_err.syndrome);
+ mlx5_eq_cq_event(eq, cqn, eqe->type);
+ break;
+
+ case MLX5_EVENT_TYPE_PAGE_REQUEST:
+ {
+ u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
+ s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
+
+ mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
+ func_id, npages);
+ mlx5_core_req_pages_handler(dev, func_id, npages);
+ }
+ break;
+
+ case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
+ mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+ mlx5_port_module_event(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_PPS_EVENT:
+ mlx5_pps_event(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
+ break;
+
+ case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+ mlx5_temp_warning_event(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ general_event_handler(dev, eqe);
+ break;
+
+ case MLX5_EVENT_TYPE_DEVICE_TRACER:
+ mlx5_fw_tracer_event(dev, eqe);
+ break;
+
+ default:
+ mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
+ eqe->type, eq->eqn);
+ break;
+ }
+
+ ++eq->cons_index;
+ ++set_ci;
+
+ /* The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MLX5_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
+ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
+ eq_update_ci(eq, 0);
+ set_ci = 0;
+ }
+ }
+
+ eq_update_ci(eq, 1);
+
+ if (cqn != -1)
+ tasklet_schedule(&eq->tasklet_ctx.task);
+
+ return IRQ_HANDLED;
+}
+
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them. It is not recommended to use it, unless this is the last
+ * resort.
+ */
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
+{
+ u32 count_eqe;
+
+ disable_irq(eq->irqn);
+ count_eqe = eq->cons_index;
+ mlx5_eq_int(eq->irqn, eq);
+ count_eqe = eq->cons_index - count_eqe;
+ enable_irq(eq->irqn);
+
+ return count_eqe;
+}
+
+static void init_eq_buf(struct mlx5_eq *eq)
+{
+ struct mlx5_eqe *eqe;
+ int i;
+
+ for (i = 0; i < eq->nent; i++) {
+ eqe = get_eqe(eq, i);
+ eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
+ }
+}
+
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type)
+{
+ struct mlx5_cq_table *cq_table = &eq->cq_table;
+ u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
+ struct mlx5_priv *priv = &dev->priv;
+ irq_handler_t handler;
+ __be64 *pas;
+ void *eqc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ /* Init CQ table */
+ memset(cq_table, 0, sizeof(*cq_table));
+ spin_lock_init(&cq_table->lock);
+ INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
+ eq->type = type;
+ eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+ eq->cons_index = 0;
+ err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
+ if (err)
+ return err;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (type == MLX5_EQ_TYPE_PF)
+ handler = mlx5_eq_pf_int;
+ else
+#endif
+ handler = mlx5_eq_int;
+
+ init_eq_buf(eq);
+
+ inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
+ MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_buf;
+ }
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
+ mlx5_fill_page_array(&eq->buf, pas);
+
+ MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
+ MLX5_SET64(create_eq_in, in, event_bitmask, mask);
+
+ eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
+ MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
+ MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
+ MLX5_SET(eqc, eqc, intr, vecidx);
+ MLX5_SET(eqc, eqc, log_page_size,
+ eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err)
+ goto err_in;
+
+ snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
+ name, pci_name(dev->pdev));
+
+ eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
+ eq->irqn = pci_irq_vector(dev->pdev, vecidx);
+ eq->dev = dev;
+ eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
+ err = request_irq(eq->irqn, handler, 0,
+ priv->irq_info[vecidx].name, eq);
+ if (err)
+ goto err_eq;
+
+ err = mlx5_debug_eq_add(dev, eq);
+ if (err)
+ goto err_irq;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (type == MLX5_EQ_TYPE_PF) {
+ err = init_pf_ctx(&eq->pf_ctx, name);
+ if (err)
+ goto err_irq;
+ } else
+#endif
+ {
+ INIT_LIST_HEAD(&eq->tasklet_ctx.list);
+ INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
+ spin_lock_init(&eq->tasklet_ctx.lock);
+ tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
+ (unsigned long)&eq->tasklet_ctx);
+ }
+
+ /* EQs are created in ARMED state
+ */
+ eq_update_ci(eq, 1);
+
+ kvfree(in);
+ return 0;
+
+err_irq:
+ free_irq(eq->irqn, eq);
+
+err_eq:
+ mlx5_cmd_destroy_eq(dev, eq->eqn);
+
+err_in:
+ kvfree(in);
+
+err_buf:
+ mlx5_buf_free(dev, &eq->buf);
+ return err;
+}
+
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+ int err;
+
+ mlx5_debug_eq_remove(dev, eq);
+ free_irq(eq->irqn, eq);
+ err = mlx5_cmd_destroy_eq(dev, eq->eqn);
+ if (err)
+ mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
+ eq->eqn);
+ synchronize_irq(eq->irqn);
+
+ if (eq->type == MLX5_EQ_TYPE_COMP) {
+ tasklet_disable(&eq->tasklet_ctx.task);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ } else if (eq->type == MLX5_EQ_TYPE_PF) {
+ cancel_work_sync(&eq->pf_ctx.work);
+ destroy_workqueue(eq->pf_ctx.wq);
+ mempool_destroy(eq->pf_ctx.pool);
+#endif
+ }
+ mlx5_buf_free(dev, &eq->buf);
+
+ return err;
+}
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, cq->cqn, cq);
+ spin_unlock_irq(&table->lock);
+
+ return err;
+}
+
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+ struct mlx5_cq_table *table = &eq->cq_table;
+ struct mlx5_core_cq *tmp;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, cq->cqn);
+ spin_unlock_irq(&table->lock);
+
+ if (!tmp) {
+ mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
+ return -ENOENT;
+ }
+
+ if (tmp != cq) {
+ mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5_eq_init(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ spin_lock_init(&dev->priv.eq_table.lock);
+
+ err = mlx5_eq_debugfs_init(dev);
+
+ return err;
+}
+
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
+{
+ mlx5_eq_debugfs_cleanup(dev);
+}
+
+int mlx5_start_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+ int err;
+
+ if (MLX5_VPORT_MANAGER(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
+
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
+ MLX5_CAP_GEN(dev, general_notification_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
+
+ if (MLX5_CAP_GEN(dev, port_module_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
+ else
+ mlx5_core_dbg(dev, "port_module_event is not set\n");
+
+ if (MLX5_PPS_CAP(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
+
+ if (MLX5_CAP_GEN(dev, fpga))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
+ (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
+
+ if (MLX5_CAP_GEN(dev, temp_warn_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
+
+ if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
+
+ err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
+ MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
+ "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
+ return err;
+ }
+
+ mlx5_cmd_use_events(dev);
+
+ err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
+ MLX5_NUM_ASYNC_EQE, async_event_mask,
+ "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+ goto err1;
+ }
+
+ err = mlx5_create_map_eq(dev, &table->pages_eq,
+ MLX5_EQ_VEC_PAGES,
+ /* TODO: sriov max_vf + */ 1,
+ 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
+ MLX5_EQ_TYPE_ASYNC);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+ goto err2;
+ }
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_create_map_eq(dev, &table->pfault_eq,
+ MLX5_EQ_VEC_PFAULT,
+ MLX5_NUM_ASYNC_EQE,
+ 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+ "mlx5_page_fault_eq",
+ MLX5_EQ_TYPE_PF);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
+ err);
+ goto err3;
+ }
+ }
+
+ return err;
+err3:
+ mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+#else
+ return err;
+#endif
+
+err2:
+ mlx5_destroy_unmap_eq(dev, &table->async_eq);
+
+err1:
+ mlx5_cmd_use_polling(dev);
+ mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ return err;
+}
+
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ int err;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+ err);
+ }
+#endif
+
+ err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+ err);
+
+ err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+ err);
+ mlx5_cmd_use_polling(dev);
+
+ err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+ err);
+}
+
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
+
+ MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+ MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (dev->rmap) {
+ free_irq_cpu_rmap(dev->rmap);
+ dev->rmap = NULL;
+ }
+#endif
+ list_for_each_entry(eq, &table->comp_eqs_list, list)
+ free_irq(eq->irqn, eq);
+
+ free_irq(table->pages_eq.irqn, &table->pages_eq);
+ free_irq(table->async_eq.irqn, &table->async_eq);
+ free_irq(table->cmd_eq.irqn, &table->cmd_eq);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (MLX5_CAP_GEN(dev, pg))
+ free_irq(table->pfault_eq.irqn, &table->pfault_eq);
+#endif
+ pci_free_irq_vectors(dev->pdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
new file mode 100644
index 000000000..2190daace
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -0,0 +1,2219 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "fs_core.h"
+
+#define UPLINK_VPORT 0xFFFF
+
+enum {
+ MLX5_ACTION_NONE = 0,
+ MLX5_ACTION_ADD = 1,
+ MLX5_ACTION_DEL = 2,
+};
+
+/* Vport UC/MC hash node */
+struct vport_addr {
+ struct l2addr_node node;
+ u8 action;
+ u32 vport;
+ struct mlx5_flow_handle *flow_rule;
+ bool mpfs; /* UC MAC was added to MPFs */
+ /* A flag indicating that mac was added due to mc promiscuous vport */
+ bool mc_promisc;
+};
+
+enum {
+ UC_ADDR_CHANGE = BIT(0),
+ MC_ADDR_CHANGE = BIT(1),
+ PROMISC_CHANGE = BIT(3),
+};
+
+/* Vport context events */
+#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \
+ MC_ADDR_CHANGE | \
+ PROMISC_CHANGE)
+
+static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
+ u32 events_mask)
+{
+ int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+ int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
+ void *nic_vport_ctx;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1);
+
+ if (events_mask & UC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_uc_address_change, 1);
+ if (events_mask & MC_ADDR_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_mc_address_change, 1);
+ if (events_mask & PROMISC_CHANGE)
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ event_on_promisc_change, 1);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/* E-Switch vport context HW commands */
+static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+ void *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
+
+ MLX5_SET(modify_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
+ MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+ u16 vlan, u8 qos, u8 set_flags)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
+
+ if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
+ !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
+ return -EOPNOTSUPP;
+
+ esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n",
+ vport, vlan, qos, set_flags);
+
+ if (set_flags & SET_VLAN_STRIP)
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_strip, 1);
+
+ if (set_flags & SET_VLAN_INSERT) {
+ /* insert only if no vlan in packet */
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.vport_cvlan_insert, 1);
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_pcp, qos);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ esw_vport_context.cvlan_id, vlan);
+ }
+
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_strip, 1);
+ MLX5_SET(modify_esw_vport_context_in, in,
+ field_select.vport_cvlan_insert, 1);
+
+ return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
+}
+
+/* E-Switch FDB */
+static struct mlx5_flow_handle *
+__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
+ u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
+{
+ int match_header = (is_zero_ether_addr(mac_c) ? 0 :
+ MLX5_MATCH_OUTER_HEADERS);
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_spec *spec;
+ void *mv_misc = NULL;
+ void *mc_misc = NULL;
+ u8 *dmac_v = NULL;
+ u8 *dmac_c = NULL;
+
+ if (rx_rule)
+ match_header |= MLX5_MATCH_MISC_PARAMETERS;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return NULL;
+
+ dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers.dmac_47_16);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers.dmac_47_16);
+
+ if (match_header & MLX5_MATCH_OUTER_HEADERS) {
+ ether_addr_copy(dmac_v, mac_v);
+ ether_addr_copy(dmac_c, mac_c);
+ }
+
+ if (match_header & MLX5_MATCH_MISC_PARAMETERS) {
+ mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT);
+ MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
+ }
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = vport;
+
+ esw_debug(esw->dev,
+ "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
+ dmac_v, dmac_c, vport);
+ spec->match_criteria_enable = match_header;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule =
+ mlx5_add_flow_rules(esw->fdb_table.legacy.fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev,
+ "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+ dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
+ flow_rule = NULL;
+ }
+
+ kvfree(spec);
+ return flow_rule;
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+{
+ u8 mac_c[ETH_ALEN];
+
+ eth_broadcast_addr(mac_c);
+ return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac);
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
+{
+ u8 mac_c[ETH_ALEN];
+ u8 mac_v[ETH_ALEN];
+
+ eth_zero_addr(mac_c);
+ eth_zero_addr(mac_v);
+ mac_c[0] = 0x01;
+ mac_v[0] = 0x01;
+ return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v);
+}
+
+static struct mlx5_flow_handle *
+esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
+{
+ u8 mac_c[ETH_ALEN];
+ u8 mac_v[ETH_ALEN];
+
+ eth_zero_addr(mac_c);
+ eth_zero_addr(mac_v);
+ return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
+}
+
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ int table_size;
+ u32 *flow_group_in;
+ u8 *dmac;
+ int err = 0;
+
+ esw_debug(dev, "Create FDB log_max_size(%d)\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ ft_attr.max_fte = table_size;
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create FDB Table err %d\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.fdb = fdb;
+
+ /* Addresses group : Full match unicast/multicast addresses */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ /* Preserve 2 entries for allmulti and promisc rules*/
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
+ eth_broadcast_addr(dmac);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.addr_grp = g;
+
+ /* Allmulti group : One rule that forwards any mcast traffic */
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2);
+ eth_zero_addr(dmac);
+ dmac[0] = 0x01;
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.allmulti_grp = g;
+
+ /* Promiscuous group :
+ * One rule that forward all unmatched traffic from previous groups
+ */
+ eth_zero_addr(dmac);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
+ goto out;
+ }
+ esw->fdb_table.legacy.promisc_grp = g;
+
+out:
+ if (err) {
+ if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) {
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+ esw->fdb_table.legacy.allmulti_grp = NULL;
+ }
+ if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) {
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+ esw->fdb_table.legacy.addr_grp = NULL;
+ }
+ if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) {
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+ esw->fdb_table.legacy.fdb = NULL;
+ }
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+ if (!esw->fdb_table.legacy.fdb)
+ return;
+
+ esw_debug(esw->dev, "Destroy FDB Table\n");
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+ mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+ esw->fdb_table.legacy.fdb = NULL;
+ esw->fdb_table.legacy.addr_grp = NULL;
+ esw->fdb_table.legacy.allmulti_grp = NULL;
+ esw->fdb_table.legacy.promisc_grp = NULL;
+}
+
+/* E-Switch vport UC/MC lists management */
+typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
+ struct vport_addr *vaddr);
+
+static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+ int err;
+
+ /* Skip mlx5_mpfs_add_mac for PFs,
+ * it is already done by the PF netdev in mlx5e_execute_l2_action
+ */
+ if (!vport)
+ goto fdb_add;
+
+ err = mlx5_mpfs_add_mac(esw->dev, mac);
+ if (err) {
+ esw_warn(esw->dev,
+ "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
+ mac, vport, err);
+ return err;
+ }
+ vaddr->mpfs = true;
+
+fdb_add:
+ /* SRIOV is enabled: Forward UC MAC to vport */
+ if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY)
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+
+ esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
+ vport, mac, vaddr->flow_rule);
+
+ return 0;
+}
+
+static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+ int err = 0;
+
+ /* Skip mlx5_mpfs_del_mac for PFs,
+ * it is already done by the PF netdev in mlx5e_execute_l2_action
+ */
+ if (!vport || !vaddr->mpfs)
+ goto fdb_del;
+
+ err = mlx5_mpfs_del_mac(esw->dev, mac);
+ if (err)
+ esw_warn(esw->dev,
+ "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n",
+ mac, vport, err);
+ vaddr->mpfs = false;
+
+fdb_del:
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rules(vaddr->flow_rule);
+ vaddr->flow_rule = NULL;
+
+ return 0;
+}
+
+static void update_allmulti_vports(struct mlx5_eswitch *esw,
+ struct vport_addr *vaddr,
+ struct esw_mc_addr *esw_mc)
+{
+ u8 *mac = vaddr->node.addr;
+ u32 vport_idx = 0;
+
+ for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) {
+ struct mlx5_vport *vport = &esw->vports[vport_idx];
+ struct hlist_head *vport_hash = vport->mc_list;
+ struct vport_addr *iter_vaddr =
+ l2addr_hash_find(vport_hash,
+ mac,
+ struct vport_addr);
+ if (IS_ERR_OR_NULL(vport->allmulti_rule) ||
+ vaddr->vport == vport_idx)
+ continue;
+ switch (vaddr->action) {
+ case MLX5_ACTION_ADD:
+ if (iter_vaddr)
+ continue;
+ iter_vaddr = l2addr_hash_add(vport_hash, mac,
+ struct vport_addr,
+ GFP_KERNEL);
+ if (!iter_vaddr) {
+ esw_warn(esw->dev,
+ "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n",
+ mac, vport_idx);
+ continue;
+ }
+ iter_vaddr->vport = vport_idx;
+ iter_vaddr->flow_rule =
+ esw_fdb_set_vport_rule(esw,
+ mac,
+ vport_idx);
+ iter_vaddr->mc_promisc = true;
+ break;
+ case MLX5_ACTION_DEL:
+ if (!iter_vaddr)
+ continue;
+ mlx5_del_flow_rules(iter_vaddr->flow_rule);
+ l2addr_hash_del(iter_vaddr);
+ break;
+ }
+ }
+}
+
+static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+
+ if (!esw->fdb_table.legacy.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (esw_mc)
+ goto add;
+
+ esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL);
+ if (!esw_mc)
+ return -ENOMEM;
+
+ esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
+ esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+
+ /* Add this multicast mac to all the mc promiscuous vports */
+ update_allmulti_vports(esw, vaddr, esw_mc);
+
+add:
+ /* If the multicast mac is added as a result of mc promiscuous vport,
+ * don't increment the multicast ref count
+ */
+ if (!vaddr->mc_promisc)
+ esw_mc->refcnt++;
+
+ /* Forward MC MAC to vport */
+ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
+ esw_debug(esw->dev,
+ "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule,
+ esw_mc->refcnt, esw_mc->uplink_rule);
+ return 0;
+}
+
+static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
+{
+ struct hlist_head *hash = esw->mc_table;
+ struct esw_mc_addr *esw_mc;
+ u8 *mac = vaddr->node.addr;
+ u32 vport = vaddr->vport;
+
+ if (!esw->fdb_table.legacy.fdb)
+ return 0;
+
+ esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
+ if (!esw_mc) {
+ esw_warn(esw->dev,
+ "Failed to find eswitch MC addr for MAC(%pM) vport(%d)",
+ mac, vport);
+ return -EINVAL;
+ }
+ esw_debug(esw->dev,
+ "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
+ vport, mac, vaddr->flow_rule, esw_mc->refcnt,
+ esw_mc->uplink_rule);
+
+ if (vaddr->flow_rule)
+ mlx5_del_flow_rules(vaddr->flow_rule);
+ vaddr->flow_rule = NULL;
+
+ /* If the multicast mac is added as a result of mc promiscuous vport,
+ * don't decrement the multicast ref count.
+ */
+ if (vaddr->mc_promisc || (--esw_mc->refcnt > 0))
+ return 0;
+
+ /* Remove this multicast mac from all the mc promiscuous vports */
+ update_allmulti_vports(esw, vaddr, esw_mc);
+
+ if (esw_mc->uplink_rule)
+ mlx5_del_flow_rules(esw_mc->uplink_rule);
+
+ l2addr_hash_del(esw_mc);
+ return 0;
+}
+
+/* Apply vport UC/MC list to HW l2 table and FDB table */
+static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
+ u32 vport_num, int list_type)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ vport_addr_action vport_addr_add;
+ vport_addr_action vport_addr_del;
+ struct vport_addr *addr;
+ struct l2addr_node *node;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int hi;
+
+ vport_addr_add = is_uc ? esw_add_uc_addr :
+ esw_add_mc_addr;
+ vport_addr_del = is_uc ? esw_del_uc_addr :
+ esw_del_mc_addr;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ switch (addr->action) {
+ case MLX5_ACTION_ADD:
+ vport_addr_add(esw, addr);
+ addr->action = MLX5_ACTION_NONE;
+ break;
+ case MLX5_ACTION_DEL:
+ vport_addr_del(esw, addr);
+ l2addr_hash_del(addr);
+ break;
+ }
+ }
+}
+
+/* Sync vport UC/MC list from vport context */
+static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
+ u32 vport_num, int list_type)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
+ u8 (*mac_list)[ETH_ALEN];
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int size;
+ int err;
+ int hi;
+ int i;
+
+ size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) :
+ MLX5_MAX_MC_PER_VPORT(esw->dev);
+
+ mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL);
+ if (!mac_list)
+ return;
+
+ hash = is_uc ? vport->uc_list : vport->mc_list;
+
+ for_each_l2hash_node(node, tmp, hash, hi) {
+ addr = container_of(node, struct vport_addr, node);
+ addr->action = MLX5_ACTION_DEL;
+ }
+
+ if (!vport->enabled)
+ goto out;
+
+ err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type,
+ mac_list, &size);
+ if (err)
+ goto out;
+ esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n",
+ vport_num, is_uc ? "UC" : "MC", size);
+
+ for (i = 0; i < size; i++) {
+ if (is_uc && !is_valid_ether_addr(mac_list[i]))
+ continue;
+
+ if (!is_uc && !is_multicast_ether_addr(mac_list[i]))
+ continue;
+
+ addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr);
+ if (addr) {
+ addr->action = MLX5_ACTION_NONE;
+ /* If this mac was previously added because of allmulti
+ * promiscuous rx mode, its now converted to be original
+ * vport mac.
+ */
+ if (addr->mc_promisc) {
+ struct esw_mc_addr *esw_mc =
+ l2addr_hash_find(esw->mc_table,
+ mac_list[i],
+ struct esw_mc_addr);
+ if (!esw_mc) {
+ esw_warn(esw->dev,
+ "Failed to MAC(%pM) in mcast DB\n",
+ mac_list[i]);
+ continue;
+ }
+ esw_mc->refcnt++;
+ addr->mc_promisc = false;
+ }
+ continue;
+ }
+
+ addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ esw_warn(esw->dev,
+ "Failed to add MAC(%pM) to vport[%d] DB\n",
+ mac_list[i], vport_num);
+ continue;
+ }
+ addr->vport = vport_num;
+ addr->action = MLX5_ACTION_ADD;
+ }
+out:
+ kfree(mac_list);
+}
+
+/* Sync vport UC/MC list from vport context
+ * Must be called after esw_update_vport_addr_list
+ */
+static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct l2addr_node *node;
+ struct vport_addr *addr;
+ struct hlist_head *hash;
+ struct hlist_node *tmp;
+ int hi;
+
+ hash = vport->mc_list;
+
+ for_each_l2hash_node(node, tmp, esw->mc_table, hi) {
+ u8 *mac = node->addr;
+
+ addr = l2addr_hash_find(hash, mac, struct vport_addr);
+ if (addr) {
+ if (addr->action == MLX5_ACTION_DEL)
+ addr->action = MLX5_ACTION_NONE;
+ continue;
+ }
+ addr = l2addr_hash_add(hash, mac, struct vport_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ esw_warn(esw->dev,
+ "Failed to add allmulti MAC(%pM) to vport[%d] DB\n",
+ mac, vport_num);
+ continue;
+ }
+ addr->vport = vport_num;
+ addr->action = MLX5_ACTION_ADD;
+ addr->mc_promisc = true;
+ }
+}
+
+/* Apply vport rx mode to HW FDB table */
+static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
+ bool promisc, bool mc_promisc)
+{
+ struct esw_mc_addr *allmulti_addr = &esw->mc_promisc;
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc)
+ goto promisc;
+
+ if (mc_promisc) {
+ vport->allmulti_rule =
+ esw_fdb_set_vport_allmulti_rule(esw, vport_num);
+ if (!allmulti_addr->uplink_rule)
+ allmulti_addr->uplink_rule =
+ esw_fdb_set_vport_allmulti_rule(esw,
+ UPLINK_VPORT);
+ allmulti_addr->refcnt++;
+ } else if (vport->allmulti_rule) {
+ mlx5_del_flow_rules(vport->allmulti_rule);
+ vport->allmulti_rule = NULL;
+
+ if (--allmulti_addr->refcnt > 0)
+ goto promisc;
+
+ if (allmulti_addr->uplink_rule)
+ mlx5_del_flow_rules(allmulti_addr->uplink_rule);
+ allmulti_addr->uplink_rule = NULL;
+ }
+
+promisc:
+ if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc)
+ return;
+
+ if (promisc) {
+ vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw,
+ vport_num);
+ } else if (vport->promisc_rule) {
+ mlx5_del_flow_rules(vport->promisc_rule);
+ vport->promisc_rule = NULL;
+ }
+}
+
+/* Sync vport rx mode from vport context */
+static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ int promisc_all = 0;
+ int promisc_uc = 0;
+ int promisc_mc = 0;
+ int err;
+
+ err = mlx5_query_nic_vport_promisc(esw->dev,
+ vport_num,
+ &promisc_uc,
+ &promisc_mc,
+ &promisc_all);
+ if (err)
+ return;
+ esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
+ vport_num, promisc_all, promisc_mc);
+
+ if (!vport->info.trusted || !vport->enabled) {
+ promisc_uc = 0;
+ promisc_mc = 0;
+ promisc_all = 0;
+ }
+
+ esw_apply_vport_rx_mode(esw, vport_num, promisc_all,
+ (promisc_all || promisc_mc));
+}
+
+static void esw_vport_change_handle_locked(struct mlx5_vport *vport)
+{
+ struct mlx5_core_dev *dev = vport->dev;
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ u8 mac[ETH_ALEN];
+
+ mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
+ esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
+ vport->vport, mac);
+
+ if (vport->enabled_events & UC_ADDR_CHANGE) {
+ esw_update_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_UC);
+ esw_apply_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_UC);
+ }
+
+ if (vport->enabled_events & MC_ADDR_CHANGE) {
+ esw_update_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_MC);
+ }
+
+ if (vport->enabled_events & PROMISC_CHANGE) {
+ esw_update_vport_rx_mode(esw, vport->vport);
+ if (!IS_ERR_OR_NULL(vport->allmulti_rule))
+ esw_update_vport_mc_promisc(esw, vport->vport);
+ }
+
+ if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) {
+ esw_apply_vport_addr_list(esw, vport->vport,
+ MLX5_NVPRT_LIST_TYPE_MC);
+ }
+
+ esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport);
+ if (vport->enabled)
+ arm_vport_context_events_cmd(dev, vport->vport,
+ vport->enabled_events);
+}
+
+static void esw_vport_change_handler(struct work_struct *work)
+{
+ struct mlx5_vport *vport =
+ container_of(work, struct mlx5_vport, vport_change_handler);
+ struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+
+ mutex_lock(&esw->state_lock);
+ esw_vport_change_handle_locked(vport);
+ mutex_unlock(&esw->state_lock);
+}
+
+static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *vlan_grp = NULL;
+ struct mlx5_flow_group *drop_grp = NULL;
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *acl;
+ void *match_criteria;
+ u32 *flow_group_in;
+ /* The egress acl table contains 2 rules:
+ * 1)Allow traffic with vlan_tag=vst_vlan_id
+ * 2)Drop all other traffic.
+ */
+ int table_size = 2;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+ return -EOPNOTSUPP;
+
+ if (!IS_ERR_OR_NULL(vport->egress.acl))
+ return 0;
+
+ esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
+ vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->vport);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(vlan_grp)) {
+ err = PTR_ERR(vlan_grp);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+ drop_grp = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(drop_grp)) {
+ err = PTR_ERR(drop_grp);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+
+ vport->egress.acl = acl;
+ vport->egress.drop_grp = drop_grp;
+ vport->egress.allowed_vlans_grp = vlan_grp;
+out:
+ kvfree(flow_group_in);
+ if (err && !IS_ERR_OR_NULL(vlan_grp))
+ mlx5_destroy_flow_group(vlan_grp);
+ if (err && !IS_ERR_OR_NULL(acl))
+ mlx5_destroy_flow_table(acl);
+ return err;
+}
+
+static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan))
+ mlx5_del_flow_rules(vport->egress.allowed_vlan);
+
+ if (!IS_ERR_OR_NULL(vport->egress.drop_rule))
+ mlx5_del_flow_rules(vport->egress.drop_rule);
+
+ vport->egress.allowed_vlan = NULL;
+ vport->egress.drop_rule = NULL;
+}
+
+static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->egress.acl))
+ return;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
+
+ esw_vport_cleanup_egress_rules(esw, vport);
+ mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp);
+ mlx5_destroy_flow_group(vport->egress.drop_grp);
+ mlx5_destroy_flow_table(vport->egress.acl);
+ vport->egress.allowed_vlans_grp = NULL;
+ vport->egress.drop_grp = NULL;
+ vport->egress.acl = NULL;
+}
+
+static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ /* The ingress acl table contains 4 groups
+ * (2 active rules at the same time -
+ * 1 allow rule from one of the first 3 groups.
+ * 1 drop rule from the last group):
+ * 1)Allow untagged traffic with smac=original mac.
+ * 2)Allow untagged traffic.
+ * 3)Allow traffic with smac=original mac.
+ * 4)Drop all other traffic.
+ */
+ int table_size = 4;
+ int err = 0;
+
+ if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
+ return -EOPNOTSUPP;
+
+ if (!IS_ERR_OR_NULL(vport->ingress.acl))
+ return 0;
+
+ esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
+ vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
+
+ root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->vport);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
+ return -EOPNOTSUPP;
+ }
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.acl = acl;
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.allow_untagged_spoofchk_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.allow_untagged_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.allow_spoofchk_only_grp = g;
+
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n",
+ vport->vport, err);
+ goto out;
+ }
+ vport->ingress.drop_grp = g;
+
+out:
+ if (err) {
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_spoofchk_only_grp))
+ mlx5_destroy_flow_group(
+ vport->ingress.allow_spoofchk_only_grp);
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_only_grp))
+ mlx5_destroy_flow_group(
+ vport->ingress.allow_untagged_only_grp);
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_spoofchk_grp))
+ mlx5_destroy_flow_group(
+ vport->ingress.allow_untagged_spoofchk_grp);
+ if (!IS_ERR_OR_NULL(vport->ingress.acl))
+ mlx5_destroy_flow_table(vport->ingress.acl);
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (!IS_ERR_OR_NULL(vport->ingress.drop_rule))
+ mlx5_del_flow_rules(vport->ingress.drop_rule);
+
+ if (!IS_ERR_OR_NULL(vport->ingress.allow_rule))
+ mlx5_del_flow_rules(vport->ingress.allow_rule);
+
+ vport->ingress.drop_rule = NULL;
+ vport->ingress.allow_rule = NULL;
+}
+
+static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ if (IS_ERR_OR_NULL(vport->ingress.acl))
+ return;
+
+ esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
+
+ esw_vport_cleanup_ingress_rules(esw, vport);
+ mlx5_destroy_flow_group(vport->ingress.allow_spoofchk_only_grp);
+ mlx5_destroy_flow_group(vport->ingress.allow_untagged_only_grp);
+ mlx5_destroy_flow_group(vport->ingress.allow_untagged_spoofchk_grp);
+ mlx5_destroy_flow_group(vport->ingress.drop_grp);
+ mlx5_destroy_flow_table(vport->ingress.acl);
+ vport->ingress.acl = NULL;
+ vport->ingress.drop_grp = NULL;
+ vport->ingress.allow_spoofchk_only_grp = NULL;
+ vport->ingress.allow_untagged_only_grp = NULL;
+ vport->ingress.allow_untagged_spoofchk_grp = NULL;
+}
+
+static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_fc *counter = vport->ingress.drop_counter;
+ struct mlx5_flow_destination drop_ctr_dst = {0};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_spec *spec;
+ int dest_num = 0;
+ int err = 0;
+ u8 *smac_v;
+
+ esw_vport_cleanup_ingress_rules(esw, vport);
+
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+ esw_vport_disable_ingress_acl(esw, vport);
+ return 0;
+ }
+
+ err = esw_vport_enable_ingress_acl(esw, vport);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "failed to enable ingress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (vport->info.vlan || vport->info.qos)
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+
+ if (vport->info.spoofchk) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
+ smac_v = MLX5_ADDR_OF(fte_match_param,
+ spec->match_value,
+ outer_headers.smac_47_16);
+ ether_addr_copy(smac_v, vport->info.mac);
+ }
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ vport->ingress.allow_rule =
+ mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->ingress.allow_rule)) {
+ err = PTR_ERR(vport->ingress.allow_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress allow rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.allow_rule = NULL;
+ goto out;
+ }
+
+ memset(spec, 0, sizeof(*spec));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /* Attach drop flow counter */
+ if (counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter = counter;
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->ingress.drop_rule =
+ mlx5_add_flow_rules(vport->ingress.acl, spec,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->ingress.drop_rule)) {
+ err = PTR_ERR(vport->ingress.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule, err(%d)\n",
+ vport->vport, err);
+ vport->ingress.drop_rule = NULL;
+ goto out;
+ }
+
+out:
+ if (err)
+ esw_vport_cleanup_ingress_rules(esw, vport);
+ kvfree(spec);
+ return err;
+}
+
+static int esw_vport_egress_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ struct mlx5_fc *counter = vport->egress.drop_counter;
+ struct mlx5_flow_destination drop_ctr_dst = {0};
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_spec *spec;
+ int dest_num = 0;
+ int err = 0;
+
+ esw_vport_cleanup_egress_rules(esw, vport);
+
+ if (!vport->info.vlan && !vport->info.qos) {
+ esw_vport_disable_egress_acl(esw, vport);
+ return 0;
+ }
+
+ err = esw_vport_enable_egress_acl(esw, vport);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "failed to enable egress acl (%d) on vport[%d]\n",
+ err, vport->vport);
+ return err;
+ }
+
+ esw_debug(esw->dev,
+ "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
+ vport->vport, vport->info.vlan, vport->info.qos);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Allowed vlan rule */
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ vport->egress.allowed_vlan =
+ mlx5_add_flow_rules(vport->egress.acl, spec,
+ &flow_act, NULL, 0);
+ if (IS_ERR(vport->egress.allowed_vlan)) {
+ err = PTR_ERR(vport->egress.allowed_vlan);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.allowed_vlan = NULL;
+ goto out;
+ }
+
+ /* Drop others rule (star rule) */
+ memset(spec, 0, sizeof(*spec));
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /* Attach egress drop flow counter */
+ if (counter) {
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ drop_ctr_dst.counter = counter;
+ dst = &drop_ctr_dst;
+ dest_num++;
+ }
+ vport->egress.drop_rule =
+ mlx5_add_flow_rules(vport->egress.acl, spec,
+ &flow_act, dst, dest_num);
+ if (IS_ERR(vport->egress.drop_rule)) {
+ err = PTR_ERR(vport->egress.drop_rule);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule failed, err(%d)\n",
+ vport->vport, err);
+ vport->egress.drop_rule = NULL;
+ }
+out:
+ kvfree(spec);
+ return err;
+}
+
+/* Vport QoS management */
+static int esw_create_tsar(struct mlx5_eswitch *esw)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ struct mlx5_core_dev *dev = esw->dev;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return 0;
+
+ if (esw->qos.enabled)
+ return -EEXIST;
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &esw->qos.root_tsar_id);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
+ return err;
+ }
+
+ esw->qos.enabled = true;
+ return 0;
+}
+
+static void esw_destroy_tsar(struct mlx5_eswitch *esw)
+{
+ int err;
+
+ if (!esw->qos.enabled)
+ return;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_id);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);
+
+ esw->qos.enabled = false;
+}
+
+static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
+ u32 initial_max_rate, u32 initial_bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_core_dev *dev = esw->dev;
+ void *vport_elem;
+ int err = 0;
+
+ if (!esw->qos.enabled || !MLX5_CAP_GEN(dev, qos) ||
+ !MLX5_CAP_QOS(dev, esw_scheduling))
+ return 0;
+
+ if (vport->qos.enabled)
+ return -EEXIST;
+
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+ element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
+ esw->qos.root_tsar_id);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
+ initial_max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ vport->qos.enabled = true;
+ return 0;
+}
+
+static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ int err = 0;
+
+ if (!vport->qos.enabled)
+ return;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+ vport_num, err);
+
+ vport->qos.enabled = false;
+}
+
+static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+ struct mlx5_core_dev *dev = esw->dev;
+ void *vport_elem;
+ u32 bitmask = 0;
+ int err = 0;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ if (!vport->qos.enabled)
+ return -EIO;
+
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+ element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport_num);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
+ esw->qos.root_tsar_id);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
+ max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+
+ err = mlx5_modify_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ vport->qos.esw_tsar_ix,
+ bitmask);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+ vport_num, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+{
+ ((u8 *)node_guid)[7] = mac[0];
+ ((u8 *)node_guid)[6] = mac[1];
+ ((u8 *)node_guid)[5] = mac[2];
+ ((u8 *)node_guid)[4] = 0xff;
+ ((u8 *)node_guid)[3] = 0xfe;
+ ((u8 *)node_guid)[2] = mac[3];
+ ((u8 *)node_guid)[1] = mac[4];
+ ((u8 *)node_guid)[0] = mac[5];
+}
+
+static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int vport_num = vport->vport;
+
+ if (!vport_num)
+ return;
+
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport_num,
+ vport->info.link_state);
+ mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
+ mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
+ (vport->info.vlan || vport->info.qos));
+
+ /* Only legacy mode needs ACLs */
+ if (esw->mode == SRIOV_LEGACY) {
+ esw_vport_ingress_config(esw, vport);
+ esw_vport_egress_config(esw, vport);
+ }
+}
+
+static void esw_vport_create_drop_counters(struct mlx5_vport *vport)
+{
+ struct mlx5_core_dev *dev = vport->dev;
+
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, flow_counter)) {
+ vport->ingress.drop_counter = mlx5_fc_create(dev, false);
+ if (IS_ERR(vport->ingress.drop_counter)) {
+ esw_warn(dev,
+ "vport[%d] configure ingress drop rule counter failed\n",
+ vport->vport);
+ vport->ingress.drop_counter = NULL;
+ }
+ }
+
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, flow_counter)) {
+ vport->egress.drop_counter = mlx5_fc_create(dev, false);
+ if (IS_ERR(vport->egress.drop_counter)) {
+ esw_warn(dev,
+ "vport[%d] configure egress drop rule counter failed\n",
+ vport->vport);
+ vport->egress.drop_counter = NULL;
+ }
+ }
+}
+
+static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport)
+{
+ struct mlx5_core_dev *dev = vport->dev;
+
+ if (vport->ingress.drop_counter)
+ mlx5_fc_destroy(dev, vport->ingress.drop_counter);
+ if (vport->egress.drop_counter)
+ mlx5_fc_destroy(dev, vport->egress.drop_counter);
+}
+
+static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
+ int enable_events)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ mutex_lock(&esw->state_lock);
+ WARN_ON(vport->enabled);
+
+ esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
+
+ /* Create steering drop counters for ingress and egress ACLs */
+ if (vport_num && esw->mode == SRIOV_LEGACY)
+ esw_vport_create_drop_counters(vport);
+
+ /* Restore old vport configuration */
+ esw_apply_vport_conf(esw, vport);
+
+ /* Attach vport to the eswitch rate limiter */
+ if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate,
+ vport->qos.bw_share))
+ esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num);
+
+ /* Sync with current vport context */
+ vport->enabled_events = enable_events;
+ vport->enabled = true;
+
+ /* only PF is trusted by default */
+ if (!vport_num)
+ vport->info.trusted = true;
+
+ esw_vport_change_handle_locked(vport);
+
+ esw->enabled_vports++;
+ esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
+ mutex_unlock(&esw->state_lock);
+}
+
+static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
+{
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ if (!vport->enabled)
+ return;
+
+ esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num);
+ /* Mark this vport as disabled to discard new events */
+ vport->enabled = false;
+
+ synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC));
+ /* Wait for current already scheduled events to complete */
+ flush_workqueue(esw->work_queue);
+ /* Disable events from this vport */
+ arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
+ mutex_lock(&esw->state_lock);
+ /* We don't assume VFs will cleanup after themselves.
+ * Calling vport change handler while vport is disabled will cleanup
+ * the vport resources.
+ */
+ esw_vport_change_handle_locked(vport);
+ vport->enabled_events = 0;
+ esw_vport_disable_qos(esw, vport_num);
+ if (vport_num && esw->mode == SRIOV_LEGACY) {
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport_num,
+ MLX5_VPORT_ADMIN_STATE_DOWN);
+ esw_vport_disable_egress_acl(esw, vport);
+ esw_vport_disable_ingress_acl(esw, vport);
+ esw_vport_destroy_drop_counters(vport);
+ }
+ esw->enabled_vports--;
+ mutex_unlock(&esw->state_lock);
+}
+
+/* Public E-Switch API */
+#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
+
+
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
+{
+ int err;
+ int i, enabled_events;
+
+ if (!ESW_ALLOWED(esw) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
+ esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
+ esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n");
+
+ if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
+ esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n");
+
+ esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
+ esw->mode = mode;
+
+ if (mode == SRIOV_LEGACY) {
+ err = esw_create_legacy_fdb_table(esw);
+ } else {
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
+ err = esw_offloads_init(esw, nvfs + 1);
+ }
+
+ if (err)
+ goto abort;
+
+ err = esw_create_tsar(esw);
+ if (err)
+ esw_warn(esw->dev, "Failed to create eswitch TSAR");
+
+ /* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
+ * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
+ * 2. FDB/Eswitch is programmed by user space tools
+ */
+ enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
+ for (i = 0; i <= nvfs; i++)
+ esw_enable_vport(esw, i, enabled_events);
+
+ esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
+ esw->enabled_vports);
+ return 0;
+
+abort:
+ esw->mode = SRIOV_NONE;
+
+ if (mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
+ return err;
+}
+
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
+{
+ struct esw_mc_addr *mc_promisc;
+ int old_mode;
+ int nvports;
+ int i;
+
+ if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
+ return;
+
+ esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
+ esw->enabled_vports, esw->mode);
+
+ mc_promisc = &esw->mc_promisc;
+ nvports = esw->enabled_vports;
+
+ for (i = 0; i < esw->total_vports; i++)
+ esw_disable_vport(esw, i);
+
+ if (mc_promisc && mc_promisc->uplink_rule)
+ mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+ esw_destroy_tsar(esw);
+
+ if (esw->mode == SRIOV_LEGACY)
+ esw_destroy_legacy_fdb_table(esw);
+ else if (esw->mode == SRIOV_OFFLOADS)
+ esw_offloads_cleanup(esw, nvports);
+
+ old_mode = esw->mode;
+ esw->mode = SRIOV_NONE;
+
+ if (old_mode == SRIOV_OFFLOADS)
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+}
+
+int mlx5_eswitch_init(struct mlx5_core_dev *dev)
+{
+ int total_vports = MLX5_TOTAL_VPORTS(dev);
+ struct mlx5_eswitch *esw;
+ int vport_num;
+ int err;
+
+ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ esw_info(dev,
+ "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+ total_vports,
+ MLX5_MAX_UC_PER_VPORT(dev),
+ MLX5_MAX_MC_PER_VPORT(dev));
+
+ esw = kzalloc(sizeof(*esw), GFP_KERNEL);
+ if (!esw)
+ return -ENOMEM;
+
+ esw->dev = dev;
+
+ esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
+ if (!esw->work_queue) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
+ GFP_KERNEL);
+ if (!esw->vports) {
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ err = esw_offloads_init_reps(esw);
+ if (err)
+ goto abort;
+
+ hash_init(esw->offloads.encap_tbl);
+ hash_init(esw->offloads.mod_hdr_tbl);
+ mutex_init(&esw->state_lock);
+
+ for (vport_num = 0; vport_num < total_vports; vport_num++) {
+ struct mlx5_vport *vport = &esw->vports[vport_num];
+
+ vport->vport = vport_num;
+ vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+ vport->dev = dev;
+ INIT_WORK(&vport->vport_change_handler,
+ esw_vport_change_handler);
+ }
+
+ esw->total_vports = total_vports;
+ esw->enabled_vports = 0;
+ esw->mode = SRIOV_NONE;
+ esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+ else
+ esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+
+ dev->priv.eswitch = esw;
+ return 0;
+abort:
+ if (esw->work_queue)
+ destroy_workqueue(esw->work_queue);
+ esw_offloads_cleanup_reps(esw);
+ kfree(esw->vports);
+ kfree(esw);
+ return err;
+}
+
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+{
+ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
+ return;
+
+ esw_info(esw->dev, "cleanup\n");
+
+ esw->dev->priv.eswitch = NULL;
+ destroy_workqueue(esw->work_queue);
+ esw_offloads_cleanup_reps(esw);
+ kfree(esw->vports);
+ kfree(esw);
+}
+
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
+{
+ struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
+ u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
+ struct mlx5_vport *vport;
+
+ if (!esw) {
+ pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n",
+ vport_num);
+ return;
+ }
+
+ vport = &esw->vports[vport_num];
+ if (vport->enabled)
+ queue_work(esw->work_queue, &vport->vport_change_handler);
+}
+
+/* Vport Administration */
+#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
+
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ int vport, u8 mac[ETH_ALEN])
+{
+ struct mlx5_vport *evport;
+ u64 node_guid;
+ int err = 0;
+
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ if (evport->info.spoofchk && !is_valid_ether_addr(mac))
+ mlx5_core_warn(esw->dev,
+ "Set invalid MAC while spoofchk is on, vport(%d)\n",
+ vport);
+
+ err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
+ vport, err);
+ goto unlock;
+ }
+
+ node_guid_gen_from_mac(&node_guid, mac);
+ err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid);
+ if (err)
+ mlx5_core_warn(esw->dev,
+ "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
+ vport, err);
+
+ ether_addr_copy(evport->info.mac, mac);
+ evport->info.node_guid = node_guid;
+ if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ err = esw_vport_ingress_config(esw, evport);
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ int vport, int link_state)
+{
+ struct mlx5_vport *evport;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ err = mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+ vport, link_state);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "Failed to set vport %d link state, err = %d",
+ vport, err);
+ goto unlock;
+ }
+
+ evport->info.link_state = link_state;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ int vport, struct ifla_vf_info *ivi)
+{
+ struct mlx5_vport *evport;
+
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ evport = &esw->vports[vport];
+
+ memset(ivi, 0, sizeof(*ivi));
+ ivi->vf = vport - 1;
+
+ mutex_lock(&esw->state_lock);
+ ether_addr_copy(ivi->mac, evport->info.mac);
+ ivi->linkstate = evport->info.link_state;
+ ivi->vlan = evport->info.vlan;
+ ivi->qos = evport->info.qos;
+ ivi->spoofchk = evport->info.spoofchk;
+ ivi->trusted = evport->info.trusted;
+ ivi->min_tx_rate = evport->info.min_rate;
+ ivi->max_tx_rate = evport->info.max_rate;
+ mutex_unlock(&esw->state_lock);
+
+ return 0;
+}
+
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos, u8 set_flags)
+{
+ struct mlx5_vport *evport;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+ if (err)
+ goto unlock;
+
+ evport->info.vlan = vlan;
+ evport->info.qos = qos;
+ if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+ err = esw_vport_ingress_config(esw, evport);
+ if (err)
+ goto unlock;
+ err = esw_vport_egress_config(esw, evport);
+ }
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos)
+{
+ u8 set_flags = 0;
+
+ if (vlan || qos)
+ set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+ return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+}
+
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+ int vport, bool spoofchk)
+{
+ struct mlx5_vport *evport;
+ bool pschk;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+ pschk = evport->info.spoofchk;
+ evport->info.spoofchk = spoofchk;
+ if (pschk && !is_valid_ether_addr(evport->info.mac))
+ mlx5_core_warn(esw->dev,
+ "Spoofchk in set while MAC is invalid, vport(%d)\n",
+ evport->vport);
+ if (evport->enabled && esw->mode == SRIOV_LEGACY)
+ err = esw_vport_ingress_config(esw, evport);
+ if (err)
+ evport->info.spoofchk = pschk;
+ mutex_unlock(&esw->state_lock);
+
+ return err;
+}
+
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+ int vport, bool setting)
+{
+ struct mlx5_vport *evport;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+ evport->info.trusted = setting;
+ if (evport->enabled)
+ esw_vport_change_handle_locked(evport);
+ mutex_unlock(&esw->state_lock);
+
+ return 0;
+}
+
+static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_vport *evport;
+ u32 max_guarantee = 0;
+ int i;
+
+ for (i = 0; i < esw->total_vports; i++) {
+ evport = &esw->vports[i];
+ if (!evport->enabled || evport->info.min_rate < max_guarantee)
+ continue;
+ max_guarantee = evport->info.min_rate;
+ }
+
+ if (max_guarantee)
+ return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+ return 0;
+}
+
+static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ u32 divider = calculate_vports_min_rate_divider(esw);
+ struct mlx5_vport *evport;
+ u32 vport_max_rate;
+ u32 vport_min_rate;
+ u32 bw_share;
+ int err;
+ int i;
+
+ for (i = 0; i < esw->total_vports; i++) {
+ evport = &esw->vports[i];
+ if (!evport->enabled)
+ continue;
+ vport_min_rate = evport->info.min_rate;
+ vport_max_rate = evport->info.max_rate;
+ bw_share = 0;
+
+ if (divider)
+ bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
+ divider,
+ fw_max_bw_share);
+
+ if (bw_share == evport->qos.bw_share)
+ continue;
+
+ err = esw_vport_qos_config(esw, i, vport_max_rate,
+ bw_share);
+ if (!err)
+ evport->qos.bw_share = bw_share;
+ else
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+ u32 max_rate, u32 min_rate)
+{
+ struct mlx5_vport *evport;
+ u32 fw_max_bw_share;
+ u32 previous_min_rate;
+ bool min_rate_supported;
+ bool max_rate_supported;
+ int err = 0;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+ fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+ max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
+ if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ if (min_rate == evport->info.min_rate)
+ goto set_max_rate;
+
+ previous_min_rate = evport->info.min_rate;
+ evport->info.min_rate = min_rate;
+ err = normalize_vports_min_rate(esw);
+ if (err) {
+ evport->info.min_rate = previous_min_rate;
+ goto unlock;
+ }
+
+set_max_rate:
+ if (max_rate == evport->info.max_rate)
+ goto unlock;
+
+ err = esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share);
+ if (!err)
+ evport->info.max_rate = max_rate;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
+ int vport_idx,
+ struct mlx5_vport_drop_stats *stats)
+{
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_vport *vport = &esw->vports[vport_idx];
+ u64 rx_discard_vport_down, tx_discard_vport_down;
+ u64 bytes = 0;
+ int err = 0;
+
+ if (!vport->enabled || esw->mode != SRIOV_LEGACY)
+ return 0;
+
+ if (vport->egress.drop_counter)
+ mlx5_fc_query(dev, vport->egress.drop_counter,
+ &stats->rx_dropped, &bytes);
+
+ if (vport->ingress.drop_counter)
+ mlx5_fc_query(dev, vport->ingress.drop_counter,
+ &stats->tx_dropped, &bytes);
+
+ if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
+ !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ return 0;
+
+ err = mlx5_query_vport_down_stats(dev, vport_idx,
+ &rx_discard_vport_down,
+ &tx_discard_vport_down);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
+ stats->rx_dropped += rx_discard_vport_down;
+ if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+ stats->tx_dropped += tx_discard_vport_down;
+
+ return 0;
+}
+
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ int vport,
+ struct ifla_vf_stats *vf_stats)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
+ struct mlx5_vport_drop_stats stats = {0};
+ int err = 0;
+ u32 *out;
+
+ if (!ESW_ALLOWED(esw))
+ return -EPERM;
+ if (!LEGAL_VPORT(esw, vport))
+ return -EINVAL;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+
+ memset(out, 0, outlen);
+ err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
+ if (err)
+ goto free_out;
+
+ #define MLX5_GET_CTR(p, x) \
+ MLX5_GET64(query_vport_counter_out, p, x)
+
+ memset(vf_stats, 0, sizeof(*vf_stats));
+ vf_stats->rx_packets =
+ MLX5_GET_CTR(out, received_eth_unicast.packets) +
+ MLX5_GET_CTR(out, received_ib_unicast.packets) +
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ vf_stats->rx_bytes =
+ MLX5_GET_CTR(out, received_eth_unicast.octets) +
+ MLX5_GET_CTR(out, received_ib_unicast.octets) +
+ MLX5_GET_CTR(out, received_eth_multicast.octets) +
+ MLX5_GET_CTR(out, received_ib_multicast.octets) +
+ MLX5_GET_CTR(out, received_eth_broadcast.octets);
+
+ vf_stats->tx_packets =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.packets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+
+ vf_stats->tx_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_unicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_ib_multicast.octets) +
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+ vf_stats->multicast =
+ MLX5_GET_CTR(out, received_eth_multicast.packets) +
+ MLX5_GET_CTR(out, received_ib_multicast.packets);
+
+ vf_stats->broadcast =
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+
+ err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+ if (err)
+ goto free_out;
+ vf_stats->rx_dropped = stats.rx_dropped;
+ vf_stats->tx_dropped = stats.tx_dropped;
+
+free_out:
+ kvfree(out);
+ return err;
+}
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+ return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
new file mode 100644
index 000000000..c17bfcab5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_ESWITCH_H__
+#define __MLX5_ESWITCH_H__
+
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <net/devlink.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/fs.h>
+#include "lib/mpfs.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+#define MLX5_MAX_UC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
+
+#define MLX5_MAX_MC_PER_VPORT(dev) \
+ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
+
+#define FDB_UPLINK_VPORT 0xffff
+
+#define MLX5_MIN_BW_SHARE 1
+
+#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
+ min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)
+
+#define mlx5_esw_has_fwd_fdb(dev) \
+ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
+
+struct vport_ingress {
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *allow_untagged_spoofchk_grp;
+ struct mlx5_flow_group *allow_spoofchk_only_grp;
+ struct mlx5_flow_group *allow_untagged_only_grp;
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *allow_rule;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+};
+
+struct vport_egress {
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *allowed_vlans_grp;
+ struct mlx5_flow_group *drop_grp;
+ struct mlx5_flow_handle *allowed_vlan;
+ struct mlx5_flow_handle *drop_rule;
+ struct mlx5_fc *drop_counter;
+};
+
+struct mlx5_vport_drop_stats {
+ u64 rx_dropped;
+ u64 tx_dropped;
+};
+
+struct mlx5_vport_info {
+ u8 mac[ETH_ALEN];
+ u16 vlan;
+ u8 qos;
+ u64 node_guid;
+ int link_state;
+ u32 min_rate;
+ u32 max_rate;
+ bool spoofchk;
+ bool trusted;
+};
+
+struct mlx5_vport {
+ struct mlx5_core_dev *dev;
+ int vport;
+ struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE];
+ struct mlx5_flow_handle *promisc_rule;
+ struct mlx5_flow_handle *allmulti_rule;
+ struct work_struct vport_change_handler;
+
+ struct vport_ingress ingress;
+ struct vport_egress egress;
+
+ struct mlx5_vport_info info;
+
+ struct {
+ bool enabled;
+ u32 esw_tsar_ix;
+ u32 bw_share;
+ } qos;
+
+ bool enabled;
+ u16 enabled_events;
+};
+
+struct mlx5_eswitch_fdb {
+ union {
+ struct legacy_fdb {
+ struct mlx5_flow_table *fdb;
+ struct mlx5_flow_group *addr_grp;
+ struct mlx5_flow_group *allmulti_grp;
+ struct mlx5_flow_group *promisc_grp;
+ } legacy;
+
+ struct offloads_fdb {
+ struct mlx5_flow_table *fast_fdb;
+ struct mlx5_flow_table *fwd_fdb;
+ struct mlx5_flow_table *slow_fdb;
+ struct mlx5_flow_group *send_to_vport_grp;
+ struct mlx5_flow_group *miss_grp;
+ struct mlx5_flow_handle *miss_rule_uni;
+ struct mlx5_flow_handle *miss_rule_multi;
+ int vlan_push_pop_refcount;
+ } offloads;
+ };
+};
+
+struct mlx5_esw_offload {
+ struct mlx5_flow_table *ft_offloads;
+ struct mlx5_flow_group *vport_rx_group;
+ struct mlx5_eswitch_rep *vport_reps;
+ DECLARE_HASHTABLE(encap_tbl, 8);
+ DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+ u8 inline_mode;
+ u64 num_flows;
+ u8 encap;
+};
+
+/* E-Switch MC FDB table hash node */
+struct esw_mc_addr { /* SRIOV only */
+ struct l2addr_node node;
+ struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */
+ u32 refcnt;
+};
+
+struct mlx5_eswitch {
+ struct mlx5_core_dev *dev;
+ struct mlx5_eswitch_fdb fdb_table;
+ struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
+ struct workqueue_struct *work_queue;
+ struct mlx5_vport *vports;
+ int total_vports;
+ int enabled_vports;
+ /* Synchronize between vport change events
+ * and async SRIOV admin state changes
+ */
+ struct mutex state_lock;
+ struct esw_mc_addr mc_promisc;
+
+ struct {
+ bool enabled;
+ u32 root_tsar_id;
+ } qos;
+
+ struct mlx5_esw_offload offloads;
+ int mode;
+};
+
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
+int esw_offloads_init_reps(struct mlx5_eswitch *esw);
+
+/* E-Switch API */
+int mlx5_eswitch_init(struct mlx5_core_dev *dev);
+void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
+int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
+int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+ int vport, u8 mac[ETH_ALEN]);
+int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+ int vport, int link_state);
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos);
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+ int vport, bool spoofchk);
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+ int vport_num, bool setting);
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport,
+ u32 max_rate, u32 min_rate);
+int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
+ int vport, struct ifla_vf_info *ivi);
+int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
+ int vport,
+ struct ifla_vf_stats *vf_stats);
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
+
+struct mlx5_flow_spec;
+struct mlx5_esw_flow_attr;
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr);
+
+struct mlx5_flow_handle *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
+
+enum {
+ SET_VLAN_STRIP = BIT(0),
+ SET_VLAN_INSERT = BIT(1)
+};
+
+enum mlx5_flow_match_level {
+ MLX5_MATCH_NONE = MLX5_INLINE_MODE_NONE,
+ MLX5_MATCH_L2 = MLX5_INLINE_MODE_L2,
+ MLX5_MATCH_L3 = MLX5_INLINE_MODE_IP,
+ MLX5_MATCH_L4 = MLX5_INLINE_MODE_TCP_UDP,
+};
+
+/* current maximum for flow based vport multicasting */
+#define MLX5_MAX_FLOW_FWD_VPORTS 2
+
+struct mlx5_esw_flow_attr {
+ struct mlx5_eswitch_rep *in_rep;
+ struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5_core_dev *in_mdev;
+
+ int mirror_count;
+ int out_count;
+
+ int action;
+ __be16 vlan_proto[MLX5_FS_VLAN_DEPTH];
+ u16 vlan_vid[MLX5_FS_VLAN_DEPTH];
+ u8 vlan_prio[MLX5_FS_VLAN_DEPTH];
+ u8 total_vlan;
+ bool vlan_handled;
+ u32 encap_id;
+ u32 mod_hdr_id;
+ u8 match_level;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+};
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr);
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr);
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+ int vport, u16 vlan, u8 qos, u8 set_flags);
+
+static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
+ u8 vlan_depth)
+{
+ bool ret = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
+
+ if (vlan_depth == 1)
+ return ret;
+
+ return ret && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan_2) &&
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
+}
+
+#define MLX5_DEBUG_ESWITCH_MASK BIT(3)
+
+#define esw_info(dev, format, ...) \
+ pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_warn(dev, format, ...) \
+ pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)
+
+#define esw_debug(dev, format, ...) \
+ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+#else /* CONFIG_MLX5_ESWITCH */
+/* eswitch API stubs */
+static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
+static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
+static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
+static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
new file mode 100644
index 000000000..3028e8d90
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -0,0 +1,1368 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+enum {
+ FDB_FAST_PATH = 0,
+ FDB_SLOW_PATH
+};
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_table *ft = NULL;
+ struct mlx5_fc *counter = NULL;
+ struct mlx5_flow_handle *rule;
+ int j, i = 0;
+ void *misc;
+
+ if (esw->mode != SRIOV_OFFLOADS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->mirror_count)
+ ft = esw->fdb_table.offloads.fwd_fdb;
+ else
+ ft = esw->fdb_table.offloads.fast_fdb;
+
+ flow_act.action = attr->action;
+ /* if per flow vlan pop/push is emulated, don't set that into the firmware */
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto[0]);
+ flow_act.vlan[0].vid = attr->vlan_vid[0];
+ flow_act.vlan[0].prio = attr->vlan_prio[0];
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ flow_act.vlan[1].ethtype = ntohs(attr->vlan_proto[1]);
+ flow_act.vlan[1].vid = attr->vlan_vid[1];
+ flow_act.vlan[1].prio = attr->vlan_prio[1];
+ }
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ for (j = attr->mirror_count; j < attr->out_count; j++) {
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = attr->out_rep[j]->vport;
+ dest[i].vport.vhca_id =
+ MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
+ dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ i++;
+ }
+ }
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ counter = mlx5_fc_create(esw->dev, true);
+ if (IS_ERR(counter)) {
+ rule = ERR_CAST(counter);
+ goto err_counter_alloc;
+ }
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[i].counter = counter;
+ i++;
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ if (attr->match_level == MLX5_MATCH_NONE)
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ else
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ flow_act.modify_id = attr->mod_hdr_id;
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+ flow_act.encap_id = attr->encap_id;
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i);
+ if (IS_ERR(rule))
+ goto err_add_rule;
+ else
+ esw->offloads.num_flows++;
+
+ return rule;
+
+err_add_rule:
+ mlx5_fc_destroy(esw->dev, counter);
+err_counter_alloc:
+ return rule;
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_handle *rule;
+ void *misc;
+ int i;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ for (i = 0; i < attr->mirror_count; i++) {
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = attr->out_rep[i]->vport;
+ dest[i].vport.vhca_id =
+ MLX5_CAP_GEN(attr->out_mdev[i], vhca_id);
+ dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ }
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = esw->fdb_table.offloads.fwd_fdb,
+ i++;
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ if (attr->match_level == MLX5_MATCH_NONE)
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ else
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+ MLX5_MATCH_MISC_PARAMETERS;
+
+ rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i);
+
+ if (!IS_ERR(rule))
+ esw->offloads.num_flows++;
+
+ return rule;
+}
+
+void
+mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_fc *counter = NULL;
+
+ counter = mlx5_flow_rule_counter(rule);
+ mlx5_del_flow_rules(rule);
+ mlx5_fc_destroy(esw->dev, counter);
+ esw->offloads.num_flows--;
+}
+
+static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
+{
+ struct mlx5_eswitch_rep *rep;
+ int vf_vport, err = 0;
+
+ esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
+ for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
+ rep = &esw->offloads.vport_reps[vf_vport];
+ if (!rep->rep_if[REP_ETH].valid)
+ continue;
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+static struct mlx5_eswitch_rep *
+esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
+{
+ struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
+
+ in_rep = attr->in_rep;
+ out_rep = attr->out_rep[0];
+
+ if (push)
+ vport = in_rep;
+ else if (pop)
+ vport = out_rep;
+ else
+ vport = in_rep;
+
+ return vport;
+}
+
+static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
+ bool push, bool pop, bool fwd)
+{
+ struct mlx5_eswitch_rep *in_rep, *out_rep;
+
+ if ((push || pop) && !fwd)
+ goto out_notsupp;
+
+ in_rep = attr->in_rep;
+ out_rep = attr->out_rep[0];
+
+ if (push && in_rep->vport == FDB_UPLINK_VPORT)
+ goto out_notsupp;
+
+ if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+ goto out_notsupp;
+
+ /* vport has vlan push configured, can't offload VF --> wire rules w.o it */
+ if (!push && !pop && fwd)
+ if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+ goto out_notsupp;
+
+ /* protects against (1) setting rules with different vlans to push and
+ * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0)
+ */
+ if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0]))
+ goto out_notsupp;
+
+ return 0;
+
+out_notsupp:
+ return -EOPNOTSUPP;
+}
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_eswitch_rep *vport = NULL;
+ bool push, pop, fwd;
+ int err = 0;
+
+ /* nop if we're on the vlan push/pop non emulation mode */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ return 0;
+
+ push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+ pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+ err = esw_add_vlan_action_check(attr, push, pop, fwd);
+ if (err)
+ return err;
+
+ attr->vlan_handled = false;
+
+ vport = esw_vlan_action_get_vport(attr, push, pop);
+
+ if (!push && !pop && fwd) {
+ /* tracks VF --> wire rules without vlan push action */
+ if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) {
+ vport->vlan_refcount++;
+ attr->vlan_handled = true;
+ }
+
+ return 0;
+ }
+
+ if (!push && !pop)
+ return 0;
+
+ if (!(offloads->vlan_push_pop_refcount)) {
+ /* it's the 1st vlan rule, apply global vlan pop policy */
+ err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ }
+ offloads->vlan_push_pop_refcount++;
+
+ if (push) {
+ if (vport->vlan_refcount)
+ goto skip_set_push;
+
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid[0], 0,
+ SET_VLAN_INSERT | SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ vport->vlan = attr->vlan_vid[0];
+skip_set_push:
+ vport->vlan_refcount++;
+ }
+out:
+ if (!err)
+ attr->vlan_handled = true;
+ return err;
+}
+
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+ struct mlx5_eswitch_rep *vport = NULL;
+ bool push, pop, fwd;
+ int err = 0;
+
+ /* nop if we're on the vlan push/pop non emulation mode */
+ if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
+ return 0;
+
+ if (!attr->vlan_handled)
+ return 0;
+
+ push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+ pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+ fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+ vport = esw_vlan_action_get_vport(attr, push, pop);
+
+ if (!push && !pop && fwd) {
+ /* tracks VF --> wire rules without vlan push action */
+ if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT)
+ vport->vlan_refcount--;
+
+ return 0;
+ }
+
+ if (push) {
+ vport->vlan_refcount--;
+ if (vport->vlan_refcount)
+ goto skip_unset_push;
+
+ vport->vlan = 0;
+ err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
+ 0, 0, SET_VLAN_STRIP);
+ if (err)
+ goto out;
+ }
+
+skip_unset_push:
+ offloads->vlan_push_pop_refcount--;
+ if (offloads->vlan_push_pop_refcount)
+ return 0;
+
+ /* no more vlan rules, stop global vlan pop policy */
+ err = esw_set_global_vlan_pop(esw, 0);
+
+out:
+ return err;
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ flow_rule = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
+ MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = vport;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule))
+ esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
+out:
+ kvfree(spec);
+ return flow_rule;
+}
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
+
+void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
+{
+ mlx5_del_flow_rules(rule);
+}
+
+static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_spec *spec;
+ void *headers_c;
+ void *headers_v;
+ int err = 0;
+ u8 *dmac_c;
+ u8 *dmac_v;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+ outer_headers.dmac_47_16);
+ dmac_c[0] = 0x01;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = 0;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+ headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+ outer_headers.dmac_47_16);
+ dmac_v[0] = 0x01;
+ flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ goto out;
+ }
+
+ esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
+out:
+ kvfree(spec);
+ return err;
+}
+
+#define ESW_OFFLOADS_NUM_GROUPS 4
+
+static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb = NULL;
+ int esw_size, err = 0;
+ u32 flags = 0;
+ u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ err = -EOPNOTSUPP;
+ goto out_namespace;
+ }
+
+ esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
+ max_flow_counter, ESW_OFFLOADS_NUM_GROUPS);
+
+ esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS,
+ 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+ if (mlx5_esw_has_fwd_fdb(dev))
+ esw_size >>= 1;
+
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
+
+ fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
+ esw_size,
+ ESW_OFFLOADS_NUM_GROUPS, 0,
+ flags);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
+ goto out_namespace;
+ }
+ esw->fdb_table.offloads.fast_fdb = fdb;
+
+ if (!mlx5_esw_has_fwd_fdb(dev))
+ goto out_namespace;
+
+ fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
+ esw_size,
+ ESW_OFFLOADS_NUM_GROUPS, 1,
+ flags);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create fwd table err %d\n", err);
+ goto out_ft;
+ }
+ esw->fdb_table.offloads.fwd_fdb = fdb;
+
+ return err;
+
+out_ft:
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
+out_namespace:
+ return err;
+}
+
+static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+{
+ if (mlx5_esw_has_fwd_fdb(esw->dev))
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb);
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
+}
+
+#define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
+
+static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_namespace *root_ns;
+ struct mlx5_flow_table *fdb = NULL;
+ int table_size, ix, err = 0;
+ struct mlx5_flow_group *g;
+ void *match_criteria;
+ u32 *flow_group_in;
+ u8 *dmac;
+
+ esw_debug(esw->dev, "Create offloads FDB Tables\n");
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns) {
+ esw_warn(dev, "Failed to get FDB flow namespace\n");
+ err = -EOPNOTSUPP;
+ goto ns_err;
+ }
+
+ err = esw_create_offloads_fast_fdb_table(esw);
+ if (err)
+ goto fast_fdb_err;
+
+ table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+
+ ft_attr.max_fte = table_size;
+ ft_attr.prio = FDB_SLOW_PATH;
+
+ fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(fdb)) {
+ err = PTR_ERR(fdb);
+ esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
+ goto slow_fdb_err;
+ }
+ esw->fdb_table.offloads.slow_fdb = fdb;
+
+ /* create send-to-vport group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+
+ ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err);
+ goto send_vport_err;
+ }
+ esw->fdb_table.offloads.send_to_vport_grp = g;
+
+ /* create miss group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_OUTER_HEADERS);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
+
+ g = mlx5_create_flow_group(fdb, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ esw_warn(dev, "Failed to create miss flow group err(%d)\n", err);
+ goto miss_err;
+ }
+ esw->fdb_table.offloads.miss_grp = g;
+
+ err = esw_add_fdb_miss_rule(esw);
+ if (err)
+ goto miss_rule_err;
+
+ kvfree(flow_group_in);
+ return 0;
+
+miss_rule_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+miss_err:
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+send_vport_err:
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
+slow_fdb_err:
+ esw_destroy_offloads_fast_fdb_table(esw);
+fast_fdb_err:
+ns_err:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
+{
+ if (!esw->fdb_table.offloads.fast_fdb)
+ return;
+
+ esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+ mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
+ mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
+
+ mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
+ esw_destroy_offloads_fast_fdb_table(esw);
+}
+
+static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_flow_table *ft_offloads;
+ struct mlx5_flow_namespace *ns;
+ int err = 0;
+
+ ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
+ if (!ns) {
+ esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.max_fte = dev->priv.sriov.num_vfs + 2;
+
+ ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft_offloads)) {
+ err = PTR_ERR(ft_offloads);
+ esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
+ return err;
+ }
+
+ esw->offloads.ft_offloads = ft_offloads;
+ return 0;
+}
+
+static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+
+ mlx5_destroy_flow_table(offloads->ft_offloads);
+}
+
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *g;
+ struct mlx5_priv *priv = &esw->dev->priv;
+ u32 *flow_group_in;
+ void *match_criteria, *misc;
+ int err = 0;
+ int nvports = priv->sriov.num_vfs + 2;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ /* create vport rx group */
+ memset(flow_group_in, 0, inlen);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
+
+ g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in);
+
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err);
+ goto out;
+ }
+
+ esw->offloads.vport_rx_group = g;
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
+{
+ mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
+}
+
+struct mlx5_flow_handle *
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
+{
+ struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_spec *spec;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ flow_rule = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = tirn;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+ &flow_act, &dest, 1);
+ if (IS_ERR(flow_rule)) {
+ esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
+ goto out;
+ }
+
+out:
+ kvfree(spec);
+ return flow_rule;
+}
+
+static int esw_offloads_start(struct mlx5_eswitch *esw)
+{
+ int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+ if (esw->mode != SRIOV_LEGACY) {
+ esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
+ return -EINVAL;
+ }
+
+ mlx5_eswitch_disable_sriov(esw);
+ err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ if (err) {
+ esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
+ err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ if (err1)
+ esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1);
+ }
+ if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
+ if (mlx5_eswitch_inline_mode_get(esw,
+ num_vfs,
+ &esw->offloads.inline_mode)) {
+ esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
+ esw_warn(esw->dev, "Inline mode is different between vports\n");
+ }
+ }
+ return err;
+}
+
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+{
+ kfree(esw->offloads.vport_reps);
+}
+
+int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+{
+ int total_vfs = MLX5_TOTAL_VPORTS(esw->dev);
+ struct mlx5_core_dev *dev = esw->dev;
+ struct mlx5_esw_offload *offloads;
+ struct mlx5_eswitch_rep *rep;
+ u8 hw_id[ETH_ALEN];
+ int vport;
+
+ esw->offloads.vport_reps = kcalloc(total_vfs,
+ sizeof(struct mlx5_eswitch_rep),
+ GFP_KERNEL);
+ if (!esw->offloads.vport_reps)
+ return -ENOMEM;
+
+ offloads = &esw->offloads;
+ mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
+
+ for (vport = 0; vport < total_vfs; vport++) {
+ rep = &offloads->vport_reps[vport];
+
+ rep->vport = vport;
+ ether_addr_copy(rep->hw_id, hw_id);
+ }
+
+ offloads->vport_reps[0].vport = FDB_UPLINK_VPORT;
+
+ return 0;
+}
+
+static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ int vport;
+
+ for (vport = nvports - 1; vport >= 0; vport--) {
+ rep = &esw->offloads.vport_reps[vport];
+ if (!rep->rep_if[rep_type].valid)
+ continue;
+
+ rep->rep_if[rep_type].unload(rep);
+ }
+}
+
+static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
+{
+ u8 rep_type = NUM_REP_TYPES;
+
+ while (rep_type-- > 0)
+ esw_offloads_unload_reps_type(esw, nvports, rep_type);
+}
+
+static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
+ u8 rep_type)
+{
+ struct mlx5_eswitch_rep *rep;
+ int vport;
+ int err;
+
+ for (vport = 0; vport < nvports; vport++) {
+ rep = &esw->offloads.vport_reps[vport];
+ if (!rep->rep_if[rep_type].valid)
+ continue;
+
+ err = rep->rep_if[rep_type].load(esw->dev, rep);
+ if (err)
+ goto err_reps;
+ }
+
+ return 0;
+
+err_reps:
+ esw_offloads_unload_reps_type(esw, vport, rep_type);
+ return err;
+}
+
+static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports)
+{
+ u8 rep_type = 0;
+ int err;
+
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ err = esw_offloads_load_reps_type(esw, nvports, rep_type);
+ if (err)
+ goto err_reps;
+ }
+
+ return err;
+
+err_reps:
+ while (rep_type-- > 0)
+ esw_offloads_unload_reps_type(esw, nvports, rep_type);
+ return err;
+}
+
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+{
+ int err;
+
+ err = esw_create_offloads_fdb_tables(esw, nvports);
+ if (err)
+ return err;
+
+ err = esw_create_offloads_table(esw);
+ if (err)
+ goto create_ft_err;
+
+ err = esw_create_vport_rx_group(esw);
+ if (err)
+ goto create_fg_err;
+
+ err = esw_offloads_load_reps(esw, nvports);
+ if (err)
+ goto err_reps;
+
+ return 0;
+
+err_reps:
+ esw_destroy_vport_rx_group(esw);
+
+create_fg_err:
+ esw_destroy_offloads_table(esw);
+
+create_ft_err:
+ esw_destroy_offloads_fdb_tables(esw);
+
+ return err;
+}
+
+static int esw_offloads_stop(struct mlx5_eswitch *esw)
+{
+ int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
+
+ mlx5_eswitch_disable_sriov(esw);
+ err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+ if (err) {
+ esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err);
+ err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+ if (err1)
+ esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
+ }
+
+ /* enable back PF RoCE */
+ mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
+ return err;
+}
+
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
+{
+ esw_offloads_unload_reps(esw, nvports);
+ esw_destroy_vport_rx_group(esw);
+ esw_destroy_offloads_table(esw);
+ esw_destroy_offloads_fdb_tables(esw);
+}
+
+static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+{
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ *mlx5_mode = SRIOV_LEGACY;
+ break;
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ *mlx5_mode = SRIOV_OFFLOADS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
+{
+ switch (mlx5_mode) {
+ case SRIOV_LEGACY:
+ *mode = DEVLINK_ESWITCH_MODE_LEGACY;
+ break;
+ case SRIOV_OFFLOADS:
+ *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
+{
+ switch (mode) {
+ case DEVLINK_ESWITCH_INLINE_MODE_NONE:
+ *mlx5_mode = MLX5_INLINE_MODE_NONE;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_LINK:
+ *mlx5_mode = MLX5_INLINE_MODE_L2;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
+ *mlx5_mode = MLX5_INLINE_MODE_IP;
+ break;
+ case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
+ *mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
+{
+ switch (mlx5_mode) {
+ case MLX5_INLINE_MODE_NONE:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
+ break;
+ case MLX5_INLINE_MODE_L2:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
+ break;
+ case MLX5_INLINE_MODE_IP:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
+ break;
+ case MLX5_INLINE_MODE_TCP_UDP:
+ *mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_devlink_eswitch_check(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EOPNOTSUPP;
+
+ if(!MLX5_ESWITCH_MANAGER(dev))
+ return -EPERM;
+
+ if (dev->priv.eswitch->mode == SRIOV_NONE)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u16 cur_mlx5_mode, mlx5_mode = 0;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ cur_mlx5_mode = dev->priv.eswitch->mode;
+
+ if (esw_mode_from_devlink(mode, &mlx5_mode))
+ return -EINVAL;
+
+ if (cur_mlx5_mode == mlx5_mode)
+ return 0;
+
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ return esw_offloads_start(dev->priv.eswitch);
+ else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
+ return esw_offloads_stop(dev->priv.eswitch);
+ else
+ return -EINVAL;
+}
+
+int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
+}
+
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err, vport;
+ u8 mlx5_mode;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+ return 0;
+ /* fall through */
+ case MLX5_CAP_INLINE_MODE_L2:
+ esw_warn(dev, "Inline mode can't be set\n");
+ return -EOPNOTSUPP;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ break;
+ }
+
+ if (esw->offloads.num_flows > 0) {
+ esw_warn(dev, "Can't set inline mode when flows are configured\n");
+ return -EOPNOTSUPP;
+ }
+
+ err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+ if (err)
+ goto out;
+
+ for (vport = 1; vport < esw->enabled_vports; vport++) {
+ err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
+ if (err) {
+ esw_warn(dev, "Failed to set min inline on vport %d\n",
+ vport);
+ goto revert_inline_mode;
+ }
+ }
+
+ esw->offloads.inline_mode = mlx5_mode;
+ return 0;
+
+revert_inline_mode:
+ while (--vport > 0)
+ mlx5_modify_nic_vport_min_inline(dev,
+ vport,
+ esw->offloads.inline_mode);
+out:
+ return err;
+}
+
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+}
+
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+{
+ u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
+ struct mlx5_core_dev *dev = esw->dev;
+ int vport;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager))
+ return -EOPNOTSUPP;
+
+ if (esw->mode == SRIOV_NONE)
+ return -EOPNOTSUPP;
+
+ switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ mlx5_mode = MLX5_INLINE_MODE_NONE;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_L2:
+ mlx5_mode = MLX5_INLINE_MODE_L2;
+ goto out;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ goto query_vports;
+ }
+
+query_vports:
+ for (vport = 1; vport <= nvfs; vport++) {
+ mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
+ if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+ return -EINVAL;
+ prev_mlx5_mode = mlx5_mode;
+ }
+
+out:
+ *mode = mlx5_mode;
+ return 0;
+}
+
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
+ (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
+ return -EOPNOTSUPP;
+
+ if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC)
+ return -EOPNOTSUPP;
+
+ if (esw->mode == SRIOV_LEGACY) {
+ esw->offloads.encap = encap;
+ return 0;
+ }
+
+ if (esw->offloads.encap == encap)
+ return 0;
+
+ if (esw->offloads.num_flows > 0) {
+ esw_warn(dev, "Can't set encapsulation when flows are configured\n");
+ return -EOPNOTSUPP;
+ }
+
+ esw_destroy_offloads_fast_fdb_table(esw);
+
+ esw->offloads.encap = encap;
+ err = esw_create_offloads_fast_fdb_table(esw);
+ if (err) {
+ esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err);
+ esw->offloads.encap = !encap;
+ (void)esw_create_offloads_fast_fdb_table(esw);
+ }
+ return err;
+}
+
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ int err;
+
+ err = mlx5_devlink_eswitch_check(devlink);
+ if (err)
+ return err;
+
+ *encap = esw->offloads.encap;
+ return 0;
+}
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index,
+ struct mlx5_eswitch_rep_if *__rep_if,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep_if *rep_if;
+
+ rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type];
+
+ rep_if->load = __rep_if->load;
+ rep_if->unload = __rep_if->unload;
+ rep_if->get_proto_dev = __rep_if->get_proto_dev;
+ rep_if->priv = __rep_if->priv;
+
+ rep_if->valid = true;
+}
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
+
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+ int vport_index, u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ rep = &offloads->vport_reps[vport_index];
+
+ if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
+ rep->rep_if[rep_type].unload(rep);
+
+ rep->rep_if[rep_type].valid = false;
+}
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
+
+void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
+{
+#define UPLINK_REP_INDEX 0
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ rep = &offloads->vport_reps[UPLINK_REP_INDEX];
+ return rep->rep_if[rep_type].priv;
+}
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+ int vport,
+ u8 rep_type)
+{
+ struct mlx5_esw_offload *offloads = &esw->offloads;
+ struct mlx5_eswitch_rep *rep;
+
+ if (vport == FDB_UPLINK_VPORT)
+ vport = UPLINK_REP_INDEX;
+
+ rep = &offloads->vport_reps[vport];
+
+ if (rep->rep_if[rep_type].valid &&
+ rep->rep_if[rep_type].get_proto_dev)
+ return rep->rep_if[rep_type].get_proto_dev(rep);
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+ return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+ int vport)
+{
+ return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
new file mode 100644
index 000000000..c0fd2212e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "fpga/cmd.h"
+
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+ MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+ void *buf, bool write)
+{
+ u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+ u32 out[MLX5_FPGA_ACCESS_REG_SZ];
+ int err;
+
+ if (size & 3)
+ return -EINVAL;
+ if (addr & 3)
+ return -EINVAL;
+ if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+ return -EINVAL;
+
+ MLX5_SET(fpga_access_reg, in, size, size);
+ MLX5_SET64(fpga_access_reg, in, address, addr);
+ if (write)
+ memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_ACCESS_REG, 0, write);
+ if (err)
+ return err;
+
+ if (!write)
+ memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size);
+
+ return 0;
+}
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga,
+ MLX5_ST_SZ_BYTES(fpga_cap),
+ MLX5_REG_FPGA_CAP, 0, 0);
+}
+
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+
+ MLX5_SET(fpga_ctrl, in, operation, op);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_CTRL, 0, true);
+}
+
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+ unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+ u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+ unsigned int read;
+ int ret = 0;
+
+ if (cap_size > size) {
+ mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u",
+ size, cap_size);
+ return -EINVAL;
+ }
+
+ while (cap_size > 0) {
+ read = min_t(unsigned int, cap_size,
+ MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+ ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+ if (ret) {
+ mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d",
+ read, addr, ret);
+ return ret;
+ }
+
+ cap_size -= read;
+ addr += read;
+ caps += read;
+ }
+
+ return ret;
+}
+
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+ int err;
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_FPGA_CTRL, 0, false);
+ if (err)
+ return err;
+
+ query->status = MLX5_GET(fpga_ctrl, out, status);
+ query->admin_image = MLX5_GET(fpga_ctrl, out, flash_select_admin);
+ query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
+ return 0;
+}
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+ u32 *fpga_qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+ int ret;
+
+ MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+ memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
+ MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc),
+ MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+ *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn);
+ return ret;
+}
+
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields,
+ void *fpga_qpc)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+
+ MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+ MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
+ MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn);
+ memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
+ MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+ u32 fpga_qpn, void *fpga_qpc)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+ int ret;
+
+ MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+ MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, out, fpga_qpc),
+ MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc));
+ return ret;
+}
+
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+
+ MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
+ MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data)
+{
+ u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+ int ret;
+
+ MLX5_SET(fpga_query_qp_counters_in, in, opcode,
+ MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS);
+ MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
+ MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ data->rx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_ack_packets);
+ data->rx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_send_packets);
+ data->tx_ack_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_ack_packets);
+ data->tx_send_packets = MLX5_GET64(fpga_query_qp_counters_out, out,
+ tx_send_packets);
+ data->rx_total_drop = MLX5_GET64(fpga_query_qp_counters_out, out,
+ rx_total_drop);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
new file mode 100644
index 000000000..eb8b0fe0b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_H__
+#define __MLX5_FPGA_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_fpga_device_id {
+ MLX5_FPGA_DEVICE_UNKNOWN = 0,
+ MLX5_FPGA_DEVICE_KU040 = 1,
+ MLX5_FPGA_DEVICE_KU060 = 2,
+ MLX5_FPGA_DEVICE_KU060_2 = 3,
+};
+
+enum mlx5_fpga_image {
+ MLX5_FPGA_IMAGE_USER = 0,
+ MLX5_FPGA_IMAGE_FACTORY,
+};
+
+enum mlx5_fpga_status {
+ MLX5_FPGA_STATUS_SUCCESS = 0,
+ MLX5_FPGA_STATUS_FAILURE = 1,
+ MLX5_FPGA_STATUS_IN_PROGRESS = 2,
+ MLX5_FPGA_STATUS_NONE = 0xFFFF,
+};
+
+struct mlx5_fpga_query {
+ enum mlx5_fpga_image admin_image;
+ enum mlx5_fpga_image oper_image;
+ enum mlx5_fpga_status status;
+};
+
+enum mlx5_fpga_qpc_field_select {
+ MLX5_FPGA_QPC_STATE = BIT(0),
+};
+
+struct mlx5_fpga_qp_counters {
+ u64 rx_ack_packets;
+ u64 rx_send_packets;
+ u64 tx_ack_packets;
+ u64 tx_send_packets;
+ u64 rx_total_drop;
+};
+
+int mlx5_fpga_caps(struct mlx5_core_dev *dev);
+int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+ void *buf, bool write);
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size);
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+ u32 *fpga_qpn);
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc);
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc);
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+ bool clear, struct mlx5_fpga_qp_counters *data);
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn);
+
+#endif /* __MLX5_FPGA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
new file mode 100644
index 000000000..d8d0b6bd5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -0,0 +1,1047 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <net/addrconf.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/vport.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/conn.h"
+
+#define MLX5_FPGA_PKEY 0xFFFF
+#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */
+#define MLX5_FPGA_RECV_SIZE 2048
+#define MLX5_FPGA_PORT_NUM 1
+#define MLX5_FPGA_CQ_BUDGET 64
+
+static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct device *dma_device;
+ int err = 0;
+
+ if (unlikely(!buf->sg[0].data))
+ goto out;
+
+ dma_device = &conn->fdev->mdev->pdev->dev;
+ buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data,
+ buf->sg[0].size, buf->dma_dir);
+ err = dma_mapping_error(dma_device, buf->sg[0].dma_addr);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", err);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (!buf->sg[1].data)
+ goto out;
+
+ buf->sg[1].dma_addr = dma_map_single(dma_device, buf->sg[1].data,
+ buf->sg[1].size, buf->dma_dir);
+ err = dma_mapping_error(dma_device, buf->sg[1].dma_addr);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", err);
+ dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+ buf->sg[0].size, buf->dma_dir);
+ err = -ENOMEM;
+ }
+
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct device *dma_device;
+
+ dma_device = &conn->fdev->mdev->pdev->dev;
+ if (buf->sg[1].data)
+ dma_unmap_single(dma_device, buf->sg[1].dma_addr,
+ buf->sg[1].size, buf->dma_dir);
+
+ if (likely(buf->sg[0].data))
+ dma_unmap_single(dma_device, buf->sg[0].dma_addr,
+ buf->sg[0].size, buf->dma_dir);
+}
+
+static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix;
+ int err = 0;
+
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (unlikely(err))
+ goto out;
+
+ if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ return -EBUSY;
+ }
+
+ ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
+ data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
+ data->byte_count = cpu_to_be32(buf->sg[0].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->addr = cpu_to_be64(buf->sg[0].dma_addr);
+
+ conn->qp.rq.pc++;
+ conn->qp.rq.bufs[ix] = buf;
+
+ /* Make sure that descriptors are written before doorbell record. */
+ dma_wmb();
+ *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff);
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe)
+{
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+ *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
+ /* Make sure that doorbell record is visible before ringing */
+ wmb();
+ mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+}
+
+static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix, sgi;
+ int size = 1;
+
+ ix = conn->qp.sq.pc & (conn->qp.sq.size - 1);
+
+ ctrl = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, ix);
+ data = (void *)(ctrl + 1);
+
+ for (sgi = 0; sgi < ARRAY_SIZE(buf->sg); sgi++) {
+ if (!buf->sg[sgi].data)
+ break;
+ data->byte_count = cpu_to_be32(buf->sg[sgi].size);
+ data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+ data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
+ data++;
+ size++;
+ }
+
+ ctrl->imm = 0;
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
+ MLX5_OPCODE_SEND);
+ ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8));
+
+ conn->qp.sq.pc++;
+ conn->qp.sq.bufs[ix] = buf;
+ mlx5_fpga_conn_notify_hw(conn, ctrl);
+}
+
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ unsigned long flags;
+ int err;
+
+ if (!conn->qp.active)
+ return -ENOTCONN;
+
+ buf->dma_dir = DMA_TO_DEVICE;
+ err = mlx5_fpga_conn_map_buf(conn, buf);
+ if (err)
+ return err;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+ if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size) {
+ list_add_tail(&buf->list, &conn->qp.sq.backlog);
+ goto out_unlock;
+ }
+
+ mlx5_fpga_conn_post_send(conn, buf);
+
+out_unlock:
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+ return err;
+}
+
+static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int err;
+
+ buf = kzalloc(sizeof(*buf) + MLX5_FPGA_RECV_SIZE, 0);
+ if (!buf)
+ return -ENOMEM;
+
+ buf->sg[0].data = (void *)(buf + 1);
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ buf->dma_dir = DMA_FROM_DEVICE;
+
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (err)
+ kfree(buf);
+
+ return err;
+}
+
+static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ struct mlx5_core_mkey *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+
+static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int ix, err;
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1);
+ buf = conn->qp.rq.bufs[ix];
+ conn->qp.rq.bufs[ix] = NULL;
+ conn->qp.rq.cc++;
+
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (unlikely(status || !conn->qp.active)) {
+ conn->qp.active = false;
+ kfree(buf);
+ return;
+ }
+
+ buf->sg[0].size = be32_to_cpu(cqe->byte_cnt);
+ mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n",
+ buf->sg[0].size);
+ conn->recv_cb(conn->cb_arg, buf);
+
+ buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+ err = mlx5_fpga_conn_post_recv(conn, buf);
+ if (unlikely(err)) {
+ mlx5_fpga_warn(conn->fdev,
+ "Failed to re-post recv buf: %d\n", err);
+ kfree(buf);
+ }
+}
+
+static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe, u8 status)
+{
+ struct mlx5_fpga_dma_buf *buf, *nextbuf;
+ unsigned long flags;
+ int ix;
+
+ spin_lock_irqsave(&conn->qp.sq.lock, flags);
+
+ ix = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1);
+ buf = conn->qp.sq.bufs[ix];
+ conn->qp.sq.bufs[ix] = NULL;
+ conn->qp.sq.cc++;
+
+ /* Handle backlog still under the spinlock to ensure message post order */
+ if (unlikely(!list_empty(&conn->qp.sq.backlog))) {
+ if (likely(conn->qp.active)) {
+ nextbuf = list_first_entry(&conn->qp.sq.backlog,
+ struct mlx5_fpga_dma_buf, list);
+ list_del(&nextbuf->list);
+ mlx5_fpga_conn_post_send(conn, nextbuf);
+ }
+ }
+
+ spin_unlock_irqrestore(&conn->qp.sq.lock, flags);
+
+ if (unlikely(status && (status != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)))
+ mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+ else
+ mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+ buf, conn->fpga_qpn, status);
+
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+
+ if (likely(buf->complete))
+ buf->complete(conn, conn->fdev, buf, status);
+
+ if (unlikely(status))
+ conn->qp.active = false;
+}
+
+static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
+ struct mlx5_cqe64 *cqe)
+{
+ u8 opcode, status = 0;
+
+ opcode = cqe->op_own >> 4;
+
+ switch (opcode) {
+ case MLX5_CQE_REQ_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ /* Fall through */
+ case MLX5_CQE_REQ:
+ mlx5_fpga_conn_sq_cqe(conn, cqe, status);
+ break;
+
+ case MLX5_CQE_RESP_ERR:
+ status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+ /* Fall through */
+ case MLX5_CQE_RESP_SEND:
+ mlx5_fpga_conn_rq_cqe(conn, cqe, status);
+ break;
+ default:
+ mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n",
+ opcode);
+ }
+}
+
+static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
+{
+ mlx5_cq_arm(&conn->cq.mcq, MLX5_CQ_DB_REQ_NOT,
+ conn->fdev->conn_res.uar->map, conn->cq.wq.cc);
+}
+
+static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq,
+ enum mlx5_event event)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+ mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn);
+}
+
+static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp);
+ mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn);
+}
+
+static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
+ unsigned int budget)
+{
+ struct mlx5_cqe64 *cqe;
+
+ while (budget) {
+ cqe = mlx5_cqwq_get_cqe(&conn->cq.wq);
+ if (!cqe)
+ break;
+
+ budget--;
+ mlx5_cqwq_pop(&conn->cq.wq);
+ mlx5_fpga_conn_handle_cqe(conn, cqe);
+ mlx5_cqwq_update_db_record(&conn->cq.wq);
+ }
+ if (!budget) {
+ tasklet_schedule(&conn->cq.tasklet);
+ return;
+ }
+
+ mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n", conn->cq.wq.cc);
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+ mlx5_fpga_conn_arm_cq(conn);
+}
+
+static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+{
+ struct mlx5_fpga_conn *conn = (void *)data;
+
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+{
+ struct mlx5_fpga_conn *conn;
+
+ conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+ if (unlikely(!conn->qp.active))
+ return;
+ mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+ struct mlx5_wq_param wqp;
+ struct mlx5_cqe64 *cqe;
+ int inlen, err, eqn;
+ unsigned int irqn;
+ void *cqc, *in;
+ __be64 *pas;
+ u32 i;
+
+ cq_size = roundup_pow_of_two(cq_size);
+ MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size));
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq,
+ &conn->cq.wq_ctrl);
+ if (err)
+ return err;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) {
+ cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i);
+ cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * conn->cq.wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_cqwq;
+ }
+
+ err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+ if (err) {
+ kvfree(in);
+ goto err_cqwq;
+ }
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
+
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+ mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
+
+ err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+ kvfree(in);
+
+ if (err)
+ goto err_cqwq;
+
+ conn->cq.mcq.cqe_sz = 64;
+ conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db;
+ conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1;
+ *conn->cq.mcq.set_ci_db = 0;
+ *conn->cq.mcq.arm_db = 0;
+ conn->cq.mcq.vector = 0;
+ conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
+ conn->cq.mcq.event = mlx5_fpga_conn_cq_event;
+ conn->cq.mcq.irqn = irqn;
+ conn->cq.mcq.uar = fdev->conn_res.uar;
+ tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
+ (unsigned long)conn);
+
+ mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
+
+ goto out;
+
+err_cqwq:
+ mlx5_wq_destroy(&conn->cq.wq_ctrl);
+out:
+ return err;
+}
+
+static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
+{
+ tasklet_disable(&conn->cq.tasklet);
+ tasklet_kill(&conn->cq.tasklet);
+ mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
+ mlx5_wq_destroy(&conn->cq.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ struct mlx5_wq_param wqp;
+
+ wqp.buf_numa_node = mdev->priv.numa_node;
+ wqp.db_numa_node = mdev->priv.numa_node;
+
+ return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq,
+ &conn->qp.wq_ctrl);
+}
+
+static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
+ unsigned int tx_size, unsigned int rx_size)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0};
+ void *in = NULL, *qpc;
+ int err, inlen;
+
+ conn->qp.rq.pc = 0;
+ conn->qp.rq.cc = 0;
+ conn->qp.rq.size = roundup_pow_of_two(rx_size);
+ conn->qp.sq.pc = 0;
+ conn->qp.sq.cc = 0;
+ conn->qp.sq.size = roundup_pow_of_two(tx_size);
+
+ MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ err = mlx5_fpga_conn_create_wq(conn, temp_qpc);
+ if (err)
+ goto out;
+
+ conn->qp.rq.bufs = kvcalloc(conn->qp.rq.size,
+ sizeof(conn->qp.rq.bufs[0]),
+ GFP_KERNEL);
+ if (!conn->qp.rq.bufs) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ conn->qp.sq.bufs = kvcalloc(conn->qp.sq.size,
+ sizeof(conn->qp.sq.bufs[0]),
+ GFP_KERNEL);
+ if (!conn->qp.sq.bufs) {
+ err = -ENOMEM;
+ goto err_rq_bufs;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+ conn->qp.wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_sq_bufs;
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index);
+ MLX5_SET(qpc, qpc, log_page_size,
+ conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, fre, 1);
+ MLX5_SET(qpc, qpc, rlky, 1);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn);
+ MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
+ MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+ MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+
+ mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+
+ err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
+ if (err)
+ goto err_sq_bufs;
+
+ conn->qp.mqp.event = mlx5_fpga_conn_event;
+ mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
+
+ goto out;
+
+err_sq_bufs:
+ kvfree(conn->qp.sq.bufs);
+err_rq_bufs:
+ kvfree(conn->qp.rq.bufs);
+err_wq:
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+out:
+ kvfree(in);
+ return err;
+}
+
+static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn)
+{
+ int ix;
+
+ for (ix = 0; ix < conn->qp.rq.size; ix++) {
+ if (!conn->qp.rq.bufs[ix])
+ continue;
+ mlx5_fpga_conn_unmap_buf(conn, conn->qp.rq.bufs[ix]);
+ kfree(conn->qp.rq.bufs[ix]);
+ conn->qp.rq.bufs[ix] = NULL;
+ }
+}
+
+static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_dma_buf *buf, *temp;
+ int ix;
+
+ for (ix = 0; ix < conn->qp.sq.size; ix++) {
+ buf = conn->qp.sq.bufs[ix];
+ if (!buf)
+ continue;
+ conn->qp.sq.bufs[ix] = NULL;
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+ list_for_each_entry_safe(buf, temp, &conn->qp.sq.backlog, list) {
+ mlx5_fpga_conn_unmap_buf(conn, buf);
+ if (!buf->complete)
+ continue;
+ buf->complete(conn, conn->fdev, buf, MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+ }
+}
+
+static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
+{
+ mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp);
+ mlx5_fpga_conn_free_recv_bufs(conn);
+ mlx5_fpga_conn_flush_send_bufs(conn);
+ kvfree(conn->qp.sq.bufs);
+ kvfree(conn->qp.rq.bufs);
+ mlx5_wq_destroy(&conn->qp.wq_ctrl);
+}
+
+static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_core_dev *mdev = conn->fdev->mdev;
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
+
+ return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
+ &conn->qp.mqp);
+}
+
+static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc = NULL;
+ int err;
+
+ mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
+
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
+ MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn);
+ MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+ MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
+ &conn->qp.mqp);
+ if (err) {
+ mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+ goto out;
+ }
+
+out:
+ kfree(qpc);
+ return err;
+}
+
+static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc = NULL;
+ int err;
+
+ mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
+
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
+ MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
+ MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn);
+ MLX5_SET(qpc, qpc, next_rcv_psn,
+ MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
+ MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
+ ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+ MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
+ MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+ MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port));
+ MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+ conn->qp.sgid_index);
+ MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0);
+ memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+ MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
+ MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
+
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
+ &conn->qp.mqp);
+ if (err) {
+ mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+ goto out;
+ }
+
+out:
+ kfree(qpc);
+ return err;
+}
+
+static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ u32 *qpc = NULL;
+ u32 opt_mask;
+ int err;
+
+ mlx5_fpga_dbg(conn->fdev, "QP RTS\n");
+
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
+ MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
+ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */
+ MLX5_SET(qpc, qpc, next_send_psn,
+ MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn));
+ MLX5_SET(qpc, qpc, retry_count, 7);
+ MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+
+ opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT;
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc,
+ &conn->qp.mqp);
+ if (err) {
+ mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+ goto out;
+ }
+
+out:
+ kfree(qpc);
+ return err;
+}
+
+static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ int err;
+
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE);
+ err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err);
+ goto out;
+ }
+
+ err = mlx5_fpga_conn_reset_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state to reset\n");
+ goto err_fpga_qp;
+ }
+
+ err = mlx5_fpga_conn_init_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n");
+ goto err_fpga_qp;
+ }
+ conn->qp.active = true;
+
+ while (!mlx5_fpga_conn_post_recv_buf(conn))
+ ;
+
+ err = mlx5_fpga_conn_rtr_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n");
+ goto err_recv_bufs;
+ }
+
+ err = mlx5_fpga_conn_rts_qp(conn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n");
+ goto err_recv_bufs;
+ }
+ goto out;
+
+err_recv_bufs:
+ mlx5_fpga_conn_free_recv_bufs(conn);
+err_fpga_qp:
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+ MLX5_FPGA_QPC_STATE, &conn->fpga_qpc))
+ mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n");
+out:
+ return err;
+}
+
+struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type)
+{
+ struct mlx5_fpga_conn *ret, *conn;
+ u8 *remote_mac, *remote_ip;
+ int err;
+
+ if (!attr->recv_cb)
+ return ERR_PTR(-EINVAL);
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
+ conn->fdev = fdev;
+ INIT_LIST_HEAD(&conn->qp.sq.backlog);
+
+ spin_lock_init(&conn->qp.sq.lock);
+
+ conn->recv_cb = attr->recv_cb;
+ conn->cb_arg = attr->cb_arg;
+
+ remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
+ err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ /* Build Modified EUI-64 IPv6 address from the MAC address */
+ remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip);
+ remote_ip[0] = 0xfe;
+ remote_ip[1] = 0x80;
+ addrconf_addr_eui48(&remote_ip[8], remote_mac);
+
+ err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
+ MLX5_ROCE_VERSION_2,
+ MLX5_ROCE_L3_TYPE_IPV6,
+ remote_ip, remote_mac, true, 0,
+ MLX5_FPGA_PORT_NUM);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_rsvd_gid;
+ }
+ mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index);
+
+ /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe,
+ * created during processing of the cqe
+ */
+ err = mlx5_fpga_conn_create_cq(conn,
+ (attr->tx_size + attr->rx_size) * 2);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_gid;
+ }
+
+ mlx5_fpga_conn_arm_cq(conn);
+
+ err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_cq;
+ }
+
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
+ MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
+
+ err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc,
+ &conn->fpga_qpn);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err);
+ ret = ERR_PTR(err);
+ goto err_qp;
+ }
+
+ err = mlx5_fpga_conn_connect(conn);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_conn;
+ }
+
+ mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn);
+ ret = conn;
+ goto out;
+
+err_conn:
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+err_qp:
+ mlx5_fpga_conn_destroy_qp(conn);
+err_cq:
+ mlx5_fpga_conn_destroy_cq(conn);
+err_gid:
+ mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
+ NULL, false, 0, MLX5_FPGA_PORT_NUM);
+err_rsvd_gid:
+ mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
+err:
+ kfree(conn);
+out:
+ return ret;
+}
+
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ struct mlx5_fpga_device *fdev = conn->fdev;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+ int err = 0;
+
+ conn->qp.active = false;
+ tasklet_disable(&conn->cq.tasklet);
+ synchronize_irq(conn->cq.mcq.irqn);
+
+ mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL,
+ &conn->qp.mqp);
+ if (err)
+ mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err);
+ mlx5_fpga_conn_destroy_qp(conn);
+ mlx5_fpga_conn_destroy_cq(conn);
+
+ mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0,
+ NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM);
+ mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index);
+ kfree(conn);
+}
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
+{
+ int err;
+
+ err = mlx5_nic_vport_enable_roce(fdev->mdev);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err);
+ goto out;
+ }
+
+ fdev->conn_res.uar = mlx5_get_uars_page(fdev->mdev);
+ if (IS_ERR(fdev->conn_res.uar)) {
+ err = PTR_ERR(fdev->conn_res.uar);
+ mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err);
+ goto err_roce;
+ }
+ mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n",
+ fdev->conn_res.uar->index);
+
+ err = mlx5_core_alloc_pd(fdev->mdev, &fdev->conn_res.pdn);
+ if (err) {
+ mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err);
+ goto err_uar;
+ }
+ mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn);
+
+ err = mlx5_fpga_conn_create_mkey(fdev->mdev, fdev->conn_res.pdn,
+ &fdev->conn_res.mkey);
+ if (err) {
+ mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
+ goto err_dealloc_pd;
+ }
+ mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+
+ return 0;
+
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+err_uar:
+ mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+err_roce:
+ mlx5_nic_vport_disable_roce(fdev->mdev);
+out:
+ return err;
+}
+
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
+{
+ mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey);
+ mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
+ mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
+ mlx5_nic_vport_disable_roce(fdev->mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
new file mode 100644
index 000000000..634ae10e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_CONN_H__
+#define __MLX5_FPGA_CONN_H__
+
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+#include "fpga/core.h"
+#include "fpga/sdk.h"
+#include "wq.h"
+
+struct mlx5_fpga_conn {
+ struct mlx5_fpga_device *fdev;
+
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ void *cb_arg;
+
+ /* FPGA QP */
+ u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)];
+ u32 fpga_qpn;
+
+ /* CQ */
+ struct {
+ struct mlx5_cqwq wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_cq mcq;
+ struct tasklet_struct tasklet;
+ } cq;
+
+ /* QP */
+ struct {
+ bool active;
+ int sgid_index;
+ struct mlx5_wq_qp wq;
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_core_qp mqp;
+ struct {
+ spinlock_t lock; /* Protects all SQ state */
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ struct list_head backlog;
+ } sq;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int size;
+ struct mlx5_fpga_dma_buf **bufs;
+ } rq;
+ } qp;
+};
+
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev);
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev);
+struct mlx5_fpga_conn *
+mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr,
+ enum mlx5_ifc_fpga_qp_type qp_type);
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn);
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+#endif /* __MLX5_FPGA_CONN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
new file mode 100644
index 000000000..310f9e7d8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/core.h"
+#include "fpga/conn.h"
+
+static const char *const mlx5_fpga_error_strings[] = {
+ "Null Syndrome",
+ "Corrupted DDR",
+ "Flash Timeout",
+ "Internal Link Error",
+ "Watchdog HW Failure",
+ "I2C Failure",
+ "Image Changed",
+ "Temperature Critical",
+};
+
+static const char * const mlx5_fpga_qp_error_strings[] = {
+ "Null Syndrome",
+ "Retry Counter Expired",
+ "RNR Expired",
+};
+static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
+{
+ struct mlx5_fpga_device *fdev = NULL;
+
+ fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
+ if (!fdev)
+ return NULL;
+
+ spin_lock_init(&fdev->state_lock);
+ fdev->state = MLX5_FPGA_STATUS_NONE;
+ return fdev;
+}
+
+static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
+{
+ switch (image) {
+ case MLX5_FPGA_IMAGE_USER:
+ return "user";
+ case MLX5_FPGA_IMAGE_FACTORY:
+ return "factory";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *mlx5_fpga_device_name(u32 device)
+{
+ switch (device) {
+ case MLX5_FPGA_DEVICE_KU040:
+ return "ku040";
+ case MLX5_FPGA_DEVICE_KU060:
+ return "ku060";
+ case MLX5_FPGA_DEVICE_KU060_2:
+ return "ku060_2";
+ case MLX5_FPGA_DEVICE_UNKNOWN:
+ default:
+ return "unknown";
+ }
+}
+
+static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
+{
+ struct mlx5_fpga_query query;
+ int err;
+
+ err = mlx5_fpga_query(fdev->mdev, &query);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
+ return err;
+ }
+
+ fdev->last_admin_image = query.admin_image;
+ fdev->last_oper_image = query.oper_image;
+
+ mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
+ query.status, query.admin_image, query.oper_image);
+
+ if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
+ mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
+ mlx5_fpga_image_name(fdev->last_oper_image),
+ query.status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+ int err;
+ struct mlx5_core_dev *mdev = fdev->mdev;
+
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
+ return err;
+ }
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
+ return err;
+ }
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
+ return err;
+ }
+ return 0;
+}
+
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int max_num_qps;
+ unsigned long flags;
+ u32 fpga_device_id;
+ int err;
+
+ if (!fdev)
+ return 0;
+
+ err = mlx5_fpga_device_load_check(fdev);
+ if (err)
+ goto out;
+
+ err = mlx5_fpga_caps(fdev->mdev);
+ if (err)
+ goto out;
+
+ fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
+ mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
+ mlx5_fpga_device_name(fpga_device_id),
+ fpga_device_id,
+ mlx5_fpga_image_name(fdev->last_oper_image),
+ MLX5_CAP_FPGA(fdev->mdev, image_version),
+ MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
+ MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
+ MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
+
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ if (!max_num_qps) {
+ mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
+ err = -ENOTSUPP;
+ goto out;
+ }
+
+ err = mlx5_core_reserve_gids(mdev, max_num_qps);
+ if (err)
+ goto out;
+
+ err = mlx5_fpga_conn_device_init(fdev);
+ if (err)
+ goto err_rsvd_gid;
+
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_device_brb(fdev);
+ if (err)
+ goto err_conn_init;
+ }
+
+ goto out;
+
+err_conn_init:
+ mlx5_fpga_conn_device_cleanup(fdev);
+
+err_rsvd_gid:
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
+out:
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ return err;
+}
+
+int mlx5_fpga_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = NULL;
+
+ if (!MLX5_CAP_GEN(mdev, fpga)) {
+ mlx5_core_dbg(mdev, "FPGA capability not present\n");
+ return 0;
+ }
+
+ mlx5_core_dbg(mdev, "Initializing FPGA\n");
+
+ fdev = mlx5_fpga_device_alloc();
+ if (!fdev)
+ return -ENOMEM;
+
+ fdev->mdev = mdev;
+ mdev->fpga = fdev;
+
+ return 0;
+}
+
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int max_num_qps;
+ unsigned long flags;
+ int err;
+
+ if (!fdev)
+ return;
+
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ return;
+ }
+ fdev->state = MLX5_FPGA_STATUS_NONE;
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+
+ if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+ err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+ if (err)
+ mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
+ err);
+ }
+
+ mlx5_fpga_conn_device_cleanup(fdev);
+ max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+ mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ mlx5_fpga_device_stop(mdev);
+ kfree(fdev);
+ mdev->fpga = NULL;
+}
+
+static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
+{
+ if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
+ return mlx5_fpga_error_strings[syndrome];
+ return "Unknown";
+}
+
+static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
+{
+ if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
+ return mlx5_fpga_qp_error_strings[syndrome];
+ return "Unknown";
+}
+
+void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ const char *event_name;
+ bool teardown = false;
+ unsigned long flags;
+ u8 syndrome;
+
+ switch (event) {
+ case MLX5_EVENT_TYPE_FPGA_ERROR:
+ syndrome = MLX5_GET(fpga_error_event, data, syndrome);
+ event_name = mlx5_fpga_syndrome_to_string(syndrome);
+ break;
+ case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
+ syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
+ event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
+ break;
+ default:
+ mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
+ event);
+ return;
+ }
+
+ spin_lock_irqsave(&fdev->state_lock, flags);
+ switch (fdev->state) {
+ case MLX5_FPGA_STATUS_SUCCESS:
+ mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
+ teardown = true;
+ break;
+ default:
+ mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
+ syndrome, event_name);
+ }
+ spin_unlock_irqrestore(&fdev->state_lock, flags);
+ /* We tear-down the card's interfaces and functionality because
+ * the FPGA bump-on-the-wire is misbehaving and we lose ability
+ * to communicate with the network. User may still be able to
+ * recover by re-programming or debugging the FPGA
+ */
+ if (teardown)
+ mlx5_trigger_health_work(fdev->mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
new file mode 100644
index 000000000..3e2355c8d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_FPGA_CORE_H__
+#define __MLX5_FPGA_CORE_H__
+
+#ifdef CONFIG_MLX5_FPGA
+
+#include "fpga/cmd.h"
+
+/* Represents an Innova device */
+struct mlx5_fpga_device {
+ struct mlx5_core_dev *mdev;
+ spinlock_t state_lock; /* Protects state transitions */
+ enum mlx5_fpga_status state;
+ enum mlx5_fpga_image last_admin_image;
+ enum mlx5_fpga_image last_oper_image;
+
+ /* QP Connection resources */
+ struct {
+ u32 pdn;
+ struct mlx5_core_mkey mkey;
+ struct mlx5_uars_page *uar;
+ } conn_res;
+
+ struct mlx5_fpga_ipsec *ipsec;
+ struct mlx5_fpga_tls *tls;
+};
+
+#define mlx5_fpga_dbg(__adev, format, ...) \
+ dev_dbg(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_err(__adev, format, ...) \
+ dev_err(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn(__adev, format, ...) \
+ dev_warn(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##__VA_ARGS__)
+
+#define mlx5_fpga_warn_ratelimited(__adev, format, ...) \
+ dev_warn_ratelimited(&(__adev)->mdev->pdev->dev, "FPGA: %s:%d: " \
+ format, __func__, __LINE__, ##__VA_ARGS__)
+
+#define mlx5_fpga_notice(__adev, format, ...) \
+ dev_notice(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+
+#define mlx5_fpga_info(__adev, format, ...) \
+ dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
+
+int mlx5_fpga_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
+void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
+
+#else
+
+static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
+ void *data)
+{
+}
+
+#endif
+
+#endif /* __MLX5_FPGA_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
new file mode 100644
index 000000000..715ccafc9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -0,0 +1,1533 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/rhashtable.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
+
+#include "mlx5_core.h"
+#include "fs_cmd.h"
+#include "fpga/ipsec.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+
+enum mlx5_fpga_ipsec_cmd_status {
+ MLX5_FPGA_IPSEC_CMD_PENDING,
+ MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
+ MLX5_FPGA_IPSEC_CMD_COMPLETE,
+};
+
+struct mlx5_fpga_ipsec_cmd_context {
+ struct mlx5_fpga_dma_buf buf;
+ enum mlx5_fpga_ipsec_cmd_status status;
+ struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
+ int status_code;
+ struct completion complete;
+ struct mlx5_fpga_device *dev;
+ struct list_head list; /* Item in pending_cmds */
+ u8 command[0];
+};
+
+struct mlx5_fpga_esp_xfrm;
+
+struct mlx5_fpga_ipsec_sa_ctx {
+ struct rhash_head hash;
+ struct mlx5_ifc_fpga_ipsec_sa hw_sa;
+ struct mlx5_core_dev *dev;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+};
+
+struct mlx5_fpga_esp_xfrm {
+ unsigned int num_rules;
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mutex lock; /* xfrm lock */
+ struct mlx5_accel_esp_xfrm accel_xfrm;
+};
+
+struct mlx5_fpga_ipsec_rule {
+ struct rb_node node;
+ struct fs_fte *fte;
+ struct mlx5_fpga_ipsec_sa_ctx *ctx;
+};
+
+static const struct rhashtable_params rhash_sa = {
+ /* Keep out "cmd" field from the key as it's
+ * value is not constant during the lifetime
+ * of the key object.
+ */
+ .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
+ FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
+ .key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) +
+ FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
+ .head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+struct mlx5_fpga_ipsec {
+ struct mlx5_fpga_device *fdev;
+ struct list_head pending_cmds;
+ spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+ u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
+ struct mlx5_fpga_conn *conn;
+
+ struct notifier_block fs_notifier_ingress_bypass;
+ struct notifier_block fs_notifier_egress;
+
+ /* Map hardware SA --> SA context
+ * (mlx5_fpga_ipsec_sa) (mlx5_fpga_ipsec_sa_ctx)
+ * We will use this hash to avoid SAs duplication in fpga which
+ * aren't allowed
+ */
+ struct rhashtable sa_hash; /* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
+ struct mutex sa_hash_lock;
+
+ /* Tree holding all rules for this fpga device
+ * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
+ */
+ struct rb_root rules_rb;
+ struct mutex rules_rb_lock; /* rules lock */
+};
+
+static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+ MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC)
+ return false;
+
+ return true;
+}
+
+static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf,
+ u8 status)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+
+ if (status) {
+ context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
+ buf);
+ mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
+ status);
+ context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
+ complete(&context->complete);
+ }
+}
+
+static inline
+int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
+{
+ switch (syndrome) {
+ case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
+ return 0;
+ case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
+ return -EEXIST;
+ case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+ return -EINVAL;
+ case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+ return -EIO;
+ }
+ return -EIO;
+}
+
+static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
+ struct mlx5_fpga_device *fdev = cb_arg;
+ unsigned long flags;
+
+ if (buf->sg[0].size < sizeof(*resp)) {
+ mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
+ buf->sg[0].size, sizeof(*resp));
+ return;
+ }
+
+ mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
+ ntohl(resp->syndrome));
+
+ spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+ context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
+ struct mlx5_fpga_ipsec_cmd_context,
+ list);
+ if (context)
+ list_del(&context->list);
+ spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+ if (!context) {
+ mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
+ return;
+ }
+ mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
+
+ syndrome = ntohl(resp->syndrome);
+ context->status_code = syndrome_to_errno(syndrome);
+ context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
+ memcpy(&context->resp, resp, sizeof(*resp));
+
+ if (context->status_code)
+ mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
+ syndrome);
+
+ complete(&context->complete);
+}
+
+static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
+ const void *cmd, int cmd_size)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned long flags;
+ int res;
+
+ if (!fdev || !fdev->ipsec)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (cmd_size & 3)
+ return ERR_PTR(-EINVAL);
+
+ context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
+ context->dev = fdev;
+ context->buf.complete = mlx5_fpga_ipsec_send_complete;
+ init_completion(&context->complete);
+ memcpy(&context->command, cmd, cmd_size);
+ context->buf.sg[0].size = cmd_size;
+ context->buf.sg[0].data = &context->command;
+
+ spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+ res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+ if (!res)
+ list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+ spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+ if (res) {
+ mlx5_fpga_warn(fdev, "Failed to send IPSec command: %d\n", res);
+ kfree(context);
+ return ERR_PTR(res);
+ }
+
+ /* Context should be freed by the caller after completion. */
+ return context;
+}
+
+static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context = ctx;
+ unsigned long timeout =
+ msecs_to_jiffies(MLX5_FPGA_CMD_TIMEOUT_MSEC);
+ int res;
+
+ res = wait_for_completion_timeout(&context->complete, timeout);
+ if (!res) {
+ mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
+ return -ETIMEDOUT;
+ }
+
+ if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
+ res = context->status_code;
+ else
+ res = -EIO;
+
+ return res;
+}
+
+static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
+{
+ if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
+ return true;
+ return false;
+}
+
+static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
+ int opcode)
+{
+ struct mlx5_core_dev *dev = fdev->mdev;
+ struct mlx5_ifc_fpga_ipsec_sa *sa;
+ struct mlx5_fpga_ipsec_cmd_context *cmd_context;
+ size_t sa_cmd_size;
+ int err;
+
+ hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
+ if (is_v2_sadb_supported(fdev->ipsec))
+ sa_cmd_size = sizeof(*hw_sa);
+ else
+ sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
+
+ cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
+ mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
+ if (IS_ERR(cmd_context))
+ return PTR_ERR(cmd_context);
+
+ err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
+ if (err)
+ goto out;
+
+ sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
+ if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
+ mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+ ntohl(sa->ipsec_sa_v1.sw_sa_handle),
+ ntohl(cmd_context->resp.sw_sa_handle));
+ err = -EIO;
+ }
+
+out:
+ kfree(cmd_context);
+ return err;
+}
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ u32 ret = 0;
+
+ if (mlx5_fpga_is_ipsec_device(mdev)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
+ ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
+ } else {
+ return ret;
+ }
+
+ if (!fdev->ipsec)
+ return ret;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
+ ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
+ ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
+ ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
+
+ if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
+ ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
+ ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
+ }
+
+ return ret;
+}
+
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ if (!fdev || !fdev->ipsec)
+ return 0;
+
+ return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+ number_of_ipsec_counters);
+}
+
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int counters_count)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ unsigned int i;
+ __be32 *data;
+ u32 count;
+ u64 addr;
+ int ret;
+
+ if (!fdev || !fdev->ipsec)
+ return 0;
+
+ addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+ ipsec_counters_addr_low) +
+ ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+ ipsec_counters_addr_high) << 32);
+
+ count = mlx5_fpga_ipsec_counters_count(mdev);
+
+ data = kzalloc(array3_size(sizeof(*data), count, 2), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
+ MLX5_FPGA_ACCESS_TYPE_DONTCARE);
+ if (ret < 0) {
+ mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
+ ret);
+ goto out;
+ }
+ ret = 0;
+
+ if (count > counters_count)
+ count = counters_count;
+
+ /* Each counter is low word, then high. But each word is big-endian */
+ for (i = 0; i < count; i++)
+ counters[i] = (u64)ntohl(data[i * 2]) |
+ ((u64)ntohl(data[i * 2 + 1]) << 32);
+
+out:
+ kfree(data);
+ return ret;
+}
+
+static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
+{
+ struct mlx5_fpga_ipsec_cmd_context *context;
+ struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
+ int err;
+
+ cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
+ cmd.flags = htonl(flags);
+ context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
+ if (IS_ERR(context))
+ return PTR_ERR(context);
+
+ err = mlx5_fpga_ipsec_cmd_wait(context);
+ if (err)
+ goto out;
+
+ if ((context->resp.flags & cmd.flags) != cmd.flags) {
+ mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
+ cmd.flags,
+ context->resp.flags);
+ err = -EIO;
+ }
+
+out:
+ kfree(context);
+ return err;
+}
+
+static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
+{
+ u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
+ u32 flags = 0;
+
+ if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
+ flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
+
+ return mlx5_fpga_ipsec_set_caps(mdev, flags);
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
+
+ /* key */
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
+ aes_gcm->key_len / 8);
+ /* Duplicate 128 bit key twice according to HW layout */
+ if (aes_gcm->key_len == 128)
+ memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
+ aes_gcm->aes_key, aes_gcm->key_len / 8);
+
+ /* salt and seq_iv */
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
+ sizeof(aes_gcm->seq_iv));
+ memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
+ sizeof(aes_gcm->salt));
+
+ /* esn */
+ if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags |=
+ (xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+ hw_sa->esn = htonl(xfrm_attrs->esn);
+ } else {
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
+ hw_sa->ipsec_sa_v1.flags &=
+ ~(xfrm_attrs->flags &
+ MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+ MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+ hw_sa->esn = 0;
+ }
+
+ /* rx handle */
+ hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
+
+ /* enc mode */
+ switch (aes_gcm->key_len) {
+ case 128:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
+ break;
+ case 256:
+ hw_sa->ipsec_sa_v1.enc_mode =
+ MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
+ break;
+ }
+
+ /* flags */
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
+ MLX5_FPGA_IPSEC_SA_SPI_EN |
+ MLX5_FPGA_IPSEC_SA_IP_ESP;
+
+ if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
+ else
+ hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6,
+ struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+ mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
+
+ /* IPs */
+ memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
+ memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
+
+ /* SPI */
+ hw_sa->ipsec_sa_v1.spi = spi;
+
+ /* flags */
+ if (is_ipv6)
+ hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
+}
+
+static bool is_full_mask(const void *p, size_t len)
+{
+ WARN_ON(len % 4);
+
+ return !memchr_inv(p, 0xff, len);
+}
+
+static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ misc_parameters);
+ const void *headers_c = MLX5_ADDR_OF(fte_match_param,
+ match_c,
+ outer_headers);
+ const void *headers_v = MLX5_ADDR_OF(fte_match_param,
+ match_v,
+ outer_headers);
+
+ if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
+ const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)) ||
+ !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+ ipv4)))
+ return false;
+ } else {
+ const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+
+ if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)) ||
+ !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+ ipv6)))
+ return false;
+ }
+
+ if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ outer_esp_spi),
+ MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v)
+{
+ u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
+ bool ipv6_flow;
+
+ ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
+
+ if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
+ mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
+ mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
+ mlx5_fs_is_vxlan_flow(match_c) ||
+ !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
+ ipv6_flow))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
+ mlx5_fs_is_outer_ipsec_flow(match_c))
+ return false;
+
+ if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
+ ipv6_flow)
+ return false;
+
+ if (!validate_fpga_full_mask(dev, match_c, match_v))
+ return false;
+
+ return true;
+}
+
+static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
+ u8 match_criteria_enable,
+ const u32 *match_c,
+ const u32 *match_v,
+ struct mlx5_flow_act *flow_act)
+{
+ const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
+ bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
+ MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
+ int ret;
+
+ ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
+ match_v);
+ if (!ret)
+ return ret;
+
+ if (is_dmac || is_smac ||
+ (match_criteria_enable &
+ ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
+ (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
+ flow_act->has_flow_tag)
+ return false;
+
+ return true;
+}
+
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(accel_xfrm, typeof(*fpga_xfrm),
+ accel_xfrm);
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode, err;
+ void *context;
+
+ /* alloc SA */
+ sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
+ if (!sa_ctx)
+ return ERR_PTR(-ENOMEM);
+
+ sa_ctx->dev = mdev;
+
+ /* build candidate SA */
+ mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
+ saddr, daddr, spi, is_ipv6,
+ &sa_ctx->hw_sa);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (fpga_xfrm->sa_ctx) { /* multiple rules for same accel_xfrm */
+ /* all rules must be with same IPs and SPI */
+ if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
+ sizeof(sa_ctx->hw_sa))) {
+ context = ERR_PTR(-EINVAL);
+ goto exists;
+ }
+
+ ++fpga_xfrm->num_rules;
+ context = fpga_xfrm->sa_ctx;
+ goto exists;
+ }
+
+ /* This is unbounded fpga_xfrm, try to add to hash */
+ mutex_lock(&fipsec->sa_hash_lock);
+
+ err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa);
+ if (err) {
+ /* Can't bound different accel_xfrm to already existing sa_ctx.
+ * This is because we can't support multiple ketmats for
+ * same IPs and SPI
+ */
+ context = ERR_PTR(-EEXIST);
+ goto unlock_hash;
+ }
+
+ /* Bound accel_xfrm to sa_ctx */
+ opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err) {
+ context = ERR_PTR(err);
+ goto delete_hash;
+ }
+
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+ ++fpga_xfrm->num_rules;
+ fpga_xfrm->sa_ctx = sa_ctx;
+ sa_ctx->fpga_xfrm = fpga_xfrm;
+
+ mutex_unlock(&fpga_xfrm->lock);
+
+ return sa_ctx;
+
+delete_hash:
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+unlock_hash:
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+exists:
+ mutex_unlock(&fpga_xfrm->lock);
+ kfree(sa_ctx);
+ return context;
+}
+
+static void *
+mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ struct mlx5_accel_esp_xfrm *accel_xfrm;
+ __be32 saddr[4], daddr[4], spi;
+ struct mlx5_flow_group *fg;
+ bool is_ipv6 = false;
+
+ fs_get_obj(fg, fte->node.parent);
+ /* validate */
+ if (is_egress &&
+ !mlx5_is_fpga_egress_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val,
+ &fte->action))
+ return ERR_PTR(-EINVAL);
+ else if (!mlx5_is_fpga_ipsec_rule(mdev,
+ fg->mask.match_criteria_enable,
+ fg->mask.match_criteria,
+ fte->val))
+ return ERR_PTR(-EINVAL);
+
+ /* get xfrm context */
+ accel_xfrm =
+ (struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
+
+ /* IPs */
+ if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
+ fte->val)) {
+ memcpy(&saddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ sizeof(saddr[3]));
+ memcpy(&daddr[3],
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+ fte->val,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ sizeof(daddr[3]));
+ } else {
+ memcpy(saddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(saddr));
+ memcpy(daddr,
+ MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(daddr));
+ is_ipv6 = true;
+ }
+
+ /* SPI */
+ spi = MLX5_GET_BE(typeof(spi),
+ fte_match_param, fte->val,
+ misc_parameters.outer_esp_spi);
+
+ /* create */
+ return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
+ saddr, daddr,
+ spi, is_ipv6);
+}
+
+static void
+mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
+{
+ struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ int opcode = is_v2_sadb_supported(fdev->ipsec) ?
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
+ MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
+ int err;
+
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+ sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err) {
+ WARN_ON(err);
+ return;
+ }
+
+ mutex_lock(&fipsec->sa_hash_lock);
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+ rhash_sa));
+ mutex_unlock(&fipsec->sa_hash_lock);
+}
+
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ ((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
+
+ mutex_lock(&fpga_xfrm->lock);
+ if (!--fpga_xfrm->num_rules) {
+ mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
+ kfree(fpga_xfrm->sa_ctx);
+ fpga_xfrm->sa_ctx = NULL;
+ }
+ mutex_unlock(&fpga_xfrm->lock);
+}
+
+static inline struct mlx5_fpga_ipsec_rule *
+_rule_search(struct rb_root *root, struct fs_fte *fte)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct mlx5_fpga_ipsec_rule *rule =
+ container_of(node, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ if (rule->fte < fte)
+ node = node->rb_left;
+ else if (rule->fte > fte)
+ node = node->rb_right;
+ else
+ return rule;
+ }
+ return NULL;
+}
+
+static struct mlx5_fpga_ipsec_rule *
+rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
+{
+ struct mlx5_fpga_ipsec_rule *rule;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rule = _rule_search(&ipsec_dev->rules_rb, fte);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return rule;
+}
+
+static inline int _rule_insert(struct rb_root *root,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_node **new = &root->rb_node, *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct mlx5_fpga_ipsec_rule *this =
+ container_of(*new, struct mlx5_fpga_ipsec_rule,
+ node);
+
+ parent = *new;
+ if (rule->fte < this->fte)
+ new = &((*new)->rb_left);
+ else if (rule->fte > this->fte)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&rule->node, parent, new);
+ rb_insert_color(&rule->node, root);
+
+ return 0;
+}
+
+static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ int ret;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ ret = _rule_insert(&ipsec_dev->rules_rb, rule);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+ return ret;
+}
+
+static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ struct rb_root *root = &ipsec_dev->rules_rb;
+
+ mutex_lock(&ipsec_dev->rules_rb_lock);
+ rb_erase(&rule->node, root);
+ mutex_unlock(&ipsec_dev->rules_rb_lock);
+}
+
+static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+ struct mlx5_fpga_ipsec_rule *rule)
+{
+ _rule_delete(ipsec_dev, rule);
+ kfree(rule);
+}
+
+struct mailbox_mod {
+ uintptr_t saved_esp_id;
+ u32 saved_action;
+ u32 saved_outer_esp_spi_value;
+};
+
+static void restore_spec_mailbox(struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ mbox_mod->saved_outer_esp_spi_value);
+ fte->action.action |= mbox_mod->saved_action;
+ fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
+}
+
+static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
+ struct fs_fte *fte,
+ struct mailbox_mod *mbox_mod)
+{
+ char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+ fte->val,
+ misc_parameters);
+
+ mbox_mod->saved_esp_id = fte->action.esp_id;
+ mbox_mod->saved_action = fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ mbox_mod->saved_outer_esp_spi_value =
+ MLX5_GET(fte_match_set_misc, misc_params_v,
+ outer_esp_spi);
+
+ fte->action.esp_id = 0;
+ fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+ if (!MLX5_CAP_FLOWTABLE(mdev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
+}
+
+static enum fs_flow_table_type egress_to_fs_ft(bool egress)
+{
+ return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
+}
+
+static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id,
+ bool is_egress)
+{
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 *in,
+ unsigned int *group_id) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
+ char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
+ match_criteria.misc_parameters);
+ u32 saved_outer_esp_spi_mask;
+ u8 match_criteria_enable;
+ int ret;
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ return create_flow_group(dev, ft, in, group_id);
+
+ match_criteria_enable =
+ MLX5_GET(create_flow_group_in, in, match_criteria_enable);
+ saved_outer_esp_spi_mask =
+ MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+ if (!match_criteria_enable || !saved_outer_esp_spi_mask)
+ return create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
+
+ if (!(*misc_params_c) &&
+ !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
+
+ ret = create_flow_group(dev, ft, in, group_id);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return create_fte(dev, ft, fg, fte);
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return -ENOMEM;
+
+ rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
+ if (IS_ERR(rule->ctx)) {
+ int err = PTR_ERR(rule->ctx);
+ kfree(rule);
+ return err;
+ }
+
+ rule->fte = fte;
+ WARN_ON(rule_insert(fipsec, rule));
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = create_fte(dev, ft, fg, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+ if (ret) {
+ _rule_delete(fipsec, rule);
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ kfree(rule);
+ }
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return update_fte(dev, ft, group_id, modify_mask, fte);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = update_fte(dev, ft, group_id, modify_mask, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte,
+ bool is_egress)
+{
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte) =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
+ struct mlx5_fpga_device *fdev = dev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_ipsec_rule *rule;
+ bool is_esp = fte->action.esp_id;
+ struct mailbox_mod mbox_mod;
+ int ret;
+
+ if (!is_esp ||
+ !(fte->action.action &
+ (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+ return delete_fte(dev, ft, fte);
+
+ rule = rule_search(fipsec, fte);
+ if (!rule)
+ return -ENOENT;
+
+ mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+ rule_delete(fipsec, rule);
+
+ modify_spec_mailbox(dev, fte, &mbox_mod);
+ ret = delete_fte(dev, ft, fte);
+ restore_spec_mailbox(fte, &mbox_mod);
+
+ return ret;
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+ false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return fpga_ipsec_fs_delete_fte(dev, ft, fte, false);
+}
+
+static struct mlx5_flow_cmds fpga_ipsec_ingress;
+static struct mlx5_flow_cmds fpga_ipsec_egress;
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ return &fpga_ipsec_ingress;
+ case FS_FT_NIC_TX:
+ return &fpga_ipsec_egress;
+ default:
+ WARN_ON(true);
+ return NULL;
+ }
+}
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_conn_attr init_attr = {0};
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_conn *conn;
+ int err;
+
+ if (!mlx5_fpga_is_ipsec_device(mdev))
+ return 0;
+
+ fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
+ if (!fdev->ipsec)
+ return -ENOMEM;
+
+ fdev->ipsec->fdev = fdev;
+
+ err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
+ fdev->ipsec->caps);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
+ err);
+ goto error;
+ }
+
+ INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
+ spin_lock_init(&fdev->ipsec->pending_cmds_lock);
+
+ init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.recv_cb = mlx5_fpga_ipsec_recv;
+ init_attr.cb_arg = fdev;
+ conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+ if (IS_ERR(conn)) {
+ err = PTR_ERR(conn);
+ mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
+ err);
+ goto error;
+ }
+ fdev->ipsec->conn = conn;
+
+ err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
+ if (err)
+ goto err_destroy_conn;
+ mutex_init(&fdev->ipsec->sa_hash_lock);
+
+ fdev->ipsec->rules_rb = RB_ROOT;
+ mutex_init(&fdev->ipsec->rules_rb_lock);
+
+ err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
+ err);
+ goto err_destroy_hash;
+ }
+
+ return 0;
+
+err_destroy_hash:
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
+err_destroy_conn:
+ mlx5_fpga_sbu_conn_destroy(conn);
+
+error:
+ kfree(fdev->ipsec);
+ fdev->ipsec = NULL;
+ return err;
+}
+
+static void destroy_rules_rb(struct rb_root *root)
+{
+ struct mlx5_fpga_ipsec_rule *r, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
+ rb_erase(&r->node, root);
+ mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
+ kfree(r);
+ }
+}
+
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ if (!mlx5_fpga_is_ipsec_device(mdev))
+ return;
+
+ destroy_rules_rb(&fdev->ipsec->rules_rb);
+ rhashtable_destroy(&fdev->ipsec->sa_hash);
+
+ mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
+ kfree(fdev->ipsec);
+ fdev->ipsec = NULL;
+}
+
+void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+ /* ingress */
+ fpga_ipsec_ingress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
+ fpga_ipsec_ingress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
+ fpga_ipsec_ingress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
+ fpga_ipsec_ingress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_ingress;
+ fpga_ipsec_ingress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
+ fpga_ipsec_ingress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_ingress;
+ fpga_ipsec_ingress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_ingress;
+ fpga_ipsec_ingress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_ingress;
+ fpga_ipsec_ingress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
+
+ /* egress */
+ fpga_ipsec_egress.create_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
+ fpga_ipsec_egress.destroy_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
+ fpga_ipsec_egress.modify_flow_table =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
+ fpga_ipsec_egress.create_flow_group =
+ mlx5_fpga_ipsec_fs_create_flow_group_egress;
+ fpga_ipsec_egress.destroy_flow_group =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
+ fpga_ipsec_egress.create_fte =
+ mlx5_fpga_ipsec_fs_create_fte_egress;
+ fpga_ipsec_egress.update_fte =
+ mlx5_fpga_ipsec_fs_update_fte_egress;
+ fpga_ipsec_egress.delete_fte =
+ mlx5_fpga_ipsec_fs_delete_fte_egress;
+ fpga_ipsec_egress.update_root_ft =
+ mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
+}
+
+static int
+mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ if (attrs->tfc_pad) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
+ mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.iv_algo !=
+ MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
+ mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.icv_len != 128) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (attrs->keymat.aes_gcm.key_len != 128 &&
+ attrs->keymat.aes_gcm.key_len != 256) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
+ (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
+ v2_command))) {
+ mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+
+ if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
+ mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
+ if (!fpga_xfrm)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&fpga_xfrm->lock);
+ memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
+ sizeof(fpga_xfrm->accel_xfrm.attrs));
+
+ return &fpga_xfrm->accel_xfrm;
+}
+
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+ container_of(xfrm, struct mlx5_fpga_esp_xfrm,
+ accel_xfrm);
+ /* assuming no sa_ctx are connected to this xfrm_ctx */
+ kfree(fpga_xfrm);
+}
+
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ struct mlx5_core_dev *mdev = xfrm->mdev;
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+ struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+ struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
+
+ int err = 0;
+
+ if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
+ return 0;
+
+ if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+ mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (is_v2_sadb_supported(fipsec)) {
+ mlx5_core_warn(mdev, "Modify esp is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
+
+ mutex_lock(&fpga_xfrm->lock);
+
+ if (!fpga_xfrm->sa_ctx)
+ /* Unbounded xfrm, chane only sw attrs */
+ goto change_sw_xfrm_attrs;
+
+ /* copy original hw sa */
+ memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
+ mutex_lock(&fipsec->sa_hash_lock);
+ /* remove original hw sa from hash */
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa));
+ /* update hw_sa with new xfrm attrs*/
+ mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
+ &fpga_xfrm->sa_ctx->hw_sa);
+ /* try to insert new hw_sa to hash */
+ err = rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash, rhash_sa);
+ if (err)
+ goto rollback_sa;
+
+ /* modify device with new hw_sa */
+ err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
+ MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
+ fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+ if (err)
+ WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+rollback_sa:
+ if (err) {
+ /* return original hw_sa to hash */
+ memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
+ sizeof(org_hw_sa));
+ WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
+ &fpga_xfrm->sa_ctx->hash,
+ rhash_sa));
+ }
+ mutex_unlock(&fipsec->sa_hash_lock);
+
+change_sw_xfrm_attrs:
+ if (!err)
+ memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
+ mutex_unlock(&fpga_xfrm->lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
new file mode 100644
index 000000000..2b5e63b0d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_IPSEC_H__
+#define __MLX5_FPGA_IPSEC_H__
+
+#include "accel/ipsec.h"
+#include "fs_cmd.h"
+
+#ifdef CONFIG_MLX5_FPGA
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+ unsigned int counters_count);
+
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6);
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_build_fs_cmds(void);
+
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags);
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
+
+#else
+
+static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline unsigned int
+mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
+ u64 *counters)
+{
+ return 0;
+}
+
+static inline void *
+mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+ struct mlx5_accel_esp_xfrm *accel_xfrm,
+ const __be32 saddr[4],
+ const __be32 daddr[4],
+ const __be32 spi, bool is_ipv6)
+{
+ return NULL;
+}
+
+static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+}
+
+static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+ return 0;
+}
+
+static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+}
+
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs,
+ u32 flags)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+}
+
+static inline int
+mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+ const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+ return mlx5_fs_cmd_get_default(type);
+}
+
+#endif /* CONFIG_MLX5_FPGA */
+
+#endif /* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
new file mode 100644
index 000000000..14962969c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "fpga/core.h"
+#include "fpga/conn.h"
+#include "fpga/sdk.h"
+
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr)
+{
+ return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create);
+
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+ mlx5_fpga_conn_destroy(conn);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy);
+
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ return mlx5_fpga_conn_send(conn, buf);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg);
+
+static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
+ u64 addr, u8 *buf)
+{
+ size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+ size_t bytes_done = 0;
+ u8 actual_size;
+ int err;
+
+ if (!size)
+ return -EINVAL;
+
+ if (!fdev->mdev)
+ return -ENOTCONN;
+
+ while (bytes_done < size) {
+ actual_size = min(max_size, (size - bytes_done));
+
+ err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+ addr + bytes_done,
+ buf + bytes_done, false);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n",
+ err);
+ break;
+ }
+
+ bytes_done += actual_size;
+ }
+
+ return err;
+}
+
+static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
+ u64 addr, u8 *buf)
+{
+ size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+ size_t bytes_done = 0;
+ u8 actual_size;
+ int err;
+
+ if (!size)
+ return -EINVAL;
+
+ if (!fdev->mdev)
+ return -ENOTCONN;
+
+ while (bytes_done < size) {
+ actual_size = min(max_size, (size - bytes_done));
+
+ err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+ addr + bytes_done,
+ buf + bytes_done, true);
+ if (err) {
+ mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n");
+ break;
+ }
+
+ bytes_done += actual_size;
+ }
+
+ return err;
+}
+
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_read);
+
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type)
+{
+ int ret;
+
+ switch (access_type) {
+ case MLX5_FPGA_ACCESS_TYPE_I2C:
+ ret = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf);
+ if (ret)
+ return ret;
+ break;
+ default:
+ mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n",
+ access_type);
+ return -EACCES;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_write);
+
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf)
+{
+ return mlx5_fpga_sbu_caps(fdev->mdev, buf, size);
+}
+EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
new file mode 100644
index 000000000..656f96be6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MLX5_FPGA_SDK_H
+#define MLX5_FPGA_SDK_H
+
+#include <linux/types.h>
+#include <linux/dma-direction.h>
+
+/**
+ * DOC: Innova SDK
+ * This header defines the in-kernel API for Innova FPGA client drivers.
+ */
+#define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_CMD_TIMEOUT_MSEC (60 * 1000)
+
+/**
+ * enum mlx5_fpga_access_type - Enumerated the different methods possible for
+ * accessing the device memory address space
+ */
+enum mlx5_fpga_access_type {
+ /** Use the slow CX-FPGA I2C bus */
+ MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
+ /** Use the fastest available method */
+ MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
+};
+
+struct mlx5_fpga_conn;
+struct mlx5_fpga_device;
+
+/**
+ * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry
+ */
+struct mlx5_fpga_dma_entry {
+ /** @data: Virtual address pointer to the data */
+ void *data;
+ /** @size: Size in bytes of the data */
+ unsigned int size;
+ /** @dma_addr: Private member. Physical DMA-mapped address of the data */
+ dma_addr_t dma_addr;
+};
+
+/**
+ * struct mlx5_fpga_dma_buf - A packet buffer
+ * May contain up to 2 scatter-gather data entries
+ */
+struct mlx5_fpga_dma_buf {
+ /** @dma_dir: DMA direction */
+ enum dma_data_direction dma_dir;
+ /** @sg: Scatter-gather entries pointing to the data in memory */
+ struct mlx5_fpga_dma_entry sg[2];
+ /** @list: Item in SQ backlog, for TX packets */
+ struct list_head list;
+ /**
+ * @complete: Completion routine, for TX packets
+ * @conn: FPGA Connection this packet was sent to
+ * @fdev: FPGA device this packet was sent to
+ * @buf: The packet buffer
+ * @status: 0 if successful, or an error code otherwise
+ */
+ void (*complete)(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf, u8 status);
+};
+
+/**
+ * struct mlx5_fpga_conn_attr - FPGA connection attributes
+ * Describes the attributes of a connection
+ */
+struct mlx5_fpga_conn_attr {
+ /** @tx_size: Size of connection TX queue, in packets */
+ unsigned int tx_size;
+ /** @rx_size: Size of connection RX queue, in packets */
+ unsigned int rx_size;
+ /**
+ * @recv_cb: Callback function which is called for received packets
+ * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg
+ * @buf: A buffer containing a received packet
+ *
+ * buf is guaranteed to only contain a single scatter-gather entry.
+ * The size of the actual packet received is specified in buf.sg[0].size
+ * When this callback returns, the packet buffer may be re-used for
+ * subsequent receives.
+ */
+ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+ void *cb_arg;
+};
+
+/**
+ * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection
+ * @fdev: The FPGA device
+ * @attr: Attributes of the new connection
+ *
+ * Sets up a new FPGA SBU connection with the specified attributes.
+ * The receive callback function may be called for incoming messages even
+ * before this function returns.
+ *
+ * The caller must eventually destroy the connection by calling
+ * mlx5_fpga_sbu_conn_destroy.
+ *
+ * Return: A new connection, or ERR_PTR() error value otherwise.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_conn_attr *attr);
+
+/**
+ * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection
+ * @conn: The FPGA SBU connection to destroy
+ *
+ * Cleans up an FPGA SBU connection which was previously created with
+ * mlx5_fpga_sbu_conn_create.
+ */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
+
+/**
+ * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
+ * @fdev: An FPGA SBU connection
+ * @buf: The packet buffer
+ *
+ * Queues a packet for transmission over an FPGA SBU connection.
+ * The buffer should not be modified or freed until completion.
+ * Upon completion, the buf's complete() callback is invoked, indicating the
+ * success or error status of the transmission.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf);
+
+/**
+ * mlx5_fpga_mem_read() - Read from FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to read, in bytes
+ * @addr: Starting address to read from, in FPGA address space
+ * @buf: Buffer to read into
+ * @access_type: Method for reading
+ *
+ * Reads from the specified address into the specified buffer.
+ * The address may point to configuration space or to DDR.
+ * Large reads may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_mem_write() - Write to FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to write, in bytes
+ * @addr: Starting address to write to, in FPGA address space
+ * @buf: Buffer which contains data to write
+ * @access_type: Method for writing
+ *
+ * Writes the specified buffer data to FPGA memory at the specified address.
+ * The address may point to configuration space or to DDR.
+ * Large writes may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+ void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities
+ * @fdev: The FPGA device
+ * @size: Size of the buffer to read into
+ * @buf: Buffer to read the capabilities into
+ *
+ * Reads the FPGA SBU capabilities into the specified buffer.
+ * The format of the capabilities buffer is SBU-dependent.
+ *
+ * Return: 0 if successful
+ * -EINVAL if the buffer is not large enough to contain SBU caps
+ * or any other error value otherwise.
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf);
+
+#endif /* MLX5_FPGA_SDK_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
new file mode 100644
index 000000000..22a2ef111
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+#include "fpga/tls.h"
+#include "fpga/cmd.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+#include "accel/tls.h"
+
+struct mlx5_fpga_tls_command_context;
+
+typedef void (*mlx5_fpga_tls_command_complete)
+ (struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *ctx,
+ struct mlx5_fpga_dma_buf *resp);
+
+struct mlx5_fpga_tls_command_context {
+ struct list_head list;
+ /* There is no guarantee on the order between the TX completion
+ * and the command response.
+ * The TX completion is going to touch cmd->buf even in
+ * the case of successful transmission.
+ * So instead of requiring separate allocations for cmd
+ * and cmd->buf we've decided to use a reference counter
+ */
+ refcount_t ref;
+ struct mlx5_fpga_dma_buf buf;
+ mlx5_fpga_tls_command_complete complete;
+};
+
+static void
+mlx5_fpga_tls_put_command_ctx(struct mlx5_fpga_tls_command_context *ctx)
+{
+ if (refcount_dec_and_test(&ctx->ref))
+ kfree(ctx);
+}
+
+static void mlx5_fpga_tls_cmd_complete(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ struct mlx5_fpga_conn *conn = fdev->tls->conn;
+ struct mlx5_fpga_tls_command_context *ctx;
+ struct mlx5_fpga_tls *tls = fdev->tls;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+ ctx = list_first_entry(&tls->pending_cmds,
+ struct mlx5_fpga_tls_command_context, list);
+ list_del(&ctx->list);
+ spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+ ctx->complete(conn, fdev, ctx, resp);
+}
+
+static void mlx5_fpga_cmd_send_complete(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf,
+ u8 status)
+{
+ struct mlx5_fpga_tls_command_context *ctx =
+ container_of(buf, struct mlx5_fpga_tls_command_context, buf);
+
+ mlx5_fpga_tls_put_command_ctx(ctx);
+
+ if (unlikely(status))
+ mlx5_fpga_tls_cmd_complete(fdev, NULL);
+}
+
+static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ mlx5_fpga_tls_command_complete complete)
+{
+ struct mlx5_fpga_tls *tls = fdev->tls;
+ unsigned long flags;
+ int ret;
+
+ refcount_set(&cmd->ref, 2);
+ cmd->complete = complete;
+ cmd->buf.complete = mlx5_fpga_cmd_send_complete;
+
+ spin_lock_irqsave(&tls->pending_cmds_lock, flags);
+ /* mlx5_fpga_sbu_conn_sendmsg is called under pending_cmds_lock
+ * to make sure commands are inserted to the tls->pending_cmds list
+ * and the command QP in the same order.
+ */
+ ret = mlx5_fpga_sbu_conn_sendmsg(tls->conn, &cmd->buf);
+ if (likely(!ret))
+ list_add_tail(&cmd->list, &tls->pending_cmds);
+ else
+ complete(tls->conn, fdev, cmd, NULL);
+ spin_unlock_irqrestore(&tls->pending_cmds_lock, flags);
+}
+
+/* Start of context identifiers range (inclusive) */
+#define SWID_START 0
+/* End of context identifiers range (exclusive) */
+#define SWID_END BIT(24)
+
+static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
+ void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ /* TLS metadata format is 1 byte for syndrome followed
+ * by 3 bytes of swid (software ID)
+ * swid must not exceed 3 bytes.
+ * See tls_rxtx.c:insert_pet() for details
+ */
+ BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_irqsave(idr_spinlock, flags);
+ ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
+ spin_unlock_irqrestore(idr_spinlock, flags);
+ idr_preload_end();
+
+ return ret;
+}
+
+static void *mlx5_fpga_tls_release_swid(struct idr *idr,
+ spinlock_t *idr_spinlock, u32 swid)
+{
+ unsigned long flags;
+ void *ptr;
+
+ spin_lock_irqsave(idr_spinlock, flags);
+ ptr = idr_remove(idr, swid);
+ spin_unlock_irqrestore(idr_spinlock, flags);
+ return ptr;
+}
+
+static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_dma_buf *buf, u8 status)
+{
+ kfree(buf);
+}
+
+static void
+mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ if (resp) {
+ u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+
+ if (syndrome)
+ mlx5_fpga_err(fdev,
+ "Teardown stream failed with syndrome = %d",
+ syndrome);
+ }
+ mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
+{
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, src_port), flow,
+ MLX5_BYTE_OFF(tls_flow, ipv6));
+
+ MLX5_SET(tls_cmd, cmd, ipv6, MLX5_GET(tls_flow, flow, ipv6));
+ MLX5_SET(tls_cmd, cmd, direction_sx,
+ MLX5_GET(tls_flow, flow, direction_sx));
+}
+
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
+ void *flow;
+ void *cmd;
+ int ret;
+
+ buf = kzalloc(size, GFP_ATOMIC);
+ if (!buf)
+ return -ENOMEM;
+
+ cmd = (buf + 1);
+
+ rcu_read_lock();
+ flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+ if (unlikely(!flow)) {
+ rcu_read_unlock();
+ WARN_ONCE(1, "Received NULL pointer for handle\n");
+ kfree(buf);
+ return -EINVAL;
+ }
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ rcu_read_unlock();
+
+ MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
+ MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
+ MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
+
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+ buf->complete = mlx_tls_kfree_complete;
+
+ ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+ if (ret < 0)
+ kfree(buf);
+
+ return ret;
+}
+
+static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
+ void *flow, u32 swid, gfp_t flags)
+{
+ struct mlx5_fpga_tls_command_context *ctx;
+ struct mlx5_fpga_dma_buf *buf;
+ void *cmd;
+
+ ctx = kzalloc(sizeof(*ctx) + MLX5_TLS_COMMAND_SIZE, flags);
+ if (!ctx)
+ return;
+
+ buf = &ctx->buf;
+ cmd = (ctx + 1);
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
+ MLX5_SET(tls_cmd, cmd, swid, swid);
+
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ kfree(flow);
+
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+
+ mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
+ mlx5_fpga_tls_teardown_completion);
+}
+
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ gfp_t flags, bool direction_sx)
+{
+ struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+ void *flow;
+
+ if (direction_sx)
+ flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock,
+ swid);
+ else
+ flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock,
+ swid);
+
+ if (!flow) {
+ mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
+ swid);
+ return;
+ }
+
+ synchronize_rcu(); /* before kfree(flow) */
+ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
+}
+
+enum mlx5_fpga_setup_stream_status {
+ MLX5_FPGA_CMD_PENDING,
+ MLX5_FPGA_CMD_SEND_FAILED,
+ MLX5_FPGA_CMD_RESPONSE_RECEIVED,
+ MLX5_FPGA_CMD_ABANDONED,
+};
+
+struct mlx5_setup_stream_context {
+ struct mlx5_fpga_tls_command_context cmd;
+ atomic_t status;
+ u32 syndrome;
+ struct completion comp;
+};
+
+static void
+mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_device *fdev,
+ struct mlx5_fpga_tls_command_context *cmd,
+ struct mlx5_fpga_dma_buf *resp)
+{
+ struct mlx5_setup_stream_context *ctx =
+ container_of(cmd, struct mlx5_setup_stream_context, cmd);
+ int status = MLX5_FPGA_CMD_SEND_FAILED;
+ void *tls_cmd = ctx + 1;
+
+ /* If we failed to send to command resp == NULL */
+ if (resp) {
+ ctx->syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
+ status = MLX5_FPGA_CMD_RESPONSE_RECEIVED;
+ }
+
+ status = atomic_xchg_release(&ctx->status, status);
+ if (likely(status != MLX5_FPGA_CMD_ABANDONED)) {
+ complete(&ctx->comp);
+ return;
+ }
+
+ mlx5_fpga_err(fdev, "Command was abandoned, syndrome = %u\n",
+ ctx->syndrome);
+
+ if (!ctx->syndrome) {
+ /* The process was killed while waiting for the context to be
+ * added, and the add completed successfully.
+ * We need to destroy the HW context, and we can't can't reuse
+ * the command context because we might not have received
+ * the tx completion yet.
+ */
+ mlx5_fpga_tls_del_flow(fdev->mdev,
+ MLX5_GET(tls_cmd, tls_cmd, swid),
+ GFP_ATOMIC,
+ MLX5_GET(tls_cmd, tls_cmd,
+ direction_sx));
+ }
+
+ mlx5_fpga_tls_put_command_ctx(cmd);
+}
+
+static int mlx5_fpga_tls_setup_stream_cmd(struct mlx5_core_dev *mdev,
+ struct mlx5_setup_stream_context *ctx)
+{
+ struct mlx5_fpga_dma_buf *buf;
+ void *cmd = ctx + 1;
+ int status, ret = 0;
+
+ buf = &ctx->cmd.buf;
+ buf->sg[0].data = cmd;
+ buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+ MLX5_SET(tls_cmd, cmd, command_type, CMD_SETUP_STREAM);
+
+ init_completion(&ctx->comp);
+ atomic_set(&ctx->status, MLX5_FPGA_CMD_PENDING);
+ ctx->syndrome = -1;
+
+ mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+ mlx5_fpga_tls_setup_completion);
+ wait_for_completion_killable(&ctx->comp);
+
+ status = atomic_xchg_acquire(&ctx->status, MLX5_FPGA_CMD_ABANDONED);
+ if (unlikely(status == MLX5_FPGA_CMD_PENDING))
+ /* ctx is going to be released in mlx5_fpga_tls_setup_completion */
+ return -EINTR;
+
+ if (unlikely(ctx->syndrome))
+ ret = -ENOMEM;
+
+ mlx5_fpga_tls_put_command_ctx(&ctx->cmd);
+ return ret;
+}
+
+static void mlx5_fpga_tls_hw_qp_recv_cb(void *cb_arg,
+ struct mlx5_fpga_dma_buf *buf)
+{
+ struct mlx5_fpga_device *fdev = (struct mlx5_fpga_device *)cb_arg;
+
+ mlx5_fpga_tls_cmd_complete(fdev, buf);
+}
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, ieee_vendor_id) !=
+ MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_id) !=
+ MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_TLS)
+ return false;
+
+ if (MLX5_CAP_FPGA(mdev, sandbox_product_version) != 0)
+ return false;
+
+ return true;
+}
+
+static int mlx5_fpga_tls_get_caps(struct mlx5_fpga_device *fdev,
+ u32 *p_caps)
+{
+ int err, cap_size = MLX5_ST_SZ_BYTES(tls_extended_cap);
+ u32 caps = 0;
+ void *buf;
+
+ buf = kzalloc(cap_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ err = mlx5_fpga_get_sbu_caps(fdev, cap_size, buf);
+ if (err)
+ goto out;
+
+ if (MLX5_GET(tls_extended_cap, buf, tx))
+ caps |= MLX5_ACCEL_TLS_TX;
+ if (MLX5_GET(tls_extended_cap, buf, rx))
+ caps |= MLX5_ACCEL_TLS_RX;
+ if (MLX5_GET(tls_extended_cap, buf, tls_v12))
+ caps |= MLX5_ACCEL_TLS_V12;
+ if (MLX5_GET(tls_extended_cap, buf, tls_v13))
+ caps |= MLX5_ACCEL_TLS_V13;
+ if (MLX5_GET(tls_extended_cap, buf, lro))
+ caps |= MLX5_ACCEL_TLS_LRO;
+ if (MLX5_GET(tls_extended_cap, buf, ipv6))
+ caps |= MLX5_ACCEL_TLS_IPV6;
+
+ if (MLX5_GET(tls_extended_cap, buf, aes_gcm_128))
+ caps |= MLX5_ACCEL_TLS_AES_GCM128;
+ if (MLX5_GET(tls_extended_cap, buf, aes_gcm_256))
+ caps |= MLX5_ACCEL_TLS_AES_GCM256;
+
+ *p_caps = caps;
+ err = 0;
+out:
+ kfree(buf);
+ return err;
+}
+
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+ struct mlx5_fpga_conn_attr init_attr = {0};
+ struct mlx5_fpga_conn *conn;
+ struct mlx5_fpga_tls *tls;
+ int err = 0;
+
+ if (!mlx5_fpga_is_tls_device(mdev) || !fdev)
+ return 0;
+
+ tls = kzalloc(sizeof(*tls), GFP_KERNEL);
+ if (!tls)
+ return -ENOMEM;
+
+ err = mlx5_fpga_tls_get_caps(fdev, &tls->caps);
+ if (err)
+ goto error;
+
+ if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
+ err = -ENOTSUPP;
+ goto error;
+ }
+
+ init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+ init_attr.recv_cb = mlx5_fpga_tls_hw_qp_recv_cb;
+ init_attr.cb_arg = fdev;
+ conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+ if (IS_ERR(conn)) {
+ err = PTR_ERR(conn);
+ mlx5_fpga_err(fdev, "Error creating TLS command connection %d\n",
+ err);
+ goto error;
+ }
+
+ tls->conn = conn;
+ spin_lock_init(&tls->pending_cmds_lock);
+ INIT_LIST_HEAD(&tls->pending_cmds);
+
+ idr_init(&tls->tx_idr);
+ idr_init(&tls->rx_idr);
+ spin_lock_init(&tls->tx_idr_spinlock);
+ spin_lock_init(&tls->rx_idr_spinlock);
+ fdev->tls = tls;
+ return 0;
+
+error:
+ kfree(tls);
+ return err;
+}
+
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_fpga_device *fdev = mdev->fpga;
+
+ if (!fdev || !fdev->tls)
+ return;
+
+ mlx5_fpga_sbu_conn_destroy(fdev->tls->conn);
+ kfree(fdev->tls);
+ fdev->tls = NULL;
+}
+
+static void mlx5_fpga_tls_set_aes_gcm128_ctx(void *cmd,
+ struct tls_crypto_info *info,
+ __be64 *rcd_sn)
+{
+ struct tls12_crypto_info_aes_gcm_128 *crypto_info =
+ (struct tls12_crypto_info_aes_gcm_128 *)info;
+
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_rcd_sn), crypto_info->rec_seq,
+ TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
+
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, tls_implicit_iv),
+ crypto_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key),
+ crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+ /* in AES-GCM 128 we need to write the key twice */
+ memcpy(MLX5_ADDR_OF(tls_cmd, cmd, encryption_key) +
+ TLS_CIPHER_AES_GCM_128_KEY_SIZE,
+ crypto_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+
+ MLX5_SET(tls_cmd, cmd, alg, MLX5_TLS_ALG_AES_GCM_128);
+}
+
+static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
+ struct tls_crypto_info *crypto_info)
+{
+ __be64 rcd_sn;
+
+ switch (crypto_info->cipher_type) {
+ case TLS_CIPHER_AES_GCM_128:
+ if (!(caps & MLX5_ACCEL_TLS_AES_GCM128))
+ return -EINVAL;
+ mlx5_fpga_tls_set_aes_gcm128_ctx(cmd, crypto_info, &rcd_sn);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 swid, u32 tcp_sn)
+{
+ u32 caps = mlx5_fpga_tls_device_caps(mdev);
+ struct mlx5_setup_stream_context *ctx;
+ int ret = -ENOMEM;
+ size_t cmd_size;
+ void *cmd;
+
+ cmd_size = MLX5_TLS_COMMAND_SIZE + sizeof(*ctx);
+ ctx = kzalloc(cmd_size, GFP_KERNEL);
+ if (!ctx)
+ goto out;
+
+ cmd = ctx + 1;
+ ret = mlx5_fpga_tls_set_key_material(cmd, caps, crypto_info);
+ if (ret)
+ goto free_ctx;
+
+ mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+
+ MLX5_SET(tls_cmd, cmd, swid, swid);
+ MLX5_SET(tls_cmd, cmd, tcp_sn, tcp_sn);
+
+ return mlx5_fpga_tls_setup_stream_cmd(mdev, ctx);
+
+free_ctx:
+ kfree(ctx);
+out:
+ return ret;
+}
+
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx)
+{
+ struct mlx5_fpga_tls *tls = mdev->fpga->tls;
+ int ret = -ENOMEM;
+ u32 swid;
+
+ if (direction_sx)
+ ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock, flow);
+ else
+ ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock, flow);
+
+ if (ret < 0)
+ return ret;
+
+ swid = ret;
+ MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
+
+ ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
+ start_offload_tcp_sn);
+ if (ret && ret != -EINTR)
+ goto free_swid;
+
+ *p_swid = swid;
+ return 0;
+free_swid:
+ if (direction_sx)
+ mlx5_fpga_tls_release_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock, swid);
+ else
+ mlx5_fpga_tls_release_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock, swid);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
new file mode 100644
index 000000000..3b2e37bf7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_TLS_H__
+#define __MLX5_FPGA_TLS_H__
+
+#include <linux/mlx5/driver.h>
+
+#include <net/tls.h>
+#include "fpga/core.h"
+
+struct mlx5_fpga_tls {
+ struct list_head pending_cmds;
+ spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+ u32 caps;
+ struct mlx5_fpga_conn *conn;
+
+ struct idr tx_idr;
+ struct idr rx_idr;
+ spinlock_t tx_idr_spinlock; /* protects the IDR */
+ spinlock_t rx_idr_spinlock; /* protects the IDR */
+};
+
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+ struct tls_crypto_info *crypto_info,
+ u32 start_offload_tcp_sn, u32 *p_swid,
+ bool direction_sx);
+
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+ gfp_t flags, bool direction_sx);
+
+bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
+int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_tls_cleanup(struct mlx5_core_dev *mdev);
+
+static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
+{
+ return mdev->fpga->tls->caps;
+}
+
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+ u64 rcd_sn);
+
+#endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
new file mode 100644
index 000000000..8e01f8180
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+#include <linux/mlx5/mlx5_ifc.h>
+
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft, u32 underlay_qpn,
+ bool disconnect)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
+
+ if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
+ underlay_qpn == 0)
+ return 0;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
+
+ if (disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 1);
+ MLX5_SET(set_flow_table_root_in, in, table_id, 0);
+ } else {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+ }
+
+ MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
+ if (ft->vport) {
+ MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
+ MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level,
+ unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags)
+{
+ int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
+ u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
+ int err;
+
+ MLX5_SET(create_flow_table_in, in, opcode,
+ MLX5_CMD_OP_CREATE_FLOW_TABLE);
+
+ MLX5_SET(create_flow_table_in, in, table_type, type);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.level, level);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.log_size, log_size);
+ if (vport) {
+ MLX5_SET(create_flow_table_in, in, vport_number, vport);
+ MLX5_SET(create_flow_table_in, in, other_vport, 1);
+ }
+
+ MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
+ en_encap_decap);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
+ en_encap_decap);
+
+ switch (op_mod) {
+ case FS_FT_OP_MOD_NORMAL:
+ if (next_ft) {
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.table_miss_action, 1);
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.table_miss_id, next_ft->id);
+ }
+ break;
+
+ case FS_FT_OP_MOD_LAG_DEMUX:
+ MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
+ if (next_ft)
+ MLX5_SET(create_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id,
+ next_ft->id);
+ break;
+ }
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *table_id = MLX5_GET(create_flow_table_out, out,
+ table_id);
+ return err;
+}
+
+static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
+
+ MLX5_SET(destroy_flow_table_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ MLX5_SET(destroy_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(destroy_flow_table_in, in, table_id, ft->id);
+ if (ft->vport) {
+ MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
+
+ MLX5_SET(modify_flow_table_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_FLOW_TABLE);
+ MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
+ MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
+
+ if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) {
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id, next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.lag_master_next_table_id, 0);
+ }
+ } else {
+ if (ft->vport) {
+ MLX5_SET(modify_flow_table_in, in, vport_number,
+ ft->vport);
+ MLX5_SET(modify_flow_table_in, in, other_vport, 1);
+ }
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_action, 1);
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_id,
+ next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in,
+ flow_table_context.table_miss_action, 0);
+ }
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id)
+{
+ u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int err;
+
+ MLX5_SET(create_flow_group_in, in, opcode,
+ MLX5_CMD_OP_CREATE_FLOW_GROUP);
+ MLX5_SET(create_flow_group_in, in, table_type, ft->type);
+ MLX5_SET(create_flow_group_in, in, table_id, ft->id);
+ if (ft->vport) {
+ MLX5_SET(create_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(create_flow_group_in, in, other_vport, 1);
+ }
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *group_id = MLX5_GET(create_flow_group_out, out,
+ group_id);
+ return err;
+}
+
+static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0};
+
+ MLX5_SET(destroy_flow_group_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ MLX5_SET(destroy_flow_group_in, in, table_type, ft->type);
+ MLX5_SET(destroy_flow_group_in, in, table_id, ft->id);
+ MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+ if (ft->vport) {
+ MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport);
+ MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
+ int opmod, int modify_mask,
+ struct mlx5_flow_table *ft,
+ unsigned group_id,
+ struct fs_fte *fte)
+{
+ unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
+ fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
+ struct mlx5_flow_rule *dst;
+ void *in_flow_context, *vlan;
+ void *in_match_value;
+ void *in_dests;
+ u32 *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+ MLX5_SET(set_fte_in, in, op_mod, opmod);
+ MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask);
+ MLX5_SET(set_fte_in, in, table_type, ft->type);
+ MLX5_SET(set_fte_in, in, table_id, ft->id);
+ MLX5_SET(set_fte_in, in, flow_index, fte->index);
+ if (ft->vport) {
+ MLX5_SET(set_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(set_fte_in, in, other_vport, 1);
+ }
+
+ in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+ MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+
+ MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
+ MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_id);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
+
+ in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
+ match_value);
+ memcpy(in_match_value, &fte->val, sizeof(fte->val));
+
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ int list_size = 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ unsigned int id, type = dst->dest_attr.type;
+
+ if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ id = dst->dest_attr.ft_num;
+ type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ id = dst->dest_attr.ft->id;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ id = dst->dest_attr.vport.num;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid,
+ dst->dest_attr.vport.vhca_id_valid);
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id,
+ dst->dest_attr.vport.vhca_id);
+ break;
+ default:
+ id = dst->dest_attr.tir_num;
+ }
+
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ type);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+ in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ list_size++;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, destination_list_size,
+ list_size);
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
+ log_max_flow_counter,
+ ft->type));
+ int list_size = 0;
+
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (dst->dest_attr.type !=
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
+ dst->dest_attr.counter->id);
+ in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ list_size++;
+ }
+ if (list_size > max_list_size) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, flow_counter_list_size,
+ list_size);
+ }
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+err_out:
+ kvfree(in);
+ return err;
+}
+
+static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ unsigned int group_id = group->id;
+
+ return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
+}
+
+static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ int opmod;
+ int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.
+ flow_modify_en);
+ if (!atomic_mod_cap)
+ return -EOPNOTSUPP;
+ opmod = 1;
+
+ return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
+}
+
+static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0};
+
+ MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ MLX5_SET(delete_fte_in, in, table_type, ft->type);
+ MLX5_SET(delete_fte_in, in, table_id, ft->id);
+ MLX5_SET(delete_fte_in, in, flow_index, fte->index);
+ if (ft->vport) {
+ MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(delete_fte_in, in, other_vport, 1);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ return err;
+}
+
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
+
+ MLX5_SET(dealloc_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+ u64 *packets, u64 *bytes)
+{
+ u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+ MLX5_ST_SZ_BYTES(traffic_counter)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+ void *stats;
+ int err = 0;
+
+ MLX5_SET(query_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+ MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+ MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
+ *packets = MLX5_GET64(traffic_counter, stats, packets);
+ *bytes = MLX5_GET64(traffic_counter, stats, octets);
+ return 0;
+}
+
+struct mlx5_cmd_fc_bulk {
+ u32 id;
+ int num;
+ int outlen;
+ u32 out[0];
+};
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num)
+{
+ struct mlx5_cmd_fc_bulk *b;
+ int outlen =
+ MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+ MLX5_ST_SZ_BYTES(traffic_counter) * num;
+
+ b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
+ if (!b)
+ return NULL;
+
+ b->id = id;
+ b->num = num;
+ b->outlen = outlen;
+
+ return b;
+}
+
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
+{
+ kfree(b);
+}
+
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
+{
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+
+ MLX5_SET(query_flow_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+ MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+ MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
+ MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
+ return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
+}
+
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_fc_bulk *b, u32 id,
+ u64 *packets, u64 *bytes)
+{
+ int index = id - b->id;
+ void *stats;
+
+ if (index < 0 || index >= b->num) {
+ mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
+ id, b->id, b->id + b->num - 1);
+ return;
+ }
+
+ stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
+ flow_statistics[index]);
+ *packets = MLX5_GET64(traffic_counter, stats, packets);
+ *bytes = MLX5_GET64(traffic_counter, stats, octets);
+}
+
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+ int header_type,
+ size_t size,
+ void *encap_header,
+ u32 *encap_id)
+{
+ int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
+ u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+ void *encap_header_in;
+ void *header;
+ int inlen;
+ int err;
+ u32 *in;
+
+ if (size > max_encap_size) {
+ mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
+ size, max_encap_size);
+ return -EINVAL;
+ }
+
+ in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size,
+ GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
+ header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
+ inlen = header - (void *)in + size;
+
+ memset(in, 0, inlen);
+ MLX5_SET(alloc_encap_header_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+ MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+ MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+ memcpy(header, encap_header, size);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+ *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+ kfree(in);
+ return err;
+}
+
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+ u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+
+ memset(in, 0, sizeof(in));
+ MLX5_SET(dealloc_encap_header_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+ MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 namespace, u8 num_actions,
+ void *modify_actions, u32 *modify_header_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
+ int max_actions, actions_size, inlen, err;
+ void *actions_in;
+ u8 table_type;
+ u32 *in;
+
+ switch (namespace) {
+ case MLX5_FLOW_NAMESPACE_FDB:
+ max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions);
+ table_type = FS_FT_FDB;
+ break;
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
+ table_type = FS_FT_NIC_RX;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (num_actions > max_actions) {
+ mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n",
+ num_actions, max_actions);
+ return -EOPNOTSUPP;
+ }
+
+ actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions;
+ inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(alloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type);
+ MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions);
+
+ actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
+ memcpy(actions_in, modify_actions, actions_size);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+ *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+ kfree(in);
+ return err;
+}
+
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
+ u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+
+ memset(in, 0, sizeof(in));
+ MLX5_SET(dealloc_modify_header_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
+ modify_header_id);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds = {
+ .create_flow_table = mlx5_cmd_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_modify_flow_table,
+ .create_flow_group = mlx5_cmd_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_destroy_flow_group,
+ .create_fte = mlx5_cmd_create_fte,
+ .update_fte = mlx5_cmd_update_fte,
+ .delete_fte = mlx5_cmd_delete_fte,
+ .update_root_ft = mlx5_cmd_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
+ .create_flow_table = mlx5_cmd_stub_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_stub_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_stub_modify_flow_table,
+ .create_flow_group = mlx5_cmd_stub_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_stub_destroy_flow_group,
+ .create_fte = mlx5_cmd_stub_create_fte,
+ .update_fte = mlx5_cmd_stub_update_fte,
+ .delete_fte = mlx5_cmd_stub_delete_fte,
+ .update_root_ft = mlx5_cmd_stub_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+{
+ return &mlx5_flow_cmds;
+}
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void)
+{
+ return &mlx5_flow_cmd_stubs;
+}
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type)
+{
+ switch (type) {
+ case FS_FT_NIC_RX:
+ case FS_FT_ESW_EGRESS_ACL:
+ case FS_FT_ESW_INGRESS_ACL:
+ case FS_FT_FDB:
+ case FS_FT_SNIFFER_RX:
+ case FS_FT_SNIFFER_TX:
+ return mlx5_fs_cmd_get_fw_cmds();
+ case FS_FT_NIC_TX:
+ default:
+ return mlx5_fs_cmd_get_stub_cmds();
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
new file mode 100644
index 000000000..6228ba7bf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CMD_
+#define _MLX5_FS_CMD_
+
+#include "fs_core.h"
+
+struct mlx5_flow_cmds {
+ int (*create_flow_table)(struct mlx5_core_dev *dev,
+ u16 vport,
+ enum fs_flow_table_op_mod op_mod,
+ enum fs_flow_table_type type,
+ unsigned int level, unsigned int log_size,
+ struct mlx5_flow_table *next_ft,
+ unsigned int *table_id, u32 flags);
+ int (*destroy_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft);
+
+ int (*modify_flow_table)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft);
+
+ int (*create_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 *in,
+ unsigned int *group_id);
+
+ int (*destroy_flow_group)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id);
+
+ int (*create_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg,
+ struct fs_fte *fte);
+
+ int (*update_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ unsigned int group_id,
+ int modify_mask,
+ struct fs_fte *fte);
+
+ int (*delete_fte)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte);
+
+ int (*update_root_ft)(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect);
+};
+
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
+ u64 *packets, u64 *bytes);
+
+struct mlx5_cmd_fc_bulk;
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num);
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+ struct mlx5_cmd_fc_bulk *b, u32 id,
+ u64 *packets, u64 *bytes);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
new file mode 100644
index 000000000..f0aa7f0e5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -0,0 +1,2720 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+#include "diag/fs_tracepoint.h"
+#include "accel/ipsec.h"
+#include "fpga/ipsec.h"
+
+#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
+ sizeof(struct init_tree_node))
+
+#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\
+ ...) {.type = FS_TYPE_PRIO,\
+ .min_ft_level = min_level_val,\
+ .num_levels = num_levels_val,\
+ .num_leaf_prios = num_prios_val,\
+ .caps = caps_val,\
+ .children = (struct init_tree_node[]) {__VA_ARGS__},\
+ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\
+ ADD_PRIO(num_prios_val, 0, num_levels_val, {},\
+ __VA_ARGS__)\
+
+#define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\
+ .children = (struct init_tree_node[]) {__VA_ARGS__},\
+ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \
+}
+
+#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\
+ sizeof(long))
+
+#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap))
+
+#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \
+ .caps = (long[]) {__VA_ARGS__} }
+
+#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_receive.modify_root), \
+ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
+
+#define LEFTOVERS_NUM_LEVELS 1
+#define LEFTOVERS_NUM_PRIOS 1
+
+#define BY_PASS_PRIO_NUM_LEVELS 1
+#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
+ LEFTOVERS_NUM_PRIOS)
+
+#define ETHTOOL_PRIO_NUM_LEVELS 1
+#define ETHTOOL_NUM_PRIOS 11
+#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
+/* Vlan, mac, ttc, inner ttc, aRFS */
+#define KERNEL_NIC_PRIO_NUM_LEVELS 5
+#define KERNEL_NIC_NUM_PRIOS 1
+/* One more level for tc */
+#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
+
+#define KERNEL_NIC_TC_NUM_PRIOS 1
+#define KERNEL_NIC_TC_NUM_LEVELS 2
+
+#define ANCHOR_NUM_LEVELS 1
+#define ANCHOR_NUM_PRIOS 1
+#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
+
+#define OFFLOADS_MAX_FT 1
+#define OFFLOADS_NUM_PRIOS 1
+#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
+
+#define LAG_PRIO_NUM_LEVELS 1
+#define LAG_NUM_PRIOS 1
+#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+
+struct node_caps {
+ size_t arr_sz;
+ long *caps;
+};
+
+static struct init_tree_node {
+ enum fs_node_type type;
+ struct init_tree_node *children;
+ int ar_size;
+ struct node_caps caps;
+ int min_ft_level;
+ int num_leaf_prios;
+ int prio;
+ int num_levels;
+} root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 7,
+ .children = (struct init_tree_node[]) {
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, LAG_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
+ LAG_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
+ ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS,
+ ETHTOOL_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS),
+ ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
+ KERNEL_NIC_PRIO_NUM_LEVELS))),
+ ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
+ FS_CHAINING_CAPS,
+ ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))),
+ ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {},
+ ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))),
+ }
+};
+
+enum fs_i_lock_class {
+ FS_LOCK_GRANDPARENT,
+ FS_LOCK_PARENT,
+ FS_LOCK_CHILD
+};
+
+static const struct rhashtable_params rhash_fte = {
+ .key_len = FIELD_SIZEOF(struct fs_fte, val),
+ .key_offset = offsetof(struct fs_fte, val),
+ .head_offset = offsetof(struct fs_fte, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+static const struct rhashtable_params rhash_fg = {
+ .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask),
+ .key_offset = offsetof(struct mlx5_flow_group, mask),
+ .head_offset = offsetof(struct mlx5_flow_group, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+
+};
+
+static void del_hw_flow_table(struct fs_node *node);
+static void del_hw_flow_group(struct fs_node *node);
+static void del_hw_fte(struct fs_node *node);
+static void del_sw_flow_table(struct fs_node *node);
+static void del_sw_flow_group(struct fs_node *node);
+static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
+/* Delete rule (destination) is special case that
+ * requires to lock the FTE for all the deletion process.
+ */
+static void del_sw_hw_rule(struct fs_node *node);
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+ struct mlx5_flow_destination *d2);
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
+static struct mlx5_flow_rule *
+find_flow_rule(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest);
+
+static void tree_init_node(struct fs_node *node,
+ void (*del_hw_func)(struct fs_node *),
+ void (*del_sw_func)(struct fs_node *))
+{
+ refcount_set(&node->refcount, 1);
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->children);
+ init_rwsem(&node->lock);
+ node->del_hw_func = del_hw_func;
+ node->del_sw_func = del_sw_func;
+ node->active = false;
+}
+
+static void tree_add_node(struct fs_node *node, struct fs_node *parent)
+{
+ if (parent)
+ refcount_inc(&parent->refcount);
+ node->parent = parent;
+
+ /* Parent is the root */
+ if (!parent)
+ node->root = node;
+ else
+ node->root = parent->root;
+}
+
+static int tree_get_node(struct fs_node *node)
+{
+ return refcount_inc_not_zero(&node->refcount);
+}
+
+static void nested_down_read_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
+{
+ if (node) {
+ down_read_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void nested_down_write_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
+{
+ if (node) {
+ down_write_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void down_write_ref_node(struct fs_node *node)
+{
+ if (node) {
+ down_write(&node->lock);
+ refcount_inc(&node->refcount);
+ }
+}
+
+static void up_read_ref_node(struct fs_node *node)
+{
+ refcount_dec(&node->refcount);
+ up_read(&node->lock);
+}
+
+static void up_write_ref_node(struct fs_node *node)
+{
+ refcount_dec(&node->refcount);
+ up_write(&node->lock);
+}
+
+static void tree_put_node(struct fs_node *node)
+{
+ struct fs_node *parent_node = node->parent;
+
+ if (refcount_dec_and_test(&node->refcount)) {
+ if (node->del_hw_func)
+ node->del_hw_func(node);
+ if (parent_node) {
+ /* Only root namespace doesn't have parent and we just
+ * need to free its node.
+ */
+ down_write_ref_node(parent_node);
+ list_del_init(&node->list);
+ if (node->del_sw_func)
+ node->del_sw_func(node);
+ up_write_ref_node(parent_node);
+ } else {
+ kfree(node);
+ }
+ node = NULL;
+ }
+ if (!node && parent_node)
+ tree_put_node(parent_node);
+}
+
+static int tree_remove_node(struct fs_node *node)
+{
+ if (refcount_read(&node->refcount) > 1) {
+ refcount_dec(&node->refcount);
+ return -EEXIST;
+ }
+ tree_put_node(node);
+ return 0;
+}
+
+static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio)
+{
+ struct fs_prio *iter_prio;
+
+ fs_for_each_prio(iter_prio, ns) {
+ if (iter_prio->prio == prio)
+ return iter_prio;
+ }
+
+ return NULL;
+}
+
+static bool check_valid_spec(const struct mlx5_flow_spec *spec)
+{
+ int i;
+
+ for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
+ if (spec->match_value[i] & ~spec->match_criteria[i]) {
+ pr_warn("mlx5_core: match_value differs from match_criteria\n");
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+{
+ struct fs_node *root;
+ struct mlx5_flow_namespace *ns;
+
+ root = node->root;
+
+ if (WARN_ON(root->type != FS_TYPE_NAMESPACE)) {
+ pr_warn("mlx5: flow steering node is not in tree or garbaged\n");
+ return NULL;
+ }
+
+ ns = container_of(root, struct mlx5_flow_namespace, node);
+ return container_of(ns, struct mlx5_flow_root_namespace, ns);
+}
+
+static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root = find_root(node);
+
+ if (root)
+ return root->dev->priv.steering;
+ return NULL;
+}
+
+static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root = find_root(node);
+
+ if (root)
+ return root->dev;
+ return NULL;
+}
+
+static void del_sw_ns(struct fs_node *node)
+{
+ kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+ kfree(node);
+}
+
+static void del_hw_flow_table(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_core_dev *dev;
+ int err;
+
+ fs_get_obj(ft, node);
+ dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
+
+ if (node->active) {
+ err = root->cmds->destroy_flow_table(dev, ft);
+ if (err)
+ mlx5_core_warn(dev, "flow steering can't destroy ft\n");
+ }
+}
+
+static void del_sw_flow_table(struct fs_node *node)
+{
+ struct mlx5_flow_table *ft;
+ struct fs_prio *prio;
+
+ fs_get_obj(ft, node);
+
+ rhltable_destroy(&ft->fgs_hash);
+ fs_get_obj(prio, ft->node.parent);
+ prio->num_ft--;
+ kfree(ft);
+}
+
+static void del_sw_hw_rule(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_rule *rule;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int modify_mask;
+ struct mlx5_core_dev *dev = get_dev(node);
+ int err;
+ bool update_fte = false;
+
+ fs_get_obj(rule, node);
+ fs_get_obj(fte, rule->node.parent);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+ trace_mlx5_fs_del_rule(rule);
+ if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ mutex_lock(&rule->dest_attr.ft->lock);
+ list_del(&rule->next_ft);
+ mutex_unlock(&rule->dest_attr.ft->lock);
+ }
+
+ if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
+ --fte->dests_size) {
+ modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ update_fte = true;
+ goto out;
+ }
+
+ if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ --fte->dests_size) {
+ modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ update_fte = true;
+ }
+out:
+ root = find_root(&ft->node);
+ if (update_fte && fte->dests_size) {
+ err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
+ if (err)
+ mlx5_core_warn(dev,
+ "%s can't del rule fg id=%d fte_index=%d\n",
+ __func__, fg->id, fte->index);
+ }
+ kfree(rule);
+}
+
+static void del_hw_fte(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_core_dev *dev;
+ struct fs_fte *fte;
+ int err;
+
+ fs_get_obj(fte, node);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+
+ trace_mlx5_fs_del_fte(fte);
+ dev = get_dev(&ft->node);
+ root = find_root(&ft->node);
+ if (node->active) {
+ err = root->cmds->delete_fte(dev, ft, fte);
+ if (err)
+ mlx5_core_warn(dev,
+ "flow steering can't delete fte in index %d of flow group id %d\n",
+ fte->index, fg->id);
+ node->active = 0;
+ }
+}
+
+static void del_sw_fte(struct fs_node *node)
+{
+ struct mlx5_flow_steering *steering = get_steering(node);
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int err;
+
+ fs_get_obj(fte, node);
+ fs_get_obj(fg, fte->node.parent);
+
+ err = rhashtable_remove_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ WARN_ON(err);
+ ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
+ kmem_cache_free(steering->ftes_cache, fte);
+}
+
+static void del_hw_flow_group(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_table *ft;
+ struct mlx5_core_dev *dev;
+
+ fs_get_obj(fg, node);
+ fs_get_obj(ft, fg->node.parent);
+ dev = get_dev(&ft->node);
+ trace_mlx5_fs_del_fg(fg);
+
+ root = find_root(&ft->node);
+ if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id))
+ mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
+ fg->id, ft->id);
+}
+
+static void del_sw_flow_group(struct fs_node *node)
+{
+ struct mlx5_flow_steering *steering = get_steering(node);
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ fs_get_obj(fg, node);
+ fs_get_obj(ft, fg->node.parent);
+
+ rhashtable_destroy(&fg->ftes_hash);
+ ida_destroy(&fg->fte_allocator);
+ if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size)
+ ft->autogroup.num_groups--;
+ err = rhltable_remove(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ WARN_ON(err);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
+{
+ int index;
+ int ret;
+
+ index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ fte->index = index + fg->start_index;
+ ret = rhashtable_insert_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ if (ret)
+ goto err_ida_remove;
+
+ tree_add_node(&fte->node, &fg->node);
+ list_add_tail(&fte->node.list, &fg->node.children);
+ return 0;
+
+err_ida_remove:
+ ida_simple_remove(&fg->fte_allocator, index);
+ return ret;
+}
+
+static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
+ u32 *match_value,
+ struct mlx5_flow_act *flow_act)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct fs_fte *fte;
+
+ fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
+ if (!fte)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(fte->val, match_value, sizeof(fte->val));
+ fte->node.type = FS_TYPE_FLOW_ENTRY;
+ fte->action = *flow_act;
+
+ tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+
+ return fte;
+}
+
+static void dealloc_flow_group(struct mlx5_flow_steering *steering,
+ struct mlx5_flow_group *fg)
+{
+ rhashtable_destroy(&fg->ftes_hash);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
+ u8 match_criteria_enable,
+ void *match_criteria,
+ int start_index,
+ int end_index)
+{
+ struct mlx5_flow_group *fg;
+ int ret;
+
+ fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
+ if (!fg)
+ return ERR_PTR(-ENOMEM);
+
+ ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
+ if (ret) {
+ kmem_cache_free(steering->fgs_cache, fg);
+ return ERR_PTR(ret);
+}
+ ida_init(&fg->fte_allocator);
+ fg->mask.match_criteria_enable = match_criteria_enable;
+ memcpy(&fg->mask.match_criteria, match_criteria,
+ sizeof(fg->mask.match_criteria));
+ fg->node.type = FS_TYPE_FLOW_GROUP;
+ fg->start_index = start_index;
+ fg->max_ftes = end_index - start_index + 1;
+
+ return fg;
+}
+
+static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ void *match_criteria,
+ int start_index,
+ int end_index,
+ struct list_head *prev)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *fg;
+ int ret;
+
+ fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
+ start_index, end_index);
+ if (IS_ERR(fg))
+ return fg;
+
+ /* initialize refcnt, add to parent list */
+ ret = rhltable_insert(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ if (ret) {
+ dealloc_flow_group(steering, fg);
+ return ERR_PTR(ret);
+ }
+
+ tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
+ tree_add_node(&fg->node, &ft->node);
+ /* Add node to group list */
+ list_add(&fg->node.list, prev);
+ atomic_inc(&ft->node.version);
+
+ return fg;
+}
+
+static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
+ enum fs_flow_table_type table_type,
+ enum fs_flow_table_op_mod op_mod,
+ u32 flags)
+{
+ struct mlx5_flow_table *ft;
+ int ret;
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
+ if (ret) {
+ kfree(ft);
+ return ERR_PTR(ret);
+ }
+
+ ft->level = level;
+ ft->node.type = FS_TYPE_FLOW_TABLE;
+ ft->op_mod = op_mod;
+ ft->type = table_type;
+ ft->vport = vport;
+ ft->max_fte = max_fte;
+ ft->flags = flags;
+ INIT_LIST_HEAD(&ft->fwd_rules);
+ mutex_init(&ft->lock);
+
+ return ft;
+}
+
+/* If reverse is false, then we search for the first flow table in the
+ * root sub-tree from start(closest from right), else we search for the
+ * last flow table in the root sub-tree till start(closest from left).
+ */
+static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
+ struct list_head *start,
+ bool reverse)
+{
+#define list_advance_entry(pos, reverse) \
+ ((reverse) ? list_prev_entry(pos, list) : list_next_entry(pos, list))
+
+#define list_for_each_advance_continue(pos, head, reverse) \
+ for (pos = list_advance_entry(pos, reverse); \
+ &pos->list != (head); \
+ pos = list_advance_entry(pos, reverse))
+
+ struct fs_node *iter = list_entry(start, struct fs_node, list);
+ struct mlx5_flow_table *ft = NULL;
+
+ if (!root)
+ return NULL;
+
+ list_for_each_advance_continue(iter, &root->children, reverse) {
+ if (iter->type == FS_TYPE_FLOW_TABLE) {
+ fs_get_obj(ft, iter);
+ return ft;
+ }
+ ft = find_closest_ft_recursive(iter, &iter->children, reverse);
+ if (ft)
+ return ft;
+ }
+
+ return ft;
+}
+
+/* If reverse if false then return the first flow table in next priority of
+ * prio in the tree, else return the last flow table in the previous priority
+ * of prio in the tree.
+ */
+static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
+{
+ struct mlx5_flow_table *ft = NULL;
+ struct fs_node *curr_node;
+ struct fs_node *parent;
+
+ parent = prio->node.parent;
+ curr_node = &prio->node;
+ while (!ft && parent) {
+ ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
+ curr_node = parent;
+ parent = curr_node->parent;
+ }
+ return ft;
+}
+
+/* Assuming all the tree is locked by mutex chain lock */
+static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
+{
+ return find_closest_ft(prio, false);
+}
+
+/* Assuming all the tree is locked by mutex chain lock */
+static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
+{
+ return find_closest_ft(prio, true);
+}
+
+static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+ struct fs_prio *prio,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ struct mlx5_flow_table *iter;
+ int i = 0;
+ int err;
+
+ fs_for_each_ft(iter, prio) {
+ i++;
+ err = root->cmds->modify_flow_table(dev, iter, ft);
+ if (err) {
+ mlx5_core_warn(dev, "Failed to modify flow table %d\n",
+ iter->id);
+ /* The driver is out of sync with the FW */
+ if (i > 1)
+ WARN_ON(true);
+ return err;
+ }
+ }
+ return 0;
+}
+
+/* Connect flow tables from previous priority of prio to ft */
+static int connect_prev_fts(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct mlx5_flow_table *prev_ft;
+
+ prev_ft = find_prev_chained_ft(prio);
+ if (prev_ft) {
+ struct fs_prio *prev_prio;
+
+ fs_get_obj(prev_prio, prev_ft->node.parent);
+ return connect_fts_in_prio(dev, prev_prio, ft);
+ }
+ return 0;
+}
+
+static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
+ *prio)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ struct mlx5_ft_underlay_qp *uqp;
+ int min_level = INT_MAX;
+ int err;
+ u32 qpn;
+
+ if (root->root_ft)
+ min_level = root->root_ft->level;
+
+ if (ft->level >= min_level)
+ return 0;
+
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case QPN list is empty */
+ qpn = 0;
+ err = root->cmds->update_root_ft(root->dev, ft, qpn, false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = root->cmds->update_root_ft(root->dev, ft,
+ qpn, false);
+ if (err)
+ break;
+ }
+ }
+
+ if (err)
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
+ else
+ root->root_ft = ft;
+
+ return err;
+}
+
+static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct fs_fte *fte;
+ int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ int err = 0;
+
+ fs_get_obj(fte, rule->node.parent);
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return -EINVAL;
+ down_write_ref_node(&fte->node);
+ fs_get_obj(fg, fte->node.parent);
+ fs_get_obj(ft, fg->node.parent);
+
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ root = find_root(&ft->node);
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
+ up_write_ref_node(&fte->node);
+
+ return err;
+}
+
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
+ struct mlx5_flow_destination *new_dest,
+ struct mlx5_flow_destination *old_dest)
+{
+ int i;
+
+ if (!old_dest) {
+ if (handle->num_rules != 1)
+ return -EINVAL;
+ return _mlx5_modify_rule_destination(handle->rule[0],
+ new_dest);
+ }
+
+ for (i = 0; i < handle->num_rules; i++) {
+ if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
+ return _mlx5_modify_rule_destination(handle->rule[i],
+ new_dest);
+ }
+
+ return -EINVAL;
+}
+
+/* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */
+static int connect_fwd_rules(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *new_next_ft,
+ struct mlx5_flow_table *old_next_ft)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_rule *iter;
+ int err = 0;
+
+ /* new_next_ft and old_next_ft could be NULL only
+ * when we create/destroy the anchor flow table.
+ */
+ if (!new_next_ft || !old_next_ft)
+ return 0;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = new_next_ft;
+
+ mutex_lock(&old_next_ft->lock);
+ list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
+ mutex_unlock(&old_next_ft->lock);
+ list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
+ err = _mlx5_modify_rule_destination(iter, &dest);
+ if (err)
+ pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
+ new_next_ft->id);
+ }
+ return 0;
+}
+
+static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct mlx5_flow_table *next_ft, *first_ft;
+ int err = 0;
+
+ /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
+
+ first_ft = list_first_entry_or_null(&prio->node.children,
+ struct mlx5_flow_table, node.list);
+ if (!first_ft || first_ft->level > ft->level) {
+ err = connect_prev_fts(dev, ft, prio);
+ if (err)
+ return err;
+
+ next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
+ err = connect_fwd_rules(dev, ft, next_ft);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev,
+ flow_table_properties_nic_receive.modify_root))
+ err = update_root_ft_create(ft, prio);
+ return err;
+}
+
+static void list_add_flow_table(struct mlx5_flow_table *ft,
+ struct fs_prio *prio)
+{
+ struct list_head *prev = &prio->node.children;
+ struct mlx5_flow_table *iter;
+
+ fs_for_each_ft(iter, prio) {
+ if (iter->level > ft->level)
+ break;
+ prev = &iter->node.list;
+ }
+ list_add(&ft->node.list, prev);
+}
+
+static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr,
+ enum fs_flow_table_op_mod op_mod,
+ u16 vport)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ns->node);
+ struct mlx5_flow_table *next_ft = NULL;
+ struct fs_prio *fs_prio = NULL;
+ struct mlx5_flow_table *ft;
+ int log_table_sz;
+ int err;
+
+ if (!root) {
+ pr_err("mlx5: flow steering failed to find root of namespace\n");
+ return ERR_PTR(-ENODEV);
+ }
+
+ mutex_lock(&root->chain_lock);
+ fs_prio = find_prio(ns, ft_attr->prio);
+ if (!fs_prio) {
+ err = -EINVAL;
+ goto unlock_root;
+ }
+ if (ft_attr->level >= fs_prio->num_levels) {
+ err = -ENOSPC;
+ goto unlock_root;
+ }
+ /* The level is related to the
+ * priority level range.
+ */
+ ft_attr->level += fs_prio->start_level;
+ ft = alloc_flow_table(ft_attr->level,
+ vport,
+ ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
+ root->table_type,
+ op_mod, ft_attr->flags);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ goto unlock_root;
+ }
+
+ tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
+ log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
+ next_ft = find_next_chained_ft(fs_prio);
+ err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod,
+ ft->type, ft->level, log_table_sz,
+ next_ft, &ft->id, ft->flags);
+ if (err)
+ goto free_ft;
+
+ err = connect_flow_table(root->dev, ft, fs_prio);
+ if (err)
+ goto destroy_ft;
+ ft->node.active = true;
+ down_write_ref_node(&fs_prio->node);
+ tree_add_node(&ft->node, &fs_prio->node);
+ list_add_flow_table(ft, fs_prio);
+ fs_prio->num_ft++;
+ up_write_ref_node(&fs_prio->node);
+ mutex_unlock(&root->chain_lock);
+ return ft;
+destroy_ft:
+ root->cmds->destroy_flow_table(root->dev, ft);
+free_ft:
+ rhltable_destroy(&ft->fgs_hash);
+ kfree(ft);
+unlock_root:
+ mutex_unlock(&root->chain_lock);
+ return ERR_PTR(err);
+}
+
+struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+ struct mlx5_flow_table_attr *ft_attr)
+{
+ return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0);
+}
+
+struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
+ int prio, int max_fte,
+ u32 level, u16 vport)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+
+ ft_attr.max_fte = max_fte;
+ ft_attr.level = level;
+ ft_attr.prio = prio;
+
+ return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
+}
+
+struct mlx5_flow_table*
+mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns,
+ int prio, u32 level)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+
+ ft_attr.level = level;
+ ft_attr.prio = prio;
+ return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0);
+}
+EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
+
+struct mlx5_flow_table*
+mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
+ int prio,
+ int num_flow_table_entries,
+ int max_num_groups,
+ u32 level,
+ u32 flags)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ if (max_num_groups > num_flow_table_entries)
+ return ERR_PTR(-EINVAL);
+
+ ft_attr.max_fte = num_flow_table_entries;
+ ft_attr.prio = prio;
+ ft_attr.level = level;
+ ft_attr.flags = flags;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return ft;
+
+ ft->autogroup.active = true;
+ ft->autogroup.required_groups = max_num_groups;
+ /* We save place for flow groups in addition to max types */
+ ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
+
+ return ft;
+}
+EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
+
+struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
+ u32 *fg_in)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ fg_in, match_criteria);
+ u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+ fg_in,
+ match_criteria_enable);
+ int start_index = MLX5_GET(create_flow_group_in, fg_in,
+ start_flow_index);
+ int end_index = MLX5_GET(create_flow_group_in, fg_in,
+ end_flow_index);
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ struct mlx5_flow_group *fg;
+ int err;
+
+ if (ft->autogroup.active)
+ return ERR_PTR(-EPERM);
+
+ down_write_ref_node(&ft->node);
+ fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
+ start_index, end_index,
+ ft->node.children.prev);
+ up_write_ref_node(&ft->node);
+ if (IS_ERR(fg))
+ return fg;
+
+ err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
+ if (err) {
+ tree_put_node(&fg->node);
+ return ERR_PTR(err);
+ }
+ trace_mlx5_fs_add_fg(fg);
+ fg->node.active = true;
+
+ return fg;
+}
+
+static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return NULL;
+
+ INIT_LIST_HEAD(&rule->next_ft);
+ rule->node.type = FS_TYPE_FLOW_DEST;
+ if (dest)
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
+
+ return rule;
+}
+
+static struct mlx5_flow_handle *alloc_handle(int num_rules)
+{
+ struct mlx5_flow_handle *handle;
+
+ handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
+ if (!handle)
+ return NULL;
+
+ handle->num_rules = num_rules;
+
+ return handle;
+}
+
+static void destroy_flow_handle(struct fs_fte *fte,
+ struct mlx5_flow_handle *handle,
+ struct mlx5_flow_destination *dest,
+ int i)
+{
+ for (; --i >= 0;) {
+ if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
+ fte->dests_size--;
+ list_del(&handle->rule[i]->node.list);
+ kfree(handle->rule[i]);
+ }
+ }
+ kfree(handle);
+}
+
+static struct mlx5_flow_handle *
+create_flow_handle(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ int *modify_mask,
+ bool *new_rule)
+{
+ struct mlx5_flow_handle *handle;
+ struct mlx5_flow_rule *rule = NULL;
+ static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+ int type;
+ int i = 0;
+
+ handle = alloc_handle((dest_num) ? dest_num : 1);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+
+ do {
+ if (dest) {
+ rule = find_flow_rule(fte, dest + i);
+ if (rule) {
+ refcount_inc(&rule->node.refcount);
+ goto rule_found;
+ }
+ }
+
+ *new_rule = true;
+ rule = alloc_rule(dest + i);
+ if (!rule)
+ goto free_rules;
+
+ /* Add dest to dests list- we need flow tables to be in the
+ * end of the list for forward to next prio rules.
+ */
+ tree_init_node(&rule->node, NULL, del_sw_hw_rule);
+ if (dest &&
+ dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ list_add(&rule->node.list, &fte->node.children);
+ else
+ list_add_tail(&rule->node.list, &fte->node.children);
+ if (dest) {
+ fte->dests_size++;
+
+ type = dest[i].type ==
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ *modify_mask |= type ? count : dst;
+ }
+rule_found:
+ handle->rule[i] = rule;
+ } while (++i < dest_num);
+
+ return handle;
+
+free_rules:
+ destroy_flow_handle(fte, handle, dest, i);
+ return ERR_PTR(-ENOMEM);
+}
+
+/* fte should not be deleted while calling this function */
+static struct mlx5_flow_handle *
+add_rule_fte(struct fs_fte *fte,
+ struct mlx5_flow_group *fg,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ bool update_action)
+{
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_handle *handle;
+ struct mlx5_flow_table *ft;
+ int modify_mask = 0;
+ int err;
+ bool new_rule = false;
+
+ handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
+ &new_rule);
+ if (IS_ERR(handle) || !new_rule)
+ goto out;
+
+ if (update_action)
+ modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+
+ fs_get_obj(ft, fg->node.parent);
+ root = find_root(&fg->node);
+ if (!(fte->status & FS_FTE_STATUS_EXISTING))
+ err = root->cmds->create_fte(get_dev(&ft->node),
+ ft, fg, fte);
+ else
+ err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+ modify_mask, fte);
+ if (err)
+ goto free_handle;
+
+ fte->node.active = true;
+ fte->status |= FS_FTE_STATUS_EXISTING;
+ atomic_inc(&fte->node.version);
+
+out:
+ return handle;
+
+free_handle:
+ destroy_flow_handle(fte, handle, dest, handle->num_rules);
+ return ERR_PTR(err);
+}
+
+static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec)
+{
+ struct list_head *prev = &ft->node.children;
+ struct mlx5_flow_group *fg;
+ unsigned int candidate_index = 0;
+ unsigned int group_size = 0;
+
+ if (!ft->autogroup.active)
+ return ERR_PTR(-ENOENT);
+
+ if (ft->autogroup.num_groups < ft->autogroup.required_groups)
+ group_size = ft->autogroup.group_size;
+
+ /* ft->max_fte == ft->autogroup.max_types */
+ if (group_size == 0)
+ group_size = 1;
+
+ /* sorted by start_index */
+ fs_for_each_fg(fg, ft) {
+ if (candidate_index + group_size > fg->start_index)
+ candidate_index = fg->start_index + fg->max_ftes;
+ else
+ break;
+ prev = &fg->node.list;
+ }
+
+ if (candidate_index + group_size > ft->max_fte)
+ return ERR_PTR(-ENOSPC);
+
+ fg = alloc_insert_flow_group(ft,
+ spec->match_criteria_enable,
+ spec->match_criteria,
+ candidate_index,
+ candidate_index + group_size - 1,
+ prev);
+ if (IS_ERR(fg))
+ goto out;
+
+ if (group_size == ft->autogroup.group_size)
+ ft->autogroup.num_groups++;
+
+out:
+ return fg;
+}
+
+static int create_auto_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *match_criteria_addr;
+ u8 src_esw_owner_mask_on;
+ void *misc;
+ int err;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+ fg->mask.match_criteria_enable);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index +
+ fg->max_ftes - 1);
+
+ misc = MLX5_ADDR_OF(fte_match_param, fg->mask.match_criteria,
+ misc_parameters);
+ src_esw_owner_mask_on = !!MLX5_GET(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, in,
+ source_eswitch_owner_vhca_id_valid, src_esw_owner_mask_on);
+
+ match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
+ in, match_criteria);
+ memcpy(match_criteria_addr, fg->mask.match_criteria,
+ sizeof(fg->mask.match_criteria));
+
+ err = root->cmds->create_flow_group(dev, ft, in, &fg->id);
+ if (!err) {
+ fg->node.active = true;
+ trace_mlx5_fs_add_fg(fg);
+ }
+
+ kvfree(in);
+ return err;
+}
+
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+ struct mlx5_flow_destination *d2)
+{
+ if (d1->type == d2->type) {
+ if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ d1->vport.num == d2->vport.num) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+ d1->ft == d2->ft) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ d1->tir_num == d2->tir_num) ||
+ (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
+ d1->ft_num == d2->ft_num))
+ return true;
+ }
+
+ return false;
+}
+
+static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_flow_rule *rule;
+
+ list_for_each_entry(rule, &fte->node.children, node.list) {
+ if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
+ return rule;
+ }
+ return NULL;
+}
+
+static bool check_conflicting_actions(u32 action1, u32 action2)
+{
+ u32 xored_actions = action1 ^ action2;
+
+ /* if one rule only wants to count, it's ok */
+ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
+ action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return false;
+
+ if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
+ MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
+ return true;
+
+ return false;
+}
+
+static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+{
+ if (check_conflicting_actions(flow_act->action, fte->action.action)) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "Found two FTEs with conflicting actions\n");
+ return -EEXIST;
+ }
+
+ if (flow_act->has_flow_tag &&
+ fte->action.flow_tag != flow_act->flow_tag) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "FTE flow tag %u already exists with different flow tag %u\n",
+ fte->action.flow_tag,
+ flow_act->flow_tag);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
+ u32 *match_value,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ struct fs_fte *fte)
+{
+ struct mlx5_flow_handle *handle;
+ int old_action;
+ int i;
+ int ret;
+
+ ret = check_conflicting_ftes(fte, flow_act);
+ if (ret)
+ return ERR_PTR(ret);
+
+ old_action = fte->action.action;
+ fte->action.action |= flow_act->action;
+ handle = add_rule_fte(fte, fg, dest, dest_num,
+ old_action != flow_act->action);
+ if (IS_ERR(handle)) {
+ fte->action.action = old_action;
+ return handle;
+ }
+ trace_mlx5_fs_set_fte(fte, false);
+
+ for (i = 0; i < handle->num_rules; i++) {
+ if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ tree_add_node(&handle->rule[i]->node, &fte->node);
+ trace_mlx5_fs_add_rule(handle->rule[i]);
+ }
+ }
+ return handle;
+}
+
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
+{
+ struct mlx5_flow_rule *dst;
+ struct fs_fte *fte;
+
+ fs_get_obj(fte, handle->rule[0]->node.parent);
+
+ fs_for_each_dst(dst, fte) {
+ if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ return dst->dest_attr.counter;
+ }
+
+ return NULL;
+}
+
+static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
+{
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
+ return !counter;
+
+ if (!counter)
+ return false;
+
+ return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
+}
+
+static bool dest_is_valid(struct mlx5_flow_destination *dest,
+ u32 action,
+ struct mlx5_flow_table *ft)
+{
+ if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
+ return counter_is_valid(dest->counter, action);
+
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return true;
+
+ if (!dest || ((dest->type ==
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) &&
+ (dest->ft->level <= ft->level)))
+ return false;
+ return true;
+}
+
+struct match_list {
+ struct list_head list;
+ struct mlx5_flow_group *g;
+};
+
+struct match_list_head {
+ struct list_head list;
+ struct match_list first;
+};
+
+static void free_match_list(struct match_list_head *head)
+{
+ if (!list_empty(&head->list)) {
+ struct match_list *iter, *match_tmp;
+
+ list_del(&head->first.list);
+ tree_put_node(&head->first.g->node);
+ list_for_each_entry_safe(iter, match_tmp, &head->list,
+ list) {
+ tree_put_node(&iter->g->node);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+ }
+}
+
+static int build_match_list(struct match_list_head *match_head,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec)
+{
+ struct rhlist_head *tmp, *list;
+ struct mlx5_flow_group *g;
+ int err = 0;
+
+ rcu_read_lock();
+ INIT_LIST_HEAD(&match_head->list);
+ /* Collect all fgs which has a matching match_criteria */
+ list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
+ /* RCU is atomic, we can't execute FW commands here */
+ rhl_for_each_entry_rcu(g, tmp, list, hash) {
+ struct match_list *curr_match;
+
+ if (likely(list_empty(&match_head->list))) {
+ if (!tree_get_node(&g->node))
+ continue;
+ match_head->first.g = g;
+ list_add_tail(&match_head->first.list,
+ &match_head->list);
+ continue;
+ }
+
+ curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
+ if (!curr_match) {
+ rcu_read_unlock();
+ free_match_list(match_head);
+ return -ENOMEM;
+ }
+ if (!tree_get_node(&g->node)) {
+ kfree(curr_match);
+ continue;
+ }
+ curr_match->g = g;
+ list_add_tail(&curr_match->list, &match_head->list);
+ }
+ rcu_read_unlock();
+ return err;
+}
+
+static u64 matched_fgs_get_version(struct list_head *match_head)
+{
+ struct match_list *iter;
+ u64 version = 0;
+
+ list_for_each_entry(iter, match_head, list)
+ version += (u64)atomic_read(&iter->g->node.version);
+ return version;
+}
+
+static struct fs_fte *
+lookup_fte_locked(struct mlx5_flow_group *g,
+ u32 *match_value,
+ bool take_write)
+{
+ struct fs_fte *fte_tmp;
+
+ if (take_write)
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ else
+ nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
+ fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
+ rhash_fte);
+ if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+ fte_tmp = NULL;
+ goto out;
+ }
+ if (!fte_tmp->node.active) {
+ tree_put_node(&fte_tmp->node);
+ fte_tmp = NULL;
+ goto out;
+ }
+
+ nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+out:
+ if (take_write)
+ up_write_ref_node(&g->node);
+ else
+ up_read_ref_node(&g->node);
+ return fte_tmp;
+}
+
+static struct mlx5_flow_handle *
+try_add_to_existing_fg(struct mlx5_flow_table *ft,
+ struct list_head *match_head,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ int ft_version)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *g;
+ struct mlx5_flow_handle *rule;
+ struct match_list *iter;
+ bool take_write = false;
+ struct fs_fte *fte;
+ u64 version;
+ int err;
+
+ fte = alloc_fte(ft, spec->match_value, flow_act);
+ if (IS_ERR(fte))
+ return ERR_PTR(-ENOMEM);
+
+search_again_locked:
+ version = matched_fgs_get_version(match_head);
+ /* Try to find a fg that already contains a matching fte */
+ list_for_each_entry(iter, match_head, list) {
+ struct fs_fte *fte_tmp;
+
+ g = iter->g;
+ fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
+ if (!fte_tmp)
+ continue;
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, fte_tmp);
+ up_write_ref_node(&fte_tmp->node);
+ tree_put_node(&fte_tmp->node);
+ kmem_cache_free(steering->ftes_cache, fte);
+ return rule;
+ }
+
+ /* Check the ft version, for case that new flow group
+ * was added while the fgs weren't locked
+ */
+ if (atomic_read(&ft->node.version) != ft_version) {
+ rule = ERR_PTR(-EAGAIN);
+ goto out;
+ }
+
+ /* Check the fgs version, for case the new FTE with the
+ * same values was added while the fgs weren't locked
+ */
+ if (version != matched_fgs_get_version(match_head)) {
+ take_write = true;
+ goto search_again_locked;
+ }
+
+ list_for_each_entry(iter, match_head, list) {
+ g = iter->g;
+
+ if (!g->node.active)
+ continue;
+
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+
+ err = insert_fte(g, fte);
+ if (err) {
+ up_write_ref_node(&g->node);
+ if (err == -ENOSPC)
+ continue;
+ kmem_cache_free(steering->ftes_cache, fte);
+ return ERR_PTR(err);
+ }
+
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ up_write_ref_node(&g->node);
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, fte);
+ up_write_ref_node(&fte->node);
+ tree_put_node(&fte->node);
+ return rule;
+ }
+ rule = ERR_PTR(-ENOENT);
+out:
+ kmem_cache_free(steering->ftes_cache, fte);
+ return rule;
+}
+
+static struct mlx5_flow_handle *
+_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num)
+
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *g;
+ struct mlx5_flow_handle *rule;
+ struct match_list_head match_head;
+ bool take_write = false;
+ struct fs_fte *fte;
+ int version;
+ int err;
+ int i;
+
+ if (!check_valid_spec(spec))
+ return ERR_PTR(-EINVAL);
+
+ for (i = 0; i < dest_num; i++) {
+ if (!dest_is_valid(&dest[i], flow_act->action, ft))
+ return ERR_PTR(-EINVAL);
+ }
+ nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+search_again_locked:
+ version = atomic_read(&ft->node.version);
+
+ /* Collect all fgs which has a matching match_criteria */
+ err = build_match_list(&match_head, ft, spec);
+ if (err) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
+ else
+ up_read_ref_node(&ft->node);
+ return ERR_PTR(err);
+ }
+
+ if (!take_write)
+ up_read_ref_node(&ft->node);
+
+ rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
+ dest_num, version);
+ free_match_list(&match_head);
+ if (!IS_ERR(rule) ||
+ (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) {
+ if (take_write)
+ up_write_ref_node(&ft->node);
+ return rule;
+ }
+
+ if (!take_write) {
+ nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+ take_write = true;
+ }
+
+ if (PTR_ERR(rule) == -EAGAIN ||
+ version != atomic_read(&ft->node.version))
+ goto search_again_locked;
+
+ g = alloc_auto_flow_group(ft, spec);
+ if (IS_ERR(g)) {
+ rule = ERR_CAST(g);
+ up_write_ref_node(&ft->node);
+ return rule;
+ }
+
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ up_write_ref_node(&ft->node);
+
+ err = create_auto_flow_group(ft, g);
+ if (err)
+ goto err_release_fg;
+
+ fte = alloc_fte(ft, spec->match_value, flow_act);
+ if (IS_ERR(fte)) {
+ err = PTR_ERR(fte);
+ goto err_release_fg;
+ }
+
+ err = insert_fte(g, fte);
+ if (err) {
+ kmem_cache_free(steering->ftes_cache, fte);
+ goto err_release_fg;
+ }
+
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ up_write_ref_node(&g->node);
+ rule = add_rule_fg(g, spec->match_value, flow_act, dest,
+ dest_num, fte);
+ up_write_ref_node(&fte->node);
+ tree_put_node(&fte->node);
+ tree_put_node(&g->node);
+ return rule;
+
+err_release_fg:
+ up_write_ref_node(&g->node);
+ tree_put_node(&g->node);
+ return ERR_PTR(err);
+}
+
+static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
+{
+ return ((ft->type == FS_FT_NIC_RX) &&
+ (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
+}
+
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int num_dest)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_flow_destination gen_dest = {};
+ struct mlx5_flow_table *next_ft = NULL;
+ struct mlx5_flow_handle *handle = NULL;
+ u32 sw_action = flow_act->action;
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+ if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ if (!fwd_next_prio_supported(ft))
+ return ERR_PTR(-EOPNOTSUPP);
+ if (num_dest)
+ return ERR_PTR(-EINVAL);
+ mutex_lock(&root->chain_lock);
+ next_ft = find_next_chained_ft(prio);
+ if (next_ft) {
+ gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ gen_dest.ft = next_ft;
+ dest = &gen_dest;
+ num_dest = 1;
+ flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else {
+ mutex_unlock(&root->chain_lock);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ }
+
+ handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+
+ if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+ if (!IS_ERR_OR_NULL(handle) &&
+ (list_empty(&handle->rule[0]->next_ft))) {
+ mutex_lock(&next_ft->lock);
+ list_add(&handle->rule[0]->next_ft,
+ &next_ft->fwd_rules);
+ mutex_unlock(&next_ft->lock);
+ handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ }
+ mutex_unlock(&root->chain_lock);
+ }
+ return handle;
+}
+EXPORT_SYMBOL(mlx5_add_flow_rules);
+
+void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
+{
+ int i;
+
+ for (i = handle->num_rules - 1; i >= 0; i--)
+ tree_remove_node(&handle->rule[i]->node);
+ kfree(handle);
+}
+EXPORT_SYMBOL(mlx5_del_flow_rules);
+
+/* Assuming prio->node.children(flow tables) is sorted by level */
+static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
+{
+ struct fs_prio *prio;
+
+ fs_get_obj(prio, ft->node.parent);
+
+ if (!list_is_last(&ft->node.list, &prio->node.children))
+ return list_next_entry(ft, node.list);
+ return find_next_chained_ft(prio);
+}
+
+static int update_root_ft_destroy(struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_ft_underlay_qp *uqp;
+ struct mlx5_flow_table *new_root_ft = NULL;
+ int err = 0;
+ u32 qpn;
+
+ if (root->root_ft != ft)
+ return 0;
+
+ new_root_ft = find_next_ft(ft);
+ if (!new_root_ft) {
+ root->root_ft = NULL;
+ return 0;
+ }
+
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case QPN list is empty */
+ qpn = 0;
+ err = root->cmds->update_root_ft(root->dev, new_root_ft,
+ qpn, false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = root->cmds->update_root_ft(root->dev,
+ new_root_ft, qpn,
+ false);
+ if (err)
+ break;
+ }
+ }
+
+ if (err)
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
+ else
+ root->root_ft = new_root_ft;
+
+ return 0;
+}
+
+/* Connect flow table from previous priority to
+ * the next flow table.
+ */
+static int disconnect_flow_table(struct mlx5_flow_table *ft)
+{
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ struct mlx5_flow_table *next_ft;
+ struct fs_prio *prio;
+ int err = 0;
+
+ err = update_root_ft_destroy(ft);
+ if (err)
+ return err;
+
+ fs_get_obj(prio, ft->node.parent);
+ if (!(list_first_entry(&prio->node.children,
+ struct mlx5_flow_table,
+ node.list) == ft))
+ return 0;
+
+ next_ft = find_next_ft(ft);
+ err = connect_fwd_rules(dev, next_ft, ft);
+ if (err)
+ return err;
+
+ err = connect_prev_fts(dev, next_ft, prio);
+ if (err)
+ mlx5_core_warn(dev, "Failed to disconnect flow table %d\n",
+ ft->id);
+ return err;
+}
+
+int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
+{
+ struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ err = disconnect_flow_table(ft);
+ if (err) {
+ mutex_unlock(&root->chain_lock);
+ return err;
+ }
+ if (tree_remove_node(&ft->node))
+ mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n",
+ ft->id);
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_destroy_flow_table);
+
+void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
+{
+ if (tree_remove_node(&fg->node))
+ mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n",
+ fg->id);
+}
+
+struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ struct mlx5_flow_root_namespace *root_ns;
+ int prio;
+ struct fs_prio *fs_prio;
+ struct mlx5_flow_namespace *ns;
+
+ if (!steering)
+ return NULL;
+
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ case MLX5_FLOW_NAMESPACE_LAG:
+ case MLX5_FLOW_NAMESPACE_OFFLOADS:
+ case MLX5_FLOW_NAMESPACE_ETHTOOL:
+ case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+ case MLX5_FLOW_NAMESPACE_ANCHOR:
+ prio = type;
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB:
+ if (steering->fdb_root_ns)
+ return &steering->fdb_root_ns->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
+ if (steering->sniffer_rx_root_ns)
+ return &steering->sniffer_rx_root_ns->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
+ if (steering->sniffer_tx_root_ns)
+ return &steering->sniffer_tx_root_ns->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ if (steering->egress_root_ns)
+ return &steering->egress_root_ns->ns;
+ else
+ return NULL;
+ default:
+ return NULL;
+ }
+
+ root_ns = steering->root_ns;
+ if (!root_ns)
+ return NULL;
+
+ fs_prio = find_prio(&root_ns->ns, prio);
+ if (!fs_prio)
+ return NULL;
+
+ ns = list_first_entry(&fs_prio->node.children,
+ typeof(*ns),
+ node.list);
+
+ return ns;
+}
+EXPORT_SYMBOL(mlx5_get_flow_namespace);
+
+struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ int vport)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ if (!steering || vport >= MLX5_TOTAL_VPORTS(dev))
+ return NULL;
+
+ switch (type) {
+ case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
+ if (steering->esw_egress_root_ns &&
+ steering->esw_egress_root_ns[vport])
+ return &steering->esw_egress_root_ns[vport]->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+ if (steering->esw_ingress_root_ns &&
+ steering->esw_ingress_root_ns[vport])
+ return &steering->esw_ingress_root_ns[vport]->ns;
+ else
+ return NULL;
+ default:
+ return NULL;
+ }
+}
+
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio, int num_levels)
+{
+ struct fs_prio *fs_prio;
+
+ fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL);
+ if (!fs_prio)
+ return ERR_PTR(-ENOMEM);
+
+ fs_prio->node.type = FS_TYPE_PRIO;
+ tree_init_node(&fs_prio->node, NULL, del_sw_prio);
+ tree_add_node(&fs_prio->node, &ns->node);
+ fs_prio->num_levels = num_levels;
+ fs_prio->prio = prio;
+ list_add_tail(&fs_prio->node.list, &ns->node.children);
+
+ return fs_prio;
+}
+
+static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
+ *ns)
+{
+ ns->node.type = FS_TYPE_NAMESPACE;
+
+ return ns;
+}
+
+static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
+{
+ struct mlx5_flow_namespace *ns;
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns)
+ return ERR_PTR(-ENOMEM);
+
+ fs_init_namespace(ns);
+ tree_init_node(&ns->node, NULL, del_sw_ns);
+ tree_add_node(&ns->node, &prio->node);
+ list_add_tail(&ns->node.list, &prio->node.children);
+
+ return ns;
+}
+
+static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
+ struct init_tree_node *prio_metadata)
+{
+ struct fs_prio *fs_prio;
+ int i;
+
+ for (i = 0; i < prio_metadata->num_leaf_prios; i++) {
+ fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels);
+ if (IS_ERR(fs_prio))
+ return PTR_ERR(fs_prio);
+ }
+ return 0;
+}
+
+#define FLOW_TABLE_BIT_SZ 1
+#define GET_FLOW_TABLE_CAP(dev, offset) \
+ ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \
+ offset / 32)) >> \
+ (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
+static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
+{
+ int i;
+
+ for (i = 0; i < caps->arr_sz; i++) {
+ if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
+ return false;
+ }
+ return true;
+}
+
+static int init_root_tree_recursive(struct mlx5_flow_steering *steering,
+ struct init_tree_node *init_node,
+ struct fs_node *fs_parent_node,
+ struct init_tree_node *init_parent_node,
+ int prio)
+{
+ int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev,
+ flow_table_properties_nic_receive.
+ max_ft_level);
+ struct mlx5_flow_namespace *fs_ns;
+ struct fs_prio *fs_prio;
+ struct fs_node *base;
+ int i;
+ int err;
+
+ if (init_node->type == FS_TYPE_PRIO) {
+ if ((init_node->min_ft_level > max_ft_level) ||
+ !has_required_caps(steering->dev, &init_node->caps))
+ return 0;
+
+ fs_get_obj(fs_ns, fs_parent_node);
+ if (init_node->num_leaf_prios)
+ return create_leaf_prios(fs_ns, prio, init_node);
+ fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels);
+ if (IS_ERR(fs_prio))
+ return PTR_ERR(fs_prio);
+ base = &fs_prio->node;
+ } else if (init_node->type == FS_TYPE_NAMESPACE) {
+ fs_get_obj(fs_prio, fs_parent_node);
+ fs_ns = fs_create_namespace(fs_prio);
+ if (IS_ERR(fs_ns))
+ return PTR_ERR(fs_ns);
+ base = &fs_ns->node;
+ } else {
+ return -EINVAL;
+ }
+ prio = 0;
+ for (i = 0; i < init_node->ar_size; i++) {
+ err = init_root_tree_recursive(steering, &init_node->children[i],
+ base, init_node, prio);
+ if (err)
+ return err;
+ if (init_node->children[i].type == FS_TYPE_PRIO &&
+ init_node->children[i].num_leaf_prios) {
+ prio += init_node->children[i].num_leaf_prios;
+ }
+ }
+
+ return 0;
+}
+
+static int init_root_tree(struct mlx5_flow_steering *steering,
+ struct init_tree_node *init_node,
+ struct fs_node *fs_parent_node)
+{
+ int i;
+ struct mlx5_flow_namespace *fs_ns;
+ int err;
+
+ fs_get_obj(fs_ns, fs_parent_node);
+ for (i = 0; i < init_node->ar_size; i++) {
+ err = init_root_tree_recursive(steering, &init_node->children[i],
+ &fs_ns->node,
+ init_node, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static struct mlx5_flow_root_namespace
+*create_root_ns(struct mlx5_flow_steering *steering,
+ enum fs_flow_table_type table_type)
+{
+ const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_namespace *ns;
+
+ if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+ (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
+ cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
+
+ /* Create the root namespace */
+ root_ns = kzalloc(sizeof(*root_ns), GFP_KERNEL);
+ if (!root_ns)
+ return NULL;
+
+ root_ns->dev = steering->dev;
+ root_ns->table_type = table_type;
+ root_ns->cmds = cmds;
+
+ INIT_LIST_HEAD(&root_ns->underlay_qpns);
+
+ ns = &root_ns->ns;
+ fs_init_namespace(ns);
+ mutex_init(&root_ns->chain_lock);
+ tree_init_node(&ns->node, NULL, NULL);
+ tree_add_node(&ns->node, NULL);
+
+ return root_ns;
+}
+
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level);
+
+static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level)
+{
+ struct fs_prio *prio;
+
+ fs_for_each_prio(prio, ns) {
+ /* This updates prio start_level and num_levels */
+ set_prio_attrs_in_prio(prio, acc_level);
+ acc_level += prio->num_levels;
+ }
+ return acc_level;
+}
+
+static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
+{
+ struct mlx5_flow_namespace *ns;
+ int acc_level_ns = acc_level;
+
+ prio->start_level = acc_level;
+ fs_for_each_ns(ns, prio)
+ /* This updates start_level and num_levels of ns's priority descendants */
+ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
+ if (!prio->num_levels)
+ prio->num_levels = acc_level_ns - prio->start_level;
+ WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
+}
+
+static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns)
+{
+ struct mlx5_flow_namespace *ns = &root_ns->ns;
+ struct fs_prio *prio;
+ int start_level = 0;
+
+ fs_for_each_prio(prio, ns) {
+ set_prio_attrs_in_prio(prio, start_level);
+ start_level += prio->num_levels;
+ }
+}
+
+#define ANCHOR_PRIO 0
+#define ANCHOR_SIZE 1
+#define ANCHOR_LEVEL 0
+static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
+ if (WARN_ON(!ns))
+ return -EINVAL;
+
+ ft_attr.max_fte = ANCHOR_SIZE;
+ ft_attr.level = ANCHOR_LEVEL;
+ ft_attr.prio = ANCHOR_PRIO;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
+ return PTR_ERR(ft);
+ }
+ return 0;
+}
+
+static int init_root_ns(struct mlx5_flow_steering *steering)
+{
+ int err;
+
+ steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
+ if (!steering->root_ns)
+ return -ENOMEM;
+
+ err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
+ if (err)
+ goto out_err;
+
+ set_prio_attrs(steering->root_ns);
+ err = create_anchor_flow_table(steering);
+ if (err)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->root_ns);
+ steering->root_ns = NULL;
+ return err;
+}
+
+static void clean_tree(struct fs_node *node)
+{
+ if (node) {
+ struct fs_node *iter;
+ struct fs_node *temp;
+
+ tree_get_node(node);
+ list_for_each_entry_safe(iter, temp, &node->children, list)
+ clean_tree(iter);
+ tree_put_node(node);
+ tree_remove_node(node);
+ }
+}
+
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
+{
+ if (!root_ns)
+ return;
+
+ clean_tree(&root_ns->ns.node);
+}
+
+static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_egress_root_ns)
+ return;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ cleanup_root_ns(steering->esw_egress_root_ns[i]);
+
+ kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
+}
+
+static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int i;
+
+ if (!steering->esw_ingress_root_ns)
+ return;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++)
+ cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+
+ kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
+}
+
+void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ cleanup_root_ns(steering->root_ns);
+ cleanup_egress_acls_root_ns(dev);
+ cleanup_ingress_acls_root_ns(dev);
+ cleanup_root_ns(steering->fdb_root_ns);
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ cleanup_root_ns(steering->egress_root_ns);
+ mlx5_cleanup_fc_stats(dev);
+ kmem_cache_destroy(steering->ftes_cache);
+ kmem_cache_destroy(steering->fgs_cache);
+ kfree(steering);
+}
+
+static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
+ if (!steering->sniffer_tx_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
+ if (IS_ERR(prio)) {
+ cleanup_root_ns(steering->sniffer_tx_root_ns);
+ return PTR_ERR(prio);
+ }
+ return 0;
+}
+
+static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
+ if (!steering->sniffer_rx_root_ns)
+ return -ENOMEM;
+
+ /* Create single prio */
+ prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
+ if (IS_ERR(prio)) {
+ cleanup_root_ns(steering->sniffer_rx_root_ns);
+ return PTR_ERR(prio);
+ }
+ return 0;
+}
+
+static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
+ if (!steering->fdb_root_ns)
+ return -ENOMEM;
+
+ prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2);
+ if (IS_ERR(prio))
+ goto out_err;
+
+ prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+ if (IS_ERR(prio))
+ goto out_err;
+
+ set_prio_attrs(steering->fdb_root_ns);
+ return 0;
+
+out_err:
+ cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ return PTR_ERR(prio);
+}
+
+static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+{
+ struct fs_prio *prio;
+
+ steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL);
+ if (!steering->esw_egress_root_ns[vport])
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
+{
+ struct fs_prio *prio;
+
+ steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL);
+ if (!steering->esw_ingress_root_ns[vport])
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err;
+ int i;
+
+ steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
+ sizeof(*steering->esw_egress_root_ns),
+ GFP_KERNEL);
+ if (!steering->esw_egress_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ err = init_egress_acl_root_ns(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+
+ return 0;
+
+cleanup_root_ns:
+ for (i--; i >= 0; i--)
+ cleanup_root_ns(steering->esw_egress_root_ns[i]);
+ kfree(steering->esw_egress_root_ns);
+ steering->esw_egress_root_ns = NULL;
+ return err;
+}
+
+static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+ int err;
+ int i;
+
+ steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev),
+ sizeof(*steering->esw_ingress_root_ns),
+ GFP_KERNEL);
+ if (!steering->esw_ingress_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) {
+ err = init_ingress_acl_root_ns(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+
+ return 0;
+
+cleanup_root_ns:
+ for (i--; i >= 0; i--)
+ cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+ kfree(steering->esw_ingress_root_ns);
+ steering->esw_ingress_root_ns = NULL;
+ return err;
+}
+
+static int init_egress_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct fs_prio *prio;
+
+ steering->egress_root_ns = create_root_ns(steering,
+ FS_FT_NIC_TX);
+ if (!steering->egress_root_ns)
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+int mlx5_init_fs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_flow_steering *steering;
+ int err = 0;
+
+ err = mlx5_init_fc_stats(dev);
+ if (err)
+ return err;
+
+ steering = kzalloc(sizeof(*steering), GFP_KERNEL);
+ if (!steering)
+ return -ENOMEM;
+ steering->dev = dev;
+ dev->priv.steering = steering;
+
+ steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+ sizeof(struct mlx5_flow_group), 0,
+ 0, NULL);
+ steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+ 0, NULL);
+ if (!steering->ftes_cache || !steering->fgs_cache) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ (MLX5_CAP_GEN(dev, nic_flow_table))) ||
+ ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
+ MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
+ err = init_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_ESWITCH_MANAGER(dev)) {
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
+ err = init_fdb_root_ns(steering);
+ if (err)
+ goto err;
+ }
+ if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
+ err = init_egress_acls_root_ns(dev);
+ if (err)
+ goto err;
+ }
+ if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
+ err = init_ingress_acls_root_ns(dev);
+ if (err)
+ goto err;
+ }
+ }
+
+ if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
+ err = init_sniffer_rx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
+ err = init_sniffer_tx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_IPSEC_DEV(dev)) {
+ err = init_egress_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ return 0;
+err:
+ mlx5_cleanup_fs(dev);
+ return err;
+}
+
+int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+ struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *new_uqp;
+ int err = 0;
+
+ new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
+ if (!new_uqp)
+ return -ENOMEM;
+
+ mutex_lock(&root->chain_lock);
+
+ if (!root->root_ft) {
+ err = -EINVAL;
+ goto update_ft_fail;
+ }
+
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ false);
+ if (err) {
+ mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
+ underlay_qpn, err);
+ goto update_ft_fail;
+ }
+
+ new_uqp->qpn = underlay_qpn;
+ list_add_tail(&new_uqp->list, &root->underlay_qpns);
+
+ mutex_unlock(&root->chain_lock);
+
+ return 0;
+
+update_ft_fail:
+ mutex_unlock(&root->chain_lock);
+ kfree(new_uqp);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
+
+int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
+{
+ struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *uqp;
+ bool found = false;
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ if (uqp->qpn == underlay_qpn) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
+ underlay_qpn);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+ true);
+ if (err)
+ mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
+ underlay_qpn, err);
+
+ list_del(&uqp->list);
+ mutex_unlock(&root->chain_lock);
+ kfree(uqp);
+
+ return 0;
+
+out:
+ mutex_unlock(&root->chain_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
new file mode 100644
index 000000000..ba62fbce2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_CORE_
+#define _MLX5_FS_CORE_
+
+#include <linux/refcount.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rhashtable.h>
+
+enum fs_node_type {
+ FS_TYPE_NAMESPACE,
+ FS_TYPE_PRIO,
+ FS_TYPE_FLOW_TABLE,
+ FS_TYPE_FLOW_GROUP,
+ FS_TYPE_FLOW_ENTRY,
+ FS_TYPE_FLOW_DEST
+};
+
+enum fs_flow_table_type {
+ FS_FT_NIC_RX = 0x0,
+ FS_FT_NIC_TX = 0x1,
+ FS_FT_ESW_EGRESS_ACL = 0x2,
+ FS_FT_ESW_INGRESS_ACL = 0x3,
+ FS_FT_FDB = 0X4,
+ FS_FT_SNIFFER_RX = 0X5,
+ FS_FT_SNIFFER_TX = 0X6,
+ FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX,
+};
+
+enum fs_flow_table_op_mod {
+ FS_FT_OP_MOD_NORMAL,
+ FS_FT_OP_MOD_LAG_DEMUX,
+};
+
+enum fs_fte_status {
+ FS_FTE_STATUS_EXISTING = 1UL << 0,
+};
+
+struct mlx5_flow_steering {
+ struct mlx5_core_dev *dev;
+ struct kmem_cache *fgs_cache;
+ struct kmem_cache *ftes_cache;
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_flow_root_namespace *fdb_root_ns;
+ struct mlx5_flow_root_namespace **esw_egress_root_ns;
+ struct mlx5_flow_root_namespace **esw_ingress_root_ns;
+ struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
+ struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
+ struct mlx5_flow_root_namespace *egress_root_ns;
+};
+
+struct fs_node {
+ struct list_head list;
+ struct list_head children;
+ enum fs_node_type type;
+ struct fs_node *parent;
+ struct fs_node *root;
+ /* lock the node for writing and traversing */
+ struct rw_semaphore lock;
+ refcount_t refcount;
+ bool active;
+ void (*del_hw_func)(struct fs_node *);
+ void (*del_sw_func)(struct fs_node *);
+ atomic_t version;
+};
+
+struct mlx5_flow_rule {
+ struct fs_node node;
+ struct mlx5_flow_destination dest_attr;
+ /* next_ft should be accessed under chain_lock and only of
+ * destination type is FWD_NEXT_fT.
+ */
+ struct list_head next_ft;
+ u32 sw_action;
+};
+
+struct mlx5_flow_handle {
+ int num_rules;
+ struct mlx5_flow_rule *rule[];
+};
+
+/* Type of children is mlx5_flow_group */
+struct mlx5_flow_table {
+ struct fs_node node;
+ u32 id;
+ u16 vport;
+ unsigned int max_fte;
+ unsigned int level;
+ enum fs_flow_table_type type;
+ enum fs_flow_table_op_mod op_mod;
+ struct {
+ bool active;
+ unsigned int required_groups;
+ unsigned int group_size;
+ unsigned int num_groups;
+ } autogroup;
+ /* Protect fwd_rules */
+ struct mutex lock;
+ /* FWD rules that point on this flow table */
+ struct list_head fwd_rules;
+ u32 flags;
+ struct rhltable fgs_hash;
+};
+
+struct mlx5_fc_cache {
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc {
+ struct rb_node node;
+ struct list_head list;
+
+ /* last{packets,bytes} members are used when calculating the delta since
+ * last reading
+ */
+ u64 lastpackets;
+ u64 lastbytes;
+
+ u32 id;
+ bool deleted;
+ bool aging;
+
+ struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
+};
+
+struct mlx5_ft_underlay_qp {
+ struct list_head list;
+ u32 qpn;
+};
+
+#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_800
+/* Calculate the fte_match_param length and without the reserved length.
+ * Make sure the reserved field is the last.
+ */
+#define MLX5_ST_SZ_DW_MATCH_PARAM \
+ ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \
+ BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \
+ MLX5_FLD_SZ_BYTES(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED) +\
+ MLX5_BYTE_OFF(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED)))
+
+/* Type of children is mlx5_flow_rule */
+struct fs_fte {
+ struct fs_node node;
+ u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
+ u32 dests_size;
+ u32 index;
+ struct mlx5_flow_act action;
+ enum fs_fte_status status;
+ struct mlx5_fc *counter;
+ struct rhash_head hash;
+};
+
+/* Type of children is mlx5_flow_table/namespace */
+struct fs_prio {
+ struct fs_node node;
+ unsigned int num_levels;
+ unsigned int start_level;
+ unsigned int prio;
+ unsigned int num_ft;
+};
+
+/* Type of children is fs_prio */
+struct mlx5_flow_namespace {
+ /* parent == NULL => root ns */
+ struct fs_node node;
+};
+
+struct mlx5_flow_group_mask {
+ u8 match_criteria_enable;
+ u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM];
+};
+
+/* Type of children is fs_fte */
+struct mlx5_flow_group {
+ struct fs_node node;
+ struct mlx5_flow_group_mask mask;
+ u32 start_index;
+ u32 max_ftes;
+ struct ida fte_allocator;
+ u32 id;
+ struct rhashtable ftes_hash;
+ struct rhlist_head hash;
+};
+
+struct mlx5_flow_root_namespace {
+ struct mlx5_flow_namespace ns;
+ enum fs_flow_table_type table_type;
+ struct mlx5_core_dev *dev;
+ struct mlx5_flow_table *root_ft;
+ /* Should be held when chaining flow tables */
+ struct mutex chain_lock;
+ struct list_head underlay_qpns;
+ const struct mlx5_flow_cmds *cmds;
+};
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+ struct delayed_work *dwork,
+ unsigned long delay);
+void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
+ unsigned long interval);
+
+int mlx5_init_fs(struct mlx5_core_dev *dev);
+void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
+
+#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
+
+#define fs_list_for_each_entry(pos, root) \
+ list_for_each_entry(pos, root, node.list)
+
+#define fs_list_for_each_entry_safe(pos, tmp, root) \
+ list_for_each_entry_safe(pos, tmp, root, node.list)
+
+#define fs_for_each_ns_or_ft_reverse(pos, prio) \
+ list_for_each_entry_reverse(pos, &(prio)->node.children, list)
+
+#define fs_for_each_ns_or_ft(pos, prio) \
+ list_for_each_entry(pos, (&(prio)->node.children), list)
+
+#define fs_for_each_prio(pos, ns) \
+ fs_list_for_each_entry(pos, &(ns)->node.children)
+
+#define fs_for_each_ns(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
+#define fs_for_each_ft(pos, prio) \
+ fs_list_for_each_entry(pos, &(prio)->node.children)
+
+#define fs_for_each_ft_safe(pos, tmp, prio) \
+ fs_list_for_each_entry_safe(pos, tmp, &(prio)->node.children)
+
+#define fs_for_each_fg(pos, ft) \
+ fs_list_for_each_entry(pos, &(ft)->node.children)
+
+#define fs_for_each_fte(pos, fg) \
+ fs_list_for_each_entry(pos, &(fg)->node.children)
+
+#define fs_for_each_dst(pos, fte) \
+ fs_list_for_each_entry(pos, &(fte)->node.children)
+
+#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \
+ (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \
+ (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \
+ (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \
+ (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \
+ (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \
+ (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \
+ (BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\
+ )
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
new file mode 100644
index 000000000..808ddd732
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "fs_cmd.h"
+
+#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+/* Max number of counters to query in bulk read is 32K */
+#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+
+/* locking scheme:
+ *
+ * It is the responsibility of the user to prevent concurrent calls or bad
+ * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference
+ * to struct mlx5_fc.
+ * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a
+ * dump (access to struct mlx5_fc) after a counter is destroyed.
+ *
+ * access to counter list:
+ * - create (user context)
+ * - mlx5_fc_create() only adds to an addlist to be used by
+ * mlx5_fc_stats_query_work(). addlist is protected by a spinlock.
+ * - spawn thread to do the actual destroy
+ *
+ * - destroy (user context)
+ * - mark a counter as deleted
+ * - spawn thread to do the actual del
+ *
+ * - dump (user context)
+ * user should not call dump after destroy
+ *
+ * - query (single thread workqueue context)
+ * destroy/dump - no conflict (see destroy)
+ * query/dump - packets and bytes might be inconsistent (since update is not
+ * atomic)
+ * query/create - no conflict (see create)
+ * since every create/destroy spawn the work, only after necessary time has
+ * elapsed, the thread will actually query the hardware.
+ */
+
+static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+{
+ struct rb_node **new = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*new) {
+ struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node);
+ int result = counter->id - this->id;
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&counter->node, parent, new);
+ rb_insert_color(&counter->node, root);
+}
+
+/* The function returns the last node that was queried so the caller
+ * function can continue calling it till all counters are queried.
+ */
+static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+ struct mlx5_fc *first,
+ u32 last_id)
+{
+ struct mlx5_cmd_fc_bulk *b;
+ struct rb_node *node = NULL;
+ u32 afirst_id;
+ int num;
+ int err;
+
+ int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+
+ /* first id must be aligned to 4 when using bulk query */
+ afirst_id = first->id & ~0x3;
+
+ /* number of counters to query inc. the last counter */
+ num = ALIGN(last_id - afirst_id + 1, 4);
+ if (num > max_bulk) {
+ num = max_bulk;
+ last_id = afirst_id + num - 1;
+ }
+
+ b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
+ if (!b) {
+ mlx5_core_err(dev, "Error allocating resources for bulk query\n");
+ return NULL;
+ }
+
+ err = mlx5_cmd_fc_bulk_query(dev, b);
+ if (err) {
+ mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+ goto out;
+ }
+
+ for (node = &first->node; node; node = rb_next(node)) {
+ struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
+ struct mlx5_fc_cache *c = &counter->cache;
+ u64 packets;
+ u64 bytes;
+
+ if (counter->id > last_id)
+ break;
+
+ mlx5_cmd_fc_bulk_get(dev, b,
+ counter->id, &packets, &bytes);
+
+ if (c->packets == packets)
+ continue;
+
+ c->packets = packets;
+ c->bytes = bytes;
+ c->lastuse = jiffies;
+ }
+
+out:
+ mlx5_cmd_fc_bulk_free(b);
+
+ return node;
+}
+
+static void mlx5_fc_stats_work(struct work_struct *work)
+{
+ struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
+ priv.fc_stats.work.work);
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ unsigned long now = jiffies;
+ struct mlx5_fc *counter = NULL;
+ struct mlx5_fc *last = NULL;
+ struct rb_node *node;
+ LIST_HEAD(tmplist);
+
+ spin_lock(&fc_stats->addlist_lock);
+
+ list_splice_tail_init(&fc_stats->addlist, &tmplist);
+
+ if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
+ queue_delayed_work(fc_stats->wq, &fc_stats->work,
+ fc_stats->sampling_interval);
+
+ spin_unlock(&fc_stats->addlist_lock);
+
+ list_for_each_entry(counter, &tmplist, list)
+ mlx5_fc_stats_insert(&fc_stats->counters, counter);
+
+ node = rb_first(&fc_stats->counters);
+ while (node) {
+ counter = rb_entry(node, struct mlx5_fc, node);
+
+ node = rb_next(node);
+
+ if (counter->deleted) {
+ rb_erase(&counter->node, &fc_stats->counters);
+
+ mlx5_cmd_fc_free(dev, counter->id);
+
+ kfree(counter);
+ continue;
+ }
+
+ last = counter;
+ }
+
+ if (time_before(now, fc_stats->next_query) || !last)
+ return;
+
+ node = rb_first(&fc_stats->counters);
+ while (node) {
+ counter = rb_entry(node, struct mlx5_fc, node);
+
+ node = mlx5_fc_stats_query(dev, counter, last->id);
+ }
+
+ fc_stats->next_query = now + fc_stats->sampling_interval;
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter;
+ int err;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_cmd_fc_alloc(dev, &counter->id);
+ if (err)
+ goto err_out;
+
+ if (aging) {
+ counter->cache.lastuse = jiffies;
+ counter->aging = true;
+
+ spin_lock(&fc_stats->addlist_lock);
+ list_add(&counter->list, &fc_stats->addlist);
+ spin_unlock(&fc_stats->addlist_lock);
+
+ mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+ }
+
+ return counter;
+
+err_out:
+ kfree(counter);
+
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(mlx5_fc_create);
+
+void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ if (!counter)
+ return;
+
+ if (counter->aging) {
+ counter->deleted = true;
+ mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+ return;
+ }
+
+ mlx5_cmd_fc_free(dev, counter->id);
+ kfree(counter);
+}
+EXPORT_SYMBOL(mlx5_fc_destroy);
+
+int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ fc_stats->counters = RB_ROOT;
+ INIT_LIST_HEAD(&fc_stats->addlist);
+ spin_lock_init(&fc_stats->addlist_lock);
+
+ fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
+ if (!fc_stats->wq)
+ return -ENOMEM;
+
+ fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
+ INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
+
+ return 0;
+}
+
+void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter;
+ struct mlx5_fc *tmp;
+ struct rb_node *node;
+
+ cancel_delayed_work_sync(&dev->priv.fc_stats.work);
+ destroy_workqueue(dev->priv.fc_stats.wq);
+ dev->priv.fc_stats.wq = NULL;
+
+ list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
+ list_del(&counter->list);
+
+ mlx5_cmd_fc_free(dev, counter->id);
+
+ kfree(counter);
+ }
+
+ node = rb_first(&fc_stats->counters);
+ while (node) {
+ counter = rb_entry(node, struct mlx5_fc, node);
+
+ node = rb_next(node);
+
+ rb_erase(&counter->node, &fc_stats->counters);
+
+ mlx5_cmd_fc_free(dev, counter->id);
+
+ kfree(counter);
+ }
+}
+
+int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
+ u64 *packets, u64 *bytes)
+{
+ return mlx5_cmd_fc_query(dev, counter->id, packets, bytes);
+}
+EXPORT_SYMBOL(mlx5_fc_query);
+
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+{
+ return counter->cache.lastuse;
+}
+
+void mlx5_fc_query_cached(struct mlx5_fc *counter,
+ u64 *bytes, u64 *packets, u64 *lastuse)
+{
+ struct mlx5_fc_cache c;
+
+ c = counter->cache;
+
+ *bytes = c.bytes - counter->lastbytes;
+ *packets = c.packets - counter->lastpackets;
+ *lastuse = c.lastuse;
+
+ counter->lastbytes = c.bytes;
+ counter->lastpackets = c.packets;
+}
+
+void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ queue_delayed_work(fc_stats->wq, dwork, delay);
+}
+
+void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
+ unsigned long interval)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+ fc_stats->sampling_interval = min_t(unsigned long, interval,
+ fc_stats->sampling_interval);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
new file mode 100644
index 000000000..41ad24f0d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/module.h>
+#include "mlx5_core.h"
+#include "../../mlxfw/mlxfw.h"
+
+static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0};
+
+ MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_query_board_id(struct mlx5_core_dev *dev)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_cmd_query_adapter(dev, out, outlen);
+ if (err)
+ goto out;
+
+ memcpy(dev->board_id,
+ MLX5_ADDR_OF(query_adapter_out, out,
+ query_adapter_struct.vsd_contd_psid),
+ MLX5_FLD_SZ_BYTES(query_adapter_out,
+ query_adapter_struct.vsd_contd_psid));
+
+out:
+ kfree(out);
+ return err;
+}
+
+int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_cmd_query_adapter(mdev, out, outlen);
+ if (err)
+ goto out;
+
+ *vendor_id = MLX5_GET(query_adapter_out, out,
+ query_adapter_struct.ieee_vendor_id);
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_query_vendor_id);
+
+static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_pcam_reg(dev, dev->caps.pcam,
+ MLX5_PCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_PCAM_REGS_5000_TO_507F);
+}
+
+static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_mcam_reg(dev, dev->caps.mcam,
+ MLX5_MCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_MCAM_REGS_FIRST_128);
+}
+
+static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev)
+{
+ return mlx5_query_qcam_reg(dev, dev->caps.qcam,
+ MLX5_QCAM_FEATURE_ENHANCED_FEATURES,
+ MLX5_QCAM_REGS_FIRST_128);
+}
+
+int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
+{
+ int err;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, pg)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, atomic)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, roce)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, nic_flow_table) ||
+ MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vport_group_manager) &&
+ MLX5_ESWITCH_MANAGER(dev)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_ESWITCH_MANAGER(dev)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vector_calc)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, qos)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_QOS);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, debug))
+ mlx5_core_get_caps(dev, MLX5_CAP_DEBUG);
+
+ if (MLX5_CAP_GEN(dev, pcam_reg))
+ mlx5_get_pcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, mcam_reg))
+ mlx5_get_mcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, qcam_reg))
+ mlx5_get_qcam_reg(dev);
+
+ if (MLX5_CAP_GEN(dev, device_memory)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
+{
+ u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0};
+ int i;
+
+ MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+
+ if (MLX5_CAP_GEN(dev, sw_owner_id)) {
+ for (i = 0; i < 4; i++)
+ MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i,
+ sw_owner_id[i]);
+ }
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+ int force_state;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev, force_teardown)) {
+ mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ MLX5_SET(teardown_hca_in, in, profile, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE);
+
+ ret = mlx5_cmd_exec_polling(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ force_state = MLX5_GET(teardown_hca_out, out, force_state);
+ if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+ mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+enum mlxsw_reg_mcc_instruction {
+ MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01,
+ MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02,
+ MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03,
+ MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04,
+ MLX5_REG_MCC_INSTRUCTION_ACTIVATE = 0x06,
+ MLX5_REG_MCC_INSTRUCTION_CANCEL = 0x08,
+};
+
+static int mlx5_reg_mcc_set(struct mlx5_core_dev *dev,
+ enum mlxsw_reg_mcc_instruction instr,
+ u16 component_index, u32 update_handle,
+ u32 component_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mcc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcc_reg)];
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(mcc_reg, in, instruction, instr);
+ MLX5_SET(mcc_reg, in, component_index, component_index);
+ MLX5_SET(mcc_reg, in, update_handle, update_handle);
+ MLX5_SET(mcc_reg, in, component_size, component_size);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCC, 0, 1);
+}
+
+static int mlx5_reg_mcc_query(struct mlx5_core_dev *dev,
+ u32 *update_handle, u8 *error_code,
+ u8 *control_state)
+{
+ u32 out[MLX5_ST_SZ_DW(mcc_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcc_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+ MLX5_SET(mcc_reg, in, update_handle, *update_handle);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCC, 0, 0);
+ if (err)
+ goto out;
+
+ *update_handle = MLX5_GET(mcc_reg, out, update_handle);
+ *error_code = MLX5_GET(mcc_reg, out, error_code);
+ *control_state = MLX5_GET(mcc_reg, out, control_state);
+
+out:
+ return err;
+}
+
+static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev,
+ u32 update_handle,
+ u32 offset, u16 size,
+ u8 *data)
+{
+ int err, in_size = MLX5_ST_SZ_BYTES(mcda_reg) + size;
+ u32 out[MLX5_ST_SZ_DW(mcda_reg)];
+ int i, j, dw_size = size >> 2;
+ __be32 data_element;
+ u32 *in;
+
+ in = kzalloc(in_size, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(mcda_reg, in, update_handle, update_handle);
+ MLX5_SET(mcda_reg, in, offset, offset);
+ MLX5_SET(mcda_reg, in, size, size);
+
+ for (i = 0; i < dw_size; i++) {
+ j = i * 4;
+ data_element = htonl(*(u32 *)&data[j]);
+ memcpy(MLX5_ADDR_OF(mcda_reg, in, data) + j, &data_element, 4);
+ }
+
+ err = mlx5_core_access_reg(dev, in, in_size, out,
+ sizeof(out), MLX5_REG_MCDA, 0, 1);
+ kfree(in);
+ return err;
+}
+
+static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev,
+ u16 component_index,
+ u32 *max_component_size,
+ u8 *log_mcda_word_size,
+ u16 *mcda_max_write_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)];
+ int offset = MLX5_ST_SZ_DW(mcqi_reg);
+ u32 in[MLX5_ST_SZ_DW(mcqi_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(mcqi_reg, in, component_index, component_index);
+ MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap));
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCQI, 0, 0);
+ if (err)
+ goto out;
+
+ *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size);
+ *log_mcda_word_size = MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size);
+ *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size);
+
+out:
+ return err;
+}
+
+struct mlx5_mlxfw_dev {
+ struct mlxfw_dev mlxfw_dev;
+ struct mlx5_core_dev *mlx5_core_dev;
+};
+
+static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev,
+ u16 component_index, u32 *p_max_size,
+ u8 *p_align_bits, u16 *p_max_write_size)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcqi_query(dev, component_index, p_max_size,
+ p_align_bits, p_max_write_size);
+}
+
+static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u8 control_state, error_code;
+ int err;
+
+ *fwhandle = 0;
+ err = mlx5_reg_mcc_query(dev, fwhandle, &error_code, &control_state);
+ if (err)
+ return err;
+
+ if (control_state != MLXFW_FSM_STATE_IDLE)
+ return -EBUSY;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE,
+ 0, *fwhandle, 0);
+}
+
+static int mlx5_fsm_component_update(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index, u32 component_size)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_UPDATE_COMPONENT,
+ component_index, fwhandle, component_size);
+}
+
+static int mlx5_fsm_block_download(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u8 *data, u16 size, u32 offset)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcda_set(dev, fwhandle, offset, size, data);
+}
+
+static int mlx5_fsm_component_verify(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_VERIFY_COMPONENT,
+ component_index, fwhandle, 0);
+}
+
+static int mlx5_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ return mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_ACTIVATE, 0,
+ fwhandle, 0);
+}
+
+static int mlx5_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ enum mlxfw_fsm_state *fsm_state,
+ enum mlxfw_fsm_state_err *fsm_state_err)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+ u8 control_state, error_code;
+ int err;
+
+ err = mlx5_reg_mcc_query(dev, &fwhandle, &error_code, &control_state);
+ if (err)
+ return err;
+
+ *fsm_state = control_state;
+ *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code,
+ MLXFW_FSM_STATE_ERR_MAX);
+ return 0;
+}
+
+static void mlx5_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_CANCEL, 0, fwhandle, 0);
+}
+
+static void mlx5_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlx5_mlxfw_dev *mlx5_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev);
+ struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev;
+
+ mlx5_reg_mcc_set(dev, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0,
+ fwhandle, 0);
+}
+
+static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = {
+ .component_query = mlx5_component_query,
+ .fsm_lock = mlx5_fsm_lock,
+ .fsm_component_update = mlx5_fsm_component_update,
+ .fsm_block_download = mlx5_fsm_block_download,
+ .fsm_component_verify = mlx5_fsm_component_verify,
+ .fsm_activate = mlx5_fsm_activate,
+ .fsm_query_state = mlx5_fsm_query_state,
+ .fsm_cancel = mlx5_fsm_cancel,
+ .fsm_release = mlx5_fsm_release
+};
+
+int mlx5_firmware_flash(struct mlx5_core_dev *dev,
+ const struct firmware *firmware)
+{
+ struct mlx5_mlxfw_dev mlx5_mlxfw_dev = {
+ .mlxfw_dev = {
+ .ops = &mlx5_mlxfw_dev_ops,
+ .psid = dev->board_id,
+ .psid_size = strlen(dev->board_id),
+ },
+ .mlx5_core_dev = dev
+ };
+
+ if (!MLX5_CAP_GEN(dev, mcam_reg) ||
+ !MLX5_CAP_MCAM_REG(dev, mcqi) ||
+ !MLX5_CAP_MCAM_REG(dev, mcc) ||
+ !MLX5_CAP_MCAM_REG(dev, mcda)) {
+ pr_info("%s flashing isn't supported by the running FW\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
new file mode 100644
index 000000000..9f39aeca8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <linux/hardirq.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+ MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
+ MAX_MISSES = 3,
+};
+
+enum {
+ MLX5_HEALTH_SYNDR_FW_ERR = 0x1,
+ MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7,
+ MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR = 0x8,
+ MLX5_HEALTH_SYNDR_CRC_ERR = 0x9,
+ MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa,
+ MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb,
+ MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc,
+ MLX5_HEALTH_SYNDR_EQ_ERR = 0xd,
+ MLX5_HEALTH_SYNDR_EQ_INV = 0xe,
+ MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf,
+ MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
+};
+
+enum {
+ MLX5_NIC_IFC_FULL = 0,
+ MLX5_NIC_IFC_DISABLED = 1,
+ MLX5_NIC_IFC_NO_DRAM_NIC = 2,
+ MLX5_NIC_IFC_INVALID = 3
+};
+
+enum {
+ MLX5_DROP_NEW_HEALTH_WORK,
+ MLX5_DROP_NEW_RECOVERY_WORK,
+};
+
+static u8 get_nic_state(struct mlx5_core_dev *dev)
+{
+ return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+}
+
+static void trigger_cmd_completions(struct mlx5_core_dev *dev)
+{
+ unsigned long flags;
+ u64 vector;
+
+ /* wait for pending handlers to complete */
+ synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD));
+ spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
+ vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
+ if (!vector)
+ goto no_trig;
+
+ vector |= MLX5_TRIGGERED_CMD_COMP;
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+
+ mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
+ mlx5_cmd_comp_handler(dev, vector, true);
+ return;
+
+no_trig:
+ spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
+}
+
+static int in_fatal(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+
+ if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ return 1;
+
+ if (ioread32be(&h->fw_ver) == 0xffffffff)
+ return 1;
+
+ return 0;
+}
+
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
+{
+ mutex_lock(&dev->intf_state_mutex);
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto unlock;
+
+ mlx5_core_err(dev, "start\n");
+ if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) {
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ trigger_cmd_completions(dev);
+ }
+
+ mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
+ mlx5_core_err(dev, "end\n");
+
+unlock:
+ mutex_unlock(&dev->intf_state_mutex);
+}
+
+static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
+{
+ u8 nic_interface = get_nic_state(dev);
+
+ switch (nic_interface) {
+ case MLX5_NIC_IFC_FULL:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
+ break;
+
+ case MLX5_NIC_IFC_DISABLED:
+ mlx5_core_warn(dev, "starting teardown\n");
+ break;
+
+ case MLX5_NIC_IFC_NO_DRAM_NIC:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
+ break;
+ default:
+ mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n",
+ nic_interface);
+ }
+
+ mlx5_disable_device(dev);
+}
+
+static void health_recover(struct work_struct *work)
+{
+ struct mlx5_core_health *health;
+ struct delayed_work *dwork;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ u8 nic_state;
+
+ dwork = container_of(work, struct delayed_work, work);
+ health = container_of(dwork, struct mlx5_core_health, recover_work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ nic_state = get_nic_state(dev);
+ if (nic_state == MLX5_NIC_IFC_INVALID) {
+ dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
+ return;
+ }
+
+ dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+ mlx5_recover_device(dev);
+}
+
+/* How much time to wait until health resetting the driver (in msecs) */
+#define MLX5_RECOVERY_DELAY_MSECS 60000
+static void health_care(struct work_struct *work)
+{
+ unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS);
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+ unsigned long flags;
+
+ health = container_of(work, struct mlx5_core_health, work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+ mlx5_core_warn(dev, "handling bad device here\n");
+ mlx5_handle_bad_state(dev);
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
+ schedule_delayed_work(&health->recover_work, recover_delay);
+ else
+ dev_err(&dev->pdev->dev,
+ "new health works are not permitted at this stage\n");
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+}
+
+static const char *hsynd_str(u8 synd)
+{
+ switch (synd) {
+ case MLX5_HEALTH_SYNDR_FW_ERR:
+ return "firmware internal error";
+ case MLX5_HEALTH_SYNDR_IRISC_ERR:
+ return "irisc not responding";
+ case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
+ return "unrecoverable hardware error";
+ case MLX5_HEALTH_SYNDR_CRC_ERR:
+ return "firmware CRC error";
+ case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
+ return "ICM fetch PCI error";
+ case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
+ return "HW fatal error\n";
+ case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
+ return "async EQ buffer overrun";
+ case MLX5_HEALTH_SYNDR_EQ_ERR:
+ return "EQ error";
+ case MLX5_HEALTH_SYNDR_EQ_INV:
+ return "Invalid EQ referenced";
+ case MLX5_HEALTH_SYNDR_FFSER_ERR:
+ return "FFSER error";
+ case MLX5_HEALTH_SYNDR_HIGH_TEMP:
+ return "High temperature";
+ default:
+ return "unrecognized error";
+ }
+}
+
+static void print_health_info(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ char fw_str[18];
+ u32 fw;
+ int i;
+
+ /* If the syndrome is 0, the device is OK and no need to print buffer */
+ if (!ioread8(&h->synd))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
+ dev_err(&dev->pdev->dev, "assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
+
+ dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
+ dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
+ sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
+ dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str);
+ dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+ dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index));
+ dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
+ dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
+ fw = ioread32be(&h->fw_ver);
+ dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw);
+}
+
+static unsigned long get_next_poll_jiffies(void)
+{
+ unsigned long next;
+
+ get_random_bytes(&next, sizeof(next));
+ next %= HZ;
+ next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+
+ return next;
+}
+
+void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+ queue_work(health->wq, &health->work);
+ else
+ dev_err(&dev->pdev->dev,
+ "new health works are not permitted at this stage\n");
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+}
+
+static void poll_health(struct timer_list *t)
+{
+ struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
+ struct mlx5_core_health *health = &dev->priv.health;
+ u32 count;
+
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
+ count = ioread32be(health->health_counter);
+ if (count == health->prev)
+ ++health->miss_counter;
+ else
+ health->miss_counter = 0;
+
+ health->prev = count;
+ if (health->miss_counter == MAX_MISSES) {
+ dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
+ print_health_info(dev);
+ }
+
+ if (in_fatal(dev) && !health->sick) {
+ health->sick = true;
+ print_health_info(dev);
+ mlx5_trigger_health_work(dev);
+ }
+
+out:
+ mod_timer(&health->timer, get_next_poll_jiffies());
+}
+
+void mlx5_start_health_poll(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ timer_setup(&health->timer, poll_health, 0);
+ health->sick = 0;
+ clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ health->health = &dev->iseg->health;
+ health->health_counter = &dev->iseg->health_counter;
+
+ health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
+ add_timer(&health->timer);
+}
+
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ if (disable_health) {
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ }
+
+ del_timer_sync(&health->timer);
+}
+
+void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+ set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ cancel_delayed_work_sync(&health->recover_work);
+ cancel_work_sync(&health->work);
+}
+
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ unsigned long flags;
+
+ spin_lock_irqsave(&health->wq_lock, flags);
+ set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+ spin_unlock_irqrestore(&health->wq_lock, flags);
+ cancel_delayed_work_sync(&dev->priv.health.recover_work);
+}
+
+void mlx5_health_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+
+ destroy_workqueue(health->wq);
+}
+
+int mlx5_health_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health;
+ char *name;
+
+ health = &dev->priv.health;
+ name = kmalloc(64, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ strcpy(name, "mlx5_health");
+ strcat(name, dev_name(&dev->pdev->dev));
+ health->wq = create_singlethread_workqueue(name);
+ kfree(name);
+ if (!health->wq)
+ return -ENOMEM;
+ spin_lock_init(&health->wq_lock);
+ INIT_WORK(&health->work, health_care);
+ INIT_DELAYED_WORK(&health->recover_work, health_recover);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
new file mode 100644
index 000000000..90cb50fe1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "ipoib.h"
+
+static void mlx5i_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_drvinfo(priv, drvinfo);
+ strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]",
+ sizeof(drvinfo->driver));
+}
+
+static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_strings(priv, stringset, data);
+}
+
+static int mlx5i_get_sset_count(struct net_device *dev, int sset)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_get_sset_count(priv, sset);
+}
+
+static void mlx5i_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
+}
+
+static int mlx5i_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+static void mlx5i_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param);
+}
+
+static int mlx5i_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ return mlx5e_ethtool_set_channels(priv, ch);
+}
+
+static void mlx5i_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+static int mlx5i_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_set_coalesce(priv, coal);
+}
+
+static int mlx5i_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_get_coalesce(priv, coal);
+}
+
+static int mlx5i_get_ts_info(struct net_device *netdev,
+ struct ethtool_ts_info *info)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_get_ts_info(priv, info);
+}
+
+static int mlx5i_flash_device(struct net_device *netdev,
+ struct ethtool_flash *flash)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ return mlx5e_ethtool_flash_device(priv, flash);
+}
+
+enum mlx5_ptys_width {
+ MLX5_PTYS_WIDTH_1X = 1 << 0,
+ MLX5_PTYS_WIDTH_2X = 1 << 1,
+ MLX5_PTYS_WIDTH_4X = 1 << 2,
+ MLX5_PTYS_WIDTH_8X = 1 << 3,
+ MLX5_PTYS_WIDTH_12X = 1 << 4,
+};
+
+static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
+{
+ switch (width) {
+ case MLX5_PTYS_WIDTH_1X: return 1;
+ case MLX5_PTYS_WIDTH_2X: return 2;
+ case MLX5_PTYS_WIDTH_4X: return 4;
+ case MLX5_PTYS_WIDTH_8X: return 8;
+ case MLX5_PTYS_WIDTH_12X: return 12;
+ default: return -1;
+ }
+}
+
+enum mlx5_ptys_rate {
+ MLX5_PTYS_RATE_SDR = 1 << 0,
+ MLX5_PTYS_RATE_DDR = 1 << 1,
+ MLX5_PTYS_RATE_QDR = 1 << 2,
+ MLX5_PTYS_RATE_FDR10 = 1 << 3,
+ MLX5_PTYS_RATE_FDR = 1 << 4,
+ MLX5_PTYS_RATE_EDR = 1 << 5,
+ MLX5_PTYS_RATE_HDR = 1 << 6,
+};
+
+static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
+{
+ switch (rate) {
+ case MLX5_PTYS_RATE_SDR: return 2500;
+ case MLX5_PTYS_RATE_DDR: return 5000;
+ case MLX5_PTYS_RATE_QDR:
+ case MLX5_PTYS_RATE_FDR10: return 10000;
+ case MLX5_PTYS_RATE_FDR: return 14000;
+ case MLX5_PTYS_RATE_EDR: return 25000;
+ case MLX5_PTYS_RATE_HDR: return 50000;
+ default: return -1;
+ }
+}
+
+static int mlx5i_get_port_settings(struct net_device *netdev,
+ u16 *ib_link_width_oper, u16 *ib_proto_oper)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+ int ret;
+
+ ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
+ if (ret)
+ return ret;
+
+ *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
+ *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+
+ return 0;
+}
+
+static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
+{
+ int rate, width;
+
+ rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper);
+ if (rate < 0)
+ return -EINVAL;
+ width = mlx5_ptys_width_enum_to_int(ib_link_width_oper);
+ if (width < 0)
+ return -EINVAL;
+
+ return rate * width;
+}
+
+static int mlx5i_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ u16 ib_link_width_oper;
+ u16 ib_proto_oper;
+ int speed, ret;
+
+ ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+ if (ret)
+ return ret;
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper);
+ if (speed < 0)
+ return -EINVAL;
+
+ link_ksettings->base.duplex = DUPLEX_FULL;
+ link_ksettings->base.port = PORT_OTHER;
+
+ link_ksettings->base.autoneg = AUTONEG_DISABLE;
+
+ link_ksettings->base.speed = speed;
+
+ return 0;
+}
+
+const struct ethtool_ops mlx5i_ethtool_ops = {
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_strings = mlx5i_get_strings,
+ .get_sset_count = mlx5i_get_sset_count,
+ .get_ethtool_stats = mlx5i_get_ethtool_stats,
+ .get_ringparam = mlx5i_get_ringparam,
+ .set_ringparam = mlx5i_set_ringparam,
+ .flash_device = mlx5i_flash_device,
+ .get_channels = mlx5i_get_channels,
+ .set_channels = mlx5i_set_channels,
+ .get_coalesce = mlx5i_get_coalesce,
+ .set_coalesce = mlx5i_set_coalesce,
+ .get_ts_info = mlx5i_get_ts_info,
+ .get_link_ksettings = mlx5i_get_link_ksettings,
+ .get_link = ethtool_op_get_link,
+};
+
+const struct ethtool_ops mlx5i_pkey_ethtool_ops = {
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ts_info = mlx5i_get_ts_info,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
new file mode 100644
index 000000000..db6aafcce
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -0,0 +1,697 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <linux/mlx5/fs.h>
+#include "en.h"
+#include "ipoib.h"
+
+#define IB_DEFAULT_Q_KEY 0xb1b
+#define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
+
+static int mlx5i_open(struct net_device *netdev);
+static int mlx5i_close(struct net_device *netdev);
+static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu);
+
+static const struct net_device_ops mlx5i_netdev_ops = {
+ .ndo_open = mlx5i_open,
+ .ndo_stop = mlx5i_close,
+ .ndo_init = mlx5i_dev_init,
+ .ndo_uninit = mlx5i_dev_cleanup,
+ .ndo_change_mtu = mlx5i_change_mtu,
+ .ndo_do_ioctl = mlx5i_ioctl,
+};
+
+/* IPoIB mlx5 netdev profile */
+static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+
+ /* RQ size in ipoib by default is 512 */
+ params->log_rq_mtu_frames = is_kdump_kernel() ?
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+ MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+ params->lro_en = false;
+ params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+void mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ u16 max_mtu;
+
+ /* priv init */
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+ mutex_init(&priv->state_lock);
+
+ mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ netdev->mtu = max_mtu;
+
+ mlx5e_build_nic_params(mdev, &priv->channels.params,
+ profile->max_nch(mdev), netdev->mtu);
+ mlx5i_build_nic_params(mdev, &priv->channels.params);
+
+ mlx5e_timestamp_init(priv);
+
+ /* netdev init */
+ netdev->hw_features |= NETIF_F_SG;
+ netdev->hw_features |= NETIF_F_IP_CSUM;
+ netdev->hw_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_features |= NETIF_F_GRO;
+ netdev->hw_features |= NETIF_F_TSO;
+ netdev->hw_features |= NETIF_F_TSO6;
+ netdev->hw_features |= NETIF_F_RXCSUM;
+ netdev->hw_features |= NETIF_F_RXHASH;
+
+ netdev->netdev_ops = &mlx5i_netdev_ops;
+ netdev->ethtool_ops = &mlx5i_ethtool_ops;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_cleanup(struct mlx5e_priv *priv)
+{
+ /* Do nothing .. */
+}
+
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_qp *qp = &ipriv->qp;
+ struct mlx5_qp_context *context;
+ int ret;
+
+ /* QP states */
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ context->pri_path.port = 1;
+ context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
+ context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+ memset(context, 0, sizeof(*context));
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+
+ kfree(context);
+ return 0;
+
+err_qp_modify_to_err:
+ mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp);
+ kfree(context);
+ return ret;
+}
+
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_qp_context context;
+ int err;
+
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context,
+ &ipriv->qp);
+ if (err)
+ mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err);
+}
+
+#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
+
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+{
+ u32 *in = NULL;
+ void *addr_path;
+ int ret = 0;
+ int inlen;
+ void *qpc;
+
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, ulp_stateless_offload_mode,
+ MLX5_QP_ENHANCED_ULP_STATELESS_MODE);
+
+ addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+ MLX5_SET(ads, addr_path, vhca_port_num, 1);
+ MLX5_SET(ads, addr_path, grh, 1);
+
+ ret = mlx5_core_create_qp(mdev, qp, in, inlen);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret);
+ goto out;
+ }
+
+out:
+ kvfree(in);
+ return ret;
+}
+
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+{
+ mlx5_core_destroy_qp(mdev, qp);
+}
+
+static int mlx5i_init_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
+ goto err_destroy_underlay_qp;
+ }
+
+ return 0;
+
+err_destroy_underlay_qp:
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+ return err;
+}
+
+static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5e_destroy_tis(priv->mdev, priv->tisn[0]);
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+}
+
+static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
+{
+ struct ttc_params ttc_params = {};
+ int tt, err;
+
+ priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+
+ if (!priv->fs.ns)
+ return -EINVAL;
+
+ err = mlx5e_arfs_create_tables(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n",
+ err);
+ priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
+ }
+
+ mlx5e_set_ttc_basic_params(priv, &ttc_params);
+ mlx5e_set_inner_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
+
+ err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+ err);
+ goto err_destroy_arfs_tables;
+ }
+
+ mlx5e_set_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
+
+ err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
+ err);
+ goto err_destroy_inner_ttc_table;
+ }
+
+ return 0;
+
+err_destroy_inner_ttc_table:
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+err_destroy_arfs_tables:
+ mlx5e_arfs_destroy_tables(priv);
+
+ return err;
+}
+
+static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_arfs_destroy_tables(priv);
+}
+
+static int mlx5i_init_rx(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_indirect_rqt(priv);
+ if (err)
+ return err;
+
+ err = mlx5e_create_direct_rqts(priv);
+ if (err)
+ goto err_destroy_indirect_rqts;
+
+ err = mlx5e_create_indirect_tirs(priv);
+ if (err)
+ goto err_destroy_direct_rqts;
+
+ err = mlx5e_create_direct_tirs(priv);
+ if (err)
+ goto err_destroy_indirect_tirs;
+
+ err = mlx5i_create_flow_steering(priv);
+ if (err)
+ goto err_destroy_direct_tirs;
+
+ return 0;
+
+err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+err_destroy_indirect_tirs:
+ mlx5e_destroy_indirect_tirs(priv);
+err_destroy_direct_rqts:
+ mlx5e_destroy_direct_rqts(priv);
+err_destroy_indirect_rqts:
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ return err;
+}
+
+static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
+{
+ mlx5i_destroy_flow_steering(priv);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+}
+
+static const struct mlx5e_profile mlx5i_nic_profile = {
+ .init = mlx5i_init,
+ .cleanup = mlx5i_cleanup,
+ .init_tx = mlx5i_init_tx,
+ .cleanup_tx = mlx5i_cleanup_tx,
+ .init_rx = mlx5i_init_rx,
+ .cleanup_rx = mlx5i_cleanup_rx,
+ .enable = NULL, /* mlx5i_enable */
+ .disable = NULL, /* mlx5i_disable */
+ .update_stats = NULL, /* mlx5i_update_stats */
+ .max_nch = mlx5e_get_max_num_channels,
+ .update_carrier = NULL, /* no HW update in IB link */
+ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+ .max_tc = MLX5I_MAX_NUM_TC,
+};
+
+/* mlx5i netdev NDos */
+
+static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5e_channels new_channels = {};
+ struct mlx5e_params *params;
+ int err = 0;
+
+ mutex_lock(&priv->state_lock);
+
+ params = &priv->channels.params;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ params->sw_mtu = new_mtu;
+ netdev->mtu = params->sw_mtu;
+ goto out;
+ }
+
+ new_channels.params = *params;
+ new_channels.params.sw_mtu = new_mtu;
+ err = mlx5e_open_channels(priv, &new_channels);
+ if (err)
+ goto out;
+
+ mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+ netdev->mtu = new_channels.params.sw_mtu;
+
+out:
+ mutex_unlock(&priv->state_lock);
+ return err;
+}
+
+int mlx5i_dev_init(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ /* Set dev address using underlay QP */
+ dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff;
+ dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff;
+ dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
+
+ /* Add QPN to net-device mapping to HT */
+ mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn);
+
+ return 0;
+}
+
+int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return mlx5e_hwstamp_set(priv, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx5e_hwstamp_get(priv, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+void mlx5i_dev_cleanup(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_uninit_underlay_qp(priv);
+
+ /* Delete QPN to net-device mapping from HT */
+ mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn);
+}
+
+static int mlx5i_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ int err;
+
+ mutex_lock(&epriv->state_lock);
+
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err);
+ goto err_clear_state_opened_flag;
+ }
+
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
+ goto err_reset_qp;
+ }
+
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err)
+ goto err_remove_fs_underlay_qp;
+
+ mlx5e_refresh_tirs(epriv, false);
+ mlx5e_activate_priv_channels(epriv);
+
+ mutex_unlock(&epriv->state_lock);
+ return 0;
+
+err_remove_fs_underlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_reset_qp:
+ mlx5i_uninit_underlay_qp(epriv);
+err_clear_state_opened_flag:
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
+ return err;
+}
+
+static int mlx5i_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+
+ /* May already be CLOSED in case a previous configuration operation
+ * (e.g RX/TX queue size change) that involves close&open failed.
+ */
+ mutex_lock(&epriv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &epriv->state))
+ goto unlock;
+
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ netif_carrier_off(epriv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5e_deactivate_priv_channels(epriv);
+ mlx5e_close_channels(&epriv->channels);
+ mlx5i_uninit_underlay_qp(epriv);
+unlock:
+ mutex_unlock(&epriv->state_lock);
+ return 0;
+}
+
+/* IPoIB RDMA netdev callbacks */
+static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
+ union ib_gid *gid, u16 lid, int set_qkey,
+ u32 qkey)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ int err;
+
+ mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+ err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn);
+ if (err)
+ mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
+ ipriv->qp.qpn, gid->raw);
+
+ if (set_qkey) {
+ mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
+ netdev->name, qkey);
+ ipriv->qkey = qkey;
+ }
+
+ return err;
+}
+
+static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
+ union ib_gid *gid, u16 lid)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ int err;
+
+ mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+
+ err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
+ if (err)
+ mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
+ ipriv->qp.qpn, gid->raw);
+
+ return err;
+}
+
+static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
+ struct ib_ah *address, u32 dqpn)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(dev);
+ struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)];
+ struct mlx5_ib_ah *mah = to_mah(address);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+
+ return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
+}
+
+static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ ipriv->pkey_index = (u16)id;
+}
+
+static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+ mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ const struct mlx5e_profile *profile = priv->profile;
+
+ mlx5e_detach_netdev(priv);
+ profile->cleanup(priv);
+ destroy_workqueue(priv->wq);
+
+ if (!ipriv->sub_interface) {
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+ mlx5e_destroy_mdev_resources(priv->mdev);
+ }
+}
+
+struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
+ struct ib_device *ibdev,
+ const char *name,
+ void (*setup)(struct net_device *))
+{
+ const struct mlx5e_profile *profile;
+ struct net_device *netdev;
+ struct mlx5i_priv *ipriv;
+ struct mlx5e_priv *epriv;
+ struct rdma_netdev *rn;
+ bool sub_interface;
+ int nch;
+ int err;
+
+ if (mlx5i_check_required_hca_cap(mdev)) {
+ mlx5_core_warn(mdev, "Accelerated mode is not supported\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* TODO: Need to find a better way to check if child device*/
+ sub_interface = (mdev->mlx5e_res.pdn != 0);
+
+ if (sub_interface)
+ profile = mlx5i_pkey_get_profile();
+ else
+ profile = &mlx5i_nic_profile;
+
+ nch = profile->max_nch(mdev);
+
+ netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
+ name, NET_NAME_UNKNOWN,
+ setup,
+ nch * MLX5E_MAX_NUM_TC,
+ nch);
+ if (!netdev) {
+ mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
+ return NULL;
+ }
+
+ ipriv = netdev_priv(netdev);
+ epriv = mlx5i_epriv(netdev);
+
+ epriv->wq = create_singlethread_workqueue("mlx5i");
+ if (!epriv->wq)
+ goto err_free_netdev;
+
+ ipriv->sub_interface = sub_interface;
+ if (!ipriv->sub_interface) {
+ err = mlx5i_pkey_qpn_ht_init(netdev);
+ if (err) {
+ mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
+ goto destroy_wq;
+ }
+
+ /* This should only be called once per mdev */
+ err = mlx5e_create_mdev_resources(mdev);
+ if (err)
+ goto destroy_ht;
+ }
+
+ profile->init(mdev, netdev, profile, ipriv);
+
+ err = mlx5e_attach_netdev(epriv);
+ if (err)
+ goto detach;
+ netif_carrier_off(netdev);
+
+ /* set rdma_netdev func pointers */
+ rn = &ipriv->rn;
+ rn->hca = ibdev;
+ rn->send = mlx5i_xmit;
+ rn->attach_mcast = mlx5i_attach_mcast;
+ rn->detach_mcast = mlx5i_detach_mcast;
+ rn->set_id = mlx5i_set_pkey_index;
+
+ netdev->priv_destructor = mlx5_rdma_netdev_free;
+ netdev->needs_free_netdev = 1;
+
+ return netdev;
+
+detach:
+ profile->cleanup(epriv);
+ if (ipriv->sub_interface)
+ return NULL;
+ mlx5e_destroy_mdev_resources(mdev);
+destroy_ht:
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+destroy_wq:
+ destroy_workqueue(epriv->wq);
+err_free_netdev:
+ free_netdev(netdev);
+
+ return NULL;
+}
+EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
new file mode 100644
index 000000000..0982c579e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5E_IPOB_H__
+#define __MLX5E_IPOB_H__
+
+#ifdef CONFIG_MLX5_CORE_IPOIB
+
+#include <linux/mlx5/fs.h>
+#include "en.h"
+
+#define MLX5I_MAX_NUM_TC 1
+
+extern const struct ethtool_ops mlx5i_ethtool_ops;
+extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
+
+#define MLX5_IB_GRH_BYTES 40
+#define MLX5_IPOIB_ENCAP_LEN 4
+#define MLX5_IPOIB_PSEUDO_LEN 20
+#define MLX5_IPOIB_HARD_LEN (MLX5_IPOIB_PSEUDO_LEN + MLX5_IPOIB_ENCAP_LEN)
+
+/* ipoib rdma netdev's private data structure */
+struct mlx5i_priv {
+ struct rdma_netdev rn; /* keep this first */
+ struct mlx5_core_qp qp;
+ bool sub_interface;
+ u32 qkey;
+ u16 pkey_index;
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
+ char *mlx5e_priv[0];
+};
+
+/* Underlay QP create/destroy functions */
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+
+/* Underlay QP state modification init/uninit functions */
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv);
+
+/* Allocate/Free underlay QPN to net-device hash table */
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev);
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev);
+
+/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn);
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn);
+
+/* Get the net-device corresponding to the given underlay QPN */
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn);
+
+/* Shared ndo functions */
+int mlx5i_dev_init(struct net_device *dev);
+void mlx5i_dev_cleanup(struct net_device *dev);
+int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+
+/* Parent profile functions */
+void mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv);
+
+/* Get child interface nic profile */
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
+
+/* Extract mlx5e_priv from IPoIB netdev */
+#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv))
+
+struct mlx5_wqe_eth_pad {
+ u8 rsvd0[16];
+};
+
+struct mlx5i_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_datagram_seg datagram;
+ struct mlx5_wqe_eth_pad pad;
+ struct mlx5_wqe_eth_seg eth;
+ struct mlx5_wqe_data_seg data[0];
+};
+
+static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
+ struct mlx5i_tx_wqe **wqe,
+ u16 pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+
+ *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
+
+netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_av *av, u32 dqpn, u32 dqkey);
+void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+#endif /* CONFIG_MLX5_CORE_IPOIB */
+#endif /* __MLX5E_IPOB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
new file mode 100644
index 000000000..54a188f41
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include "ipoib.h"
+
+#define MLX5I_MAX_LOG_PKEY_SUP 7
+
+struct qpn_to_netdev {
+ struct net_device *netdev;
+ struct hlist_node hlist;
+ u32 underlay_qpn;
+};
+
+struct mlx5i_pkey_qpn_ht {
+ struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP];
+ spinlock_t ht_lock; /* Synchronise with NAPI */
+};
+
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
+
+ qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL);
+ if (!qpn_htbl)
+ return -ENOMEM;
+
+ ipriv->qpn_htbl = qpn_htbl;
+ spin_lock_init(&qpn_htbl->ht_lock);
+
+ return 0;
+}
+
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ kfree(ipriv->qpn_htbl);
+}
+
+static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets,
+ u32 qpn)
+{
+ struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)];
+ struct qpn_to_netdev *node;
+
+ hlist_for_each_entry(node, h, hlist) {
+ if (node->underlay_qpn == qpn)
+ return node;
+ }
+
+ return NULL;
+}
+
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP);
+ struct qpn_to_netdev *new_node;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
+
+ new_node->netdev = netdev;
+ new_node->underlay_qpn = qpn;
+ spin_lock_bh(&ht->ht_lock);
+ hlist_add_head(&new_node->hlist, &ht->buckets[key]);
+ spin_unlock_bh(&ht->ht_lock);
+
+ return 0;
+}
+
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn);
+ if (!node) {
+ mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n");
+ return -EINVAL;
+ }
+
+ spin_lock_bh(&ht->ht_lock);
+ hlist_del_init(&node->hlist);
+ spin_unlock_bh(&ht->ht_lock);
+ kfree(node);
+
+ return 0;
+}
+
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn);
+ if (!node)
+ return NULL;
+
+ return node->netdev;
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev);
+static int mlx5i_pkey_close(struct net_device *netdev);
+static int mlx5i_pkey_dev_init(struct net_device *dev);
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev);
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu);
+static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+
+static const struct net_device_ops mlx5i_pkey_netdev_ops = {
+ .ndo_open = mlx5i_pkey_open,
+ .ndo_stop = mlx5i_pkey_close,
+ .ndo_init = mlx5i_pkey_dev_init,
+ .ndo_uninit = mlx5i_pkey_dev_cleanup,
+ .ndo_change_mtu = mlx5i_pkey_change_mtu,
+ .ndo_do_ioctl = mlx5i_pkey_ioctl,
+};
+
+/* Child NDOs */
+static int mlx5i_pkey_dev_init(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+ struct net_device *parent_dev;
+ int parent_ifindex;
+
+ ipriv = priv->ppriv;
+
+ /* Get QPN to netdevice hash table from parent */
+ parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev);
+ parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex);
+ if (!parent_dev) {
+ mlx5_core_warn(priv->mdev, "failed to get parent device\n");
+ return -EINVAL;
+ }
+
+ parent_ipriv = netdev_priv(parent_dev);
+ ipriv->qpn_htbl = parent_ipriv->qpn_htbl;
+ dev_put(parent_dev);
+
+ return mlx5i_dev_init(dev);
+}
+
+static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ return mlx5i_ioctl(dev, ifr, cmd);
+}
+
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev)
+{
+ return mlx5i_dev_cleanup(netdev);
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ int err;
+
+ mutex_lock(&epriv->state_lock);
+
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err);
+ goto err_release_lock;
+ }
+
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err);
+ goto err_unint_underlay_qp;
+ }
+
+ err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]);
+ if (err) {
+ mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
+ goto err_remove_rx_uderlay_qp;
+ }
+
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err) {
+ mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
+ goto err_clear_state_opened_flag;
+ }
+ mlx5e_refresh_tirs(epriv, false);
+ mlx5e_activate_priv_channels(epriv);
+ mutex_unlock(&epriv->state_lock);
+
+ return 0;
+
+err_clear_state_opened_flag:
+ mlx5e_destroy_tis(mdev, epriv->tisn[0]);
+err_remove_rx_uderlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_unint_underlay_qp:
+ mlx5i_uninit_underlay_qp(epriv);
+err_release_lock:
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
+ return err;
+}
+
+static int mlx5i_pkey_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ netif_carrier_off(priv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5i_uninit_underlay_qp(priv);
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+ mlx5e_destroy_tis(mdev, priv->tisn[0]);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return 0;
+}
+
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ mutex_lock(&priv->state_lock);
+ netdev->mtu = new_mtu;
+ mutex_unlock(&priv->state_lock);
+
+ return 0;
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ mlx5i_init(mdev, netdev, profile, ppriv);
+
+ /* Override parent ndo */
+ netdev->netdev_ops = &mlx5i_pkey_netdev_ops;
+
+ /* Set child limited ethtool support */
+ netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops;
+
+ /* Use dummy rqs */
+ priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
+{
+ /* Do nothing .. */
+}
+
+static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+}
+
+static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource allocation and init
+ */
+ return 0;
+}
+
+static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource free and de-init
+ */
+}
+
+static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
+ .init = mlx5i_pkey_init,
+ .cleanup = mlx5i_pkey_cleanup,
+ .init_tx = mlx5i_pkey_init_tx,
+ .cleanup_tx = mlx5i_pkey_cleanup_tx,
+ .init_rx = mlx5i_pkey_init_rx,
+ .cleanup_rx = mlx5i_pkey_cleanup_rx,
+ .enable = NULL,
+ .disable = NULL,
+ .update_stats = NULL,
+ .max_nch = mlx5e_get_max_num_channels,
+ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+ .max_tc = MLX5I_MAX_NUM_TC,
+};
+
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
+{
+ return &mlx5i_pkey_nic_profile;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
new file mode 100644
index 000000000..582b2f180
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+
+enum {
+ MLX5_LAG_FLAG_BONDED = 1 << 0,
+};
+
+struct lag_func {
+ struct mlx5_core_dev *dev;
+ struct net_device *netdev;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+ enum netdev_lag_tx_type tx_type;
+ struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS];
+ bool is_bonded;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves both its phys functions.
+ */
+struct mlx5_lag {
+ u8 flags;
+ u8 v2p_map[MLX5_MAX_PORTS];
+ struct lag_func pf[MLX5_MAX_PORTS];
+ struct lag_tracker tracker;
+ struct delayed_work bond_work;
+ struct notifier_block nb;
+
+ /* Admin state. Allow lag only if allowed is true
+ * even if network conditions for lag were met
+ */
+ bool allowed;
+};
+
+/* General purpose, use for short periods of time.
+ * Beware of lock dependencies (preferably, no locks should be acquired
+ * under it).
+ */
+static DEFINE_MUTEX(lag_mutex);
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+ u8 remap_port2)
+{
+ u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
+ void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+
+ MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
+
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+ u8 remap_port2)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
+ void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+ MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+ MLX5_SET(modify_lag_in, in, field_select, 0x1);
+
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+ MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
+
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
+
+ MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
+
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
+
+ MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+
+static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+ bool reset, void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+ MLX5_SET(query_cong_statistics_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+ MLX5_SET(query_cong_statistics_in, in, clear, reset);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
+static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+{
+ return dev->priv.lag;
+}
+
+static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+ struct net_device *ndev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].netdev == ndev)
+ return i;
+
+ return -1;
+}
+
+static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+{
+ return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+}
+
+static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+ u8 *port1, u8 *port2)
+{
+ *port1 = 1;
+ *port2 = 2;
+ if (!tracker->netdev_state[0].tx_enabled ||
+ !tracker->netdev_state[0].link_up) {
+ *port1 = 2;
+ return;
+ }
+
+ if (!tracker->netdev_state[1].tx_enabled ||
+ !tracker->netdev_state[1].link_up)
+ *port2 = 1;
+}
+
+static void mlx5_activate_lag(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
+
+ ldev->flags |= MLX5_LAG_FLAG_BONDED;
+
+ mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
+ &ldev->v2p_map[1]);
+
+ err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to create LAG (%d)\n",
+ err);
+}
+
+static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ int err;
+
+ ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+
+ err = mlx5_cmd_destroy_lag(dev0);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to destroy LAG (%d)\n",
+ err);
+}
+
+static void mlx5_do_bond(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
+ struct lag_tracker tracker;
+ u8 v2p_port1, v2p_port2;
+ int i, err;
+ bool do_bond;
+
+ if (!dev0 || !dev1)
+ return;
+
+ mutex_lock(&lag_mutex);
+ tracker = ldev->tracker;
+ mutex_unlock(&lag_mutex);
+
+ do_bond = tracker.is_bonded && ldev->allowed;
+
+ if (do_bond && !mlx5_lag_is_bonded(ldev)) {
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+
+ mlx5_activate_lag(ldev, &tracker);
+
+ mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_enable_roce(dev1);
+ } else if (do_bond && mlx5_lag_is_bonded(ldev)) {
+ mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
+ &v2p_port2);
+
+ if ((v2p_port1 != ldev->v2p_map[0]) ||
+ (v2p_port2 != ldev->v2p_map[1])) {
+ ldev->v2p_map[0] = v2p_port1;
+ ldev->v2p_map[1] = v2p_port2;
+
+ err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+ if (err)
+ mlx5_core_err(dev0,
+ "Failed to modify LAG (%d)\n",
+ err);
+ }
+ } else if (!do_bond && mlx5_lag_is_bonded(ldev)) {
+ mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+ mlx5_nic_vport_disable_roce(dev1);
+
+ mlx5_deactivate_lag(ldev);
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+ MLX5_INTERFACE_PROTOCOL_IB);
+ }
+}
+
+static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
+{
+ schedule_delayed_work(&ldev->bond_work, delay);
+}
+
+static void mlx5_do_bond_work(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
+ bond_work);
+ int status;
+
+ status = mlx5_dev_list_trylock();
+ if (!status) {
+ /* 1 sec delay. */
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_do_bond(ldev);
+ mlx5_dev_list_unlock();
+}
+
+static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *upper = info->upper_dev, *ndev_tmp;
+ struct netdev_lag_upper_info *lag_upper_info = NULL;
+ bool is_bonded;
+ int bond_status = 0;
+ int num_slaves = 0;
+ int idx;
+
+ if (!netif_is_lag_master(upper))
+ return 0;
+
+ if (info->linking)
+ lag_upper_info = info->upper_info;
+
+ /* The event may still be of interest if the slave does not belong to
+ * us, but is enslaved to a master which has one or more of our netdevs
+ * as slaves (e.g., if a new slave is added to a master that bonds two
+ * of our netdevs, we should unbond).
+ */
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+ if (idx > -1)
+ bond_status |= (1 << idx);
+
+ num_slaves++;
+ }
+ rcu_read_unlock();
+
+ /* None of this lagdev's netdevs are slaves of this master. */
+ if (!(bond_status & 0x3))
+ return 0;
+
+ if (lag_upper_info)
+ tracker->tx_type = lag_upper_info->tx_type;
+
+ /* Determine bonding status:
+ * A device is considered bonded if both its physical ports are slaves
+ * of the same lag master, and only them.
+ * Lag mode must be activebackup or hash.
+ */
+ is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
+ (bond_status == 0x3) &&
+ ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
+ (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
+
+ if (tracker->is_bonded != is_bonded) {
+ tracker->is_bonded = is_bonded;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
+ struct lag_tracker *tracker,
+ struct net_device *ndev,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct netdev_lag_lower_state_info *lag_lower_info;
+ int idx;
+
+ if (!netif_is_lag_port(ndev))
+ return 0;
+
+ idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
+ if (idx == -1)
+ return 0;
+
+ /* This information is used to determine virtual to physical
+ * port mapping.
+ */
+ lag_lower_info = info->lower_state_info;
+ if (!lag_lower_info)
+ return 0;
+
+ tracker->netdev_state[idx] = *lag_lower_info;
+
+ return 1;
+}
+
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct lag_tracker tracker;
+ struct mlx5_lag *ldev;
+ int changed = 0;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return NOTIFY_DONE;
+
+ if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
+ return NOTIFY_DONE;
+
+ ldev = container_of(this, struct mlx5_lag, nb);
+ tracker = ldev->tracker;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
+ ptr);
+ break;
+ case NETDEV_CHANGELOWERSTATE:
+ changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
+ ndev, ptr);
+ break;
+ }
+
+ mutex_lock(&lag_mutex);
+ ldev->tracker = tracker;
+ mutex_unlock(&lag_mutex);
+
+ if (changed)
+ mlx5_queue_bond_work(ldev, 0);
+
+ return NOTIFY_DONE;
+}
+
+static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+{
+ if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) ||
+ (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev)))
+ return false;
+ else
+ return true;
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(void)
+{
+ struct mlx5_lag *ldev;
+
+ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+ if (!ldev)
+ return NULL;
+
+ INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+ ldev->allowed = mlx5_lag_check_prereq(ldev);
+
+ return ldev;
+}
+
+static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
+{
+ kfree(ldev);
+}
+
+static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev,
+ struct net_device *netdev)
+{
+ unsigned int fn = PCI_FUNC(dev->pdev->devfn);
+
+ if (fn >= MLX5_MAX_PORTS)
+ return;
+
+ mutex_lock(&lag_mutex);
+ ldev->pf[fn].dev = dev;
+ ldev->pf[fn].netdev = netdev;
+ ldev->tracker.netdev_state[fn].link_up = 0;
+ ldev->tracker.netdev_state[fn].tx_enabled = 0;
+
+ ldev->allowed = mlx5_lag_check_prereq(ldev);
+ dev->priv.lag = ldev;
+
+ mutex_unlock(&lag_mutex);
+}
+
+static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev == dev)
+ break;
+
+ if (i == MLX5_MAX_PORTS)
+ return;
+
+ mutex_lock(&lag_mutex);
+ memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
+
+ dev->priv.lag = NULL;
+ ldev->allowed = mlx5_lag_check_prereq(ldev);
+ mutex_unlock(&lag_mutex);
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
+{
+ struct mlx5_lag *ldev = NULL;
+ struct mlx5_core_dev *tmp_dev;
+
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
+ return;
+
+ tmp_dev = mlx5_get_next_phys_dev(dev);
+ if (tmp_dev)
+ ldev = tmp_dev->priv.lag;
+
+ if (!ldev) {
+ ldev = mlx5_lag_dev_alloc();
+ if (!ldev) {
+ mlx5_core_err(dev, "Failed to alloc lag dev\n");
+ return;
+ }
+ }
+
+ mlx5_lag_dev_add_pf(ldev, dev, netdev);
+
+ if (!ldev->nb.notifier_call) {
+ ldev->nb.notifier_call = mlx5_lag_netdev_event;
+ if (register_netdevice_notifier(&ldev->nb)) {
+ ldev->nb.notifier_call = NULL;
+ mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+ }
+ }
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_remove(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ int i;
+
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev)
+ return;
+
+ if (mlx5_lag_is_bonded(ldev))
+ mlx5_deactivate_lag(ldev);
+
+ mlx5_lag_dev_remove_pf(ldev, dev);
+
+ for (i = 0; i < MLX5_MAX_PORTS; i++)
+ if (ldev->pf[i].dev)
+ break;
+
+ if (i == MLX5_MAX_PORTS) {
+ if (ldev->nb.notifier_call)
+ unregister_netdevice_notifier(&ldev->nb);
+ cancel_delayed_work_sync(&ldev->bond_work);
+ mlx5_lag_dev_free(ldev);
+ }
+}
+
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ res = ldev && mlx5_lag_is_bonded(ldev);
+ mutex_unlock(&lag_mutex);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_active);
+
+static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow)
+{
+ struct mlx5_lag *ldev;
+ int ret = 0;
+ bool lag_active;
+
+ mlx5_dev_list_lock();
+
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+ lag_active = mlx5_lag_is_bonded(ldev);
+ if (!mlx5_lag_check_prereq(ldev) && allow) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ if (ldev->allowed == allow)
+ goto unlock;
+ ldev->allowed = allow;
+ if ((lag_active && !allow) || allow)
+ mlx5_do_bond(ldev);
+unlock:
+ mlx5_dev_list_unlock();
+ return ret;
+}
+
+int mlx5_lag_forbid(struct mlx5_core_dev *dev)
+{
+ return mlx5_lag_set_state(dev, false);
+}
+
+int mlx5_lag_allow(struct mlx5_core_dev *dev)
+{
+ return mlx5_lag_set_state(dev, true);
+}
+
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+{
+ struct net_device *ndev = NULL;
+ struct mlx5_lag *ldev;
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+
+ if (!(ldev && mlx5_lag_is_bonded(ldev)))
+ goto unlock;
+
+ if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ ndev = ldev->tracker.netdev_state[0].tx_enabled ?
+ ldev->pf[0].netdev : ldev->pf[1].netdev;
+ } else {
+ ndev = ldev->pf[0].netdev;
+ }
+ if (ndev)
+ dev_hold(ndev);
+
+unlock:
+ mutex_unlock(&lag_mutex);
+
+ return ndev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+ struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
+ priv);
+ struct mlx5_lag *ldev;
+
+ if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
+ return true;
+
+ ldev = mlx5_lag_dev_get(dev);
+ if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+ return true;
+
+ /* If bonded, we do not add an IB device for PF1. */
+ return false;
+}
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+ u64 *values,
+ int num_counters,
+ size_t *offsets)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+ struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+ struct mlx5_lag *ldev;
+ int num_ports;
+ int ret, i, j;
+ void *out;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ memset(values, 0, sizeof(*values) * num_counters);
+
+ mutex_lock(&lag_mutex);
+ ldev = mlx5_lag_dev_get(dev);
+ if (ldev && mlx5_lag_is_bonded(ldev)) {
+ num_ports = MLX5_MAX_PORTS;
+ mdev[0] = ldev->pf[0].dev;
+ mdev[1] = ldev->pf[1].dev;
+ } else {
+ num_ports = 1;
+ mdev[0] = dev;
+ }
+
+ for (i = 0; i < num_ports; ++i) {
+ ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+ if (ret)
+ goto unlock;
+
+ for (j = 0; j < num_counters; ++j)
+ values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+ }
+
+unlock:
+ mutex_unlock(&lag_mutex);
+ kvfree(out);
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
new file mode 100644
index 000000000..d8e17110f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
new file mode 100644
index 000000000..0fd62510f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -0,0 +1,616 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/highmem.h>
+#include <rdma/mlx5-abi.h>
+#include "en.h"
+#include "clock.h"
+
+enum {
+ MLX5_CYCLES_SHIFT = 23
+};
+
+enum {
+ MLX5_PIN_MODE_IN = 0x0,
+ MLX5_PIN_MODE_OUT = 0x1,
+};
+
+enum {
+ MLX5_OUT_PATTERN_PULSE = 0x0,
+ MLX5_OUT_PATTERN_PERIODIC = 0x1,
+};
+
+enum {
+ MLX5_EVENT_MODE_DISABLE = 0x0,
+ MLX5_EVENT_MODE_REPETETIVE = 0x1,
+ MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2,
+};
+
+enum {
+ MLX5_MTPPS_FS_ENABLE = BIT(0x0),
+ MLX5_MTPPS_FS_PATTERN = BIT(0x2),
+ MLX5_MTPPS_FS_PIN_MODE = BIT(0x3),
+ MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4),
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
+ MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
+};
+
+static u64 read_internal_timer(const struct cyclecounter *cc)
+{
+ struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+
+ return mlx5_read_internal_timer(mdev) & cc->mask;
+}
+
+static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 sign;
+
+ if (!clock_info)
+ return;
+
+ sign = smp_load_acquire(&clock_info->sign);
+ smp_store_mb(clock_info->sign,
+ sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
+
+ clock_info->cycles = clock->tc.cycle_last;
+ clock_info->mult = clock->cycles.mult;
+ clock_info->nsec = clock->tc.nsec;
+ clock_info->frac = clock->tc.frac;
+
+ smp_store_release(&clock_info->sign,
+ sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
+}
+
+static void mlx5_pps_out(struct work_struct *work)
+{
+ struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
+ out_work);
+ struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
+ pps_info);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ u64 tstart;
+
+ write_lock_irqsave(&clock->lock, flags);
+ tstart = clock->pps_info.start[i];
+ clock->pps_info.start[i] = 0;
+ write_unlock_irqrestore(&clock->lock, flags);
+ if (!tstart)
+ continue;
+
+ MLX5_SET(mtpps_reg, in, pin, i);
+ MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
+ MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP);
+ mlx5_set_mtpps(mdev, in, sizeof(in));
+ }
+}
+
+static void mlx5_timestamp_overflow(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock,
+ overflow_work);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_read(&clock->tc);
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+ schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
+}
+
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ u64 ns = timespec64_to_ns(ts);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_init(&clock->tc, &clock->cycles, ns);
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ u64 ns;
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ ns = timecounter_read(&clock->tc);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_adjtime(&clock->tc, delta);
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+ u64 adj;
+ u32 diff;
+ unsigned long flags;
+ int neg_adj = 0;
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+
+ if (delta < 0) {
+ neg_adj = 1;
+ delta = -delta;
+ }
+
+ adj = clock->nominal_c_mult;
+ adj *= delta;
+ diff = div_u64(adj, 1000000000ULL);
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_read(&clock->tc);
+ clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
+ clock->nominal_c_mult + diff;
+ mlx5_update_clock_info_page(clock->mdev);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_extts_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u32 field_select = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->extts.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ if (on) {
+ pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+ if (pin < 0)
+ return -EBUSY;
+ pin_mode = MLX5_PIN_MODE_IN;
+ pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
+ field_select = MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_ENABLE;
+ } else {
+ pin = rq->extts.index;
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_perout_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u64 nsec_now, nsec_delta, time_stamp = 0;
+ u64 cycles_now, cycles_delta;
+ struct timespec64 ts;
+ unsigned long flags;
+ u32 field_select = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+ s64 ns;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->perout.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ if (on) {
+ pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT,
+ rq->perout.index);
+ if (pin < 0)
+ return -EBUSY;
+
+ pin_mode = MLX5_PIN_MODE_OUT;
+ pattern = MLX5_OUT_PATTERN_PERIODIC;
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ ns = timespec64_to_ns(&ts);
+
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+
+ ts.tv_sec = rq->perout.start.sec;
+ ts.tv_nsec = rq->perout.start.nsec;
+ ns = timespec64_to_ns(&ts);
+ cycles_now = mlx5_read_internal_timer(mdev);
+ write_lock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+ nsec_delta = ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+ clock->cycles.mult);
+ write_unlock_irqrestore(&clock->lock, flags);
+ time_stamp = cycles_now + cycles_delta;
+ field_select = MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_ENABLE |
+ MLX5_MTPPS_FS_TIME_STAMP;
+ } else {
+ pin = rq->perout.index;
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_pps_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+
+ clock->pps_info.enabled = !!on;
+ return 0;
+}
+
+static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ return mlx5_extts_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PEROUT:
+ return mlx5_perout_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PPS:
+ return mlx5_pps_configure(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+enum {
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN = BIT(0),
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT = BIT(1),
+};
+
+static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+
+ switch (func) {
+ case PTP_PF_NONE:
+ return 0;
+ case PTP_PF_EXTTS:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_IN);
+ case PTP_PF_PEROUT:
+ return !(clock->pps_info.pin_caps[pin] &
+ MLX5_MTPPS_REG_CAP_PIN_X_MODE_SUPPORT_PPS_OUT);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct ptp_clock_info mlx5_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "mlx5_p2p",
+ .max_adj = 100000000,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 0,
+ .adjfreq = mlx5_ptp_adjfreq,
+ .adjtime = mlx5_ptp_adjtime,
+ .gettime64 = mlx5_ptp_gettime,
+ .settime64 = mlx5_ptp_settime,
+ .enable = NULL,
+ .verify = NULL,
+};
+
+static int mlx5_init_pin_config(struct mlx5_clock *clock)
+{
+ int i;
+
+ clock->ptp_info.pin_config =
+ kcalloc(clock->ptp_info.n_pins,
+ sizeof(*clock->ptp_info.pin_config),
+ GFP_KERNEL);
+ if (!clock->ptp_info.pin_config)
+ return -ENOMEM;
+ clock->ptp_info.enable = mlx5_ptp_enable;
+ clock->ptp_info.verify = mlx5_ptp_verify;
+ clock->ptp_info.pps = 1;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ snprintf(clock->ptp_info.pin_config[i].name,
+ sizeof(clock->ptp_info.pin_config[i].name),
+ "mlx5_pps%d", i);
+ clock->ptp_info.pin_config[i].index = i;
+ clock->ptp_info.pin_config[i].func = PTP_PF_NONE;
+ clock->ptp_info.pin_config[i].chan = i;
+ }
+
+ return 0;
+}
+
+static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ mlx5_query_mtpps(mdev, out, sizeof(out));
+
+ clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
+ cap_number_of_pps_pins);
+ clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_in_pins);
+ clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_out_pins);
+
+ clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+ clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+ clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+ clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+ clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+ clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+ clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+ clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+}
+
+void mlx5_pps_event(struct mlx5_core_dev *mdev,
+ struct mlx5_eqe *eqe)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ struct ptp_clock_event ptp_event;
+ struct timespec64 ts;
+ u64 nsec_now, nsec_delta;
+ u64 cycles_now, cycles_delta;
+ int pin = eqe->data.pps.pin;
+ s64 ns;
+ unsigned long flags;
+
+ switch (clock->ptp_info.pin_config[pin].func) {
+ case PTP_PF_EXTTS:
+ ptp_event.index = pin;
+ ptp_event.timestamp =
+ mlx5_timecounter_cyc2time(clock,
+ be64_to_cpu(eqe->data.pps.time_stamp));
+ if (clock->pps_info.enabled) {
+ ptp_event.type = PTP_CLOCK_PPSUSR;
+ ptp_event.pps_times.ts_real =
+ ns_to_timespec64(ptp_event.timestamp);
+ } else {
+ ptp_event.type = PTP_CLOCK_EXTTS;
+ }
+ ptp_clock_event(clock->ptp, &ptp_event);
+ break;
+ case PTP_PF_PEROUT:
+ mlx5_ptp_gettime(&clock->ptp_info, &ts);
+ cycles_now = mlx5_read_internal_timer(mdev);
+ ts.tv_sec += 1;
+ ts.tv_nsec = 0;
+ ns = timespec64_to_ns(&ts);
+ write_lock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+ nsec_delta = ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+ clock->cycles.mult);
+ clock->pps_info.start[pin] = cycles_now + cycles_delta;
+ schedule_work(&clock->pps_info.out_work);
+ write_unlock_irqrestore(&clock->lock, flags);
+ break;
+ default:
+ mlx5_core_err(mdev, " Unhandled event\n");
+ }
+}
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u64 overflow_cycles;
+ u64 ns;
+ u64 frac = 0;
+ u32 dev_freq;
+
+ dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ if (!dev_freq) {
+ mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+ return;
+ }
+ rwlock_init(&clock->lock);
+ clock->cycles.read = read_internal_timer;
+ clock->cycles.shift = MLX5_CYCLES_SHIFT;
+ clock->cycles.mult = clocksource_khz2mult(dev_freq,
+ clock->cycles.shift);
+ clock->nominal_c_mult = clock->cycles.mult;
+ clock->cycles.mask = CLOCKSOURCE_MASK(41);
+ clock->mdev = mdev;
+
+ timecounter_init(&clock->tc, &clock->cycles,
+ ktime_to_ns(ktime_get_real()));
+
+ /* Calculate period in seconds to call the overflow watchdog - to make
+ * sure counter is checked at least twice every wrap around.
+ * The period is calculated as the minimum between max HW cycles count
+ * (The clock source mask) and max amount of cycles that can be
+ * multiplied by clock multiplier where the result doesn't exceed
+ * 64bits.
+ */
+ overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult);
+ overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3));
+
+ ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles,
+ frac, &frac);
+ do_div(ns, NSEC_PER_SEC / HZ);
+ clock->overflow_period = ns;
+
+ mdev->clock_info_page = alloc_page(GFP_KERNEL);
+ if (mdev->clock_info_page) {
+ mdev->clock_info = kmap(mdev->clock_info_page);
+ if (!mdev->clock_info) {
+ __free_page(mdev->clock_info_page);
+ mlx5_core_warn(mdev, "failed to map clock page\n");
+ } else {
+ mdev->clock_info->sign = 0;
+ mdev->clock_info->nsec = clock->tc.nsec;
+ mdev->clock_info->cycles = clock->tc.cycle_last;
+ mdev->clock_info->mask = clock->cycles.mask;
+ mdev->clock_info->mult = clock->nominal_c_mult;
+ mdev->clock_info->shift = clock->cycles.shift;
+ mdev->clock_info->frac = clock->tc.frac;
+ mdev->clock_info->overflow_period =
+ clock->overflow_period;
+ }
+ }
+
+ INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
+ INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
+ if (clock->overflow_period)
+ schedule_delayed_work(&clock->overflow_work, 0);
+ else
+ mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n");
+
+ /* Configure the PHC */
+ clock->ptp_info = mlx5_ptp_clock_info;
+
+ /* Initialize 1PPS data structures */
+ if (MLX5_PPS_CAP(mdev))
+ mlx5_get_pps_caps(mdev);
+ if (clock->ptp_info.n_pins)
+ mlx5_init_pin_config(clock);
+
+ clock->ptp = ptp_clock_register(&clock->ptp_info,
+ &mdev->pdev->dev);
+ if (IS_ERR(clock->ptp)) {
+ mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+ PTR_ERR(clock->ptp));
+ clock->ptp = NULL;
+ }
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ if (clock->ptp) {
+ ptp_clock_unregister(clock->ptp);
+ clock->ptp = NULL;
+ }
+
+ cancel_work_sync(&clock->pps_info.out_work);
+ cancel_delayed_work_sync(&clock->overflow_work);
+
+ if (mdev->clock_info) {
+ kunmap(mdev->clock_info_page);
+ __free_page(mdev->clock_info_page);
+ mdev->clock_info = NULL;
+ }
+
+ kfree(clock->ptp_info.pin_config);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
new file mode 100644
index 000000000..02e2e4575
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_CLOCK_H__
+#define __LIB_CLOCK_H__
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+void mlx5_init_clock(struct mlx5_core_dev *mdev);
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+ return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ u64 nsec;
+
+ read_lock(&clock->lock);
+ nsec = timecounter_cyc2time(&clock->tc, timestamp);
+ read_unlock(&clock->lock);
+
+ return ns_to_ktime(nsec);
+}
+
+#else
+static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {}
+static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {}
+
+static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
+{
+ return -1;
+}
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
new file mode 100644
index 000000000..7722a3f9b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+ unsigned int tblsz = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+ ida_init(&dev->roce.reserved_gids.ida);
+ dev->roce.reserved_gids.start = tblsz;
+ dev->roce.reserved_gids.count = 0;
+}
+
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+ WARN_ON(!ida_is_empty(&dev->roce.reserved_gids.ida));
+ dev->roce.reserved_gids.start = 0;
+ dev->roce.reserved_gids.count = 0;
+ ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
+ return -EPERM;
+ }
+ if (dev->roce.reserved_gids.start < count) {
+ mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
+ count);
+ return -ENOMEM;
+ }
+ if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+ mlx5_core_warn(dev, "Unable to reserve %d more GIDs\n", count);
+ return -ENOMEM;
+ }
+
+ dev->roce.reserved_gids.start -= count;
+ dev->roce.reserved_gids.count += count;
+ mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+ dev->roce.reserved_gids.count,
+ dev->roce.reserved_gids.start);
+ return 0;
+}
+
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+ WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
+ WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+ count, dev->roce.reserved_gids.count);
+
+ dev->roce.reserved_gids.start += count;
+ dev->roce.reserved_gids.count -= count;
+ mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+ dev->roce.reserved_gids.count,
+ dev->roce.reserved_gids.start);
+}
+
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+ int end = dev->roce.reserved_gids.start +
+ dev->roce.reserved_gids.count;
+ int index = 0;
+
+ index = ida_simple_get(&dev->roce.reserved_gids.ida,
+ dev->roce.reserved_gids.start, end,
+ GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ mlx5_core_dbg(dev, "Allocating reserved GID %u\n", index);
+ *gid_index = index;
+ return 0;
+}
+
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+ mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+ ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index);
+}
+
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+ return dev->roce.reserved_gids.count;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+ u8 roce_version, u8 roce_l3_type, const u8 *gid,
+ const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_l3_address);
+ void *addr_mac = MLX5_ADDR_OF(roce_addr_layout, in_addr,
+ source_mac_47_32);
+ int gidsz = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+ if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return -EINVAL;
+
+ if (gid) {
+ if (vlan) {
+ MLX5_SET_RA(in_addr, vlan_valid, 1);
+ MLX5_SET_RA(in_addr, vlan_id, vlan_id);
+ }
+
+ ether_addr_copy(addr_mac, mac);
+ MLX5_SET_RA(in_addr, roce_version, roce_version);
+ MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type);
+ memcpy(addr_l3_addr, gid, gidsz);
+ }
+
+ if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0)
+ MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
new file mode 100644
index 000000000..7550b1cc8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
new file mode 100644
index 000000000..98359559c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_core.h"
+#include "lib/mpfs.h"
+
+/* HW L2 Table (MPFS) management */
+static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
+{
+ u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
+ u8 *in_mac_addr;
+
+ MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
+ MLX5_SET(set_l2_table_entry_in, in, table_index, index);
+
+ in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
+ ether_addr_copy(&in_mac_addr[2], mac);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
+
+ MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/* UC L2 table hash node */
+struct l2table_node {
+ struct l2addr_node node;
+ u32 index; /* index in HW l2 table */
+};
+
+struct mlx5_mpfs {
+ struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE];
+ struct mutex lock; /* Synchronize l2 table access */
+ u32 size;
+ unsigned long *bitmap;
+};
+
+static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix)
+{
+ int err = 0;
+
+ *ix = find_first_zero_bit(l2table->bitmap, l2table->size);
+ if (*ix >= l2table->size)
+ err = -ENOSPC;
+ else
+ __set_bit(*ix, l2table->bitmap);
+
+ return err;
+}
+
+static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix)
+{
+ __clear_bit(ix, l2table->bitmap);
+}
+
+int mlx5_mpfs_init(struct mlx5_core_dev *dev)
+{
+ int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
+ struct mlx5_mpfs *mpfs;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return 0;
+
+ mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
+ if (!mpfs)
+ return -ENOMEM;
+
+ mutex_init(&mpfs->lock);
+ mpfs->size = l2table_size;
+ mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size),
+ sizeof(uintptr_t), GFP_KERNEL);
+ if (!mpfs->bitmap) {
+ kfree(mpfs);
+ return -ENOMEM;
+ }
+
+ dev->priv.mpfs = mpfs;
+ return 0;
+}
+
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return;
+
+ WARN_ON(!hlist_empty(mpfs->hash));
+ kfree(mpfs->bitmap);
+ kfree(mpfs);
+}
+
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ u32 index;
+ int err;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (l2addr) {
+ err = -EEXIST;
+ goto abort;
+ }
+
+ err = alloc_l2table_index(mpfs, &index);
+ if (err)
+ goto abort;
+
+ l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
+ if (!l2addr) {
+ free_l2table_index(mpfs, index);
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ l2addr->index = index;
+ err = set_l2table_entry_cmd(dev, index, mac);
+ if (err) {
+ l2addr_hash_del(l2addr);
+ free_l2table_index(mpfs, index);
+ }
+
+ mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
+abort:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
+
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ int err = 0;
+ u32 index;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (!l2addr) {
+ err = -ENOENT;
+ goto unlock;
+ }
+
+ index = l2addr->index;
+ del_l2table_entry_cmd(dev, index);
+ l2addr_hash_del(l2addr);
+ free_l2table_index(mpfs, index);
+ mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index);
+unlock:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
new file mode 100644
index 000000000..4a7b2c320
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_MPFS_H__
+#define __MLX5_MPFS_H__
+
+#include <linux/if_ether.h>
+#include <linux/mlx5/device.h>
+
+/* L2 -mac address based- hash helpers */
+#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
+#define MLX5_L2_ADDR_HASH(addr) (addr[5])
+
+struct l2addr_node {
+ struct hlist_node hlist;
+ u8 addr[ETH_ALEN];
+};
+
+#define for_each_l2hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist)
+
+#define l2addr_hash_find(hash, mac, type) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ bool found = false; \
+ type *ptr = NULL; \
+ \
+ hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \
+ if (ether_addr_equal(ptr->node.addr, mac)) {\
+ found = true; \
+ break; \
+ } \
+ if (!found) \
+ ptr = NULL; \
+ ptr; \
+})
+
+#define l2addr_hash_add(hash, mac, type, gfp) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ type *ptr = NULL; \
+ \
+ ptr = kzalloc(sizeof(type), gfp); \
+ if (ptr) { \
+ ether_addr_copy(ptr->node.addr, mac); \
+ hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\
+ } \
+ ptr; \
+})
+
+#define l2addr_hash_del(ptr) ({ \
+ hlist_del(&(ptr)->node.hlist); \
+ kfree(ptr); \
+})
+
+#ifdef CONFIG_MLX5_MPFS
+int mlx5_mpfs_init(struct mlx5_core_dev *dev);
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
+#else /* #ifndef CONFIG_MLX5_MPFS */
+static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
+static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
new file mode 100644
index 000000000..9a8fd7621
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "vxlan.h"
+
+struct mlx5_vxlan {
+ struct mlx5_core_dev *mdev;
+ spinlock_t lock; /* protect vxlan table */
+ /* max_num_ports is usuallly 4, 16 buckets is more than enough */
+ DECLARE_HASHTABLE(htable, 4);
+ int num_ports;
+ struct mutex sync_lock; /* sync add/del port HW operations */
+};
+
+struct mlx5_vxlan_port {
+ struct hlist_node hlist;
+ atomic_t refcount;
+ u16 udp_port;
+};
+
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+ return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
+static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+ u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
+
+ MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
+ MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
+ MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
+
+ MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
+ MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+ MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static struct mlx5_vxlan_port*
+mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+
+ hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
+ if (vxlanp->udp_port == port)
+ return vxlanp;
+ }
+
+ return NULL;
+}
+
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+
+ if (!mlx5_vxlan_allowed(vxlan))
+ return NULL;
+
+ spin_lock_bh(&vxlan->lock);
+ vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+ spin_unlock_bh(&vxlan->lock);
+
+ return vxlanp;
+}
+
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ int ret = -ENOSPC;
+
+ vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
+ if (vxlanp) {
+ atomic_inc(&vxlanp->refcount);
+ return 0;
+ }
+
+ mutex_lock(&vxlan->sync_lock);
+ if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
+ mlx5_core_info(vxlan->mdev,
+ "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
+ port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
+ ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+ if (ret)
+ goto unlock;
+
+ vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
+ if (!vxlanp) {
+ ret = -ENOMEM;
+ goto err_delete_port;
+ }
+
+ vxlanp->udp_port = port;
+ atomic_set(&vxlanp->refcount, 1);
+
+ spin_lock_bh(&vxlan->lock);
+ hash_add(vxlan->htable, &vxlanp->hlist, port);
+ spin_unlock_bh(&vxlan->lock);
+
+ vxlan->num_ports++;
+ mutex_unlock(&vxlan->sync_lock);
+ return 0;
+
+err_delete_port:
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+
+unlock:
+ mutex_unlock(&vxlan->sync_lock);
+ return ret;
+}
+
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ bool remove = false;
+ int ret = 0;
+
+ mutex_lock(&vxlan->sync_lock);
+
+ spin_lock_bh(&vxlan->lock);
+ vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+ if (!vxlanp) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (atomic_dec_and_test(&vxlanp->refcount)) {
+ hash_del(&vxlanp->hlist);
+ remove = true;
+ }
+
+out_unlock:
+ spin_unlock_bh(&vxlan->lock);
+
+ if (remove) {
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+ kfree(vxlanp);
+ vxlan->num_ports--;
+ }
+
+ mutex_unlock(&vxlan->sync_lock);
+
+ return ret;
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_vxlan *vxlan;
+
+ if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
+ return ERR_PTR(-ENOTSUPP);
+
+ vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
+ if (!vxlan)
+ return ERR_PTR(-ENOMEM);
+
+ vxlan->mdev = mdev;
+ mutex_init(&vxlan->sync_lock);
+ spin_lock_init(&vxlan->lock);
+ hash_init(vxlan->htable);
+
+ /* Hardware adds 4789 by default */
+ mlx5_vxlan_add_port(vxlan, 4789);
+
+ return vxlan;
+}
+
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+ struct mlx5_vxlan_port *vxlanp;
+ struct hlist_node *tmp;
+ int bkt;
+
+ if (!mlx5_vxlan_allowed(vxlan))
+ return;
+
+ /* Lockless since we are the only hash table consumers*/
+ hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
+ hash_del(&vxlanp->hlist);
+ mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
+ kfree(vxlanp);
+ }
+
+ kfree(vxlan);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
new file mode 100644
index 000000000..8fb0eb08f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_VXLAN_H__
+#define __MLX5_VXLAN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vxlan;
+struct mlx5_vxlan_port;
+
+static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
+{
+ /* not allowed reason is encoded in vxlan pointer as error,
+ * on mlx5_vxlan_create
+ */
+ return !IS_ERR_OR_NULL(vxlan);
+}
+
+#if IS_ENABLED(CONFIG_VXLAN)
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+#else
+static inline struct mlx5_vxlan*
+mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
+static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
+static inline struct mx5_vxlan_port*
+mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return NULL; }
+#endif
+
+#endif /* __MLX5_VXLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
new file mode 100644
index 000000000..3a3b0005f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+ u16 opmod, u8 port)
+{
+ int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+ int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+ int err = -ENOMEM;
+ void *data;
+ void *resp;
+ u32 *out;
+ u32 *in;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!in || !out)
+ goto out;
+
+ MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+ MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+ MLX5_SET(mad_ifc_in, in, port, port);
+
+ data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+ memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ if (err)
+ goto out;
+
+ resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+ memcpy(outb, resp,
+ MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
+
+out:
+ kfree(out);
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
new file mode 100644
index 000000000..a2b25afa2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -0,0 +1,1721 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/debugfs.h>
+#include <linux/kmod.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+#include <linux/version.h>
+#include <net/devlink.h>
+#include "mlx5_core.h"
+#include "fs_core.h"
+#include "lib/mpfs.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+#include "fpga/core.h"
+#include "fpga/ipsec.h"
+#include "accel/ipsec.h"
+#include "accel/tls.h"
+#include "lib/clock.h"
+#include "lib/vxlan.h"
+#include "diag/fw_tracer.h"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+unsigned int mlx5_core_debug_mask;
+module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
+MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
+
+#define MLX5_DEFAULT_PROF 2
+static unsigned int prof_sel = MLX5_DEFAULT_PROF;
+module_param_named(prof_sel, prof_sel, uint, 0444);
+MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+
+static u32 sw_owner_id[4];
+
+enum {
+ MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
+static struct mlx5_profile profile[] = {
+ [0] = {
+ .mask = 0,
+ },
+ [1] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE,
+ .log_max_qp = 12,
+ },
+ [2] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE |
+ MLX5_PROF_MASK_MR_CACHE,
+ .log_max_qp = 18,
+ .mr_cache[0] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[1] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[2] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[3] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[4] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[5] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[6] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[7] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[8] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[9] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[10] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[11] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[12] = {
+ .size = 64,
+ .limit = 32
+ },
+ .mr_cache[13] = {
+ .size = 32,
+ .limit = 16
+ },
+ .mr_cache[14] = {
+ .size = 16,
+ .limit = 8
+ },
+ .mr_cache[15] = {
+ .size = 8,
+ .limit = 4
+ },
+ },
+};
+
+#define FW_INIT_TIMEOUT_MILI 2000
+#define FW_INIT_WAIT_MS 2
+#define FW_PRE_INIT_TIMEOUT_MILI 10000
+
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+{
+ unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
+ int err = 0;
+
+ while (fw_initializing(dev)) {
+ if (time_after(jiffies, end)) {
+ err = -EBUSY;
+ break;
+ }
+ msleep(FW_INIT_WAIT_MS);
+ }
+
+ return err;
+}
+
+static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
+{
+ int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
+ driver_version);
+ u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0};
+ u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0};
+ int remaining_size = driver_ver_sz;
+ char *string;
+
+ if (!MLX5_CAP_GEN(dev, driver_version))
+ return;
+
+ string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
+
+ strncpy(string, "Linux", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, ",", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, DRIVER_NAME, remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+ strncat(string, ",", remaining_size);
+
+ remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
+
+ snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
+ (u8)((LINUX_VERSION_CODE >> 16) & 0xff), (u8)((LINUX_VERSION_CODE >> 8) & 0xff),
+ (u16)(LINUX_VERSION_CODE & 0xffff));
+
+ /*Send the command*/
+ MLX5_SET(set_driver_version_in, in, opcode,
+ MLX5_CMD_OP_SET_DRIVER_VERSION);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int set_dma_caps(struct pci_dev *pdev)
+{
+ int err;
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
+ return err;
+ }
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev,
+ "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev,
+ "Can't set consistent PCI DMA mask, aborting\n");
+ return err;
+ }
+ }
+
+ dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
+ return err;
+}
+
+static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err = 0;
+
+ mutex_lock(&dev->pci_status_mutex);
+ if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
+ err = pci_enable_device(pdev);
+ if (!err)
+ dev->pci_status = MLX5_PCI_STATUS_ENABLED;
+ }
+ mutex_unlock(&dev->pci_status_mutex);
+
+ return err;
+}
+
+static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+
+ mutex_lock(&dev->pci_status_mutex);
+ if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
+ pci_disable_device(pdev);
+ dev->pci_status = MLX5_PCI_STATUS_DISABLED;
+ }
+ mutex_unlock(&dev->pci_status_mutex);
+}
+
+static int request_bar(struct pci_dev *pdev)
+{
+ int err = 0;
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
+ return -ENODEV;
+ }
+
+ err = pci_request_regions(pdev, DRIVER_NAME);
+ if (err)
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
+
+ return err;
+}
+
+static void release_bar(struct pci_dev *pdev)
+{
+ pci_release_regions(pdev);
+}
+
+static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ struct mlx5_eq_table *table = &priv->eq_table;
+ int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ MLX5_CAP_GEN(dev, max_num_eqs) :
+ 1 << MLX5_CAP_GEN(dev, log_max_eq);
+ int nvec;
+ int err;
+
+ nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+ MLX5_EQ_VEC_COMP_BASE;
+ nvec = min_t(int, nvec, num_eqs);
+ if (nvec <= MLX5_EQ_VEC_COMP_BASE)
+ return -ENOMEM;
+
+ priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
+ if (!priv->irq_info)
+ return -ENOMEM;
+
+ nvec = pci_alloc_irq_vectors(dev->pdev,
+ MLX5_EQ_VEC_COMP_BASE + 1, nvec,
+ PCI_IRQ_MSIX);
+ if (nvec < 0) {
+ err = nvec;
+ goto err_free_irq_info;
+ }
+
+ table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
+
+ return 0;
+
+err_free_irq_info:
+ kfree(priv->irq_info);
+ return err;
+}
+
+static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+
+ pci_free_irq_vectors(dev->pdev);
+ kfree(priv->irq_info);
+}
+
+struct mlx5_reg_host_endianness {
+ u8 he;
+ u8 rsvd[15];
+};
+
+#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))
+
+enum {
+ MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
+ MLX5_DEV_CAP_FLAG_DCT,
+};
+
+static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
+{
+ switch (size) {
+ case 128:
+ return 0;
+ case 256:
+ return 1;
+ case 512:
+ return 2;
+ case 1024:
+ return 3;
+ case 2048:
+ return 4;
+ case 4096:
+ return 5;
+ default:
+ mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
+ return 0;
+ }
+}
+
+static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
+ enum mlx5_cap_type cap_type,
+ enum mlx5_cap_mode cap_mode)
+{
+ u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *out, *hca_caps;
+ u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
+ int err;
+
+ memset(in, 0, sizeof(in));
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err) {
+ mlx5_core_warn(dev,
+ "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
+ cap_type, cap_mode, err);
+ goto query_ex;
+ }
+
+ hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+
+ switch (cap_mode) {
+ case HCA_CAP_OPMOD_GET_MAX:
+ memcpy(dev->caps.hca_max[cap_type], hca_caps,
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ break;
+ case HCA_CAP_OPMOD_GET_CUR:
+ memcpy(dev->caps.hca_cur[cap_type], hca_caps,
+ MLX5_UN_SZ_BYTES(hca_cap_union));
+ break;
+ default:
+ mlx5_core_warn(dev,
+ "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
+ cap_type, cap_mode);
+ err = -EINVAL;
+ break;
+ }
+query_ex:
+ kfree(out);
+ return err;
+}
+
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
+{
+ int ret;
+
+ ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
+ if (ret)
+ return ret;
+ return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
+}
+
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
+{
+ u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
+
+ MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
+ return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+}
+
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+ void *set_ctx;
+ void *set_hca_cap;
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ int req_endianness;
+ int err;
+
+ if (MLX5_CAP_GEN(dev, atomic)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+ if (err)
+ return err;
+ } else {
+ return 0;
+ }
+
+ req_endianness =
+ MLX5_CAP_ATOMIC(dev,
+ supported_atomic_req_8B_endianness_mode_1);
+
+ if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+ return 0;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+ /* Set requestor to host endianness */
+ MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+ err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+ kfree(set_ctx);
+ return err;
+}
+
+static int handle_hca_cap(struct mlx5_core_dev *dev)
+{
+ void *set_ctx = NULL;
+ struct mlx5_profile *prof = dev->profile;
+ int err = -ENOMEM;
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ void *set_hca_cap;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ goto query_ex;
+
+ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
+ if (err)
+ goto query_ex;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
+ capability);
+ memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL],
+ MLX5_ST_SZ_BYTES(cmd_hca_cap));
+
+ mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
+ mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
+ 128);
+ /* we limit the size of the pkey table to 128 entries for now */
+ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
+ to_fw_pkey_sz(dev, 128));
+
+ /* Check log_max_qp from HCA caps to set in current profile */
+ if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
+ mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
+ profile[prof_sel].log_max_qp,
+ MLX5_CAP_GEN_MAX(dev, log_max_qp));
+ profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+ }
+ if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
+ prof->log_max_qp);
+
+ /* disable cmdif checksum */
+ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
+
+ /* Enable 4K UAR only when HCA supports it and page size is bigger
+ * than 4K.
+ */
+ if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
+ MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
+
+ MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
+
+ if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
+ MLX5_SET(cmd_hca_cap,
+ set_hca_cap,
+ cache_line_128byte,
+ cache_line_size() >= 128 ? 1 : 0);
+
+ if (MLX5_CAP_GEN_MAX(dev, dct))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
+
+ if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
+ MLX5_SET(cmd_hca_cap,
+ set_hca_cap,
+ num_vhca_ports,
+ MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
+
+ err = set_caps(dev, set_ctx, set_sz,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+
+query_ex:
+ kfree(set_ctx);
+ return err;
+}
+
+static int set_hca_ctrl(struct mlx5_core_dev *dev)
+{
+ struct mlx5_reg_host_endianness he_in;
+ struct mlx5_reg_host_endianness he_out;
+ int err;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ memset(&he_in, 0, sizeof(he_in));
+ he_in.he = MLX5_SET_HOST_ENDIANNESS;
+ err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
+ &he_out, sizeof(he_out),
+ MLX5_REG_HOST_ENDIANNESS, 0, 1);
+ return err;
+}
+
+static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
+{
+ int ret = 0;
+
+ /* Disable local_lb by default */
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ ret = mlx5_nic_vport_update_local_lb(dev, false);
+
+ return ret;
+}
+
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};
+
+ MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+ MLX5_SET(enable_hca_in, in, function_id, func_id);
+ return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+}
+
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};
+
+ MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+ MLX5_SET(disable_hca_in, in, function_id, func_id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+{
+ u32 timer_h, timer_h1, timer_l;
+
+ timer_h = ioread32be(&dev->iseg->internal_timer_h);
+ timer_l = ioread32be(&dev->iseg->internal_timer_l);
+ timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
+ if (timer_h != timer_h1) /* wrap around */
+ timer_l = ioread32be(&dev->iseg->internal_timer_l);
+
+ return (u64)timer_l | (u64)timer_h1 << 32;
+}
+
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ struct mlx5_priv *priv = &mdev->priv;
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+
+ if (!zalloc_cpumask_var(&priv->irq_info[vecidx].mask, GFP_KERNEL)) {
+ mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+ return -ENOMEM;
+ }
+
+ cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+ priv->irq_info[vecidx].mask);
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ irq_set_affinity_hint(irq, priv->irq_info[vecidx].mask))
+ mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+ return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+ int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
+ struct mlx5_priv *priv = &mdev->priv;
+ int irq = pci_irq_vector(mdev->pdev, vecidx);
+
+ irq_set_affinity_hint(irq, NULL);
+ free_cpumask_var(priv->irq_info[vecidx].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+ err = mlx5_irq_set_affinity_hint(mdev, i);
+ if (err)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ for (i--; i >= 0; i--)
+ mlx5_irq_clear_affinity_hint(mdev, i);
+
+ return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+ int i;
+
+ for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+ mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
+int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
+ unsigned int *irqn)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq, *n;
+ int err = -ENOENT;
+
+ spin_lock(&table->lock);
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ if (eq->index == vector) {
+ *eqn = eq->eqn;
+ *irqn = eq->irqn;
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&table->lock);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_vector2eqn);
+
+struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq;
+
+ spin_lock(&table->lock);
+ list_for_each_entry(eq, &table->comp_eqs_list, list)
+ if (eq->eqn == eqn) {
+ spin_unlock(&table->lock);
+ return eq;
+ }
+
+ spin_unlock(&table->lock);
+
+ return ERR_PTR(-ENOENT);
+}
+
+static void free_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ struct mlx5_eq *eq, *n;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (dev->rmap) {
+ free_irq_cpu_rmap(dev->rmap);
+ dev->rmap = NULL;
+ }
+#endif
+ spin_lock(&table->lock);
+ list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
+ list_del(&eq->list);
+ spin_unlock(&table->lock);
+ if (mlx5_destroy_unmap_eq(dev, eq))
+ mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
+ eq->eqn);
+ kfree(eq);
+ spin_lock(&table->lock);
+ }
+ spin_unlock(&table->lock);
+}
+
+static int alloc_comp_eqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->priv.eq_table;
+ char name[MLX5_MAX_IRQ_NAME];
+ struct mlx5_eq *eq;
+ int ncomp_vec;
+ int nent;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&table->comp_eqs_list);
+ ncomp_vec = table->num_comp_vectors;
+ nent = MLX5_COMP_EQ_SIZE;
+#ifdef CONFIG_RFS_ACCEL
+ dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
+ if (!dev->rmap)
+ return -ENOMEM;
+#endif
+ for (i = 0; i < ncomp_vec; i++) {
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev,
+ MLX5_EQ_VEC_COMP_BASE + i));
+#endif
+ snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+ err = mlx5_create_map_eq(dev, eq,
+ i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
+ name, MLX5_EQ_TYPE_COMP);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
+ eq->index = i;
+ spin_lock(&table->lock);
+ list_add_tail(&eq->list, &table->comp_eqs_list);
+ spin_unlock(&table->lock);
+ }
+
+ return 0;
+
+clean:
+ free_comp_eqs(dev);
+ return err;
+}
+
+static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
+{
+ u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
+ u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
+ u32 sup_issi;
+ int err;
+
+ MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
+ err = mlx5_cmd_exec(dev, query_in, sizeof(query_in),
+ query_out, sizeof(query_out));
+ if (err) {
+ u32 syndrome;
+ u8 status;
+
+ mlx5_cmd_mbox_status(query_out, &status, &syndrome);
+ if (!status || syndrome == MLX5_DRIVER_SYND) {
+ mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
+ err, status, syndrome);
+ return err;
+ }
+
+ mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
+ dev->issi = 0;
+ return 0;
+ }
+
+ sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
+
+ if (sup_issi & (1 << 1)) {
+ u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0};
+ u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
+
+ MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
+ MLX5_SET(set_issi_in, set_in, current_issi, 1);
+ err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
+ set_out, sizeof(set_out));
+ if (err) {
+ mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
+ err);
+ return err;
+ }
+
+ dev->issi = 1;
+
+ return 0;
+ } else if (sup_issi & (1 << 0) || !sup_issi) {
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err = 0;
+
+ pci_set_drvdata(dev->pdev, dev);
+ strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
+ priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
+
+ mutex_init(&priv->pgdir_mutex);
+ INIT_LIST_HEAD(&priv->pgdir_list);
+ spin_lock_init(&priv->mkey_lock);
+
+ mutex_init(&priv->alloc_mutex);
+
+ priv->numa_node = dev_to_node(&dev->pdev->dev);
+
+ if (mlx5_debugfs_root)
+ priv->dbg_root =
+ debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root);
+
+ err = mlx5_pci_enable_device(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
+ goto err_dbg;
+ }
+
+ err = request_bar(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "error requesting BARs, aborting\n");
+ goto err_disable;
+ }
+
+ pci_set_master(pdev);
+
+ err = set_dma_caps(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
+ goto err_clr_master;
+ }
+
+ dev->iseg_base = pci_resource_start(dev->pdev, 0);
+ dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
+ if (!dev->iseg) {
+ err = -ENOMEM;
+ dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
+ goto err_clr_master;
+ }
+
+ return 0;
+
+err_clr_master:
+ pci_clear_master(dev->pdev);
+ release_bar(dev->pdev);
+err_disable:
+ mlx5_pci_disable_device(dev);
+
+err_dbg:
+ debugfs_remove(priv->dbg_root);
+ return err;
+}
+
+static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+ iounmap(dev->iseg);
+ pci_clear_master(dev->pdev);
+ release_bar(dev->pdev);
+ mlx5_pci_disable_device(dev);
+ debugfs_remove_recursive(priv->dbg_root);
+}
+
+static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ err = mlx5_query_board_id(dev);
+ if (err) {
+ dev_err(&pdev->dev, "query board id failed\n");
+ goto out;
+ }
+
+ err = mlx5_eq_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize eq\n");
+ goto out;
+ }
+
+ err = mlx5_cq_debugfs_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
+ goto err_eq_cleanup;
+ }
+
+ mlx5_init_qp_table(dev);
+
+ mlx5_init_srq_table(dev);
+
+ mlx5_init_mkey_table(dev);
+
+ mlx5_init_reserved_gids(dev);
+
+ mlx5_init_clock(dev);
+
+ dev->vxlan = mlx5_vxlan_create(dev);
+
+ err = mlx5_init_rl_table(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init rate limiting\n");
+ goto err_tables_cleanup;
+ }
+
+ err = mlx5_mpfs_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init l2 table %d\n", err);
+ goto err_rl_cleanup;
+ }
+
+ err = mlx5_eswitch_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
+ goto err_mpfs_cleanup;
+ }
+
+ err = mlx5_sriov_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
+ goto err_eswitch_cleanup;
+ }
+
+ err = mlx5_fpga_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
+ goto err_sriov_cleanup;
+ }
+
+ dev->tracer = mlx5_fw_tracer_create(dev);
+
+ return 0;
+
+err_sriov_cleanup:
+ mlx5_sriov_cleanup(dev);
+err_eswitch_cleanup:
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_mpfs_cleanup:
+ mlx5_mpfs_cleanup(dev);
+err_rl_cleanup:
+ mlx5_cleanup_rl_table(dev);
+err_tables_cleanup:
+ mlx5_vxlan_destroy(dev->vxlan);
+ mlx5_cleanup_mkey_table(dev);
+ mlx5_cleanup_srq_table(dev);
+ mlx5_cleanup_qp_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
+
+err_eq_cleanup:
+ mlx5_eq_cleanup(dev);
+
+out:
+ return err;
+}
+
+static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
+{
+ mlx5_fw_tracer_destroy(dev->tracer);
+ mlx5_fpga_cleanup(dev);
+ mlx5_sriov_cleanup(dev);
+ mlx5_eswitch_cleanup(dev->priv.eswitch);
+ mlx5_mpfs_cleanup(dev);
+ mlx5_cleanup_rl_table(dev);
+ mlx5_vxlan_destroy(dev->vxlan);
+ mlx5_cleanup_clock(dev);
+ mlx5_cleanup_reserved_gids(dev);
+ mlx5_cleanup_mkey_table(dev);
+ mlx5_cleanup_srq_table(dev);
+ mlx5_cleanup_qp_table(dev);
+ mlx5_cq_debugfs_cleanup(dev);
+ mlx5_eq_cleanup(dev);
+}
+
+static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+ bool boot)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ mutex_lock(&dev->intf_state_mutex);
+ if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
+ __func__);
+ goto out;
+ }
+
+ dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
+ fw_rev_min(dev), fw_rev_sub(dev));
+
+ /* Only PFs hold the relevant PCIe information for this query */
+ if (mlx5_core_is_pf(dev))
+ pcie_print_link_status(dev->pdev);
+
+ /* on load removing any previous indication of internal error, device is
+ * up
+ */
+ dev->state = MLX5_DEVICE_STATE_UP;
+
+ /* wait for firmware to accept initialization segments configurations
+ */
+ err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+ if (err) {
+ dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
+ FW_PRE_INIT_TIMEOUT_MILI);
+ goto out_err;
+ }
+
+ err = mlx5_cmd_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
+ goto out_err;
+ }
+
+ err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+ if (err) {
+ dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
+ FW_INIT_TIMEOUT_MILI);
+ goto err_cmd_cleanup;
+ }
+
+ err = mlx5_core_enable_hca(dev, 0);
+ if (err) {
+ dev_err(&pdev->dev, "enable hca failed\n");
+ goto err_cmd_cleanup;
+ }
+
+ err = mlx5_core_set_issi(dev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to set issi\n");
+ goto err_disable_hca;
+ }
+
+ err = mlx5_satisfy_startup_pages(dev, 1);
+ if (err) {
+ dev_err(&pdev->dev, "failed to allocate boot pages\n");
+ goto err_disable_hca;
+ }
+
+ err = set_hca_ctrl(dev);
+ if (err) {
+ dev_err(&pdev->dev, "set_hca_ctrl failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = handle_hca_cap(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = handle_hca_cap_atomic(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_satisfy_startup_pages(dev, 0);
+ if (err) {
+ dev_err(&pdev->dev, "failed to allocate init pages\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_pagealloc_start(dev);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
+ goto reclaim_boot_pages;
+ }
+
+ err = mlx5_cmd_init_hca(dev, sw_owner_id);
+ if (err) {
+ dev_err(&pdev->dev, "init hca failed\n");
+ goto err_pagealloc_stop;
+ }
+
+ mlx5_set_driver_version(dev);
+
+ mlx5_start_health_poll(dev);
+
+ err = mlx5_query_hca_caps(dev);
+ if (err) {
+ dev_err(&pdev->dev, "query hca failed\n");
+ goto err_stop_poll;
+ }
+
+ if (boot) {
+ err = mlx5_init_once(dev, priv);
+ if (err) {
+ dev_err(&pdev->dev, "sw objs init failed\n");
+ goto err_stop_poll;
+ }
+ }
+
+ err = mlx5_alloc_irq_vectors(dev);
+ if (err) {
+ dev_err(&pdev->dev, "alloc irq vectors failed\n");
+ goto err_cleanup_once;
+ }
+
+ dev->priv.uar = mlx5_get_uars_page(dev);
+ if (IS_ERR(dev->priv.uar)) {
+ dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+ err = PTR_ERR(dev->priv.uar);
+ goto err_disable_msix;
+ }
+
+ err = mlx5_start_eqs(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
+ goto err_put_uars;
+ }
+
+ err = mlx5_fw_tracer_init(dev->tracer);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init FW tracer\n");
+ goto err_fw_tracer;
+ }
+
+ err = alloc_comp_eqs(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
+ goto err_comp_eqs;
+ }
+
+ err = mlx5_irq_set_affinity_hints(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+ goto err_affinity_hints;
+ }
+
+ err = mlx5_fpga_device_start(dev);
+ if (err) {
+ dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+ goto err_fpga_start;
+ }
+
+ err = mlx5_accel_ipsec_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+ goto err_ipsec_start;
+ }
+
+ err = mlx5_accel_tls_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "TLS device start failed %d\n", err);
+ goto err_tls_start;
+ }
+
+ err = mlx5_init_fs(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init flow steering\n");
+ goto err_fs;
+ }
+
+ err = mlx5_core_set_hca_defaults(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set hca defaults\n");
+ goto err_fs;
+ }
+
+ err = mlx5_sriov_attach(dev);
+ if (err) {
+ dev_err(&pdev->dev, "sriov init failed %d\n", err);
+ goto err_sriov;
+ }
+
+ if (mlx5_device_registered(dev)) {
+ mlx5_attach_device(dev);
+ } else {
+ err = mlx5_register_device(dev);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
+ goto err_reg_dev;
+ }
+ }
+
+ set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+
+ return 0;
+
+err_reg_dev:
+ mlx5_sriov_detach(dev);
+
+err_sriov:
+ mlx5_cleanup_fs(dev);
+
+err_fs:
+ mlx5_accel_tls_cleanup(dev);
+
+err_tls_start:
+ mlx5_accel_ipsec_cleanup(dev);
+
+err_ipsec_start:
+ mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
+ mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
+ free_comp_eqs(dev);
+
+err_comp_eqs:
+ mlx5_fw_tracer_cleanup(dev->tracer);
+
+err_fw_tracer:
+ mlx5_stop_eqs(dev);
+
+err_put_uars:
+ mlx5_put_uars_page(dev, priv->uar);
+
+err_disable_msix:
+ mlx5_free_irq_vectors(dev);
+
+err_cleanup_once:
+ if (boot)
+ mlx5_cleanup_once(dev);
+
+err_stop_poll:
+ mlx5_stop_health_poll(dev, boot);
+ if (mlx5_cmd_teardown_hca(dev)) {
+ dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
+ goto out_err;
+ }
+
+err_pagealloc_stop:
+ mlx5_pagealloc_stop(dev);
+
+reclaim_boot_pages:
+ mlx5_reclaim_startup_pages(dev);
+
+err_disable_hca:
+ mlx5_core_disable_hca(dev, 0);
+
+err_cmd_cleanup:
+ mlx5_cmd_cleanup(dev);
+
+out_err:
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ mutex_unlock(&dev->intf_state_mutex);
+
+ return err;
+}
+
+static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+ bool cleanup)
+{
+ int err = 0;
+
+ if (cleanup)
+ mlx5_drain_health_recovery(dev);
+
+ mutex_lock(&dev->intf_state_mutex);
+ if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+ dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
+ __func__);
+ if (cleanup)
+ mlx5_cleanup_once(dev);
+ goto out;
+ }
+
+ clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+
+ if (mlx5_device_registered(dev))
+ mlx5_detach_device(dev);
+
+ mlx5_sriov_detach(dev);
+ mlx5_cleanup_fs(dev);
+ mlx5_accel_ipsec_cleanup(dev);
+ mlx5_accel_tls_cleanup(dev);
+ mlx5_fpga_device_stop(dev);
+ mlx5_irq_clear_affinity_hints(dev);
+ free_comp_eqs(dev);
+ mlx5_fw_tracer_cleanup(dev->tracer);
+ mlx5_stop_eqs(dev);
+ mlx5_put_uars_page(dev, priv->uar);
+ mlx5_free_irq_vectors(dev);
+ if (cleanup)
+ mlx5_cleanup_once(dev);
+ mlx5_stop_health_poll(dev, cleanup);
+ err = mlx5_cmd_teardown_hca(dev);
+ if (err) {
+ dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
+ goto out;
+ }
+ mlx5_pagealloc_stop(dev);
+ mlx5_reclaim_startup_pages(dev);
+ mlx5_core_disable_hca(dev, 0);
+ mlx5_cmd_cleanup(dev);
+
+out:
+ mutex_unlock(&dev->intf_state_mutex);
+ return err;
+}
+
+struct mlx5_core_event_handler {
+ void (*event)(struct mlx5_core_dev *dev,
+ enum mlx5_dev_event event,
+ void *data);
+};
+
+static const struct devlink_ops mlx5_devlink_ops = {
+#ifdef CONFIG_MLX5_ESWITCH
+ .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
+ .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+ .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+ .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
+ .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
+ .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
+#endif
+};
+
+#define MLX5_IB_MOD "mlx5_ib"
+static int init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct mlx5_core_dev *dev;
+ struct devlink *devlink;
+ struct mlx5_priv *priv;
+ int err;
+
+ devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
+ if (!devlink) {
+ dev_err(&pdev->dev, "kzalloc failed\n");
+ return -ENOMEM;
+ }
+
+ dev = devlink_priv(devlink);
+ priv = &dev->priv;
+ priv->pci_dev_data = id->driver_data;
+
+ pci_set_drvdata(pdev, dev);
+
+ dev->pdev = pdev;
+ dev->event = mlx5_core_event;
+ dev->profile = &profile[prof_sel];
+
+ INIT_LIST_HEAD(&priv->ctx_list);
+ spin_lock_init(&priv->ctx_lock);
+ mutex_init(&dev->pci_status_mutex);
+ mutex_init(&dev->intf_state_mutex);
+
+ INIT_LIST_HEAD(&priv->waiting_events_list);
+ priv->is_accum_events = false;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ err = init_srcu_struct(&priv->pfault_srcu);
+ if (err) {
+ dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
+ err);
+ goto clean_dev;
+ }
+#endif
+ mutex_init(&priv->bfregs.reg_head.lock);
+ mutex_init(&priv->bfregs.wc_head.lock);
+ INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
+ INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
+
+ err = mlx5_pci_init(dev, priv);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
+ goto clean_srcu;
+ }
+
+ err = mlx5_health_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
+ goto close_pci;
+ }
+
+ mlx5_pagealloc_init(dev);
+
+ err = mlx5_load_one(dev, priv, true);
+ if (err) {
+ dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
+ goto clean_health;
+ }
+
+ request_module_nowait(MLX5_IB_MOD);
+
+ err = devlink_register(devlink, &pdev->dev);
+ if (err)
+ goto clean_load;
+
+ pci_save_state(pdev);
+ return 0;
+
+clean_load:
+ mlx5_unload_one(dev, priv, true);
+clean_health:
+ mlx5_pagealloc_cleanup(dev);
+ mlx5_health_cleanup(dev);
+close_pci:
+ mlx5_pci_close(dev, priv);
+clean_srcu:
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ cleanup_srcu_struct(&priv->pfault_srcu);
+clean_dev:
+#endif
+ devlink_free(devlink);
+
+ return err;
+}
+
+static void remove_one(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(dev);
+ struct mlx5_priv *priv = &dev->priv;
+
+ devlink_unregister(devlink);
+ mlx5_unregister_device(dev);
+
+ if (mlx5_unload_one(dev, priv, true)) {
+ dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
+ mlx5_health_cleanup(dev);
+ return;
+ }
+
+ mlx5_pagealloc_cleanup(dev);
+ mlx5_health_cleanup(dev);
+ mlx5_pci_close(dev, priv);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ cleanup_srcu_struct(&priv->pfault_srcu);
+#endif
+ devlink_free(devlink);
+}
+
+static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_priv *priv = &dev->priv;
+
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+
+ mlx5_enter_error_state(dev, false);
+ mlx5_unload_one(dev, priv, false);
+ /* In case of kernel call drain the health wq */
+ if (state) {
+ mlx5_drain_health_wq(dev);
+ mlx5_pci_disable_device(dev);
+ }
+
+ return state == pci_channel_io_perm_failure ?
+ PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+/* wait for the device to show vital signs by waiting
+ * for the health counter to start counting.
+ */
+static int wait_vital(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_health *health = &dev->priv.health;
+ const int niter = 100;
+ u32 last_count = 0;
+ u32 count;
+ int i;
+
+ for (i = 0; i < niter; i++) {
+ count = ioread32be(health->health_counter);
+ if (count && count != 0xffffffff) {
+ if (last_count && last_count != count) {
+ dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
+ return 0;
+ }
+ last_count = count;
+ }
+ msleep(50);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err;
+
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+
+ err = mlx5_pci_enable_device(dev);
+ if (err) {
+ dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+ , __func__, err);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ if (wait_vital(pdev)) {
+ dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_priv *priv = &dev->priv;
+ int err;
+
+ dev_info(&pdev->dev, "%s was called\n", __func__);
+
+ err = mlx5_load_one(dev, priv, false);
+ if (err)
+ dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
+ , __func__, err);
+ else
+ dev_info(&pdev->dev, "%s: device recovered\n", __func__);
+}
+
+static const struct pci_error_handlers mlx5_err_handler = {
+ .error_detected = mlx5_pci_err_detected,
+ .slot_reset = mlx5_pci_slot_reset,
+ .resume = mlx5_pci_resume
+};
+
+static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
+{
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev, force_teardown)) {
+ mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
+ return -EAGAIN;
+ }
+
+ /* Panic tear down fw command will stop the PCI bus communication
+ * with the HCA, so the health polll is no longer needed.
+ */
+ mlx5_drain_health_wq(dev);
+ mlx5_stop_health_poll(dev, false);
+
+ ret = mlx5_cmd_force_teardown_hca(dev);
+ if (ret) {
+ mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+ mlx5_start_health_poll(dev);
+ return ret;
+ }
+
+ mlx5_enter_error_state(dev, true);
+
+ /* Some platforms requiring freeing the IRQ's in the shutdown
+ * flow. If they aren't freed they can't be allocated after
+ * kexec. There is no need to cleanup the mlx5_core software
+ * contexts.
+ */
+ mlx5_irq_clear_affinity_hints(dev);
+ mlx5_core_eq_free_irqs(dev);
+
+ return 0;
+}
+
+static void shutdown(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_priv *priv = &dev->priv;
+ int err;
+
+ dev_info(&pdev->dev, "Shutdown was called\n");
+ err = mlx5_try_fast_unload(dev);
+ if (err)
+ mlx5_unload_one(dev, priv, false);
+ mlx5_pci_disable_device(dev);
+}
+
+static const struct pci_device_id mlx5_core_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
+ { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
+ { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */
+ { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
+ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */
+ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */
+ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */
+ { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */
+ { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */
+ { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */
+ { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
+ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
+ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+ mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+void mlx5_recover_device(struct mlx5_core_dev *dev)
+{
+ mlx5_pci_disable_device(dev);
+ if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
+ mlx5_pci_resume(dev->pdev);
+}
+
+static struct pci_driver mlx5_core_driver = {
+ .name = DRIVER_NAME,
+ .id_table = mlx5_core_pci_table,
+ .probe = init_one,
+ .remove = remove_one,
+ .shutdown = shutdown,
+ .err_handler = &mlx5_err_handler,
+ .sriov_configure = mlx5_core_sriov_configure,
+};
+
+static void mlx5_core_verify_params(void)
+{
+ if (prof_sel >= ARRAY_SIZE(profile)) {
+ pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n",
+ prof_sel,
+ ARRAY_SIZE(profile) - 1,
+ MLX5_DEFAULT_PROF);
+ prof_sel = MLX5_DEFAULT_PROF;
+ }
+}
+
+static int __init init(void)
+{
+ int err;
+
+ get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
+
+ mlx5_core_verify_params();
+ mlx5_fpga_ipsec_build_fs_cmds();
+ mlx5_register_debugfs();
+
+ err = pci_register_driver(&mlx5_core_driver);
+ if (err)
+ goto err_debug;
+
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5e_init();
+#endif
+
+ return 0;
+
+err_debug:
+ mlx5_unregister_debugfs();
+ return err;
+}
+
+static void __exit cleanup(void)
+{
+#ifdef CONFIG_MLX5_CORE_EN
+ mlx5e_cleanup();
+#endif
+ pci_unregister_driver(&mlx5_core_driver);
+ mlx5_unregister_debugfs();
+}
+
+module_init(init);
+module_exit(cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
new file mode 100644
index 000000000..ba2b09cc1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+
+int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+ u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {0};
+ void *gid;
+
+ MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+ MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+ gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_attach_mcg);
+
+int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
+{
+ u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {0};
+ void *gid;
+
+ MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+ gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
new file mode 100644
index 000000000..b4134fa0b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_CORE_H__
+#define __MLX5_CORE_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/if_link.h>
+#include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
+
+#define DRIVER_NAME "mlx5_core"
+#define DRIVER_VERSION "5.0-0"
+
+extern uint mlx5_core_debug_mask;
+
+#define mlx5_core_dbg(__dev, format, ...) \
+ dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_dbg_once(__dev, format, ...) \
+ dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_dbg_mask(__dev, mask, format, ...) \
+do { \
+ if ((mask) & mlx5_core_debug_mask) \
+ mlx5_core_dbg(__dev, format, ##__VA_ARGS__); \
+} while (0)
+
+#define mlx5_core_err(__dev, format, ...) \
+ dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_err_rl(__dev, format, ...) \
+ dev_err_ratelimited(&(__dev)->pdev->dev, \
+ "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_warn(__dev, format, ...) \
+ dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, \
+ ##__VA_ARGS__)
+
+#define mlx5_core_info(__dev, format, ...) \
+ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
+
+enum {
+ MLX5_CMD_DATA, /* print command payload only */
+ MLX5_CMD_TIME, /* print command execution time */
+};
+
+enum {
+ MLX5_DRIVER_STATUS_ABORTED = 0xfe,
+ MLX5_DRIVER_SYND = 0xbadd00de,
+};
+
+int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
+int mlx5_query_board_id(struct mlx5_core_dev *dev);
+int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
+int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+ unsigned long param);
+void mlx5_core_page_fault(struct mlx5_core_dev *dev,
+ struct mlx5_pagefault *pfault);
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
+void mlx5_disable_device(struct mlx5_core_dev *dev);
+void mlx5_recover_device(struct mlx5_core_dev *dev);
+int mlx5_sriov_init(struct mlx5_core_dev *dev);
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
+int mlx5_sriov_attach(struct mlx5_core_dev *dev);
+void mlx5_sriov_detach(struct mlx5_core_dev *dev);
+int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
+int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *context, u32 *element_id);
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *context, u32 element_id,
+ u32 modify_bitmask);
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ u32 element_id);
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
+u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ int nent, u64 mask, const char *name,
+ enum mlx5_eq_type type);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+ u32 *out, int outlen);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
+/* This function should only be called after mlx5_cmd_force_teardown_hca */
+void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
+struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
+void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
+
+int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
+ u8 access_reg_group);
+int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group,
+ u8 access_reg_group);
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+ u8 feature_group, u8 access_reg_group);
+
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove(struct mlx5_core_dev *dev);
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_attach_device(struct mlx5_core_dev *dev);
+void mlx5_detach_device(struct mlx5_core_dev *dev);
+bool mlx5_device_registered(struct mlx5_core_dev *dev);
+int mlx5_register_device(struct mlx5_core_dev *dev);
+void mlx5_unregister_device(struct mlx5_core_dev *dev);
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
+void mlx5_dev_list_lock(void);
+void mlx5_dev_list_unlock(void);
+int mlx5_dev_list_trylock(void);
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+ int header_type,
+ size_t size,
+ void *encap_header,
+ u32 *encap_id);
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
+
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+ u8 namespace, u8 num_actions,
+ void *modify_actions, u32 *modify_header_id);
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
+
+int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size);
+int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
+int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
+int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
+
+#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \
+ MLX5_CAP_GEN((mdev), pps_modify) && \
+ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \
+ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
+
+int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw);
+
+void mlx5e_init(void);
+void mlx5e_cleanup(void);
+
+static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
+{
+ /* LACP owner conditions:
+ * 1) Function is physical.
+ * 2) LAG is supported by FW.
+ * 3) LAG is managed by driver (currently the only option).
+ */
+ return MLX5_CAP_GEN(dev, vport_group_manager) &&
+ (MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
+ MLX5_CAP_GEN(dev, lag_master);
+}
+
+int mlx5_lag_allow(struct mlx5_core_dev *dev);
+int mlx5_lag_forbid(struct mlx5_core_dev *dev);
+
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
new file mode 100644
index 000000000..0670165af
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+
+ memset(table, 0, sizeof(*table));
+ rwlock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+}
+
+void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
+{
+}
+
+int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
+ struct mlx5_core_mkey *mkey,
+ u32 *in, int inlen,
+ u32 *out, int outlen,
+ mlx5_cmd_cbk_t callback, void *context)
+{
+ struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+ u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+ u32 mkey_index;
+ void *mkc;
+ int err;
+ u8 key;
+
+ spin_lock_irq(&dev->priv.mkey_lock);
+ key = dev->priv.mkey_key++;
+ spin_unlock_irq(&dev->priv.mkey_lock);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+ MLX5_SET(mkc, mkc, mkey_7_0, key);
+
+ if (callback)
+ return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+ callback, context);
+
+ err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
+ if (err)
+ return err;
+
+ mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
+ mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+ mkey->size = MLX5_GET64(mkc, mkc, len);
+ mkey->key = mlx5_idx_to_mkey(mkey_index) | key;
+ mkey->pd = MLX5_GET(mkc, mkc, pd);
+
+ mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
+ mkey_index, key, mkey->key);
+
+ /* connect to mkey tree */
+ write_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
+ write_unlock_irq(&table->lock);
+ if (err) {
+ mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+ mlx5_base_mkey(mkey->key), err);
+ mlx5_core_destroy_mkey(dev, mkey);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
+
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+ struct mlx5_core_mkey *mkey,
+ u32 *in, int inlen)
+{
+ return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+ NULL, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(mlx5_core_create_mkey);
+
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
+ struct mlx5_core_mkey *mkey)
+{
+ struct mlx5_mkey_table *table = &dev->priv.mkey_table;
+ u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
+ struct mlx5_core_mkey *deleted_mkey;
+ unsigned long flags;
+
+ write_lock_irqsave(&table->lock, flags);
+ deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
+ write_unlock_irqrestore(&table->lock, flags);
+ if (!deleted_mkey) {
+ mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+ mlx5_base_mkey(mkey->key));
+ return -ENOENT;
+ }
+
+ MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_mkey);
+
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0};
+
+ memset(out, 0, outlen);
+ MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
+ MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_mkey);
+
+static inline u32 mlx5_get_psv(u32 *out, int psv_index)
+{
+ switch (psv_index) {
+ case 1: return MLX5_GET(create_psv_out, out, psv1_index);
+ case 2: return MLX5_GET(create_psv_out, out, psv2_index);
+ case 3: return MLX5_GET(create_psv_out, out, psv3_index);
+ default: return MLX5_GET(create_psv_out, out, psv0_index);
+ }
+}
+
+int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
+ int npsvs, u32 *sig_index)
+{
+ u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {0};
+ int i, err;
+
+ if (npsvs > MLX5_MAX_PSVS)
+ return -EINVAL;
+
+ MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV);
+ MLX5_SET(create_psv_in, in, pd, pdn);
+ MLX5_SET(create_psv_in, in, num_psv, npsvs);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ for (i = 0; i < npsvs; i++)
+ sig_index[i] = mlx5_get_psv(out, i);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_psv);
+
+int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0};
+
+ MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, in, psvn, psv_num);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_psv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
new file mode 100644
index 000000000..9c3653e06
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+enum {
+ MLX5_PAGES_CANT_GIVE = 0,
+ MLX5_PAGES_GIVE = 1,
+ MLX5_PAGES_TAKE = 2
+};
+
+struct mlx5_pages_req {
+ struct mlx5_core_dev *dev;
+ u16 func_id;
+ s32 npages;
+ struct work_struct work;
+};
+
+struct fw_page {
+ struct rb_node rb_node;
+ u64 addr;
+ struct page *page;
+ u16 func_id;
+ unsigned long bitmask;
+ struct list_head list;
+ unsigned free_count;
+};
+
+enum {
+ MAX_RECLAIM_TIME_MSECS = 5000,
+ MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
+};
+
+enum {
+ MLX5_MAX_RECLAIM_TIME_MILI = 5000,
+ MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+};
+
+static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
+{
+ struct rb_root *root = &dev->priv.page_root;
+ struct rb_node **new = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct fw_page *nfp;
+ struct fw_page *tfp;
+ int i;
+
+ while (*new) {
+ parent = *new;
+ tfp = rb_entry(parent, struct fw_page, rb_node);
+ if (tfp->addr < addr)
+ new = &parent->rb_left;
+ else if (tfp->addr > addr)
+ new = &parent->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
+ if (!nfp)
+ return -ENOMEM;
+
+ nfp->addr = addr;
+ nfp->page = page;
+ nfp->func_id = func_id;
+ nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+ for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+ set_bit(i, &nfp->bitmask);
+
+ rb_link_node(&nfp->rb_node, parent, new);
+ rb_insert_color(&nfp->rb_node, root);
+ list_add(&nfp->list, &dev->priv.free_list);
+
+ return 0;
+}
+
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
+{
+ struct rb_root *root = &dev->priv.page_root;
+ struct rb_node *tmp = root->rb_node;
+ struct fw_page *result = NULL;
+ struct fw_page *tfp;
+
+ while (tmp) {
+ tfp = rb_entry(tmp, struct fw_page, rb_node);
+ if (tfp->addr < addr) {
+ tmp = tmp->rb_left;
+ } else if (tfp->addr > addr) {
+ tmp = tmp->rb_right;
+ } else {
+ result = tfp;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
+ s32 *npages, int boot)
+{
+ u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0};
+ int err;
+
+ MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+ MLX5_SET(query_pages_in, in, op_mod, boot ?
+ MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
+ MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *npages = MLX5_GET(query_pages_out, out, num_pages);
+ *func_id = MLX5_GET(query_pages_out, out, function_id);
+
+ return err;
+}
+
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+{
+ struct fw_page *fp;
+ unsigned n;
+
+ if (list_empty(&dev->priv.free_list))
+ return -ENOMEM;
+
+ fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+ n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+ if (n >= MLX5_NUM_4K_IN_PAGE) {
+ mlx5_core_warn(dev, "alloc 4k bug\n");
+ return -ENOENT;
+ }
+ clear_bit(n, &fp->bitmask);
+ fp->free_count--;
+ if (!fp->free_count)
+ list_del(&fp->list);
+
+ *addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
+
+ return 0;
+}
+
+#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+{
+ struct fw_page *fwp;
+ int n;
+
+ fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
+ if (!fwp) {
+ mlx5_core_warn(dev, "page not found\n");
+ return;
+ }
+
+ n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+ fwp->free_count++;
+ set_bit(n, &fwp->bitmask);
+ if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+ rb_erase(&fwp->rb_node, &dev->priv.page_root);
+ if (fwp->free_count != 1)
+ list_del(&fwp->list);
+ dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(fwp->page);
+ kfree(fwp);
+ } else if (fwp->free_count == 1) {
+ list_add(&fwp->list, &dev->priv.free_list);
+ }
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+{
+ struct page *page;
+ u64 zero_addr = 1;
+ u64 addr;
+ int err;
+ int nid = dev_to_node(&dev->pdev->dev);
+
+ page = alloc_pages_node(nid, GFP_HIGHUSER, 0);
+ if (!page) {
+ mlx5_core_warn(dev, "failed to allocate page\n");
+ return -ENOMEM;
+ }
+map:
+ addr = dma_map_page(&dev->pdev->dev, page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&dev->pdev->dev, addr)) {
+ mlx5_core_warn(dev, "failed dma mapping page\n");
+ err = -ENOMEM;
+ goto err_mapping;
+ }
+
+ /* Firmware doesn't support page with physical address 0 */
+ if (addr == 0) {
+ zero_addr = addr;
+ goto map;
+ }
+
+ err = insert_page(dev, addr, page, func_id);
+ if (err) {
+ mlx5_core_err(dev, "failed to track allocated page\n");
+ dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+
+err_mapping:
+ if (err)
+ __free_page(page);
+
+ if (zero_addr == 0)
+ dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return err;
+}
+
+static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
+{
+ u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
+ int err;
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
+ func_id, err);
+}
+
+static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
+ int notify_fail)
+{
+ u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+ int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
+ u64 addr;
+ int err;
+ u32 *in;
+ int i;
+
+ inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
+ goto out_free;
+ }
+
+ for (i = 0; i < npages; i++) {
+retry:
+ err = alloc_4k(dev, &addr);
+ if (err) {
+ if (err == -ENOMEM)
+ err = alloc_system_page(dev, func_id);
+ if (err)
+ goto out_4k;
+
+ goto retry;
+ }
+ MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
+ }
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err) {
+ mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
+ func_id, npages, err);
+ goto out_4k;
+ }
+
+ dev->priv.fw_pages += npages;
+ if (func_id)
+ dev->priv.vfs_pages += npages;
+
+ mlx5_core_dbg(dev, "err %d\n", err);
+
+ kvfree(in);
+ return 0;
+
+out_4k:
+ for (i--; i >= 0; i--)
+ free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
+out_free:
+ kvfree(in);
+ if (notify_fail)
+ page_notify_fail(dev, func_id);
+ return err;
+}
+
+static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index,
+ u32 npages)
+{
+ u32 pages_set = 0;
+ unsigned int n;
+
+ for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) {
+ MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set,
+ fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE));
+ pages_set++;
+
+ if (!--npages)
+ break;
+ }
+
+ return pages_set;
+}
+
+static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
+ u32 *in, int in_size, u32 *out, int out_size)
+{
+ struct fw_page *fwp;
+ struct rb_node *p;
+ u32 func_id;
+ u32 npages;
+ u32 i = 0;
+
+ if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return mlx5_cmd_exec(dev, in, in_size, out, out_size);
+
+ /* No hard feelings, we want our pages back! */
+ npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+ func_id = MLX5_GET(manage_pages_in, in, function_id);
+
+ p = rb_first(&dev->priv.page_root);
+ while (p && i < npages) {
+ fwp = rb_entry(p, struct fw_page, rb_node);
+ p = rb_next(p);
+ if (fwp->func_id != func_id)
+ continue;
+
+ i += fwp_fill_manage_pages_out(fwp, out, i, npages - i);
+ }
+
+ MLX5_SET(manage_pages_out, out, output_num_entries, i);
+ return 0;
+}
+
+static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
+ int *nclaimed)
+{
+ int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
+ u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
+ int num_claimed;
+ u32 *out;
+ int err;
+ int i;
+
+ if (nclaimed)
+ *nclaimed = 0;
+
+ outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+ MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
+ MLX5_SET(manage_pages_in, in, function_id, func_id);
+ MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
+ mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
+ err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
+ if (err) {
+ mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
+ goto out_free;
+ }
+
+ num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
+ if (num_claimed > npages) {
+ mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
+ num_claimed, npages);
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ for (i = 0; i < num_claimed; i++)
+ free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+
+ if (nclaimed)
+ *nclaimed = num_claimed;
+
+ dev->priv.fw_pages -= num_claimed;
+ if (func_id)
+ dev->priv.vfs_pages -= num_claimed;
+
+out_free:
+ kvfree(out);
+ return err;
+}
+
+static void pages_work_handler(struct work_struct *work)
+{
+ struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
+ struct mlx5_core_dev *dev = req->dev;
+ int err = 0;
+
+ if (req->npages < 0)
+ err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
+ else if (req->npages > 0)
+ err = give_pages(dev, req->func_id, req->npages, 1);
+
+ if (err)
+ mlx5_core_warn(dev, "%s fail %d\n",
+ req->npages < 0 ? "reclaim" : "give", err);
+
+ kfree(req);
+}
+
+void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
+ s32 npages)
+{
+ struct mlx5_pages_req *req;
+
+ req = kzalloc(sizeof(*req), GFP_ATOMIC);
+ if (!req) {
+ mlx5_core_warn(dev, "failed to allocate pages request\n");
+ return;
+ }
+
+ req->dev = dev;
+ req->func_id = func_id;
+ req->npages = npages;
+ INIT_WORK(&req->work, pages_work_handler);
+ queue_work(dev->priv.pg_wq, &req->work);
+}
+
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
+{
+ u16 uninitialized_var(func_id);
+ s32 uninitialized_var(npages);
+ int err;
+
+ err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
+ if (err)
+ return err;
+
+ mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
+ npages, boot ? "boot" : "init", func_id);
+
+ return give_pages(dev, func_id, npages, 0);
+}
+
+enum {
+ MLX5_BLKS_FOR_RECLAIM_PAGES = 12
+};
+
+static int optimal_reclaimed_pages(void)
+{
+ struct mlx5_cmd_prot_block *block;
+ struct mlx5_cmd_layout *lay;
+ int ret;
+
+ ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
+ MLX5_ST_SZ_BYTES(manage_pages_out)) /
+ MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
+
+ return ret;
+}
+
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+ unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+ struct fw_page *fwp;
+ struct rb_node *p;
+ int nclaimed = 0;
+ int err = 0;
+
+ do {
+ p = rb_first(&dev->priv.page_root);
+ if (p) {
+ fwp = rb_entry(p, struct fw_page, rb_node);
+ err = reclaim_pages(dev, fwp->func_id,
+ optimal_reclaimed_pages(),
+ &nclaimed);
+
+ if (err) {
+ mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
+ err);
+ return err;
+ }
+ if (nclaimed)
+ end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+ }
+ if (time_after(jiffies, end)) {
+ mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
+ break;
+ }
+ } while (p);
+
+ WARN(dev->priv.fw_pages,
+ "FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.fw_pages);
+ WARN(dev->priv.vfs_pages,
+ "VFs FW pages counter is %d after reclaiming all pages\n",
+ dev->priv.vfs_pages);
+
+ return 0;
+}
+
+void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
+{
+ dev->priv.page_root = RB_ROOT;
+ INIT_LIST_HEAD(&dev->priv.free_list);
+}
+
+void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
+{
+ /* nothing */
+}
+
+int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
+{
+ dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
+ if (!dev->priv.pg_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
+{
+ destroy_workqueue(dev->priv.pg_wq);
+}
+
+int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+{
+ unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ int prev_vfs_pages = dev->priv.vfs_pages;
+
+ /* In case of internal error we will free the pages manually later */
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5_core_warn(dev, "Skipping wait for vf pages stage");
+ return 0;
+ }
+
+ mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
+ dev->priv.name);
+ while (dev->priv.vfs_pages) {
+ if (time_after(jiffies, end)) {
+ mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+ return -ETIMEDOUT;
+ }
+ if (dev->priv.vfs_pages < prev_vfs_pages) {
+ end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
+ prev_vfs_pages = dev->priv.vfs_pages;
+ }
+ msleep(50);
+ }
+
+ mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
new file mode 100644
index 000000000..bd830d8d6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0};
+ int err;
+
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *pdn = MLX5_GET(alloc_pd_out, out, pd);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_pd);
+
+int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0};
+
+ MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, in, pd, pdn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
new file mode 100644
index 000000000..09b6b1bfb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -0,0 +1,1116 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
+ int size_in, void *data_out, int size_out,
+ u16 reg_id, int arg, int write)
+{
+ int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
+ int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
+ int err = -ENOMEM;
+ u32 *out = NULL;
+ u32 *in = NULL;
+ void *data;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!in || !out)
+ goto out;
+
+ data = MLX5_ADDR_OF(access_register_in, in, register_data);
+ memcpy(data, data_in, size_in);
+
+ MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+ MLX5_SET(access_register_in, in, op_mod, !write);
+ MLX5_SET(access_register_in, in, argument, arg);
+ MLX5_SET(access_register_in, in, register_id, reg_id);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ if (err)
+ goto out;
+
+ data = MLX5_ADDR_OF(access_register_out, out, register_data);
+ memcpy(data_out, data, size_out);
+
+out:
+ kvfree(out);
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
+
+int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
+ u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(pcam_reg);
+
+ MLX5_SET(pcam_reg, in, feature_group, feature_group);
+ MLX5_SET(pcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0);
+}
+
+int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group,
+ u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0};
+ int sz = MLX5_ST_SZ_BYTES(mcam_reg);
+
+ MLX5_SET(mcam_reg, in, feature_group, feature_group);
+ MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0);
+}
+
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+ u8 feature_group, u8 access_reg_group)
+{
+ u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(qcam_reg);
+
+ MLX5_SET(qcam_reg, in, feature_group, feature_group);
+ MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group);
+
+ return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0);
+}
+
+struct mlx5_reg_pcap {
+ u8 rsvd0;
+ u8 port_num;
+ u8 rsvd1[2];
+ __be32 caps_127_96;
+ __be32 caps_95_64;
+ __be32 caps_63_32;
+ __be32 caps_31_0;
+};
+
+int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps)
+{
+ struct mlx5_reg_pcap in;
+ struct mlx5_reg_pcap out;
+
+ memset(&in, 0, sizeof(in));
+ in.caps_127_96 = cpu_to_be32(caps);
+ in.port_num = port_num;
+
+ return mlx5_core_access_reg(dev, &in, sizeof(in), &out,
+ sizeof(out), MLX5_REG_PCAP, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_caps);
+
+int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
+ int ptys_size, int proto_mask, u8 local_port)
+{
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+
+ MLX5_SET(ptys_reg, in, local_port, local_port);
+ MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
+ return mlx5_core_access_reg(dev, in, sizeof(in), ptys,
+ ptys_size, MLX5_REG_PTYS, 0, 0);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_ptys);
+
+int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
+{
+ u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(mlcr_reg)];
+
+ MLX5_SET(mlcr_reg, in, local_port, 1);
+ MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MLCR, 0, 1);
+}
+
+int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
+ u32 *proto_cap, int proto_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
+ if (err)
+ return err;
+
+ if (proto_mask == MLX5_PTYS_EN)
+ *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
+ else
+ *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap);
+
+int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
+ u32 *proto_admin, int proto_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
+ if (err)
+ return err;
+
+ if (proto_mask == MLX5_PTYS_EN)
+ *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
+ else
+ *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin);
+
+int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
+ u8 *link_width_oper, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port);
+ if (err)
+ return err;
+
+ *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
+
+int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
+ u32 *proto_oper, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
+ local_port);
+ if (err)
+ return err;
+
+ *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
+
+int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
+ u8 *proto_oper, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB,
+ local_port);
+ if (err)
+ return err;
+
+ *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
+
+int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
+ u32 proto_admin, int proto_mask)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+ u8 an_disable_admin;
+ u8 an_disable_cap;
+ u8 an_status;
+
+ mlx5_query_port_autoneg(dev, proto_mask, &an_status,
+ &an_disable_cap, &an_disable_admin);
+ if (!an_disable_cap && an_disable)
+ return -EPERM;
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(ptys_reg, in, local_port, 1);
+ MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+ MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
+ if (proto_mask == MLX5_PTYS_EN)
+ MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+ else
+ MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_ptys);
+
+/* This function should be used after setting a port register only */
+void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
+{
+ enum mlx5_port_status ps;
+
+ mlx5_query_port_admin_status(dev, &ps);
+ mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN);
+ if (ps == MLX5_PORT_UP)
+ mlx5_set_port_admin_status(dev, MLX5_PORT_UP);
+}
+EXPORT_SYMBOL_GPL(mlx5_toggle_port_link);
+
+int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
+ enum mlx5_port_status status)
+{
+ u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(paos_reg)];
+
+ MLX5_SET(paos_reg, in, local_port, 1);
+ MLX5_SET(paos_reg, in, admin_status, status);
+ MLX5_SET(paos_reg, in, ase, 1);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PAOS, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status);
+
+int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
+ enum mlx5_port_status *status)
+{
+ u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(paos_reg)];
+ int err;
+
+ MLX5_SET(paos_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PAOS, 0, 0);
+ if (err)
+ return err;
+ *status = MLX5_GET(paos_reg, out, admin_status);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status);
+
+static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
+ u16 *max_mtu, u16 *oper_mtu, u8 port)
+{
+ u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+
+ MLX5_SET(pmtu_reg, in, local_port, port);
+ mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PMTU, 0, 0);
+
+ if (max_mtu)
+ *max_mtu = MLX5_GET(pmtu_reg, out, max_mtu);
+ if (oper_mtu)
+ *oper_mtu = MLX5_GET(pmtu_reg, out, oper_mtu);
+ if (admin_mtu)
+ *admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu);
+}
+
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port)
+{
+ u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
+
+ MLX5_SET(pmtu_reg, in, admin_mtu, mtu);
+ MLX5_SET(pmtu_reg, in, local_port, port);
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PMTU, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_mtu);
+
+void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu,
+ u8 port)
+{
+ mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu);
+
+void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
+ u8 port)
+{
+ mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu);
+
+static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
+{
+ u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pmlp_reg)];
+ int module_mapping;
+ int err;
+
+ MLX5_SET(pmlp_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+ MLX5_REG_PMLP, 0, 0);
+ if (err)
+ return err;
+
+ module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping);
+ *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK;
+
+ return 0;
+}
+
+int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
+ u16 offset, u16 size, u8 *data)
+{
+ u32 out[MLX5_ST_SZ_DW(mcia_reg)];
+ u32 in[MLX5_ST_SZ_DW(mcia_reg)];
+ int module_num;
+ u16 i2c_addr;
+ int status;
+ int err;
+ void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
+
+ err = mlx5_query_module_num(dev, &module_num);
+ if (err)
+ return err;
+
+ memset(in, 0, sizeof(in));
+ size = min_t(int, size, MLX5_EEPROM_MAX_BYTES);
+
+ if (offset < MLX5_EEPROM_PAGE_LENGTH &&
+ offset + size > MLX5_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+ size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
+
+ i2c_addr = MLX5_I2C_ADDR_LOW;
+
+ MLX5_SET(mcia_reg, in, l, 0);
+ MLX5_SET(mcia_reg, in, module, module_num);
+ MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr);
+ MLX5_SET(mcia_reg, in, page_number, 0);
+ MLX5_SET(mcia_reg, in, device_address, offset);
+ MLX5_SET(mcia_reg, in, size, size);
+
+ err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MCIA, 0, 0);
+ if (err)
+ return err;
+
+ status = MLX5_GET(mcia_reg, out, status);
+ if (status) {
+ mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n",
+ status);
+ return -EIO;
+ }
+
+ memcpy(data, ptr, size);
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom);
+
+static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc,
+ int pvlc_size, u8 local_port)
+{
+ u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0};
+
+ MLX5_SET(pvlc_reg, in, local_port, local_port);
+ return mlx5_core_access_reg(dev, in, sizeof(in), pvlc,
+ pvlc_size, MLX5_REG_PVLC, 0, 0);
+}
+
+int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,
+ u8 *vl_hw_cap, u8 local_port)
+{
+ u32 out[MLX5_ST_SZ_DW(pvlc_reg)];
+ int err;
+
+ err = mlx5_query_port_pvlc(dev, out, sizeof(out), local_port);
+ if (err)
+ return err;
+
+ *vl_hw_cap = MLX5_GET(pvlc_reg, out, vl_hw_cap);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_vl_hw_cap);
+
+int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
+ u8 port_num, void *out, size_t sz)
+{
+ u32 *in;
+ int err;
+
+ in = kvzalloc(sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(ppcnt_reg, in, local_port, port_num);
+
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
+ err = mlx5_core_access_reg(dev, in, sz, out,
+ sz, MLX5_REG_PPCNT, 0, 0);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
+
+static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
+ u32 out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ out_size, MLX5_REG_PFCC, 0, 0);
+}
+
+int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pptx, tx_pause);
+ MLX5_SET(pfcc_reg, in, pprx, rx_pause);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_pause);
+
+int mlx5_query_port_pause(struct mlx5_core_dev *dev,
+ u32 *rx_pause, u32 *tx_pause)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (rx_pause)
+ *rx_pause = MLX5_GET(pfcc_reg, out, pprx);
+
+ if (tx_pause)
+ *tx_pause = MLX5_GET(pfcc_reg, out, pptx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
+
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 stall_critical_watermark,
+ u16 stall_minor_watermark)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pptx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, pprx_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, ppan_mask_n, 1);
+ MLX5_SET(pfcc_reg, in, critical_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, minor_stall_mask, 1);
+ MLX5_SET(pfcc_reg, in, device_stall_critical_watermark,
+ stall_critical_watermark);
+ MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+ u16 *stall_critical_watermark,
+ u16 *stall_minor_watermark)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (stall_critical_watermark)
+ *stall_critical_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_critical_watermark);
+
+ if (stall_minor_watermark)
+ *stall_minor_watermark = MLX5_GET(pfcc_reg, out,
+ device_stall_minor_watermark);
+
+ return 0;
+}
+
+int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
+{
+ u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+ MLX5_SET(pfcc_reg, in, local_port, 1);
+ MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx);
+ MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx);
+ MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_tx);
+ MLX5_SET_TO_ONES(pfcc_reg, in, prio_mask_rx);
+
+ return mlx5_core_access_reg(dev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
+
+int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
+{
+ u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+ int err;
+
+ err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+ if (err)
+ return err;
+
+ if (pfc_en_tx)
+ *pfc_en_tx = MLX5_GET(pfcc_reg, out, pfctx);
+
+ if (pfc_en_rx)
+ *pfc_en_rx = MLX5_GET(pfcc_reg, out, pfcrx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
+
+void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
+ u8 *an_status,
+ u8 *an_disable_cap, u8 *an_disable_admin)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+ *an_status = 0;
+ *an_disable_cap = 0;
+ *an_disable_admin = 0;
+
+ if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1))
+ return;
+
+ *an_status = MLX5_GET(ptys_reg, out, an_status);
+ *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+ *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg);
+
+int mlx5_max_tc(struct mlx5_core_dev *mdev)
+{
+ u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
+
+ return num_tc - 1;
+}
+
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0};
+
+ MLX5_SET(dcbx_param, in, port_number, 1);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0);
+}
+
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in)
+{
+ u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+ MLX5_SET(dcbx_param, in, port_number, 1);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(out), out,
+ sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1);
+}
+
+int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
+{
+ u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
+ u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+ int err;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ if (prio_tc[i] > mlx5_max_tc(mdev))
+ return -EINVAL;
+
+ MLX5_SET(qtct_reg, in, prio, i);
+ MLX5_SET(qtct_reg, in, tclass, prio_tc[i]);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QTCT, 0, 1);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
+
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+ u8 prio, u8 *tc)
+{
+ u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+ u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+ int err;
+
+ memset(in, 0, sizeof(in));
+ memset(out, 0, sizeof(out));
+
+ MLX5_SET(qtct_reg, in, port_number, 1);
+ MLX5_SET(qtct_reg, in, prio, prio);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QTCT, 0, 0);
+ if (!err)
+ *tc = MLX5_GET(qtct_reg, out, tclass);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
+
+static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out),
+ MLX5_REG_QETCR, 0, 1);
+}
+
+static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+
+ if (!MLX5_CAP_GEN(mdev, ets))
+ return -EOPNOTSUPP;
+
+ memset(in, 0, sizeof(in));
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen,
+ MLX5_REG_QETCR, 0, 0);
+}
+
+int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ int i;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1);
+ MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group);
+
+int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev,
+ u8 tc, u8 *tc_group)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+ tc_configuration[tc]);
+
+ *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ group);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group);
+
+int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ int i;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1);
+ MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc);
+
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+ u8 tc, u8 *bw_pct)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+ tc_configuration[tc]);
+
+ *bw_pct = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ bw_allocation);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc);
+
+int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+ u8 *max_bw_value,
+ u8 *max_bw_units)
+{
+ u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
+ void *ets_tcn_conf;
+ int i;
+
+ MLX5_SET(qetc_reg, in, port_number, 1);
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, in, tc_configuration[i]);
+
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, r, 1);
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_units,
+ max_bw_units[i]);
+ MLX5_SET(ets_tcn_config_reg, ets_tcn_conf, max_bw_value,
+ max_bw_value[i]);
+ }
+
+ return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in));
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit);
+
+int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
+ u8 *max_bw_value,
+ u8 *max_bw_units)
+{
+ u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+ void *ets_tcn_conf;
+ int err;
+ int i;
+
+ err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+ if (err)
+ return err;
+
+ for (i = 0; i <= mlx5_max_tc(mdev); i++) {
+ ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[i]);
+
+ max_bw_value[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ max_bw_value);
+ max_bw_units[i] = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+ max_bw_units);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit);
+
+int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
+{
+ u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0};
+
+ MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
+ MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
+ MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
+
+int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
+{
+ u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0};
+ int err;
+
+ MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
+
+static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
+ int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ outlen, MLX5_REG_PCMR, 0, 0);
+}
+
+static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+
+ return mlx5_core_access_reg(mdev, in, inlen, out,
+ sizeof(out), MLX5_REG_PCMR, 0, 1);
+}
+
+int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable)
+{
+ u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+
+ MLX5_SET(pcmr_reg, in, local_port, 1);
+ MLX5_SET(pcmr_reg, in, fcs_chk, enable);
+ return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
+ bool *enabled)
+{
+ u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+ /* Default values for FW which do not support MLX5_REG_PCMR */
+ *supported = false;
+ *enabled = true;
+
+ if (!MLX5_CAP_GEN(mdev, ports_check))
+ return;
+
+ if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+ return;
+
+ *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap));
+ *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
+}
+
+static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
+ "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */
+ "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */
+ "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */
+};
+
+static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
+ "Power budget exceeded",
+ "Long Range for non MLNX cable",
+ "Bus stuck(I2C or data shorted)",
+ "No EEPROM/retry timeout",
+ "Enforce part number list",
+ "Unknown identifier",
+ "High Temperature",
+ "Bad or shorted cable/module",
+ "Unknown status",
+};
+
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+ enum port_module_event_status_type module_status;
+ enum port_module_event_error_type error_type;
+ struct mlx5_eqe_port_module *module_event_eqe;
+ struct mlx5_priv *priv = &dev->priv;
+ u8 module_num;
+
+ module_event_eqe = &eqe->data.port_module;
+ module_num = module_event_eqe->module;
+ module_status = module_event_eqe->module_status &
+ PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+ error_type = module_event_eqe->error_type &
+ PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+ if (module_status < MLX5_MODULE_STATUS_ERROR) {
+ priv->pme_stats.status_counters[module_status - 1]++;
+ } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
+ if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
+ /* Unknown error type */
+ error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
+ priv->pme_stats.error_counters[error_type]++;
+ }
+
+ if (!printk_ratelimit())
+ return;
+
+ if (module_status < MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_info(dev,
+ "Port module event: module %u, %s\n",
+ module_num, mlx5_pme_status[module_status - 1]);
+
+ else if (module_status == MLX5_MODULE_STATUS_ERROR)
+ mlx5_core_info(dev,
+ "Port module event[error]: module %u, %s, %s\n",
+ module_num, mlx5_pme_status[module_status - 1],
+ mlx5_pme_error[error_type]);
+}
+
+int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
+{
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps,
+ mtpps_size, MLX5_REG_MTPPS, 0, 0);
+}
+
+int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size)
+{
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out,
+ sizeof(out), MLX5_REG_MTPPS, 0, 1);
+}
+
+int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode)
+{
+ u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ int err = 0;
+
+ MLX5_SET(mtppse_reg, in, pin, pin);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MTPPSE, 0, 0);
+ if (err)
+ return err;
+
+ *arm = MLX5_GET(mtppse_reg, in, event_arm);
+ *mode = MLX5_GET(mtppse_reg, in, event_generation_mode);
+
+ return err;
+}
+
+int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode)
+{
+ u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+ u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0};
+
+ MLX5_SET(mtppse_reg, in, pin, pin);
+ MLX5_SET(mtppse_reg, in, event_arm, arm);
+ MLX5_SET(mtppse_reg, in, event_generation_mode, mode);
+
+ return mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_MTPPSE, 0, 1);
+}
+
+int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state)
+{
+ u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ int err;
+
+ MLX5_SET(qpts_reg, in, local_port, 1);
+ MLX5_SET(qpts_reg, in, trust_state, trust_state);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QPTS, 0, 1);
+ return err;
+}
+
+int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state)
+{
+ u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+ int err;
+
+ MLX5_SET(qpts_reg, in, local_port, 1);
+
+ err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+ sizeof(out), MLX5_REG_QPTS, 0, 0);
+ if (!err)
+ *trust_state = MLX5_GET(qpts_reg, out, trust_state);
+
+ return err;
+}
+
+int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio)
+{
+ int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+ void *qpdpm_dscp;
+ void *out;
+ void *in;
+ int err;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+ if (err)
+ goto out;
+
+ memcpy(in, out, sz);
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+
+ /* Update the corresponding dscp entry */
+ qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]);
+ MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio);
+ MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+
+/* dscp2prio[i]: priority that dscp i mapped to */
+#define MLX5E_SUPPORTED_DSCP 64
+int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio)
+{
+ int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+ void *qpdpm_dscp;
+ void *out;
+ void *in;
+ int err;
+ int i;
+
+ in = kzalloc(sz, GFP_KERNEL);
+ out = kzalloc(sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(qpdpm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+ if (err)
+ goto out;
+
+ for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) {
+ qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]);
+ dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio);
+ }
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
new file mode 100644
index 000000000..479ac21cd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/gfp.h>
+#include <linux/export.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
+
+#include "mlx5_core.h"
+
+static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
+ u32 rsn)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ struct mlx5_core_rsc_common *common;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+
+ common = radix_tree_lookup(&table->tree, rsn);
+ if (common)
+ atomic_inc(&common->refcount);
+
+ spin_unlock_irqrestore(&table->lock, flags);
+
+ if (!common) {
+ mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",
+ rsn);
+ return NULL;
+ }
+ return common;
+}
+
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
+{
+ if (atomic_dec_and_test(&common->refcount))
+ complete(&common->free);
+}
+
+static u64 qp_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+ BIT(MLX5_EVENT_TYPE_COMM_EST) |
+ BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+ BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+ BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+ BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+ BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+ return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+ return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static u64 dct_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+ switch (rsc_type) {
+ case MLX5_EVENT_QUEUE_TYPE_QP:
+ return BIT(event_type) & qp_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_RQ:
+ return BIT(event_type) & rq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_SQ:
+ return BIT(event_type) & sq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_DCT:
+ return BIT(event_type) & dct_allowed_event_types();
+ default:
+ WARN(1, "Event arrived for unknown resource type");
+ return false;
+ }
+}
+
+void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
+{
+ struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+ struct mlx5_core_dct *dct;
+ struct mlx5_core_qp *qp;
+
+ if (!common)
+ return;
+
+ if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+ mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+ event_type, rsn);
+ goto out;
+ }
+
+ switch (common->res) {
+ case MLX5_RES_QP:
+ case MLX5_RES_RQ:
+ case MLX5_RES_SQ:
+ qp = (struct mlx5_core_qp *)common;
+ qp->event(qp, event_type);
+ break;
+ case MLX5_RES_DCT:
+ dct = (struct mlx5_core_dct *)common;
+ if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
+ complete(&dct->drained);
+ break;
+ default:
+ mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
+ }
+out:
+ mlx5_core_put_rsc(common);
+}
+
+static int create_resource_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp,
+ int rsc_type)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ int err;
+
+ qp->common.res = rsc_type;
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree,
+ qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+ qp);
+ spin_unlock_irq(&table->lock);
+ if (err)
+ return err;
+
+ atomic_set(&qp->common.refcount, 1);
+ init_completion(&qp->common.free);
+ qp->pid = current->pid;
+
+ return 0;
+}
+
+static void destroy_resource_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ radix_tree_delete(&table->tree,
+ qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+ spin_unlock_irqrestore(&table->lock, flags);
+ mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+ wait_for_completion(&qp->common.free);
+}
+
+int mlx5_core_create_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
+ u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+ u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ init_completion(&dct->drained);
+ MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
+
+ err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+ if (err) {
+ mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
+ return err;
+ }
+
+ qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+ err = create_resource_common(dev, qp, MLX5_RES_DCT);
+ if (err)
+ goto err_cmd;
+
+ return 0;
+err_cmd:
+ MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
+ mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
+ (void *)&out, sizeof(dout));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
+
+int mlx5_core_create_qp(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp,
+ u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0};
+ u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)];
+ u32 din[MLX5_ST_SZ_DW(destroy_qp_in)];
+ int err;
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (err)
+ return err;
+
+ qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+ mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
+
+ err = create_resource_common(dev, qp, MLX5_RES_QP);
+ if (err)
+ goto err_cmd;
+
+ err = mlx5_debug_qp_add(dev, qp);
+ if (err)
+ mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
+ qp->qpn);
+
+ atomic_inc(&dev->num_qps);
+
+ return 0;
+
+err_cmd:
+ memset(din, 0, sizeof(din));
+ memset(dout, 0, sizeof(dout));
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+ mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
+
+static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
+ MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+ return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)&out, sizeof(out));
+}
+
+int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ err = mlx5_core_drain_dct(dev, dct);
+ if (err) {
+ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ goto destroy;
+ } else {
+ mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err);
+ return err;
+ }
+ }
+ wait_for_completion(&dct->drained);
+destroy:
+ destroy_resource_common(dev, &dct->mqp);
+ MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)&out, sizeof(out));
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
+
+int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {0};
+ int err;
+
+ mlx5_debug_qp_remove(dev, qp);
+
+ destroy_resource_common(dev, qp);
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ atomic_dec(&dev->num_qps);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
+
+int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
+ u32 timeout_usec)
+{
+ u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {0};
+
+ MLX5_SET(set_delay_drop_params_in, in, opcode,
+ MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
+ MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout,
+ timeout_usec / 100);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop);
+
+struct mbox_info {
+ u32 *in;
+ u32 *out;
+ int inlen;
+ int outlen;
+};
+
+static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
+{
+ mbox->inlen = inlen;
+ mbox->outlen = outlen;
+ mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
+ mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
+ if (!mbox->in || !mbox->out) {
+ kfree(mbox->in);
+ kfree(mbox->out);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mbox_free(struct mbox_info *mbox)
+{
+ kfree(mbox->in);
+ kfree(mbox->out);
+}
+
+static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
+ u32 opt_param_mask, void *qpc,
+ struct mbox_info *mbox)
+{
+ mbox->out = NULL;
+ mbox->in = NULL;
+
+#define MBOX_ALLOC(mbox, typ) \
+ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
+
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
+ MLX5_SET(typ##_in, in, opcode, _opcode); \
+ MLX5_SET(typ##_in, in, qpn, _qpn)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
+ MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
+ MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+ memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+
+ switch (opcode) {
+ /* 2RST & 2ERR */
+ case MLX5_CMD_OP_2RST_QP:
+ if (MBOX_ALLOC(mbox, qp_2rst))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ if (MBOX_ALLOC(mbox, qp_2err))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+ break;
+
+ /* MODIFY with QPC */
+ case MLX5_CMD_OP_RST2INIT_QP:
+ if (MBOX_ALLOC(mbox, rst2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ if (MBOX_ALLOC(mbox, init2rtr_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ if (MBOX_ALLOC(mbox, rtr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ if (MBOX_ALLOC(mbox, rts2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ if (MBOX_ALLOC(mbox, sqerr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ if (MBOX_ALLOC(mbox, init2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc);
+ break;
+ default:
+ mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
+ opcode, qpn);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
+ u32 opt_param_mask, void *qpc,
+ struct mlx5_core_qp *qp)
+{
+ struct mbox_info mbox;
+ int err;
+
+ err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
+ opt_param_mask, qpc, &mbox);
+ if (err)
+ return err;
+
+ err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen);
+ mbox_free(&mbox);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
+
+void mlx5_init_qp_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+
+ memset(table, 0, sizeof(*table));
+ spin_lock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ mlx5_qp_debugfs_init(dev);
+}
+
+void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
+{
+ mlx5_qp_debugfs_cleanup(dev);
+}
+
+int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0};
+
+ MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+ MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
+
+int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
+ MLX5_SET(query_dct_in, in, dctn, qp->qpn);
+
+ return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
+ (void *)out, outlen);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_dct_query);
+
+int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {0};
+ int err;
+
+ MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
+
+int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {0};
+
+ MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
+
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq)
+{
+ int err;
+ u32 rqn;
+
+ err = mlx5_core_create_rq(dev, in, inlen, &rqn);
+ if (err)
+ return err;
+
+ rq->qpn = rqn;
+ err = create_resource_common(dev, rq, MLX5_RES_RQ);
+ if (err)
+ goto err_destroy_rq;
+
+ return 0;
+
+err_destroy_rq:
+ mlx5_core_destroy_rq(dev, rq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *rq)
+{
+ destroy_resource_common(dev, rq);
+ mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq)
+{
+ int err;
+ u32 sqn;
+
+ err = mlx5_core_create_sq(dev, in, inlen, &sqn);
+ if (err)
+ return err;
+
+ sq->qpn = sqn;
+ err = create_resource_common(dev, sq, MLX5_RES_SQ);
+ if (err)
+ goto err_destroy_sq;
+
+ return 0;
+
+err_destroy_sq:
+ mlx5_core_destroy_sq(dev, sq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *sq)
+{
+ destroy_resource_common(dev, sq);
+ mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
+
+int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *counter_id = MLX5_GET(alloc_q_counter_out, out,
+ counter_set_id);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter);
+
+int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0};
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter);
+
+int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
+ int reset, void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0};
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, clear, reset);
+ MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
new file mode 100644
index 000000000..bc86dffdc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+/* Scheduling element fw management */
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *ctx, u32 *element_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
+ void *schedc;
+ int err;
+
+ schedc = MLX5_ADDR_OF(create_scheduling_element_in, in,
+ scheduling_context);
+ MLX5_SET(create_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT);
+ MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+ memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *element_id = MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ return 0;
+}
+
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ void *ctx, u32 element_id,
+ u32 modify_bitmask)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+ void *schedc;
+
+ schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in,
+ scheduling_context);
+ MLX5_SET(modify_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT);
+ MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id,
+ element_id);
+ MLX5_SET(modify_scheduling_element_in, in, modify_bitmask,
+ modify_bitmask);
+ MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+ memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+ u32 element_id)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+
+ MLX5_SET(destroy_scheduling_element_in, in, opcode,
+ MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+ MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id,
+ element_id);
+ MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy,
+ hierarchy);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/* Finds an entry where we can register the given rate
+ * If the rate already exists, return the entry where it is registered,
+ * otherwise return the first available entry.
+ * If the table is full, return NULL
+ */
+static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
+ struct mlx5_rate_limit *rl)
+{
+ struct mlx5_rl_entry *ret_entry = NULL;
+ bool empty_found = false;
+ int i;
+
+ for (i = 0; i < table->max_size; i++) {
+ if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl))
+ return &table->rl_entry[i];
+ if (!empty_found && !table->rl_entry[i].rl.rate) {
+ empty_found = true;
+ ret_entry = &table->rl_entry[i];
+ }
+ }
+
+ return ret_entry;
+}
+
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
+ u16 index,
+ struct mlx5_rate_limit *rl)
+{
+ u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
+
+ MLX5_SET(set_pp_rate_limit_in, in, opcode,
+ MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate);
+ MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz);
+ MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+
+ return (rate <= table->max_rate && rate >= table->min_rate);
+}
+EXPORT_SYMBOL(mlx5_rl_is_in_range);
+
+bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
+ struct mlx5_rate_limit *rl_1)
+{
+ return ((rl_0->rate == rl_1->rate) &&
+ (rl_0->max_burst_sz == rl_1->max_burst_sz) &&
+ (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz));
+}
+EXPORT_SYMBOL(mlx5_rl_are_equal);
+
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+ struct mlx5_rate_limit *rl)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry;
+ int err = 0;
+
+ mutex_lock(&table->rl_lock);
+
+ if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) {
+ mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
+ rl->rate, table->min_rate, table->max_rate);
+ err = -EINVAL;
+ goto out;
+ }
+
+ entry = find_rl_entry(table, rl);
+ if (!entry) {
+ mlx5_core_err(dev, "Max number of %u rates reached\n",
+ table->max_size);
+ err = -ENOSPC;
+ goto out;
+ }
+ if (entry->refcount) {
+ /* rate already configured */
+ entry->refcount++;
+ } else {
+ /* new rate limit */
+ err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl);
+ if (err) {
+ mlx5_core_err(dev, "Failed configuring rate limit(err %d): \
+ rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, rl->rate, rl->max_burst_sz,
+ rl->typical_pkt_sz);
+ goto out;
+ }
+ entry->rl = *rl;
+ entry->refcount = 1;
+ }
+ *index = entry->index;
+
+out:
+ mutex_unlock(&table->rl_lock);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_rl_add_rate);
+
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rl_entry *entry = NULL;
+ struct mlx5_rate_limit reset_rl = {0};
+
+ /* 0 is a reserved value for unlimited rate */
+ if (rl->rate == 0)
+ return;
+
+ mutex_lock(&table->rl_lock);
+ entry = find_rl_entry(table, rl);
+ if (!entry || !entry->refcount) {
+ mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \
+ are not configured\n",
+ rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
+ goto out;
+ }
+
+ entry->refcount--;
+ if (!entry->refcount) {
+ /* need to remove rate */
+ mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl);
+ entry->rl = reset_rl;
+ }
+
+out:
+ mutex_unlock(&table->rl_lock);
+}
+EXPORT_SYMBOL(mlx5_rl_remove_rate);
+
+int mlx5_init_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ int i;
+
+ mutex_init(&table->rl_lock);
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
+ table->max_size = 0;
+ return 0;
+ }
+
+ /* First entry is reserved for unlimited rate */
+ table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1;
+ table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
+ table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
+
+ table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
+ GFP_KERNEL);
+ if (!table->rl_entry)
+ return -ENOMEM;
+
+ /* The index represents the index in HW rate limit table
+ * Index 0 is reserved for unlimited rate
+ */
+ for (i = 0; i < table->max_size; i++)
+ table->rl_entry[i].index = i + 1;
+
+ /* Index 0 is reserved */
+ mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n",
+ table->max_size,
+ table->min_rate >> 10,
+ table->max_rate >> 10);
+
+ return 0;
+}
+
+void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rate_limit rl = {0};
+ int i;
+
+ /* Clear all configured rates */
+ for (i = 0; i < table->max_size; i++)
+ if (table->rl_entry[i].rl.rate)
+ mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index,
+ &rl);
+
+ kfree(dev->priv.rl_table.rl_entry);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
new file mode 100644
index 000000000..a0674962f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ return !!sriov->num_vfs;
+}
+
+static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct mlx5_hca_vport_context *in;
+ int err = 0;
+
+ /* Restore sriov guid and policy settings */
+ if (sriov->vfs_ctx[vf].node_guid ||
+ sriov->vfs_ctx[vf].port_guid ||
+ sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) {
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->node_guid = sriov->vfs_ctx[vf].node_guid;
+ in->port_guid = sriov->vfs_ctx[vf].port_guid;
+ in->policy = sriov->vfs_ctx[vf].policy;
+ in->field_select =
+ !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID |
+ !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID |
+ !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY;
+
+ err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in);
+ if (err)
+ mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf);
+
+ kfree(in);
+ }
+
+ return err;
+}
+
+static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+ int vf;
+
+ if (sriov->enabled_vfs) {
+ mlx5_core_warn(dev,
+ "failed to enable SRIOV on device, already enabled with %d vfs\n",
+ sriov->enabled_vfs);
+ return -EBUSY;
+ }
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ goto enable_vfs_hca;
+
+ err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to enable eswitch SRIOV (%d)\n", err);
+ return err;
+ }
+
+enable_vfs_hca:
+ for (vf = 0; vf < num_vfs; vf++) {
+ err = mlx5_core_enable_hca(dev, vf + 1);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
+ continue;
+ }
+ sriov->vfs_ctx[vf].enabled = 1;
+ sriov->enabled_vfs++;
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
+ err = sriov_restore_guids(dev, vf);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to restore VF %d settings, err %d\n",
+ vf, err);
+ continue;
+ }
+ }
+ mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
+ }
+
+ return 0;
+}
+
+static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err;
+ int vf;
+
+ if (!sriov->enabled_vfs)
+ goto out;
+
+ for (vf = 0; vf < sriov->num_vfs; vf++) {
+ if (!sriov->vfs_ctx[vf].enabled)
+ continue;
+ err = mlx5_core_disable_hca(dev, vf + 1);
+ if (err) {
+ mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
+ continue;
+ }
+ sriov->vfs_ctx[vf].enabled = 0;
+ sriov->enabled_vfs--;
+ }
+
+out:
+ if (MLX5_ESWITCH_MANAGER(dev))
+ mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+
+ if (mlx5_wait_for_vf_pages(dev))
+ mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
+}
+
+static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err = 0;
+
+ if (pci_num_vf(pdev)) {
+ mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n");
+ return -EBUSY;
+ }
+
+ err = pci_enable_sriov(pdev, num_vfs);
+ if (err)
+ mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
+
+ return err;
+}
+
+static void mlx5_pci_disable_sriov(struct pci_dev *pdev)
+{
+ pci_disable_sriov(pdev);
+}
+
+static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ int err = 0;
+
+ err = mlx5_device_enable_sriov(dev, num_vfs);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
+ return err;
+ }
+
+ err = mlx5_pci_enable_sriov(pdev, num_vfs);
+ if (err) {
+ mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err);
+ mlx5_device_disable_sriov(dev);
+ return err;
+ }
+
+ sriov->num_vfs = num_vfs;
+
+ return 0;
+}
+
+static void mlx5_sriov_disable(struct pci_dev *pdev)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ mlx5_pci_disable_sriov(pdev);
+ mlx5_device_disable_sriov(dev);
+ sriov->num_vfs = 0;
+}
+
+int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+ int err = 0;
+
+ mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
+ if (!mlx5_core_is_pf(dev))
+ return -EPERM;
+
+ if (num_vfs) {
+ int ret;
+
+ ret = mlx5_lag_forbid(dev);
+ if (ret && (ret != -ENODEV))
+ return ret;
+ }
+
+ if (num_vfs) {
+ err = mlx5_sriov_enable(pdev, num_vfs);
+ } else {
+ mlx5_sriov_disable(pdev);
+ mlx5_lag_allow(dev);
+ }
+
+ return err ? err : num_vfs;
+}
+
+int mlx5_sriov_attach(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+ return 0;
+
+ /* If sriov VFs exist in PCI level, enable them in device level */
+ return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+}
+
+void mlx5_sriov_detach(struct mlx5_core_dev *dev)
+{
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ mlx5_device_disable_sriov(dev);
+}
+
+int mlx5_sriov_init(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct pci_dev *pdev = dev->pdev;
+ int total_vfs;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ total_vfs = pci_sriov_get_totalvfs(pdev);
+ sriov->num_vfs = pci_num_vf(pdev);
+ sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+ if (!sriov->vfs_ctx)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+ if (!mlx5_core_is_pf(dev))
+ return;
+
+ kfree(sriov->vfs_ctx);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
new file mode 100644
index 000000000..23cc337a9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -0,0 +1,692 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include <linux/mlx5/srq.h>
+#include <rdma/ib_verbs.h>
+#include "mlx5_core.h"
+#include <linux/mlx5/transobj.h>
+
+void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_core_srq *srq;
+
+ spin_lock(&table->lock);
+
+ srq = radix_tree_lookup(&table->tree, srqn);
+ if (srq)
+ atomic_inc(&srq->refcount);
+
+ spin_unlock(&table->lock);
+
+ if (!srq) {
+ mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+
+static int get_pas_size(struct mlx5_srq_attr *in)
+{
+ u32 log_page_size = in->log_page_size + 12;
+ u32 log_srq_size = in->log_size;
+ u32 log_rq_stride = in->wqe_shift;
+ u32 page_offset = in->page_offset;
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static void set_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(wq, wq, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
+ MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
+ MLX5_SET(wq, wq, log_wq_sz, in->log_size);
+ MLX5_SET(wq, wq, page_offset, in->page_offset);
+ MLX5_SET(wq, wq, lwm, in->lwm);
+ MLX5_SET(wq, wq, pd, in->pd);
+ MLX5_SET64(wq, wq, dbr_addr, in->db_record);
+}
+
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
+ MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
+ MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
+ MLX5_SET(srqc, srqc, page_offset, in->page_offset);
+ MLX5_SET(srqc, srqc, lwm, in->lwm);
+ MLX5_SET(srqc, srqc, pd, in->pd);
+ MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
+ MLX5_SET(srqc, srqc, xrcd, in->xrcd);
+ MLX5_SET(srqc, srqc, cqn, in->cqn);
+}
+
+static void get_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(wq, wq, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
+ in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
+ in->log_size = MLX5_GET(wq, wq, log_wq_sz);
+ in->page_offset = MLX5_GET(wq, wq, page_offset);
+ in->lwm = MLX5_GET(wq, wq, lwm);
+ in->pd = MLX5_GET(wq, wq, pd);
+ in->db_record = MLX5_GET64(wq, wq, dbr_addr);
+}
+
+static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(srqc, srqc, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
+ in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
+ in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
+ in->page_offset = MLX5_GET(srqc, srqc, page_offset);
+ in->lwm = MLX5_GET(srqc, srqc, lwm);
+ in->pd = MLX5_GET(srqc, srqc, pd);
+ in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
+}
+
+struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_core_srq *srq;
+
+ spin_lock(&table->lock);
+
+ srq = radix_tree_lookup(&table->tree, srqn);
+ if (srq)
+ atomic_inc(&srq->refcount);
+
+ spin_unlock(&table->lock);
+
+ return srq;
+}
+EXPORT_SYMBOL(mlx5_core_get_srq);
+
+static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
+ void *create_in;
+ void *srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
+ pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
+
+ set_srqc(srqc, in);
+ memcpy(pas, in->pas, pas_size);
+
+ MLX5_SET(create_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_SRQ);
+
+ err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err)
+ srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+
+ return err;
+}
+
+static int destroy_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
+
+ MLX5_SET(destroy_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
+
+ return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+ srq_out, sizeof(srq_out));
+}
+
+static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+
+ MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
+ MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
+
+ return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+ srq_out, sizeof(srq_out));
+}
+
+static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
+ u32 *srq_out;
+ void *srqc;
+ int err;
+
+ srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
+ if (!srq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_QUERY_SRQ);
+ MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
+ err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+ srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
+ if (err)
+ goto out;
+
+ srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
+ get_srqc(srqc, out);
+ if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+out:
+ kvfree(srq_out);
+ return err;
+}
+
+static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+ void *create_in;
+ void *xrc_srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
+ xrc_srq_context_entry);
+ pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
+
+ set_srqc(xrc_srqc, in);
+ MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
+ memcpy(pas, in->pas, pas_size);
+ MLX5_SET(create_xrc_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_XRC_SRQ);
+
+ memset(create_out, 0, sizeof(create_out));
+ err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ sizeof(create_out));
+ if (err)
+ goto out;
+
+ srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+out:
+ kvfree(create_in);
+ return err;
+}
+
+static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
+ u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
+
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
+ MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+
+ return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq, u16 lwm)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
+ u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
+
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
+
+ return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
+ u32 *xrcsrq_out;
+ void *xrc_srqc;
+ int err;
+
+ xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
+ if (!xrcsrq_out)
+ return -ENOMEM;
+ memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
+
+ MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
+ MLX5_CMD_OP_QUERY_XRC_SRQ);
+ MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+
+ err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
+ MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+ if (err)
+ goto out;
+
+ xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
+ xrc_srq_context_entry);
+ get_srqc(xrc_srqc, out);
+ if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(xrcsrq_out);
+ return err;
+}
+
+static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ void *create_in;
+ void *rmpc;
+ void *wq;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ set_wq(wq, in);
+ memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
+
+ err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
+
+ kvfree(create_in);
+ return err;
+}
+
+static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ return mlx5_core_destroy_rmp(dev, srq->srqn);
+}
+
+static int arm_rmp_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ void *in;
+ void *rmpc;
+ void *wq;
+ void *bitmask;
+ int err;
+
+ in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
+ bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(wq, wq, lwm, lwm);
+ MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+
+ err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
+
+ kvfree(in);
+ return err;
+}
+
+static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 *rmp_out;
+ void *rmpc;
+ int err;
+
+ rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL);
+ if (!rmp_out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out);
+ if (err)
+ goto out;
+
+ rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
+ get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
+ if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(rmp_out);
+ return err;
+}
+
+static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
+ void *create_in;
+ void *xrqc;
+ void *wq;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+
+ set_wq(wq, in);
+ memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
+
+ if (in->type == IB_SRQT_TM) {
+ MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
+ if (in->flags & MLX5_SRQ_FLAG_RNDV)
+ MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
+ MLX5_SET(xrqc, xrqc,
+ tag_matching_topology_context.log_matching_list_sz,
+ in->tm_log_list_size);
+ }
+ MLX5_SET(xrqc, xrqc, user_index, in->user_index);
+ MLX5_SET(xrqc, xrqc, cqn, in->cqn);
+ MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+ err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err)
+ srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+
+ return err;
+}
+
+static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
+
+ MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int arm_xrq_cmd(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+
+ MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
+ MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, in, lwm, lwm);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
+ u32 *xrq_out;
+ int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
+ void *xrqc;
+ int err;
+
+ xrq_out = kvzalloc(outlen, GFP_KERNEL);
+ if (!xrq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
+ MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen);
+ if (err)
+ goto out;
+
+ xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
+ get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
+ if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+ out->tm_next_tag =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.append_next_index);
+ out->tm_hw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.hw_phase_cnt);
+ out->tm_sw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.sw_phase_cnt);
+
+out:
+ kvfree(xrq_out);
+ return err;
+}
+
+static int create_srq_split(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ if (!dev->issi)
+ return create_srq_cmd(dev, srq, in);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return create_xrc_srq_cmd(dev, srq, in);
+ case MLX5_RES_XRQ:
+ return create_xrq_cmd(dev, srq, in);
+ default:
+ return create_rmp_cmd(dev, srq, in);
+ }
+}
+
+static int destroy_srq_split(struct mlx5_core_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ if (!dev->issi)
+ return destroy_srq_cmd(dev, srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return destroy_xrc_srq_cmd(dev, srq);
+ case MLX5_RES_XRQ:
+ return destroy_xrq_cmd(dev, srq);
+ default:
+ return destroy_rmp_cmd(dev, srq);
+ }
+}
+
+int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ int err;
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+
+ switch (in->type) {
+ case IB_SRQT_XRC:
+ srq->common.res = MLX5_RES_XSRQ;
+ break;
+ case IB_SRQT_TM:
+ srq->common.res = MLX5_RES_XRQ;
+ break;
+ default:
+ srq->common.res = MLX5_RES_SRQ;
+ }
+
+ err = create_srq_split(dev, srq, in);
+ if (err)
+ return err;
+
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, srq->srqn, srq);
+ spin_unlock_irq(&table->lock);
+ if (err) {
+ mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
+ goto err_destroy_srq_split;
+ }
+
+ return 0;
+
+err_destroy_srq_split:
+ destroy_srq_split(dev, srq);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_srq);
+
+int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+ struct mlx5_core_srq *tmp;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, srq->srqn);
+ spin_unlock_irq(&table->lock);
+ if (!tmp) {
+ mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
+ return -EINVAL;
+ }
+ if (tmp != srq) {
+ mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
+ return -EINVAL;
+ }
+
+ err = destroy_srq_split(dev, srq);
+ if (err)
+ return err;
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlx5_core_destroy_srq);
+
+int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ if (!dev->issi)
+ return query_srq_cmd(dev, srq, out);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return query_xrc_srq_cmd(dev, srq, out);
+ case MLX5_RES_XRQ:
+ return query_xrq_cmd(dev, srq, out);
+ default:
+ return query_rmp_cmd(dev, srq, out);
+ }
+}
+EXPORT_SYMBOL(mlx5_core_query_srq);
+
+int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ if (!dev->issi)
+ return arm_srq_cmd(dev, srq, lwm, is_srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return arm_xrc_srq_cmd(dev, srq, lwm);
+ case MLX5_RES_XRQ:
+ return arm_xrq_cmd(dev, srq, lwm);
+ default:
+ return arm_rmp_cmd(dev, srq, lwm);
+ }
+}
+EXPORT_SYMBOL(mlx5_core_arm_srq);
+
+void mlx5_init_srq_table(struct mlx5_core_dev *dev)
+{
+ struct mlx5_srq_table *table = &dev->priv.srq_table;
+
+ memset(table, 0, sizeof(*table));
+ spin_lock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+}
+
+void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
+{
+ /* nothing */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
new file mode 100644
index 000000000..a1ee9a8a7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include <linux/mlx5/transobj.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *tdn = MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
+
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+
+ MLX5_SET(dealloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
+
+int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0};
+ int err;
+
+ MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rqn = MLX5_GET(create_rq_out, out, rqn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq);
+
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_rq_out)];
+
+ MLX5_SET(modify_rq_in, in, rqn, rqn);
+ MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_rq);
+
+void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq);
+
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+
+ MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+ MLX5_SET(query_rq_in, in, rqn, rqn);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
+int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0};
+ int err;
+
+ MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *sqn = MLX5_GET(create_sq_out, out, sqn);
+
+ return err;
+}
+
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0};
+
+ MLX5_SET(modify_sq_in, in, sqn, sqn);
+ MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_sq);
+
+void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0};
+
+ MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, in, sqn, sqn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+
+ MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+ MLX5_SET(query_sq_in, in, sqn, sqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
+int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = kvzalloc(inlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev, sqn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *state = MLX5_GET(sqc, sqc, state);
+
+out:
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
+
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *tirn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0};
+ int err;
+
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *tirn = MLX5_GET(create_tir_out, out, tirn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_tir);
+
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0};
+
+ MLX5_SET(modify_tir_in, in, tirn, tirn);
+ MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0};
+
+ MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, in, tirn, tirn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
+
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *tisn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0};
+ int err;
+
+ MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *tisn = MLX5_GET(create_tis_out, out, tisn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+ MLX5_SET(modify_tis_in, in, tisn, tisn);
+ MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
+
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
+
+int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rmpn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
+ int err;
+
+ MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
+
+ return err;
+}
+
+int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
+
+ MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
+
+ MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out,
+ sizeof(out));
+}
+
+int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+
+ MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
+ MLX5_SET(query_rmp_in, in, rmpn, rmpn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
+{
+ void *in;
+ void *rmpc;
+ void *wq;
+ void *bitmask;
+ int err;
+
+ in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
+ bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, rmpn, rmpn);
+ MLX5_SET(wq, wq, lwm, lwm);
+ MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+
+ err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in));
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *xsrqn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
+ int err;
+
+ MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
+
+ return err;
+}
+
+int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
+
+ MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
+
+ MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
+ MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
+ MLX5_SET(arm_xrc_srq_in, in, op_mod,
+ MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rqtn)
+{
+ u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
+ int err;
+
+ MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ if (!err)
+ *rqtn = MLX5_GET(create_rqt_out, out, rqtn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rqt);
+
+int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0};
+
+ MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
+ MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+}
+
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rqt);
+
+static int mlx5_hairpin_create_rq(struct mlx5_core_dev *mdev,
+ struct mlx5_hairpin_params *params, u32 *rqn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_rq_in)] = {0};
+ void *rqc, *wq;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(rqc, rqc, hairpin, 1);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, counter_set_id, params->q_counter);
+
+ MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size);
+ MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets);
+
+ return mlx5_core_create_rq(mdev, in, MLX5_ST_SZ_BYTES(create_rq_in), rqn);
+}
+
+static int mlx5_hairpin_create_sq(struct mlx5_core_dev *mdev,
+ struct mlx5_hairpin_params *params, u32 *sqn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_sq_in)] = {0};
+ void *sqc, *wq;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(sqc, sqc, hairpin, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+
+ MLX5_SET(wq, wq, log_hairpin_data_sz, params->log_data_size);
+ MLX5_SET(wq, wq, log_hairpin_num_packets, params->log_num_packets);
+
+ return mlx5_core_create_sq(mdev, in, MLX5_ST_SZ_BYTES(create_sq_in), sqn);
+}
+
+static int mlx5_hairpin_create_queues(struct mlx5_hairpin *hp,
+ struct mlx5_hairpin_params *params)
+{
+ int i, j, err;
+
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_create_rq(hp->func_mdev, params, &hp->rqn[i]);
+ if (err)
+ goto out_err_rq;
+ }
+
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_create_sq(hp->peer_mdev, params, &hp->sqn[i]);
+ if (err)
+ goto out_err_sq;
+ }
+
+ return 0;
+
+out_err_sq:
+ for (j = 0; j < i; j++)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[j]);
+ i = hp->num_channels;
+out_err_rq:
+ for (j = 0; j < i; j++)
+ mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[j]);
+ return err;
+}
+
+static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ for (i = 0; i < hp->num_channels; i++) {
+ mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]);
+ if (!hp->peer_gone)
+ mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+ }
+}
+
+static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn,
+ int curr_state, int next_state,
+ u16 peer_vhca, u32 peer_sq)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {0};
+ void *rqc;
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ if (next_state == MLX5_RQC_STATE_RDY) {
+ MLX5_SET(rqc, rqc, hairpin_peer_sq, peer_sq);
+ MLX5_SET(rqc, rqc, hairpin_peer_vhca, peer_vhca);
+ }
+
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ return mlx5_core_modify_rq(func_mdev, rqn,
+ in, MLX5_ST_SZ_BYTES(modify_rq_in));
+}
+
+static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn,
+ int curr_state, int next_state,
+ u16 peer_vhca, u32 peer_rq)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_sq_in)] = {0};
+ void *sqc;
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ if (next_state == MLX5_SQC_STATE_RDY) {
+ MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq);
+ MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca);
+ }
+
+ MLX5_SET(modify_sq_in, in, sq_state, curr_state);
+ MLX5_SET(sqc, sqc, state, next_state);
+
+ return mlx5_core_modify_sq(peer_mdev, sqn,
+ in, MLX5_ST_SZ_BYTES(modify_sq_in));
+}
+
+static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp)
+{
+ int i, j, err;
+
+ /* set peer SQs */
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i],
+ MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY,
+ MLX5_CAP_GEN(hp->func_mdev, vhca_id), hp->rqn[i]);
+ if (err)
+ goto err_modify_sq;
+ }
+
+ /* set func RQs */
+ for (i = 0; i < hp->num_channels; i++) {
+ err = mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i],
+ MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY,
+ MLX5_CAP_GEN(hp->peer_mdev, vhca_id), hp->sqn[i]);
+ if (err)
+ goto err_modify_rq;
+ }
+
+ return 0;
+
+err_modify_rq:
+ for (j = 0; j < i; j++)
+ mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[j], MLX5_RQC_STATE_RDY,
+ MLX5_RQC_STATE_RST, 0, 0);
+ i = hp->num_channels;
+err_modify_sq:
+ for (j = 0; j < i; j++)
+ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[j], MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_RST, 0, 0);
+ return err;
+}
+
+static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
+{
+ int i;
+
+ /* unset func RQs */
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
+ MLX5_RQC_STATE_RST, 0, 0);
+
+ /* unset peer SQs */
+ if (hp->peer_gone)
+ return;
+ for (i = 0; i < hp->num_channels; i++)
+ mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+ MLX5_SQC_STATE_RST, 0, 0);
+}
+
+struct mlx5_hairpin *
+mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
+ struct mlx5_core_dev *peer_mdev,
+ struct mlx5_hairpin_params *params)
+{
+ struct mlx5_hairpin *hp;
+ int size, err;
+
+ size = sizeof(*hp) + params->num_channels * 2 * sizeof(u32);
+ hp = kzalloc(size, GFP_KERNEL);
+ if (!hp)
+ return ERR_PTR(-ENOMEM);
+
+ hp->func_mdev = func_mdev;
+ hp->peer_mdev = peer_mdev;
+ hp->num_channels = params->num_channels;
+
+ hp->rqn = (void *)hp + sizeof(*hp);
+ hp->sqn = hp->rqn + params->num_channels;
+
+ /* alloc and pair func --> peer hairpin */
+ err = mlx5_hairpin_create_queues(hp, params);
+ if (err)
+ goto err_create_queues;
+
+ err = mlx5_hairpin_pair_queues(hp);
+ if (err)
+ goto err_pair_queues;
+
+ return hp;
+
+err_pair_queues:
+ mlx5_hairpin_destroy_queues(hp);
+err_create_queues:
+ kfree(hp);
+ return ERR_PTR(err);
+}
+
+void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
+{
+ mlx5_hairpin_unpair_queues(hp);
+ mlx5_hairpin_destroy_queues(hp);
+ kfree(hp);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
new file mode 100644
index 000000000..8b97066dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io-mapping.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+
+int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0};
+ int err;
+
+ MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return err;
+}
+EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
+
+int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+{
+ u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0};
+
+ MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+ MLX5_SET(dealloc_uar_in, in, uar, uarn);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_free_uar);
+
+static int uars_per_sys_page(struct mlx5_core_dev *mdev)
+{
+ if (MLX5_CAP_GEN(mdev, uar_4k))
+ return MLX5_CAP_GEN(mdev, num_of_uars_per_page);
+
+ return 1;
+}
+
+static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
+{
+ u32 system_page_index;
+
+ if (MLX5_CAP_GEN(mdev, uar_4k))
+ system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT);
+ else
+ system_page_index = index;
+
+ return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+}
+
+static void up_rel_func(struct kref *kref)
+{
+ struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
+
+ list_del(&up->list);
+ iounmap(up->map);
+ if (mlx5_cmd_free_uar(up->mdev, up->index))
+ mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
+ kfree(up->reg_bitmap);
+ kfree(up->fp_bitmap);
+ kfree(up);
+}
+
+static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
+ bool map_wc)
+{
+ struct mlx5_uars_page *up;
+ int err = -ENOMEM;
+ phys_addr_t pfn;
+ int bfregs;
+ int i;
+
+ bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
+ up = kzalloc(sizeof(*up), GFP_KERNEL);
+ if (!up)
+ return ERR_PTR(err);
+
+ up->mdev = mdev;
+ up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+ if (!up->reg_bitmap)
+ goto error1;
+
+ up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+ if (!up->fp_bitmap)
+ goto error1;
+
+ for (i = 0; i < bfregs; i++)
+ if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR)
+ set_bit(i, up->reg_bitmap);
+ else
+ set_bit(i, up->fp_bitmap);
+
+ up->bfregs = bfregs;
+ up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+ up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+
+ err = mlx5_cmd_alloc_uar(mdev, &up->index);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
+ goto error1;
+ }
+
+ pfn = uar2pfn(mdev, up->index);
+ if (map_wc) {
+ up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!up->map) {
+ err = -EAGAIN;
+ goto error2;
+ }
+ } else {
+ up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!up->map) {
+ err = -ENOMEM;
+ goto error2;
+ }
+ }
+ kref_init(&up->ref_count);
+ mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
+ up->index, up->bfregs);
+ return up;
+
+error2:
+ if (mlx5_cmd_free_uar(mdev, up->index))
+ mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
+error1:
+ kfree(up->fp_bitmap);
+ kfree(up->reg_bitmap);
+ kfree(up);
+ return ERR_PTR(err);
+}
+
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_uars_page *ret;
+
+ mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+ if (!list_empty(&mdev->priv.bfregs.reg_head.list)) {
+ ret = list_first_entry(&mdev->priv.bfregs.reg_head.list,
+ struct mlx5_uars_page, list);
+ kref_get(&ret->ref_count);
+ goto out;
+ }
+ ret = alloc_uars_page(mdev, false);
+ if (IS_ERR(ret))
+ goto out;
+ list_add(&ret->list, &mdev->priv.bfregs.reg_head.list);
+out:
+ mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(mlx5_get_uars_page);
+
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up)
+{
+ mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+ kref_put(&up->ref_count, up_rel_func);
+ mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+}
+EXPORT_SYMBOL(mlx5_put_uars_page);
+
+static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
+{
+ /* return the offset in bytes from the start of the page to the
+ * blue flame area of the UAR
+ */
+ return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE +
+ (dbi % MLX5_BFREGS_PER_UAR) *
+ (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET;
+}
+
+static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+ bool map_wc, bool fast_path)
+{
+ struct mlx5_bfreg_data *bfregs;
+ struct mlx5_uars_page *up;
+ struct list_head *head;
+ unsigned long *bitmap;
+ unsigned int *avail;
+ struct mutex *lock; /* pointer to right mutex */
+ int dbi;
+
+ bfregs = &mdev->priv.bfregs;
+ if (map_wc) {
+ head = &bfregs->wc_head.list;
+ lock = &bfregs->wc_head.lock;
+ } else {
+ head = &bfregs->reg_head.list;
+ lock = &bfregs->reg_head.lock;
+ }
+ mutex_lock(lock);
+ if (list_empty(head)) {
+ up = alloc_uars_page(mdev, map_wc);
+ if (IS_ERR(up)) {
+ mutex_unlock(lock);
+ return PTR_ERR(up);
+ }
+ list_add(&up->list, head);
+ } else {
+ up = list_entry(head->next, struct mlx5_uars_page, list);
+ kref_get(&up->ref_count);
+ }
+ if (fast_path) {
+ bitmap = up->fp_bitmap;
+ avail = &up->fp_avail;
+ } else {
+ bitmap = up->reg_bitmap;
+ avail = &up->reg_avail;
+ }
+ dbi = find_first_bit(bitmap, up->bfregs);
+ clear_bit(dbi, bitmap);
+ (*avail)--;
+ if (!(*avail))
+ list_del(&up->list);
+
+ bfreg->map = up->map + map_offset(mdev, dbi);
+ bfreg->up = up;
+ bfreg->wc = map_wc;
+ bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR;
+ mutex_unlock(lock);
+
+ return 0;
+}
+
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+ bool map_wc, bool fast_path)
+{
+ int err;
+
+ err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);
+ if (!err)
+ return 0;
+
+ if (err == -EAGAIN && map_wc)
+ return alloc_bfreg(mdev, bfreg, false, fast_path);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_alloc_bfreg);
+
+static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
+ struct mlx5_uars_page *up,
+ struct mlx5_sq_bfreg *bfreg)
+{
+ unsigned int uar_idx;
+ unsigned int bfreg_idx;
+ unsigned int bf_reg_size;
+
+ bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
+
+ uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
+ bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size;
+
+ return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx;
+}
+
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
+{
+ struct mlx5_bfreg_data *bfregs;
+ struct mlx5_uars_page *up;
+ struct mutex *lock; /* pointer to right mutex */
+ unsigned int dbi;
+ bool fp;
+ unsigned int *avail;
+ unsigned long *bitmap;
+ struct list_head *head;
+
+ bfregs = &mdev->priv.bfregs;
+ if (bfreg->wc) {
+ head = &bfregs->wc_head.list;
+ lock = &bfregs->wc_head.lock;
+ } else {
+ head = &bfregs->reg_head.list;
+ lock = &bfregs->reg_head.lock;
+ }
+ up = bfreg->up;
+ dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
+ fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
+ if (fp) {
+ avail = &up->fp_avail;
+ bitmap = up->fp_bitmap;
+ } else {
+ avail = &up->reg_avail;
+ bitmap = up->reg_bitmap;
+ }
+ mutex_lock(lock);
+ (*avail)++;
+ set_bit(dbi, bitmap);
+ if (*avail == 1)
+ list_add_tail(&up->list, head);
+
+ kref_put(&up->ref_count, up_rel_func);
+ mutex_unlock(lock);
+}
+EXPORT_SYMBOL(mlx5_free_bfreg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
new file mode 100644
index 000000000..b02af317c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -0,0 +1,1203 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/export.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+
+/* Mutex to hold while enabling or disabling RoCE */
+static DEFINE_MUTEX(mlx5_roce_en_lock);
+
+static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0};
+
+ MLX5_SET(query_vport_state_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_STATE);
+ MLX5_SET(query_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(query_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vport_state_in, in, other_vport, 1);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
+
+ _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+
+ return MLX5_GET(query_vport_state_out, out, state);
+}
+
+int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
+ u16 vport, u8 state)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
+
+ MLX5_SET(modify_vport_state_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_VPORT_STATE);
+ MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
+ MLX5_SET(modify_vport_state_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+ MLX5_SET(modify_vport_state_in, in, admin_state, state);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+}
+
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *min_inline)
+{
+ u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+ int err;
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+ if (!err)
+ *min_inline = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.min_wqe_inline_mode);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
+
+void mlx5_query_min_inline(struct mlx5_core_dev *mdev,
+ u8 *min_inline_mode)
+{
+ switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
+ case MLX5_CAP_INLINE_MODE_L2:
+ *min_inline_mode = MLX5_INLINE_MODE_L2;
+ break;
+ case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+ mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
+ break;
+ case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+ *min_inline_mode = MLX5_INLINE_MODE_NONE;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(mlx5_query_min_inline);
+
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+ u16 vport, u8 min_inline)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *nic_vport_ctx;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.min_inline, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ min_wqe_inline_mode, min_inline);
+
+ return mlx5_modify_nic_vport_context(mdev, in, inlen);
+}
+
+int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *addr)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u8 *out_addr;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context.permanent_address);
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ if (!err)
+ ether_addr_copy(addr, &out_addr[2]);
+
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
+
+int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
+ u16 vport, u8 *addr)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+ void *nic_vport_ctx;
+ u8 *perm_mac;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.permanent_address, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx,
+ permanent_address);
+
+ ether_addr_copy(&perm_mac[2], addr);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
+
+int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int err;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ if (!err)
+ *mtu = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.mtu);
+
+ kvfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mtu);
+
+int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu);
+
+int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ u32 vport,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int *list_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+ void *nic_vport_ctx;
+ int max_list_size;
+ int req_list_size;
+ int out_sz;
+ void *out;
+ int err;
+ int i;
+
+ req_list_size = *list_size;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (req_list_size > max_list_size) {
+ mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n",
+ req_list_size, max_list_size);
+ req_list_size = max_list_size;
+ }
+
+ out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
+
+ nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size);
+
+ *list_size = req_list_size;
+ for (i = 0; i < req_list_size; i++) {
+ u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(addr_list[i], mac_addr);
+ }
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list);
+
+int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
+ enum mlx5_list_type list_type,
+ u8 addr_list[][ETH_ALEN],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ?
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) :
+ 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
+
+ memset(out, 0, sizeof(out));
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, list_type);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]) + 2;
+ ether_addr_copy(curr_mac, addr_list[i]);
+ }
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
+
+int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u32 vport,
+ u16 vlans[],
+ int *size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+ void *nic_vport_ctx;
+ int req_list_size;
+ int max_list_size;
+ int out_sz;
+ void *out;
+ int err;
+ int i;
+
+ req_list_size = *size;
+ max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+ if (req_list_size > max_list_size) {
+ mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n",
+ req_list_size, max_list_size);
+ req_list_size = max_list_size;
+ }
+
+ out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ req_list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+ memset(in, 0, sizeof(in));
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+ MLX5_SET(query_nic_vport_context_in, in, allowed_list_type,
+ MLX5_NVPRT_LIST_TYPE_VLAN);
+ MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+
+ if (vport)
+ MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
+
+ nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+ nic_vport_context);
+ req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size);
+
+ *size = req_list_size;
+ for (i = 0; i < req_list_size; i++) {
+ void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]);
+ vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan);
+ }
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans);
+
+int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
+ u16 vlans[],
+ int list_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ void *nic_vport_ctx;
+ int max_list_size;
+ int in_sz;
+ void *in;
+ int err;
+ int i;
+
+ max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list);
+
+ if (list_size > max_list_size)
+ return -ENOSPC;
+
+ in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+ list_size * MLX5_ST_SZ_BYTES(vlan_layout);
+
+ memset(out, 0, sizeof(out));
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.addresses_list, 1);
+
+ nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in,
+ nic_vport_context);
+
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN);
+ MLX5_SET(nic_vport_context, nic_vport_ctx,
+ allowed_list_size, list_size);
+
+ for (i = 0; i < list_size; i++) {
+ void *vlan_addr = MLX5_ADDR_OF(nic_vport_context,
+ nic_vport_ctx,
+ current_uc_mac_address[i]);
+ MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]);
+ }
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
+
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+ u64 *system_image_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.system_image_guid);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.node_guid);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
+ u32 vport, u64 node_guid)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *nic_vport_context;
+ void *in;
+ int err;
+
+ if (!vport)
+ return -EINVAL;
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+ return -EACCES;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.node_guid, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+ MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport);
+
+ nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
+ in, nic_vport_context);
+ MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+ u16 *qkey_viol_cntr)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.qkey_violation_counter);
+
+ kvfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
+int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
+ u8 port_num, u16 vf_num, u16 gid_index,
+ union ib_gid *gid)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_in);
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
+ int is_group_manager;
+ void *out = NULL;
+ void *in = NULL;
+ union ib_gid *tmp;
+ int tbsz;
+ int nout;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ tbsz = mlx5_get_gid_table_len(MLX5_CAP_GEN(dev, gid_table_size));
+ mlx5_core_dbg(dev, "vf_num %d, index %d, gid_table_size %d\n",
+ vf_num, gid_index, tbsz);
+
+ if (gid_index > tbsz && gid_index != 0xffff)
+ return -EINVAL;
+
+ if (gid_index == 0xffff)
+ nout = tbsz;
+ else
+ nout = 1;
+
+ out_sz += nout * sizeof(*gid);
+
+ in = kzalloc(in_sz, GFP_KERNEL);
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_hca_vport_gid_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_GID);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_gid_in, in, vport_number, vf_num);
+ MLX5_SET(query_hca_vport_gid_in, in, other_vport, 1);
+ } else {
+ err = -EPERM;
+ goto out;
+ }
+ }
+ MLX5_SET(query_hca_vport_gid_in, in, gid_index, gid_index);
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_gid_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+ if (err)
+ goto out;
+
+ tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
+ gid->global.subnet_prefix = tmp->global.subnet_prefix;
+ gid->global.interface_id = tmp->global.interface_id;
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_gid);
+
+int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
+ u8 port_num, u16 vf_num, u16 pkey_index,
+ u16 *pkey)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_in);
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_pkey_out);
+ int is_group_manager;
+ void *out = NULL;
+ void *in = NULL;
+ void *pkarr;
+ int nout;
+ int tbsz;
+ int err;
+ int i;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+
+ tbsz = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size));
+ if (pkey_index > tbsz && pkey_index != 0xffff)
+ return -EINVAL;
+
+ if (pkey_index == 0xffff)
+ nout = tbsz;
+ else
+ nout = 1;
+
+ out_sz += nout * MLX5_ST_SZ_BYTES(pkey);
+
+ in = kzalloc(in_sz, GFP_KERNEL);
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_hca_vport_pkey_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_pkey_in, in, vport_number, vf_num);
+ MLX5_SET(query_hca_vport_pkey_in, in, other_vport, 1);
+ } else {
+ err = -EPERM;
+ goto out;
+ }
+ }
+ MLX5_SET(query_hca_vport_pkey_in, in, pkey_index, pkey_index);
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_pkey_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+ if (err)
+ goto out;
+
+ pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey);
+ for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey))
+ *pkey = MLX5_GET_PR(pkey, pkarr, pkey);
+
+out:
+ kfree(in);
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_pkey);
+
+int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ u16 vf_num,
+ struct mlx5_hca_vport_context *rep)
+{
+ int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
+ int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0};
+ int is_group_manager;
+ void *out;
+ void *ctx;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT);
+
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_hca_vport_context_in, in, other_vport, 1);
+ MLX5_SET(query_hca_vport_context_in, in, vport_number, vf_num);
+ } else {
+ err = -EPERM;
+ goto ex;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_hca_vport_context_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto ex;
+
+ ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context);
+ rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select);
+ rep->sm_virt_aware = MLX5_GET_PR(hca_vport_context, ctx, sm_virt_aware);
+ rep->has_smi = MLX5_GET_PR(hca_vport_context, ctx, has_smi);
+ rep->has_raw = MLX5_GET_PR(hca_vport_context, ctx, has_raw);
+ rep->policy = MLX5_GET_PR(hca_vport_context, ctx, vport_state_policy);
+ rep->phys_state = MLX5_GET_PR(hca_vport_context, ctx,
+ port_physical_state);
+ rep->vport_state = MLX5_GET_PR(hca_vport_context, ctx, vport_state);
+ rep->port_physical_state = MLX5_GET_PR(hca_vport_context, ctx,
+ port_physical_state);
+ rep->port_guid = MLX5_GET64_PR(hca_vport_context, ctx, port_guid);
+ rep->node_guid = MLX5_GET64_PR(hca_vport_context, ctx, node_guid);
+ rep->cap_mask1 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask1);
+ rep->cap_mask1_perm = MLX5_GET_PR(hca_vport_context, ctx,
+ cap_mask1_field_select);
+ rep->cap_mask2 = MLX5_GET_PR(hca_vport_context, ctx, cap_mask2);
+ rep->cap_mask2_perm = MLX5_GET_PR(hca_vport_context, ctx,
+ cap_mask2_field_select);
+ rep->lid = MLX5_GET_PR(hca_vport_context, ctx, lid);
+ rep->init_type_reply = MLX5_GET_PR(hca_vport_context, ctx,
+ init_type_reply);
+ rep->lmc = MLX5_GET_PR(hca_vport_context, ctx, lmc);
+ rep->subnet_timeout = MLX5_GET_PR(hca_vport_context, ctx,
+ subnet_timeout);
+ rep->sm_lid = MLX5_GET_PR(hca_vport_context, ctx, sm_lid);
+ rep->sm_sl = MLX5_GET_PR(hca_vport_context, ctx, sm_sl);
+ rep->qkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx,
+ qkey_violation_counter);
+ rep->pkey_violation_counter = MLX5_GET_PR(hca_vport_context, ctx,
+ pkey_violation_counter);
+ rep->grh_required = MLX5_GET_PR(hca_vport_context, ctx, grh_required);
+ rep->sys_image_guid = MLX5_GET64_PR(hca_vport_context, ctx,
+ system_image_guid);
+
+ex:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_context);
+
+int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
+ u64 *sys_image_guid)
+{
+ struct mlx5_hca_vport_context *rep;
+ int err;
+
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep);
+ if (!err)
+ *sys_image_guid = rep->sys_image_guid;
+
+ kfree(rep);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_system_image_guid);
+
+int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
+ u64 *node_guid)
+{
+ struct mlx5_hca_vport_context *rep;
+ int err;
+
+ rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ err = mlx5_query_hca_vport_context(dev, 0, 1, 0, rep);
+ if (!err)
+ *node_guid = rep->node_guid;
+
+ kfree(rep);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
+
+int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ u32 vport,
+ int *promisc_uc,
+ int *promisc_mc,
+ int *promisc_all)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+ if (err)
+ goto out;
+
+ *promisc_uc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_uc);
+ *promisc_mc = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_mc);
+ *promisc_all = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.promisc_all);
+
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc);
+
+int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
+ int promisc_uc,
+ int promisc_mc,
+ int promisc_all)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_uc, promisc_uc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_mc, promisc_mc);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.promisc_all, promisc_all);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+enum {
+ UC_LOCAL_LB,
+ MC_LOCAL_LB
+};
+
+int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) &&
+ !MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+ return 0;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_mc_local_lb, !enable);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_uc_local_lb, !enable);
+
+ if (MLX5_CAP_GEN(mdev, disable_local_lb_mc))
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_mc_local_lb, 1);
+
+ if (MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_uc_local_lb, 1);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ if (!err)
+ mlx5_core_dbg(mdev, "%s local_lb\n",
+ enable ? "enable" : "disable");
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb);
+
+int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int value;
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ if (err)
+ goto out;
+
+ value = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB;
+
+ value |= MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB;
+
+ *status = !value;
+
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
+
+enum mlx5_vport_roce_state {
+ MLX5_VPORT_ROCE_DISABLED = 0,
+ MLX5_VPORT_ROCE_ENABLED = 1,
+};
+
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+ enum mlx5_vport_roce_state state)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+ state);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (!mdev->roce.roce_en)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+
+ if (!err)
+ mdev->roce.roce_en++;
+ mutex_unlock(&mlx5_roce_en_lock);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+ int err = 0;
+
+ mutex_lock(&mlx5_roce_en_lock);
+ if (mdev->roce.roce_en) {
+ mdev->roce.roce_en--;
+ if (mdev->roce.roce_en == 0)
+ err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+
+ if (err)
+ mdev->roce.roce_en++;
+ }
+ mutex_unlock(&mlx5_roce_en_lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
+
+int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
+ int vf, u8 port_num, void *out,
+ size_t out_sz)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
+ int is_group_manager;
+ void *in;
+ int err;
+
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ in = kvzalloc(in_sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+ MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1);
+ } else {
+ err = -EPERM;
+ goto free;
+ }
+ }
+ if (MLX5_CAP_GEN(dev, num_ports) == 2)
+ MLX5_SET(query_vport_counter_in, in, port_num, port_num);
+
+ err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
+free:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
+
+int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
+ u64 *rx_discard_vport_down,
+ u64 *tx_discard_vport_down)
+{
+ u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+ int err;
+
+ MLX5_SET(query_vnic_env_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VNIC_ENV);
+ MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+ MLX5_SET(query_vnic_env_in, in, vport_number, vport);
+ if (vport)
+ MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ *rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.receive_discard_vport_down);
+ *tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+ vport_env.transmit_discard_vport_down);
+ return 0;
+}
+
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+ u8 other_vport, u8 port_num,
+ int vf,
+ struct mlx5_hca_vport_context *req)
+{
+ int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
+ u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)];
+ int is_group_manager;
+ void *in;
+ int err;
+ void *ctx;
+
+ mlx5_core_dbg(dev, "vf %d\n", vf);
+ is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+ in = kzalloc(in_sz, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ memset(out, 0, sizeof(out));
+ MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
+ if (other_vport) {
+ if (is_group_manager) {
+ MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1);
+ MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf);
+ } else {
+ err = -EPERM;
+ goto ex;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev, num_ports) > 1)
+ MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num);
+
+ ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context);
+ MLX5_SET(hca_vport_context, ctx, field_select, req->field_select);
+ MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware);
+ MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi);
+ MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw);
+ MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy);
+ MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state);
+ MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state);
+ MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
+ MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+ MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm);
+ MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2);
+ MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm);
+ MLX5_SET(hca_vport_context, ctx, lid, req->lid);
+ MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply);
+ MLX5_SET(hca_vport_context, ctx, lmc, req->lmc);
+ MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout);
+ MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid);
+ MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl);
+ MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter);
+ MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter);
+ err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+ex:
+ kfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
+
+int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
+ struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ err = mlx5_nic_vport_enable_roce(port_mdev);
+ if (err)
+ goto free;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id,
+ MLX5_CAP_GEN(master_mdev, vhca_id));
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria,
+ MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
+
+ err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ if (err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+free:
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport);
+
+int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliated_vhca_id, 0);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria, 0);
+
+ err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+ if (!err)
+ mlx5_nic_vport_disable_roce(port_mdev);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
new file mode 100644
index 000000000..ddca327e8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "wq.h"
+#include "mlx5_core.h"
+
+u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
+{
+ return (u32)wq->fbc.sz_m1 + 1;
+}
+
+u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
+{
+ return wq->fbc.sz_m1 + 1;
+}
+
+u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
+{
+ return (u32)wq->fbc.sz_m1 + 1;
+}
+
+static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride;
+}
+
+static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
+{
+ return mlx5_wq_cyc_get_byte_size(&wq->rq) +
+ mlx5_wq_cyc_get_byte_size(&wq->sq);
+}
+
+static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
+}
+
+static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
+{
+ return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride;
+}
+
+int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_cyc *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
+ int err;
+
+ mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
+ MLX5_GET(wq, wqc, log_wq_sz),
+ fbc);
+ wq->sz = wq->fbc.sz_m1 + 1;
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ fbc->frag_buf = wq_ctrl->buf;
+ wq->db = wq_ctrl->db.db;
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
+ struct mlx5_wq_qp *qp)
+{
+ struct mlx5_frag_buf_ctrl *sq_fbc;
+ struct mlx5_frag_buf *rqb, *sqb;
+
+ rqb = &qp->rq.fbc.frag_buf;
+ *rqb = *buf;
+ rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq);
+ rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
+
+ sq_fbc = &qp->sq.fbc;
+ sqb = &sq_fbc->frag_buf;
+ *sqb = *buf;
+ sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq);
+ sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
+ sqb->frags += rqb->npages; /* first part is for the rq */
+ if (sq_fbc->strides_offset)
+ sqb->frags--;
+}
+
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ u16 sq_strides_offset;
+ u32 rq_pg_remainder;
+ int err;
+
+ mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
+ MLX5_GET(qpc, qpc, log_rq_size),
+ &wq->rq.fbc);
+
+ rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE;
+ sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB;
+
+ mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
+ MLX5_GET(qpc, qpc, log_sq_size),
+ sq_strides_offset,
+ &wq->sq.fbc);
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
+
+ wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
+ wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *cqc, struct mlx5_cqwq *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ int err;
+
+ mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+ &wq_ctrl->buf,
+ param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
+ err);
+ goto err_db_free;
+ }
+
+ wq->fbc.frag_buf = wq_ctrl->buf;
+ wq->db = wq_ctrl->db.db;
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_ll *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+ struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
+ struct mlx5_wqe_srq_next_seg *next_seg;
+ int err;
+ int i;
+
+ mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
+ MLX5_GET(wq, wqc, log_wq_sz),
+ fbc);
+
+ err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+ &wq_ctrl->buf, param->buf_numa_node);
+ if (err) {
+ mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
+ goto err_db_free;
+ }
+
+ wq->fbc.frag_buf = wq_ctrl->buf;
+ wq->db = wq_ctrl->db.db;
+
+ for (i = 0; i < fbc->sz_m1; i++) {
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ next_seg->next_wqe_index = cpu_to_be16(i + 1);
+ }
+ next_seg = mlx5_wq_ll_get_wqe(wq, i);
+ wq->tail_next = &next_seg->next_wqe_index;
+
+ wq_ctrl->mdev = mdev;
+
+ return 0;
+
+err_db_free:
+ mlx5_db_free(mdev, &wq_ctrl->db);
+
+ return err;
+}
+
+void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
+{
+ mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
+ mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
new file mode 100644
index 000000000..b1293d153
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_WQ_H__
+#define __MLX5_WQ_H__
+
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+struct mlx5_wq_param {
+ int buf_numa_node;
+ int db_numa_node;
+};
+
+struct mlx5_wq_ctrl {
+ struct mlx5_core_dev *mdev;
+ struct mlx5_frag_buf buf;
+ struct mlx5_db db;
+};
+
+struct mlx5_wq_cyc {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u16 sz;
+ u16 wqe_ctr;
+ u16 cur_sz;
+};
+
+struct mlx5_wq_qp {
+ struct mlx5_wq_cyc rq;
+ struct mlx5_wq_cyc sq;
+};
+
+struct mlx5_cqwq {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ u32 cc; /* consumer counter */
+};
+
+struct mlx5_wq_ll {
+ struct mlx5_frag_buf_ctrl fbc;
+ __be32 *db;
+ __be16 *tail_next;
+ u16 head;
+ u16 wqe_ctr;
+ u16 cur_sz;
+};
+
+int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_cyc *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
+
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+
+int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *cqc, struct mlx5_cqwq *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
+
+int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *wqc, struct mlx5_wq_ll *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
+
+void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
+
+static inline int mlx5_wq_cyc_is_full(struct mlx5_wq_cyc *wq)
+{
+ return wq->cur_sz == wq->sz;
+}
+
+static inline int mlx5_wq_cyc_missing(struct mlx5_wq_cyc *wq)
+{
+ return wq->sz - wq->cur_sz;
+}
+
+static inline int mlx5_wq_cyc_is_empty(struct mlx5_wq_cyc *wq)
+{
+ return !wq->cur_sz;
+}
+
+static inline void mlx5_wq_cyc_push(struct mlx5_wq_cyc *wq)
+{
+ wq->wqe_ctr++;
+ wq->cur_sz++;
+}
+
+static inline void mlx5_wq_cyc_push_n(struct mlx5_wq_cyc *wq, u8 n)
+{
+ wq->wqe_ctr += n;
+ wq->cur_sz += n;
+}
+
+static inline void mlx5_wq_cyc_pop(struct mlx5_wq_cyc *wq)
+{
+ wq->cur_sz--;
+}
+
+static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq)
+{
+ *wq->db = cpu_to_be32(wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
+{
+ return ctr & wq->fbc.sz_m1;
+}
+
+static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
+}
+
+static inline u16 mlx5_wq_cyc_get_tail(struct mlx5_wq_cyc *wq)
+{
+ return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr - wq->cur_sz);
+}
+
+static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
+static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
+{
+ int equal = (cc1 == cc2);
+ int smaller = 0x8000 & (cc1 - cc2);
+
+ return !equal && !smaller;
+}
+
+static inline u32 mlx5_cqwq_ctr2ix(struct mlx5_cqwq *wq, u32 ctr)
+{
+ return ctr & wq->fbc.sz_m1;
+}
+
+static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_ctr2ix(wq, wq->cc);
+}
+
+static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)
+{
+ return ctr >> wq->fbc.log_sz;
+}
+
+static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
+{
+ return mlx5_cqwq_get_ctr_wrap_cnt(wq, wq->cc);
+}
+
+static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
+{
+ wq->cc++;
+}
+
+static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq)
+{
+ *wq->db = cpu_to_be32(wq->cc & 0xffffff);
+}
+
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
+{
+ u32 ci = mlx5_cqwq_get_ci(wq);
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+ u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
+ u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
+
+ if (cqe_ownership_bit != sw_ownership_val)
+ return NULL;
+
+ /* ensure cqe content is read after cqe ownership bit */
+ dma_rmb();
+
+ return cqe;
+}
+
+static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
+{
+ return wq->cur_sz == wq->fbc.sz_m1;
+}
+
+static inline int mlx5_wq_ll_is_empty(struct mlx5_wq_ll *wq)
+{
+ return !wq->cur_sz;
+}
+
+static inline int mlx5_wq_ll_missing(struct mlx5_wq_ll *wq)
+{
+ return wq->fbc.sz_m1 - wq->cur_sz;
+}
+
+static inline void *mlx5_wq_ll_get_wqe(struct mlx5_wq_ll *wq, u16 ix)
+{
+ return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+}
+
+static inline void mlx5_wq_ll_push(struct mlx5_wq_ll *wq, u16 head_next)
+{
+ wq->head = head_next;
+ wq->wqe_ctr++;
+ wq->cur_sz++;
+}
+
+static inline void mlx5_wq_ll_pop(struct mlx5_wq_ll *wq, __be16 ix,
+ __be16 *next_tail_next)
+{
+ *wq->tail_next = ix;
+ wq->tail_next = next_tail_next;
+ wq->cur_sz--;
+}
+
+static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq)
+{
+ *wq->db = cpu_to_be32(wq->wqe_ctr);
+}
+
+#endif /* __MLX5_WQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlxfw/Kconfig b/drivers/net/ethernet/mellanox/mlxfw/Kconfig
new file mode 100644
index 000000000..186ebe783
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/Kconfig
@@ -0,0 +1,13 @@
+#
+# Mellanox firmware flash library configuration
+#
+
+config MLXFW
+ tristate "Mellanox Technologies firmware flash module"
+ ---help---
+ This driver supports Mellanox Technologies Firmware
+ flashing common logic.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxfw.
+ select XZ_DEC
diff --git a/drivers/net/ethernet/mellanox/mlxfw/Makefile b/drivers/net/ethernet/mellanox/mlxfw/Makefile
new file mode 100644
index 000000000..7448b3011
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_MLXFW) += mlxfw.o
+mlxfw-objs := mlxfw_fsm.o mlxfw_mfa2_tlv_multi.o mlxfw_mfa2.o
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
new file mode 100644
index 000000000..7a712b6b0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
@@ -0,0 +1,111 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXFW_H
+#define _MLXFW_H
+
+#include <linux/firmware.h>
+
+enum mlxfw_fsm_state {
+ MLXFW_FSM_STATE_IDLE,
+ MLXFW_FSM_STATE_LOCKED,
+ MLXFW_FSM_STATE_INITIALIZE,
+ MLXFW_FSM_STATE_DOWNLOAD,
+ MLXFW_FSM_STATE_VERIFY,
+ MLXFW_FSM_STATE_APPLY,
+ MLXFW_FSM_STATE_ACTIVATE,
+};
+
+enum mlxfw_fsm_state_err {
+ MLXFW_FSM_STATE_ERR_OK,
+ MLXFW_FSM_STATE_ERR_ERROR,
+ MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR,
+ MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE,
+ MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY,
+ MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED,
+ MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED,
+ MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE,
+ MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT,
+ MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET,
+ MLXFW_FSM_STATE_ERR_MAX,
+};
+
+struct mlxfw_dev;
+
+struct mlxfw_dev_ops {
+ int (*component_query)(struct mlxfw_dev *mlxfw_dev, u16 component_index,
+ u32 *p_max_size, u8 *p_align_bits,
+ u16 *p_max_write_size);
+
+ int (*fsm_lock)(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle);
+
+ int (*fsm_component_update)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index, u32 component_size);
+
+ int (*fsm_block_download)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u8 *data, u16 size, u32 offset);
+
+ int (*fsm_component_verify)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ u16 component_index);
+
+ int (*fsm_activate)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle);
+
+ int (*fsm_query_state)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ enum mlxfw_fsm_state *fsm_state,
+ enum mlxfw_fsm_state_err *fsm_state_err);
+
+ void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle);
+
+ void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle);
+};
+
+struct mlxfw_dev {
+ const struct mlxfw_dev_ops *ops;
+ const char *psid;
+ u16 psid_size;
+};
+
+#if IS_REACHABLE(CONFIG_MLXFW)
+int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
+ const struct firmware *firmware);
+#else
+static inline
+int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
+ const struct firmware *firmware)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
new file mode 100644
index 000000000..d765e7a69
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
@@ -0,0 +1,275 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) "mlxfw: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+#include "mlxfw.h"
+#include "mlxfw_mfa2.h"
+
+#define MLXFW_FSM_STATE_WAIT_CYCLE_MS 200
+#define MLXFW_FSM_STATE_WAIT_TIMEOUT_MS 30000
+#define MLXFW_FSM_STATE_WAIT_ROUNDS \
+ (MLXFW_FSM_STATE_WAIT_TIMEOUT_MS / MLXFW_FSM_STATE_WAIT_CYCLE_MS)
+#define MLXFW_FSM_MAX_COMPONENT_SIZE (10 * (1 << 20))
+
+static const char * const mlxfw_fsm_state_err_str[] = {
+ [MLXFW_FSM_STATE_ERR_ERROR] =
+ "general error",
+ [MLXFW_FSM_STATE_ERR_REJECTED_DIGEST_ERR] =
+ "component hash mismatch",
+ [MLXFW_FSM_STATE_ERR_REJECTED_NOT_APPLICABLE] =
+ "component not applicable",
+ [MLXFW_FSM_STATE_ERR_REJECTED_UNKNOWN_KEY] =
+ "unknown key",
+ [MLXFW_FSM_STATE_ERR_REJECTED_AUTH_FAILED] =
+ "authentication failed",
+ [MLXFW_FSM_STATE_ERR_REJECTED_UNSIGNED] =
+ "component was not signed",
+ [MLXFW_FSM_STATE_ERR_REJECTED_KEY_NOT_APPLICABLE] =
+ "key not applicable",
+ [MLXFW_FSM_STATE_ERR_REJECTED_BAD_FORMAT] =
+ "bad format",
+ [MLXFW_FSM_STATE_ERR_BLOCKED_PENDING_RESET] =
+ "pending reset",
+ [MLXFW_FSM_STATE_ERR_MAX] =
+ "unknown error"
+};
+
+static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ enum mlxfw_fsm_state fsm_state)
+{
+ enum mlxfw_fsm_state_err fsm_state_err;
+ enum mlxfw_fsm_state curr_fsm_state;
+ int times;
+ int err;
+
+ times = MLXFW_FSM_STATE_WAIT_ROUNDS;
+retry:
+ err = mlxfw_dev->ops->fsm_query_state(mlxfw_dev, fwhandle,
+ &curr_fsm_state, &fsm_state_err);
+ if (err)
+ return err;
+
+ if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) {
+ fsm_state_err = min_t(enum mlxfw_fsm_state_err,
+ fsm_state_err, MLXFW_FSM_STATE_ERR_MAX);
+ pr_err("Firmware flash failed: %s\n",
+ mlxfw_fsm_state_err_str[fsm_state_err]);
+ return -EINVAL;
+ }
+ if (curr_fsm_state != fsm_state) {
+ if (--times == 0) {
+ pr_err("Timeout reached on FSM state change");
+ return -ETIMEDOUT;
+ }
+ msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS);
+ goto retry;
+ }
+ return 0;
+}
+
+#define MLXFW_ALIGN_DOWN(x, align_bits) ((x) & ~((1 << (align_bits)) - 1))
+#define MLXFW_ALIGN_UP(x, align_bits) \
+ MLXFW_ALIGN_DOWN((x) + ((1 << (align_bits)) - 1), (align_bits))
+
+static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev,
+ u32 fwhandle,
+ struct mlxfw_mfa2_component *comp)
+{
+ u16 comp_max_write_size;
+ u8 comp_align_bits;
+ u32 comp_max_size;
+ u16 block_size;
+ u8 *block_ptr;
+ u32 offset;
+ int err;
+
+ err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index,
+ &comp_max_size, &comp_align_bits,
+ &comp_max_write_size);
+ if (err)
+ return err;
+
+ comp_max_size = min_t(u32, comp_max_size, MLXFW_FSM_MAX_COMPONENT_SIZE);
+ if (comp->data_size > comp_max_size) {
+ pr_err("Component %d is of size %d which is bigger than limit %d\n",
+ comp->index, comp->data_size, comp_max_size);
+ return -EINVAL;
+ }
+
+ comp_max_write_size = MLXFW_ALIGN_DOWN(comp_max_write_size,
+ comp_align_bits);
+
+ pr_debug("Component update\n");
+ err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle,
+ comp->index,
+ comp->data_size);
+ if (err)
+ return err;
+
+ err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
+ MLXFW_FSM_STATE_DOWNLOAD);
+ if (err)
+ goto err_out;
+
+ pr_debug("Component download\n");
+ for (offset = 0;
+ offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits);
+ offset += comp_max_write_size) {
+ block_ptr = comp->data + offset;
+ block_size = (u16) min_t(u32, comp->data_size - offset,
+ comp_max_write_size);
+ err = mlxfw_dev->ops->fsm_block_download(mlxfw_dev, fwhandle,
+ block_ptr, block_size,
+ offset);
+ if (err)
+ goto err_out;
+ }
+
+ pr_debug("Component verify\n");
+ err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle,
+ comp->index);
+ if (err)
+ goto err_out;
+
+ err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED);
+ if (err)
+ goto err_out;
+ return 0;
+
+err_out:
+ mlxfw_dev->ops->fsm_cancel(mlxfw_dev, fwhandle);
+ return err;
+}
+
+static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ struct mlxfw_mfa2_file *mfa2_file)
+{
+ u32 component_count;
+ int err;
+ int i;
+
+ err = mlxfw_mfa2_file_component_count(mfa2_file, mlxfw_dev->psid,
+ mlxfw_dev->psid_size,
+ &component_count);
+ if (err) {
+ pr_err("Could not find device PSID in MFA2 file\n");
+ return err;
+ }
+
+ for (i = 0; i < component_count; i++) {
+ struct mlxfw_mfa2_component *comp;
+
+ comp = mlxfw_mfa2_file_component_get(mfa2_file, mlxfw_dev->psid,
+ mlxfw_dev->psid_size, i);
+ if (IS_ERR(comp))
+ return PTR_ERR(comp);
+
+ pr_info("Flashing component type %d\n", comp->index);
+ err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp);
+ mlxfw_mfa2_file_component_put(comp);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
+ const struct firmware *firmware)
+{
+ struct mlxfw_mfa2_file *mfa2_file;
+ u32 fwhandle;
+ int err;
+
+ if (!mlxfw_mfa2_check(firmware)) {
+ pr_err("Firmware file is not MFA2\n");
+ return -EINVAL;
+ }
+
+ mfa2_file = mlxfw_mfa2_file_init(firmware);
+ if (IS_ERR(mfa2_file))
+ return PTR_ERR(mfa2_file);
+
+ pr_info("Initialize firmware flash process\n");
+ err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle);
+ if (err) {
+ pr_err("Could not lock the firmware FSM\n");
+ goto err_fsm_lock;
+ }
+
+ err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle,
+ MLXFW_FSM_STATE_LOCKED);
+ if (err)
+ goto err_state_wait_idle_to_locked;
+
+ err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file);
+ if (err)
+ goto err_flash_components;
+
+ pr_debug("Activate image\n");
+ err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle);
+ if (err) {
+ pr_err("Could not activate the downloaded image\n");
+ goto err_fsm_activate;
+ }
+
+ err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED);
+ if (err)
+ goto err_state_wait_activate_to_locked;
+
+ pr_debug("Handle release\n");
+ mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle);
+
+ pr_info("Firmware flash done.\n");
+ mlxfw_mfa2_file_fini(mfa2_file);
+ return 0;
+
+err_state_wait_activate_to_locked:
+err_fsm_activate:
+err_flash_components:
+err_state_wait_idle_to_locked:
+ mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle);
+err_fsm_lock:
+ mlxfw_mfa2_file_fini(mfa2_file);
+ return err;
+}
+EXPORT_SYMBOL(mlxfw_firmware_flash);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox firmware flash lib");
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
new file mode 100644
index 000000000..b99169a38
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
@@ -0,0 +1,620 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) "mlxfw_mfa2: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/vmalloc.h>
+#include <linux/xz.h>
+#include "mlxfw_mfa2.h"
+#include "mlxfw_mfa2_file.h"
+#include "mlxfw_mfa2_tlv.h"
+#include "mlxfw_mfa2_format.h"
+#include "mlxfw_mfa2_tlv_multi.h"
+
+/* MFA2 FILE
+ * +----------------------------------+
+ * | MFA2 finger print |
+ * +----------------------------------+
+ * | package descriptor multi_tlv |
+ * | +------------------------------+ | +-----------------+
+ * | | package descriptor tlv +-----> |num_devices=n |
+ * | +------------------------------+ | |num_components=m |
+ * +----------------------------------+ |CB offset |
+ * | device descriptor multi_tlv | |... |
+ * | +------------------------------+ | | |
+ * | | PSID tlv | | +-----------------+
+ * | +------------------------------+ |
+ * | | component index tlv | |
+ * | +------------------------------+ |
+ * +----------------------------------+
+ * | component descriptor multi_tlv |
+ * | +------------------------------+ | +-----------------+
+ * | | component descriptor tlv +-----> |Among others: |
+ * | +------------------------------+ | |CB offset=o |
+ * +----------------------------------+ |comp index=i |
+ * | | |... |
+ * | | | |
+ * | | +-----------------+
+ * | COMPONENT BLOCK (CB) |
+ * | |
+ * | |
+ * | |
+ * +----------------------------------+
+ *
+ * On the top level, an MFA2 file contains:
+ * - Fingerprint
+ * - Several multi_tlvs (TLVs of type MLXFW_MFA2_TLV_MULTI, as defined in
+ * mlxfw_mfa2_format.h)
+ * - Compresses content block
+ *
+ * The first multi_tlv
+ * -------------------
+ * The first multi TLV is treated as package descriptor, and expected to have a
+ * first TLV child of type MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR which contains all
+ * the global information needed to parse the file. Among others, it contains
+ * the number of device descriptors and component descriptor following this
+ * multi TLV.
+ *
+ * The device descriptor multi_tlv
+ * -------------------------------
+ * The multi TLVs following the package descriptor are treated as device
+ * descriptor, and are expected to have the following children:
+ * - PSID TLV child of type MLXFW_MFA2_TLV_PSID containing that device PSID.
+ * - Component index of type MLXFW_MFA2_TLV_COMPONENT_PTR that contains that
+ * device component index.
+ *
+ * The component descriptor multi_tlv
+ * ----------------------------------
+ * The multi TLVs following the device descriptor multi TLVs are treated as
+ * component descriptor, and are expected to have a first child of type
+ * MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR that contains mostly the component index,
+ * needed for the flash process and the offset to the binary within the
+ * component block.
+ */
+
+static const u8 mlxfw_mfa2_fingerprint[] = "MLNX.MFA2.XZ.00!";
+static const int mlxfw_mfa2_fingerprint_len =
+ sizeof(mlxfw_mfa2_fingerprint) - 1;
+
+static const u8 mlxfw_mfa2_comp_magic[] = "#BIN.COMPONENT!#";
+static const int mlxfw_mfa2_comp_magic_len = sizeof(mlxfw_mfa2_comp_magic) - 1;
+
+bool mlxfw_mfa2_check(const struct firmware *fw)
+{
+ if (fw->size < sizeof(mlxfw_mfa2_fingerprint))
+ return false;
+
+ return memcmp(fw->data, mlxfw_mfa2_fingerprint,
+ mlxfw_mfa2_fingerprint_len) == 0;
+}
+
+static bool
+mlxfw_mfa2_tlv_multi_validate(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv_multi *multi)
+{
+ const struct mlxfw_mfa2_tlv *tlv;
+ u16 idx;
+
+ /* Check that all children are valid */
+ mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) {
+ if (!tlv) {
+ pr_err("Multi has invalid child");
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool
+mlxfw_mfa2_file_dev_validate(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv *dev_tlv,
+ u16 dev_idx)
+{
+ const struct mlxfw_mfa2_tlv_component_ptr *cptr;
+ const struct mlxfw_mfa2_tlv_multi *multi;
+ const struct mlxfw_mfa2_tlv_psid *psid;
+ const struct mlxfw_mfa2_tlv *tlv;
+ u16 cptr_count;
+ u16 cptr_idx;
+ int err;
+
+ pr_debug("Device %d\n", dev_idx);
+
+ multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, dev_tlv);
+ if (!multi) {
+ pr_err("Device %d is not a valid TLV error\n", dev_idx);
+ return false;
+ }
+
+ if (!mlxfw_mfa2_tlv_multi_validate(mfa2_file, multi))
+ return false;
+
+ /* Validate the device has PSID tlv */
+ tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, multi,
+ MLXFW_MFA2_TLV_PSID, 0);
+ if (!tlv) {
+ pr_err("Device %d does not have PSID\n", dev_idx);
+ return false;
+ }
+
+ psid = mlxfw_mfa2_tlv_psid_get(mfa2_file, tlv);
+ if (!psid) {
+ pr_err("Device %d PSID TLV is not valid\n", dev_idx);
+ return false;
+ }
+
+ print_hex_dump_debug(" -- Device PSID ", DUMP_PREFIX_NONE, 16, 16,
+ psid->psid, be16_to_cpu(tlv->len), true);
+
+ /* Validate the device has COMPONENT_PTR */
+ err = mlxfw_mfa2_tlv_multi_child_count(mfa2_file, multi,
+ MLXFW_MFA2_TLV_COMPONENT_PTR,
+ &cptr_count);
+ if (err)
+ return false;
+
+ if (cptr_count == 0) {
+ pr_err("Device %d has no components\n", dev_idx);
+ return false;
+ }
+
+ for (cptr_idx = 0; cptr_idx < cptr_count; cptr_idx++) {
+ tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, multi,
+ MLXFW_MFA2_TLV_COMPONENT_PTR,
+ cptr_idx);
+ if (!tlv)
+ return false;
+
+ cptr = mlxfw_mfa2_tlv_component_ptr_get(mfa2_file, tlv);
+ if (!cptr) {
+ pr_err("Device %d COMPONENT_PTR TLV is not valid\n",
+ dev_idx);
+ return false;
+ }
+
+ pr_debug(" -- Component index %d\n",
+ be16_to_cpu(cptr->component_index));
+ }
+ return true;
+}
+
+static bool
+mlxfw_mfa2_file_comp_validate(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv *comp_tlv,
+ u16 comp_idx)
+{
+ const struct mlxfw_mfa2_tlv_component_descriptor *cdesc;
+ const struct mlxfw_mfa2_tlv_multi *multi;
+ const struct mlxfw_mfa2_tlv *tlv;
+
+ pr_debug("Component %d\n", comp_idx);
+
+ multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, comp_tlv);
+ if (!multi) {
+ pr_err("Component %d is not a valid TLV error\n", comp_idx);
+ return false;
+ }
+
+ if (!mlxfw_mfa2_tlv_multi_validate(mfa2_file, multi))
+ return false;
+
+ /* Check that component have COMPONENT_DESCRIPTOR as first child */
+ tlv = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi);
+ if (!tlv) {
+ pr_err("Component descriptor %d multi TLV error\n", comp_idx);
+ return false;
+ }
+
+ cdesc = mlxfw_mfa2_tlv_component_descriptor_get(mfa2_file, tlv);
+ if (!cdesc) {
+ pr_err("Component %d does not have a valid descriptor\n",
+ comp_idx);
+ return false;
+ }
+ pr_debug(" -- Component type %d\n", be16_to_cpu(cdesc->identifier));
+ pr_debug(" -- Offset 0x%llx and size %d\n",
+ ((u64) be32_to_cpu(cdesc->cb_offset_h) << 32)
+ | be32_to_cpu(cdesc->cb_offset_l), be32_to_cpu(cdesc->size));
+
+ return true;
+}
+
+static bool mlxfw_mfa2_file_validate(const struct mlxfw_mfa2_file *mfa2_file)
+{
+ const struct mlxfw_mfa2_tlv *tlv;
+ u16 idx;
+
+ pr_debug("Validating file\n");
+
+ /* check that all the devices exist */
+ mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, mfa2_file->first_dev,
+ mfa2_file->dev_count) {
+ if (!tlv) {
+ pr_err("Device TLV error\n");
+ return false;
+ }
+
+ /* Check each device */
+ if (!mlxfw_mfa2_file_dev_validate(mfa2_file, tlv, idx))
+ return false;
+ }
+
+ /* check that all the components exist */
+ mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, mfa2_file->first_component,
+ mfa2_file->component_count) {
+ if (!tlv) {
+ pr_err("Device TLV error\n");
+ return false;
+ }
+
+ /* Check each component */
+ if (!mlxfw_mfa2_file_comp_validate(mfa2_file, tlv, idx))
+ return false;
+ }
+ return true;
+}
+
+struct mlxfw_mfa2_file *mlxfw_mfa2_file_init(const struct firmware *fw)
+{
+ const struct mlxfw_mfa2_tlv_package_descriptor *pd;
+ const struct mlxfw_mfa2_tlv_multi *multi;
+ const struct mlxfw_mfa2_tlv *multi_child;
+ const struct mlxfw_mfa2_tlv *first_tlv;
+ struct mlxfw_mfa2_file *mfa2_file;
+ const void *first_tlv_ptr;
+ const void *cb_top_ptr;
+
+ mfa2_file = kcalloc(1, sizeof(*mfa2_file), GFP_KERNEL);
+ if (!mfa2_file)
+ return ERR_PTR(-ENOMEM);
+
+ mfa2_file->fw = fw;
+ first_tlv_ptr = fw->data + NLA_ALIGN(mlxfw_mfa2_fingerprint_len);
+ first_tlv = mlxfw_mfa2_tlv_get(mfa2_file, first_tlv_ptr);
+ if (!first_tlv) {
+ pr_err("Could not parse package descriptor TLV\n");
+ goto err_out;
+ }
+
+ multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, first_tlv);
+ if (!multi) {
+ pr_err("First TLV is not of valid multi type\n");
+ goto err_out;
+ }
+
+ multi_child = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi);
+ if (!multi_child)
+ goto err_out;
+
+ pd = mlxfw_mfa2_tlv_package_descriptor_get(mfa2_file, multi_child);
+ if (!pd) {
+ pr_err("Could not parse package descriptor TLV\n");
+ goto err_out;
+ }
+
+ mfa2_file->first_dev = mlxfw_mfa2_tlv_next(mfa2_file, first_tlv);
+ if (!mfa2_file->first_dev) {
+ pr_err("First device TLV is not valid\n");
+ goto err_out;
+ }
+
+ mfa2_file->dev_count = be16_to_cpu(pd->num_devices);
+ mfa2_file->first_component = mlxfw_mfa2_tlv_advance(mfa2_file,
+ mfa2_file->first_dev,
+ mfa2_file->dev_count);
+ mfa2_file->component_count = be16_to_cpu(pd->num_components);
+ mfa2_file->cb = fw->data + NLA_ALIGN(be32_to_cpu(pd->cb_offset));
+ if (!mlxfw_mfa2_valid_ptr(mfa2_file, mfa2_file->cb)) {
+ pr_err("Component block is out side the file\n");
+ goto err_out;
+ }
+ mfa2_file->cb_archive_size = be32_to_cpu(pd->cb_archive_size);
+ cb_top_ptr = mfa2_file->cb + mfa2_file->cb_archive_size - 1;
+ if (!mlxfw_mfa2_valid_ptr(mfa2_file, cb_top_ptr)) {
+ pr_err("Component block size is too big\n");
+ goto err_out;
+ }
+
+ if (!mlxfw_mfa2_file_validate(mfa2_file))
+ goto err_out;
+ return mfa2_file;
+err_out:
+ kfree(mfa2_file);
+ return ERR_PTR(-EINVAL);
+}
+
+static const struct mlxfw_mfa2_tlv_multi *
+mlxfw_mfa2_tlv_dev_get(const struct mlxfw_mfa2_file *mfa2_file,
+ const char *psid, u16 psid_size)
+{
+ const struct mlxfw_mfa2_tlv_psid *tlv_psid;
+ const struct mlxfw_mfa2_tlv_multi *dev_multi;
+ const struct mlxfw_mfa2_tlv *dev_tlv;
+ const struct mlxfw_mfa2_tlv *tlv;
+ u32 idx;
+
+ /* for each device tlv */
+ mlxfw_mfa2_tlv_foreach(mfa2_file, dev_tlv, idx, mfa2_file->first_dev,
+ mfa2_file->dev_count) {
+ if (!dev_tlv)
+ return NULL;
+
+ dev_multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, dev_tlv);
+ if (!dev_multi)
+ return NULL;
+
+ /* find psid child and compare */
+ tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, dev_multi,
+ MLXFW_MFA2_TLV_PSID, 0);
+ if (!tlv)
+ return NULL;
+ if (be16_to_cpu(tlv->len) != psid_size)
+ continue;
+
+ tlv_psid = mlxfw_mfa2_tlv_psid_get(mfa2_file, tlv);
+ if (!tlv_psid)
+ return NULL;
+
+ if (memcmp(psid, tlv_psid->psid, psid_size) == 0)
+ return dev_multi;
+ }
+
+ return NULL;
+}
+
+int mlxfw_mfa2_file_component_count(const struct mlxfw_mfa2_file *mfa2_file,
+ const char *psid, u32 psid_size,
+ u32 *p_count)
+{
+ const struct mlxfw_mfa2_tlv_multi *dev_multi;
+ u16 count;
+ int err;
+
+ dev_multi = mlxfw_mfa2_tlv_dev_get(mfa2_file, psid, psid_size);
+ if (!dev_multi)
+ return -EINVAL;
+
+ err = mlxfw_mfa2_tlv_multi_child_count(mfa2_file, dev_multi,
+ MLXFW_MFA2_TLV_COMPONENT_PTR,
+ &count);
+ if (err)
+ return err;
+
+ *p_count = count;
+ return 0;
+}
+
+static int mlxfw_mfa2_xz_dec_run(struct xz_dec *xz_dec, struct xz_buf *xz_buf,
+ bool *finished)
+{
+ enum xz_ret xz_ret;
+
+ xz_ret = xz_dec_run(xz_dec, xz_buf);
+
+ switch (xz_ret) {
+ case XZ_STREAM_END:
+ *finished = true;
+ return 0;
+ case XZ_OK:
+ *finished = false;
+ return 0;
+ case XZ_MEM_ERROR:
+ pr_err("xz no memory\n");
+ return -ENOMEM;
+ case XZ_DATA_ERROR:
+ pr_err("xz file corrupted\n");
+ return -EINVAL;
+ case XZ_FORMAT_ERROR:
+ pr_err("xz format not found\n");
+ return -EINVAL;
+ case XZ_OPTIONS_ERROR:
+ pr_err("unsupported xz option\n");
+ return -EINVAL;
+ case XZ_MEMLIMIT_ERROR:
+ pr_err("xz dictionary too small\n");
+ return -EINVAL;
+ default:
+ pr_err("xz error %d\n", xz_ret);
+ return -EINVAL;
+ }
+}
+
+static int mlxfw_mfa2_file_cb_offset_xz(const struct mlxfw_mfa2_file *mfa2_file,
+ off_t off, size_t size, u8 *buf)
+{
+ struct xz_dec *xz_dec;
+ struct xz_buf dec_buf;
+ off_t curr_off = 0;
+ bool finished;
+ int err;
+
+ xz_dec = xz_dec_init(XZ_DYNALLOC, (u32) -1);
+ if (!xz_dec)
+ return -EINVAL;
+
+ dec_buf.in_size = mfa2_file->cb_archive_size;
+ dec_buf.in = mfa2_file->cb;
+ dec_buf.in_pos = 0;
+ dec_buf.out = buf;
+
+ /* decode up to the offset */
+ do {
+ dec_buf.out_pos = 0;
+ dec_buf.out_size = min_t(size_t, size, off - curr_off);
+ if (dec_buf.out_size == 0)
+ break;
+
+ err = mlxfw_mfa2_xz_dec_run(xz_dec, &dec_buf, &finished);
+ if (err)
+ goto out;
+ if (finished) {
+ pr_err("xz section too short\n");
+ err = -EINVAL;
+ goto out;
+ }
+ curr_off += dec_buf.out_pos;
+ } while (curr_off != off);
+
+ /* decode the needed section */
+ dec_buf.out_pos = 0;
+ dec_buf.out_size = size;
+ err = mlxfw_mfa2_xz_dec_run(xz_dec, &dec_buf, &finished);
+out:
+ xz_dec_end(xz_dec);
+ return err;
+}
+
+static const struct mlxfw_mfa2_tlv_component_descriptor *
+mlxfw_mfa2_file_component_tlv_get(const struct mlxfw_mfa2_file *mfa2_file,
+ u16 comp_index)
+{
+ const struct mlxfw_mfa2_tlv_multi *multi;
+ const struct mlxfw_mfa2_tlv *multi_child;
+ const struct mlxfw_mfa2_tlv *comp_tlv;
+
+ if (comp_index > mfa2_file->component_count)
+ return NULL;
+
+ comp_tlv = mlxfw_mfa2_tlv_advance(mfa2_file, mfa2_file->first_component,
+ comp_index);
+ if (!comp_tlv)
+ return NULL;
+
+ multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, comp_tlv);
+ if (!multi)
+ return NULL;
+
+ multi_child = mlxfw_mfa2_tlv_multi_child(mfa2_file, multi);
+ if (!multi_child)
+ return NULL;
+
+ return mlxfw_mfa2_tlv_component_descriptor_get(mfa2_file, multi_child);
+}
+
+struct mlxfw_mfa2_comp_data {
+ struct mlxfw_mfa2_component comp;
+ u8 buff[0];
+};
+
+static const struct mlxfw_mfa2_tlv_component_descriptor *
+mlxfw_mfa2_file_component_find(const struct mlxfw_mfa2_file *mfa2_file,
+ const char *psid, int psid_size,
+ int component_index)
+{
+ const struct mlxfw_mfa2_tlv_component_ptr *cptr;
+ const struct mlxfw_mfa2_tlv_multi *dev_multi;
+ const struct mlxfw_mfa2_tlv *cptr_tlv;
+ u16 comp_idx;
+
+ dev_multi = mlxfw_mfa2_tlv_dev_get(mfa2_file, psid, psid_size);
+ if (!dev_multi)
+ return NULL;
+
+ cptr_tlv = mlxfw_mfa2_tlv_multi_child_find(mfa2_file, dev_multi,
+ MLXFW_MFA2_TLV_COMPONENT_PTR,
+ component_index);
+ if (!cptr_tlv)
+ return NULL;
+
+ cptr = mlxfw_mfa2_tlv_component_ptr_get(mfa2_file, cptr_tlv);
+ if (!cptr)
+ return NULL;
+
+ comp_idx = be16_to_cpu(cptr->component_index);
+ return mlxfw_mfa2_file_component_tlv_get(mfa2_file, comp_idx);
+}
+
+struct mlxfw_mfa2_component *
+mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file,
+ const char *psid, int psid_size,
+ int component_index)
+{
+ const struct mlxfw_mfa2_tlv_component_descriptor *comp;
+ struct mlxfw_mfa2_comp_data *comp_data;
+ u32 comp_buf_size;
+ off_t cb_offset;
+ u32 comp_size;
+ int err;
+
+ comp = mlxfw_mfa2_file_component_find(mfa2_file, psid, psid_size,
+ component_index);
+ if (!comp)
+ return ERR_PTR(-EINVAL);
+
+ cb_offset = (u64) be32_to_cpu(comp->cb_offset_h) << 32 |
+ be32_to_cpu(comp->cb_offset_l);
+ comp_size = be32_to_cpu(comp->size);
+ comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len;
+
+ comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
+ if (!comp_data)
+ return ERR_PTR(-ENOMEM);
+ comp_data->comp.data_size = comp_size;
+ comp_data->comp.index = be16_to_cpu(comp->identifier);
+ err = mlxfw_mfa2_file_cb_offset_xz(mfa2_file, cb_offset, comp_buf_size,
+ comp_data->buff);
+ if (err) {
+ pr_err("Component could not be reached in CB\n");
+ goto err_out;
+ }
+
+ if (memcmp(comp_data->buff, mlxfw_mfa2_comp_magic,
+ mlxfw_mfa2_comp_magic_len) != 0) {
+ pr_err("Component has wrong magic\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len;
+ return &comp_data->comp;
+err_out:
+ vfree(comp_data);
+ return ERR_PTR(err);
+}
+
+void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp)
+{
+ const struct mlxfw_mfa2_comp_data *comp_data;
+
+ comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp);
+ vfree(comp_data);
+}
+
+void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file)
+{
+ kfree(mfa2_file);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h
new file mode 100644
index 000000000..20472aa13
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h
@@ -0,0 +1,66 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXFW_MFA2_H
+#define _MLXFW_MFA2_H
+
+#include <linux/firmware.h>
+#include "mlxfw.h"
+
+struct mlxfw_mfa2_component {
+ u16 index;
+ u32 data_size;
+ u8 *data;
+};
+
+struct mlxfw_mfa2_file;
+
+bool mlxfw_mfa2_check(const struct firmware *fw);
+
+struct mlxfw_mfa2_file *mlxfw_mfa2_file_init(const struct firmware *fw);
+
+int mlxfw_mfa2_file_component_count(const struct mlxfw_mfa2_file *mfa2_file,
+ const char *psid, u32 psid_size,
+ u32 *p_count);
+
+struct mlxfw_mfa2_component *
+mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file,
+ const char *psid, int psid_size,
+ int component_index);
+
+void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *component);
+
+void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h
new file mode 100644
index 000000000..f667942b1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h
@@ -0,0 +1,60 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXFW_MFA2_FILE_H
+#define _MLXFW_MFA2_FILE_H
+
+#include <linux/firmware.h>
+#include <linux/kernel.h>
+
+struct mlxfw_mfa2_file {
+ const struct firmware *fw;
+ const struct mlxfw_mfa2_tlv *first_dev;
+ u16 dev_count;
+ const struct mlxfw_mfa2_tlv *first_component;
+ u16 component_count;
+ const void *cb; /* components block */
+ u32 cb_archive_size; /* size of compressed components block */
+};
+
+static inline bool mlxfw_mfa2_valid_ptr(const struct mlxfw_mfa2_file *mfa2_file,
+ const void *ptr)
+{
+ const void *valid_to = mfa2_file->fw->data + mfa2_file->fw->size;
+ const void *valid_from = mfa2_file->fw->data;
+
+ return ptr > valid_from && ptr < valid_to;
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
new file mode 100644
index 000000000..dd66737c0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
@@ -0,0 +1,103 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _MLXFW_MFA2_FORMAT_H
+#define _MLXFW_MFA2_FORMAT_H
+
+#include "mlxfw_mfa2_file.h"
+#include "mlxfw_mfa2_tlv.h"
+
+enum mlxfw_mfa2_tlv_type {
+ MLXFW_MFA2_TLV_MULTI_PART = 0x01,
+ MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR = 0x02,
+ MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR = 0x04,
+ MLXFW_MFA2_TLV_COMPONENT_PTR = 0x22,
+ MLXFW_MFA2_TLV_PSID = 0x2A,
+};
+
+enum mlxfw_mfa2_compression_type {
+ MLXFW_MFA2_COMPRESSION_TYPE_NONE,
+ MLXFW_MFA2_COMPRESSION_TYPE_XZ,
+};
+
+struct mlxfw_mfa2_tlv_package_descriptor {
+ __be16 num_components;
+ __be16 num_devices;
+ __be32 cb_offset;
+ __be32 cb_archive_size;
+ __be32 cb_size_h;
+ __be32 cb_size_l;
+ u8 padding[3];
+ u8 cv_compression;
+ __be32 user_data_offset;
+} __packed;
+
+MLXFW_MFA2_TLV(package_descriptor, struct mlxfw_mfa2_tlv_package_descriptor,
+ MLXFW_MFA2_TLV_PACKAGE_DESCRIPTOR);
+
+struct mlxfw_mfa2_tlv_multi {
+ __be16 num_extensions;
+ __be16 total_len;
+} __packed;
+
+MLXFW_MFA2_TLV(multi, struct mlxfw_mfa2_tlv_multi,
+ MLXFW_MFA2_TLV_MULTI_PART);
+
+struct mlxfw_mfa2_tlv_psid {
+ u8 psid[0];
+} __packed;
+
+MLXFW_MFA2_TLV_VARSIZE(psid, struct mlxfw_mfa2_tlv_psid,
+ MLXFW_MFA2_TLV_PSID);
+
+struct mlxfw_mfa2_tlv_component_ptr {
+ __be16 storage_id;
+ __be16 component_index;
+ __be32 storage_address;
+} __packed;
+
+MLXFW_MFA2_TLV(component_ptr, struct mlxfw_mfa2_tlv_component_ptr,
+ MLXFW_MFA2_TLV_COMPONENT_PTR);
+
+struct mlxfw_mfa2_tlv_component_descriptor {
+ __be16 pldm_classification;
+ __be16 identifier;
+ __be32 cb_offset_h;
+ __be32 cb_offset_l;
+ __be32 size;
+} __packed;
+
+MLXFW_MFA2_TLV(component_descriptor, struct mlxfw_mfa2_tlv_component_descriptor,
+ MLXFW_MFA2_TLV_COMPONENT_DESCRIPTOR);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h
new file mode 100644
index 000000000..cc013e77b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h
@@ -0,0 +1,98 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXFW_MFA2_TLV_H
+#define _MLXFW_MFA2_TLV_H
+
+#include <linux/kernel.h>
+#include "mlxfw_mfa2_file.h"
+
+struct mlxfw_mfa2_tlv {
+ u8 version;
+ u8 type;
+ __be16 len;
+ u8 data[0];
+} __packed;
+
+static inline const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_get(const struct mlxfw_mfa2_file *mfa2_file, const void *ptr)
+{
+ if (!mlxfw_mfa2_valid_ptr(mfa2_file, ptr) ||
+ !mlxfw_mfa2_valid_ptr(mfa2_file, ptr + sizeof(struct mlxfw_mfa2_tlv)))
+ return NULL;
+ return ptr;
+}
+
+static inline const void *
+mlxfw_mfa2_tlv_payload_get(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv *tlv, u8 payload_type,
+ size_t payload_size, bool varsize)
+{
+ void *tlv_top;
+
+ tlv_top = (void *) tlv + be16_to_cpu(tlv->len) - 1;
+ if (!mlxfw_mfa2_valid_ptr(mfa2_file, tlv) ||
+ !mlxfw_mfa2_valid_ptr(mfa2_file, tlv_top))
+ return NULL;
+ if (tlv->type != payload_type)
+ return NULL;
+ if (varsize && (be16_to_cpu(tlv->len) < payload_size))
+ return NULL;
+ if (!varsize && (be16_to_cpu(tlv->len) != payload_size))
+ return NULL;
+
+ return tlv->data;
+}
+
+#define MLXFW_MFA2_TLV(name, payload_type, tlv_type) \
+static inline const payload_type * \
+mlxfw_mfa2_tlv_ ## name ## _get(const struct mlxfw_mfa2_file *mfa2_file, \
+ const struct mlxfw_mfa2_tlv *tlv) \
+{ \
+ return mlxfw_mfa2_tlv_payload_get(mfa2_file, tlv, \
+ tlv_type, sizeof(payload_type), \
+ false); \
+}
+
+#define MLXFW_MFA2_TLV_VARSIZE(name, payload_type, tlv_type) \
+static inline const payload_type * \
+mlxfw_mfa2_tlv_ ## name ## _get(const struct mlxfw_mfa2_file *mfa2_file, \
+ const struct mlxfw_mfa2_tlv *tlv) \
+{ \
+ return mlxfw_mfa2_tlv_payload_get(mfa2_file, tlv, \
+ tlv_type, sizeof(payload_type), \
+ true); \
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
new file mode 100644
index 000000000..0094b92a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
@@ -0,0 +1,126 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) "MFA2: " fmt
+
+#include "mlxfw_mfa2_tlv_multi.h"
+#include <uapi/linux/netlink.h>
+
+#define MLXFW_MFA2_TLV_TOTAL_SIZE(tlv) \
+ NLA_ALIGN(sizeof(*(tlv)) + be16_to_cpu((tlv)->len))
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_multi_child(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv_multi *multi)
+{
+ size_t multi_len;
+
+ multi_len = NLA_ALIGN(sizeof(struct mlxfw_mfa2_tlv_multi));
+ return mlxfw_mfa2_tlv_get(mfa2_file, (void *) multi + multi_len);
+}
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv *tlv)
+{
+ const struct mlxfw_mfa2_tlv_multi *multi;
+ u16 tlv_len;
+ void *next;
+
+ tlv_len = MLXFW_MFA2_TLV_TOTAL_SIZE(tlv);
+
+ if (tlv->type == MLXFW_MFA2_TLV_MULTI_PART) {
+ multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, tlv);
+ tlv_len = NLA_ALIGN(tlv_len + be16_to_cpu(multi->total_len));
+ }
+
+ next = (void *) tlv + tlv_len;
+ return mlxfw_mfa2_tlv_get(mfa2_file, next);
+}
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_advance(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv *from_tlv, u16 count)
+{
+ const struct mlxfw_mfa2_tlv *tlv;
+ u16 idx;
+
+ mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, from_tlv, count)
+ if (!tlv)
+ return NULL;
+ return tlv;
+}
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_multi_child_find(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv_multi *multi,
+ enum mlxfw_mfa2_tlv_type type, u16 index)
+{
+ const struct mlxfw_mfa2_tlv *tlv;
+ u16 skip = 0;
+ u16 idx;
+
+ mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) {
+ if (!tlv) {
+ pr_err("TLV parsing error\n");
+ return NULL;
+ }
+ if (tlv->type == type)
+ if (skip++ == index)
+ return tlv;
+ }
+ return NULL;
+}
+
+int mlxfw_mfa2_tlv_multi_child_count(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv_multi *multi,
+ enum mlxfw_mfa2_tlv_type type,
+ u16 *p_count)
+{
+ const struct mlxfw_mfa2_tlv *tlv;
+ u16 count = 0;
+ u16 idx;
+
+ mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) {
+ if (!tlv) {
+ pr_err("TLV parsing error\n");
+ return -EINVAL;
+ }
+
+ if (tlv->type == type)
+ count++;
+ }
+ *p_count = count;
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h
new file mode 100644
index 000000000..2c667894f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h
@@ -0,0 +1,71 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _MLXFW_MFA2_TLV_MULTI_H
+#define _MLXFW_MFA2_TLV_MULTI_H
+
+#include "mlxfw_mfa2_tlv.h"
+#include "mlxfw_mfa2_format.h"
+#include "mlxfw_mfa2_file.h"
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_multi_child(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv_multi *multi);
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv *tlv);
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_advance(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv *from_tlv, u16 count);
+
+const struct mlxfw_mfa2_tlv *
+mlxfw_mfa2_tlv_multi_child_find(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv_multi *multi,
+ enum mlxfw_mfa2_tlv_type type, u16 index);
+
+int mlxfw_mfa2_tlv_multi_child_count(const struct mlxfw_mfa2_file *mfa2_file,
+ const struct mlxfw_mfa2_tlv_multi *multi,
+ enum mlxfw_mfa2_tlv_type type,
+ u16 *p_count);
+
+#define mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, from_tlv, count) \
+ for (idx = 0, tlv = from_tlv; idx < (count); \
+ idx++, tlv = mlxfw_mfa2_tlv_next(mfa2_file, tlv))
+
+#define mlxfw_mfa2_tlv_multi_foreach(mfa2_file, tlv, idx, multi) \
+ mlxfw_mfa2_tlv_foreach(mfa2_file, tlv, idx, \
+ mlxfw_mfa2_tlv_multi_child(mfa2_file, multi), \
+ be16_to_cpu(multi->num_extensions) + 1)
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
new file mode 100644
index 000000000..8a291eb36
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -0,0 +1,109 @@
+#
+# Mellanox switch drivers configuration
+#
+
+config MLXSW_CORE
+ tristate "Mellanox Technologies Switch ASICs support"
+ depends on MAY_USE_DEVLINK
+ ---help---
+ This driver supports Mellanox Technologies Switch ASICs family.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxsw_core.
+
+config MLXSW_CORE_HWMON
+ bool "HWMON support for Mellanox Technologies Switch ASICs"
+ depends on MLXSW_CORE && HWMON
+ depends on !(MLXSW_CORE=y && HWMON=m)
+ default y
+ ---help---
+ Say Y here if you want to expose HWMON interface on mlxsw devices.
+
+config MLXSW_CORE_THERMAL
+ bool "Thermal zone support for Mellanox Technologies Switch ASICs"
+ depends on MLXSW_CORE && THERMAL
+ depends on !(MLXSW_CORE=y && THERMAL=m)
+ default y
+ ---help---
+ Say Y here if you want to automatically control fans speed according
+ ambient temperature reported by ASIC.
+
+config MLXSW_PCI
+ tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
+ depends on PCI && HAS_IOMEM && MLXSW_CORE
+ default m
+ ---help---
+ This is PCI bus implementation for Mellanox Technologies Switch ASICs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxsw_pci.
+
+config MLXSW_I2C
+ tristate "I2C bus implementation for Mellanox Technologies Switch ASICs"
+ depends on I2C && MLXSW_CORE
+ default m
+ ---help---
+ This is I2C bus implementation for Mellanox Technologies Switch ASICs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxsw_i2c.
+
+config MLXSW_SWITCHIB
+ tristate "Mellanox Technologies SwitchIB and SwitchIB-2 support"
+ depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
+ default m
+ ---help---
+ This driver supports Mellanox Technologies SwitchIB and SwitchIB-2
+ Infiniband Switch ASICs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxsw_switchib.
+
+config MLXSW_SWITCHX2
+ tristate "Mellanox Technologies SwitchX-2 support"
+ depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
+ default m
+ ---help---
+ This driver supports Mellanox Technologies SwitchX-2 Ethernet
+ Switch ASICs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxsw_switchx2.
+
+config MLXSW_SPECTRUM
+ tristate "Mellanox Technologies Spectrum support"
+ depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
+ depends on PSAMPLE || PSAMPLE=n
+ depends on BRIDGE || BRIDGE=n
+ depends on IPV6 || IPV6=n
+ depends on NET_IPGRE || NET_IPGRE=n
+ depends on IPV6_GRE || IPV6_GRE=n
+ select GENERIC_ALLOCATOR
+ select PARMAN
+ select MLXFW
+ default m
+ ---help---
+ This driver supports Mellanox Technologies Spectrum Ethernet
+ Switch ASICs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxsw_spectrum.
+
+config MLXSW_SPECTRUM_DCB
+ bool "Data Center Bridging (DCB) support"
+ depends on MLXSW_SPECTRUM && DCB
+ default y
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
+
+config MLXSW_MINIMAL
+ tristate "Mellanox Technologies minimal I2C support"
+ depends on MLXSW_CORE && MLXSW_I2C
+ default m
+ ---help---
+ This driver supports I2C access for Mellanox Technologies Switch
+ ASICs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called mlxsw_minimal.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
new file mode 100644
index 000000000..68fa44a41
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o
+mlxsw_core-objs := core.o core_acl_flex_keys.o \
+ core_acl_flex_actions.o
+mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o
+mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o
+obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o
+mlxsw_pci-objs := pci.o
+obj-$(CONFIG_MLXSW_I2C) += mlxsw_i2c.o
+mlxsw_i2c-objs := i2c.o
+obj-$(CONFIG_MLXSW_SWITCHIB) += mlxsw_switchib.o
+mlxsw_switchib-objs := switchib.o
+obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o
+mlxsw_switchx2-objs := switchx2.o
+obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o
+mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
+ spectrum_switchdev.o spectrum_router.o \
+ spectrum1_kvdl.o spectrum2_kvdl.o \
+ spectrum_kvdl.o \
+ spectrum_acl_tcam.o spectrum_acl_ctcam.o \
+ spectrum_acl_atcam.o spectrum_acl_erp.o \
+ spectrum1_acl_tcam.o spectrum2_acl_tcam.o \
+ spectrum_acl.o \
+ spectrum_flower.o spectrum_cnt.o \
+ spectrum_fid.o spectrum_ipip.o \
+ spectrum_acl_flex_actions.o \
+ spectrum_acl_flex_keys.o \
+ spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
+ spectrum_mr_tcam.o spectrum_mr.o \
+ spectrum_qdisc.o spectrum_span.o
+mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
+mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
+obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
+mlxsw_minimal-objs := minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
new file mode 100644
index 000000000..0772e4339
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -0,0 +1,1180 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_CMD_H
+#define _MLXSW_CMD_H
+
+#include "item.h"
+
+#define MLXSW_CMD_MBOX_SIZE 4096
+
+static inline char *mlxsw_cmd_mbox_alloc(void)
+{
+ return kzalloc(MLXSW_CMD_MBOX_SIZE, GFP_KERNEL);
+}
+
+static inline void mlxsw_cmd_mbox_free(char *mbox)
+{
+ kfree(mbox);
+}
+
+static inline void mlxsw_cmd_mbox_zero(char *mbox)
+{
+ memset(mbox, 0, MLXSW_CMD_MBOX_SIZE);
+}
+
+struct mlxsw_core;
+
+int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
+ u32 in_mod, bool out_mbox_direct, bool reset_ok,
+ char *in_mbox, size_t in_mbox_size,
+ char *out_mbox, size_t out_mbox_size);
+
+static inline int mlxsw_cmd_exec_in(struct mlxsw_core *mlxsw_core, u16 opcode,
+ u8 opcode_mod, u32 in_mod, char *in_mbox,
+ size_t in_mbox_size)
+{
+ return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false,
+ false, in_mbox, in_mbox_size, NULL, 0);
+}
+
+static inline int mlxsw_cmd_exec_out(struct mlxsw_core *mlxsw_core, u16 opcode,
+ u8 opcode_mod, u32 in_mod,
+ bool out_mbox_direct,
+ char *out_mbox, size_t out_mbox_size)
+{
+ return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod,
+ out_mbox_direct, false, NULL, 0,
+ out_mbox, out_mbox_size);
+}
+
+static inline int mlxsw_cmd_exec_none(struct mlxsw_core *mlxsw_core, u16 opcode,
+ u8 opcode_mod, u32 in_mod)
+{
+ return mlxsw_cmd_exec(mlxsw_core, opcode, opcode_mod, in_mod, false,
+ false, NULL, 0, NULL, 0);
+}
+
+enum mlxsw_cmd_opcode {
+ MLXSW_CMD_OPCODE_QUERY_FW = 0x004,
+ MLXSW_CMD_OPCODE_QUERY_BOARDINFO = 0x006,
+ MLXSW_CMD_OPCODE_QUERY_AQ_CAP = 0x003,
+ MLXSW_CMD_OPCODE_MAP_FA = 0xFFF,
+ MLXSW_CMD_OPCODE_UNMAP_FA = 0xFFE,
+ MLXSW_CMD_OPCODE_CONFIG_PROFILE = 0x100,
+ MLXSW_CMD_OPCODE_ACCESS_REG = 0x040,
+ MLXSW_CMD_OPCODE_SW2HW_DQ = 0x201,
+ MLXSW_CMD_OPCODE_HW2SW_DQ = 0x202,
+ MLXSW_CMD_OPCODE_2ERR_DQ = 0x01E,
+ MLXSW_CMD_OPCODE_QUERY_DQ = 0x022,
+ MLXSW_CMD_OPCODE_SW2HW_CQ = 0x016,
+ MLXSW_CMD_OPCODE_HW2SW_CQ = 0x017,
+ MLXSW_CMD_OPCODE_QUERY_CQ = 0x018,
+ MLXSW_CMD_OPCODE_SW2HW_EQ = 0x013,
+ MLXSW_CMD_OPCODE_HW2SW_EQ = 0x014,
+ MLXSW_CMD_OPCODE_QUERY_EQ = 0x015,
+ MLXSW_CMD_OPCODE_QUERY_RESOURCES = 0x101,
+};
+
+static inline const char *mlxsw_cmd_opcode_str(u16 opcode)
+{
+ switch (opcode) {
+ case MLXSW_CMD_OPCODE_QUERY_FW:
+ return "QUERY_FW";
+ case MLXSW_CMD_OPCODE_QUERY_BOARDINFO:
+ return "QUERY_BOARDINFO";
+ case MLXSW_CMD_OPCODE_QUERY_AQ_CAP:
+ return "QUERY_AQ_CAP";
+ case MLXSW_CMD_OPCODE_MAP_FA:
+ return "MAP_FA";
+ case MLXSW_CMD_OPCODE_UNMAP_FA:
+ return "UNMAP_FA";
+ case MLXSW_CMD_OPCODE_CONFIG_PROFILE:
+ return "CONFIG_PROFILE";
+ case MLXSW_CMD_OPCODE_ACCESS_REG:
+ return "ACCESS_REG";
+ case MLXSW_CMD_OPCODE_SW2HW_DQ:
+ return "SW2HW_DQ";
+ case MLXSW_CMD_OPCODE_HW2SW_DQ:
+ return "HW2SW_DQ";
+ case MLXSW_CMD_OPCODE_2ERR_DQ:
+ return "2ERR_DQ";
+ case MLXSW_CMD_OPCODE_QUERY_DQ:
+ return "QUERY_DQ";
+ case MLXSW_CMD_OPCODE_SW2HW_CQ:
+ return "SW2HW_CQ";
+ case MLXSW_CMD_OPCODE_HW2SW_CQ:
+ return "HW2SW_CQ";
+ case MLXSW_CMD_OPCODE_QUERY_CQ:
+ return "QUERY_CQ";
+ case MLXSW_CMD_OPCODE_SW2HW_EQ:
+ return "SW2HW_EQ";
+ case MLXSW_CMD_OPCODE_HW2SW_EQ:
+ return "HW2SW_EQ";
+ case MLXSW_CMD_OPCODE_QUERY_EQ:
+ return "QUERY_EQ";
+ case MLXSW_CMD_OPCODE_QUERY_RESOURCES:
+ return "QUERY_RESOURCES";
+ default:
+ return "*UNKNOWN*";
+ }
+}
+
+enum mlxsw_cmd_status {
+ /* Command execution succeeded. */
+ MLXSW_CMD_STATUS_OK = 0x00,
+ /* Internal error (e.g. bus error) occurred while processing command. */
+ MLXSW_CMD_STATUS_INTERNAL_ERR = 0x01,
+ /* Operation/command not supported or opcode modifier not supported. */
+ MLXSW_CMD_STATUS_BAD_OP = 0x02,
+ /* Parameter not supported, parameter out of range. */
+ MLXSW_CMD_STATUS_BAD_PARAM = 0x03,
+ /* System was not enabled or bad system state. */
+ MLXSW_CMD_STATUS_BAD_SYS_STATE = 0x04,
+ /* Attempt to access reserved or unallocated resource, or resource in
+ * inappropriate ownership.
+ */
+ MLXSW_CMD_STATUS_BAD_RESOURCE = 0x05,
+ /* Requested resource is currently executing a command. */
+ MLXSW_CMD_STATUS_RESOURCE_BUSY = 0x06,
+ /* Required capability exceeds device limits. */
+ MLXSW_CMD_STATUS_EXCEED_LIM = 0x08,
+ /* Resource is not in the appropriate state or ownership. */
+ MLXSW_CMD_STATUS_BAD_RES_STATE = 0x09,
+ /* Index out of range (might be beyond table size or attempt to
+ * access a reserved resource).
+ */
+ MLXSW_CMD_STATUS_BAD_INDEX = 0x0A,
+ /* NVMEM checksum/CRC failed. */
+ MLXSW_CMD_STATUS_BAD_NVMEM = 0x0B,
+ /* Device is currently running reset */
+ MLXSW_CMD_STATUS_RUNNING_RESET = 0x26,
+ /* Bad management packet (silently discarded). */
+ MLXSW_CMD_STATUS_BAD_PKT = 0x30,
+};
+
+static inline const char *mlxsw_cmd_status_str(u8 status)
+{
+ switch (status) {
+ case MLXSW_CMD_STATUS_OK:
+ return "OK";
+ case MLXSW_CMD_STATUS_INTERNAL_ERR:
+ return "INTERNAL_ERR";
+ case MLXSW_CMD_STATUS_BAD_OP:
+ return "BAD_OP";
+ case MLXSW_CMD_STATUS_BAD_PARAM:
+ return "BAD_PARAM";
+ case MLXSW_CMD_STATUS_BAD_SYS_STATE:
+ return "BAD_SYS_STATE";
+ case MLXSW_CMD_STATUS_BAD_RESOURCE:
+ return "BAD_RESOURCE";
+ case MLXSW_CMD_STATUS_RESOURCE_BUSY:
+ return "RESOURCE_BUSY";
+ case MLXSW_CMD_STATUS_EXCEED_LIM:
+ return "EXCEED_LIM";
+ case MLXSW_CMD_STATUS_BAD_RES_STATE:
+ return "BAD_RES_STATE";
+ case MLXSW_CMD_STATUS_BAD_INDEX:
+ return "BAD_INDEX";
+ case MLXSW_CMD_STATUS_BAD_NVMEM:
+ return "BAD_NVMEM";
+ case MLXSW_CMD_STATUS_RUNNING_RESET:
+ return "RUNNING_RESET";
+ case MLXSW_CMD_STATUS_BAD_PKT:
+ return "BAD_PKT";
+ default:
+ return "*UNKNOWN*";
+ }
+}
+
+/* QUERY_FW - Query Firmware
+ * -------------------------
+ * OpMod == 0, INMmod == 0
+ * -----------------------
+ * The QUERY_FW command retrieves information related to firmware, command
+ * interface version and the amount of resources that should be allocated to
+ * the firmware.
+ */
+
+static inline int mlxsw_cmd_query_fw(struct mlxsw_core *mlxsw_core,
+ char *out_mbox)
+{
+ return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_FW,
+ 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* cmd_mbox_query_fw_fw_pages
+ * Amount of physical memory to be allocatedfor firmware usage in 4KB pages.
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_pages, 0x00, 16, 16);
+
+/* cmd_mbox_query_fw_fw_rev_major
+ * Firmware Revision - Major
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_major, 0x00, 0, 16);
+
+/* cmd_mbox_query_fw_fw_rev_subminor
+ * Firmware Sub-minor version (Patch level)
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_subminor, 0x04, 16, 16);
+
+/* cmd_mbox_query_fw_fw_rev_minor
+ * Firmware Revision - Minor
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_rev_minor, 0x04, 0, 16);
+
+/* cmd_mbox_query_fw_core_clk
+ * Internal Clock Frequency (in MHz)
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, core_clk, 0x08, 16, 16);
+
+/* cmd_mbox_query_fw_cmd_interface_rev
+ * Command Interface Interpreter Revision ID. This number is bumped up
+ * every time a non-backward-compatible change is done for the command
+ * interface. The current cmd_interface_rev is 1.
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, cmd_interface_rev, 0x08, 0, 16);
+
+/* cmd_mbox_query_fw_dt
+ * If set, Debug Trace is supported
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, dt, 0x0C, 31, 1);
+
+/* cmd_mbox_query_fw_api_version
+ * Indicates the version of the API, to enable software querying
+ * for compatibility. The current api_version is 1.
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, api_version, 0x0C, 0, 16);
+
+/* cmd_mbox_query_fw_fw_hour
+ * Firmware timestamp - hour
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_hour, 0x10, 24, 8);
+
+/* cmd_mbox_query_fw_fw_minutes
+ * Firmware timestamp - minutes
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_minutes, 0x10, 16, 8);
+
+/* cmd_mbox_query_fw_fw_seconds
+ * Firmware timestamp - seconds
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_seconds, 0x10, 8, 8);
+
+/* cmd_mbox_query_fw_fw_year
+ * Firmware timestamp - year
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_year, 0x14, 16, 16);
+
+/* cmd_mbox_query_fw_fw_month
+ * Firmware timestamp - month
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_month, 0x14, 8, 8);
+
+/* cmd_mbox_query_fw_fw_day
+ * Firmware timestamp - day
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, fw_day, 0x14, 0, 8);
+
+/* cmd_mbox_query_fw_clr_int_base_offset
+ * Clear Interrupt register's offset from clr_int_bar register
+ * in PCI address space.
+ */
+MLXSW_ITEM64(cmd_mbox, query_fw, clr_int_base_offset, 0x20, 0, 64);
+
+/* cmd_mbox_query_fw_clr_int_bar
+ * PCI base address register (BAR) where clr_int register is located.
+ * 00 - BAR 0-1 (64 bit BAR)
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, clr_int_bar, 0x28, 30, 2);
+
+/* cmd_mbox_query_fw_error_buf_offset
+ * Read Only buffer for internal error reports of offset
+ * from error_buf_bar register in PCI address space).
+ */
+MLXSW_ITEM64(cmd_mbox, query_fw, error_buf_offset, 0x30, 0, 64);
+
+/* cmd_mbox_query_fw_error_buf_size
+ * Internal error buffer size in DWORDs
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, error_buf_size, 0x38, 0, 32);
+
+/* cmd_mbox_query_fw_error_int_bar
+ * PCI base address register (BAR) where error buffer
+ * register is located.
+ * 00 - BAR 0-1 (64 bit BAR)
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, error_int_bar, 0x3C, 30, 2);
+
+/* cmd_mbox_query_fw_doorbell_page_offset
+ * Offset of the doorbell page
+ */
+MLXSW_ITEM64(cmd_mbox, query_fw, doorbell_page_offset, 0x40, 0, 64);
+
+/* cmd_mbox_query_fw_doorbell_page_bar
+ * PCI base address register (BAR) of the doorbell page
+ * 00 - BAR 0-1 (64 bit BAR)
+ */
+MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2);
+
+/* QUERY_BOARDINFO - Query Board Information
+ * -----------------------------------------
+ * OpMod == 0 (N/A), INMmod == 0 (N/A)
+ * -----------------------------------
+ * The QUERY_BOARDINFO command retrieves adapter specific parameters.
+ */
+
+static inline int mlxsw_cmd_boardinfo(struct mlxsw_core *mlxsw_core,
+ char *out_mbox)
+{
+ return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_BOARDINFO,
+ 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* cmd_mbox_boardinfo_intapin
+ * When PCIe interrupt messages are being used, this value is used for clearing
+ * an interrupt. When using MSI-X, this register is not used.
+ */
+MLXSW_ITEM32(cmd_mbox, boardinfo, intapin, 0x10, 24, 8);
+
+/* cmd_mbox_boardinfo_vsd_vendor_id
+ * PCISIG Vendor ID (www.pcisig.com/membership/vid_search) of the vendor
+ * specifying/formatting the VSD. The vsd_vendor_id identifies the management
+ * domain of the VSD/PSID data. Different vendors may choose different VSD/PSID
+ * format and encoding as long as they use their assigned vsd_vendor_id.
+ */
+MLXSW_ITEM32(cmd_mbox, boardinfo, vsd_vendor_id, 0x1C, 0, 16);
+
+/* cmd_mbox_boardinfo_vsd
+ * Vendor Specific Data. The VSD string that is burnt to the Flash
+ * with the firmware.
+ */
+#define MLXSW_CMD_BOARDINFO_VSD_LEN 208
+MLXSW_ITEM_BUF(cmd_mbox, boardinfo, vsd, 0x20, MLXSW_CMD_BOARDINFO_VSD_LEN);
+
+/* cmd_mbox_boardinfo_psid
+ * The PSID field is a 16-ascii (byte) character string which acts as
+ * the board ID. The PSID format is used in conjunction with
+ * Mellanox vsd_vendor_id (15B3h).
+ */
+#define MLXSW_CMD_BOARDINFO_PSID_LEN 16
+MLXSW_ITEM_BUF(cmd_mbox, boardinfo, psid, 0xF0, MLXSW_CMD_BOARDINFO_PSID_LEN);
+
+/* QUERY_AQ_CAP - Query Asynchronous Queues Capabilities
+ * -----------------------------------------------------
+ * OpMod == 0 (N/A), INMmod == 0 (N/A)
+ * -----------------------------------
+ * The QUERY_AQ_CAP command returns the device asynchronous queues
+ * capabilities supported.
+ */
+
+static inline int mlxsw_cmd_query_aq_cap(struct mlxsw_core *mlxsw_core,
+ char *out_mbox)
+{
+ return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_AQ_CAP,
+ 0, 0, false, out_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* cmd_mbox_query_aq_cap_log_max_sdq_sz
+ * Log (base 2) of max WQEs allowed on SDQ.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_sdq_sz, 0x00, 24, 8);
+
+/* cmd_mbox_query_aq_cap_max_num_sdqs
+ * Maximum number of SDQs.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_sdqs, 0x00, 0, 8);
+
+/* cmd_mbox_query_aq_cap_log_max_rdq_sz
+ * Log (base 2) of max WQEs allowed on RDQ.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_rdq_sz, 0x04, 24, 8);
+
+/* cmd_mbox_query_aq_cap_max_num_rdqs
+ * Maximum number of RDQs.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_rdqs, 0x04, 0, 8);
+
+/* cmd_mbox_query_aq_cap_log_max_cq_sz
+ * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv0 and CQEv1.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cq_sz, 0x08, 24, 8);
+
+/* cmd_mbox_query_aq_cap_log_max_cqv2_sz
+ * Log (base 2) of the Maximum CQEs allowed in a CQ for CQEv2.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_cqv2_sz, 0x08, 16, 8);
+
+/* cmd_mbox_query_aq_cap_max_num_cqs
+ * Maximum number of CQs.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_cqs, 0x08, 0, 8);
+
+/* cmd_mbox_query_aq_cap_log_max_eq_sz
+ * Log (base 2) of max EQEs allowed on EQ.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, log_max_eq_sz, 0x0C, 24, 8);
+
+/* cmd_mbox_query_aq_cap_max_num_eqs
+ * Maximum number of EQs.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_num_eqs, 0x0C, 0, 8);
+
+/* cmd_mbox_query_aq_cap_max_sg_sq
+ * The maximum S/G list elements in an DSQ. DSQ must not contain
+ * more S/G entries than indicated here.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_sg_sq, 0x10, 8, 8);
+
+/* cmd_mbox_query_aq_cap_
+ * The maximum S/G list elements in an DRQ. DRQ must not contain
+ * more S/G entries than indicated here.
+ */
+MLXSW_ITEM32(cmd_mbox, query_aq_cap, max_sg_rq, 0x10, 0, 8);
+
+/* MAP_FA - Map Firmware Area
+ * --------------------------
+ * OpMod == 0 (N/A), INMmod == Number of VPM entries
+ * -------------------------------------------------
+ * The MAP_FA command passes physical pages to the switch. These pages
+ * are used to store the device firmware. MAP_FA can be executed multiple
+ * times until all the firmware area is mapped (the size that should be
+ * mapped is retrieved through the QUERY_FW command). All required pages
+ * must be mapped to finish the initialization phase. Physical memory
+ * passed in this command must be pinned.
+ */
+
+#define MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX 32
+
+static inline int mlxsw_cmd_map_fa(struct mlxsw_core *mlxsw_core,
+ char *in_mbox, u32 vpm_entries_count)
+{
+ return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_MAP_FA,
+ 0, vpm_entries_count,
+ in_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* cmd_mbox_map_fa_pa
+ * Physical Address.
+ */
+MLXSW_ITEM64_INDEXED(cmd_mbox, map_fa, pa, 0x00, 12, 52, 0x08, 0x00, true);
+
+/* cmd_mbox_map_fa_log2size
+ * Log (base 2) of the size in 4KB pages of the physical and contiguous memory
+ * that starts at PA_L/H.
+ */
+MLXSW_ITEM32_INDEXED(cmd_mbox, map_fa, log2size, 0x00, 0, 5, 0x08, 0x04, false);
+
+/* UNMAP_FA - Unmap Firmware Area
+ * ------------------------------
+ * OpMod == 0 (N/A), INMmod == 0 (N/A)
+ * -----------------------------------
+ * The UNMAP_FA command unload the firmware and unmaps all the
+ * firmware area. After this command is completed the device will not access
+ * the pages that were mapped to the firmware area. After executing UNMAP_FA
+ * command, software reset must be done prior to execution of MAP_FW command.
+ */
+
+static inline int mlxsw_cmd_unmap_fa(struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_UNMAP_FA, 0, 0);
+}
+
+/* QUERY_RESOURCES - Query chip resources
+ * --------------------------------------
+ * OpMod == 0 (N/A) , INMmod is index
+ * ----------------------------------
+ * The QUERY_RESOURCES command retrieves information related to chip resources
+ * by resource ID. Every command returns 32 entries. INmod is being use as base.
+ * for example, index 1 will return entries 32-63. When the tables end and there
+ * are no more sources in the table, will return resource id 0xFFF to indicate
+ * it.
+ */
+
+#define MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID 0xffff
+#define MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES 100
+#define MLXSW_CMD_QUERY_RESOURCES_PER_QUERY 32
+
+static inline int mlxsw_cmd_query_resources(struct mlxsw_core *mlxsw_core,
+ char *out_mbox, int index)
+{
+ return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_RESOURCES,
+ 0, index, false, out_mbox,
+ MLXSW_CMD_MBOX_SIZE);
+}
+
+/* cmd_mbox_query_resource_id
+ * The resource id. 0xFFFF indicates table's end.
+ */
+MLXSW_ITEM32_INDEXED(cmd_mbox, query_resource, id, 0x00, 16, 16, 0x8, 0, false);
+
+/* cmd_mbox_query_resource_data
+ * The resource
+ */
+MLXSW_ITEM64_INDEXED(cmd_mbox, query_resource, data,
+ 0x00, 0, 40, 0x8, 0, false);
+
+/* CONFIG_PROFILE (Set) - Configure Switch Profile
+ * ------------------------------
+ * OpMod == 1 (Set), INMmod == 0 (N/A)
+ * -----------------------------------
+ * The CONFIG_PROFILE command sets the switch profile. The command can be
+ * executed on the device only once at startup in order to allocate and
+ * configure all switch resources and prepare it for operational mode.
+ * It is not possible to change the device profile after the chip is
+ * in operational mode.
+ * Failure of the CONFIG_PROFILE command leaves the hardware in an indeterminate
+ * state therefore it is required to perform software reset to the device
+ * following an unsuccessful completion of the command. It is required
+ * to perform software reset to the device to change an existing profile.
+ */
+
+static inline int mlxsw_cmd_config_profile_set(struct mlxsw_core *mlxsw_core,
+ char *in_mbox)
+{
+ return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_CONFIG_PROFILE,
+ 1, 0, in_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* cmd_mbox_config_profile_set_max_vepa_channels
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_vepa_channels, 0x0C, 0, 1);
+
+/* cmd_mbox_config_profile_set_max_lag
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_lag, 0x0C, 1, 1);
+
+/* cmd_mbox_config_profile_set_max_port_per_lag
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_port_per_lag, 0x0C, 2, 1);
+
+/* cmd_mbox_config_profile_set_max_mid
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_mid, 0x0C, 3, 1);
+
+/* cmd_mbox_config_profile_set_max_pgt
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_pgt, 0x0C, 4, 1);
+
+/* cmd_mbox_config_profile_set_max_system_port
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_system_port, 0x0C, 5, 1);
+
+/* cmd_mbox_config_profile_set_max_vlan_groups
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_vlan_groups, 0x0C, 6, 1);
+
+/* cmd_mbox_config_profile_set_max_regions
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_regions, 0x0C, 7, 1);
+
+/* cmd_mbox_config_profile_set_flood_mode
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_flood_mode, 0x0C, 8, 1);
+
+/* cmd_mbox_config_profile_set_max_flood_tables
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_flood_tables, 0x0C, 9, 1);
+
+/* cmd_mbox_config_profile_set_max_ib_mc
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_ib_mc, 0x0C, 12, 1);
+
+/* cmd_mbox_config_profile_set_max_pkey
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_max_pkey, 0x0C, 13, 1);
+
+/* cmd_mbox_config_profile_set_adaptive_routing_group_cap
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile,
+ set_adaptive_routing_group_cap, 0x0C, 14, 1);
+
+/* cmd_mbox_config_profile_set_ar_sec
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1);
+
+/* cmd_mbox_config_set_kvd_linear_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1);
+
+/* cmd_mbox_config_set_kvd_hash_single_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1);
+
+/* cmd_mbox_config_set_kvd_hash_double_size
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1);
+
+/* cmd_mbox_config_set_cqe_version
+ * Capability bit. Setting a bit to 1 configures the profile
+ * according to the mailbox contents.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, set_cqe_version, 0x08, 0, 1);
+
+/* cmd_mbox_config_profile_max_vepa_channels
+ * Maximum number of VEPA channels per port (0 through 16)
+ * 0 - multi-channel VEPA is disabled
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_vepa_channels, 0x10, 0, 8);
+
+/* cmd_mbox_config_profile_max_lag
+ * Maximum number of LAG IDs requested.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_lag, 0x14, 0, 16);
+
+/* cmd_mbox_config_profile_max_port_per_lag
+ * Maximum number of ports per LAG requested.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_port_per_lag, 0x18, 0, 16);
+
+/* cmd_mbox_config_profile_max_mid
+ * Maximum Multicast IDs.
+ * Multicast IDs are allocated from 0 to max_mid-1
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_mid, 0x1C, 0, 16);
+
+/* cmd_mbox_config_profile_max_pgt
+ * Maximum records in the Port Group Table per Switch Partition.
+ * Port Group Table indexes are from 0 to max_pgt-1
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_pgt, 0x20, 0, 16);
+
+/* cmd_mbox_config_profile_max_system_port
+ * The maximum number of system ports that can be allocated.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_system_port, 0x24, 0, 16);
+
+/* cmd_mbox_config_profile_max_vlan_groups
+ * Maximum number VLAN Groups for VLAN binding.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_vlan_groups, 0x28, 0, 12);
+
+/* cmd_mbox_config_profile_max_regions
+ * Maximum number of TCAM Regions.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_regions, 0x2C, 0, 16);
+
+/* cmd_mbox_config_profile_max_flood_tables
+ * Maximum number of single-entry flooding tables. Different flooding tables
+ * can be associated with different packet types.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_flood_tables, 0x30, 16, 4);
+
+/* cmd_mbox_config_profile_max_vid_flood_tables
+ * Maximum number of per-vid flooding tables. Flooding tables are associated
+ * to the different packet types for the different switch partitions.
+ * Table size is 4K entries covering all VID space.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_vid_flood_tables, 0x30, 8, 4);
+
+/* cmd_mbox_config_profile_flood_mode
+ * Flooding mode to use.
+ * 0-2 - Backward compatible modes for SwitchX devices.
+ * 3 - Mixed mode, where:
+ * max_flood_tables indicates the number of single-entry tables.
+ * max_vid_flood_tables indicates the number of per-VID tables.
+ * max_fid_offset_flood_tables indicates the number of FID-offset tables.
+ * max_fid_flood_tables indicates the number of per-FID tables.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, flood_mode, 0x30, 0, 2);
+
+/* cmd_mbox_config_profile_max_fid_offset_flood_tables
+ * Maximum number of FID-offset flooding tables.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile,
+ max_fid_offset_flood_tables, 0x34, 24, 4);
+
+/* cmd_mbox_config_profile_fid_offset_flood_table_size
+ * The size (number of entries) of each FID-offset flood table.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile,
+ fid_offset_flood_table_size, 0x34, 0, 16);
+
+/* cmd_mbox_config_profile_max_fid_flood_tables
+ * Maximum number of per-FID flooding tables.
+ *
+ * Note: This flooding tables cover special FIDs only (vFIDs), starting at
+ * FID value 4K and higher.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_fid_flood_tables, 0x38, 24, 4);
+
+/* cmd_mbox_config_profile_fid_flood_table_size
+ * The size (number of entries) of each per-FID table.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, fid_flood_table_size, 0x38, 0, 16);
+
+/* cmd_mbox_config_profile_max_ib_mc
+ * Maximum number of multicast FDB records for InfiniBand
+ * FDB (in 512 chunks) per InfiniBand switch partition.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_ib_mc, 0x40, 0, 15);
+
+/* cmd_mbox_config_profile_max_pkey
+ * Maximum per port PKEY table size (for PKEY enforcement)
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, max_pkey, 0x44, 0, 15);
+
+/* cmd_mbox_config_profile_ar_sec
+ * Primary/secondary capability
+ * Describes the number of adaptive routing sub-groups
+ * 0 - disable primary/secondary (single group)
+ * 1 - enable primary/secondary (2 sub-groups)
+ * 2 - 3 sub-groups: Not supported in SwitchX, SwitchX-2
+ * 3 - 4 sub-groups: Not supported in SwitchX, SwitchX-2
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, ar_sec, 0x4C, 24, 2);
+
+/* cmd_mbox_config_profile_adaptive_routing_group_cap
+ * Adaptive Routing Group Capability. Indicates the number of AR groups
+ * supported. Note that when Primary/secondary is enabled, each
+ * primary/secondary couple consumes 2 adaptive routing entries.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16);
+
+/* cmd_mbox_config_profile_arn
+ * Adaptive Routing Notification Enable
+ * Not supported in SwitchX, SwitchX-2
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1);
+
+/* cmd_mbox_config_kvd_linear_size
+ * KVD Linear Size
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24);
+
+/* cmd_mbox_config_kvd_hash_single_size
+ * KVD Hash single-entries size
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ * Must be greater or equal to cap_min_kvd_hash_single_size
+ * Must be smaller or equal to cap_kvd_size - kvd_linear_size
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24);
+
+/* cmd_mbox_config_kvd_hash_double_size
+ * KVD Hash double-entries size (units of single-size entries)
+ * Valid for Spectrum only
+ * Allowed values are 128*N where N=0 or higher
+ * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size
+ * Must be smaller or equal to cap_kvd_size - kvd_linear_size
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24);
+
+/* cmd_mbox_config_profile_swid_config_mask
+ * Modify Switch Partition Configuration mask. When set, the configu-
+ * ration value for the Switch Partition are taken from the mailbox.
+ * When clear, the current configuration values are used.
+ * Bit 0 - set type
+ * Bit 1 - properties
+ * Other - reserved
+ */
+MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_mask,
+ 0x60, 24, 8, 0x08, 0x00, false);
+
+/* cmd_mbox_config_profile_swid_config_type
+ * Switch Partition type.
+ * 0000 - disabled (Switch Partition does not exist)
+ * 0001 - InfiniBand
+ * 0010 - Ethernet
+ * 1000 - router port (SwitchX-2 only)
+ * Other - reserved
+ */
+MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_type,
+ 0x60, 20, 4, 0x08, 0x00, false);
+
+/* cmd_mbox_config_profile_swid_config_properties
+ * Switch Partition properties.
+ */
+MLXSW_ITEM32_INDEXED(cmd_mbox, config_profile, swid_config_properties,
+ 0x60, 0, 8, 0x08, 0x00, false);
+
+/* cmd_mbox_config_profile_cqe_version
+ * CQE version:
+ * 0: CQE version is 0
+ * 1: CQE version is either 1 or 2
+ * CQE ver 1 or 2 is configured by Completion Queue Context field cqe_ver.
+ */
+MLXSW_ITEM32(cmd_mbox, config_profile, cqe_version, 0xB0, 0, 8);
+
+/* ACCESS_REG - Access EMAD Supported Register
+ * ----------------------------------
+ * OpMod == 0 (N/A), INMmod == 0 (N/A)
+ * -------------------------------------
+ * The ACCESS_REG command supports accessing device registers. This access
+ * is mainly used for bootstrapping.
+ */
+
+static inline int mlxsw_cmd_access_reg(struct mlxsw_core *mlxsw_core,
+ bool reset_ok,
+ char *in_mbox, char *out_mbox)
+{
+ return mlxsw_cmd_exec(mlxsw_core, MLXSW_CMD_OPCODE_ACCESS_REG,
+ 0, 0, false, reset_ok,
+ in_mbox, MLXSW_CMD_MBOX_SIZE,
+ out_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* SW2HW_DQ - Software to Hardware DQ
+ * ----------------------------------
+ * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ)
+ * INMmod == DQ number
+ * ----------------------------------------------
+ * The SW2HW_DQ command transitions a descriptor queue from software to
+ * hardware ownership. The command enables posting WQEs and ringing DoorBells
+ * on the descriptor queue.
+ */
+
+static inline int __mlxsw_cmd_sw2hw_dq(struct mlxsw_core *mlxsw_core,
+ char *in_mbox, u32 dq_number,
+ u8 opcode_mod)
+{
+ return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_DQ,
+ opcode_mod, dq_number,
+ in_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+enum {
+ MLXSW_CMD_OPCODE_MOD_SDQ = 0,
+ MLXSW_CMD_OPCODE_MOD_RDQ = 1,
+};
+
+static inline int mlxsw_cmd_sw2hw_sdq(struct mlxsw_core *mlxsw_core,
+ char *in_mbox, u32 dq_number)
+{
+ return __mlxsw_cmd_sw2hw_dq(mlxsw_core, in_mbox, dq_number,
+ MLXSW_CMD_OPCODE_MOD_SDQ);
+}
+
+static inline int mlxsw_cmd_sw2hw_rdq(struct mlxsw_core *mlxsw_core,
+ char *in_mbox, u32 dq_number)
+{
+ return __mlxsw_cmd_sw2hw_dq(mlxsw_core, in_mbox, dq_number,
+ MLXSW_CMD_OPCODE_MOD_RDQ);
+}
+
+/* cmd_mbox_sw2hw_dq_cq
+ * Number of the CQ that this Descriptor Queue reports completions to.
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_dq, cq, 0x00, 24, 8);
+
+/* cmd_mbox_sw2hw_dq_sdq_tclass
+ * SDQ: CPU Egress TClass
+ * RDQ: Reserved
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_dq, sdq_tclass, 0x00, 16, 6);
+
+/* cmd_mbox_sw2hw_dq_log2_dq_sz
+ * Log (base 2) of the Descriptor Queue size in 4KB pages.
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_dq, log2_dq_sz, 0x00, 0, 6);
+
+/* cmd_mbox_sw2hw_dq_pa
+ * Physical Address.
+ */
+MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_dq, pa, 0x10, 12, 52, 0x08, 0x00, true);
+
+/* HW2SW_DQ - Hardware to Software DQ
+ * ----------------------------------
+ * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ)
+ * INMmod == DQ number
+ * ----------------------------------------------
+ * The HW2SW_DQ command transitions a descriptor queue from hardware to
+ * software ownership. Incoming packets on the DQ are silently discarded,
+ * SW should not post descriptors on nonoperational DQs.
+ */
+
+static inline int __mlxsw_cmd_hw2sw_dq(struct mlxsw_core *mlxsw_core,
+ u32 dq_number, u8 opcode_mod)
+{
+ return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_DQ,
+ opcode_mod, dq_number);
+}
+
+static inline int mlxsw_cmd_hw2sw_sdq(struct mlxsw_core *mlxsw_core,
+ u32 dq_number)
+{
+ return __mlxsw_cmd_hw2sw_dq(mlxsw_core, dq_number,
+ MLXSW_CMD_OPCODE_MOD_SDQ);
+}
+
+static inline int mlxsw_cmd_hw2sw_rdq(struct mlxsw_core *mlxsw_core,
+ u32 dq_number)
+{
+ return __mlxsw_cmd_hw2sw_dq(mlxsw_core, dq_number,
+ MLXSW_CMD_OPCODE_MOD_RDQ);
+}
+
+/* 2ERR_DQ - To Error DQ
+ * ---------------------
+ * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ)
+ * INMmod == DQ number
+ * ----------------------------------------------
+ * The 2ERR_DQ command transitions the DQ into the error state from the state
+ * in which it has been. While the command is executed, some in-process
+ * descriptors may complete. Once the DQ transitions into the error state,
+ * if there are posted descriptors on the RDQ/SDQ, the hardware writes
+ * a completion with error (flushed) for all descriptors posted in the RDQ/SDQ.
+ * When the command is completed successfully, the DQ is already in
+ * the error state.
+ */
+
+static inline int __mlxsw_cmd_2err_dq(struct mlxsw_core *mlxsw_core,
+ u32 dq_number, u8 opcode_mod)
+{
+ return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_2ERR_DQ,
+ opcode_mod, dq_number);
+}
+
+static inline int mlxsw_cmd_2err_sdq(struct mlxsw_core *mlxsw_core,
+ u32 dq_number)
+{
+ return __mlxsw_cmd_2err_dq(mlxsw_core, dq_number,
+ MLXSW_CMD_OPCODE_MOD_SDQ);
+}
+
+static inline int mlxsw_cmd_2err_rdq(struct mlxsw_core *mlxsw_core,
+ u32 dq_number)
+{
+ return __mlxsw_cmd_2err_dq(mlxsw_core, dq_number,
+ MLXSW_CMD_OPCODE_MOD_RDQ);
+}
+
+/* QUERY_DQ - Query DQ
+ * ---------------------
+ * OpMod == 0 (send DQ) / OpMod == 1 (receive DQ)
+ * INMmod == DQ number
+ * ----------------------------------------------
+ * The QUERY_DQ command retrieves a snapshot of DQ parameters from the hardware.
+ *
+ * Note: Output mailbox has the same format as SW2HW_DQ.
+ */
+
+static inline int __mlxsw_cmd_query_dq(struct mlxsw_core *mlxsw_core,
+ char *out_mbox, u32 dq_number,
+ u8 opcode_mod)
+{
+ return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_2ERR_DQ,
+ opcode_mod, dq_number, false,
+ out_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+static inline int mlxsw_cmd_query_sdq(struct mlxsw_core *mlxsw_core,
+ char *out_mbox, u32 dq_number)
+{
+ return __mlxsw_cmd_query_dq(mlxsw_core, out_mbox, dq_number,
+ MLXSW_CMD_OPCODE_MOD_SDQ);
+}
+
+static inline int mlxsw_cmd_query_rdq(struct mlxsw_core *mlxsw_core,
+ char *out_mbox, u32 dq_number)
+{
+ return __mlxsw_cmd_query_dq(mlxsw_core, out_mbox, dq_number,
+ MLXSW_CMD_OPCODE_MOD_RDQ);
+}
+
+/* SW2HW_CQ - Software to Hardware CQ
+ * ----------------------------------
+ * OpMod == 0 (N/A), INMmod == CQ number
+ * -------------------------------------
+ * The SW2HW_CQ command transfers ownership of a CQ context entry from software
+ * to hardware. The command takes the CQ context entry from the input mailbox
+ * and stores it in the CQC in the ownership of the hardware. The command fails
+ * if the requested CQC entry is already in the ownership of the hardware.
+ */
+
+static inline int mlxsw_cmd_sw2hw_cq(struct mlxsw_core *mlxsw_core,
+ char *in_mbox, u32 cq_number)
+{
+ return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_CQ,
+ 0, cq_number, in_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+enum mlxsw_cmd_mbox_sw2hw_cq_cqe_ver {
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1,
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2,
+};
+
+/* cmd_mbox_sw2hw_cq_cqe_ver
+ * CQE Version.
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cqe_ver, 0x00, 28, 4);
+
+/* cmd_mbox_sw2hw_cq_c_eqn
+ * Event Queue this CQ reports completion events to.
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_cq, c_eqn, 0x00, 24, 1);
+
+/* cmd_mbox_sw2hw_cq_st
+ * Event delivery state machine
+ * 0x0 - FIRED
+ * 0x1 - ARMED (Request for Notification)
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_cq, st, 0x00, 8, 1);
+
+/* cmd_mbox_sw2hw_cq_log_cq_size
+ * Log (base 2) of the CQ size (in entries).
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_cq, log_cq_size, 0x00, 0, 4);
+
+/* cmd_mbox_sw2hw_cq_producer_counter
+ * Producer Counter. The counter is incremented for each CQE that is
+ * written by the HW to the CQ.
+ * Maintained by HW (valid for the QUERY_CQ command only)
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_cq, producer_counter, 0x04, 0, 16);
+
+/* cmd_mbox_sw2hw_cq_pa
+ * Physical Address.
+ */
+MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_cq, pa, 0x10, 11, 53, 0x08, 0x00, true);
+
+/* HW2SW_CQ - Hardware to Software CQ
+ * ----------------------------------
+ * OpMod == 0 (N/A), INMmod == CQ number
+ * -------------------------------------
+ * The HW2SW_CQ command transfers ownership of a CQ context entry from hardware
+ * to software. The CQC entry is invalidated as a result of this command.
+ */
+
+static inline int mlxsw_cmd_hw2sw_cq(struct mlxsw_core *mlxsw_core,
+ u32 cq_number)
+{
+ return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_CQ,
+ 0, cq_number);
+}
+
+/* QUERY_CQ - Query CQ
+ * ----------------------------------
+ * OpMod == 0 (N/A), INMmod == CQ number
+ * -------------------------------------
+ * The QUERY_CQ command retrieves a snapshot of the current CQ context entry.
+ * The command stores the snapshot in the output mailbox in the software format.
+ * Note that the CQ context state and values are not affected by the QUERY_CQ
+ * command. The QUERY_CQ command is for debug purposes only.
+ *
+ * Note: Output mailbox has the same format as SW2HW_CQ.
+ */
+
+static inline int mlxsw_cmd_query_cq(struct mlxsw_core *mlxsw_core,
+ char *out_mbox, u32 cq_number)
+{
+ return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_CQ,
+ 0, cq_number, false,
+ out_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* SW2HW_EQ - Software to Hardware EQ
+ * ----------------------------------
+ * OpMod == 0 (N/A), INMmod == EQ number
+ * -------------------------------------
+ * The SW2HW_EQ command transfers ownership of an EQ context entry from software
+ * to hardware. The command takes the EQ context entry from the input mailbox
+ * and stores it in the EQC in the ownership of the hardware. The command fails
+ * if the requested EQC entry is already in the ownership of the hardware.
+ */
+
+static inline int mlxsw_cmd_sw2hw_eq(struct mlxsw_core *mlxsw_core,
+ char *in_mbox, u32 eq_number)
+{
+ return mlxsw_cmd_exec_in(mlxsw_core, MLXSW_CMD_OPCODE_SW2HW_EQ,
+ 0, eq_number, in_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+/* cmd_mbox_sw2hw_eq_int_msix
+ * When set, MSI-X cycles will be generated by this EQ.
+ * When cleared, an interrupt will be generated by this EQ.
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_eq, int_msix, 0x00, 24, 1);
+
+/* cmd_mbox_sw2hw_eq_st
+ * Event delivery state machine
+ * 0x0 - FIRED
+ * 0x1 - ARMED (Request for Notification)
+ * 0x11 - Always ARMED
+ * other - reserved
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_eq, st, 0x00, 8, 2);
+
+/* cmd_mbox_sw2hw_eq_log_eq_size
+ * Log (base 2) of the EQ size (in entries).
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_eq, log_eq_size, 0x00, 0, 4);
+
+/* cmd_mbox_sw2hw_eq_producer_counter
+ * Producer Counter. The counter is incremented for each EQE that is written
+ * by the HW to the EQ.
+ * Maintained by HW (valid for the QUERY_EQ command only)
+ */
+MLXSW_ITEM32(cmd_mbox, sw2hw_eq, producer_counter, 0x04, 0, 16);
+
+/* cmd_mbox_sw2hw_eq_pa
+ * Physical Address.
+ */
+MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_eq, pa, 0x10, 11, 53, 0x08, 0x00, true);
+
+/* HW2SW_EQ - Hardware to Software EQ
+ * ----------------------------------
+ * OpMod == 0 (N/A), INMmod == EQ number
+ * -------------------------------------
+ */
+
+static inline int mlxsw_cmd_hw2sw_eq(struct mlxsw_core *mlxsw_core,
+ u32 eq_number)
+{
+ return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_HW2SW_EQ,
+ 0, eq_number);
+}
+
+/* QUERY_EQ - Query EQ
+ * ----------------------------------
+ * OpMod == 0 (N/A), INMmod == EQ number
+ * -------------------------------------
+ *
+ * Note: Output mailbox has the same format as SW2HW_EQ.
+ */
+
+static inline int mlxsw_cmd_query_eq(struct mlxsw_core *mlxsw_core,
+ char *out_mbox, u32 eq_number)
+{
+ return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_EQ,
+ 0, eq_number, false,
+ out_mbox, MLXSW_CMD_MBOX_SIZE);
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
new file mode 100644
index 000000000..049ca4ba4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -0,0 +1,1914 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/if_link.h>
+#include <linux/netdevice.h>
+#include <linux/completion.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/gfp.h>
+#include <linux/random.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <asm/byteorder.h>
+#include <net/devlink.h>
+#include <trace/events/devlink.h>
+
+#include "core.h"
+#include "item.h"
+#include "cmd.h"
+#include "port.h"
+#include "trap.h"
+#include "emad.h"
+#include "reg.h"
+#include "resources.h"
+
+static LIST_HEAD(mlxsw_core_driver_list);
+static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock);
+
+static const char mlxsw_core_driver_name[] = "mlxsw_core";
+
+static struct workqueue_struct *mlxsw_wq;
+static struct workqueue_struct *mlxsw_owq;
+
+struct mlxsw_core_port {
+ struct devlink_port devlink_port;
+ void *port_driver_priv;
+ u8 local_port;
+};
+
+void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port)
+{
+ return mlxsw_core_port->port_driver_priv;
+}
+EXPORT_SYMBOL(mlxsw_core_port_driver_priv);
+
+static bool mlxsw_core_port_check(struct mlxsw_core_port *mlxsw_core_port)
+{
+ return mlxsw_core_port->port_driver_priv != NULL;
+}
+
+struct mlxsw_core {
+ struct mlxsw_driver *driver;
+ const struct mlxsw_bus *bus;
+ void *bus_priv;
+ const struct mlxsw_bus_info *bus_info;
+ struct workqueue_struct *emad_wq;
+ struct list_head rx_listener_list;
+ struct list_head event_listener_list;
+ struct {
+ atomic64_t tid;
+ struct list_head trans_list;
+ spinlock_t trans_list_lock; /* protects trans_list writes */
+ bool use_emad;
+ } emad;
+ struct {
+ u8 *mapping; /* lag_id+port_index to local_port mapping */
+ } lag;
+ struct mlxsw_res res;
+ struct mlxsw_hwmon *hwmon;
+ struct mlxsw_thermal *thermal;
+ struct mlxsw_core_port *ports;
+ unsigned int max_ports;
+ bool reload_fail;
+ bool fw_flash_in_progress;
+ unsigned long driver_priv[0];
+ /* driver_priv has to be always the last item */
+};
+
+#define MLXSW_PORT_MAX_PORTS_DEFAULT 0x40
+
+static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core)
+{
+ /* Switch ports are numbered from 1 to queried value */
+ if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT))
+ mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core,
+ MAX_SYSTEM_PORT) + 1;
+ else
+ mlxsw_core->max_ports = MLXSW_PORT_MAX_PORTS_DEFAULT + 1;
+
+ mlxsw_core->ports = kcalloc(mlxsw_core->max_ports,
+ sizeof(struct mlxsw_core_port), GFP_KERNEL);
+ if (!mlxsw_core->ports)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core)
+{
+ kfree(mlxsw_core->ports);
+}
+
+unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->max_ports;
+}
+EXPORT_SYMBOL(mlxsw_core_max_ports);
+
+void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->driver_priv;
+}
+EXPORT_SYMBOL(mlxsw_core_driver_priv);
+
+struct mlxsw_rx_listener_item {
+ struct list_head list;
+ struct mlxsw_rx_listener rxl;
+ void *priv;
+};
+
+struct mlxsw_event_listener_item {
+ struct list_head list;
+ struct mlxsw_event_listener el;
+ void *priv;
+};
+
+/******************
+ * EMAD processing
+ ******************/
+
+/* emad_eth_hdr_dmac
+ * Destination MAC in EMAD's Ethernet header.
+ * Must be set to 01:02:c9:00:00:01
+ */
+MLXSW_ITEM_BUF(emad, eth_hdr, dmac, 0x00, 6);
+
+/* emad_eth_hdr_smac
+ * Source MAC in EMAD's Ethernet header.
+ * Must be set to 00:02:c9:01:02:03
+ */
+MLXSW_ITEM_BUF(emad, eth_hdr, smac, 0x06, 6);
+
+/* emad_eth_hdr_ethertype
+ * Ethertype in EMAD's Ethernet header.
+ * Must be set to 0x8932
+ */
+MLXSW_ITEM32(emad, eth_hdr, ethertype, 0x0C, 16, 16);
+
+/* emad_eth_hdr_mlx_proto
+ * Mellanox protocol.
+ * Must be set to 0x0.
+ */
+MLXSW_ITEM32(emad, eth_hdr, mlx_proto, 0x0C, 8, 8);
+
+/* emad_eth_hdr_ver
+ * Mellanox protocol version.
+ * Must be set to 0x0.
+ */
+MLXSW_ITEM32(emad, eth_hdr, ver, 0x0C, 4, 4);
+
+/* emad_op_tlv_type
+ * Type of the TLV.
+ * Must be set to 0x1 (operation TLV).
+ */
+MLXSW_ITEM32(emad, op_tlv, type, 0x00, 27, 5);
+
+/* emad_op_tlv_len
+ * Length of the operation TLV in u32.
+ * Must be set to 0x4.
+ */
+MLXSW_ITEM32(emad, op_tlv, len, 0x00, 16, 11);
+
+/* emad_op_tlv_dr
+ * Direct route bit. Setting to 1 indicates the EMAD is a direct route
+ * EMAD. DR TLV must follow.
+ *
+ * Note: Currently not supported and must not be set.
+ */
+MLXSW_ITEM32(emad, op_tlv, dr, 0x00, 15, 1);
+
+/* emad_op_tlv_status
+ * Returned status in case of EMAD response. Must be set to 0 in case
+ * of EMAD request.
+ * 0x0 - success
+ * 0x1 - device is busy. Requester should retry
+ * 0x2 - Mellanox protocol version not supported
+ * 0x3 - unknown TLV
+ * 0x4 - register not supported
+ * 0x5 - operation class not supported
+ * 0x6 - EMAD method not supported
+ * 0x7 - bad parameter (e.g. port out of range)
+ * 0x8 - resource not available
+ * 0x9 - message receipt acknowledgment. Requester should retry
+ * 0x70 - internal error
+ */
+MLXSW_ITEM32(emad, op_tlv, status, 0x00, 8, 7);
+
+/* emad_op_tlv_register_id
+ * Register ID of register within register TLV.
+ */
+MLXSW_ITEM32(emad, op_tlv, register_id, 0x04, 16, 16);
+
+/* emad_op_tlv_r
+ * Response bit. Setting to 1 indicates Response, otherwise request.
+ */
+MLXSW_ITEM32(emad, op_tlv, r, 0x04, 15, 1);
+
+/* emad_op_tlv_method
+ * EMAD method type.
+ * 0x1 - query
+ * 0x2 - write
+ * 0x3 - send (currently not supported)
+ * 0x4 - event
+ */
+MLXSW_ITEM32(emad, op_tlv, method, 0x04, 8, 7);
+
+/* emad_op_tlv_class
+ * EMAD operation class. Must be set to 0x1 (REG_ACCESS).
+ */
+MLXSW_ITEM32(emad, op_tlv, class, 0x04, 0, 8);
+
+/* emad_op_tlv_tid
+ * EMAD transaction ID. Used for pairing request and response EMADs.
+ */
+MLXSW_ITEM64(emad, op_tlv, tid, 0x08, 0, 64);
+
+/* emad_reg_tlv_type
+ * Type of the TLV.
+ * Must be set to 0x3 (register TLV).
+ */
+MLXSW_ITEM32(emad, reg_tlv, type, 0x00, 27, 5);
+
+/* emad_reg_tlv_len
+ * Length of the operation TLV in u32.
+ */
+MLXSW_ITEM32(emad, reg_tlv, len, 0x00, 16, 11);
+
+/* emad_end_tlv_type
+ * Type of the TLV.
+ * Must be set to 0x0 (end TLV).
+ */
+MLXSW_ITEM32(emad, end_tlv, type, 0x00, 27, 5);
+
+/* emad_end_tlv_len
+ * Length of the end TLV in u32.
+ * Must be set to 1.
+ */
+MLXSW_ITEM32(emad, end_tlv, len, 0x00, 16, 11);
+
+enum mlxsw_core_reg_access_type {
+ MLXSW_CORE_REG_ACCESS_TYPE_QUERY,
+ MLXSW_CORE_REG_ACCESS_TYPE_WRITE,
+};
+
+static inline const char *
+mlxsw_core_reg_access_type_str(enum mlxsw_core_reg_access_type type)
+{
+ switch (type) {
+ case MLXSW_CORE_REG_ACCESS_TYPE_QUERY:
+ return "query";
+ case MLXSW_CORE_REG_ACCESS_TYPE_WRITE:
+ return "write";
+ }
+ BUG();
+}
+
+static void mlxsw_emad_pack_end_tlv(char *end_tlv)
+{
+ mlxsw_emad_end_tlv_type_set(end_tlv, MLXSW_EMAD_TLV_TYPE_END);
+ mlxsw_emad_end_tlv_len_set(end_tlv, MLXSW_EMAD_END_TLV_LEN);
+}
+
+static void mlxsw_emad_pack_reg_tlv(char *reg_tlv,
+ const struct mlxsw_reg_info *reg,
+ char *payload)
+{
+ mlxsw_emad_reg_tlv_type_set(reg_tlv, MLXSW_EMAD_TLV_TYPE_REG);
+ mlxsw_emad_reg_tlv_len_set(reg_tlv, reg->len / sizeof(u32) + 1);
+ memcpy(reg_tlv + sizeof(u32), payload, reg->len);
+}
+
+static void mlxsw_emad_pack_op_tlv(char *op_tlv,
+ const struct mlxsw_reg_info *reg,
+ enum mlxsw_core_reg_access_type type,
+ u64 tid)
+{
+ mlxsw_emad_op_tlv_type_set(op_tlv, MLXSW_EMAD_TLV_TYPE_OP);
+ mlxsw_emad_op_tlv_len_set(op_tlv, MLXSW_EMAD_OP_TLV_LEN);
+ mlxsw_emad_op_tlv_dr_set(op_tlv, 0);
+ mlxsw_emad_op_tlv_status_set(op_tlv, 0);
+ mlxsw_emad_op_tlv_register_id_set(op_tlv, reg->id);
+ mlxsw_emad_op_tlv_r_set(op_tlv, MLXSW_EMAD_OP_TLV_REQUEST);
+ if (type == MLXSW_CORE_REG_ACCESS_TYPE_QUERY)
+ mlxsw_emad_op_tlv_method_set(op_tlv,
+ MLXSW_EMAD_OP_TLV_METHOD_QUERY);
+ else
+ mlxsw_emad_op_tlv_method_set(op_tlv,
+ MLXSW_EMAD_OP_TLV_METHOD_WRITE);
+ mlxsw_emad_op_tlv_class_set(op_tlv,
+ MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS);
+ mlxsw_emad_op_tlv_tid_set(op_tlv, tid);
+}
+
+static int mlxsw_emad_construct_eth_hdr(struct sk_buff *skb)
+{
+ char *eth_hdr = skb_push(skb, MLXSW_EMAD_ETH_HDR_LEN);
+
+ mlxsw_emad_eth_hdr_dmac_memcpy_to(eth_hdr, MLXSW_EMAD_EH_DMAC);
+ mlxsw_emad_eth_hdr_smac_memcpy_to(eth_hdr, MLXSW_EMAD_EH_SMAC);
+ mlxsw_emad_eth_hdr_ethertype_set(eth_hdr, MLXSW_EMAD_EH_ETHERTYPE);
+ mlxsw_emad_eth_hdr_mlx_proto_set(eth_hdr, MLXSW_EMAD_EH_MLX_PROTO);
+ mlxsw_emad_eth_hdr_ver_set(eth_hdr, MLXSW_EMAD_EH_PROTO_VERSION);
+
+ skb_reset_mac_header(skb);
+
+ return 0;
+}
+
+static void mlxsw_emad_construct(struct sk_buff *skb,
+ const struct mlxsw_reg_info *reg,
+ char *payload,
+ enum mlxsw_core_reg_access_type type,
+ u64 tid)
+{
+ char *buf;
+
+ buf = skb_push(skb, MLXSW_EMAD_END_TLV_LEN * sizeof(u32));
+ mlxsw_emad_pack_end_tlv(buf);
+
+ buf = skb_push(skb, reg->len + sizeof(u32));
+ mlxsw_emad_pack_reg_tlv(buf, reg, payload);
+
+ buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32));
+ mlxsw_emad_pack_op_tlv(buf, reg, type, tid);
+
+ mlxsw_emad_construct_eth_hdr(skb);
+}
+
+static char *mlxsw_emad_op_tlv(const struct sk_buff *skb)
+{
+ return ((char *) (skb->data + MLXSW_EMAD_ETH_HDR_LEN));
+}
+
+static char *mlxsw_emad_reg_tlv(const struct sk_buff *skb)
+{
+ return ((char *) (skb->data + MLXSW_EMAD_ETH_HDR_LEN +
+ MLXSW_EMAD_OP_TLV_LEN * sizeof(u32)));
+}
+
+static char *mlxsw_emad_reg_payload(const char *op_tlv)
+{
+ return ((char *) (op_tlv + (MLXSW_EMAD_OP_TLV_LEN + 1) * sizeof(u32)));
+}
+
+static u64 mlxsw_emad_get_tid(const struct sk_buff *skb)
+{
+ char *op_tlv;
+
+ op_tlv = mlxsw_emad_op_tlv(skb);
+ return mlxsw_emad_op_tlv_tid_get(op_tlv);
+}
+
+static bool mlxsw_emad_is_resp(const struct sk_buff *skb)
+{
+ char *op_tlv;
+
+ op_tlv = mlxsw_emad_op_tlv(skb);
+ return (mlxsw_emad_op_tlv_r_get(op_tlv) == MLXSW_EMAD_OP_TLV_RESPONSE);
+}
+
+static int mlxsw_emad_process_status(char *op_tlv,
+ enum mlxsw_emad_op_tlv_status *p_status)
+{
+ *p_status = mlxsw_emad_op_tlv_status_get(op_tlv);
+
+ switch (*p_status) {
+ case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS:
+ return 0;
+ case MLXSW_EMAD_OP_TLV_STATUS_BUSY:
+ case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK:
+ return -EAGAIN;
+ case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED:
+ case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV:
+ case MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED:
+ case MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED:
+ case MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED:
+ case MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER:
+ case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE:
+ case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR:
+ default:
+ return -EIO;
+ }
+}
+
+static int
+mlxsw_emad_process_status_skb(struct sk_buff *skb,
+ enum mlxsw_emad_op_tlv_status *p_status)
+{
+ return mlxsw_emad_process_status(mlxsw_emad_op_tlv(skb), p_status);
+}
+
+struct mlxsw_reg_trans {
+ struct list_head list;
+ struct list_head bulk_list;
+ struct mlxsw_core *core;
+ struct sk_buff *tx_skb;
+ struct mlxsw_tx_info tx_info;
+ struct delayed_work timeout_dw;
+ unsigned int retries;
+ u64 tid;
+ struct completion completion;
+ atomic_t active;
+ mlxsw_reg_trans_cb_t *cb;
+ unsigned long cb_priv;
+ const struct mlxsw_reg_info *reg;
+ enum mlxsw_core_reg_access_type type;
+ int err;
+ enum mlxsw_emad_op_tlv_status emad_status;
+ struct rcu_head rcu;
+};
+
+#define MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS 3000
+#define MLXSW_EMAD_TIMEOUT_MS 200
+
+static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans)
+{
+ unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS);
+
+ if (trans->core->fw_flash_in_progress)
+ timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS);
+
+ queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw,
+ timeout << trans->retries);
+}
+
+static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_reg_trans *trans)
+{
+ struct sk_buff *skb;
+ int err;
+
+ skb = skb_copy(trans->tx_skb, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0,
+ skb->data + mlxsw_core->driver->txhdr_len,
+ skb->len - mlxsw_core->driver->txhdr_len);
+
+ atomic_set(&trans->active, 1);
+ err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info);
+ if (err) {
+ dev_kfree_skb(skb);
+ return err;
+ }
+ mlxsw_emad_trans_timeout_schedule(trans);
+ return 0;
+}
+
+static void mlxsw_emad_trans_finish(struct mlxsw_reg_trans *trans, int err)
+{
+ struct mlxsw_core *mlxsw_core = trans->core;
+
+ dev_kfree_skb(trans->tx_skb);
+ spin_lock_bh(&mlxsw_core->emad.trans_list_lock);
+ list_del_rcu(&trans->list);
+ spin_unlock_bh(&mlxsw_core->emad.trans_list_lock);
+ trans->err = err;
+ complete(&trans->completion);
+}
+
+static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_reg_trans *trans)
+{
+ int err;
+
+ if (trans->retries < MLXSW_EMAD_MAX_RETRY) {
+ trans->retries++;
+ err = mlxsw_emad_transmit(trans->core, trans);
+ if (err == 0)
+ return;
+
+ if (!atomic_dec_and_test(&trans->active))
+ return;
+ } else {
+ err = -EIO;
+ }
+ mlxsw_emad_trans_finish(trans, err);
+}
+
+static void mlxsw_emad_trans_timeout_work(struct work_struct *work)
+{
+ struct mlxsw_reg_trans *trans = container_of(work,
+ struct mlxsw_reg_trans,
+ timeout_dw.work);
+
+ if (!atomic_dec_and_test(&trans->active))
+ return;
+
+ mlxsw_emad_transmit_retry(trans->core, trans);
+}
+
+static void mlxsw_emad_process_response(struct mlxsw_core *mlxsw_core,
+ struct mlxsw_reg_trans *trans,
+ struct sk_buff *skb)
+{
+ int err;
+
+ if (!atomic_dec_and_test(&trans->active))
+ return;
+
+ err = mlxsw_emad_process_status_skb(skb, &trans->emad_status);
+ if (err == -EAGAIN) {
+ mlxsw_emad_transmit_retry(mlxsw_core, trans);
+ } else {
+ if (err == 0) {
+ char *op_tlv = mlxsw_emad_op_tlv(skb);
+
+ if (trans->cb)
+ trans->cb(mlxsw_core,
+ mlxsw_emad_reg_payload(op_tlv),
+ trans->reg->len, trans->cb_priv);
+ }
+ mlxsw_emad_trans_finish(trans, err);
+ }
+}
+
+/* called with rcu read lock held */
+static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port,
+ void *priv)
+{
+ struct mlxsw_core *mlxsw_core = priv;
+ struct mlxsw_reg_trans *trans;
+
+ trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0,
+ skb->data, skb->len);
+
+ if (!mlxsw_emad_is_resp(skb))
+ goto free_skb;
+
+ list_for_each_entry_rcu(trans, &mlxsw_core->emad.trans_list, list) {
+ if (mlxsw_emad_get_tid(skb) == trans->tid) {
+ mlxsw_emad_process_response(mlxsw_core, trans, skb);
+ break;
+ }
+ }
+
+free_skb:
+ dev_kfree_skb(skb);
+}
+
+static const struct mlxsw_listener mlxsw_emad_rx_listener =
+ MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false,
+ EMAD, DISCARD);
+
+static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
+{
+ struct workqueue_struct *emad_wq;
+ u64 tid;
+ int err;
+
+ if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
+ return 0;
+
+ emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0);
+ if (!emad_wq)
+ return -ENOMEM;
+ mlxsw_core->emad_wq = emad_wq;
+
+ /* Set the upper 32 bits of the transaction ID field to a random
+ * number. This allows us to discard EMADs addressed to other
+ * devices.
+ */
+ get_random_bytes(&tid, 4);
+ tid <<= 32;
+ atomic64_set(&mlxsw_core->emad.tid, tid);
+
+ INIT_LIST_HEAD(&mlxsw_core->emad.trans_list);
+ spin_lock_init(&mlxsw_core->emad.trans_list_lock);
+
+ err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_emad_rx_listener,
+ mlxsw_core);
+ if (err)
+ goto err_trap_register;
+
+ err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core);
+ if (err)
+ goto err_emad_trap_set;
+ mlxsw_core->emad.use_emad = true;
+
+ return 0;
+
+err_emad_trap_set:
+ mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
+ mlxsw_core);
+err_trap_register:
+ destroy_workqueue(mlxsw_core->emad_wq);
+ return err;
+}
+
+static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core)
+{
+
+ if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
+ return;
+
+ mlxsw_core->emad.use_emad = false;
+ mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
+ mlxsw_core);
+ destroy_workqueue(mlxsw_core->emad_wq);
+}
+
+static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core,
+ u16 reg_len)
+{
+ struct sk_buff *skb;
+ u16 emad_len;
+
+ emad_len = (reg_len + sizeof(u32) + MLXSW_EMAD_ETH_HDR_LEN +
+ (MLXSW_EMAD_OP_TLV_LEN + MLXSW_EMAD_END_TLV_LEN) *
+ sizeof(u32) + mlxsw_core->driver->txhdr_len);
+ if (emad_len > MLXSW_EMAD_MAX_FRAME_LEN)
+ return NULL;
+
+ skb = netdev_alloc_skb(NULL, emad_len);
+ if (!skb)
+ return NULL;
+ memset(skb->data, 0, emad_len);
+ skb_reserve(skb, emad_len);
+
+ return skb;
+}
+
+static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg,
+ char *payload,
+ enum mlxsw_core_reg_access_type type,
+ struct mlxsw_reg_trans *trans,
+ struct list_head *bulk_list,
+ mlxsw_reg_trans_cb_t *cb,
+ unsigned long cb_priv, u64 tid)
+{
+ struct sk_buff *skb;
+ int err;
+
+ dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n",
+ tid, reg->id, mlxsw_reg_id_str(reg->id),
+ mlxsw_core_reg_access_type_str(type));
+
+ skb = mlxsw_emad_alloc(mlxsw_core, reg->len);
+ if (!skb)
+ return -ENOMEM;
+
+ list_add_tail(&trans->bulk_list, bulk_list);
+ trans->core = mlxsw_core;
+ trans->tx_skb = skb;
+ trans->tx_info.local_port = MLXSW_PORT_CPU_PORT;
+ trans->tx_info.is_emad = true;
+ INIT_DELAYED_WORK(&trans->timeout_dw, mlxsw_emad_trans_timeout_work);
+ trans->tid = tid;
+ init_completion(&trans->completion);
+ trans->cb = cb;
+ trans->cb_priv = cb_priv;
+ trans->reg = reg;
+ trans->type = type;
+
+ mlxsw_emad_construct(skb, reg, payload, type, trans->tid);
+ mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info);
+
+ spin_lock_bh(&mlxsw_core->emad.trans_list_lock);
+ list_add_tail_rcu(&trans->list, &mlxsw_core->emad.trans_list);
+ spin_unlock_bh(&mlxsw_core->emad.trans_list_lock);
+ err = mlxsw_emad_transmit(mlxsw_core, trans);
+ if (err)
+ goto err_out;
+ return 0;
+
+err_out:
+ spin_lock_bh(&mlxsw_core->emad.trans_list_lock);
+ list_del_rcu(&trans->list);
+ spin_unlock_bh(&mlxsw_core->emad.trans_list_lock);
+ list_del(&trans->bulk_list);
+ dev_kfree_skb(trans->tx_skb);
+ return err;
+}
+
+/*****************
+ * Core functions
+ *****************/
+
+int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver)
+{
+ spin_lock(&mlxsw_core_driver_list_lock);
+ list_add_tail(&mlxsw_driver->list, &mlxsw_core_driver_list);
+ spin_unlock(&mlxsw_core_driver_list_lock);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_core_driver_register);
+
+void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver)
+{
+ spin_lock(&mlxsw_core_driver_list_lock);
+ list_del(&mlxsw_driver->list);
+ spin_unlock(&mlxsw_core_driver_list_lock);
+}
+EXPORT_SYMBOL(mlxsw_core_driver_unregister);
+
+static struct mlxsw_driver *__driver_find(const char *kind)
+{
+ struct mlxsw_driver *mlxsw_driver;
+
+ list_for_each_entry(mlxsw_driver, &mlxsw_core_driver_list, list) {
+ if (strcmp(mlxsw_driver->kind, kind) == 0)
+ return mlxsw_driver;
+ }
+ return NULL;
+}
+
+static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind)
+{
+ struct mlxsw_driver *mlxsw_driver;
+
+ spin_lock(&mlxsw_core_driver_list_lock);
+ mlxsw_driver = __driver_find(kind);
+ spin_unlock(&mlxsw_core_driver_list_lock);
+ return mlxsw_driver;
+}
+
+static int mlxsw_devlink_port_split(struct devlink *devlink,
+ unsigned int port_index,
+ unsigned int count,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+
+ if (port_index >= mlxsw_core->max_ports) {
+ NL_SET_ERR_MSG_MOD(extack, "Port index exceeds maximum number of ports");
+ return -EINVAL;
+ }
+ if (!mlxsw_core->driver->port_split)
+ return -EOPNOTSUPP;
+ return mlxsw_core->driver->port_split(mlxsw_core, port_index, count,
+ extack);
+}
+
+static int mlxsw_devlink_port_unsplit(struct devlink *devlink,
+ unsigned int port_index,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+
+ if (port_index >= mlxsw_core->max_ports) {
+ NL_SET_ERR_MSG_MOD(extack, "Port index exceeds maximum number of ports");
+ return -EINVAL;
+ }
+ if (!mlxsw_core->driver->port_unsplit)
+ return -EOPNOTSUPP;
+ return mlxsw_core->driver->port_unsplit(mlxsw_core, port_index,
+ extack);
+}
+
+static int
+mlxsw_devlink_sb_pool_get(struct devlink *devlink,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+ if (!mlxsw_driver->sb_pool_get)
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_pool_get(mlxsw_core, sb_index,
+ pool_index, pool_info);
+}
+
+static int
+mlxsw_devlink_sb_pool_set(struct devlink *devlink,
+ unsigned int sb_index, u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+ if (!mlxsw_driver->sb_pool_set)
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_pool_set(mlxsw_core, sb_index,
+ pool_index, size, threshold_type);
+}
+
+static void *__dl_port(struct devlink_port *devlink_port)
+{
+ return container_of(devlink_port, struct mlxsw_core_port, devlink_port);
+}
+
+static int mlxsw_devlink_port_type_set(struct devlink_port *devlink_port,
+ enum devlink_port_type port_type)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+ struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+ if (!mlxsw_driver->port_type_set)
+ return -EOPNOTSUPP;
+
+ return mlxsw_driver->port_type_set(mlxsw_core,
+ mlxsw_core_port->local_port,
+ port_type);
+}
+
+static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+ struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+ if (!mlxsw_driver->sb_port_pool_get ||
+ !mlxsw_core_port_check(mlxsw_core_port))
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index,
+ pool_index, p_threshold);
+}
+
+static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+ struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+ if (!mlxsw_driver->sb_port_pool_set ||
+ !mlxsw_core_port_check(mlxsw_core_port))
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index,
+ pool_index, threshold);
+}
+
+static int
+mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+ struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+ if (!mlxsw_driver->sb_tc_pool_bind_get ||
+ !mlxsw_core_port_check(mlxsw_core_port))
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index,
+ tc_index, pool_type,
+ p_pool_index, p_threshold);
+}
+
+static int
+mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+ struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+ if (!mlxsw_driver->sb_tc_pool_bind_set ||
+ !mlxsw_core_port_check(mlxsw_core_port))
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index,
+ tc_index, pool_type,
+ pool_index, threshold);
+}
+
+static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+ if (!mlxsw_driver->sb_occ_snapshot)
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_occ_snapshot(mlxsw_core, sb_index);
+}
+
+static int mlxsw_devlink_sb_occ_max_clear(struct devlink *devlink,
+ unsigned int sb_index)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+
+ if (!mlxsw_driver->sb_occ_max_clear)
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_occ_max_clear(mlxsw_core, sb_index);
+}
+
+static int
+mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_cur, u32 *p_max)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+ struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+ if (!mlxsw_driver->sb_occ_port_pool_get ||
+ !mlxsw_core_port_check(mlxsw_core_port))
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index,
+ pool_index, p_cur, p_max);
+}
+
+static int
+mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+ struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+ struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+ if (!mlxsw_driver->sb_occ_tc_port_bind_get ||
+ !mlxsw_core_port_check(mlxsw_core_port))
+ return -EOPNOTSUPP;
+ return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port,
+ sb_index, tc_index,
+ pool_type, p_cur, p_max);
+}
+
+static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+ int err;
+
+ if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET))
+ return -EOPNOTSUPP;
+
+ mlxsw_core_bus_device_unregister(mlxsw_core, true);
+ err = mlxsw_core_bus_device_register(mlxsw_core->bus_info,
+ mlxsw_core->bus,
+ mlxsw_core->bus_priv, true,
+ devlink);
+ mlxsw_core->reload_fail = !!err;
+
+ return err;
+}
+
+static const struct devlink_ops mlxsw_devlink_ops = {
+ .reload = mlxsw_devlink_core_bus_device_reload,
+ .port_type_set = mlxsw_devlink_port_type_set,
+ .port_split = mlxsw_devlink_port_split,
+ .port_unsplit = mlxsw_devlink_port_unsplit,
+ .sb_pool_get = mlxsw_devlink_sb_pool_get,
+ .sb_pool_set = mlxsw_devlink_sb_pool_set,
+ .sb_port_pool_get = mlxsw_devlink_sb_port_pool_get,
+ .sb_port_pool_set = mlxsw_devlink_sb_port_pool_set,
+ .sb_tc_pool_bind_get = mlxsw_devlink_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = mlxsw_devlink_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = mlxsw_devlink_sb_occ_snapshot,
+ .sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear,
+ .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get,
+};
+
+int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
+ const struct mlxsw_bus *mlxsw_bus,
+ void *bus_priv, bool reload,
+ struct devlink *devlink)
+{
+ const char *device_kind = mlxsw_bus_info->device_kind;
+ struct mlxsw_core *mlxsw_core;
+ struct mlxsw_driver *mlxsw_driver;
+ struct mlxsw_res *res;
+ size_t alloc_size;
+ int err;
+
+ mlxsw_driver = mlxsw_core_driver_get(device_kind);
+ if (!mlxsw_driver)
+ return -EINVAL;
+
+ if (!reload) {
+ alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size;
+ devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size);
+ if (!devlink) {
+ err = -ENOMEM;
+ goto err_devlink_alloc;
+ }
+ }
+
+ mlxsw_core = devlink_priv(devlink);
+ INIT_LIST_HEAD(&mlxsw_core->rx_listener_list);
+ INIT_LIST_HEAD(&mlxsw_core->event_listener_list);
+ mlxsw_core->driver = mlxsw_driver;
+ mlxsw_core->bus = mlxsw_bus;
+ mlxsw_core->bus_priv = bus_priv;
+ mlxsw_core->bus_info = mlxsw_bus_info;
+
+ res = mlxsw_driver->res_query_enabled ? &mlxsw_core->res : NULL;
+ err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, res);
+ if (err)
+ goto err_bus_init;
+
+ if (mlxsw_driver->resources_register && !reload) {
+ err = mlxsw_driver->resources_register(mlxsw_core);
+ if (err)
+ goto err_register_resources;
+ }
+
+ err = mlxsw_ports_init(mlxsw_core);
+ if (err)
+ goto err_ports_init;
+
+ if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG) &&
+ MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG_MEMBERS)) {
+ alloc_size = sizeof(u8) *
+ MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG) *
+ MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS);
+ mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL);
+ if (!mlxsw_core->lag.mapping) {
+ err = -ENOMEM;
+ goto err_alloc_lag_mapping;
+ }
+ }
+
+ err = mlxsw_emad_init(mlxsw_core);
+ if (err)
+ goto err_emad_init;
+
+ if (!reload) {
+ err = devlink_register(devlink, mlxsw_bus_info->dev);
+ if (err)
+ goto err_devlink_register;
+ }
+
+ err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
+ if (err)
+ goto err_hwmon_init;
+
+ err = mlxsw_thermal_init(mlxsw_core, mlxsw_bus_info,
+ &mlxsw_core->thermal);
+ if (err)
+ goto err_thermal_init;
+
+ if (mlxsw_driver->init) {
+ err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+ if (err)
+ goto err_driver_init;
+ }
+
+ return 0;
+
+err_driver_init:
+ mlxsw_thermal_fini(mlxsw_core->thermal);
+err_thermal_init:
+ mlxsw_hwmon_fini(mlxsw_core->hwmon);
+err_hwmon_init:
+ if (!reload)
+ devlink_unregister(devlink);
+err_devlink_register:
+ mlxsw_emad_fini(mlxsw_core);
+err_emad_init:
+ kfree(mlxsw_core->lag.mapping);
+err_alloc_lag_mapping:
+ mlxsw_ports_fini(mlxsw_core);
+err_ports_init:
+ if (!reload)
+ devlink_resources_unregister(devlink, NULL);
+err_register_resources:
+ mlxsw_bus->fini(bus_priv);
+err_bus_init:
+ if (!reload)
+ devlink_free(devlink);
+err_devlink_alloc:
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_core_bus_device_register);
+
+void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
+ bool reload)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+
+ if (mlxsw_core->reload_fail) {
+ if (!reload)
+ /* Only the parts that were not de-initialized in the
+ * failed reload attempt need to be de-initialized.
+ */
+ goto reload_fail_deinit;
+ else
+ return;
+ }
+
+ if (mlxsw_core->driver->fini)
+ mlxsw_core->driver->fini(mlxsw_core);
+ mlxsw_thermal_fini(mlxsw_core->thermal);
+ mlxsw_hwmon_fini(mlxsw_core->hwmon);
+ if (!reload)
+ devlink_unregister(devlink);
+ mlxsw_emad_fini(mlxsw_core);
+ kfree(mlxsw_core->lag.mapping);
+ mlxsw_ports_fini(mlxsw_core);
+ if (!reload)
+ devlink_resources_unregister(devlink, NULL);
+ mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+ if (!reload)
+ devlink_free(devlink);
+
+ return;
+
+reload_fail_deinit:
+ devlink_unregister(devlink);
+ devlink_resources_unregister(devlink, NULL);
+ devlink_free(devlink);
+}
+EXPORT_SYMBOL(mlxsw_core_bus_device_unregister);
+
+bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_tx_info *tx_info)
+{
+ return mlxsw_core->bus->skb_transmit_busy(mlxsw_core->bus_priv,
+ tx_info);
+}
+EXPORT_SYMBOL(mlxsw_core_skb_transmit_busy);
+
+int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info)
+{
+ return mlxsw_core->bus->skb_transmit(mlxsw_core->bus_priv, skb,
+ tx_info);
+}
+EXPORT_SYMBOL(mlxsw_core_skb_transmit);
+
+static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a,
+ const struct mlxsw_rx_listener *rxl_b)
+{
+ return (rxl_a->func == rxl_b->func &&
+ rxl_a->local_port == rxl_b->local_port &&
+ rxl_a->trap_id == rxl_b->trap_id);
+}
+
+static struct mlxsw_rx_listener_item *
+__find_rx_listener_item(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_rx_listener *rxl,
+ void *priv)
+{
+ struct mlxsw_rx_listener_item *rxl_item;
+
+ list_for_each_entry(rxl_item, &mlxsw_core->rx_listener_list, list) {
+ if (__is_rx_listener_equal(&rxl_item->rxl, rxl) &&
+ rxl_item->priv == priv)
+ return rxl_item;
+ }
+ return NULL;
+}
+
+int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_rx_listener *rxl,
+ void *priv)
+{
+ struct mlxsw_rx_listener_item *rxl_item;
+
+ rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv);
+ if (rxl_item)
+ return -EEXIST;
+ rxl_item = kmalloc(sizeof(*rxl_item), GFP_KERNEL);
+ if (!rxl_item)
+ return -ENOMEM;
+ rxl_item->rxl = *rxl;
+ rxl_item->priv = priv;
+
+ list_add_rcu(&rxl_item->list, &mlxsw_core->rx_listener_list);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_core_rx_listener_register);
+
+void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_rx_listener *rxl,
+ void *priv)
+{
+ struct mlxsw_rx_listener_item *rxl_item;
+
+ rxl_item = __find_rx_listener_item(mlxsw_core, rxl, priv);
+ if (!rxl_item)
+ return;
+ list_del_rcu(&rxl_item->list);
+ synchronize_rcu();
+ kfree(rxl_item);
+}
+EXPORT_SYMBOL(mlxsw_core_rx_listener_unregister);
+
+static void mlxsw_core_event_listener_func(struct sk_buff *skb, u8 local_port,
+ void *priv)
+{
+ struct mlxsw_event_listener_item *event_listener_item = priv;
+ struct mlxsw_reg_info reg;
+ char *payload;
+ char *op_tlv = mlxsw_emad_op_tlv(skb);
+ char *reg_tlv = mlxsw_emad_reg_tlv(skb);
+
+ reg.id = mlxsw_emad_op_tlv_register_id_get(op_tlv);
+ reg.len = (mlxsw_emad_reg_tlv_len_get(reg_tlv) - 1) * sizeof(u32);
+ payload = mlxsw_emad_reg_payload(op_tlv);
+ event_listener_item->el.func(&reg, payload, event_listener_item->priv);
+ dev_kfree_skb(skb);
+}
+
+static bool __is_event_listener_equal(const struct mlxsw_event_listener *el_a,
+ const struct mlxsw_event_listener *el_b)
+{
+ return (el_a->func == el_b->func &&
+ el_a->trap_id == el_b->trap_id);
+}
+
+static struct mlxsw_event_listener_item *
+__find_event_listener_item(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_event_listener *el,
+ void *priv)
+{
+ struct mlxsw_event_listener_item *el_item;
+
+ list_for_each_entry(el_item, &mlxsw_core->event_listener_list, list) {
+ if (__is_event_listener_equal(&el_item->el, el) &&
+ el_item->priv == priv)
+ return el_item;
+ }
+ return NULL;
+}
+
+int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_event_listener *el,
+ void *priv)
+{
+ int err;
+ struct mlxsw_event_listener_item *el_item;
+ const struct mlxsw_rx_listener rxl = {
+ .func = mlxsw_core_event_listener_func,
+ .local_port = MLXSW_PORT_DONT_CARE,
+ .trap_id = el->trap_id,
+ };
+
+ el_item = __find_event_listener_item(mlxsw_core, el, priv);
+ if (el_item)
+ return -EEXIST;
+ el_item = kmalloc(sizeof(*el_item), GFP_KERNEL);
+ if (!el_item)
+ return -ENOMEM;
+ el_item->el = *el;
+ el_item->priv = priv;
+
+ err = mlxsw_core_rx_listener_register(mlxsw_core, &rxl, el_item);
+ if (err)
+ goto err_rx_listener_register;
+
+ /* No reason to save item if we did not manage to register an RX
+ * listener for it.
+ */
+ list_add_rcu(&el_item->list, &mlxsw_core->event_listener_list);
+
+ return 0;
+
+err_rx_listener_register:
+ kfree(el_item);
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_core_event_listener_register);
+
+void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_event_listener *el,
+ void *priv)
+{
+ struct mlxsw_event_listener_item *el_item;
+ const struct mlxsw_rx_listener rxl = {
+ .func = mlxsw_core_event_listener_func,
+ .local_port = MLXSW_PORT_DONT_CARE,
+ .trap_id = el->trap_id,
+ };
+
+ el_item = __find_event_listener_item(mlxsw_core, el, priv);
+ if (!el_item)
+ return;
+ mlxsw_core_rx_listener_unregister(mlxsw_core, &rxl, el_item);
+ list_del(&el_item->list);
+ kfree(el_item);
+}
+EXPORT_SYMBOL(mlxsw_core_event_listener_unregister);
+
+static int mlxsw_core_listener_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listener,
+ void *priv)
+{
+ if (listener->is_event)
+ return mlxsw_core_event_listener_register(mlxsw_core,
+ &listener->u.event_listener,
+ priv);
+ else
+ return mlxsw_core_rx_listener_register(mlxsw_core,
+ &listener->u.rx_listener,
+ priv);
+}
+
+static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listener,
+ void *priv)
+{
+ if (listener->is_event)
+ mlxsw_core_event_listener_unregister(mlxsw_core,
+ &listener->u.event_listener,
+ priv);
+ else
+ mlxsw_core_rx_listener_unregister(mlxsw_core,
+ &listener->u.rx_listener,
+ priv);
+}
+
+int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listener, void *priv)
+{
+ char hpkt_pl[MLXSW_REG_HPKT_LEN];
+ int err;
+
+ err = mlxsw_core_listener_register(mlxsw_core, listener, priv);
+ if (err)
+ return err;
+
+ mlxsw_reg_hpkt_pack(hpkt_pl, listener->action, listener->trap_id,
+ listener->trap_group, listener->is_ctrl);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+ if (err)
+ goto err_trap_set;
+
+ return 0;
+
+err_trap_set:
+ mlxsw_core_listener_unregister(mlxsw_core, listener, priv);
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_core_trap_register);
+
+void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listener,
+ void *priv)
+{
+ char hpkt_pl[MLXSW_REG_HPKT_LEN];
+
+ if (!listener->is_event) {
+ mlxsw_reg_hpkt_pack(hpkt_pl, listener->unreg_action,
+ listener->trap_id, listener->trap_group,
+ listener->is_ctrl);
+ mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+ }
+
+ mlxsw_core_listener_unregister(mlxsw_core, listener, priv);
+}
+EXPORT_SYMBOL(mlxsw_core_trap_unregister);
+
+static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core)
+{
+ return atomic64_inc_return(&mlxsw_core->emad.tid);
+}
+
+static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg,
+ char *payload,
+ enum mlxsw_core_reg_access_type type,
+ struct list_head *bulk_list,
+ mlxsw_reg_trans_cb_t *cb,
+ unsigned long cb_priv)
+{
+ u64 tid = mlxsw_core_tid_get(mlxsw_core);
+ struct mlxsw_reg_trans *trans;
+ int err;
+
+ trans = kzalloc(sizeof(*trans), GFP_KERNEL);
+ if (!trans)
+ return -ENOMEM;
+
+ err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans,
+ bulk_list, cb, cb_priv, tid);
+ if (err) {
+ kfree_rcu(trans, rcu);
+ return err;
+ }
+ return 0;
+}
+
+int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload,
+ struct list_head *bulk_list,
+ mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv)
+{
+ return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload,
+ MLXSW_CORE_REG_ACCESS_TYPE_QUERY,
+ bulk_list, cb, cb_priv);
+}
+EXPORT_SYMBOL(mlxsw_reg_trans_query);
+
+int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload,
+ struct list_head *bulk_list,
+ mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv)
+{
+ return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload,
+ MLXSW_CORE_REG_ACCESS_TYPE_WRITE,
+ bulk_list, cb, cb_priv);
+}
+EXPORT_SYMBOL(mlxsw_reg_trans_write);
+
+static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans)
+{
+ struct mlxsw_core *mlxsw_core = trans->core;
+ int err;
+
+ wait_for_completion(&trans->completion);
+ cancel_delayed_work_sync(&trans->timeout_dw);
+ err = trans->err;
+
+ if (trans->retries)
+ dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n",
+ trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid);
+ if (err)
+ dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n",
+ trans->tid, trans->reg->id,
+ mlxsw_reg_id_str(trans->reg->id),
+ mlxsw_core_reg_access_type_str(trans->type),
+ trans->emad_status,
+ mlxsw_emad_op_tlv_status_str(trans->emad_status));
+
+ list_del(&trans->bulk_list);
+ kfree_rcu(trans, rcu);
+ return err;
+}
+
+int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list)
+{
+ struct mlxsw_reg_trans *trans;
+ struct mlxsw_reg_trans *tmp;
+ int sum_err = 0;
+ int err;
+
+ list_for_each_entry_safe(trans, tmp, bulk_list, bulk_list) {
+ err = mlxsw_reg_trans_wait(trans);
+ if (err && sum_err == 0)
+ sum_err = err; /* first error to be returned */
+ }
+ return sum_err;
+}
+EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait);
+
+static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg,
+ char *payload,
+ enum mlxsw_core_reg_access_type type)
+{
+ enum mlxsw_emad_op_tlv_status status;
+ int err, n_retry;
+ bool reset_ok;
+ char *in_mbox, *out_mbox, *tmp;
+
+ dev_dbg(mlxsw_core->bus_info->dev, "Reg cmd access (reg_id=%x(%s),type=%s)\n",
+ reg->id, mlxsw_reg_id_str(reg->id),
+ mlxsw_core_reg_access_type_str(type));
+
+ in_mbox = mlxsw_cmd_mbox_alloc();
+ if (!in_mbox)
+ return -ENOMEM;
+
+ out_mbox = mlxsw_cmd_mbox_alloc();
+ if (!out_mbox) {
+ err = -ENOMEM;
+ goto free_in_mbox;
+ }
+
+ mlxsw_emad_pack_op_tlv(in_mbox, reg, type,
+ mlxsw_core_tid_get(mlxsw_core));
+ tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32);
+ mlxsw_emad_pack_reg_tlv(tmp, reg, payload);
+
+ /* There is a special treatment needed for MRSR (reset) register.
+ * The command interface will return error after the command
+ * is executed, so tell the lower layer to expect it
+ * and cope accordingly.
+ */
+ reset_ok = reg->id == MLXSW_REG_MRSR_ID;
+
+ n_retry = 0;
+retry:
+ err = mlxsw_cmd_access_reg(mlxsw_core, reset_ok, in_mbox, out_mbox);
+ if (!err) {
+ err = mlxsw_emad_process_status(out_mbox, &status);
+ if (err) {
+ if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY)
+ goto retry;
+ dev_err(mlxsw_core->bus_info->dev, "Reg cmd access status failed (status=%x(%s))\n",
+ status, mlxsw_emad_op_tlv_status_str(status));
+ }
+ }
+
+ if (!err)
+ memcpy(payload, mlxsw_emad_reg_payload(out_mbox),
+ reg->len);
+
+ mlxsw_cmd_mbox_free(out_mbox);
+free_in_mbox:
+ mlxsw_cmd_mbox_free(in_mbox);
+ if (err)
+ dev_err(mlxsw_core->bus_info->dev, "Reg cmd access failed (reg_id=%x(%s),type=%s)\n",
+ reg->id, mlxsw_reg_id_str(reg->id),
+ mlxsw_core_reg_access_type_str(type));
+ return err;
+}
+
+static void mlxsw_core_reg_access_cb(struct mlxsw_core *mlxsw_core,
+ char *payload, size_t payload_len,
+ unsigned long cb_priv)
+{
+ char *orig_payload = (char *) cb_priv;
+
+ memcpy(orig_payload, payload, payload_len);
+}
+
+static int mlxsw_core_reg_access(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg,
+ char *payload,
+ enum mlxsw_core_reg_access_type type)
+{
+ LIST_HEAD(bulk_list);
+ int err;
+
+ /* During initialization EMAD interface is not available to us,
+ * so we default to command interface. We switch to EMAD interface
+ * after setting the appropriate traps.
+ */
+ if (!mlxsw_core->emad.use_emad)
+ return mlxsw_core_reg_access_cmd(mlxsw_core, reg,
+ payload, type);
+
+ err = mlxsw_core_reg_access_emad(mlxsw_core, reg,
+ payload, type, &bulk_list,
+ mlxsw_core_reg_access_cb,
+ (unsigned long) payload);
+ if (err)
+ return err;
+ return mlxsw_reg_trans_bulk_wait(&bulk_list);
+}
+
+int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload)
+{
+ return mlxsw_core_reg_access(mlxsw_core, reg, payload,
+ MLXSW_CORE_REG_ACCESS_TYPE_QUERY);
+}
+EXPORT_SYMBOL(mlxsw_reg_query);
+
+int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload)
+{
+ return mlxsw_core_reg_access(mlxsw_core, reg, payload,
+ MLXSW_CORE_REG_ACCESS_TYPE_WRITE);
+}
+EXPORT_SYMBOL(mlxsw_reg_write);
+
+void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
+ struct mlxsw_rx_info *rx_info)
+{
+ struct mlxsw_rx_listener_item *rxl_item;
+ const struct mlxsw_rx_listener *rxl;
+ u8 local_port;
+ bool found = false;
+
+ if (rx_info->is_lag) {
+ dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n",
+ __func__, rx_info->u.lag_id,
+ rx_info->trap_id);
+ /* Upper layer does not care if the skb came from LAG or not,
+ * so just get the local_port for the lag port and push it up.
+ */
+ local_port = mlxsw_core_lag_mapping_get(mlxsw_core,
+ rx_info->u.lag_id,
+ rx_info->lag_port_index);
+ } else {
+ local_port = rx_info->u.sys_port;
+ }
+
+ dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n",
+ __func__, local_port, rx_info->trap_id);
+
+ if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) ||
+ (local_port >= mlxsw_core->max_ports))
+ goto drop;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rxl_item, &mlxsw_core->rx_listener_list, list) {
+ rxl = &rxl_item->rxl;
+ if ((rxl->local_port == MLXSW_PORT_DONT_CARE ||
+ rxl->local_port == local_port) &&
+ rxl->trap_id == rx_info->trap_id) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ rcu_read_unlock();
+ goto drop;
+ }
+
+ rxl->func(skb, local_port, rxl_item->priv);
+ rcu_read_unlock();
+ return;
+
+drop:
+ dev_kfree_skb(skb);
+}
+EXPORT_SYMBOL(mlxsw_core_skb_receive);
+
+static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index)
+{
+ return MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS) * lag_id +
+ port_index;
+}
+
+void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index, u8 local_port)
+{
+ int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+ lag_id, port_index);
+
+ mlxsw_core->lag.mapping[index] = local_port;
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_set);
+
+u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index)
+{
+ int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+ lag_id, port_index);
+
+ return mlxsw_core->lag.mapping[index];
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_get);
+
+void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 local_port)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS); i++) {
+ int index = mlxsw_core_lag_mapping_index(mlxsw_core,
+ lag_id, i);
+
+ if (mlxsw_core->lag.mapping[index] == local_port)
+ mlxsw_core->lag.mapping[index] = 0;
+ }
+}
+EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear);
+
+bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core,
+ enum mlxsw_res_id res_id)
+{
+ return mlxsw_res_valid(&mlxsw_core->res, res_id);
+}
+EXPORT_SYMBOL(mlxsw_core_res_valid);
+
+u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
+ enum mlxsw_res_id res_id)
+{
+ return mlxsw_res_get(&mlxsw_core->res, res_id);
+}
+EXPORT_SYMBOL(mlxsw_core_res_get);
+
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ struct mlxsw_core_port *mlxsw_core_port =
+ &mlxsw_core->ports[local_port];
+ struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+ int err;
+
+ mlxsw_core_port->local_port = local_port;
+ err = devlink_port_register(devlink, devlink_port, local_port);
+ if (err)
+ memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_core_port_init);
+
+void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port)
+{
+ struct mlxsw_core_port *mlxsw_core_port =
+ &mlxsw_core->ports[local_port];
+ struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+ devlink_port_unregister(devlink_port);
+ memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
+}
+EXPORT_SYMBOL(mlxsw_core_port_fini);
+
+void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+ void *port_driver_priv, struct net_device *dev,
+ u32 port_number, bool split,
+ u32 split_port_subnumber)
+{
+ struct mlxsw_core_port *mlxsw_core_port =
+ &mlxsw_core->ports[local_port];
+ struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+ mlxsw_core_port->port_driver_priv = port_driver_priv;
+ devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+ port_number, split, split_port_subnumber);
+ devlink_port_type_eth_set(devlink_port, dev);
+}
+EXPORT_SYMBOL(mlxsw_core_port_eth_set);
+
+void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+ void *port_driver_priv)
+{
+ struct mlxsw_core_port *mlxsw_core_port =
+ &mlxsw_core->ports[local_port];
+ struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+ mlxsw_core_port->port_driver_priv = port_driver_priv;
+ devlink_port_type_ib_set(devlink_port, NULL);
+}
+EXPORT_SYMBOL(mlxsw_core_port_ib_set);
+
+void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
+ void *port_driver_priv)
+{
+ struct mlxsw_core_port *mlxsw_core_port =
+ &mlxsw_core->ports[local_port];
+ struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+ mlxsw_core_port->port_driver_priv = port_driver_priv;
+ devlink_port_type_clear(devlink_port);
+}
+EXPORT_SYMBOL(mlxsw_core_port_clear);
+
+enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
+ u8 local_port)
+{
+ struct mlxsw_core_port *mlxsw_core_port =
+ &mlxsw_core->ports[local_port];
+ struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+ return devlink_port->type;
+}
+EXPORT_SYMBOL(mlxsw_core_port_type_get);
+
+int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core,
+ u8 local_port, char *name, size_t len)
+{
+ struct mlxsw_core_port *mlxsw_core_port =
+ &mlxsw_core->ports[local_port];
+ struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+ return devlink_port_get_phys_port_name(devlink_port, name, len);
+}
+EXPORT_SYMBOL(mlxsw_core_port_get_phys_port_name);
+
+static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
+ const char *buf, size_t size)
+{
+ __be32 *m = (__be32 *) buf;
+ int i;
+ int count = size / sizeof(__be32);
+
+ for (i = count - 1; i >= 0; i--)
+ if (m[i])
+ break;
+ i++;
+ count = i ? i : 1;
+ for (i = 0; i < count; i += 4)
+ dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n",
+ i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]),
+ be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3]));
+}
+
+int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod,
+ u32 in_mod, bool out_mbox_direct, bool reset_ok,
+ char *in_mbox, size_t in_mbox_size,
+ char *out_mbox, size_t out_mbox_size)
+{
+ u8 status;
+ int err;
+
+ BUG_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));
+ if (!mlxsw_core->bus->cmd_exec)
+ return -EOPNOTSUPP;
+
+ dev_dbg(mlxsw_core->bus_info->dev, "Cmd exec (opcode=%x(%s),opcode_mod=%x,in_mod=%x)\n",
+ opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod, in_mod);
+ if (in_mbox) {
+ dev_dbg(mlxsw_core->bus_info->dev, "Input mailbox:\n");
+ mlxsw_core_buf_dump_dbg(mlxsw_core, in_mbox, in_mbox_size);
+ }
+
+ err = mlxsw_core->bus->cmd_exec(mlxsw_core->bus_priv, opcode,
+ opcode_mod, in_mod, out_mbox_direct,
+ in_mbox, in_mbox_size,
+ out_mbox, out_mbox_size, &status);
+
+ if (!err && out_mbox) {
+ dev_dbg(mlxsw_core->bus_info->dev, "Output mailbox:\n");
+ mlxsw_core_buf_dump_dbg(mlxsw_core, out_mbox, out_mbox_size);
+ }
+
+ if (reset_ok && err == -EIO &&
+ status == MLXSW_CMD_STATUS_RUNNING_RESET) {
+ err = 0;
+ } else if (err == -EIO && status != MLXSW_CMD_STATUS_OK) {
+ dev_err(mlxsw_core->bus_info->dev, "Cmd exec failed (opcode=%x(%s),opcode_mod=%x,in_mod=%x,status=%x(%s))\n",
+ opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod,
+ in_mod, status, mlxsw_cmd_status_str(status));
+ } else if (err == -ETIMEDOUT) {
+ dev_err(mlxsw_core->bus_info->dev, "Cmd exec timed-out (opcode=%x(%s),opcode_mod=%x,in_mod=%x)\n",
+ opcode, mlxsw_cmd_opcode_str(opcode), opcode_mod,
+ in_mod);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_cmd_exec);
+
+int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay)
+{
+ return queue_delayed_work(mlxsw_wq, dwork, delay);
+}
+EXPORT_SYMBOL(mlxsw_core_schedule_dw);
+
+bool mlxsw_core_schedule_work(struct work_struct *work)
+{
+ return queue_work(mlxsw_owq, work);
+}
+EXPORT_SYMBOL(mlxsw_core_schedule_work);
+
+void mlxsw_core_flush_owq(void)
+{
+ flush_workqueue(mlxsw_owq);
+}
+EXPORT_SYMBOL(mlxsw_core_flush_owq);
+
+int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_config_profile *profile,
+ u64 *p_single_size, u64 *p_double_size,
+ u64 *p_linear_size)
+{
+ struct mlxsw_driver *driver = mlxsw_core->driver;
+
+ if (!driver->kvd_sizes_get)
+ return -EINVAL;
+
+ return driver->kvd_sizes_get(mlxsw_core, profile,
+ p_single_size, p_double_size,
+ p_linear_size);
+}
+EXPORT_SYMBOL(mlxsw_core_kvd_sizes_get);
+
+void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core)
+{
+ mlxsw_core->fw_flash_in_progress = true;
+}
+EXPORT_SYMBOL(mlxsw_core_fw_flash_start);
+
+void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core)
+{
+ mlxsw_core->fw_flash_in_progress = false;
+}
+EXPORT_SYMBOL(mlxsw_core_fw_flash_end);
+
+static int __init mlxsw_core_module_init(void)
+{
+ int err;
+
+ mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0);
+ if (!mlxsw_wq)
+ return -ENOMEM;
+ mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
+ mlxsw_core_driver_name);
+ if (!mlxsw_owq) {
+ err = -ENOMEM;
+ goto err_alloc_ordered_workqueue;
+ }
+ return 0;
+
+err_alloc_ordered_workqueue:
+ destroy_workqueue(mlxsw_wq);
+ return err;
+}
+
+static void __exit mlxsw_core_module_exit(void)
+{
+ destroy_workqueue(mlxsw_owq);
+ destroy_workqueue(mlxsw_wq);
+}
+
+module_init(mlxsw_core_module_init);
+module_exit(mlxsw_core_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox switch device core driver");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
new file mode 100644
index 000000000..c4e497176
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -0,0 +1,395 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_CORE_H
+#define _MLXSW_CORE_H
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <net/devlink.h>
+
+#include "trap.h"
+#include "reg.h"
+#include "cmd.h"
+#include "resources.h"
+
+struct mlxsw_core;
+struct mlxsw_core_port;
+struct mlxsw_driver;
+struct mlxsw_bus;
+struct mlxsw_bus_info;
+
+unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core);
+
+void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
+
+int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver);
+void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver);
+
+int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
+ const struct mlxsw_bus *mlxsw_bus,
+ void *bus_priv, bool reload,
+ struct devlink *devlink);
+void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, bool reload);
+
+struct mlxsw_tx_info {
+ u8 local_port;
+ bool is_emad;
+};
+
+bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_tx_info *tx_info);
+int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info);
+
+struct mlxsw_rx_listener {
+ void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
+ u8 local_port;
+ u16 trap_id;
+ enum mlxsw_reg_hpkt_action action;
+};
+
+struct mlxsw_event_listener {
+ void (*func)(const struct mlxsw_reg_info *reg,
+ char *payload, void *priv);
+ enum mlxsw_event_trap_id trap_id;
+};
+
+struct mlxsw_listener {
+ u16 trap_id;
+ union {
+ struct mlxsw_rx_listener rx_listener;
+ struct mlxsw_event_listener event_listener;
+ } u;
+ enum mlxsw_reg_hpkt_action action;
+ enum mlxsw_reg_hpkt_action unreg_action;
+ u8 trap_group;
+ bool is_ctrl; /* should go via control buffer or not */
+ bool is_event;
+};
+
+#define MLXSW_RXL(_func, _trap_id, _action, _is_ctrl, _trap_group, \
+ _unreg_action) \
+ { \
+ .trap_id = MLXSW_TRAP_ID_##_trap_id, \
+ .u.rx_listener = \
+ { \
+ .func = _func, \
+ .local_port = MLXSW_PORT_DONT_CARE, \
+ .trap_id = MLXSW_TRAP_ID_##_trap_id, \
+ }, \
+ .action = MLXSW_REG_HPKT_ACTION_##_action, \
+ .unreg_action = MLXSW_REG_HPKT_ACTION_##_unreg_action, \
+ .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \
+ .is_ctrl = _is_ctrl, \
+ .is_event = false, \
+ }
+
+#define MLXSW_EVENTL(_func, _trap_id, _trap_group) \
+ { \
+ .trap_id = MLXSW_TRAP_ID_##_trap_id, \
+ .u.event_listener = \
+ { \
+ .func = _func, \
+ .trap_id = MLXSW_TRAP_ID_##_trap_id, \
+ }, \
+ .action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, \
+ .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group, \
+ .is_ctrl = false, \
+ .is_event = true, \
+ }
+
+int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_rx_listener *rxl,
+ void *priv);
+void mlxsw_core_rx_listener_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_rx_listener *rxl,
+ void *priv);
+
+int mlxsw_core_event_listener_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_event_listener *el,
+ void *priv);
+void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_event_listener *el,
+ void *priv);
+
+int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listener,
+ void *priv);
+void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_listener *listener,
+ void *priv);
+
+typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload,
+ size_t payload_len, unsigned long cb_priv);
+
+int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload,
+ struct list_head *bulk_list,
+ mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
+int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload,
+ struct list_head *bulk_list,
+ mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
+int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list);
+
+int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload);
+int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_reg_info *reg, char *payload);
+
+struct mlxsw_rx_info {
+ bool is_lag;
+ union {
+ u16 sys_port;
+ u16 lag_id;
+ } u;
+ u8 lag_port_index;
+ int trap_id;
+};
+
+void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
+ struct mlxsw_rx_info *rx_info);
+
+void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index, u8 local_port);
+u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 port_index);
+void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
+ u16 lag_id, u8 local_port);
+
+void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port);
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port);
+void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port);
+void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+ void *port_driver_priv, struct net_device *dev,
+ u32 port_number, bool split,
+ u32 split_port_subnumber);
+void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+ void *port_driver_priv);
+void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
+ void *port_driver_priv);
+enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
+ u8 local_port);
+int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core,
+ u8 local_port, char *name, size_t len);
+
+int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
+bool mlxsw_core_schedule_work(struct work_struct *work);
+void mlxsw_core_flush_owq(void);
+
+#define MLXSW_CONFIG_PROFILE_SWID_COUNT 8
+
+struct mlxsw_swid_config {
+ u8 used_type:1,
+ used_properties:1;
+ u8 type;
+ u8 properties;
+};
+
+struct mlxsw_config_profile {
+ u16 used_max_vepa_channels:1,
+ used_max_mid:1,
+ used_max_pgt:1,
+ used_max_system_port:1,
+ used_max_vlan_groups:1,
+ used_max_regions:1,
+ used_flood_tables:1,
+ used_flood_mode:1,
+ used_max_ib_mc:1,
+ used_max_pkey:1,
+ used_ar_sec:1,
+ used_adaptive_routing_group_cap:1,
+ used_kvd_sizes:1;
+ u8 max_vepa_channels;
+ u16 max_mid;
+ u16 max_pgt;
+ u16 max_system_port;
+ u16 max_vlan_groups;
+ u16 max_regions;
+ u8 max_flood_tables;
+ u8 max_vid_flood_tables;
+ u8 flood_mode;
+ u8 max_fid_offset_flood_tables;
+ u16 fid_offset_flood_table_size;
+ u8 max_fid_flood_tables;
+ u16 fid_flood_table_size;
+ u16 max_ib_mc;
+ u16 max_pkey;
+ u8 ar_sec;
+ u16 adaptive_routing_group_cap;
+ u8 arn;
+ u32 kvd_linear_size;
+ u8 kvd_hash_single_parts;
+ u8 kvd_hash_double_parts;
+ struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
+};
+
+struct mlxsw_driver {
+ struct list_head list;
+ const char *kind;
+ size_t priv_size;
+ int (*init)(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info);
+ void (*fini)(struct mlxsw_core *mlxsw_core);
+ int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core);
+ int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port,
+ enum devlink_port_type new_type);
+ int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port,
+ unsigned int count, struct netlink_ext_ack *extack);
+ int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port,
+ struct netlink_ext_ack *extack);
+ int (*sb_pool_get)(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info);
+ int (*sb_pool_set)(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index, u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type);
+ int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold);
+ int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold);
+ int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold);
+ int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold);
+ int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index);
+ int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index);
+ int (*sb_occ_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_cur, u32 *p_max);
+ int (*sb_occ_tc_port_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max);
+ void (*txhdr_construct)(struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info);
+ int (*resources_register)(struct mlxsw_core *mlxsw_core);
+ int (*kvd_sizes_get)(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_config_profile *profile,
+ u64 *p_single_size, u64 *p_double_size,
+ u64 *p_linear_size);
+ u8 txhdr_len;
+ const struct mlxsw_config_profile *profile;
+ bool res_query_enabled;
+};
+
+int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_config_profile *profile,
+ u64 *p_single_size, u64 *p_double_size,
+ u64 *p_linear_size);
+
+void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core);
+void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core);
+
+bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core,
+ enum mlxsw_res_id res_id);
+
+#define MLXSW_CORE_RES_VALID(mlxsw_core, short_res_id) \
+ mlxsw_core_res_valid(mlxsw_core, MLXSW_RES_ID_##short_res_id)
+
+u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
+ enum mlxsw_res_id res_id);
+
+#define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id) \
+ mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id)
+
+#define MLXSW_BUS_F_TXRX BIT(0)
+#define MLXSW_BUS_F_RESET BIT(1)
+
+struct mlxsw_bus {
+ const char *kind;
+ int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_config_profile *profile,
+ struct mlxsw_res *res);
+ void (*fini)(void *bus_priv);
+ bool (*skb_transmit_busy)(void *bus_priv,
+ const struct mlxsw_tx_info *tx_info);
+ int (*skb_transmit)(void *bus_priv, struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info);
+ int (*cmd_exec)(void *bus_priv, u16 opcode, u8 opcode_mod,
+ u32 in_mod, bool out_mbox_direct,
+ char *in_mbox, size_t in_mbox_size,
+ char *out_mbox, size_t out_mbox_size,
+ u8 *p_status);
+ u8 features;
+};
+
+struct mlxsw_fw_rev {
+ u16 major;
+ u16 minor;
+ u16 subminor;
+ u16 can_reset_minor;
+};
+
+struct mlxsw_bus_info {
+ const char *device_kind;
+ const char *device_name;
+ struct device *dev;
+ struct mlxsw_fw_rev fw_rev;
+ u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN];
+ u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
+};
+
+struct mlxsw_hwmon;
+
+#ifdef CONFIG_MLXSW_CORE_HWMON
+
+int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info,
+ struct mlxsw_hwmon **p_hwmon);
+void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon);
+
+#else
+
+static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info,
+ struct mlxsw_hwmon **p_hwmon)
+{
+ return 0;
+}
+
+static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+}
+
+#endif
+
+struct mlxsw_thermal;
+
+#ifdef CONFIG_MLXSW_CORE_THERMAL
+
+int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info,
+ struct mlxsw_thermal **p_thermal);
+void mlxsw_thermal_fini(struct mlxsw_thermal *thermal);
+
+#else
+
+static inline int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info,
+ struct mlxsw_thermal **p_thermal)
+{
+ return 0;
+}
+
+static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
new file mode 100644
index 000000000..2cbfa5cfe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -0,0 +1,1233 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/rhashtable.h>
+#include <linux/list.h>
+
+#include "item.h"
+#include "trap.h"
+#include "core_acl_flex_actions.h"
+
+enum mlxsw_afa_set_type {
+ MLXSW_AFA_SET_TYPE_NEXT,
+ MLXSW_AFA_SET_TYPE_GOTO,
+};
+
+/* afa_set_type
+ * Type of the record at the end of the action set.
+ */
+MLXSW_ITEM32(afa, set, type, 0xA0, 28, 4);
+
+/* afa_set_next_action_set_ptr
+ * A pointer to the next action set in the KVD Centralized database.
+ */
+MLXSW_ITEM32(afa, set, next_action_set_ptr, 0xA4, 0, 24);
+
+/* afa_set_goto_g
+ * group - When set, the binding is of an ACL group. When cleared,
+ * the binding is of an ACL.
+ * Must be set to 1 for Spectrum.
+ */
+MLXSW_ITEM32(afa, set, goto_g, 0xA4, 29, 1);
+
+enum mlxsw_afa_set_goto_binding_cmd {
+ /* continue go the next binding point */
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE,
+ /* jump to the next binding point no return */
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP,
+ /* terminate the acl binding */
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM = 4,
+};
+
+/* afa_set_goto_binding_cmd */
+MLXSW_ITEM32(afa, set, goto_binding_cmd, 0xA4, 24, 3);
+
+/* afa_set_goto_next_binding
+ * ACL/ACL group identifier. If the g bit is set, this field should hold
+ * the acl_group_id, else it should hold the acl_id.
+ */
+MLXSW_ITEM32(afa, set, goto_next_binding, 0xA4, 0, 16);
+
+/* afa_all_action_type
+ * Action Type.
+ */
+MLXSW_ITEM32(afa, all, action_type, 0x00, 24, 6);
+
+struct mlxsw_afa {
+ unsigned int max_acts_per_set;
+ const struct mlxsw_afa_ops *ops;
+ void *ops_priv;
+ struct rhashtable set_ht;
+ struct rhashtable fwd_entry_ht;
+};
+
+#define MLXSW_AFA_SET_LEN 0xA8
+
+struct mlxsw_afa_set_ht_key {
+ char enc_actions[MLXSW_AFA_SET_LEN]; /* Encoded set */
+ bool is_first;
+};
+
+/* Set structure holds one action set record. It contains up to three
+ * actions (depends on size of particular actions). The set is either
+ * put directly to a rule, or it is stored in KVD linear area.
+ * To prevent duplicate entries in KVD linear area, a hashtable is
+ * used to track sets that were previously inserted and may be shared.
+ */
+
+struct mlxsw_afa_set {
+ struct rhash_head ht_node;
+ struct mlxsw_afa_set_ht_key ht_key;
+ u32 kvdl_index;
+ bool shared; /* Inserted in hashtable (doesn't mean that
+ * kvdl_index is valid).
+ */
+ unsigned int ref_count;
+ struct mlxsw_afa_set *next; /* Pointer to the next set. */
+ struct mlxsw_afa_set *prev; /* Pointer to the previous set,
+ * note that set may have multiple
+ * sets from multiple blocks
+ * pointing at it. This is only
+ * usable until commit.
+ */
+};
+
+static const struct rhashtable_params mlxsw_afa_set_ht_params = {
+ .key_len = sizeof(struct mlxsw_afa_set_ht_key),
+ .key_offset = offsetof(struct mlxsw_afa_set, ht_key),
+ .head_offset = offsetof(struct mlxsw_afa_set, ht_node),
+ .automatic_shrinking = true,
+};
+
+struct mlxsw_afa_fwd_entry_ht_key {
+ u8 local_port;
+};
+
+struct mlxsw_afa_fwd_entry {
+ struct rhash_head ht_node;
+ struct mlxsw_afa_fwd_entry_ht_key ht_key;
+ u32 kvdl_index;
+ unsigned int ref_count;
+};
+
+static const struct rhashtable_params mlxsw_afa_fwd_entry_ht_params = {
+ .key_len = sizeof(struct mlxsw_afa_fwd_entry_ht_key),
+ .key_offset = offsetof(struct mlxsw_afa_fwd_entry, ht_key),
+ .head_offset = offsetof(struct mlxsw_afa_fwd_entry, ht_node),
+ .automatic_shrinking = true,
+};
+
+struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set,
+ const struct mlxsw_afa_ops *ops,
+ void *ops_priv)
+{
+ struct mlxsw_afa *mlxsw_afa;
+ int err;
+
+ mlxsw_afa = kzalloc(sizeof(*mlxsw_afa), GFP_KERNEL);
+ if (!mlxsw_afa)
+ return ERR_PTR(-ENOMEM);
+ err = rhashtable_init(&mlxsw_afa->set_ht, &mlxsw_afa_set_ht_params);
+ if (err)
+ goto err_set_rhashtable_init;
+ err = rhashtable_init(&mlxsw_afa->fwd_entry_ht,
+ &mlxsw_afa_fwd_entry_ht_params);
+ if (err)
+ goto err_fwd_entry_rhashtable_init;
+ mlxsw_afa->max_acts_per_set = max_acts_per_set;
+ mlxsw_afa->ops = ops;
+ mlxsw_afa->ops_priv = ops_priv;
+ return mlxsw_afa;
+
+err_fwd_entry_rhashtable_init:
+ rhashtable_destroy(&mlxsw_afa->set_ht);
+err_set_rhashtable_init:
+ kfree(mlxsw_afa);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(mlxsw_afa_create);
+
+void mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa)
+{
+ rhashtable_destroy(&mlxsw_afa->fwd_entry_ht);
+ rhashtable_destroy(&mlxsw_afa->set_ht);
+ kfree(mlxsw_afa);
+}
+EXPORT_SYMBOL(mlxsw_afa_destroy);
+
+static void mlxsw_afa_set_goto_set(struct mlxsw_afa_set *set,
+ enum mlxsw_afa_set_goto_binding_cmd cmd,
+ u16 group_id)
+{
+ char *actions = set->ht_key.enc_actions;
+
+ mlxsw_afa_set_type_set(actions, MLXSW_AFA_SET_TYPE_GOTO);
+ mlxsw_afa_set_goto_g_set(actions, true);
+ mlxsw_afa_set_goto_binding_cmd_set(actions, cmd);
+ mlxsw_afa_set_goto_next_binding_set(actions, group_id);
+}
+
+static void mlxsw_afa_set_next_set(struct mlxsw_afa_set *set,
+ u32 next_set_kvdl_index)
+{
+ char *actions = set->ht_key.enc_actions;
+
+ mlxsw_afa_set_type_set(actions, MLXSW_AFA_SET_TYPE_NEXT);
+ mlxsw_afa_set_next_action_set_ptr_set(actions, next_set_kvdl_index);
+}
+
+static struct mlxsw_afa_set *mlxsw_afa_set_create(bool is_first)
+{
+ struct mlxsw_afa_set *set;
+
+ set = kzalloc(sizeof(*set), GFP_KERNEL);
+ if (!set)
+ return NULL;
+ /* Need to initialize the set to pass by default */
+ mlxsw_afa_set_goto_set(set, MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0);
+ set->ht_key.is_first = is_first;
+ set->ref_count = 1;
+ return set;
+}
+
+static void mlxsw_afa_set_destroy(struct mlxsw_afa_set *set)
+{
+ kfree(set);
+}
+
+static int mlxsw_afa_set_share(struct mlxsw_afa *mlxsw_afa,
+ struct mlxsw_afa_set *set)
+{
+ int err;
+
+ err = rhashtable_insert_fast(&mlxsw_afa->set_ht, &set->ht_node,
+ mlxsw_afa_set_ht_params);
+ if (err)
+ return err;
+ err = mlxsw_afa->ops->kvdl_set_add(mlxsw_afa->ops_priv,
+ &set->kvdl_index,
+ set->ht_key.enc_actions,
+ set->ht_key.is_first);
+ if (err)
+ goto err_kvdl_set_add;
+ set->shared = true;
+ set->prev = NULL;
+ return 0;
+
+err_kvdl_set_add:
+ rhashtable_remove_fast(&mlxsw_afa->set_ht, &set->ht_node,
+ mlxsw_afa_set_ht_params);
+ return err;
+}
+
+static void mlxsw_afa_set_unshare(struct mlxsw_afa *mlxsw_afa,
+ struct mlxsw_afa_set *set)
+{
+ mlxsw_afa->ops->kvdl_set_del(mlxsw_afa->ops_priv,
+ set->kvdl_index,
+ set->ht_key.is_first);
+ rhashtable_remove_fast(&mlxsw_afa->set_ht, &set->ht_node,
+ mlxsw_afa_set_ht_params);
+ set->shared = false;
+}
+
+static void mlxsw_afa_set_put(struct mlxsw_afa *mlxsw_afa,
+ struct mlxsw_afa_set *set)
+{
+ if (--set->ref_count)
+ return;
+ if (set->shared)
+ mlxsw_afa_set_unshare(mlxsw_afa, set);
+ mlxsw_afa_set_destroy(set);
+}
+
+static struct mlxsw_afa_set *mlxsw_afa_set_get(struct mlxsw_afa *mlxsw_afa,
+ struct mlxsw_afa_set *orig_set)
+{
+ struct mlxsw_afa_set *set;
+ int err;
+
+ /* There is a hashtable of sets maintained. If a set with the exact
+ * same encoding exists, we reuse it. Otherwise, the current set
+ * is shared by making it available to others using the hash table.
+ */
+ set = rhashtable_lookup_fast(&mlxsw_afa->set_ht, &orig_set->ht_key,
+ mlxsw_afa_set_ht_params);
+ if (set) {
+ set->ref_count++;
+ mlxsw_afa_set_put(mlxsw_afa, orig_set);
+ } else {
+ set = orig_set;
+ err = mlxsw_afa_set_share(mlxsw_afa, set);
+ if (err)
+ return ERR_PTR(err);
+ }
+ return set;
+}
+
+/* Block structure holds a list of action sets. One action block
+ * represents one chain of actions executed upon match of a rule.
+ */
+
+struct mlxsw_afa_block {
+ struct mlxsw_afa *afa;
+ bool finished;
+ struct mlxsw_afa_set *first_set;
+ struct mlxsw_afa_set *cur_set;
+ unsigned int cur_act_index; /* In current set. */
+ struct list_head resource_list; /* List of resources held by actions
+ * in this block.
+ */
+};
+
+struct mlxsw_afa_resource {
+ struct list_head list;
+ void (*destructor)(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_resource *resource);
+};
+
+static void mlxsw_afa_resource_add(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_resource *resource)
+{
+ list_add(&resource->list, &block->resource_list);
+}
+
+static void mlxsw_afa_resource_del(struct mlxsw_afa_resource *resource)
+{
+ list_del(&resource->list);
+}
+
+static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block)
+{
+ struct mlxsw_afa_resource *resource, *tmp;
+
+ list_for_each_entry_safe(resource, tmp, &block->resource_list, list) {
+ resource->destructor(block, resource);
+ }
+}
+
+struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa)
+{
+ struct mlxsw_afa_block *block;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&block->resource_list);
+ block->afa = mlxsw_afa;
+
+ /* At least one action set is always present, so just create it here */
+ block->first_set = mlxsw_afa_set_create(true);
+ if (!block->first_set)
+ goto err_first_set_create;
+
+ /* In case user instructs to have dummy first set, we leave it
+ * empty here and create another, real, set right away.
+ */
+ if (mlxsw_afa->ops->dummy_first_set) {
+ block->cur_set = mlxsw_afa_set_create(false);
+ if (!block->cur_set)
+ goto err_second_set_create;
+ block->cur_set->prev = block->first_set;
+ block->first_set->next = block->cur_set;
+ } else {
+ block->cur_set = block->first_set;
+ }
+
+ return block;
+
+err_second_set_create:
+ mlxsw_afa_set_destroy(block->first_set);
+err_first_set_create:
+ kfree(block);
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL(mlxsw_afa_block_create);
+
+void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block)
+{
+ struct mlxsw_afa_set *set = block->first_set;
+ struct mlxsw_afa_set *next_set;
+
+ do {
+ next_set = set->next;
+ mlxsw_afa_set_put(block->afa, set);
+ set = next_set;
+ } while (set);
+ mlxsw_afa_resources_destroy(block);
+ kfree(block);
+}
+EXPORT_SYMBOL(mlxsw_afa_block_destroy);
+
+int mlxsw_afa_block_commit(struct mlxsw_afa_block *block)
+{
+ struct mlxsw_afa_set *set = block->cur_set;
+ struct mlxsw_afa_set *prev_set;
+
+ block->cur_set = NULL;
+ block->finished = true;
+
+ /* Go over all linked sets starting from last
+ * and try to find existing set in the hash table.
+ * In case it is not there, assign a KVD linear index
+ * and insert it.
+ */
+ do {
+ prev_set = set->prev;
+ set = mlxsw_afa_set_get(block->afa, set);
+ if (IS_ERR(set))
+ /* No rollback is needed since the chain is
+ * in consistent state and mlxsw_afa_block_destroy
+ * will take care of putting it away.
+ */
+ return PTR_ERR(set);
+ if (prev_set) {
+ prev_set->next = set;
+ mlxsw_afa_set_next_set(prev_set, set->kvdl_index);
+ set = prev_set;
+ }
+ } while (prev_set);
+
+ block->first_set = set;
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_commit);
+
+char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block)
+{
+ return block->first_set->ht_key.enc_actions;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_first_set);
+
+char *mlxsw_afa_block_cur_set(struct mlxsw_afa_block *block)
+{
+ return block->cur_set->ht_key.enc_actions;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_cur_set);
+
+u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block)
+{
+ /* First set is never in KVD linear. So the first set
+ * with valid KVD linear index is always the second one.
+ */
+ if (WARN_ON(!block->first_set->next))
+ return 0;
+ return block->first_set->next->kvdl_index;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_first_kvdl_index);
+
+int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity)
+{
+ u32 kvdl_index = mlxsw_afa_block_first_kvdl_index(block);
+
+ return block->afa->ops->kvdl_set_activity_get(block->afa->ops_priv,
+ kvdl_index, activity);
+}
+EXPORT_SYMBOL(mlxsw_afa_block_activity_get);
+
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
+{
+ if (block->finished)
+ return -EINVAL;
+ mlxsw_afa_set_goto_set(block->cur_set,
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0);
+ block->finished = true;
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_continue);
+
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
+{
+ if (block->finished)
+ return -EINVAL;
+ mlxsw_afa_set_goto_set(block->cur_set,
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id);
+ block->finished = true;
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_jump);
+
+int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block)
+{
+ if (block->finished)
+ return -EINVAL;
+ mlxsw_afa_set_goto_set(block->cur_set,
+ MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0);
+ block->finished = true;
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_terminate);
+
+static struct mlxsw_afa_fwd_entry *
+mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u8 local_port)
+{
+ struct mlxsw_afa_fwd_entry *fwd_entry;
+ int err;
+
+ fwd_entry = kzalloc(sizeof(*fwd_entry), GFP_KERNEL);
+ if (!fwd_entry)
+ return ERR_PTR(-ENOMEM);
+ fwd_entry->ht_key.local_port = local_port;
+ fwd_entry->ref_count = 1;
+
+ err = rhashtable_insert_fast(&mlxsw_afa->fwd_entry_ht,
+ &fwd_entry->ht_node,
+ mlxsw_afa_fwd_entry_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ err = mlxsw_afa->ops->kvdl_fwd_entry_add(mlxsw_afa->ops_priv,
+ &fwd_entry->kvdl_index,
+ local_port);
+ if (err)
+ goto err_kvdl_fwd_entry_add;
+ return fwd_entry;
+
+err_kvdl_fwd_entry_add:
+ rhashtable_remove_fast(&mlxsw_afa->fwd_entry_ht, &fwd_entry->ht_node,
+ mlxsw_afa_fwd_entry_ht_params);
+err_rhashtable_insert:
+ kfree(fwd_entry);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_afa_fwd_entry_destroy(struct mlxsw_afa *mlxsw_afa,
+ struct mlxsw_afa_fwd_entry *fwd_entry)
+{
+ mlxsw_afa->ops->kvdl_fwd_entry_del(mlxsw_afa->ops_priv,
+ fwd_entry->kvdl_index);
+ rhashtable_remove_fast(&mlxsw_afa->fwd_entry_ht, &fwd_entry->ht_node,
+ mlxsw_afa_fwd_entry_ht_params);
+ kfree(fwd_entry);
+}
+
+static struct mlxsw_afa_fwd_entry *
+mlxsw_afa_fwd_entry_get(struct mlxsw_afa *mlxsw_afa, u8 local_port)
+{
+ struct mlxsw_afa_fwd_entry_ht_key ht_key = {0};
+ struct mlxsw_afa_fwd_entry *fwd_entry;
+
+ ht_key.local_port = local_port;
+ fwd_entry = rhashtable_lookup_fast(&mlxsw_afa->fwd_entry_ht, &ht_key,
+ mlxsw_afa_fwd_entry_ht_params);
+ if (fwd_entry) {
+ fwd_entry->ref_count++;
+ return fwd_entry;
+ }
+ return mlxsw_afa_fwd_entry_create(mlxsw_afa, local_port);
+}
+
+static void mlxsw_afa_fwd_entry_put(struct mlxsw_afa *mlxsw_afa,
+ struct mlxsw_afa_fwd_entry *fwd_entry)
+{
+ if (--fwd_entry->ref_count)
+ return;
+ mlxsw_afa_fwd_entry_destroy(mlxsw_afa, fwd_entry);
+}
+
+struct mlxsw_afa_fwd_entry_ref {
+ struct mlxsw_afa_resource resource;
+ struct mlxsw_afa_fwd_entry *fwd_entry;
+};
+
+static void
+mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref)
+{
+ mlxsw_afa_resource_del(&fwd_entry_ref->resource);
+ mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry);
+ kfree(fwd_entry_ref);
+}
+
+static void
+mlxsw_afa_fwd_entry_ref_destructor(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_resource *resource)
+{
+ struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref;
+
+ fwd_entry_ref = container_of(resource, struct mlxsw_afa_fwd_entry_ref,
+ resource);
+ mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref);
+}
+
+static struct mlxsw_afa_fwd_entry_ref *
+mlxsw_afa_fwd_entry_ref_create(struct mlxsw_afa_block *block, u8 local_port)
+{
+ struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref;
+ struct mlxsw_afa_fwd_entry *fwd_entry;
+ int err;
+
+ fwd_entry_ref = kzalloc(sizeof(*fwd_entry_ref), GFP_KERNEL);
+ if (!fwd_entry_ref)
+ return ERR_PTR(-ENOMEM);
+ fwd_entry = mlxsw_afa_fwd_entry_get(block->afa, local_port);
+ if (IS_ERR(fwd_entry)) {
+ err = PTR_ERR(fwd_entry);
+ goto err_fwd_entry_get;
+ }
+ fwd_entry_ref->fwd_entry = fwd_entry;
+ fwd_entry_ref->resource.destructor = mlxsw_afa_fwd_entry_ref_destructor;
+ mlxsw_afa_resource_add(block, &fwd_entry_ref->resource);
+ return fwd_entry_ref;
+
+err_fwd_entry_get:
+ kfree(fwd_entry_ref);
+ return ERR_PTR(err);
+}
+
+struct mlxsw_afa_counter {
+ struct mlxsw_afa_resource resource;
+ u32 counter_index;
+};
+
+static void
+mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_counter *counter)
+{
+ mlxsw_afa_resource_del(&counter->resource);
+ block->afa->ops->counter_index_put(block->afa->ops_priv,
+ counter->counter_index);
+ kfree(counter);
+}
+
+static void
+mlxsw_afa_counter_destructor(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_resource *resource)
+{
+ struct mlxsw_afa_counter *counter;
+
+ counter = container_of(resource, struct mlxsw_afa_counter, resource);
+ mlxsw_afa_counter_destroy(block, counter);
+}
+
+static struct mlxsw_afa_counter *
+mlxsw_afa_counter_create(struct mlxsw_afa_block *block)
+{
+ struct mlxsw_afa_counter *counter;
+ int err;
+
+ counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ if (!counter)
+ return ERR_PTR(-ENOMEM);
+
+ err = block->afa->ops->counter_index_get(block->afa->ops_priv,
+ &counter->counter_index);
+ if (err)
+ goto err_counter_index_get;
+ counter->resource.destructor = mlxsw_afa_counter_destructor;
+ mlxsw_afa_resource_add(block, &counter->resource);
+ return counter;
+
+err_counter_index_get:
+ kfree(counter);
+ return ERR_PTR(err);
+}
+
+#define MLXSW_AFA_ONE_ACTION_LEN 32
+#define MLXSW_AFA_PAYLOAD_OFFSET 4
+
+static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block,
+ u8 action_code, u8 action_size)
+{
+ char *oneact;
+ char *actions;
+
+ if (block->finished)
+ return ERR_PTR(-EINVAL);
+ if (block->cur_act_index + action_size >
+ block->afa->max_acts_per_set) {
+ struct mlxsw_afa_set *set;
+
+ /* The appended action won't fit into the current action set,
+ * so create a new set.
+ */
+ set = mlxsw_afa_set_create(false);
+ if (!set)
+ return ERR_PTR(-ENOBUFS);
+ set->prev = block->cur_set;
+ block->cur_act_index = 0;
+ block->cur_set->next = set;
+ block->cur_set = set;
+ }
+
+ actions = block->cur_set->ht_key.enc_actions;
+ oneact = actions + block->cur_act_index * MLXSW_AFA_ONE_ACTION_LEN;
+ block->cur_act_index += action_size;
+ mlxsw_afa_all_action_type_set(oneact, action_code);
+ return oneact + MLXSW_AFA_PAYLOAD_OFFSET;
+}
+
+/* VLAN Action
+ * -----------
+ * VLAN action is used for manipulating VLANs. It can be used to implement QinQ,
+ * VLAN translation, change of PCP bits of the VLAN tag, push, pop as swap VLANs
+ * and more.
+ */
+
+#define MLXSW_AFA_VLAN_CODE 0x02
+#define MLXSW_AFA_VLAN_SIZE 1
+
+enum mlxsw_afa_vlan_vlan_tag_cmd {
+ MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP,
+ MLXSW_AFA_VLAN_VLAN_TAG_CMD_PUSH_TAG,
+ MLXSW_AFA_VLAN_VLAN_TAG_CMD_POP_TAG,
+};
+
+enum mlxsw_afa_vlan_cmd {
+ MLXSW_AFA_VLAN_CMD_NOP,
+ MLXSW_AFA_VLAN_CMD_SET_OUTER,
+ MLXSW_AFA_VLAN_CMD_SET_INNER,
+ MLXSW_AFA_VLAN_CMD_COPY_OUTER_TO_INNER,
+ MLXSW_AFA_VLAN_CMD_COPY_INNER_TO_OUTER,
+ MLXSW_AFA_VLAN_CMD_SWAP,
+};
+
+/* afa_vlan_vlan_tag_cmd
+ * Tag command: push, pop, nop VLAN header.
+ */
+MLXSW_ITEM32(afa, vlan, vlan_tag_cmd, 0x00, 29, 3);
+
+/* afa_vlan_vid_cmd */
+MLXSW_ITEM32(afa, vlan, vid_cmd, 0x04, 29, 3);
+
+/* afa_vlan_vid */
+MLXSW_ITEM32(afa, vlan, vid, 0x04, 0, 12);
+
+/* afa_vlan_ethertype_cmd */
+MLXSW_ITEM32(afa, vlan, ethertype_cmd, 0x08, 29, 3);
+
+/* afa_vlan_ethertype
+ * Index to EtherTypes in Switch VLAN EtherType Register (SVER).
+ */
+MLXSW_ITEM32(afa, vlan, ethertype, 0x08, 24, 3);
+
+/* afa_vlan_pcp_cmd */
+MLXSW_ITEM32(afa, vlan, pcp_cmd, 0x08, 13, 3);
+
+/* afa_vlan_pcp */
+MLXSW_ITEM32(afa, vlan, pcp, 0x08, 8, 3);
+
+static inline void
+mlxsw_afa_vlan_pack(char *payload,
+ enum mlxsw_afa_vlan_vlan_tag_cmd vlan_tag_cmd,
+ enum mlxsw_afa_vlan_cmd vid_cmd, u16 vid,
+ enum mlxsw_afa_vlan_cmd pcp_cmd, u8 pcp,
+ enum mlxsw_afa_vlan_cmd ethertype_cmd, u8 ethertype)
+{
+ mlxsw_afa_vlan_vlan_tag_cmd_set(payload, vlan_tag_cmd);
+ mlxsw_afa_vlan_vid_cmd_set(payload, vid_cmd);
+ mlxsw_afa_vlan_vid_set(payload, vid);
+ mlxsw_afa_vlan_pcp_cmd_set(payload, pcp_cmd);
+ mlxsw_afa_vlan_pcp_set(payload, pcp);
+ mlxsw_afa_vlan_ethertype_cmd_set(payload, ethertype_cmd);
+ mlxsw_afa_vlan_ethertype_set(payload, ethertype);
+}
+
+int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
+ u16 vid, u8 pcp, u8 et,
+ struct netlink_ext_ack *extack)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_VLAN_CODE,
+ MLXSW_AFA_VLAN_SIZE);
+
+ if (IS_ERR(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append vlan_modify action");
+ return PTR_ERR(act);
+ }
+ mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP,
+ MLXSW_AFA_VLAN_CMD_SET_OUTER, vid,
+ MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp,
+ MLXSW_AFA_VLAN_CMD_SET_OUTER, et);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify);
+
+/* Trap / Discard Action
+ * ---------------------
+ * The Trap / Discard action enables trapping / mirroring packets to the CPU
+ * as well as discarding packets.
+ * The ACL Trap / Discard separates the forward/discard control from CPU
+ * trap control. In addition, the Trap / Discard action enables activating
+ * SPAN (port mirroring).
+ */
+
+#define MLXSW_AFA_TRAPDISC_CODE 0x03
+#define MLXSW_AFA_TRAPDISC_SIZE 1
+
+enum mlxsw_afa_trapdisc_trap_action {
+ MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP = 0,
+ MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP = 2,
+};
+
+/* afa_trapdisc_trap_action
+ * Trap Action.
+ */
+MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4);
+
+enum mlxsw_afa_trapdisc_forward_action {
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1,
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3,
+};
+
+/* afa_trapdisc_forward_action
+ * Forward Action.
+ */
+MLXSW_ITEM32(afa, trapdisc, forward_action, 0x00, 0, 4);
+
+/* afa_trapdisc_trap_id
+ * Trap ID to configure.
+ */
+MLXSW_ITEM32(afa, trapdisc, trap_id, 0x04, 0, 9);
+
+/* afa_trapdisc_mirror_agent
+ * Mirror agent.
+ */
+MLXSW_ITEM32(afa, trapdisc, mirror_agent, 0x08, 29, 3);
+
+/* afa_trapdisc_mirror_enable
+ * Mirror enable.
+ */
+MLXSW_ITEM32(afa, trapdisc, mirror_enable, 0x08, 24, 1);
+
+static inline void
+mlxsw_afa_trapdisc_pack(char *payload,
+ enum mlxsw_afa_trapdisc_trap_action trap_action,
+ enum mlxsw_afa_trapdisc_forward_action forward_action,
+ u16 trap_id)
+{
+ mlxsw_afa_trapdisc_trap_action_set(payload, trap_action);
+ mlxsw_afa_trapdisc_forward_action_set(payload, forward_action);
+ mlxsw_afa_trapdisc_trap_id_set(payload, trap_id);
+}
+
+static inline void
+mlxsw_afa_trapdisc_mirror_pack(char *payload, bool mirror_enable,
+ u8 mirror_agent)
+{
+ mlxsw_afa_trapdisc_mirror_enable_set(payload, mirror_enable);
+ mlxsw_afa_trapdisc_mirror_agent_set(payload, mirror_agent);
+}
+
+int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_TRAPDISC_CODE,
+ MLXSW_AFA_TRAPDISC_SIZE);
+
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+ mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP,
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_drop);
+
+int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_TRAPDISC_CODE,
+ MLXSW_AFA_TRAPDISC_SIZE);
+
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+ mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD,
+ trap_id);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_trap);
+
+int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
+ u16 trap_id)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_TRAPDISC_CODE,
+ MLXSW_AFA_TRAPDISC_SIZE);
+
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+ mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD,
+ trap_id);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward);
+
+struct mlxsw_afa_mirror {
+ struct mlxsw_afa_resource resource;
+ int span_id;
+ u8 local_in_port;
+ bool ingress;
+};
+
+static void
+mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_mirror *mirror)
+{
+ mlxsw_afa_resource_del(&mirror->resource);
+ block->afa->ops->mirror_del(block->afa->ops_priv,
+ mirror->local_in_port,
+ mirror->span_id,
+ mirror->ingress);
+ kfree(mirror);
+}
+
+static void
+mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
+ struct mlxsw_afa_resource *resource)
+{
+ struct mlxsw_afa_mirror *mirror;
+
+ mirror = container_of(resource, struct mlxsw_afa_mirror, resource);
+ mlxsw_afa_mirror_destroy(block, mirror);
+}
+
+static struct mlxsw_afa_mirror *
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress)
+{
+ struct mlxsw_afa_mirror *mirror;
+ int err;
+
+ mirror = kzalloc(sizeof(*mirror), GFP_KERNEL);
+ if (!mirror)
+ return ERR_PTR(-ENOMEM);
+
+ err = block->afa->ops->mirror_add(block->afa->ops_priv,
+ local_in_port, out_dev,
+ ingress, &mirror->span_id);
+ if (err)
+ goto err_mirror_add;
+
+ mirror->ingress = ingress;
+ mirror->local_in_port = local_in_port;
+ mirror->resource.destructor = mlxsw_afa_mirror_destructor;
+ mlxsw_afa_resource_add(block, &mirror->resource);
+ return mirror;
+
+err_mirror_add:
+ kfree(mirror);
+ return ERR_PTR(err);
+}
+
+static int
+mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
+ u8 mirror_agent)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_TRAPDISC_CODE,
+ MLXSW_AFA_TRAPDISC_SIZE);
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+ mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP,
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0);
+ mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent);
+ return 0;
+}
+
+int
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+ const struct net_device *out_dev, bool ingress,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_afa_mirror *mirror;
+ int err;
+
+ mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
+ ingress);
+ if (IS_ERR(mirror)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot create mirror action");
+ return PTR_ERR(mirror);
+ }
+ err = mlxsw_afa_block_append_allocated_mirror(block, mirror->span_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append mirror action");
+ goto err_append_allocated_mirror;
+ }
+
+ return 0;
+
+err_append_allocated_mirror:
+ mlxsw_afa_mirror_destroy(block, mirror);
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_mirror);
+
+/* Forwarding Action
+ * -----------------
+ * Forwarding Action can be used to implement Policy Based Switching (PBS)
+ * as well as OpenFlow related "Output" action.
+ */
+
+#define MLXSW_AFA_FORWARD_CODE 0x07
+#define MLXSW_AFA_FORWARD_SIZE 1
+
+enum mlxsw_afa_forward_type {
+ /* PBS, Policy Based Switching */
+ MLXSW_AFA_FORWARD_TYPE_PBS,
+ /* Output, OpenFlow output type */
+ MLXSW_AFA_FORWARD_TYPE_OUTPUT,
+};
+
+/* afa_forward_type */
+MLXSW_ITEM32(afa, forward, type, 0x00, 24, 2);
+
+/* afa_forward_pbs_ptr
+ * A pointer to the PBS entry configured by PPBS register.
+ * Reserved when in_port is set.
+ */
+MLXSW_ITEM32(afa, forward, pbs_ptr, 0x08, 0, 24);
+
+/* afa_forward_in_port
+ * Packet is forwarded back to the ingress port.
+ */
+MLXSW_ITEM32(afa, forward, in_port, 0x0C, 0, 1);
+
+static inline void
+mlxsw_afa_forward_pack(char *payload, enum mlxsw_afa_forward_type type,
+ u32 pbs_ptr, bool in_port)
+{
+ mlxsw_afa_forward_type_set(payload, type);
+ mlxsw_afa_forward_pbs_ptr_set(payload, pbs_ptr);
+ mlxsw_afa_forward_in_port_set(payload, in_port);
+}
+
+int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
+ u8 local_port, bool in_port,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref;
+ u32 kvdl_index;
+ char *act;
+ int err;
+
+ if (in_port) {
+ NL_SET_ERR_MSG_MOD(extack, "Forwarding to ingress port is not supported");
+ return -EOPNOTSUPP;
+ }
+ fwd_entry_ref = mlxsw_afa_fwd_entry_ref_create(block, local_port);
+ if (IS_ERR(fwd_entry_ref)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot create forward action");
+ return PTR_ERR(fwd_entry_ref);
+ }
+ kvdl_index = fwd_entry_ref->fwd_entry->kvdl_index;
+
+ act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE,
+ MLXSW_AFA_FORWARD_SIZE);
+ if (IS_ERR(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append forward action");
+ err = PTR_ERR(act);
+ goto err_append_action;
+ }
+ mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS,
+ kvdl_index, in_port);
+ return 0;
+
+err_append_action:
+ mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref);
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_fwd);
+
+/* Policing and Counting Action
+ * ----------------------------
+ * Policing and Counting action is used for binding policer and counter
+ * to ACL rules.
+ */
+
+#define MLXSW_AFA_POLCNT_CODE 0x08
+#define MLXSW_AFA_POLCNT_SIZE 1
+
+enum mlxsw_afa_polcnt_counter_set_type {
+ /* No count */
+ MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+ /* Count packets and bytes */
+ MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
+ /* Count only packets */
+ MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS = 0x05,
+};
+
+/* afa_polcnt_counter_set_type
+ * Counter set type for flow counters.
+ */
+MLXSW_ITEM32(afa, polcnt, counter_set_type, 0x04, 24, 8);
+
+/* afa_polcnt_counter_index
+ * Counter index for flow counters.
+ */
+MLXSW_ITEM32(afa, polcnt, counter_index, 0x04, 0, 24);
+
+static inline void
+mlxsw_afa_polcnt_pack(char *payload,
+ enum mlxsw_afa_polcnt_counter_set_type set_type,
+ u32 counter_index)
+{
+ mlxsw_afa_polcnt_counter_set_type_set(payload, set_type);
+ mlxsw_afa_polcnt_counter_index_set(payload, counter_index);
+}
+
+int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block,
+ u32 counter_index)
+{
+ char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE,
+ MLXSW_AFA_POLCNT_SIZE);
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+ mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES,
+ counter_index);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_allocated_counter);
+
+int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
+ u32 *p_counter_index,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_afa_counter *counter;
+ u32 counter_index;
+ int err;
+
+ counter = mlxsw_afa_counter_create(block);
+ if (IS_ERR(counter)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot create count action");
+ return PTR_ERR(counter);
+ }
+ counter_index = counter->counter_index;
+
+ err = mlxsw_afa_block_append_allocated_counter(block, counter_index);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append count action");
+ goto err_append_allocated_counter;
+ }
+ if (p_counter_index)
+ *p_counter_index = counter_index;
+ return 0;
+
+err_append_allocated_counter:
+ mlxsw_afa_counter_destroy(block, counter);
+ return err;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_counter);
+
+/* Virtual Router and Forwarding Domain Action
+ * -------------------------------------------
+ * Virtual Switch action is used for manipulate the Virtual Router (VR),
+ * MPLS label space and the Forwarding Identifier (FID).
+ */
+
+#define MLXSW_AFA_VIRFWD_CODE 0x0E
+#define MLXSW_AFA_VIRFWD_SIZE 1
+
+enum mlxsw_afa_virfwd_fid_cmd {
+ /* Do nothing */
+ MLXSW_AFA_VIRFWD_FID_CMD_NOOP,
+ /* Set the Forwarding Identifier (FID) to fid */
+ MLXSW_AFA_VIRFWD_FID_CMD_SET,
+};
+
+/* afa_virfwd_fid_cmd */
+MLXSW_ITEM32(afa, virfwd, fid_cmd, 0x08, 29, 3);
+
+/* afa_virfwd_fid
+ * The FID value.
+ */
+MLXSW_ITEM32(afa, virfwd, fid, 0x08, 0, 16);
+
+static inline void mlxsw_afa_virfwd_pack(char *payload,
+ enum mlxsw_afa_virfwd_fid_cmd fid_cmd,
+ u16 fid)
+{
+ mlxsw_afa_virfwd_fid_cmd_set(payload, fid_cmd);
+ mlxsw_afa_virfwd_fid_set(payload, fid);
+}
+
+int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid,
+ struct netlink_ext_ack *extack)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_VIRFWD_CODE,
+ MLXSW_AFA_VIRFWD_SIZE);
+ if (IS_ERR(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append fid_set action");
+ return PTR_ERR(act);
+ }
+ mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set);
+
+/* MC Routing Action
+ * -----------------
+ * The Multicast router action. Can be used by RMFT_V2 - Router Multicast
+ * Forwarding Table Version 2 Register.
+ */
+
+#define MLXSW_AFA_MCROUTER_CODE 0x10
+#define MLXSW_AFA_MCROUTER_SIZE 2
+
+enum mlxsw_afa_mcrouter_rpf_action {
+ MLXSW_AFA_MCROUTER_RPF_ACTION_NOP,
+ MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
+ MLXSW_AFA_MCROUTER_RPF_ACTION_DISCARD_ERROR,
+};
+
+/* afa_mcrouter_rpf_action */
+MLXSW_ITEM32(afa, mcrouter, rpf_action, 0x00, 28, 3);
+
+/* afa_mcrouter_expected_irif */
+MLXSW_ITEM32(afa, mcrouter, expected_irif, 0x00, 0, 16);
+
+/* afa_mcrouter_min_mtu */
+MLXSW_ITEM32(afa, mcrouter, min_mtu, 0x08, 0, 16);
+
+enum mlxsw_afa_mrouter_vrmid {
+ MLXSW_AFA_MCROUTER_VRMID_INVALID,
+ MLXSW_AFA_MCROUTER_VRMID_VALID
+};
+
+/* afa_mcrouter_vrmid
+ * Valid RMID: rigr_rmid_index is used as RMID
+ */
+MLXSW_ITEM32(afa, mcrouter, vrmid, 0x0C, 31, 1);
+
+/* afa_mcrouter_rigr_rmid_index
+ * When the vrmid field is set to invalid, the field is used as pointer to
+ * Router Interface Group (RIGR) Table in the KVD linear.
+ * When the vrmid is set to valid, the field is used as RMID index, ranged
+ * from 0 to max_mid - 1. The index is to the Port Group Table.
+ */
+MLXSW_ITEM32(afa, mcrouter, rigr_rmid_index, 0x0C, 0, 24);
+
+static inline void
+mlxsw_afa_mcrouter_pack(char *payload,
+ enum mlxsw_afa_mcrouter_rpf_action rpf_action,
+ u16 expected_irif, u16 min_mtu,
+ enum mlxsw_afa_mrouter_vrmid vrmid, u32 rigr_rmid_index)
+
+{
+ mlxsw_afa_mcrouter_rpf_action_set(payload, rpf_action);
+ mlxsw_afa_mcrouter_expected_irif_set(payload, expected_irif);
+ mlxsw_afa_mcrouter_min_mtu_set(payload, min_mtu);
+ mlxsw_afa_mcrouter_vrmid_set(payload, vrmid);
+ mlxsw_afa_mcrouter_rigr_rmid_index_set(payload, rigr_rmid_index);
+}
+
+int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
+ u16 expected_irif, u16 min_mtu,
+ bool rmid_valid, u32 kvdl_index)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_MCROUTER_CODE,
+ MLXSW_AFA_MCROUTER_SIZE);
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+ mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
+ expected_irif, min_mtu, rmid_valid, kvdl_index);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
new file mode 100644
index 000000000..0e3a59dda
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_CORE_ACL_FLEX_ACTIONS_H
+#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+
+struct mlxsw_afa;
+struct mlxsw_afa_block;
+
+struct mlxsw_afa_ops {
+ int (*kvdl_set_add)(void *priv, u32 *p_kvdl_index,
+ char *enc_actions, bool is_first);
+ void (*kvdl_set_del)(void *priv, u32 kvdl_index, bool is_first);
+ int (*kvdl_set_activity_get)(void *priv, u32 kvdl_index,
+ bool *activity);
+ int (*kvdl_fwd_entry_add)(void *priv, u32 *p_kvdl_index, u8 local_port);
+ void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
+ int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
+ void (*counter_index_put)(void *priv, unsigned int counter_index);
+ int (*mirror_add)(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
+ bool ingress, int *p_span_id);
+ void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
+ bool ingress);
+ bool dummy_first_set;
+};
+
+struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set,
+ const struct mlxsw_afa_ops *ops,
+ void *ops_priv);
+void mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa);
+struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa);
+void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_commit(struct mlxsw_afa_block *block);
+char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
+char *mlxsw_afa_block_cur_set(struct mlxsw_afa_block *block);
+u32 mlxsw_afa_block_first_kvdl_index(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_activity_get(struct mlxsw_afa_block *block, bool *activity);
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
+int mlxsw_afa_block_terminate(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
+int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
+ u16 trap_id);
+int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
+ u8 local_in_port,
+ const struct net_device *out_dev,
+ bool ingress,
+ struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
+ u8 local_port, bool in_port,
+ struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
+ u16 vid, u8 pcp, u8 et,
+ struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block,
+ u32 counter_index);
+int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
+ u32 *p_counter_index,
+ struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid,
+ struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
+ u16 expected_irif, u16 min_mtu,
+ bool rmid_valid, u32 kvdl_index);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
new file mode 100644
index 000000000..785bf01fe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+
+#include "item.h"
+#include "core_acl_flex_keys.h"
+
+struct mlxsw_afk {
+ struct list_head key_info_list;
+ unsigned int max_blocks;
+ const struct mlxsw_afk_ops *ops;
+ const struct mlxsw_afk_block *blocks;
+ unsigned int blocks_count;
+};
+
+static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk)
+{
+ int i;
+ int j;
+
+ for (i = 0; i < mlxsw_afk->blocks_count; i++) {
+ const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i];
+
+ for (j = 0; j < block->instances_count; j++) {
+ struct mlxsw_afk_element_inst *elinst;
+
+ elinst = &block->instances[j];
+ if (elinst->type != elinst->info->type ||
+ elinst->item.size.bits !=
+ elinst->info->item.size.bits)
+ return false;
+ }
+ }
+ return true;
+}
+
+struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
+ const struct mlxsw_afk_ops *ops)
+{
+ struct mlxsw_afk *mlxsw_afk;
+
+ mlxsw_afk = kzalloc(sizeof(*mlxsw_afk), GFP_KERNEL);
+ if (!mlxsw_afk)
+ return NULL;
+ INIT_LIST_HEAD(&mlxsw_afk->key_info_list);
+ mlxsw_afk->max_blocks = max_blocks;
+ mlxsw_afk->ops = ops;
+ mlxsw_afk->blocks = ops->blocks;
+ mlxsw_afk->blocks_count = ops->blocks_count;
+ WARN_ON(!mlxsw_afk_blocks_check(mlxsw_afk));
+ return mlxsw_afk;
+}
+EXPORT_SYMBOL(mlxsw_afk_create);
+
+void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk)
+{
+ WARN_ON(!list_empty(&mlxsw_afk->key_info_list));
+ kfree(mlxsw_afk);
+}
+EXPORT_SYMBOL(mlxsw_afk_destroy);
+
+struct mlxsw_afk_key_info {
+ struct list_head list;
+ unsigned int ref_count;
+ unsigned int blocks_count;
+ int element_to_block[MLXSW_AFK_ELEMENT_MAX]; /* index is element, value
+ * is index inside "blocks"
+ */
+ struct mlxsw_afk_element_usage elusage;
+ const struct mlxsw_afk_block *blocks[0];
+};
+
+static bool
+mlxsw_afk_key_info_elements_eq(struct mlxsw_afk_key_info *key_info,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ return memcmp(&key_info->elusage, elusage, sizeof(*elusage)) == 0;
+}
+
+static struct mlxsw_afk_key_info *
+mlxsw_afk_key_info_find(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ struct mlxsw_afk_key_info *key_info;
+
+ list_for_each_entry(key_info, &mlxsw_afk->key_info_list, list) {
+ if (mlxsw_afk_key_info_elements_eq(key_info, elusage))
+ return key_info;
+ }
+ return NULL;
+}
+
+struct mlxsw_afk_picker {
+ struct {
+ DECLARE_BITMAP(element, MLXSW_AFK_ELEMENT_MAX);
+ unsigned int total;
+ } hits[0];
+};
+
+static void mlxsw_afk_picker_count_hits(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_picker *picker,
+ enum mlxsw_afk_element element)
+{
+ int i;
+ int j;
+
+ for (i = 0; i < mlxsw_afk->blocks_count; i++) {
+ const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i];
+
+ for (j = 0; j < block->instances_count; j++) {
+ struct mlxsw_afk_element_inst *elinst;
+
+ elinst = &block->instances[j];
+ if (elinst->info->element == element) {
+ __set_bit(element, picker->hits[i].element);
+ picker->hits[i].total++;
+ }
+ }
+ }
+}
+
+static void mlxsw_afk_picker_subtract_hits(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_picker *picker,
+ int block_index)
+{
+ DECLARE_BITMAP(hits_element, MLXSW_AFK_ELEMENT_MAX);
+ int i;
+ int j;
+
+ memcpy(&hits_element, &picker->hits[block_index].element,
+ sizeof(hits_element));
+
+ for (i = 0; i < mlxsw_afk->blocks_count; i++) {
+ for_each_set_bit(j, hits_element, MLXSW_AFK_ELEMENT_MAX) {
+ if (__test_and_clear_bit(j, picker->hits[i].element))
+ picker->hits[i].total--;
+ }
+ }
+}
+
+static int mlxsw_afk_picker_most_hits_get(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_picker *picker)
+{
+ int most_index = -EINVAL; /* Should never happen to return this */
+ int most_hits = 0;
+ int i;
+
+ for (i = 0; i < mlxsw_afk->blocks_count; i++) {
+ if (picker->hits[i].total > most_hits) {
+ most_hits = picker->hits[i].total;
+ most_index = i;
+ }
+ }
+ return most_index;
+}
+
+static int mlxsw_afk_picker_key_info_add(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_picker *picker,
+ int block_index,
+ struct mlxsw_afk_key_info *key_info)
+{
+ enum mlxsw_afk_element element;
+
+ if (key_info->blocks_count == mlxsw_afk->max_blocks)
+ return -EINVAL;
+
+ for_each_set_bit(element, picker->hits[block_index].element,
+ MLXSW_AFK_ELEMENT_MAX) {
+ key_info->element_to_block[element] = key_info->blocks_count;
+ mlxsw_afk_element_usage_add(&key_info->elusage, element);
+ }
+
+ key_info->blocks[key_info->blocks_count] =
+ &mlxsw_afk->blocks[block_index];
+ key_info->blocks_count++;
+ return 0;
+}
+
+static int mlxsw_afk_picker(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_key_info *key_info,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ struct mlxsw_afk_picker *picker;
+ enum mlxsw_afk_element element;
+ size_t alloc_size;
+ int err;
+
+ alloc_size = sizeof(picker->hits[0]) * mlxsw_afk->blocks_count;
+ picker = kzalloc(alloc_size, GFP_KERNEL);
+ if (!picker)
+ return -ENOMEM;
+
+ /* Since the same elements could be present in multiple blocks,
+ * we must find out optimal block list in order to make the
+ * block count as low as possible.
+ *
+ * First, we count hits. We go over all available blocks and count
+ * how many of requested elements are covered by each.
+ *
+ * Then in loop, we find block with most hits and add it to
+ * output key_info. Then we have to subtract this block hits so
+ * the next iteration will find most suitable block for
+ * the rest of requested elements.
+ */
+
+ mlxsw_afk_element_usage_for_each(element, elusage)
+ mlxsw_afk_picker_count_hits(mlxsw_afk, picker, element);
+
+ do {
+ int block_index;
+
+ block_index = mlxsw_afk_picker_most_hits_get(mlxsw_afk, picker);
+ if (block_index < 0) {
+ err = block_index;
+ goto out;
+ }
+ err = mlxsw_afk_picker_key_info_add(mlxsw_afk, picker,
+ block_index, key_info);
+ if (err)
+ goto out;
+ mlxsw_afk_picker_subtract_hits(mlxsw_afk, picker, block_index);
+ } while (!mlxsw_afk_key_info_elements_eq(key_info, elusage));
+
+ err = 0;
+out:
+ kfree(picker);
+ return err;
+}
+
+static struct mlxsw_afk_key_info *
+mlxsw_afk_key_info_create(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ struct mlxsw_afk_key_info *key_info;
+ size_t alloc_size;
+ int err;
+
+ alloc_size = sizeof(*key_info) +
+ sizeof(key_info->blocks[0]) * mlxsw_afk->max_blocks;
+ key_info = kzalloc(alloc_size, GFP_KERNEL);
+ if (!key_info)
+ return ERR_PTR(-ENOMEM);
+ err = mlxsw_afk_picker(mlxsw_afk, key_info, elusage);
+ if (err)
+ goto err_picker;
+ list_add(&key_info->list, &mlxsw_afk->key_info_list);
+ key_info->ref_count = 1;
+ return key_info;
+
+err_picker:
+ kfree(key_info);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_afk_key_info_destroy(struct mlxsw_afk_key_info *key_info)
+{
+ list_del(&key_info->list);
+ kfree(key_info);
+}
+
+struct mlxsw_afk_key_info *
+mlxsw_afk_key_info_get(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ struct mlxsw_afk_key_info *key_info;
+
+ key_info = mlxsw_afk_key_info_find(mlxsw_afk, elusage);
+ if (key_info) {
+ key_info->ref_count++;
+ return key_info;
+ }
+ return mlxsw_afk_key_info_create(mlxsw_afk, elusage);
+}
+EXPORT_SYMBOL(mlxsw_afk_key_info_get);
+
+void mlxsw_afk_key_info_put(struct mlxsw_afk_key_info *key_info)
+{
+ if (--key_info->ref_count)
+ return;
+ mlxsw_afk_key_info_destroy(key_info);
+}
+EXPORT_SYMBOL(mlxsw_afk_key_info_put);
+
+bool mlxsw_afk_key_info_subset(struct mlxsw_afk_key_info *key_info,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ return mlxsw_afk_element_usage_subset(elusage, &key_info->elusage);
+}
+EXPORT_SYMBOL(mlxsw_afk_key_info_subset);
+
+static const struct mlxsw_afk_element_inst *
+mlxsw_afk_block_elinst_get(const struct mlxsw_afk_block *block,
+ enum mlxsw_afk_element element)
+{
+ int i;
+
+ for (i = 0; i < block->instances_count; i++) {
+ struct mlxsw_afk_element_inst *elinst;
+
+ elinst = &block->instances[i];
+ if (elinst->info->element == element)
+ return elinst;
+ }
+ return NULL;
+}
+
+static const struct mlxsw_afk_element_inst *
+mlxsw_afk_key_info_elinst_get(struct mlxsw_afk_key_info *key_info,
+ enum mlxsw_afk_element element,
+ int *p_block_index)
+{
+ const struct mlxsw_afk_element_inst *elinst;
+ const struct mlxsw_afk_block *block;
+ int block_index;
+
+ if (WARN_ON(!test_bit(element, key_info->elusage.usage)))
+ return NULL;
+ block_index = key_info->element_to_block[element];
+ block = key_info->blocks[block_index];
+
+ elinst = mlxsw_afk_block_elinst_get(block, element);
+ if (WARN_ON(!elinst))
+ return NULL;
+
+ *p_block_index = block_index;
+ return elinst;
+}
+
+u16
+mlxsw_afk_key_info_block_encoding_get(const struct mlxsw_afk_key_info *key_info,
+ int block_index)
+{
+ return key_info->blocks[block_index]->encoding;
+}
+EXPORT_SYMBOL(mlxsw_afk_key_info_block_encoding_get);
+
+unsigned int
+mlxsw_afk_key_info_blocks_count_get(const struct mlxsw_afk_key_info *key_info)
+{
+ return key_info->blocks_count;
+}
+EXPORT_SYMBOL(mlxsw_afk_key_info_blocks_count_get);
+
+void mlxsw_afk_values_add_u32(struct mlxsw_afk_element_values *values,
+ enum mlxsw_afk_element element,
+ u32 key_value, u32 mask_value)
+{
+ const struct mlxsw_afk_element_info *elinfo =
+ &mlxsw_afk_element_infos[element];
+ const struct mlxsw_item *storage_item = &elinfo->item;
+
+ if (!mask_value)
+ return;
+ if (WARN_ON(elinfo->type != MLXSW_AFK_ELEMENT_TYPE_U32))
+ return;
+ __mlxsw_item_set32(values->storage.key, storage_item, 0, key_value);
+ __mlxsw_item_set32(values->storage.mask, storage_item, 0, mask_value);
+ mlxsw_afk_element_usage_add(&values->elusage, element);
+}
+EXPORT_SYMBOL(mlxsw_afk_values_add_u32);
+
+void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
+ enum mlxsw_afk_element element,
+ const char *key_value, const char *mask_value,
+ unsigned int len)
+{
+ const struct mlxsw_afk_element_info *elinfo =
+ &mlxsw_afk_element_infos[element];
+ const struct mlxsw_item *storage_item = &elinfo->item;
+
+ if (!memchr_inv(mask_value, 0, len)) /* If mask is zero */
+ return;
+ if (WARN_ON(elinfo->type != MLXSW_AFK_ELEMENT_TYPE_BUF) ||
+ WARN_ON(elinfo->item.size.bytes != len))
+ return;
+ __mlxsw_item_memcpy_to(values->storage.key, key_value,
+ storage_item, 0);
+ __mlxsw_item_memcpy_to(values->storage.mask, mask_value,
+ storage_item, 0);
+ mlxsw_afk_element_usage_add(&values->elusage, element);
+}
+EXPORT_SYMBOL(mlxsw_afk_values_add_buf);
+
+static void mlxsw_sp_afk_encode_u32(const struct mlxsw_item *storage_item,
+ const struct mlxsw_item *output_item,
+ char *storage, char *output)
+{
+ u32 value;
+
+ value = __mlxsw_item_get32(storage, storage_item, 0);
+ __mlxsw_item_set32(output, output_item, 0, value);
+}
+
+static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item,
+ const struct mlxsw_item *output_item,
+ char *storage, char *output)
+{
+ char *storage_data = __mlxsw_item_data(storage, storage_item, 0);
+ char *output_data = __mlxsw_item_data(output, output_item, 0);
+ size_t len = output_item->size.bytes;
+
+ memcpy(output_data, storage_data, len);
+}
+
+static void
+mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
+ char *output, char *storage)
+{
+ const struct mlxsw_item *storage_item = &elinst->info->item;
+ const struct mlxsw_item *output_item = &elinst->item;
+
+ if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
+ mlxsw_sp_afk_encode_u32(storage_item, output_item,
+ storage, output);
+ else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
+ mlxsw_sp_afk_encode_buf(storage_item, output_item,
+ storage, output);
+}
+
+#define MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE 16
+
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_key_info *key_info,
+ struct mlxsw_afk_element_values *values,
+ char *key, char *mask, int block_start, int block_end)
+{
+ char block_mask[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE];
+ char block_key[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE];
+ const struct mlxsw_afk_element_inst *elinst;
+ enum mlxsw_afk_element element;
+ int block_index, i;
+
+ for (i = block_start; i <= block_end; i++) {
+ memset(block_key, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE);
+ memset(block_mask, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE);
+
+ mlxsw_afk_element_usage_for_each(element, &values->elusage) {
+ elinst = mlxsw_afk_key_info_elinst_get(key_info,
+ element,
+ &block_index);
+ if (!elinst || block_index != i)
+ continue;
+
+ mlxsw_sp_afk_encode_one(elinst, block_key,
+ values->storage.key);
+ mlxsw_sp_afk_encode_one(elinst, block_mask,
+ values->storage.mask);
+ }
+
+ if (key)
+ mlxsw_afk->ops->encode_block(block_key, i, key);
+ if (mask)
+ mlxsw_afk->ops->encode_block(block_mask, i, mask);
+ }
+}
+EXPORT_SYMBOL(mlxsw_afk_encode);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
new file mode 100644
index 000000000..c29c045d8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_CORE_ACL_FLEX_KEYS_H
+#define _MLXSW_CORE_ACL_FLEX_KEYS_H
+
+#include <linux/types.h>
+#include <linux/bitmap.h>
+
+#include "item.h"
+
+enum mlxsw_afk_element {
+ MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
+ MLXSW_AFK_ELEMENT_DMAC_32_47,
+ MLXSW_AFK_ELEMENT_DMAC_0_31,
+ MLXSW_AFK_ELEMENT_SMAC_32_47,
+ MLXSW_AFK_ELEMENT_SMAC_0_31,
+ MLXSW_AFK_ELEMENT_ETHERTYPE,
+ MLXSW_AFK_ELEMENT_IP_PROTO,
+ MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+ MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+ MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+ MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+ MLXSW_AFK_ELEMENT_DST_IP_96_127,
+ MLXSW_AFK_ELEMENT_DST_IP_64_95,
+ MLXSW_AFK_ELEMENT_DST_IP_32_63,
+ MLXSW_AFK_ELEMENT_DST_IP_0_31,
+ MLXSW_AFK_ELEMENT_DST_L4_PORT,
+ MLXSW_AFK_ELEMENT_SRC_L4_PORT,
+ MLXSW_AFK_ELEMENT_VID,
+ MLXSW_AFK_ELEMENT_PCP,
+ MLXSW_AFK_ELEMENT_TCP_FLAGS,
+ MLXSW_AFK_ELEMENT_IP_TTL_,
+ MLXSW_AFK_ELEMENT_IP_ECN,
+ MLXSW_AFK_ELEMENT_IP_DSCP,
+ MLXSW_AFK_ELEMENT_MAX,
+};
+
+enum mlxsw_afk_element_type {
+ MLXSW_AFK_ELEMENT_TYPE_U32,
+ MLXSW_AFK_ELEMENT_TYPE_BUF,
+};
+
+struct mlxsw_afk_element_info {
+ enum mlxsw_afk_element element; /* element ID */
+ enum mlxsw_afk_element_type type;
+ struct mlxsw_item item; /* element geometry in internal storage */
+};
+
+#define MLXSW_AFK_ELEMENT_INFO(_type, _element, _offset, _shift, _size) \
+ [MLXSW_AFK_ELEMENT_##_element] = { \
+ .element = MLXSW_AFK_ELEMENT_##_element, \
+ .type = _type, \
+ .item = { \
+ .offset = _offset, \
+ .shift = _shift, \
+ .size = {.bits = _size}, \
+ .name = #_element, \
+ }, \
+ }
+
+#define MLXSW_AFK_ELEMENT_INFO_U32(_element, _offset, _shift, _size) \
+ MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_U32, \
+ _element, _offset, _shift, _size)
+
+#define MLXSW_AFK_ELEMENT_INFO_BUF(_element, _offset, _size) \
+ MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \
+ _element, _offset, 0, _size)
+
+/* For the purpose of the driver, define an internal storage scratchpad
+ * that will be used to store key/mask values. For each defined element type
+ * define an internal storage geometry.
+ */
+static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
+ MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_0_31, 0x0C, 4),
+ MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8),
+ MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
+ MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3),
+ MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9),
+ MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16),
+ MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_0_31, 0x2C, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_96_127, 0x30, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4),
+ MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4),
+};
+
+#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40
+
+struct mlxsw_afk_element_inst { /* element instance in actual block */
+ const struct mlxsw_afk_element_info *info;
+ enum mlxsw_afk_element_type type;
+ struct mlxsw_item item; /* element geometry in block */
+};
+
+#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, _shift, _size) \
+ { \
+ .info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \
+ .type = _type, \
+ .item = { \
+ .offset = _offset, \
+ .shift = _shift, \
+ .size = {.bits = _size}, \
+ .name = #_element, \
+ }, \
+ }
+
+#define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size) \
+ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \
+ _element, _offset, _shift, _size)
+
+#define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size) \
+ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF, \
+ _element, _offset, 0, _size)
+
+struct mlxsw_afk_block {
+ u16 encoding; /* block ID */
+ struct mlxsw_afk_element_inst *instances;
+ unsigned int instances_count;
+};
+
+#define MLXSW_AFK_BLOCK(_encoding, _instances) \
+ { \
+ .encoding = _encoding, \
+ .instances = _instances, \
+ .instances_count = ARRAY_SIZE(_instances), \
+ }
+
+struct mlxsw_afk_element_usage {
+ DECLARE_BITMAP(usage, MLXSW_AFK_ELEMENT_MAX);
+};
+
+#define mlxsw_afk_element_usage_for_each(element, elusage) \
+ for_each_set_bit(element, (elusage)->usage, MLXSW_AFK_ELEMENT_MAX)
+
+static inline void
+mlxsw_afk_element_usage_add(struct mlxsw_afk_element_usage *elusage,
+ enum mlxsw_afk_element element)
+{
+ __set_bit(element, elusage->usage);
+}
+
+static inline void
+mlxsw_afk_element_usage_zero(struct mlxsw_afk_element_usage *elusage)
+{
+ bitmap_zero(elusage->usage, MLXSW_AFK_ELEMENT_MAX);
+}
+
+static inline void
+mlxsw_afk_element_usage_fill(struct mlxsw_afk_element_usage *elusage,
+ const enum mlxsw_afk_element *elements,
+ unsigned int elements_count)
+{
+ int i;
+
+ mlxsw_afk_element_usage_zero(elusage);
+ for (i = 0; i < elements_count; i++)
+ mlxsw_afk_element_usage_add(elusage, elements[i]);
+}
+
+static inline bool
+mlxsw_afk_element_usage_subset(struct mlxsw_afk_element_usage *elusage_small,
+ struct mlxsw_afk_element_usage *elusage_big)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_AFK_ELEMENT_MAX; i++)
+ if (test_bit(i, elusage_small->usage) &&
+ !test_bit(i, elusage_big->usage))
+ return false;
+ return true;
+}
+
+struct mlxsw_afk;
+
+struct mlxsw_afk_ops {
+ const struct mlxsw_afk_block *blocks;
+ unsigned int blocks_count;
+ void (*encode_block)(char *block, int block_index, char *output);
+};
+
+struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
+ const struct mlxsw_afk_ops *ops);
+void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk);
+
+struct mlxsw_afk_key_info;
+
+struct mlxsw_afk_key_info *
+mlxsw_afk_key_info_get(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_element_usage *elusage);
+void mlxsw_afk_key_info_put(struct mlxsw_afk_key_info *key_info);
+bool mlxsw_afk_key_info_subset(struct mlxsw_afk_key_info *key_info,
+ struct mlxsw_afk_element_usage *elusage);
+
+u16
+mlxsw_afk_key_info_block_encoding_get(const struct mlxsw_afk_key_info *key_info,
+ int block_index);
+unsigned int
+mlxsw_afk_key_info_blocks_count_get(const struct mlxsw_afk_key_info *key_info);
+
+struct mlxsw_afk_element_values {
+ struct mlxsw_afk_element_usage elusage;
+ struct {
+ char key[MLXSW_AFK_ELEMENT_STORAGE_SIZE];
+ char mask[MLXSW_AFK_ELEMENT_STORAGE_SIZE];
+ } storage;
+};
+
+void mlxsw_afk_values_add_u32(struct mlxsw_afk_element_values *values,
+ enum mlxsw_afk_element element,
+ u32 key_value, u32 mask_value);
+void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
+ enum mlxsw_afk_element element,
+ const char *key_value, const char *mask_value,
+ unsigned int len);
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+ struct mlxsw_afk_key_info *key_info,
+ struct mlxsw_afk_element_values *values,
+ char *key, char *mask, int block_start, int block_end);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
new file mode 100644
index 000000000..e04e8162a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/hwmon.h>
+#include <linux/err.h>
+
+#include "core.h"
+
+#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127
+#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \
+ MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX)
+
+struct mlxsw_hwmon_attr {
+ struct device_attribute dev_attr;
+ struct mlxsw_hwmon *hwmon;
+ unsigned int type_index;
+ char name[32];
+};
+
+struct mlxsw_hwmon {
+ struct mlxsw_core *core;
+ const struct mlxsw_bus_info *bus_info;
+ struct device *hwmon_dev;
+ struct attribute_group group;
+ const struct attribute_group *groups[2];
+ struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1];
+ struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
+ unsigned int attrs_count;
+};
+
+static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned int temp;
+ int err;
+
+ mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
+ false, false);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
+ return err;
+ }
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+ return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned int temp_max;
+ int err;
+
+ mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
+ false, false);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
+ return err;
+ }
+ mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL);
+ return sprintf(buf, "%u\n", temp_max);
+}
+
+static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned long val;
+ int err;
+
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val != 1)
+ return -EINVAL;
+
+ mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true);
+ err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n");
+ return err;
+ }
+ return len;
+}
+
+static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ char mfsm_pl[MLXSW_REG_MFSM_LEN];
+ int err;
+
+ mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n");
+ return err;
+ }
+ return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl));
+}
+
+static ssize_t mlxsw_hwmon_pwm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ char mfsc_pl[MLXSW_REG_MFSC_LEN];
+ int err;
+
+ mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n");
+ return err;
+ }
+ return sprintf(buf, "%u\n",
+ mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl));
+}
+
+static ssize_t mlxsw_hwmon_pwm_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ char mfsc_pl[MLXSW_REG_MFSC_LEN];
+ unsigned long val;
+ int err;
+
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val > 255)
+ return -EINVAL;
+
+ mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val);
+ err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n");
+ return err;
+ }
+ return len;
+}
+
+enum mlxsw_hwmon_attr_type {
+ MLXSW_HWMON_ATTR_TYPE_TEMP,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_RST,
+ MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+ MLXSW_HWMON_ATTR_TYPE_PWM,
+};
+
+static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
+ enum mlxsw_hwmon_attr_type attr_type,
+ unsigned int type_index, unsigned int num) {
+ struct mlxsw_hwmon_attr *mlxsw_hwmon_attr;
+ unsigned int attr_index;
+
+ attr_index = mlxsw_hwmon->attrs_count;
+ mlxsw_hwmon_attr = &mlxsw_hwmon->hwmon_attrs[attr_index];
+
+ switch (attr_type) {
+ case MLXSW_HWMON_ATTR_TYPE_TEMP:
+ mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "temp%u_input", num + 1);
+ break;
+ case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX:
+ mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "temp%u_highest", num + 1);
+ break;
+ case MLXSW_HWMON_ATTR_TYPE_TEMP_RST:
+ mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_temp_rst_store;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0200;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "temp%u_reset_history", num + 1);
+ break;
+ case MLXSW_HWMON_ATTR_TYPE_FAN_RPM:
+ mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "fan%u_input", num + 1);
+ break;
+ case MLXSW_HWMON_ATTR_TYPE_PWM:
+ mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show;
+ mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0644;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "pwm%u", num + 1);
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ mlxsw_hwmon_attr->type_index = type_index;
+ mlxsw_hwmon_attr->hwmon = mlxsw_hwmon;
+ mlxsw_hwmon_attr->dev_attr.attr.name = mlxsw_hwmon_attr->name;
+ sysfs_attr_init(&mlxsw_hwmon_attr->dev_attr.attr);
+
+ mlxsw_hwmon->attrs[attr_index] = &mlxsw_hwmon_attr->dev_attr.attr;
+ mlxsw_hwmon->attrs_count++;
+}
+
+static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+ char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0};
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ u8 sensor_count;
+ int i;
+ int err;
+
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtcap), mtcap_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n");
+ return err;
+ }
+ sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl);
+ for (i = 0; i < sensor_count; i++) {
+ mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true);
+ err = mlxsw_reg_write(mlxsw_hwmon->core,
+ MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n",
+ i);
+ return err;
+ }
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP, i, i);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, i, i);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_RST, i, i);
+ }
+ return 0;
+}
+
+static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+ char mfcr_pl[MLXSW_REG_MFCR_LEN] = {0};
+ enum mlxsw_reg_mfcr_pwm_frequency freq;
+ unsigned int type_index;
+ unsigned int num;
+ u16 tacho_active;
+ u8 pwm_active;
+ int err;
+
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfcr), mfcr_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get to probe PWMs and Tachometers\n");
+ return err;
+ }
+ mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
+ num = 0;
+ for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) {
+ if (tacho_active & BIT(type_index))
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_FAN_RPM,
+ type_index, num++);
+ }
+ num = 0;
+ for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) {
+ if (pwm_active & BIT(type_index))
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_PWM,
+ type_index, num++);
+ }
+ return 0;
+}
+
+int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info,
+ struct mlxsw_hwmon **p_hwmon)
+{
+ struct mlxsw_hwmon *mlxsw_hwmon;
+ struct device *hwmon_dev;
+ int err;
+
+ mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL);
+ if (!mlxsw_hwmon)
+ return -ENOMEM;
+ mlxsw_hwmon->core = mlxsw_core;
+ mlxsw_hwmon->bus_info = mlxsw_bus_info;
+
+ err = mlxsw_hwmon_temp_init(mlxsw_hwmon);
+ if (err)
+ goto err_temp_init;
+
+ err = mlxsw_hwmon_fans_init(mlxsw_hwmon);
+ if (err)
+ goto err_fans_init;
+
+ mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
+ mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
+
+ hwmon_dev = hwmon_device_register_with_groups(mlxsw_bus_info->dev,
+ "mlxsw", mlxsw_hwmon,
+ mlxsw_hwmon->groups);
+ if (IS_ERR(hwmon_dev)) {
+ err = PTR_ERR(hwmon_dev);
+ goto err_hwmon_register;
+ }
+
+ mlxsw_hwmon->hwmon_dev = hwmon_dev;
+ *p_hwmon = mlxsw_hwmon;
+ return 0;
+
+err_hwmon_register:
+err_fans_init:
+err_temp_init:
+ kfree(mlxsw_hwmon);
+ return err;
+}
+
+void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+ hwmon_device_unregister(mlxsw_hwmon->hwmon_dev);
+ kfree(mlxsw_hwmon);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
new file mode 100644
index 000000000..6d29dc428
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
+ * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/thermal.h>
+#include <linux/err.h>
+
+#include "core.h"
+
+#define MLXSW_THERMAL_POLL_INT 1000 /* ms */
+#define MLXSW_THERMAL_MAX_TEMP 110000 /* 110C */
+#define MLXSW_THERMAL_MAX_STATE 10
+#define MLXSW_THERMAL_MAX_DUTY 255
+
+struct mlxsw_thermal_trip {
+ int type;
+ int temp;
+ int min_state;
+ int max_state;
+};
+
+static const struct mlxsw_thermal_trip default_thermal_trips[] = {
+ { /* In range - 0-40% PWM */
+ .type = THERMAL_TRIP_ACTIVE,
+ .temp = 75000,
+ .min_state = 0,
+ .max_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10,
+ },
+ { /* High - 40-100% PWM */
+ .type = THERMAL_TRIP_ACTIVE,
+ .temp = 80000,
+ .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10,
+ .max_state = MLXSW_THERMAL_MAX_STATE,
+ },
+ {
+ /* Very high - 100% PWM */
+ .type = THERMAL_TRIP_ACTIVE,
+ .temp = 85000,
+ .min_state = MLXSW_THERMAL_MAX_STATE,
+ .max_state = MLXSW_THERMAL_MAX_STATE,
+ },
+ { /* Warning */
+ .type = THERMAL_TRIP_HOT,
+ .temp = 105000,
+ .min_state = MLXSW_THERMAL_MAX_STATE,
+ .max_state = MLXSW_THERMAL_MAX_STATE,
+ },
+ { /* Critical - soft poweroff */
+ .type = THERMAL_TRIP_CRITICAL,
+ .temp = MLXSW_THERMAL_MAX_TEMP,
+ .min_state = MLXSW_THERMAL_MAX_STATE,
+ .max_state = MLXSW_THERMAL_MAX_STATE,
+ }
+};
+
+#define MLXSW_THERMAL_NUM_TRIPS ARRAY_SIZE(default_thermal_trips)
+
+/* Make sure all trips are writable */
+#define MLXSW_THERMAL_TRIP_MASK (BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
+
+struct mlxsw_thermal {
+ struct mlxsw_core *core;
+ const struct mlxsw_bus_info *bus_info;
+ struct thermal_zone_device *tzdev;
+ struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
+ struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
+ enum thermal_device_mode mode;
+};
+
+static inline u8 mlxsw_state_to_duty(int state)
+{
+ return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
+ MLXSW_THERMAL_MAX_STATE);
+}
+
+static inline int mlxsw_duty_to_state(u8 duty)
+{
+ return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
+ MLXSW_THERMAL_MAX_DUTY);
+}
+
+static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
+ struct thermal_cooling_device *cdev)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
+ if (thermal->cdevs[i] == cdev)
+ return i;
+
+ return -ENODEV;
+}
+
+static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
+ struct thermal_cooling_device *cdev)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+ struct device *dev = thermal->bus_info->dev;
+ int i, err;
+
+ /* If the cooling device is one of ours bind it */
+ if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+ return 0;
+
+ for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+ const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
+
+ err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
+ trip->max_state,
+ trip->min_state,
+ THERMAL_WEIGHT_DEFAULT);
+ if (err < 0) {
+ dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
+ struct thermal_cooling_device *cdev)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+ struct device *dev = thermal->bus_info->dev;
+ int i;
+ int err;
+
+ /* If the cooling device is our one unbind it */
+ if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+ return 0;
+
+ for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+ err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
+ if (err < 0) {
+ dev_err(dev, "Failed to unbind cooling device\n");
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int mlxsw_thermal_get_mode(struct thermal_zone_device *tzdev,
+ enum thermal_device_mode *mode)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+
+ *mode = thermal->mode;
+
+ return 0;
+}
+
+static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
+ enum thermal_device_mode mode)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+
+ mutex_lock(&tzdev->lock);
+
+ if (mode == THERMAL_DEVICE_ENABLED)
+ tzdev->polling_delay = MLXSW_THERMAL_POLL_INT;
+ else
+ tzdev->polling_delay = 0;
+
+ mutex_unlock(&tzdev->lock);
+
+ thermal->mode = mode;
+ thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
+
+ return 0;
+}
+
+static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
+ int *p_temp)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+ struct device *dev = thermal->bus_info->dev;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned int temp;
+ int err;
+
+ mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
+
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(dev, "Failed to query temp sensor\n");
+ return err;
+ }
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+
+ *p_temp = (int) temp;
+ return 0;
+}
+
+static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
+ int trip,
+ enum thermal_trip_type *p_type)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+
+ if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+ return -EINVAL;
+
+ *p_type = thermal->trips[trip].type;
+ return 0;
+}
+
+static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
+ int trip, int *p_temp)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+
+ if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+ return -EINVAL;
+
+ *p_temp = thermal->trips[trip].temp;
+ return 0;
+}
+
+static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
+ int trip, int temp)
+{
+ struct mlxsw_thermal *thermal = tzdev->devdata;
+
+ if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
+ temp > MLXSW_THERMAL_MAX_TEMP)
+ return -EINVAL;
+
+ thermal->trips[trip].temp = temp;
+ return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_ops = {
+ .bind = mlxsw_thermal_bind,
+ .unbind = mlxsw_thermal_unbind,
+ .get_mode = mlxsw_thermal_get_mode,
+ .set_mode = mlxsw_thermal_set_mode,
+ .get_temp = mlxsw_thermal_get_temp,
+ .get_trip_type = mlxsw_thermal_get_trip_type,
+ .get_trip_temp = mlxsw_thermal_get_trip_temp,
+ .set_trip_temp = mlxsw_thermal_set_trip_temp,
+};
+
+static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
+ unsigned long *p_state)
+{
+ *p_state = MLXSW_THERMAL_MAX_STATE;
+ return 0;
+}
+
+static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long *p_state)
+
+{
+ struct mlxsw_thermal *thermal = cdev->devdata;
+ struct device *dev = thermal->bus_info->dev;
+ char mfsc_pl[MLXSW_REG_MFSC_LEN];
+ int err, idx;
+ u8 duty;
+
+ idx = mlxsw_get_cooling_device_idx(thermal, cdev);
+ if (idx < 0)
+ return idx;
+
+ mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
+ if (err) {
+ dev_err(dev, "Failed to query PWM duty\n");
+ return err;
+ }
+
+ duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
+ *p_state = mlxsw_duty_to_state(duty);
+ return 0;
+}
+
+static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long state)
+
+{
+ struct mlxsw_thermal *thermal = cdev->devdata;
+ struct device *dev = thermal->bus_info->dev;
+ char mfsc_pl[MLXSW_REG_MFSC_LEN];
+ int err, idx;
+
+ idx = mlxsw_get_cooling_device_idx(thermal, cdev);
+ if (idx < 0)
+ return idx;
+
+ mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
+ err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
+ if (err) {
+ dev_err(dev, "Failed to write PWM duty\n");
+ return err;
+ }
+ return 0;
+}
+
+static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
+ .get_max_state = mlxsw_thermal_get_max_state,
+ .get_cur_state = mlxsw_thermal_get_cur_state,
+ .set_cur_state = mlxsw_thermal_set_cur_state,
+};
+
+int mlxsw_thermal_init(struct mlxsw_core *core,
+ const struct mlxsw_bus_info *bus_info,
+ struct mlxsw_thermal **p_thermal)
+{
+ char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
+ enum mlxsw_reg_mfcr_pwm_frequency freq;
+ struct device *dev = bus_info->dev;
+ struct mlxsw_thermal *thermal;
+ u16 tacho_active;
+ u8 pwm_active;
+ int err, i;
+
+ thermal = devm_kzalloc(dev, sizeof(*thermal),
+ GFP_KERNEL);
+ if (!thermal)
+ return -ENOMEM;
+
+ thermal->core = core;
+ thermal->bus_info = bus_info;
+ memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
+
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
+ if (err) {
+ dev_err(dev, "Failed to probe PWMs\n");
+ goto err_free_thermal;
+ }
+ mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
+
+ for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
+ if (tacho_active & BIT(i)) {
+ char mfsl_pl[MLXSW_REG_MFSL_LEN];
+
+ mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
+
+ /* We need to query the register to preserve maximum */
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
+ mfsl_pl);
+ if (err)
+ goto err_free_thermal;
+
+ /* set the minimal RPMs to 0 */
+ mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
+ err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
+ mfsl_pl);
+ if (err)
+ goto err_free_thermal;
+ }
+ }
+ for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
+ if (pwm_active & BIT(i)) {
+ struct thermal_cooling_device *cdev;
+
+ cdev = thermal_cooling_device_register("Fan", thermal,
+ &mlxsw_cooling_ops);
+ if (IS_ERR(cdev)) {
+ err = PTR_ERR(cdev);
+ dev_err(dev, "Failed to register cooling device\n");
+ goto err_unreg_cdevs;
+ }
+ thermal->cdevs[i] = cdev;
+ }
+ }
+
+ thermal->tzdev = thermal_zone_device_register("mlxsw",
+ MLXSW_THERMAL_NUM_TRIPS,
+ MLXSW_THERMAL_TRIP_MASK,
+ thermal,
+ &mlxsw_thermal_ops,
+ NULL, 0,
+ MLXSW_THERMAL_POLL_INT);
+ if (IS_ERR(thermal->tzdev)) {
+ err = PTR_ERR(thermal->tzdev);
+ dev_err(dev, "Failed to register thermal zone\n");
+ goto err_unreg_cdevs;
+ }
+
+ thermal->mode = THERMAL_DEVICE_ENABLED;
+ *p_thermal = thermal;
+ return 0;
+err_unreg_cdevs:
+ for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
+ if (thermal->cdevs[i])
+ thermal_cooling_device_unregister(thermal->cdevs[i]);
+err_free_thermal:
+ devm_kfree(dev, thermal);
+ return err;
+}
+
+void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
+{
+ int i;
+
+ if (thermal->tzdev) {
+ thermal_zone_device_unregister(thermal->tzdev);
+ thermal->tzdev = NULL;
+ }
+
+ for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
+ if (thermal->cdevs[i]) {
+ thermal_cooling_device_unregister(thermal->cdevs[i]);
+ thermal->cdevs[i] = NULL;
+ }
+ }
+
+ devm_kfree(thermal->bus_info->dev, thermal);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/emad.h b/drivers/net/ethernet/mellanox/mlxsw/emad.h
new file mode 100644
index 000000000..a33b896f4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/emad.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_EMAD_H
+#define _MLXSW_EMAD_H
+
+#define MLXSW_EMAD_MAX_FRAME_LEN 1518 /* Length in u8 */
+#define MLXSW_EMAD_MAX_RETRY 5
+
+/* EMAD Ethernet header */
+#define MLXSW_EMAD_ETH_HDR_LEN 0x10 /* Length in u8 */
+#define MLXSW_EMAD_EH_DMAC "\x01\x02\xc9\x00\x00\x01"
+#define MLXSW_EMAD_EH_SMAC "\x00\x02\xc9\x01\x02\x03"
+#define MLXSW_EMAD_EH_ETHERTYPE 0x8932
+#define MLXSW_EMAD_EH_MLX_PROTO 0
+#define MLXSW_EMAD_EH_PROTO_VERSION 0
+
+/* EMAD TLV Types */
+enum {
+ MLXSW_EMAD_TLV_TYPE_END,
+ MLXSW_EMAD_TLV_TYPE_OP,
+ MLXSW_EMAD_TLV_TYPE_DR,
+ MLXSW_EMAD_TLV_TYPE_REG,
+ MLXSW_EMAD_TLV_TYPE_USERDATA,
+ MLXSW_EMAD_TLV_TYPE_OOBETH,
+};
+
+/* OP TLV */
+#define MLXSW_EMAD_OP_TLV_LEN 4 /* Length in u32 */
+
+enum {
+ MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS = 1,
+ MLXSW_EMAD_OP_TLV_CLASS_IPC = 2,
+};
+
+enum mlxsw_emad_op_tlv_status {
+ MLXSW_EMAD_OP_TLV_STATUS_SUCCESS,
+ MLXSW_EMAD_OP_TLV_STATUS_BUSY,
+ MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED,
+ MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV,
+ MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED,
+ MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED,
+ MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED,
+ MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER,
+ MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE,
+ MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK,
+ MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR = 0x70,
+};
+
+static inline char *mlxsw_emad_op_tlv_status_str(u8 status)
+{
+ switch (status) {
+ case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS:
+ return "operation performed";
+ case MLXSW_EMAD_OP_TLV_STATUS_BUSY:
+ return "device is busy";
+ case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED:
+ return "version not supported";
+ case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV:
+ return "unknown TLV";
+ case MLXSW_EMAD_OP_TLV_STATUS_REGISTER_NOT_SUPPORTED:
+ return "register not supported";
+ case MLXSW_EMAD_OP_TLV_STATUS_CLASS_NOT_SUPPORTED:
+ return "class not supported";
+ case MLXSW_EMAD_OP_TLV_STATUS_METHOD_NOT_SUPPORTED:
+ return "method not supported";
+ case MLXSW_EMAD_OP_TLV_STATUS_BAD_PARAMETER:
+ return "bad parameter";
+ case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE:
+ return "resource not available";
+ case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK:
+ return "acknowledged. retransmit";
+ case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR:
+ return "internal error";
+ default:
+ return "*UNKNOWN*";
+ }
+}
+
+enum {
+ MLXSW_EMAD_OP_TLV_REQUEST,
+ MLXSW_EMAD_OP_TLV_RESPONSE
+};
+
+enum {
+ MLXSW_EMAD_OP_TLV_METHOD_QUERY = 1,
+ MLXSW_EMAD_OP_TLV_METHOD_WRITE = 2,
+ MLXSW_EMAD_OP_TLV_METHOD_SEND = 3,
+ MLXSW_EMAD_OP_TLV_METHOD_EVENT = 5,
+};
+
+/* END TLV */
+#define MLXSW_EMAD_END_TLV_LEN 1 /* Length in u32 */
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
new file mode 100644
index 000000000..1d1025fd2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+
+#include "cmd.h"
+#include "core.h"
+#include "i2c.h"
+
+#define MLXSW_I2C_CIR2_BASE 0x72000
+#define MLXSW_I2C_CIR_STATUS_OFF 0x18
+#define MLXSW_I2C_CIR2_OFF_STATUS (MLXSW_I2C_CIR2_BASE + \
+ MLXSW_I2C_CIR_STATUS_OFF)
+#define MLXSW_I2C_OPMOD_SHIFT 12
+#define MLXSW_I2C_GO_BIT_SHIFT 23
+#define MLXSW_I2C_CIR_CTRL_STATUS_SHIFT 24
+#define MLXSW_I2C_GO_BIT BIT(MLXSW_I2C_GO_BIT_SHIFT)
+#define MLXSW_I2C_GO_OPMODE BIT(MLXSW_I2C_OPMOD_SHIFT)
+#define MLXSW_I2C_SET_IMM_CMD (MLXSW_I2C_GO_OPMODE | \
+ MLXSW_CMD_OPCODE_QUERY_FW)
+#define MLXSW_I2C_PUSH_IMM_CMD (MLXSW_I2C_GO_BIT | \
+ MLXSW_I2C_SET_IMM_CMD)
+#define MLXSW_I2C_SET_CMD (MLXSW_CMD_OPCODE_ACCESS_REG)
+#define MLXSW_I2C_PUSH_CMD (MLXSW_I2C_GO_BIT | MLXSW_I2C_SET_CMD)
+#define MLXSW_I2C_TLV_HDR_SIZE 0x10
+#define MLXSW_I2C_ADDR_WIDTH 4
+#define MLXSW_I2C_PUSH_CMD_SIZE (MLXSW_I2C_ADDR_WIDTH + 4)
+#define MLXSW_I2C_READ_SEMA_SIZE 4
+#define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28)
+#define MLXSW_I2C_MBOX_SIZE 20
+#define MLXSW_I2C_MBOX_OUT_PARAM_OFF 12
+#define MLXSW_I2C_MAX_BUFF_SIZE 32
+#define MLXSW_I2C_MBOX_OFFSET_BITS 20
+#define MLXSW_I2C_MBOX_SIZE_BITS 12
+#define MLXSW_I2C_ADDR_BUF_SIZE 4
+#define MLXSW_I2C_BLK_MAX 32
+#define MLXSW_I2C_RETRY 5
+#define MLXSW_I2C_TIMEOUT_MSECS 5000
+
+/**
+ * struct mlxsw_i2c - device private data:
+ * @cmd.mb_size_in: input mailbox size;
+ * @cmd.mb_off_in: input mailbox offset in register space;
+ * @cmd.mb_size_out: output mailbox size;
+ * @cmd.mb_off_out: output mailbox offset in register space;
+ * @cmd.lock: command execution lock;
+ * @dev: I2C device;
+ * @core: switch core pointer;
+ * @bus_info: bus info block;
+ */
+struct mlxsw_i2c {
+ struct {
+ u32 mb_size_in;
+ u32 mb_off_in;
+ u32 mb_size_out;
+ u32 mb_off_out;
+ struct mutex lock;
+ } cmd;
+ struct device *dev;
+ struct mlxsw_core *core;
+ struct mlxsw_bus_info bus_info;
+};
+
+#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \
+ { .addr = (_client)->addr, \
+ .buf = (_addr_buf), \
+ .len = MLXSW_I2C_ADDR_BUF_SIZE, \
+ .flags = 0 }, \
+ { .addr = (_client)->addr, \
+ .buf = (_buf), \
+ .len = (_len), \
+ .flags = I2C_M_RD } }
+
+#define MLXSW_I2C_WRITE_MSG(_client, _buf, _len) \
+ { .addr = (_client)->addr, \
+ .buf = (u8 *)(_buf), \
+ .len = (_len), \
+ .flags = 0 }
+
+/* Routine converts in and out mail boxes offset and size. */
+static inline void
+mlxsw_i2c_convert_mbox(struct mlxsw_i2c *mlxsw_i2c, u8 *buf)
+{
+ u32 tmp;
+
+ /* Local in/out mailboxes: 20 bits for offset, 12 for size */
+ tmp = be32_to_cpup((__be32 *) buf);
+ mlxsw_i2c->cmd.mb_off_in = tmp &
+ GENMASK(MLXSW_I2C_MBOX_OFFSET_BITS - 1, 0);
+ mlxsw_i2c->cmd.mb_size_in = (tmp & GENMASK(31,
+ MLXSW_I2C_MBOX_OFFSET_BITS)) >>
+ MLXSW_I2C_MBOX_OFFSET_BITS;
+
+ tmp = be32_to_cpup((__be32 *) (buf + MLXSW_I2C_ADDR_WIDTH));
+ mlxsw_i2c->cmd.mb_off_out = tmp &
+ GENMASK(MLXSW_I2C_MBOX_OFFSET_BITS - 1, 0);
+ mlxsw_i2c->cmd.mb_size_out = (tmp & GENMASK(31,
+ MLXSW_I2C_MBOX_OFFSET_BITS)) >>
+ MLXSW_I2C_MBOX_OFFSET_BITS;
+}
+
+/* Routine obtains register size from mail box buffer. */
+static inline int mlxsw_i2c_get_reg_size(u8 *in_mbox)
+{
+ u16 tmp = be16_to_cpup((__be16 *) (in_mbox + MLXSW_I2C_TLV_HDR_SIZE));
+
+ return (tmp & 0x7ff) * 4 + MLXSW_I2C_TLV_HDR_SIZE;
+}
+
+/* Routine sets I2C device internal offset in the transaction buffer. */
+static inline void mlxsw_i2c_set_slave_addr(u8 *buf, u32 off)
+{
+ __be32 *val = (__be32 *) buf;
+
+ *val = htonl(off);
+}
+
+/* Routine waits until go bit is cleared. */
+static int mlxsw_i2c_wait_go_bit(struct i2c_client *client,
+ struct mlxsw_i2c *mlxsw_i2c, u8 *p_status)
+{
+ u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+ u8 buf[MLXSW_I2C_READ_SEMA_SIZE];
+ int len = MLXSW_I2C_READ_SEMA_SIZE;
+ struct i2c_msg read_sema[] =
+ MLXSW_I2C_READ_MSG(client, addr_buf, buf, len);
+ bool wait_done = false;
+ unsigned long end;
+ int i = 0, err;
+
+ mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_OFF_STATUS);
+
+ end = jiffies + msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+ do {
+ u32 ctrl;
+
+ err = i2c_transfer(client->adapter, read_sema,
+ ARRAY_SIZE(read_sema));
+
+ ctrl = be32_to_cpu(*(__be32 *) buf);
+ if (err == ARRAY_SIZE(read_sema)) {
+ if (!(ctrl & MLXSW_I2C_GO_BIT)) {
+ wait_done = true;
+ *p_status = ctrl >>
+ MLXSW_I2C_CIR_CTRL_STATUS_SHIFT;
+ break;
+ }
+ }
+ cond_resched();
+ } while ((time_before(jiffies, end)) || (i++ < MLXSW_I2C_RETRY));
+
+ if (wait_done) {
+ if (*p_status)
+ err = -EIO;
+ } else {
+ return -ETIMEDOUT;
+ }
+
+ return err > 0 ? 0 : err;
+}
+
+/* Routine posts a command to ASIC though mail box. */
+static int mlxsw_i2c_write_cmd(struct i2c_client *client,
+ struct mlxsw_i2c *mlxsw_i2c,
+ int immediate)
+{
+ __be32 push_cmd_buf[MLXSW_I2C_PUSH_CMD_SIZE / 4] = {
+ 0, cpu_to_be32(MLXSW_I2C_PUSH_IMM_CMD)
+ };
+ __be32 prep_cmd_buf[MLXSW_I2C_PREP_SIZE / 4] = {
+ 0, 0, 0, 0, 0, 0,
+ cpu_to_be32(client->adapter->nr & 0xffff),
+ cpu_to_be32(MLXSW_I2C_SET_IMM_CMD)
+ };
+ struct i2c_msg push_cmd =
+ MLXSW_I2C_WRITE_MSG(client, push_cmd_buf,
+ MLXSW_I2C_PUSH_CMD_SIZE);
+ struct i2c_msg prep_cmd =
+ MLXSW_I2C_WRITE_MSG(client, prep_cmd_buf, MLXSW_I2C_PREP_SIZE);
+ int err;
+
+ if (!immediate) {
+ push_cmd_buf[1] = cpu_to_be32(MLXSW_I2C_PUSH_CMD);
+ prep_cmd_buf[7] = cpu_to_be32(MLXSW_I2C_SET_CMD);
+ }
+ mlxsw_i2c_set_slave_addr((u8 *)prep_cmd_buf,
+ MLXSW_I2C_CIR2_BASE);
+ mlxsw_i2c_set_slave_addr((u8 *)push_cmd_buf,
+ MLXSW_I2C_CIR2_OFF_STATUS);
+
+ /* Prepare Command Interface Register for transaction */
+ err = i2c_transfer(client->adapter, &prep_cmd, 1);
+ if (err < 0)
+ return err;
+ else if (err != 1)
+ return -EIO;
+
+ /* Write out Command Interface Register GO bit to push transaction */
+ err = i2c_transfer(client->adapter, &push_cmd, 1);
+ if (err < 0)
+ return err;
+ else if (err != 1)
+ return -EIO;
+
+ return 0;
+}
+
+/* Routine obtains mail box offsets from ASIC register space. */
+static int mlxsw_i2c_get_mbox(struct i2c_client *client,
+ struct mlxsw_i2c *mlxsw_i2c)
+{
+ u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+ u8 buf[MLXSW_I2C_MBOX_SIZE];
+ struct i2c_msg mbox_cmd[] =
+ MLXSW_I2C_READ_MSG(client, addr_buf, buf, MLXSW_I2C_MBOX_SIZE);
+ int err;
+
+ /* Read mail boxes offsets. */
+ mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_BASE);
+ err = i2c_transfer(client->adapter, mbox_cmd, 2);
+ if (err != 2) {
+ dev_err(&client->dev, "Could not obtain mail boxes\n");
+ if (!err)
+ return -EIO;
+ else
+ return err;
+ }
+
+ /* Convert mail boxes. */
+ mlxsw_i2c_convert_mbox(mlxsw_i2c, &buf[MLXSW_I2C_MBOX_OUT_PARAM_OFF]);
+
+ return err;
+}
+
+/* Routine sends I2C write transaction to ASIC device. */
+static int
+mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
+ u8 *p_status)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+ unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+ u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE];
+ int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j;
+ unsigned long end;
+ struct i2c_msg write_tran =
+ MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE);
+ int err;
+
+ for (i = 0; i < num; i++) {
+ chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ?
+ MLXSW_I2C_BLK_MAX : in_mbox_size;
+ write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size;
+ mlxsw_i2c_set_slave_addr(tran_buf, off);
+ memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox +
+ MLXSW_I2C_BLK_MAX * i, chunk_size);
+
+ j = 0;
+ end = jiffies + timeout;
+ do {
+ err = i2c_transfer(client->adapter, &write_tran, 1);
+ if (err == 1)
+ break;
+
+ cond_resched();
+ } while ((time_before(jiffies, end)) ||
+ (j++ < MLXSW_I2C_RETRY));
+
+ if (err != 1) {
+ if (!err)
+ err = -EIO;
+ return err;
+ }
+
+ off += chunk_size;
+ in_mbox_size -= chunk_size;
+ }
+
+ /* Prepare and write out Command Interface Register for transaction. */
+ err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0);
+ if (err) {
+ dev_err(&client->dev, "Could not start transaction");
+ return -EIO;
+ }
+
+ /* Wait until go bit is cleared. */
+ err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status);
+ if (err) {
+ dev_err(&client->dev, "HW semaphore is not released");
+ return err;
+ }
+
+ /* Validate transaction completion status. */
+ if (*p_status) {
+ dev_err(&client->dev, "Bad transaction completion status %x\n",
+ *p_status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* Routine executes I2C command. */
+static int
+mlxsw_i2c_cmd(struct device *dev, size_t in_mbox_size, u8 *in_mbox,
+ size_t out_mbox_size, u8 *out_mbox, u8 *status)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+ unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+ u8 tran_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+ int num, chunk_size, reg_size, i, j;
+ int off = mlxsw_i2c->cmd.mb_off_out;
+ unsigned long end;
+ struct i2c_msg read_tran[] =
+ MLXSW_I2C_READ_MSG(client, tran_buf, NULL, 0);
+ int err;
+
+ WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));
+
+ reg_size = mlxsw_i2c_get_reg_size(in_mbox);
+ num = reg_size / MLXSW_I2C_BLK_MAX;
+ if (reg_size % MLXSW_I2C_BLK_MAX)
+ num++;
+
+ if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
+ dev_err(&client->dev, "Could not acquire lock");
+ return -EINVAL;
+ }
+
+ err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status);
+ if (err)
+ goto cmd_fail;
+
+ /* No out mailbox is case of write transaction. */
+ if (!out_mbox) {
+ mutex_unlock(&mlxsw_i2c->cmd.lock);
+ return 0;
+ }
+
+ /* Send read transaction to get output mailbox content. */
+ read_tran[1].buf = out_mbox;
+ for (i = 0; i < num; i++) {
+ chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ?
+ MLXSW_I2C_BLK_MAX : reg_size;
+ read_tran[1].len = chunk_size;
+ mlxsw_i2c_set_slave_addr(tran_buf, off);
+
+ j = 0;
+ end = jiffies + timeout;
+ do {
+ err = i2c_transfer(client->adapter, read_tran,
+ ARRAY_SIZE(read_tran));
+ if (err == ARRAY_SIZE(read_tran))
+ break;
+
+ cond_resched();
+ } while ((time_before(jiffies, end)) ||
+ (j++ < MLXSW_I2C_RETRY));
+
+ if (err != ARRAY_SIZE(read_tran)) {
+ if (!err)
+ err = -EIO;
+
+ goto cmd_fail;
+ }
+
+ off += chunk_size;
+ reg_size -= chunk_size;
+ read_tran[1].buf += chunk_size;
+ }
+
+ mutex_unlock(&mlxsw_i2c->cmd.lock);
+
+ return 0;
+
+cmd_fail:
+ mutex_unlock(&mlxsw_i2c->cmd.lock);
+ return err;
+}
+
+static int mlxsw_i2c_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
+ u32 in_mod, bool out_mbox_direct,
+ char *in_mbox, size_t in_mbox_size,
+ char *out_mbox, size_t out_mbox_size,
+ u8 *status)
+{
+ struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+ return mlxsw_i2c_cmd(mlxsw_i2c->dev, in_mbox_size, in_mbox,
+ out_mbox_size, out_mbox, status);
+}
+
+static bool mlxsw_i2c_skb_transmit_busy(void *bus_priv,
+ const struct mlxsw_tx_info *tx_info)
+{
+ return false;
+}
+
+static int mlxsw_i2c_skb_transmit(void *bus_priv, struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info)
+{
+ return 0;
+}
+
+static int
+mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_config_profile *profile,
+ struct mlxsw_res *resources)
+{
+ struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+ mlxsw_i2c->core = mlxsw_core;
+
+ return 0;
+}
+
+static void mlxsw_i2c_fini(void *bus_priv)
+{
+ struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+ mlxsw_i2c->core = NULL;
+}
+
+static const struct mlxsw_bus mlxsw_i2c_bus = {
+ .kind = "i2c",
+ .init = mlxsw_i2c_init,
+ .fini = mlxsw_i2c_fini,
+ .skb_transmit_busy = mlxsw_i2c_skb_transmit_busy,
+ .skb_transmit = mlxsw_i2c_skb_transmit,
+ .cmd_exec = mlxsw_i2c_cmd_exec,
+};
+
+static int mlxsw_i2c_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct mlxsw_i2c *mlxsw_i2c;
+ u8 status;
+ int err;
+
+ mlxsw_i2c = devm_kzalloc(&client->dev, sizeof(*mlxsw_i2c), GFP_KERNEL);
+ if (!mlxsw_i2c)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, mlxsw_i2c);
+ mutex_init(&mlxsw_i2c->cmd.lock);
+
+ /* In order to use mailboxes through the i2c, special area is reserved
+ * on the i2c address space that can be used for input and output
+ * mailboxes. Such mailboxes are called local mailboxes. When using a
+ * local mailbox, software should specify 0 as the Input/Output
+ * parameters. The location of the Local Mailbox addresses on the i2c
+ * space can be retrieved through the QUERY_FW command.
+ * For this purpose QUERY_FW is to be issued with opcode modifier equal
+ * 0x01. For such command the output parameter is an immediate value.
+ * Here QUERY_FW command is invoked for ASIC probing and for getting
+ * local mailboxes addresses from immedate output parameters.
+ */
+
+ /* Prepare and write out Command Interface Register for transaction */
+ err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 1);
+ if (err) {
+ dev_err(&client->dev, "Could not start transaction");
+ goto errout;
+ }
+
+ /* Wait until go bit is cleared. */
+ err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status);
+ if (err) {
+ dev_err(&client->dev, "HW semaphore is not released");
+ goto errout;
+ }
+
+ /* Validate transaction completion status. */
+ if (status) {
+ dev_err(&client->dev, "Bad transaction completion status %x\n",
+ status);
+ err = -EIO;
+ goto errout;
+ }
+
+ /* Get mailbox offsets. */
+ err = mlxsw_i2c_get_mbox(client, mlxsw_i2c);
+ if (err < 0) {
+ dev_err(&client->dev, "Fail to get mailboxes\n");
+ goto errout;
+ }
+
+ dev_info(&client->dev, "%s mb size=%x off=0x%08x out mb size=%x off=0x%08x\n",
+ id->name, mlxsw_i2c->cmd.mb_size_in,
+ mlxsw_i2c->cmd.mb_off_in, mlxsw_i2c->cmd.mb_size_out,
+ mlxsw_i2c->cmd.mb_off_out);
+
+ /* Register device bus. */
+ mlxsw_i2c->bus_info.device_kind = id->name;
+ mlxsw_i2c->bus_info.device_name = client->name;
+ mlxsw_i2c->bus_info.dev = &client->dev;
+ mlxsw_i2c->dev = &client->dev;
+
+ err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
+ &mlxsw_i2c_bus, mlxsw_i2c, false,
+ NULL);
+ if (err) {
+ dev_err(&client->dev, "Fail to register core bus\n");
+ return err;
+ }
+
+ return 0;
+
+errout:
+ mutex_destroy(&mlxsw_i2c->cmd.lock);
+ i2c_set_clientdata(client, NULL);
+
+ return err;
+}
+
+static int mlxsw_i2c_remove(struct i2c_client *client)
+{
+ struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+
+ mlxsw_core_bus_device_unregister(mlxsw_i2c->core, false);
+ mutex_destroy(&mlxsw_i2c->cmd.lock);
+
+ return 0;
+}
+
+int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver)
+{
+ i2c_driver->probe = mlxsw_i2c_probe;
+ i2c_driver->remove = mlxsw_i2c_remove;
+ return i2c_add_driver(i2c_driver);
+}
+EXPORT_SYMBOL(mlxsw_i2c_driver_register);
+
+void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver)
+{
+ i2c_del_driver(i2c_driver);
+}
+EXPORT_SYMBOL(mlxsw_i2c_driver_unregister);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox switch I2C interface driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.h b/drivers/net/ethernet/mellanox/mlxsw/i2c.h
new file mode 100644
index 000000000..17e059d47
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_I2C_H
+#define _MLXSW_I2C_H
+
+#include <linux/i2c.h>
+
+#if IS_ENABLED(CONFIG_MLXSW_I2C)
+
+int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver);
+void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver);
+
+#else
+
+static inline int
+mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver)
+{
+ return -ENODEV;
+}
+
+static inline void
+mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/ib.h b/drivers/net/ethernet/mellanox/mlxsw/ib.h
new file mode 100644
index 000000000..2d0cb0f5e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/ib.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_IB_H
+#define _MLXSW_IB_H
+
+#define MLXSW_IB_DEFAULT_MTU 4096
+
+#endif /* _MLXSW_IB_H */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h
new file mode 100644
index 000000000..e92cadc98
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/item.h
@@ -0,0 +1,509 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_ITEM_H
+#define _MLXSW_ITEM_H
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+
+struct mlxsw_item {
+ unsigned short offset; /* bytes in container */
+ short step; /* step in bytes for indexed items */
+ unsigned short in_step_offset; /* offset within one step */
+ unsigned char shift; /* shift in bits */
+ unsigned char element_size; /* size of element in bit array */
+ bool no_real_shift;
+ union {
+ unsigned char bits;
+ unsigned short bytes;
+ } size;
+ const char *name;
+};
+
+static inline unsigned int
+__mlxsw_item_offset(const struct mlxsw_item *item, unsigned short index,
+ size_t typesize)
+{
+ BUG_ON(index && !item->step);
+ if (item->offset % typesize != 0 ||
+ item->step % typesize != 0 ||
+ item->in_step_offset % typesize != 0) {
+ pr_err("mlxsw: item bug (name=%s,offset=%x,step=%x,in_step_offset=%x,typesize=%zx)\n",
+ item->name, item->offset, item->step,
+ item->in_step_offset, typesize);
+ BUG();
+ }
+
+ return ((item->offset + item->step * index + item->in_step_offset) /
+ typesize);
+}
+
+static inline u8 __mlxsw_item_get8(const char *buf,
+ const struct mlxsw_item *item,
+ unsigned short index)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u8));
+ u8 *b = (u8 *) buf;
+ u8 tmp;
+
+ tmp = b[offset];
+ tmp >>= item->shift;
+ tmp &= GENMASK(item->size.bits - 1, 0);
+ if (item->no_real_shift)
+ tmp <<= item->shift;
+ return tmp;
+}
+
+static inline void __mlxsw_item_set8(char *buf, const struct mlxsw_item *item,
+ unsigned short index, u8 val)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index,
+ sizeof(u8));
+ u8 *b = (u8 *) buf;
+ u8 mask = GENMASK(item->size.bits - 1, 0) << item->shift;
+ u8 tmp;
+
+ if (!item->no_real_shift)
+ val <<= item->shift;
+ val &= mask;
+ tmp = b[offset];
+ tmp &= ~mask;
+ tmp |= val;
+ b[offset] = tmp;
+}
+
+static inline u16 __mlxsw_item_get16(const char *buf,
+ const struct mlxsw_item *item,
+ unsigned short index)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u16));
+ __be16 *b = (__be16 *) buf;
+ u16 tmp;
+
+ tmp = be16_to_cpu(b[offset]);
+ tmp >>= item->shift;
+ tmp &= GENMASK(item->size.bits - 1, 0);
+ if (item->no_real_shift)
+ tmp <<= item->shift;
+ return tmp;
+}
+
+static inline void __mlxsw_item_set16(char *buf, const struct mlxsw_item *item,
+ unsigned short index, u16 val)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index,
+ sizeof(u16));
+ __be16 *b = (__be16 *) buf;
+ u16 mask = GENMASK(item->size.bits - 1, 0) << item->shift;
+ u16 tmp;
+
+ if (!item->no_real_shift)
+ val <<= item->shift;
+ val &= mask;
+ tmp = be16_to_cpu(b[offset]);
+ tmp &= ~mask;
+ tmp |= val;
+ b[offset] = cpu_to_be16(tmp);
+}
+
+static inline u32 __mlxsw_item_get32(const char *buf,
+ const struct mlxsw_item *item,
+ unsigned short index)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u32));
+ __be32 *b = (__be32 *) buf;
+ u32 tmp;
+
+ tmp = be32_to_cpu(b[offset]);
+ tmp >>= item->shift;
+ tmp &= GENMASK(item->size.bits - 1, 0);
+ if (item->no_real_shift)
+ tmp <<= item->shift;
+ return tmp;
+}
+
+static inline void __mlxsw_item_set32(char *buf, const struct mlxsw_item *item,
+ unsigned short index, u32 val)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index,
+ sizeof(u32));
+ __be32 *b = (__be32 *) buf;
+ u32 mask = GENMASK(item->size.bits - 1, 0) << item->shift;
+ u32 tmp;
+
+ if (!item->no_real_shift)
+ val <<= item->shift;
+ val &= mask;
+ tmp = be32_to_cpu(b[offset]);
+ tmp &= ~mask;
+ tmp |= val;
+ b[offset] = cpu_to_be32(tmp);
+}
+
+static inline u64 __mlxsw_item_get64(const char *buf,
+ const struct mlxsw_item *item,
+ unsigned short index)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64));
+ __be64 *b = (__be64 *) buf;
+ u64 tmp;
+
+ tmp = be64_to_cpu(b[offset]);
+ tmp >>= item->shift;
+ tmp &= GENMASK_ULL(item->size.bits - 1, 0);
+ if (item->no_real_shift)
+ tmp <<= item->shift;
+ return tmp;
+}
+
+static inline void __mlxsw_item_set64(char *buf, const struct mlxsw_item *item,
+ unsigned short index, u64 val)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64));
+ __be64 *b = (__be64 *) buf;
+ u64 mask = GENMASK_ULL(item->size.bits - 1, 0) << item->shift;
+ u64 tmp;
+
+ if (!item->no_real_shift)
+ val <<= item->shift;
+ val &= mask;
+ tmp = be64_to_cpu(b[offset]);
+ tmp &= ~mask;
+ tmp |= val;
+ b[offset] = cpu_to_be64(tmp);
+}
+
+static inline void __mlxsw_item_memcpy_from(const char *buf, char *dst,
+ const struct mlxsw_item *item,
+ unsigned short index)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char));
+
+ memcpy(dst, &buf[offset], item->size.bytes);
+}
+
+static inline void __mlxsw_item_memcpy_to(char *buf, const char *src,
+ const struct mlxsw_item *item,
+ unsigned short index)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char));
+
+ memcpy(&buf[offset], src, item->size.bytes);
+}
+
+static inline char *__mlxsw_item_data(char *buf, const struct mlxsw_item *item,
+ unsigned short index)
+{
+ unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char));
+
+ return &buf[offset];
+}
+
+static inline u16
+__mlxsw_item_bit_array_offset(const struct mlxsw_item *item,
+ u16 index, u8 *shift)
+{
+ u16 max_index, be_index;
+ u16 offset; /* byte offset inside the array */
+ u8 in_byte_index;
+
+ BUG_ON(index && !item->element_size);
+ if (item->offset % sizeof(u32) != 0 ||
+ BITS_PER_BYTE % item->element_size != 0) {
+ pr_err("mlxsw: item bug (name=%s,offset=%x,element_size=%x)\n",
+ item->name, item->offset, item->element_size);
+ BUG();
+ }
+
+ max_index = (item->size.bytes << 3) / item->element_size - 1;
+ be_index = max_index - index;
+ offset = be_index * item->element_size >> 3;
+ in_byte_index = index % (BITS_PER_BYTE / item->element_size);
+ *shift = in_byte_index * item->element_size;
+
+ return item->offset + offset;
+}
+
+static inline u8 __mlxsw_item_bit_array_get(const char *buf,
+ const struct mlxsw_item *item,
+ u16 index)
+{
+ u8 shift, tmp;
+ u16 offset = __mlxsw_item_bit_array_offset(item, index, &shift);
+
+ tmp = buf[offset];
+ tmp >>= shift;
+ tmp &= GENMASK(item->element_size - 1, 0);
+ return tmp;
+}
+
+static inline void __mlxsw_item_bit_array_set(char *buf,
+ const struct mlxsw_item *item,
+ u16 index, u8 val)
+{
+ u8 shift, tmp;
+ u16 offset = __mlxsw_item_bit_array_offset(item, index, &shift);
+ u8 mask = GENMASK(item->element_size - 1, 0) << shift;
+
+ val <<= shift;
+ val &= mask;
+ tmp = buf[offset];
+ tmp &= ~mask;
+ tmp |= val;
+ buf[offset] = tmp;
+}
+
+#define __ITEM_NAME(_type, _cname, _iname) \
+ mlxsw_##_type##_##_cname##_##_iname##_item
+
+/* _type: cmd_mbox, reg, etc.
+ * _cname: containter name (e.g. command name, register name)
+ * _iname: item name within the container
+ */
+
+#define MLXSW_ITEM8(_type, _cname, _iname, _offset, _shift, _sizebits) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .shift = _shift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u8 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \
+{ \
+ return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \
+} \
+static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u8 val)\
+{ \
+ __mlxsw_item_set8(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \
+}
+
+#define MLXSW_ITEM8_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \
+ _step, _instepoffset, _norealshift) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .step = _step, \
+ .in_step_offset = _instepoffset, \
+ .shift = _shift, \
+ .no_real_shift = _norealshift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u8 \
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\
+{ \
+ return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index); \
+} \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \
+ u8 val) \
+{ \
+ __mlxsw_item_set8(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index, val); \
+}
+
+#define MLXSW_ITEM16(_type, _cname, _iname, _offset, _shift, _sizebits) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .shift = _shift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u16 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \
+{ \
+ return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \
+} \
+static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 val)\
+{ \
+ __mlxsw_item_set16(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \
+}
+
+#define MLXSW_ITEM16_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \
+ _step, _instepoffset, _norealshift) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .step = _step, \
+ .in_step_offset = _instepoffset, \
+ .shift = _shift, \
+ .no_real_shift = _norealshift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u16 \
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\
+{ \
+ return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index); \
+} \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \
+ u16 val) \
+{ \
+ __mlxsw_item_set16(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index, val); \
+}
+
+#define MLXSW_ITEM32(_type, _cname, _iname, _offset, _shift, _sizebits) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .shift = _shift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u32 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \
+{ \
+ return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \
+} \
+static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u32 val)\
+{ \
+ __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \
+}
+
+#define MLXSW_ITEM32_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \
+ _step, _instepoffset, _norealshift) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .step = _step, \
+ .in_step_offset = _instepoffset, \
+ .shift = _shift, \
+ .no_real_shift = _norealshift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u32 \
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\
+{ \
+ return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index); \
+} \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \
+ u32 val) \
+{ \
+ __mlxsw_item_set32(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index, val); \
+}
+
+#define MLXSW_ITEM64(_type, _cname, _iname, _offset, _shift, _sizebits) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .shift = _shift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u64 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \
+{ \
+ return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \
+} \
+static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u64 val)\
+{ \
+ __mlxsw_item_set64(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \
+}
+
+#define MLXSW_ITEM64_INDEXED(_type, _cname, _iname, _offset, _shift, \
+ _sizebits, _step, _instepoffset, _norealshift) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .step = _step, \
+ .in_step_offset = _instepoffset, \
+ .shift = _shift, \
+ .no_real_shift = _norealshift, \
+ .size = {.bits = _sizebits,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u64 \
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\
+{ \
+ return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index); \
+} \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \
+ u64 val) \
+{ \
+ __mlxsw_item_set64(buf, &__ITEM_NAME(_type, _cname, _iname), \
+ index, val); \
+}
+
+#define MLXSW_ITEM_BUF(_type, _cname, _iname, _offset, _sizebytes) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .size = {.bytes = _sizebytes,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, char *dst) \
+{ \
+ __mlxsw_item_memcpy_from(buf, dst, \
+ &__ITEM_NAME(_type, _cname, _iname), 0); \
+} \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, const char *src) \
+{ \
+ __mlxsw_item_memcpy_to(buf, src, \
+ &__ITEM_NAME(_type, _cname, _iname), 0); \
+} \
+static inline char * \
+mlxsw_##_type##_##_cname##_##_iname##_data(char *buf) \
+{ \
+ return __mlxsw_item_data(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \
+}
+
+#define MLXSW_ITEM_BUF_INDEXED(_type, _cname, _iname, _offset, _sizebytes, \
+ _step, _instepoffset) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .step = _step, \
+ .in_step_offset = _instepoffset, \
+ .size = {.bytes = _sizebytes,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, \
+ unsigned short index, \
+ char *dst) \
+{ \
+ __mlxsw_item_memcpy_from(buf, dst, \
+ &__ITEM_NAME(_type, _cname, _iname), index); \
+} \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, \
+ unsigned short index, \
+ const char *src) \
+{ \
+ __mlxsw_item_memcpy_to(buf, src, \
+ &__ITEM_NAME(_type, _cname, _iname), index); \
+} \
+static inline char * \
+mlxsw_##_type##_##_cname##_##_iname##_data(char *buf, unsigned short index) \
+{ \
+ return __mlxsw_item_data(buf, \
+ &__ITEM_NAME(_type, _cname, _iname), index); \
+}
+
+#define MLXSW_ITEM_BIT_ARRAY(_type, _cname, _iname, _offset, _sizebytes, \
+ _element_size) \
+static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \
+ .offset = _offset, \
+ .element_size = _element_size, \
+ .size = {.bytes = _sizebytes,}, \
+ .name = #_type "_" #_cname "_" #_iname, \
+}; \
+static inline u8 \
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, u16 index) \
+{ \
+ return __mlxsw_item_bit_array_get(buf, \
+ &__ITEM_NAME(_type, _cname, _iname), \
+ index); \
+} \
+static inline void \
+mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u16 index, u8 val) \
+{ \
+ return __mlxsw_item_bit_array_set(buf, \
+ &__ITEM_NAME(_type, _cname, _iname), \
+ index, val); \
+} \
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
new file mode 100644
index 000000000..5a6c4457f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/types.h>
+
+#include "core.h"
+#include "i2c.h"
+
+static const char mlxsw_minimal_driver_name[] = "mlxsw_minimal";
+
+static const struct mlxsw_config_profile mlxsw_minimal_config_profile;
+
+static struct mlxsw_driver mlxsw_minimal_driver = {
+ .kind = mlxsw_minimal_driver_name,
+ .priv_size = 1,
+ .profile = &mlxsw_minimal_config_profile,
+};
+
+static const struct i2c_device_id mlxsw_minimal_i2c_id[] = {
+ { "mlxsw_minimal", 0},
+ { },
+};
+
+static struct i2c_driver mlxsw_minimal_i2c_driver = {
+ .driver.name = "mlxsw_minimal",
+ .class = I2C_CLASS_HWMON,
+ .id_table = mlxsw_minimal_i2c_id,
+};
+
+static int __init mlxsw_minimal_module_init(void)
+{
+ int err;
+
+ err = mlxsw_core_driver_register(&mlxsw_minimal_driver);
+ if (err)
+ return err;
+
+ err = mlxsw_i2c_driver_register(&mlxsw_minimal_i2c_driver);
+ if (err)
+ goto err_i2c_driver_register;
+
+ return 0;
+
+err_i2c_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+
+ return err;
+}
+
+static void __exit mlxsw_minimal_module_exit(void)
+{
+ mlxsw_i2c_driver_unregister(&mlxsw_minimal_i2c_driver);
+ mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+}
+
+module_init(mlxsw_minimal_module_init);
+module_exit(mlxsw_minimal_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox minimal driver");
+MODULE_DEVICE_TABLE(i2c, mlxsw_minimal_i2c_id);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
new file mode 100644
index 000000000..addd57655
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -0,0 +1,1853 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+
+#include "pci_hw.h"
+#include "pci.h"
+#include "core.h"
+#include "cmd.h"
+#include "port.h"
+#include "resources.h"
+
+#define mlxsw_pci_write32(mlxsw_pci, reg, val) \
+ iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
+#define mlxsw_pci_read32(mlxsw_pci, reg) \
+ ioread32be((mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
+
+enum mlxsw_pci_queue_type {
+ MLXSW_PCI_QUEUE_TYPE_SDQ,
+ MLXSW_PCI_QUEUE_TYPE_RDQ,
+ MLXSW_PCI_QUEUE_TYPE_CQ,
+ MLXSW_PCI_QUEUE_TYPE_EQ,
+};
+
+#define MLXSW_PCI_QUEUE_TYPE_COUNT 4
+
+static const u16 mlxsw_pci_doorbell_type_offset[] = {
+ MLXSW_PCI_DOORBELL_SDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_SDQ */
+ MLXSW_PCI_DOORBELL_RDQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_RDQ */
+ MLXSW_PCI_DOORBELL_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */
+ MLXSW_PCI_DOORBELL_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */
+};
+
+static const u16 mlxsw_pci_doorbell_arm_type_offset[] = {
+ 0, /* unused */
+ 0, /* unused */
+ MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */
+ MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */
+};
+
+struct mlxsw_pci_mem_item {
+ char *buf;
+ dma_addr_t mapaddr;
+ size_t size;
+};
+
+struct mlxsw_pci_queue_elem_info {
+ char *elem; /* pointer to actual dma mapped element mem chunk */
+ union {
+ struct {
+ struct sk_buff *skb;
+ } sdq;
+ struct {
+ struct sk_buff *skb;
+ } rdq;
+ } u;
+};
+
+struct mlxsw_pci_queue {
+ spinlock_t lock; /* for queue accesses */
+ struct mlxsw_pci_mem_item mem_item;
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ u16 producer_counter;
+ u16 consumer_counter;
+ u16 count; /* number of elements in queue */
+ u8 num; /* queue number */
+ u8 elem_size; /* size of one element */
+ enum mlxsw_pci_queue_type type;
+ struct tasklet_struct tasklet; /* queue processing tasklet */
+ struct mlxsw_pci *pci;
+ union {
+ struct {
+ u32 comp_sdq_count;
+ u32 comp_rdq_count;
+ enum mlxsw_pci_cqe_v v;
+ } cq;
+ struct {
+ u32 ev_cmd_count;
+ u32 ev_comp_count;
+ u32 ev_other_count;
+ } eq;
+ } u;
+};
+
+struct mlxsw_pci_queue_type_group {
+ struct mlxsw_pci_queue *q;
+ u8 count; /* number of queues in group */
+};
+
+struct mlxsw_pci {
+ struct pci_dev *pdev;
+ u8 __iomem *hw_addr;
+ struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
+ u32 doorbell_offset;
+ struct mlxsw_core *core;
+ struct {
+ struct mlxsw_pci_mem_item *items;
+ unsigned int count;
+ } fw_area;
+ struct {
+ struct mlxsw_pci_mem_item out_mbox;
+ struct mlxsw_pci_mem_item in_mbox;
+ struct mutex lock; /* Lock access to command registers */
+ bool nopoll;
+ wait_queue_head_t wait;
+ bool wait_done;
+ struct {
+ u8 status;
+ u64 out_param;
+ } comp;
+ } cmd;
+ struct mlxsw_bus_info bus_info;
+ const struct pci_device_id *id;
+ enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
+ u8 num_sdq_cqs; /* Number of CQs used for SDQs */
+};
+
+static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
+{
+ tasklet_schedule(&q->tasklet);
+}
+
+static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q,
+ size_t elem_size, int elem_index)
+{
+ return q->mem_item.buf + (elem_size * elem_index);
+}
+
+static struct mlxsw_pci_queue_elem_info *
+mlxsw_pci_queue_elem_info_get(struct mlxsw_pci_queue *q, int elem_index)
+{
+ return &q->elem_info[elem_index];
+}
+
+static struct mlxsw_pci_queue_elem_info *
+mlxsw_pci_queue_elem_info_producer_get(struct mlxsw_pci_queue *q)
+{
+ int index = q->producer_counter & (q->count - 1);
+
+ if ((u16) (q->producer_counter - q->consumer_counter) == q->count)
+ return NULL;
+ return mlxsw_pci_queue_elem_info_get(q, index);
+}
+
+static struct mlxsw_pci_queue_elem_info *
+mlxsw_pci_queue_elem_info_consumer_get(struct mlxsw_pci_queue *q)
+{
+ int index = q->consumer_counter & (q->count - 1);
+
+ return mlxsw_pci_queue_elem_info_get(q, index);
+}
+
+static char *mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, int elem_index)
+{
+ return mlxsw_pci_queue_elem_info_get(q, elem_index)->elem;
+}
+
+static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit)
+{
+ return owner_bit != !!(q->consumer_counter & q->count);
+}
+
+static struct mlxsw_pci_queue_type_group *
+mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci,
+ enum mlxsw_pci_queue_type q_type)
+{
+ return &mlxsw_pci->queues[q_type];
+}
+
+static u8 __mlxsw_pci_queue_count(struct mlxsw_pci *mlxsw_pci,
+ enum mlxsw_pci_queue_type q_type)
+{
+ struct mlxsw_pci_queue_type_group *queue_group;
+
+ queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_type);
+ return queue_group->count;
+}
+
+static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci)
+{
+ return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ);
+}
+
+static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci)
+{
+ return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ);
+}
+
+static struct mlxsw_pci_queue *
+__mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci,
+ enum mlxsw_pci_queue_type q_type, u8 q_num)
+{
+ return &mlxsw_pci->queues[q_type].q[q_num];
+}
+
+static struct mlxsw_pci_queue *mlxsw_pci_sdq_get(struct mlxsw_pci *mlxsw_pci,
+ u8 q_num)
+{
+ return __mlxsw_pci_queue_get(mlxsw_pci,
+ MLXSW_PCI_QUEUE_TYPE_SDQ, q_num);
+}
+
+static struct mlxsw_pci_queue *mlxsw_pci_rdq_get(struct mlxsw_pci *mlxsw_pci,
+ u8 q_num)
+{
+ return __mlxsw_pci_queue_get(mlxsw_pci,
+ MLXSW_PCI_QUEUE_TYPE_RDQ, q_num);
+}
+
+static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci,
+ u8 q_num)
+{
+ return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ, q_num);
+}
+
+static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci,
+ u8 q_num)
+{
+ return __mlxsw_pci_queue_get(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ, q_num);
+}
+
+static void __mlxsw_pci_queue_doorbell_set(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q,
+ u16 val)
+{
+ mlxsw_pci_write32(mlxsw_pci,
+ DOORBELL(mlxsw_pci->doorbell_offset,
+ mlxsw_pci_doorbell_type_offset[q->type],
+ q->num), val);
+}
+
+static void __mlxsw_pci_queue_doorbell_arm_set(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q,
+ u16 val)
+{
+ mlxsw_pci_write32(mlxsw_pci,
+ DOORBELL(mlxsw_pci->doorbell_offset,
+ mlxsw_pci_doorbell_arm_type_offset[q->type],
+ q->num), val);
+}
+
+static void mlxsw_pci_queue_doorbell_producer_ring(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ wmb(); /* ensure all writes are done before we ring a bell */
+ __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, q->producer_counter);
+}
+
+static void mlxsw_pci_queue_doorbell_consumer_ring(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ wmb(); /* ensure all writes are done before we ring a bell */
+ __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q,
+ q->consumer_counter + q->count);
+}
+
+static void
+mlxsw_pci_queue_doorbell_arm_consumer_ring(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ wmb(); /* ensure all writes are done before we ring a bell */
+ __mlxsw_pci_queue_doorbell_arm_set(mlxsw_pci, q, q->consumer_counter);
+}
+
+static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q,
+ int page_index)
+{
+ return q->mem_item.mapaddr + MLXSW_PCI_PAGE_SIZE * page_index;
+}
+
+static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ struct mlxsw_pci_queue *q)
+{
+ int i;
+ int err;
+
+ q->producer_counter = 0;
+ q->consumer_counter = 0;
+
+ /* Set CQ of same number of this SDQ. */
+ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num);
+ mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 3);
+ mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
+ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
+ dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
+
+ mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr);
+ }
+
+ err = mlxsw_cmd_sw2hw_sdq(mlxsw_pci->core, mbox, q->num);
+ if (err)
+ return err;
+ mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+ return 0;
+}
+
+static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num);
+}
+
+static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
+ int index, char *frag_data, size_t frag_len,
+ int direction)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ dma_addr_t mapaddr;
+
+ mapaddr = pci_map_single(pdev, frag_data, frag_len, direction);
+ if (unlikely(pci_dma_mapping_error(pdev, mapaddr))) {
+ dev_err_ratelimited(&pdev->dev, "failed to dma map tx frag\n");
+ return -EIO;
+ }
+ mlxsw_pci_wqe_address_set(wqe, index, mapaddr);
+ mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
+ return 0;
+}
+
+static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
+ int index, int direction)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ size_t frag_len = mlxsw_pci_wqe_byte_count_get(wqe, index);
+ dma_addr_t mapaddr = mlxsw_pci_wqe_address_get(wqe, index);
+
+ if (!frag_len)
+ return;
+ pci_unmap_single(pdev, mapaddr, frag_len, direction);
+}
+
+static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue_elem_info *elem_info)
+{
+ size_t buf_len = MLXSW_PORT_MAX_MTU;
+ char *wqe = elem_info->elem;
+ struct sk_buff *skb;
+ int err;
+
+ elem_info->u.rdq.skb = NULL;
+ skb = netdev_alloc_skb_ip_align(NULL, buf_len);
+ if (!skb)
+ return -ENOMEM;
+
+ /* Assume that wqe was previously zeroed. */
+
+ err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
+ buf_len, DMA_FROM_DEVICE);
+ if (err)
+ goto err_frag_map;
+
+ elem_info->u.rdq.skb = skb;
+ return 0;
+
+err_frag_map:
+ dev_kfree_skb_any(skb);
+ return err;
+}
+
+static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue_elem_info *elem_info)
+{
+ struct sk_buff *skb;
+ char *wqe;
+
+ skb = elem_info->u.rdq.skb;
+ wqe = elem_info->elem;
+
+ mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+ dev_kfree_skb_any(skb);
+}
+
+static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ struct mlxsw_pci_queue *q)
+{
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ u8 sdq_count = mlxsw_pci_sdq_count(mlxsw_pci);
+ int i;
+ int err;
+
+ q->producer_counter = 0;
+ q->consumer_counter = 0;
+
+ /* Set CQ of same number of this RDQ with base
+ * above SDQ count as the lower ones are assigned to SDQs.
+ */
+ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, sdq_count + q->num);
+ mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
+ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
+ dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
+
+ mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr);
+ }
+
+ err = mlxsw_cmd_sw2hw_rdq(mlxsw_pci->core, mbox, q->num);
+ if (err)
+ return err;
+
+ mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+
+ for (i = 0; i < q->count; i++) {
+ elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
+ BUG_ON(!elem_info);
+ err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+ if (err)
+ goto rollback;
+ /* Everything is set up, ring doorbell to pass elem to HW */
+ q->producer_counter++;
+ mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+ }
+
+ return 0;
+
+rollback:
+ for (i--; i >= 0; i--) {
+ elem_info = mlxsw_pci_queue_elem_info_get(q, i);
+ mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
+ }
+ mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
+
+ return err;
+}
+
+static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ int i;
+
+ mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
+ for (i = 0; i < q->count; i++) {
+ elem_info = mlxsw_pci_queue_elem_info_get(q, i);
+ mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
+ }
+}
+
+static void mlxsw_pci_cq_pre_init(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ q->u.cq.v = mlxsw_pci->max_cqe_ver;
+
+ /* For SDQ it is pointless to use CQEv2, so use CQEv1 instead */
+ if (q->u.cq.v == MLXSW_PCI_CQE_V2 &&
+ q->num < mlxsw_pci->num_sdq_cqs)
+ q->u.cq.v = MLXSW_PCI_CQE_V1;
+}
+
+static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ struct mlxsw_pci_queue *q)
+{
+ int i;
+ int err;
+
+ q->consumer_counter = 0;
+
+ for (i = 0; i < q->count; i++) {
+ char *elem = mlxsw_pci_queue_elem_get(q, i);
+
+ mlxsw_pci_cqe_owner_set(q->u.cq.v, elem, 1);
+ }
+
+ if (q->u.cq.v == MLXSW_PCI_CQE_V1)
+ mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_1);
+ else if (q->u.cq.v == MLXSW_PCI_CQE_V2)
+ mlxsw_cmd_mbox_sw2hw_cq_cqe_ver_set(mbox,
+ MLXSW_CMD_MBOX_SW2HW_CQ_CQE_VER_2);
+
+ mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
+ mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
+ mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
+ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
+ dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
+
+ mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, i, mapaddr);
+ }
+ err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
+ if (err)
+ return err;
+ mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+ mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+ return 0;
+}
+
+static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
+}
+
+static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q,
+ u16 consumer_counter_limit,
+ char *cqe)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ char *wqe;
+ struct sk_buff *skb;
+ int i;
+
+ spin_lock(&q->lock);
+ elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ skb = elem_info->u.sdq.skb;
+ wqe = elem_info->elem;
+ for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
+ mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ elem_info->u.sdq.skb = NULL;
+
+ if (q->consumer_counter++ != consumer_counter_limit)
+ dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in SDQ\n");
+ spin_unlock(&q->lock);
+}
+
+static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q,
+ u16 consumer_counter_limit,
+ enum mlxsw_pci_cqe_v cqe_v, char *cqe)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ char *wqe;
+ struct sk_buff *skb;
+ struct mlxsw_rx_info rx_info;
+ u16 byte_count;
+ int err;
+
+ elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ skb = elem_info->u.sdq.skb;
+ if (!skb)
+ return;
+ wqe = elem_info->elem;
+ mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+
+ if (q->consumer_counter++ != consumer_counter_limit)
+ dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
+
+ if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
+ rx_info.is_lag = true;
+ rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
+ rx_info.lag_port_index =
+ mlxsw_pci_cqe_lag_subport_get(cqe_v, cqe);
+ } else {
+ rx_info.is_lag = false;
+ rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
+ }
+
+ rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe);
+
+ byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
+ if (mlxsw_pci_cqe_crc_get(cqe_v, cqe))
+ byte_count -= ETH_FCS_LEN;
+ skb_put(skb, byte_count);
+ mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
+
+ memset(wqe, 0, q->elem_size);
+ err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+ if (err)
+ dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+ /* Everything is set up, ring doorbell to pass elem to HW */
+ q->producer_counter++;
+ mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+ return;
+}
+
+static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
+{
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ char *elem;
+ bool owner_bit;
+
+ elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ elem = elem_info->elem;
+ owner_bit = mlxsw_pci_cqe_owner_get(q->u.cq.v, elem);
+ if (mlxsw_pci_elem_hw_owned(q, owner_bit))
+ return NULL;
+ q->consumer_counter++;
+ rmb(); /* make sure we read owned bit before the rest of elem */
+ return elem;
+}
+
+static void mlxsw_pci_cq_tasklet(unsigned long data)
+{
+ struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data;
+ struct mlxsw_pci *mlxsw_pci = q->pci;
+ char *cqe;
+ int items = 0;
+ int credits = q->count >> 1;
+
+ while ((cqe = mlxsw_pci_cq_sw_cqe_get(q))) {
+ u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
+ u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe);
+ u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe);
+ char ncqe[MLXSW_PCI_CQE_SIZE_MAX];
+
+ memcpy(ncqe, cqe, q->elem_size);
+ mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+
+ if (sendq) {
+ struct mlxsw_pci_queue *sdq;
+
+ sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn);
+ mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq,
+ wqe_counter, ncqe);
+ q->u.cq.comp_sdq_count++;
+ } else {
+ struct mlxsw_pci_queue *rdq;
+
+ rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn);
+ mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq,
+ wqe_counter, q->u.cq.v, ncqe);
+ q->u.cq.comp_rdq_count++;
+ }
+ if (++items == credits)
+ break;
+ }
+ if (items)
+ mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+}
+
+static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q)
+{
+ return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_COUNT :
+ MLXSW_PCI_CQE01_COUNT;
+}
+
+static u8 mlxsw_pci_cq_elem_size(const struct mlxsw_pci_queue *q)
+{
+ return q->u.cq.v == MLXSW_PCI_CQE_V2 ? MLXSW_PCI_CQE2_SIZE :
+ MLXSW_PCI_CQE01_SIZE;
+}
+
+static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ struct mlxsw_pci_queue *q)
+{
+ int i;
+ int err;
+
+ q->consumer_counter = 0;
+
+ for (i = 0; i < q->count; i++) {
+ char *elem = mlxsw_pci_queue_elem_get(q, i);
+
+ mlxsw_pci_eqe_owner_set(elem, 1);
+ }
+
+ mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */
+ mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */
+ mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count));
+ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
+ dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
+
+ mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, i, mapaddr);
+ }
+ err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num);
+ if (err)
+ return err;
+ mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+ mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+ return 0;
+}
+
+static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q)
+{
+ mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num);
+}
+
+static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe)
+{
+ mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe);
+ mlxsw_pci->cmd.comp.out_param =
+ ((u64) mlxsw_pci_eqe_cmd_out_param_h_get(eqe)) << 32 |
+ mlxsw_pci_eqe_cmd_out_param_l_get(eqe);
+ mlxsw_pci->cmd.wait_done = true;
+ wake_up(&mlxsw_pci->cmd.wait);
+}
+
+static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
+{
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ char *elem;
+ bool owner_bit;
+
+ elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+ elem = elem_info->elem;
+ owner_bit = mlxsw_pci_eqe_owner_get(elem);
+ if (mlxsw_pci_elem_hw_owned(q, owner_bit))
+ return NULL;
+ q->consumer_counter++;
+ rmb(); /* make sure we read owned bit before the rest of elem */
+ return elem;
+}
+
+static void mlxsw_pci_eq_tasklet(unsigned long data)
+{
+ struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data;
+ struct mlxsw_pci *mlxsw_pci = q->pci;
+ u8 cq_count = mlxsw_pci_cq_count(mlxsw_pci);
+ unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_MAX)];
+ char *eqe;
+ u8 cqn;
+ bool cq_handle = false;
+ int items = 0;
+ int credits = q->count >> 1;
+
+ memset(&active_cqns, 0, sizeof(active_cqns));
+
+ while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) {
+
+ /* Command interface completion events are always received on
+ * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events
+ * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1).
+ */
+ switch (q->num) {
+ case MLXSW_PCI_EQ_ASYNC_NUM:
+ mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe);
+ q->u.eq.ev_cmd_count++;
+ break;
+ case MLXSW_PCI_EQ_COMP_NUM:
+ cqn = mlxsw_pci_eqe_cqn_get(eqe);
+ set_bit(cqn, active_cqns);
+ cq_handle = true;
+ q->u.eq.ev_comp_count++;
+ break;
+ default:
+ q->u.eq.ev_other_count++;
+ }
+ if (++items == credits)
+ break;
+ }
+ if (items) {
+ mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+ mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+ }
+
+ if (!cq_handle)
+ return;
+ for_each_set_bit(cqn, active_cqns, cq_count) {
+ q = mlxsw_pci_cq_get(mlxsw_pci, cqn);
+ mlxsw_pci_queue_tasklet_schedule(q);
+ }
+}
+
+struct mlxsw_pci_queue_ops {
+ const char *name;
+ enum mlxsw_pci_queue_type type;
+ void (*pre_init)(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q);
+ int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ struct mlxsw_pci_queue *q);
+ void (*fini)(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_queue *q);
+ void (*tasklet)(unsigned long data);
+ u16 (*elem_count_f)(const struct mlxsw_pci_queue *q);
+ u8 (*elem_size_f)(const struct mlxsw_pci_queue *q);
+ u16 elem_count;
+ u8 elem_size;
+};
+
+static const struct mlxsw_pci_queue_ops mlxsw_pci_sdq_ops = {
+ .type = MLXSW_PCI_QUEUE_TYPE_SDQ,
+ .init = mlxsw_pci_sdq_init,
+ .fini = mlxsw_pci_sdq_fini,
+ .elem_count = MLXSW_PCI_WQE_COUNT,
+ .elem_size = MLXSW_PCI_WQE_SIZE,
+};
+
+static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = {
+ .type = MLXSW_PCI_QUEUE_TYPE_RDQ,
+ .init = mlxsw_pci_rdq_init,
+ .fini = mlxsw_pci_rdq_fini,
+ .elem_count = MLXSW_PCI_WQE_COUNT,
+ .elem_size = MLXSW_PCI_WQE_SIZE
+};
+
+static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = {
+ .type = MLXSW_PCI_QUEUE_TYPE_CQ,
+ .pre_init = mlxsw_pci_cq_pre_init,
+ .init = mlxsw_pci_cq_init,
+ .fini = mlxsw_pci_cq_fini,
+ .tasklet = mlxsw_pci_cq_tasklet,
+ .elem_count_f = mlxsw_pci_cq_elem_count,
+ .elem_size_f = mlxsw_pci_cq_elem_size
+};
+
+static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = {
+ .type = MLXSW_PCI_QUEUE_TYPE_EQ,
+ .init = mlxsw_pci_eq_init,
+ .fini = mlxsw_pci_eq_fini,
+ .tasklet = mlxsw_pci_eq_tasklet,
+ .elem_count = MLXSW_PCI_EQE_COUNT,
+ .elem_size = MLXSW_PCI_EQE_SIZE
+};
+
+static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ const struct mlxsw_pci_queue_ops *q_ops,
+ struct mlxsw_pci_queue *q, u8 q_num)
+{
+ struct mlxsw_pci_mem_item *mem_item = &q->mem_item;
+ int i;
+ int err;
+
+ q->num = q_num;
+ if (q_ops->pre_init)
+ q_ops->pre_init(mlxsw_pci, q);
+
+ spin_lock_init(&q->lock);
+ q->count = q_ops->elem_count_f ? q_ops->elem_count_f(q) :
+ q_ops->elem_count;
+ q->elem_size = q_ops->elem_size_f ? q_ops->elem_size_f(q) :
+ q_ops->elem_size;
+ q->type = q_ops->type;
+ q->pci = mlxsw_pci;
+
+ if (q_ops->tasklet)
+ tasklet_init(&q->tasklet, q_ops->tasklet, (unsigned long) q);
+
+ mem_item->size = MLXSW_PCI_AQ_SIZE;
+ mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev,
+ mem_item->size,
+ &mem_item->mapaddr);
+ if (!mem_item->buf)
+ return -ENOMEM;
+ memset(mem_item->buf, 0, mem_item->size);
+
+ q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL);
+ if (!q->elem_info) {
+ err = -ENOMEM;
+ goto err_elem_info_alloc;
+ }
+
+ /* Initialize dma mapped elements info elem_info for
+ * future easy access.
+ */
+ for (i = 0; i < q->count; i++) {
+ struct mlxsw_pci_queue_elem_info *elem_info;
+
+ elem_info = mlxsw_pci_queue_elem_info_get(q, i);
+ elem_info->elem =
+ __mlxsw_pci_queue_elem_get(q, q->elem_size, i);
+ }
+
+ mlxsw_cmd_mbox_zero(mbox);
+ err = q_ops->init(mlxsw_pci, mbox, q);
+ if (err)
+ goto err_q_ops_init;
+ return 0;
+
+err_q_ops_init:
+ kfree(q->elem_info);
+err_elem_info_alloc:
+ pci_free_consistent(mlxsw_pci->pdev, mem_item->size,
+ mem_item->buf, mem_item->mapaddr);
+ return err;
+}
+
+static void mlxsw_pci_queue_fini(struct mlxsw_pci *mlxsw_pci,
+ const struct mlxsw_pci_queue_ops *q_ops,
+ struct mlxsw_pci_queue *q)
+{
+ struct mlxsw_pci_mem_item *mem_item = &q->mem_item;
+
+ q_ops->fini(mlxsw_pci, q);
+ kfree(q->elem_info);
+ pci_free_consistent(mlxsw_pci->pdev, mem_item->size,
+ mem_item->buf, mem_item->mapaddr);
+}
+
+static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ const struct mlxsw_pci_queue_ops *q_ops,
+ u8 num_qs)
+{
+ struct mlxsw_pci_queue_type_group *queue_group;
+ int i;
+ int err;
+
+ queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type);
+ queue_group->q = kcalloc(num_qs, sizeof(*queue_group->q), GFP_KERNEL);
+ if (!queue_group->q)
+ return -ENOMEM;
+
+ for (i = 0; i < num_qs; i++) {
+ err = mlxsw_pci_queue_init(mlxsw_pci, mbox, q_ops,
+ &queue_group->q[i], i);
+ if (err)
+ goto err_queue_init;
+ }
+ queue_group->count = num_qs;
+
+ return 0;
+
+err_queue_init:
+ for (i--; i >= 0; i--)
+ mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]);
+ kfree(queue_group->q);
+ return err;
+}
+
+static void mlxsw_pci_queue_group_fini(struct mlxsw_pci *mlxsw_pci,
+ const struct mlxsw_pci_queue_ops *q_ops)
+{
+ struct mlxsw_pci_queue_type_group *queue_group;
+ int i;
+
+ queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type);
+ for (i = 0; i < queue_group->count; i++)
+ mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]);
+ kfree(queue_group->q);
+}
+
+static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ u8 num_sdqs;
+ u8 sdq_log2sz;
+ u8 num_rdqs;
+ u8 rdq_log2sz;
+ u8 num_cqs;
+ u8 cq_log2sz;
+ u8 cqv2_log2sz;
+ u8 num_eqs;
+ u8 eq_log2sz;
+ int err;
+
+ mlxsw_cmd_mbox_zero(mbox);
+ err = mlxsw_cmd_query_aq_cap(mlxsw_pci->core, mbox);
+ if (err)
+ return err;
+
+ num_sdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_sdqs_get(mbox);
+ sdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_sdq_sz_get(mbox);
+ num_rdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_rdqs_get(mbox);
+ rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox);
+ num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox);
+ cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox);
+ cqv2_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cqv2_sz_get(mbox);
+ num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox);
+ eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox);
+
+ if (num_sdqs + num_rdqs > num_cqs ||
+ num_cqs > MLXSW_PCI_CQS_MAX || num_eqs != MLXSW_PCI_EQS_COUNT) {
+ dev_err(&pdev->dev, "Unsupported number of queues\n");
+ return -EINVAL;
+ }
+
+ if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
+ (1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
+ (1 << cq_log2sz != MLXSW_PCI_CQE01_COUNT) ||
+ (mlxsw_pci->max_cqe_ver == MLXSW_PCI_CQE_V2 &&
+ (1 << cqv2_log2sz != MLXSW_PCI_CQE2_COUNT)) ||
+ (1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) {
+ dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n");
+ return -EINVAL;
+ }
+
+ mlxsw_pci->num_sdq_cqs = num_sdqs;
+
+ err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops,
+ num_eqs);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize event queues\n");
+ return err;
+ }
+
+ err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_cq_ops,
+ num_cqs);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize completion queues\n");
+ goto err_cqs_init;
+ }
+
+ err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_sdq_ops,
+ num_sdqs);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize send descriptor queues\n");
+ goto err_sdqs_init;
+ }
+
+ err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_rdq_ops,
+ num_rdqs);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize receive descriptor queues\n");
+ goto err_rdqs_init;
+ }
+
+ /* We have to poll in command interface until queues are initialized */
+ mlxsw_pci->cmd.nopoll = true;
+ return 0;
+
+err_rdqs_init:
+ mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops);
+err_sdqs_init:
+ mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops);
+err_cqs_init:
+ mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops);
+ return err;
+}
+
+static void mlxsw_pci_aqs_fini(struct mlxsw_pci *mlxsw_pci)
+{
+ mlxsw_pci->cmd.nopoll = false;
+ mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_rdq_ops);
+ mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops);
+ mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops);
+ mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops);
+}
+
+static void
+mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
+ char *mbox, int index,
+ const struct mlxsw_swid_config *swid)
+{
+ u8 mask = 0;
+
+ if (swid->used_type) {
+ mlxsw_cmd_mbox_config_profile_swid_config_type_set(
+ mbox, index, swid->type);
+ mask |= 1;
+ }
+ if (swid->used_properties) {
+ mlxsw_cmd_mbox_config_profile_swid_config_properties_set(
+ mbox, index, swid->properties);
+ mask |= 2;
+ }
+ mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask);
+}
+
+static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ struct mlxsw_res *res)
+{
+ int index, i;
+ u64 data;
+ u16 id;
+ int err;
+
+ if (!res)
+ return 0;
+
+ mlxsw_cmd_mbox_zero(mbox);
+
+ for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES;
+ index++) {
+ err = mlxsw_cmd_query_resources(mlxsw_pci->core, mbox, index);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) {
+ id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i);
+ data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i);
+
+ if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID)
+ return 0;
+
+ mlxsw_res_parse(res, id, data);
+ }
+ }
+
+ /* If after MLXSW_RESOURCES_QUERY_MAX_QUERIES we still didn't get
+ * MLXSW_RESOURCES_TABLE_END_ID, something went bad in the FW.
+ */
+ return -EIO;
+}
+
+static int
+mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_pci *mlxsw_pci,
+ const struct mlxsw_config_profile *profile,
+ struct mlxsw_res *res)
+{
+ u64 single_size, double_size, linear_size;
+ int err;
+
+ err = mlxsw_core_kvd_sizes_get(mlxsw_pci->core, profile,
+ &single_size, &double_size,
+ &linear_size);
+ if (err)
+ return err;
+
+ MLXSW_RES_SET(res, KVD_SINGLE_SIZE, single_size);
+ MLXSW_RES_SET(res, KVD_DOUBLE_SIZE, double_size);
+ MLXSW_RES_SET(res, KVD_LINEAR_SIZE, linear_size);
+
+ return 0;
+}
+
+static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ const struct mlxsw_config_profile *profile,
+ struct mlxsw_res *res)
+{
+ int i;
+ int err;
+
+ mlxsw_cmd_mbox_zero(mbox);
+
+ if (profile->used_max_vepa_channels) {
+ mlxsw_cmd_mbox_config_profile_set_max_vepa_channels_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_vepa_channels_set(
+ mbox, profile->max_vepa_channels);
+ }
+ if (profile->used_max_mid) {
+ mlxsw_cmd_mbox_config_profile_set_max_mid_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_mid_set(
+ mbox, profile->max_mid);
+ }
+ if (profile->used_max_pgt) {
+ mlxsw_cmd_mbox_config_profile_set_max_pgt_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_pgt_set(
+ mbox, profile->max_pgt);
+ }
+ if (profile->used_max_system_port) {
+ mlxsw_cmd_mbox_config_profile_set_max_system_port_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_system_port_set(
+ mbox, profile->max_system_port);
+ }
+ if (profile->used_max_vlan_groups) {
+ mlxsw_cmd_mbox_config_profile_set_max_vlan_groups_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_vlan_groups_set(
+ mbox, profile->max_vlan_groups);
+ }
+ if (profile->used_max_regions) {
+ mlxsw_cmd_mbox_config_profile_set_max_regions_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_regions_set(
+ mbox, profile->max_regions);
+ }
+ if (profile->used_flood_tables) {
+ mlxsw_cmd_mbox_config_profile_set_flood_tables_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_flood_tables_set(
+ mbox, profile->max_flood_tables);
+ mlxsw_cmd_mbox_config_profile_max_vid_flood_tables_set(
+ mbox, profile->max_vid_flood_tables);
+ mlxsw_cmd_mbox_config_profile_max_fid_offset_flood_tables_set(
+ mbox, profile->max_fid_offset_flood_tables);
+ mlxsw_cmd_mbox_config_profile_fid_offset_flood_table_size_set(
+ mbox, profile->fid_offset_flood_table_size);
+ mlxsw_cmd_mbox_config_profile_max_fid_flood_tables_set(
+ mbox, profile->max_fid_flood_tables);
+ mlxsw_cmd_mbox_config_profile_fid_flood_table_size_set(
+ mbox, profile->fid_flood_table_size);
+ }
+ if (profile->used_flood_mode) {
+ mlxsw_cmd_mbox_config_profile_set_flood_mode_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_flood_mode_set(
+ mbox, profile->flood_mode);
+ }
+ if (profile->used_max_ib_mc) {
+ mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_ib_mc_set(
+ mbox, profile->max_ib_mc);
+ }
+ if (profile->used_max_pkey) {
+ mlxsw_cmd_mbox_config_profile_set_max_pkey_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_max_pkey_set(
+ mbox, profile->max_pkey);
+ }
+ if (profile->used_ar_sec) {
+ mlxsw_cmd_mbox_config_profile_set_ar_sec_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_ar_sec_set(
+ mbox, profile->ar_sec);
+ }
+ if (profile->used_adaptive_routing_group_cap) {
+ mlxsw_cmd_mbox_config_profile_set_adaptive_routing_group_cap_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
+ mbox, profile->adaptive_routing_group_cap);
+ }
+ if (profile->used_kvd_sizes && MLXSW_RES_VALID(res, KVD_SIZE)) {
+ err = mlxsw_pci_profile_get_kvd_sizes(mlxsw_pci, profile, res);
+ if (err)
+ return err;
+
+ mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1);
+ mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox,
+ MLXSW_RES_GET(res, KVD_LINEAR_SIZE));
+ mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox,
+ 1);
+ mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox,
+ MLXSW_RES_GET(res, KVD_SINGLE_SIZE));
+ mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set(
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox,
+ MLXSW_RES_GET(res, KVD_DOUBLE_SIZE));
+ }
+
+ for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
+ mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
+ &profile->swid_config[i]);
+
+ if (mlxsw_pci->max_cqe_ver > MLXSW_PCI_CQE_V0) {
+ mlxsw_cmd_mbox_config_profile_set_cqe_version_set(mbox, 1);
+ mlxsw_cmd_mbox_config_profile_cqe_version_set(mbox, 1);
+ }
+
+ return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox);
+}
+
+static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox)
+{
+ struct mlxsw_bus_info *bus_info = &mlxsw_pci->bus_info;
+ int err;
+
+ mlxsw_cmd_mbox_zero(mbox);
+ err = mlxsw_cmd_boardinfo(mlxsw_pci->core, mbox);
+ if (err)
+ return err;
+ mlxsw_cmd_mbox_boardinfo_vsd_memcpy_from(mbox, bus_info->vsd);
+ mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(mbox, bus_info->psid);
+ return 0;
+}
+
+static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+ u16 num_pages)
+{
+ struct mlxsw_pci_mem_item *mem_item;
+ int nent = 0;
+ int i;
+ int err;
+
+ mlxsw_pci->fw_area.items = kcalloc(num_pages, sizeof(*mem_item),
+ GFP_KERNEL);
+ if (!mlxsw_pci->fw_area.items)
+ return -ENOMEM;
+ mlxsw_pci->fw_area.count = num_pages;
+
+ mlxsw_cmd_mbox_zero(mbox);
+ for (i = 0; i < num_pages; i++) {
+ mem_item = &mlxsw_pci->fw_area.items[i];
+
+ mem_item->size = MLXSW_PCI_PAGE_SIZE;
+ mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev,
+ mem_item->size,
+ &mem_item->mapaddr);
+ if (!mem_item->buf) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ mlxsw_cmd_mbox_map_fa_pa_set(mbox, nent, mem_item->mapaddr);
+ mlxsw_cmd_mbox_map_fa_log2size_set(mbox, nent, 0); /* 1 page */
+ if (++nent == MLXSW_CMD_MAP_FA_VPM_ENTRIES_MAX) {
+ err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent);
+ if (err)
+ goto err_cmd_map_fa;
+ nent = 0;
+ mlxsw_cmd_mbox_zero(mbox);
+ }
+ }
+
+ if (nent) {
+ err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, nent);
+ if (err)
+ goto err_cmd_map_fa;
+ }
+
+ return 0;
+
+err_cmd_map_fa:
+err_alloc:
+ for (i--; i >= 0; i--) {
+ mem_item = &mlxsw_pci->fw_area.items[i];
+
+ pci_free_consistent(mlxsw_pci->pdev, mem_item->size,
+ mem_item->buf, mem_item->mapaddr);
+ }
+ kfree(mlxsw_pci->fw_area.items);
+ return err;
+}
+
+static void mlxsw_pci_fw_area_fini(struct mlxsw_pci *mlxsw_pci)
+{
+ struct mlxsw_pci_mem_item *mem_item;
+ int i;
+
+ mlxsw_cmd_unmap_fa(mlxsw_pci->core);
+
+ for (i = 0; i < mlxsw_pci->fw_area.count; i++) {
+ mem_item = &mlxsw_pci->fw_area.items[i];
+
+ pci_free_consistent(mlxsw_pci->pdev, mem_item->size,
+ mem_item->buf, mem_item->mapaddr);
+ }
+ kfree(mlxsw_pci->fw_area.items);
+}
+
+static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id)
+{
+ struct mlxsw_pci *mlxsw_pci = dev_id;
+ struct mlxsw_pci_queue *q;
+ int i;
+
+ for (i = 0; i < MLXSW_PCI_EQS_COUNT; i++) {
+ q = mlxsw_pci_eq_get(mlxsw_pci, i);
+ mlxsw_pci_queue_tasklet_schedule(q);
+ }
+ return IRQ_HANDLED;
+}
+
+static int mlxsw_pci_mbox_alloc(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_mem_item *mbox)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ int err = 0;
+
+ mbox->size = MLXSW_CMD_MBOX_SIZE;
+ mbox->buf = pci_alloc_consistent(pdev, MLXSW_CMD_MBOX_SIZE,
+ &mbox->mapaddr);
+ if (!mbox->buf) {
+ dev_err(&pdev->dev, "Failed allocating memory for mailbox\n");
+ err = -ENOMEM;
+ }
+
+ return err;
+}
+
+static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci,
+ struct mlxsw_pci_mem_item *mbox)
+{
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+
+ pci_free_consistent(pdev, MLXSW_CMD_MBOX_SIZE, mbox->buf,
+ mbox->mapaddr);
+}
+
+static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
+ const struct pci_device_id *id)
+{
+ unsigned long end;
+ char mrsr_pl[MLXSW_REG_MRSR_LEN];
+ int err;
+
+ mlxsw_reg_mrsr_pack(mrsr_pl);
+ err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+ if (err)
+ return err;
+ if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) {
+ msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
+ return 0;
+ }
+
+ /* We must wait for the HW to become responsive once again. */
+ msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
+
+ end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
+ do {
+ u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
+
+ if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC)
+ return 0;
+ cond_resched();
+ } while (time_before(jiffies, end));
+ return -EBUSY;
+}
+
+static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci)
+{
+ int err;
+
+ err = pci_alloc_irq_vectors(mlxsw_pci->pdev, 1, 1, PCI_IRQ_MSIX);
+ if (err < 0)
+ dev_err(&mlxsw_pci->pdev->dev, "MSI-X init failed\n");
+ return err;
+}
+
+static void mlxsw_pci_free_irq_vectors(struct mlxsw_pci *mlxsw_pci)
+{
+ pci_free_irq_vectors(mlxsw_pci->pdev);
+}
+
+static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_config_profile *profile,
+ struct mlxsw_res *res)
+{
+ struct mlxsw_pci *mlxsw_pci = bus_priv;
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ char *mbox;
+ u16 num_pages;
+ int err;
+
+ mutex_init(&mlxsw_pci->cmd.lock);
+ init_waitqueue_head(&mlxsw_pci->cmd.wait);
+
+ mlxsw_pci->core = mlxsw_core;
+
+ mbox = mlxsw_cmd_mbox_alloc();
+ if (!mbox)
+ return -ENOMEM;
+
+ err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
+ if (err)
+ goto mbox_put;
+
+ err = mlxsw_pci_mbox_alloc(mlxsw_pci, &mlxsw_pci->cmd.out_mbox);
+ if (err)
+ goto err_out_mbox_alloc;
+
+ err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id);
+ if (err)
+ goto err_sw_reset;
+
+ err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
+ if (err < 0) {
+ dev_err(&pdev->dev, "MSI-X init failed\n");
+ goto err_alloc_irq;
+ }
+
+ err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
+ if (err)
+ goto err_query_fw;
+
+ mlxsw_pci->bus_info.fw_rev.major =
+ mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox);
+ mlxsw_pci->bus_info.fw_rev.minor =
+ mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox);
+ mlxsw_pci->bus_info.fw_rev.subminor =
+ mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox);
+
+ if (mlxsw_cmd_mbox_query_fw_cmd_interface_rev_get(mbox) != 1) {
+ dev_err(&pdev->dev, "Unsupported cmd interface revision ID queried from hw\n");
+ err = -EINVAL;
+ goto err_iface_rev;
+ }
+ if (mlxsw_cmd_mbox_query_fw_doorbell_page_bar_get(mbox) != 0) {
+ dev_err(&pdev->dev, "Unsupported doorbell page bar queried from hw\n");
+ err = -EINVAL;
+ goto err_doorbell_page_bar;
+ }
+
+ mlxsw_pci->doorbell_offset =
+ mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox);
+
+ num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox);
+ err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages);
+ if (err)
+ goto err_fw_area_init;
+
+ err = mlxsw_pci_boardinfo(mlxsw_pci, mbox);
+ if (err)
+ goto err_boardinfo;
+
+ err = mlxsw_pci_resources_query(mlxsw_pci, mbox, res);
+ if (err)
+ goto err_query_resources;
+
+ if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V2) &&
+ MLXSW_CORE_RES_GET(mlxsw_core, CQE_V2))
+ mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V2;
+ else if (MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V1) &&
+ MLXSW_CORE_RES_GET(mlxsw_core, CQE_V1))
+ mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V1;
+ else if ((MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0) &&
+ MLXSW_CORE_RES_GET(mlxsw_core, CQE_V0)) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_core, CQE_V0)) {
+ mlxsw_pci->max_cqe_ver = MLXSW_PCI_CQE_V0;
+ } else {
+ dev_err(&pdev->dev, "Invalid supported CQE version combination reported\n");
+ goto err_cqe_v_check;
+ }
+
+ err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res);
+ if (err)
+ goto err_config_profile;
+
+ err = mlxsw_pci_aqs_init(mlxsw_pci, mbox);
+ if (err)
+ goto err_aqs_init;
+
+ err = request_irq(pci_irq_vector(pdev, 0),
+ mlxsw_pci_eq_irq_handler, 0,
+ mlxsw_pci->bus_info.device_kind, mlxsw_pci);
+ if (err) {
+ dev_err(&pdev->dev, "IRQ request failed\n");
+ goto err_request_eq_irq;
+ }
+
+ goto mbox_put;
+
+err_request_eq_irq:
+ mlxsw_pci_aqs_fini(mlxsw_pci);
+err_aqs_init:
+err_config_profile:
+err_cqe_v_check:
+err_query_resources:
+err_boardinfo:
+ mlxsw_pci_fw_area_fini(mlxsw_pci);
+err_fw_area_init:
+err_doorbell_page_bar:
+err_iface_rev:
+err_query_fw:
+ mlxsw_pci_free_irq_vectors(mlxsw_pci);
+err_alloc_irq:
+err_sw_reset:
+ mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox);
+err_out_mbox_alloc:
+ mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
+mbox_put:
+ mlxsw_cmd_mbox_free(mbox);
+ return err;
+}
+
+static void mlxsw_pci_fini(void *bus_priv)
+{
+ struct mlxsw_pci *mlxsw_pci = bus_priv;
+
+ free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci);
+ mlxsw_pci_aqs_fini(mlxsw_pci);
+ mlxsw_pci_fw_area_fini(mlxsw_pci);
+ mlxsw_pci_free_irq_vectors(mlxsw_pci);
+ mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox);
+ mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.in_mbox);
+}
+
+static struct mlxsw_pci_queue *
+mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci,
+ const struct mlxsw_tx_info *tx_info)
+{
+ u8 sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci);
+
+ return mlxsw_pci_sdq_get(mlxsw_pci, sdqn);
+}
+
+static bool mlxsw_pci_skb_transmit_busy(void *bus_priv,
+ const struct mlxsw_tx_info *tx_info)
+{
+ struct mlxsw_pci *mlxsw_pci = bus_priv;
+ struct mlxsw_pci_queue *q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info);
+
+ return !mlxsw_pci_queue_elem_info_producer_get(q);
+}
+
+static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info)
+{
+ struct mlxsw_pci *mlxsw_pci = bus_priv;
+ struct mlxsw_pci_queue *q;
+ struct mlxsw_pci_queue_elem_info *elem_info;
+ char *wqe;
+ int i;
+ int err;
+
+ if (skb_shinfo(skb)->nr_frags > MLXSW_PCI_WQE_SG_ENTRIES - 1) {
+ err = skb_linearize(skb);
+ if (err)
+ return err;
+ }
+
+ q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info);
+ spin_lock_bh(&q->lock);
+ elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
+ if (!elem_info) {
+ /* queue is full */
+ err = -EAGAIN;
+ goto unlock;
+ }
+ elem_info->u.sdq.skb = skb;
+
+ wqe = elem_info->elem;
+ mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */
+ mlxsw_pci_wqe_lp_set(wqe, !!tx_info->is_emad);
+ mlxsw_pci_wqe_type_set(wqe, MLXSW_PCI_WQE_TYPE_ETHERNET);
+
+ err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
+ skb_headlen(skb), DMA_TO_DEVICE);
+ if (err)
+ goto unlock;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, i + 1,
+ skb_frag_address(frag),
+ skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (err)
+ goto unmap_frags;
+ }
+
+ /* Set unused sq entries byte count to zero. */
+ for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
+ mlxsw_pci_wqe_byte_count_set(wqe, i, 0);
+
+ /* Everything is set up, ring producer doorbell to get HW going */
+ q->producer_counter++;
+ mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+
+ goto unlock;
+
+unmap_frags:
+ for (; i >= 0; i--)
+ mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
+unlock:
+ spin_unlock_bh(&q->lock);
+ return err;
+}
+
+static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
+ u32 in_mod, bool out_mbox_direct,
+ char *in_mbox, size_t in_mbox_size,
+ char *out_mbox, size_t out_mbox_size,
+ u8 *p_status)
+{
+ struct mlxsw_pci *mlxsw_pci = bus_priv;
+ dma_addr_t in_mapaddr = 0, out_mapaddr = 0;
+ bool evreq = mlxsw_pci->cmd.nopoll;
+ unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
+ bool *p_wait_done = &mlxsw_pci->cmd.wait_done;
+ int err;
+
+ *p_status = MLXSW_CMD_STATUS_OK;
+
+ err = mutex_lock_interruptible(&mlxsw_pci->cmd.lock);
+ if (err)
+ return err;
+
+ if (in_mbox) {
+ memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size);
+ in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
+ }
+ mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr));
+ mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr));
+
+ if (out_mbox)
+ out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
+ mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr));
+ mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr));
+
+ mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod);
+ mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0);
+
+ *p_wait_done = false;
+
+ wmb(); /* all needs to be written before we write control register */
+ mlxsw_pci_write32(mlxsw_pci, CIR_CTRL,
+ MLXSW_PCI_CIR_CTRL_GO_BIT |
+ (evreq ? MLXSW_PCI_CIR_CTRL_EVREQ_BIT : 0) |
+ (opcode_mod << MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT) |
+ opcode);
+
+ if (!evreq) {
+ unsigned long end;
+
+ end = jiffies + timeout;
+ do {
+ u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL);
+
+ if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) {
+ *p_wait_done = true;
+ *p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT;
+ break;
+ }
+ cond_resched();
+ } while (time_before(jiffies, end));
+ } else {
+ wait_event_timeout(mlxsw_pci->cmd.wait, *p_wait_done, timeout);
+ *p_status = mlxsw_pci->cmd.comp.status;
+ }
+
+ err = 0;
+ if (*p_wait_done) {
+ if (*p_status)
+ err = -EIO;
+ } else {
+ err = -ETIMEDOUT;
+ }
+
+ if (!err && out_mbox && out_mbox_direct) {
+ /* Some commands don't use output param as address to mailbox
+ * but they store output directly into registers. In that case,
+ * copy registers into mbox buffer.
+ */
+ __be32 tmp;
+
+ if (!evreq) {
+ tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
+ CIR_OUT_PARAM_HI));
+ memcpy(out_mbox, &tmp, sizeof(tmp));
+ tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
+ CIR_OUT_PARAM_LO));
+ memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp));
+ }
+ } else if (!err && out_mbox) {
+ memcpy(out_mbox, mlxsw_pci->cmd.out_mbox.buf, out_mbox_size);
+ }
+
+ mutex_unlock(&mlxsw_pci->cmd.lock);
+
+ return err;
+}
+
+static const struct mlxsw_bus mlxsw_pci_bus = {
+ .kind = "pci",
+ .init = mlxsw_pci_init,
+ .fini = mlxsw_pci_fini,
+ .skb_transmit_busy = mlxsw_pci_skb_transmit_busy,
+ .skb_transmit = mlxsw_pci_skb_transmit,
+ .cmd_exec = mlxsw_pci_cmd_exec,
+ .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET,
+};
+
+static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ const char *driver_name = pdev->driver->name;
+ struct mlxsw_pci *mlxsw_pci;
+ bool called_again = false;
+ int err;
+
+ mlxsw_pci = kzalloc(sizeof(*mlxsw_pci), GFP_KERNEL);
+ if (!mlxsw_pci)
+ return -ENOMEM;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device failed\n");
+ goto err_pci_enable_device;
+ }
+
+ err = pci_request_regions(pdev, driver_name);
+ if (err) {
+ dev_err(&pdev->dev, "pci_request_regions failed\n");
+ goto err_pci_request_regions;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (!err) {
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n");
+ goto err_pci_set_dma_mask;
+ }
+ } else {
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "pci_set_dma_mask failed\n");
+ goto err_pci_set_dma_mask;
+ }
+ }
+
+ if (pci_resource_len(pdev, 0) < MLXSW_PCI_BAR0_SIZE) {
+ dev_err(&pdev->dev, "invalid PCI region size\n");
+ err = -EINVAL;
+ goto err_pci_resource_len_check;
+ }
+
+ mlxsw_pci->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!mlxsw_pci->hw_addr) {
+ dev_err(&pdev->dev, "ioremap failed\n");
+ err = -EIO;
+ goto err_ioremap;
+ }
+ pci_set_master(pdev);
+
+ mlxsw_pci->pdev = pdev;
+ pci_set_drvdata(pdev, mlxsw_pci);
+
+ mlxsw_pci->bus_info.device_kind = driver_name;
+ mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
+ mlxsw_pci->bus_info.dev = &pdev->dev;
+ mlxsw_pci->id = id;
+
+again:
+ err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
+ &mlxsw_pci_bus, mlxsw_pci, false,
+ NULL);
+ /* -EAGAIN is returned in case the FW was updated. FW needs
+ * a reset, so lets try to call mlxsw_core_bus_device_register()
+ * again.
+ */
+ if (err == -EAGAIN && !called_again) {
+ called_again = true;
+ goto again;
+ } else if (err) {
+ dev_err(&pdev->dev, "cannot register bus device\n");
+ goto err_bus_device_register;
+ }
+
+ return 0;
+
+err_bus_device_register:
+ iounmap(mlxsw_pci->hw_addr);
+err_ioremap:
+err_pci_resource_len_check:
+err_pci_set_dma_mask:
+ pci_release_regions(pdev);
+err_pci_request_regions:
+ pci_disable_device(pdev);
+err_pci_enable_device:
+ kfree(mlxsw_pci);
+ return err;
+}
+
+static void mlxsw_pci_remove(struct pci_dev *pdev)
+{
+ struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
+
+ mlxsw_core_bus_device_unregister(mlxsw_pci->core, false);
+ iounmap(mlxsw_pci->hw_addr);
+ pci_release_regions(mlxsw_pci->pdev);
+ pci_disable_device(mlxsw_pci->pdev);
+ kfree(mlxsw_pci);
+}
+
+int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
+{
+ pci_driver->probe = mlxsw_pci_probe;
+ pci_driver->remove = mlxsw_pci_remove;
+ pci_driver->shutdown = mlxsw_pci_remove;
+ return pci_register_driver(pci_driver);
+}
+EXPORT_SYMBOL(mlxsw_pci_driver_register);
+
+void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver)
+{
+ pci_unregister_driver(pci_driver);
+}
+EXPORT_SYMBOL(mlxsw_pci_driver_unregister);
+
+static int __init mlxsw_pci_module_init(void)
+{
+ return 0;
+}
+
+static void __exit mlxsw_pci_module_exit(void)
+{
+}
+
+module_init(mlxsw_pci_module_init);
+module_exit(mlxsw_pci_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox switch PCI interface driver");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
new file mode 100644
index 000000000..946339e13
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_PCI_H
+#define _MLXSW_PCI_H
+
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_MELLANOX_SWITCHX2 0xc738
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM2 0xcf6c
+#define PCI_DEVICE_ID_MELLANOX_SWITCHIB 0xcb20
+#define PCI_DEVICE_ID_MELLANOX_SWITCHIB2 0xcf08
+
+#if IS_ENABLED(CONFIG_MLXSW_PCI)
+
+int mlxsw_pci_driver_register(struct pci_driver *pci_driver);
+void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver);
+
+#else
+
+static inline int
+mlxsw_pci_driver_register(struct pci_driver *pci_driver)
+{
+ return 0;
+}
+
+static inline void
+mlxsw_pci_driver_unregister(struct pci_driver *pci_driver)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
new file mode 100644
index 000000000..100618531
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_PCI_HW_H
+#define _MLXSW_PCI_HW_H
+
+#include <linux/bitops.h>
+
+#include "item.h"
+
+#define MLXSW_PCI_BAR0_SIZE (1024 * 1024) /* 1MB */
+#define MLXSW_PCI_PAGE_SIZE 4096
+
+#define MLXSW_PCI_CIR_BASE 0x71000
+#define MLXSW_PCI_CIR_IN_PARAM_HI MLXSW_PCI_CIR_BASE
+#define MLXSW_PCI_CIR_IN_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x04)
+#define MLXSW_PCI_CIR_IN_MODIFIER (MLXSW_PCI_CIR_BASE + 0x08)
+#define MLXSW_PCI_CIR_OUT_PARAM_HI (MLXSW_PCI_CIR_BASE + 0x0C)
+#define MLXSW_PCI_CIR_OUT_PARAM_LO (MLXSW_PCI_CIR_BASE + 0x10)
+#define MLXSW_PCI_CIR_TOKEN (MLXSW_PCI_CIR_BASE + 0x14)
+#define MLXSW_PCI_CIR_CTRL (MLXSW_PCI_CIR_BASE + 0x18)
+#define MLXSW_PCI_CIR_CTRL_GO_BIT BIT(23)
+#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT BIT(22)
+#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT 12
+#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT 24
+#define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000
+
+#define MLXSW_PCI_SW_RESET 0xF0010
+#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100
+#define MLXSW_PCI_FW_READY 0xA1844
+#define MLXSW_PCI_FW_READY_MASK 0xFFFF
+#define MLXSW_PCI_FW_READY_MAGIC 0x5E
+
+#define MLXSW_PCI_DOORBELL_SDQ_OFFSET 0x000
+#define MLXSW_PCI_DOORBELL_RDQ_OFFSET 0x200
+#define MLXSW_PCI_DOORBELL_CQ_OFFSET 0x400
+#define MLXSW_PCI_DOORBELL_EQ_OFFSET 0x600
+#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET 0x800
+#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET 0xA00
+
+#define MLXSW_PCI_DOORBELL(offset, type_offset, num) \
+ ((offset) + (type_offset) + (num) * 4)
+
+#define MLXSW_PCI_CQS_MAX 96
+#define MLXSW_PCI_EQS_COUNT 2
+#define MLXSW_PCI_EQ_ASYNC_NUM 0
+#define MLXSW_PCI_EQ_COMP_NUM 1
+
+#define MLXSW_PCI_AQ_PAGES 8
+#define MLXSW_PCI_AQ_SIZE (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
+#define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */
+#define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */
+#define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */
+#define MLXSW_PCI_CQE_SIZE_MAX MLXSW_PCI_CQE2_SIZE
+#define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */
+#define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
+#define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE)
+#define MLXSW_PCI_CQE2_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE2_SIZE)
+#define MLXSW_PCI_EQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
+#define MLXSW_PCI_EQE_UPDATE_COUNT 0x80
+
+#define MLXSW_PCI_WQE_SG_ENTRIES 3
+#define MLXSW_PCI_WQE_TYPE_ETHERNET 0xA
+
+/* pci_wqe_c
+ * If set it indicates that a completion should be reported upon
+ * execution of this descriptor.
+ */
+MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1);
+
+/* pci_wqe_lp
+ * Local Processing, set if packet should be processed by the local
+ * switch hardware:
+ * For Ethernet EMAD (Direct Route and non Direct Route) -
+ * must be set if packet destination is local device
+ * For InfiniBand CTL - must be set if packet destination is local device
+ * Otherwise it must be clear
+ * Local Process packets must not exceed the size of 2K (including payload
+ * and headers).
+ */
+MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1);
+
+/* pci_wqe_type
+ * Packet type.
+ */
+MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4);
+
+/* pci_wqe_byte_count
+ * Size of i-th scatter/gather entry, 0 if entry is unused.
+ */
+MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
+
+/* pci_wqe_address
+ * Physical address of i-th scatter/gather entry.
+ * Gather Entries must be 2Byte aligned.
+ */
+MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
+
+enum mlxsw_pci_cqe_v {
+ MLXSW_PCI_CQE_V0,
+ MLXSW_PCI_CQE_V1,
+ MLXSW_PCI_CQE_V2,
+};
+
+#define mlxsw_pci_cqe_item_helpers(name, v0, v1, v2) \
+static inline u32 mlxsw_pci_cqe_##name##_get(enum mlxsw_pci_cqe_v v, char *cqe) \
+{ \
+ switch (v) { \
+ default: \
+ case MLXSW_PCI_CQE_V0: \
+ return mlxsw_pci_cqe##v0##_##name##_get(cqe); \
+ case MLXSW_PCI_CQE_V1: \
+ return mlxsw_pci_cqe##v1##_##name##_get(cqe); \
+ case MLXSW_PCI_CQE_V2: \
+ return mlxsw_pci_cqe##v2##_##name##_get(cqe); \
+ } \
+} \
+static inline void mlxsw_pci_cqe_##name##_set(enum mlxsw_pci_cqe_v v, \
+ char *cqe, u32 val) \
+{ \
+ switch (v) { \
+ default: \
+ case MLXSW_PCI_CQE_V0: \
+ mlxsw_pci_cqe##v0##_##name##_set(cqe, val); \
+ break; \
+ case MLXSW_PCI_CQE_V1: \
+ mlxsw_pci_cqe##v1##_##name##_set(cqe, val); \
+ break; \
+ case MLXSW_PCI_CQE_V2: \
+ mlxsw_pci_cqe##v2##_##name##_set(cqe, val); \
+ break; \
+ } \
+}
+
+/* pci_cqe_lag
+ * Packet arrives from a port which is a LAG
+ */
+MLXSW_ITEM32(pci, cqe0, lag, 0x00, 23, 1);
+MLXSW_ITEM32(pci, cqe12, lag, 0x00, 24, 1);
+mlxsw_pci_cqe_item_helpers(lag, 0, 12, 12);
+
+/* pci_cqe_system_port/lag_id
+ * When lag=0: System port on which the packet was received
+ * When lag=1:
+ * bits [15:4] LAG ID on which the packet was received
+ * bits [3:0] sub_port on which the packet was received
+ */
+MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
+MLXSW_ITEM32(pci, cqe0, lag_id, 0x00, 4, 12);
+MLXSW_ITEM32(pci, cqe12, lag_id, 0x00, 0, 16);
+mlxsw_pci_cqe_item_helpers(lag_id, 0, 12, 12);
+MLXSW_ITEM32(pci, cqe0, lag_subport, 0x00, 0, 4);
+MLXSW_ITEM32(pci, cqe12, lag_subport, 0x00, 16, 8);
+mlxsw_pci_cqe_item_helpers(lag_subport, 0, 12, 12);
+
+/* pci_cqe_wqe_counter
+ * WQE count of the WQEs completed on the associated dqn
+ */
+MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
+
+/* pci_cqe_byte_count
+ * Byte count of received packets including additional two
+ * Reserved Bytes that are append to the end of the frame.
+ * Reserved for Send CQE.
+ */
+MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
+
+/* pci_cqe_trap_id
+ * Trap ID that captured the packet.
+ */
+MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 9);
+
+/* pci_cqe_crc
+ * Length include CRC. Indicates the length field includes
+ * the packet's CRC.
+ */
+MLXSW_ITEM32(pci, cqe0, crc, 0x0C, 8, 1);
+MLXSW_ITEM32(pci, cqe12, crc, 0x0C, 9, 1);
+mlxsw_pci_cqe_item_helpers(crc, 0, 12, 12);
+
+/* pci_cqe_e
+ * CQE with Error.
+ */
+MLXSW_ITEM32(pci, cqe0, e, 0x0C, 7, 1);
+MLXSW_ITEM32(pci, cqe12, e, 0x00, 27, 1);
+mlxsw_pci_cqe_item_helpers(e, 0, 12, 12);
+
+/* pci_cqe_sr
+ * 1 - Send Queue
+ * 0 - Receive Queue
+ */
+MLXSW_ITEM32(pci, cqe0, sr, 0x0C, 6, 1);
+MLXSW_ITEM32(pci, cqe12, sr, 0x00, 26, 1);
+mlxsw_pci_cqe_item_helpers(sr, 0, 12, 12);
+
+/* pci_cqe_dqn
+ * Descriptor Queue (DQ) Number.
+ */
+MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5);
+MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6);
+mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12);
+
+/* pci_cqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, cqe01, owner, 0x0C, 0, 1);
+MLXSW_ITEM32(pci, cqe2, owner, 0x1C, 0, 1);
+mlxsw_pci_cqe_item_helpers(owner, 01, 01, 2);
+
+/* pci_eqe_event_type
+ * Event type.
+ */
+MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8);
+#define MLXSW_PCI_EQE_EVENT_TYPE_COMP 0x00
+#define MLXSW_PCI_EQE_EVENT_TYPE_CMD 0x0A
+
+/* pci_eqe_event_sub_type
+ * Event type.
+ */
+MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
+
+/* pci_eqe_cqn
+ * Completion Queue that triggeret this EQE.
+ */
+MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
+
+/* pci_eqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1);
+
+/* pci_eqe_cmd_token
+ * Command completion event - token
+ */
+MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16);
+
+/* pci_eqe_cmd_status
+ * Command completion event - status
+ */
+MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8);
+
+/* pci_eqe_cmd_out_param_h
+ * Command completion event - output parameter - higher part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32);
+
+/* pci_eqe_cmd_out_param_l
+ * Command completion event - output parameter - lower part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h
new file mode 100644
index 000000000..a33eeef0b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/port.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_PORT_H
+#define _MLXSW_PORT_H
+
+#include <linux/types.h>
+
+#define MLXSW_PORT_MAX_MTU 10000
+
+#define MLXSW_PORT_DEFAULT_VID 1
+
+#define MLXSW_PORT_SWID_DISABLED_PORT 255
+#define MLXSW_PORT_SWID_ALL_SWIDS 254
+#define MLXSW_PORT_SWID_TYPE_IB 1
+#define MLXSW_PORT_SWID_TYPE_ETH 2
+
+#define MLXSW_PORT_MID 0xd000
+
+#define MLXSW_PORT_MAX_IB_PHY_PORTS 36
+#define MLXSW_PORT_MAX_IB_PORTS (MLXSW_PORT_MAX_IB_PHY_PORTS + 1)
+
+#define MLXSW_PORT_CPU_PORT 0x0
+
+#define MLXSW_PORT_DONT_CARE 0xFF
+
+#define MLXSW_PORT_MODULE_MAX_WIDTH 4
+
+enum mlxsw_port_admin_status {
+ MLXSW_PORT_ADMIN_STATUS_UP = 1,
+ MLXSW_PORT_ADMIN_STATUS_DOWN = 2,
+ MLXSW_PORT_ADMIN_STATUS_UP_ONCE = 3,
+ MLXSW_PORT_ADMIN_STATUS_DISABLED = 4,
+};
+
+enum mlxsw_reg_pude_oper_status {
+ MLXSW_PORT_OPER_STATUS_UP = 1,
+ MLXSW_PORT_OPER_STATUS_DOWN = 2,
+ MLXSW_PORT_OPER_STATUS_FAILURE = 4, /* Can be set to up again. */
+};
+
+#endif /* _MLXSW_PORT_H */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
new file mode 100644
index 000000000..c9895876a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -0,0 +1,8894 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_REG_H
+#define _MLXSW_REG_H
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <linux/if_vlan.h>
+
+#include "item.h"
+#include "port.h"
+
+struct mlxsw_reg_info {
+ u16 id;
+ u16 len; /* In u8 */
+ const char *name;
+};
+
+#define MLXSW_REG_DEFINE(_name, _id, _len) \
+static const struct mlxsw_reg_info mlxsw_reg_##_name = { \
+ .id = _id, \
+ .len = _len, \
+ .name = #_name, \
+}
+
+#define MLXSW_REG(type) (&mlxsw_reg_##type)
+#define MLXSW_REG_LEN(type) MLXSW_REG(type)->len
+#define MLXSW_REG_ZERO(type, payload) memset(payload, 0, MLXSW_REG(type)->len)
+
+/* SGCR - Switch General Configuration Register
+ * --------------------------------------------
+ * This register is used for configuration of the switch capabilities.
+ */
+#define MLXSW_REG_SGCR_ID 0x2000
+#define MLXSW_REG_SGCR_LEN 0x10
+
+MLXSW_REG_DEFINE(sgcr, MLXSW_REG_SGCR_ID, MLXSW_REG_SGCR_LEN);
+
+/* reg_sgcr_llb
+ * Link Local Broadcast (Default=0)
+ * When set, all Link Local packets (224.0.0.X) will be treated as broadcast
+ * packets and ignore the IGMP snooping entries.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sgcr, llb, 0x04, 0, 1);
+
+static inline void mlxsw_reg_sgcr_pack(char *payload, bool llb)
+{
+ MLXSW_REG_ZERO(sgcr, payload);
+ mlxsw_reg_sgcr_llb_set(payload, !!llb);
+}
+
+/* SPAD - Switch Physical Address Register
+ * ---------------------------------------
+ * The SPAD register configures the switch physical MAC address.
+ */
+#define MLXSW_REG_SPAD_ID 0x2002
+#define MLXSW_REG_SPAD_LEN 0x10
+
+MLXSW_REG_DEFINE(spad, MLXSW_REG_SPAD_ID, MLXSW_REG_SPAD_LEN);
+
+/* reg_spad_base_mac
+ * Base MAC address for the switch partitions.
+ * Per switch partition MAC address is equal to:
+ * base_mac + swid
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, spad, base_mac, 0x02, 6);
+
+/* SMID - Switch Multicast ID
+ * --------------------------
+ * The MID record maps from a MID (Multicast ID), which is a unique identifier
+ * of the multicast group within the stacking domain, into a list of local
+ * ports into which the packet is replicated.
+ */
+#define MLXSW_REG_SMID_ID 0x2007
+#define MLXSW_REG_SMID_LEN 0x240
+
+MLXSW_REG_DEFINE(smid, MLXSW_REG_SMID_ID, MLXSW_REG_SMID_LEN);
+
+/* reg_smid_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, smid, swid, 0x00, 24, 8);
+
+/* reg_smid_mid
+ * Multicast identifier - global identifier that represents the multicast group
+ * across all devices.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, smid, mid, 0x00, 0, 16);
+
+/* reg_smid_port
+ * Local port memebership (1 bit per port).
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, smid, port, 0x20, 0x20, 1);
+
+/* reg_smid_port_mask
+ * Local port mask (1 bit per port).
+ * Access: W
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, smid, port_mask, 0x220, 0x20, 1);
+
+static inline void mlxsw_reg_smid_pack(char *payload, u16 mid,
+ u8 port, bool set)
+{
+ MLXSW_REG_ZERO(smid, payload);
+ mlxsw_reg_smid_swid_set(payload, 0);
+ mlxsw_reg_smid_mid_set(payload, mid);
+ mlxsw_reg_smid_port_set(payload, port, set);
+ mlxsw_reg_smid_port_mask_set(payload, port, 1);
+}
+
+/* SSPR - Switch System Port Record Register
+ * -----------------------------------------
+ * Configures the system port to local port mapping.
+ */
+#define MLXSW_REG_SSPR_ID 0x2008
+#define MLXSW_REG_SSPR_LEN 0x8
+
+MLXSW_REG_DEFINE(sspr, MLXSW_REG_SSPR_ID, MLXSW_REG_SSPR_LEN);
+
+/* reg_sspr_m
+ * Master - if set, then the record describes the master system port.
+ * This is needed in case a local port is mapped into several system ports
+ * (for multipathing). That number will be reported as the source system
+ * port when packets are forwarded to the CPU. Only one master port is allowed
+ * per local port.
+ *
+ * Note: Must be set for Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1);
+
+/* reg_sspr_local_port
+ * Local port number.
+ *
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sspr, local_port, 0x00, 16, 8);
+
+/* reg_sspr_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ *
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8);
+
+/* reg_sspr_system_port
+ * Unique identifier within the stacking domain that represents all the ports
+ * that are available in the system (external ports).
+ *
+ * Currently, only single-ASIC configurations are supported, so we default to
+ * 1:1 mapping between system ports and local ports.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sspr, system_port, 0x04, 0, 16);
+
+static inline void mlxsw_reg_sspr_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(sspr, payload);
+ mlxsw_reg_sspr_m_set(payload, 1);
+ mlxsw_reg_sspr_local_port_set(payload, local_port);
+ mlxsw_reg_sspr_sub_port_set(payload, 0);
+ mlxsw_reg_sspr_system_port_set(payload, local_port);
+}
+
+/* SFDAT - Switch Filtering Database Aging Time
+ * --------------------------------------------
+ * Controls the Switch aging time. Aging time is able to be set per Switch
+ * Partition.
+ */
+#define MLXSW_REG_SFDAT_ID 0x2009
+#define MLXSW_REG_SFDAT_LEN 0x8
+
+MLXSW_REG_DEFINE(sfdat, MLXSW_REG_SFDAT_ID, MLXSW_REG_SFDAT_LEN);
+
+/* reg_sfdat_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfdat, swid, 0x00, 24, 8);
+
+/* reg_sfdat_age_time
+ * Aging time in seconds
+ * Min - 10 seconds
+ * Max - 1,000,000 seconds
+ * Default is 300 seconds.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdat, age_time, 0x04, 0, 20);
+
+static inline void mlxsw_reg_sfdat_pack(char *payload, u32 age_time)
+{
+ MLXSW_REG_ZERO(sfdat, payload);
+ mlxsw_reg_sfdat_swid_set(payload, 0);
+ mlxsw_reg_sfdat_age_time_set(payload, age_time);
+}
+
+/* SFD - Switch Filtering Database
+ * -------------------------------
+ * The following register defines the access to the filtering database.
+ * The register supports querying, adding, removing and modifying the database.
+ * The access is optimized for bulk updates in which case more than one
+ * FDB record is present in the same command.
+ */
+#define MLXSW_REG_SFD_ID 0x200A
+#define MLXSW_REG_SFD_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_SFD_REC_LEN 0x10 /* record length */
+#define MLXSW_REG_SFD_REC_MAX_COUNT 64
+#define MLXSW_REG_SFD_LEN (MLXSW_REG_SFD_BASE_LEN + \
+ MLXSW_REG_SFD_REC_LEN * MLXSW_REG_SFD_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(sfd, MLXSW_REG_SFD_ID, MLXSW_REG_SFD_LEN);
+
+/* reg_sfd_swid
+ * Switch partition ID for queries. Reserved on Write.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfd, swid, 0x00, 24, 8);
+
+enum mlxsw_reg_sfd_op {
+ /* Dump entire FDB a (process according to record_locator) */
+ MLXSW_REG_SFD_OP_QUERY_DUMP = 0,
+ /* Query records by {MAC, VID/FID} value */
+ MLXSW_REG_SFD_OP_QUERY_QUERY = 1,
+ /* Query and clear activity. Query records by {MAC, VID/FID} value */
+ MLXSW_REG_SFD_OP_QUERY_QUERY_AND_CLEAR_ACTIVITY = 2,
+ /* Test. Response indicates if each of the records could be
+ * added to the FDB.
+ */
+ MLXSW_REG_SFD_OP_WRITE_TEST = 0,
+ /* Add/modify. Aged-out records cannot be added. This command removes
+ * the learning notification of the {MAC, VID/FID}. Response includes
+ * the entries that were added to the FDB.
+ */
+ MLXSW_REG_SFD_OP_WRITE_EDIT = 1,
+ /* Remove record by {MAC, VID/FID}. This command also removes
+ * the learning notification and aged-out notifications
+ * of the {MAC, VID/FID}. The response provides current (pre-removal)
+ * entries as non-aged-out.
+ */
+ MLXSW_REG_SFD_OP_WRITE_REMOVE = 2,
+ /* Remove learned notification by {MAC, VID/FID}. The response provides
+ * the removed learning notification.
+ */
+ MLXSW_REG_SFD_OP_WRITE_REMOVE_NOTIFICATION = 2,
+};
+
+/* reg_sfd_op
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, sfd, op, 0x04, 30, 2);
+
+/* reg_sfd_record_locator
+ * Used for querying the FDB. Use record_locator=0 to initiate the
+ * query. When a record is returned, a new record_locator is
+ * returned to be used in the subsequent query.
+ * Reserved for database update.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfd, record_locator, 0x04, 0, 30);
+
+/* reg_sfd_num_rec
+ * Request: Number of records to read/add/modify/remove
+ * Response: Number of records read/added/replaced/removed
+ * See above description for more details.
+ * Ranges 0..64
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfd, num_rec, 0x08, 0, 8);
+
+static inline void mlxsw_reg_sfd_pack(char *payload, enum mlxsw_reg_sfd_op op,
+ u32 record_locator)
+{
+ MLXSW_REG_ZERO(sfd, payload);
+ mlxsw_reg_sfd_op_set(payload, op);
+ mlxsw_reg_sfd_record_locator_set(payload, record_locator);
+}
+
+/* reg_sfd_rec_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8,
+ MLXSW_REG_SFD_REC_LEN, 0x00, false);
+
+enum mlxsw_reg_sfd_rec_type {
+ MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1,
+ MLXSW_REG_SFD_REC_TYPE_MULTICAST = 0x2,
+};
+
+/* reg_sfd_rec_type
+ * FDB record type.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, rec_type, MLXSW_REG_SFD_BASE_LEN, 20, 4,
+ MLXSW_REG_SFD_REC_LEN, 0x00, false);
+
+enum mlxsw_reg_sfd_rec_policy {
+ /* Replacement disabled, aging disabled. */
+ MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY = 0,
+ /* (mlag remote): Replacement enabled, aging disabled,
+ * learning notification enabled on this port.
+ */
+ MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG = 1,
+ /* (ingress device): Replacement enabled, aging enabled. */
+ MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS = 3,
+};
+
+/* reg_sfd_rec_policy
+ * Policy.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, rec_policy, MLXSW_REG_SFD_BASE_LEN, 18, 2,
+ MLXSW_REG_SFD_REC_LEN, 0x00, false);
+
+/* reg_sfd_rec_a
+ * Activity. Set for new static entries. Set for static entries if a frame SMAC
+ * lookup hits on the entry.
+ * To clear the a bit, use "query and clear activity" op.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, rec_a, MLXSW_REG_SFD_BASE_LEN, 16, 1,
+ MLXSW_REG_SFD_REC_LEN, 0x00, false);
+
+/* reg_sfd_rec_mac
+ * MAC address.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF_INDEXED(reg, sfd, rec_mac, MLXSW_REG_SFD_BASE_LEN, 6,
+ MLXSW_REG_SFD_REC_LEN, 0x02);
+
+enum mlxsw_reg_sfd_rec_action {
+ /* forward */
+ MLXSW_REG_SFD_REC_ACTION_NOP = 0,
+ /* forward and trap, trap_id is FDB_TRAP */
+ MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1,
+ /* trap and do not forward, trap_id is FDB_TRAP */
+ MLXSW_REG_SFD_REC_ACTION_TRAP = 2,
+ /* forward to IP router */
+ MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER = 3,
+ MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15,
+};
+
+/* reg_sfd_rec_action
+ * Action to apply on the packet.
+ * Note: Dynamic entries can only be configured with NOP action.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, rec_action, MLXSW_REG_SFD_BASE_LEN, 28, 4,
+ MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+/* reg_sfd_uc_sub_port
+ * VEPA channel on local port.
+ * Valid only if local port is a non-stacking port. Must be 0 if multichannel
+ * VEPA is not enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_fid_vid
+ * Filtering ID or VLAN ID
+ * For SwitchX and SwitchX-2:
+ * - Dynamic entries (policy 2,3) use FID
+ * - Static entries (policy 0) use VID
+ * - When independent learning is configured, VID=FID
+ * For Spectrum: use FID for both Dynamic and Static entries.
+ * VID should not be used.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_system_port
+ * Unique port identifier for the final destination of the packet.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+ MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index,
+ enum mlxsw_reg_sfd_rec_type rec_type,
+ const char *mac,
+ enum mlxsw_reg_sfd_rec_action action)
+{
+ u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload);
+
+ if (rec_index >= num_rec)
+ mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1);
+ mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0);
+ mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type);
+ mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac);
+ mlxsw_reg_sfd_rec_action_set(payload, rec_index, action);
+}
+
+static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index,
+ enum mlxsw_reg_sfd_rec_policy policy,
+ const char *mac, u16 fid_vid,
+ enum mlxsw_reg_sfd_rec_action action,
+ u8 local_port)
+{
+ mlxsw_reg_sfd_rec_pack(payload, rec_index,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST, mac, action);
+ mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy);
+ mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0);
+ mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, fid_vid);
+ mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port);
+}
+
+static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index,
+ char *mac, u16 *p_fid_vid,
+ u8 *p_local_port)
+{
+ mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac);
+ *p_fid_vid = mlxsw_reg_sfd_uc_fid_vid_get(payload, rec_index);
+ *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index);
+}
+
+/* reg_sfd_uc_lag_sub_port
+ * LAG sub port.
+ * Must be 0 if multichannel VEPA is not enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_lag_fid_vid
+ * Filtering ID or VLAN ID
+ * For SwitchX and SwitchX-2:
+ * - Dynamic entries (policy 2,3) use FID
+ * - Static entries (policy 0) use VID
+ * - When independent learning is configured, VID=FID
+ * For Spectrum: use FID for both Dynamic and Static entries.
+ * VID should not be used.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_lag_lag_vid
+ * Indicates VID in case of vFIDs. Reserved for FIDs.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_vid, MLXSW_REG_SFD_BASE_LEN, 16, 12,
+ MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+/* reg_sfd_uc_lag_lag_id
+ * LAG Identifier - pointer into the LAG descriptor table.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10,
+ MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+static inline void
+mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index,
+ enum mlxsw_reg_sfd_rec_policy policy,
+ const char *mac, u16 fid_vid,
+ enum mlxsw_reg_sfd_rec_action action, u16 lag_vid,
+ u16 lag_id)
+{
+ mlxsw_reg_sfd_rec_pack(payload, rec_index,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG,
+ mac, action);
+ mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy);
+ mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0);
+ mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, fid_vid);
+ mlxsw_reg_sfd_uc_lag_lag_vid_set(payload, rec_index, lag_vid);
+ mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id);
+}
+
+static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index,
+ char *mac, u16 *p_vid,
+ u16 *p_lag_id)
+{
+ mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac);
+ *p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index);
+ *p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index);
+}
+
+/* reg_sfd_mc_pgi
+ *
+ * Multicast port group index - index into the port group table.
+ * Value 0x1FFF indicates the pgi should point to the MID entry.
+ * For Spectrum this value must be set to 0x1FFF
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, mc_pgi, MLXSW_REG_SFD_BASE_LEN, 16, 13,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_mc_fid_vid
+ *
+ * Filtering ID or VLAN ID
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, mc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_mc_mid
+ *
+ * Multicast identifier - global identifier that represents the multicast
+ * group across all devices.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, mc_mid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+ MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+static inline void
+mlxsw_reg_sfd_mc_pack(char *payload, int rec_index,
+ const char *mac, u16 fid_vid,
+ enum mlxsw_reg_sfd_rec_action action, u16 mid)
+{
+ mlxsw_reg_sfd_rec_pack(payload, rec_index,
+ MLXSW_REG_SFD_REC_TYPE_MULTICAST, mac, action);
+ mlxsw_reg_sfd_mc_pgi_set(payload, rec_index, 0x1FFF);
+ mlxsw_reg_sfd_mc_fid_vid_set(payload, rec_index, fid_vid);
+ mlxsw_reg_sfd_mc_mid_set(payload, rec_index, mid);
+}
+
+/* SFN - Switch FDB Notification Register
+ * -------------------------------------------
+ * The switch provides notifications on newly learned FDB entries and
+ * aged out entries. The notifications can be polled by software.
+ */
+#define MLXSW_REG_SFN_ID 0x200B
+#define MLXSW_REG_SFN_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_SFN_REC_LEN 0x10 /* record length */
+#define MLXSW_REG_SFN_REC_MAX_COUNT 64
+#define MLXSW_REG_SFN_LEN (MLXSW_REG_SFN_BASE_LEN + \
+ MLXSW_REG_SFN_REC_LEN * MLXSW_REG_SFN_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(sfn, MLXSW_REG_SFN_ID, MLXSW_REG_SFN_LEN);
+
+/* reg_sfn_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfn, swid, 0x00, 24, 8);
+
+/* reg_sfn_end
+ * Forces the current session to end.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, sfn, end, 0x04, 20, 1);
+
+/* reg_sfn_num_rec
+ * Request: Number of learned notifications and aged-out notification
+ * records requested.
+ * Response: Number of notification records returned (must be smaller
+ * than or equal to the value requested)
+ * Ranges 0..64
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, sfn, num_rec, 0x04, 0, 8);
+
+static inline void mlxsw_reg_sfn_pack(char *payload)
+{
+ MLXSW_REG_ZERO(sfn, payload);
+ mlxsw_reg_sfn_swid_set(payload, 0);
+ mlxsw_reg_sfn_end_set(payload, 1);
+ mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT);
+}
+
+/* reg_sfn_rec_swid
+ * Switch partition ID.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8,
+ MLXSW_REG_SFN_REC_LEN, 0x00, false);
+
+enum mlxsw_reg_sfn_rec_type {
+ /* MAC addresses learned on a regular port. */
+ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5,
+ /* MAC addresses learned on a LAG port. */
+ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6,
+ /* Aged-out MAC address on a regular port. */
+ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7,
+ /* Aged-out MAC address on a LAG port. */
+ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8,
+};
+
+/* reg_sfn_rec_type
+ * Notification record type.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, rec_type, MLXSW_REG_SFN_BASE_LEN, 20, 4,
+ MLXSW_REG_SFN_REC_LEN, 0x00, false);
+
+/* reg_sfn_rec_mac
+ * MAC address.
+ * Access: RO
+ */
+MLXSW_ITEM_BUF_INDEXED(reg, sfn, rec_mac, MLXSW_REG_SFN_BASE_LEN, 6,
+ MLXSW_REG_SFN_REC_LEN, 0x02);
+
+/* reg_sfn_mac_sub_port
+ * VEPA channel on the local port.
+ * 0 if multichannel VEPA is not enabled.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, mac_sub_port, MLXSW_REG_SFN_BASE_LEN, 16, 8,
+ MLXSW_REG_SFN_REC_LEN, 0x08, false);
+
+/* reg_sfn_mac_fid
+ * Filtering identifier.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, mac_fid, MLXSW_REG_SFN_BASE_LEN, 0, 16,
+ MLXSW_REG_SFN_REC_LEN, 0x08, false);
+
+/* reg_sfn_mac_system_port
+ * Unique port identifier for the final destination of the packet.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, mac_system_port, MLXSW_REG_SFN_BASE_LEN, 0, 16,
+ MLXSW_REG_SFN_REC_LEN, 0x0C, false);
+
+static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index,
+ char *mac, u16 *p_vid,
+ u8 *p_local_port)
+{
+ mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac);
+ *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index);
+ *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index);
+}
+
+/* reg_sfn_mac_lag_lag_id
+ * LAG ID (pointer into the LAG descriptor table).
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10,
+ MLXSW_REG_SFN_REC_LEN, 0x0C, false);
+
+static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index,
+ char *mac, u16 *p_vid,
+ u16 *p_lag_id)
+{
+ mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac);
+ *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index);
+ *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index);
+}
+
+/* SPMS - Switch Port MSTP/RSTP State Register
+ * -------------------------------------------
+ * Configures the spanning tree state of a physical port.
+ */
+#define MLXSW_REG_SPMS_ID 0x200D
+#define MLXSW_REG_SPMS_LEN 0x404
+
+MLXSW_REG_DEFINE(spms, MLXSW_REG_SPMS_ID, MLXSW_REG_SPMS_LEN);
+
+/* reg_spms_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spms, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_spms_state {
+ MLXSW_REG_SPMS_STATE_NO_CHANGE,
+ MLXSW_REG_SPMS_STATE_DISCARDING,
+ MLXSW_REG_SPMS_STATE_LEARNING,
+ MLXSW_REG_SPMS_STATE_FORWARDING,
+};
+
+/* reg_spms_state
+ * Spanning tree state of each VLAN ID (VID) of the local port.
+ * 0 - Do not change spanning tree state (used only when writing).
+ * 1 - Discarding. No learning or forwarding to/from this port (default).
+ * 2 - Learning. Port is learning, but not forwarding.
+ * 3 - Forwarding. Port is learning and forwarding.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, spms, state, 0x04, 0x400, 2);
+
+static inline void mlxsw_reg_spms_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(spms, payload);
+ mlxsw_reg_spms_local_port_set(payload, local_port);
+}
+
+static inline void mlxsw_reg_spms_vid_pack(char *payload, u16 vid,
+ enum mlxsw_reg_spms_state state)
+{
+ mlxsw_reg_spms_state_set(payload, vid, state);
+}
+
+/* SPVID - Switch Port VID
+ * -----------------------
+ * The switch port VID configures the default VID for a port.
+ */
+#define MLXSW_REG_SPVID_ID 0x200E
+#define MLXSW_REG_SPVID_LEN 0x08
+
+MLXSW_REG_DEFINE(spvid, MLXSW_REG_SPVID_ID, MLXSW_REG_SPVID_LEN);
+
+/* reg_spvid_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spvid, local_port, 0x00, 16, 8);
+
+/* reg_spvid_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8);
+
+/* reg_spvid_pvid
+ * Port default VID
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spvid, pvid, 0x04, 0, 12);
+
+static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid)
+{
+ MLXSW_REG_ZERO(spvid, payload);
+ mlxsw_reg_spvid_local_port_set(payload, local_port);
+ mlxsw_reg_spvid_pvid_set(payload, pvid);
+}
+
+/* SPVM - Switch Port VLAN Membership
+ * ----------------------------------
+ * The Switch Port VLAN Membership register configures the VLAN membership
+ * of a port in a VLAN denoted by VID. VLAN membership is managed per
+ * virtual port. The register can be used to add and remove VID(s) from a port.
+ */
+#define MLXSW_REG_SPVM_ID 0x200F
+#define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
+#define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
+#define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \
+ MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(spvm, MLXSW_REG_SPVM_ID, MLXSW_REG_SPVM_LEN);
+
+/* reg_spvm_pt
+ * Priority tagged. If this bit is set, packets forwarded to the port with
+ * untagged VLAN membership (u bit is set) will be tagged with priority tag
+ * (VID=0)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spvm, pt, 0x00, 31, 1);
+
+/* reg_spvm_pte
+ * Priority Tagged Update Enable. On Write operations, if this bit is cleared,
+ * the pt bit will NOT be updated. To update the pt bit, pte must be set.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, spvm, pte, 0x00, 30, 1);
+
+/* reg_spvm_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spvm, local_port, 0x00, 16, 8);
+
+/* reg_spvm_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spvm, sub_port, 0x00, 8, 8);
+
+/* reg_spvm_num_rec
+ * Number of records to update. Each record contains: i, e, u, vid.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, spvm, num_rec, 0x00, 0, 8);
+
+/* reg_spvm_rec_i
+ * Ingress membership in VLAN ID.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, spvm, rec_i,
+ MLXSW_REG_SPVM_BASE_LEN, 14, 1,
+ MLXSW_REG_SPVM_REC_LEN, 0, false);
+
+/* reg_spvm_rec_e
+ * Egress membership in VLAN ID.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, spvm, rec_e,
+ MLXSW_REG_SPVM_BASE_LEN, 13, 1,
+ MLXSW_REG_SPVM_REC_LEN, 0, false);
+
+/* reg_spvm_rec_u
+ * Untagged - port is an untagged member - egress transmission uses untagged
+ * frames on VID<n>
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, spvm, rec_u,
+ MLXSW_REG_SPVM_BASE_LEN, 12, 1,
+ MLXSW_REG_SPVM_REC_LEN, 0, false);
+
+/* reg_spvm_rec_vid
+ * Egress membership in VLAN ID.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, spvm, rec_vid,
+ MLXSW_REG_SPVM_BASE_LEN, 0, 12,
+ MLXSW_REG_SPVM_REC_LEN, 0, false);
+
+static inline void mlxsw_reg_spvm_pack(char *payload, u8 local_port,
+ u16 vid_begin, u16 vid_end,
+ bool is_member, bool untagged)
+{
+ int size = vid_end - vid_begin + 1;
+ int i;
+
+ MLXSW_REG_ZERO(spvm, payload);
+ mlxsw_reg_spvm_local_port_set(payload, local_port);
+ mlxsw_reg_spvm_num_rec_set(payload, size);
+
+ for (i = 0; i < size; i++) {
+ mlxsw_reg_spvm_rec_i_set(payload, i, is_member);
+ mlxsw_reg_spvm_rec_e_set(payload, i, is_member);
+ mlxsw_reg_spvm_rec_u_set(payload, i, untagged);
+ mlxsw_reg_spvm_rec_vid_set(payload, i, vid_begin + i);
+ }
+}
+
+/* SPAFT - Switch Port Acceptable Frame Types
+ * ------------------------------------------
+ * The Switch Port Acceptable Frame Types register configures the frame
+ * admittance of the port.
+ */
+#define MLXSW_REG_SPAFT_ID 0x2010
+#define MLXSW_REG_SPAFT_LEN 0x08
+
+MLXSW_REG_DEFINE(spaft, MLXSW_REG_SPAFT_ID, MLXSW_REG_SPAFT_LEN);
+
+/* reg_spaft_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is not supported (all tag types are allowed).
+ */
+MLXSW_ITEM32(reg, spaft, local_port, 0x00, 16, 8);
+
+/* reg_spaft_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, sub_port, 0x00, 8, 8);
+
+/* reg_spaft_allow_untagged
+ * When set, untagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_untagged, 0x04, 31, 1);
+
+/* reg_spaft_allow_prio_tagged
+ * When set, priority tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_prio_tagged, 0x04, 30, 1);
+
+/* reg_spaft_allow_tagged
+ * When set, tagged frames on the ingress are allowed (default).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spaft, allow_tagged, 0x04, 29, 1);
+
+static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port,
+ bool allow_untagged)
+{
+ MLXSW_REG_ZERO(spaft, payload);
+ mlxsw_reg_spaft_local_port_set(payload, local_port);
+ mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged);
+ mlxsw_reg_spaft_allow_prio_tagged_set(payload, allow_untagged);
+ mlxsw_reg_spaft_allow_tagged_set(payload, true);
+}
+
+/* SFGC - Switch Flooding Group Configuration
+ * ------------------------------------------
+ * The following register controls the association of flooding tables and MIDs
+ * to packet types used for flooding.
+ */
+#define MLXSW_REG_SFGC_ID 0x2011
+#define MLXSW_REG_SFGC_LEN 0x10
+
+MLXSW_REG_DEFINE(sfgc, MLXSW_REG_SFGC_ID, MLXSW_REG_SFGC_LEN);
+
+enum mlxsw_reg_sfgc_type {
+ MLXSW_REG_SFGC_TYPE_BROADCAST,
+ MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST,
+ MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4,
+ MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6,
+ MLXSW_REG_SFGC_TYPE_RESERVED,
+ MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP,
+ MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL,
+ MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST,
+ MLXSW_REG_SFGC_TYPE_MAX,
+};
+
+/* reg_sfgc_type
+ * The traffic type to reach the flooding table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfgc, type, 0x00, 0, 4);
+
+enum mlxsw_reg_sfgc_bridge_type {
+ MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID = 0,
+ MLXSW_REG_SFGC_BRIDGE_TYPE_VFID = 1,
+};
+
+/* reg_sfgc_bridge_type
+ * Access: Index
+ *
+ * Note: SwitchX-2 only supports 802.1Q mode.
+ */
+MLXSW_ITEM32(reg, sfgc, bridge_type, 0x04, 24, 3);
+
+enum mlxsw_flood_table_type {
+ MLXSW_REG_SFGC_TABLE_TYPE_VID = 1,
+ MLXSW_REG_SFGC_TABLE_TYPE_SINGLE = 2,
+ MLXSW_REG_SFGC_TABLE_TYPE_ANY = 0,
+ MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET = 3,
+ MLXSW_REG_SFGC_TABLE_TYPE_FID = 4,
+};
+
+/* reg_sfgc_table_type
+ * See mlxsw_flood_table_type
+ * Access: RW
+ *
+ * Note: FID offset and FID types are not supported in SwitchX-2.
+ */
+MLXSW_ITEM32(reg, sfgc, table_type, 0x04, 16, 3);
+
+/* reg_sfgc_flood_table
+ * Flooding table index to associate with the specific type on the specific
+ * switch partition.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfgc, flood_table, 0x04, 0, 6);
+
+/* reg_sfgc_mid
+ * The multicast ID for the swid. Not supported for Spectrum
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfgc, mid, 0x08, 0, 16);
+
+/* reg_sfgc_counter_set_type
+ * Counter Set Type for flow counters.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfgc, counter_set_type, 0x0C, 24, 8);
+
+/* reg_sfgc_counter_index
+ * Counter Index for flow counters.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfgc, counter_index, 0x0C, 0, 24);
+
+static inline void
+mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type,
+ enum mlxsw_reg_sfgc_bridge_type bridge_type,
+ enum mlxsw_flood_table_type table_type,
+ unsigned int flood_table)
+{
+ MLXSW_REG_ZERO(sfgc, payload);
+ mlxsw_reg_sfgc_type_set(payload, type);
+ mlxsw_reg_sfgc_bridge_type_set(payload, bridge_type);
+ mlxsw_reg_sfgc_table_type_set(payload, table_type);
+ mlxsw_reg_sfgc_flood_table_set(payload, flood_table);
+ mlxsw_reg_sfgc_mid_set(payload, MLXSW_PORT_MID);
+}
+
+/* SFTR - Switch Flooding Table Register
+ * -------------------------------------
+ * The switch flooding table is used for flooding packet replication. The table
+ * defines a bit mask of ports for packet replication.
+ */
+#define MLXSW_REG_SFTR_ID 0x2012
+#define MLXSW_REG_SFTR_LEN 0x420
+
+MLXSW_REG_DEFINE(sftr, MLXSW_REG_SFTR_ID, MLXSW_REG_SFTR_LEN);
+
+/* reg_sftr_swid
+ * Switch partition ID with which to associate the port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sftr, swid, 0x00, 24, 8);
+
+/* reg_sftr_flood_table
+ * Flooding table index to associate with the specific type on the specific
+ * switch partition.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sftr, flood_table, 0x00, 16, 6);
+
+/* reg_sftr_index
+ * Index. Used as an index into the Flooding Table in case the table is
+ * configured to use VID / FID or FID Offset.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sftr, index, 0x00, 0, 16);
+
+/* reg_sftr_table_type
+ * See mlxsw_flood_table_type
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sftr, table_type, 0x04, 16, 3);
+
+/* reg_sftr_range
+ * Range of entries to update
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sftr, range, 0x04, 0, 16);
+
+/* reg_sftr_port
+ * Local port membership (1 bit per port).
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, sftr, port, 0x20, 0x20, 1);
+
+/* reg_sftr_cpu_port_mask
+ * CPU port mask (1 bit per port).
+ * Access: W
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, sftr, port_mask, 0x220, 0x20, 1);
+
+static inline void mlxsw_reg_sftr_pack(char *payload,
+ unsigned int flood_table,
+ unsigned int index,
+ enum mlxsw_flood_table_type table_type,
+ unsigned int range, u8 port, bool set)
+{
+ MLXSW_REG_ZERO(sftr, payload);
+ mlxsw_reg_sftr_swid_set(payload, 0);
+ mlxsw_reg_sftr_flood_table_set(payload, flood_table);
+ mlxsw_reg_sftr_index_set(payload, index);
+ mlxsw_reg_sftr_table_type_set(payload, table_type);
+ mlxsw_reg_sftr_range_set(payload, range);
+ mlxsw_reg_sftr_port_set(payload, port, set);
+ mlxsw_reg_sftr_port_mask_set(payload, port, 1);
+}
+
+/* SFDF - Switch Filtering DB Flush
+ * --------------------------------
+ * The switch filtering DB flush register is used to flush the FDB.
+ * Note that FDB notifications are flushed as well.
+ */
+#define MLXSW_REG_SFDF_ID 0x2013
+#define MLXSW_REG_SFDF_LEN 0x14
+
+MLXSW_REG_DEFINE(sfdf, MLXSW_REG_SFDF_ID, MLXSW_REG_SFDF_LEN);
+
+/* reg_sfdf_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfdf, swid, 0x00, 24, 8);
+
+enum mlxsw_reg_sfdf_flush_type {
+ MLXSW_REG_SFDF_FLUSH_PER_SWID,
+ MLXSW_REG_SFDF_FLUSH_PER_FID,
+ MLXSW_REG_SFDF_FLUSH_PER_PORT,
+ MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID,
+ MLXSW_REG_SFDF_FLUSH_PER_LAG,
+ MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID,
+};
+
+/* reg_sfdf_flush_type
+ * Flush type.
+ * 0 - All SWID dynamic entries are flushed.
+ * 1 - All FID dynamic entries are flushed.
+ * 2 - All dynamic entries pointing to port are flushed.
+ * 3 - All FID dynamic entries pointing to port are flushed.
+ * 4 - All dynamic entries pointing to LAG are flushed.
+ * 5 - All FID dynamic entries pointing to LAG are flushed.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4);
+
+/* reg_sfdf_flush_static
+ * Static.
+ * 0 - Flush only dynamic entries.
+ * 1 - Flush both dynamic and static entries.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, flush_static, 0x04, 24, 1);
+
+static inline void mlxsw_reg_sfdf_pack(char *payload,
+ enum mlxsw_reg_sfdf_flush_type type)
+{
+ MLXSW_REG_ZERO(sfdf, payload);
+ mlxsw_reg_sfdf_flush_type_set(payload, type);
+ mlxsw_reg_sfdf_flush_static_set(payload, true);
+}
+
+/* reg_sfdf_fid
+ * FID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, fid, 0x0C, 0, 16);
+
+/* reg_sfdf_system_port
+ * Port to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, system_port, 0x0C, 0, 16);
+
+/* reg_sfdf_port_fid_system_port
+ * Port to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, port_fid_system_port, 0x08, 0, 16);
+
+/* reg_sfdf_lag_id
+ * LAG ID to flush.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_id, 0x0C, 0, 10);
+
+/* reg_sfdf_lag_fid_lag_id
+ * LAG ID to flush, pointed to by FID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10);
+
+/* SLDR - Switch LAG Descriptor Register
+ * -----------------------------------------
+ * The switch LAG descriptor register is populated by LAG descriptors.
+ * Each LAG descriptor is indexed by lag_id. The LAG ID runs from 0 to
+ * max_lag-1.
+ */
+#define MLXSW_REG_SLDR_ID 0x2014
+#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */
+
+MLXSW_REG_DEFINE(sldr, MLXSW_REG_SLDR_ID, MLXSW_REG_SLDR_LEN);
+
+enum mlxsw_reg_sldr_op {
+ /* Indicates a creation of a new LAG-ID, lag_id must be valid */
+ MLXSW_REG_SLDR_OP_LAG_CREATE,
+ MLXSW_REG_SLDR_OP_LAG_DESTROY,
+ /* Ports that appear in the list have the Distributor enabled */
+ MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST,
+ /* Removes ports from the disributor list */
+ MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST,
+};
+
+/* reg_sldr_op
+ * Operation.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3);
+
+/* reg_sldr_lag_id
+ * LAG identifier. The lag_id is the index into the LAG descriptor table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10);
+
+static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+}
+
+static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+}
+
+/* reg_sldr_num_ports
+ * The number of member ports of the LAG.
+ * Reserved for Create / Destroy operations
+ * For Add / Remove operations - indicates the number of ports in the list.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8);
+
+/* reg_sldr_system_port
+ * System port.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false);
+
+static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id,
+ u8 local_port)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+ mlxsw_reg_sldr_num_ports_set(payload, 1);
+ mlxsw_reg_sldr_system_port_set(payload, 0, local_port);
+}
+
+static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id,
+ u8 local_port)
+{
+ MLXSW_REG_ZERO(sldr, payload);
+ mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST);
+ mlxsw_reg_sldr_lag_id_set(payload, lag_id);
+ mlxsw_reg_sldr_num_ports_set(payload, 1);
+ mlxsw_reg_sldr_system_port_set(payload, 0, local_port);
+}
+
+/* SLCR - Switch LAG Configuration 2 Register
+ * -------------------------------------------
+ * The Switch LAG Configuration register is used for configuring the
+ * LAG properties of the switch.
+ */
+#define MLXSW_REG_SLCR_ID 0x2015
+#define MLXSW_REG_SLCR_LEN 0x10
+
+MLXSW_REG_DEFINE(slcr, MLXSW_REG_SLCR_ID, MLXSW_REG_SLCR_LEN);
+
+enum mlxsw_reg_slcr_pp {
+ /* Global Configuration (for all ports) */
+ MLXSW_REG_SLCR_PP_GLOBAL,
+ /* Per port configuration, based on local_port field */
+ MLXSW_REG_SLCR_PP_PER_PORT,
+};
+
+/* reg_slcr_pp
+ * Per Port Configuration
+ * Note: Reading at Global mode results in reading port 1 configuration.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1);
+
+/* reg_slcr_local_port
+ * Local port number
+ * Supported from CPU port
+ * Not supported from router port
+ * Reserved when pp = Global Configuration
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_slcr_type {
+ MLXSW_REG_SLCR_TYPE_CRC, /* default */
+ MLXSW_REG_SLCR_TYPE_XOR,
+ MLXSW_REG_SLCR_TYPE_RANDOM,
+};
+
+/* reg_slcr_type
+ * Hash type
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4);
+
+/* Ingress port */
+#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0)
+/* SMAC - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1)
+/* SMAC - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2)
+#define MLXSW_REG_SLCR_LAG_HASH_SMAC \
+ (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP)
+/* DMAC - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3)
+/* DMAC - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4)
+#define MLXSW_REG_SLCR_LAG_HASH_DMAC \
+ (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP)
+/* Ethertype - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5)
+/* Ethertype - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6)
+#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \
+ (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP)
+/* VLAN ID - for IPv4 and IPv6 packets */
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7)
+/* VLAN ID - for non-IP packets */
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8)
+#define MLXSW_REG_SLCR_LAG_HASH_VLANID \
+ (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \
+ MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP)
+/* Source IP address (can be IPv4 or IPv6) */
+#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9)
+/* Destination IP address (can be IPv4 or IPv6) */
+#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10)
+/* TCP/UDP source port */
+#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11)
+/* TCP/UDP destination port*/
+#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12)
+/* IPv4 Protocol/IPv6 Next Header */
+#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13)
+/* IPv6 Flow label */
+#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14)
+/* SID - FCoE source ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15)
+/* DID - FCoE destination ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16)
+/* OXID - FCoE originator exchange ID */
+#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17)
+/* Destination QP number - for RoCE packets */
+#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19)
+
+/* reg_slcr_lag_hash
+ * LAG hashing configuration. This is a bitmask, in which each set
+ * bit includes the corresponding item in the LAG hash calculation.
+ * The default lag_hash contains SMAC, DMAC, VLANID and
+ * Ethertype (for all packet types).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20);
+
+static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash)
+{
+ MLXSW_REG_ZERO(slcr, payload);
+ mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL);
+ mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC);
+ mlxsw_reg_slcr_lag_hash_set(payload, lag_hash);
+}
+
+/* SLCOR - Switch LAG Collector Register
+ * -------------------------------------
+ * The Switch LAG Collector register controls the Local Port membership
+ * in a LAG and enablement of the collector.
+ */
+#define MLXSW_REG_SLCOR_ID 0x2016
+#define MLXSW_REG_SLCOR_LEN 0x10
+
+MLXSW_REG_DEFINE(slcor, MLXSW_REG_SLCOR_ID, MLXSW_REG_SLCOR_LEN);
+
+enum mlxsw_reg_slcor_col {
+ /* Port is added with collector disabled */
+ MLXSW_REG_SLCOR_COL_LAG_ADD_PORT,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED,
+ MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT,
+};
+
+/* reg_slcor_col
+ * Collector configuration
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2);
+
+/* reg_slcor_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8);
+
+/* reg_slcor_lag_id
+ * LAG Identifier. Index into the LAG descriptor table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10);
+
+/* reg_slcor_port_index
+ * Port index in the LAG list. Only valid on Add Port to LAG col.
+ * Valid range is from 0 to cap_max_lag_members-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10);
+
+static inline void mlxsw_reg_slcor_pack(char *payload,
+ u8 local_port, u16 lag_id,
+ enum mlxsw_reg_slcor_col col)
+{
+ MLXSW_REG_ZERO(slcor, payload);
+ mlxsw_reg_slcor_col_set(payload, col);
+ mlxsw_reg_slcor_local_port_set(payload, local_port);
+ mlxsw_reg_slcor_lag_id_set(payload, lag_id);
+}
+
+static inline void mlxsw_reg_slcor_port_add_pack(char *payload,
+ u8 local_port, u16 lag_id,
+ u8 port_index)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_ADD_PORT);
+ mlxsw_reg_slcor_port_index_set(payload, port_index);
+}
+
+static inline void mlxsw_reg_slcor_port_remove_pack(char *payload,
+ u8 local_port, u16 lag_id)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT);
+}
+
+static inline void mlxsw_reg_slcor_col_enable_pack(char *payload,
+ u8 local_port, u16 lag_id)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED);
+}
+
+static inline void mlxsw_reg_slcor_col_disable_pack(char *payload,
+ u8 local_port, u16 lag_id)
+{
+ mlxsw_reg_slcor_pack(payload, local_port, lag_id,
+ MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED);
+}
+
+/* SPMLR - Switch Port MAC Learning Register
+ * -----------------------------------------
+ * Controls the Switch MAC learning policy per port.
+ */
+#define MLXSW_REG_SPMLR_ID 0x2018
+#define MLXSW_REG_SPMLR_LEN 0x8
+
+MLXSW_REG_DEFINE(spmlr, MLXSW_REG_SPMLR_ID, MLXSW_REG_SPMLR_LEN);
+
+/* reg_spmlr_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spmlr, local_port, 0x00, 16, 8);
+
+/* reg_spmlr_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spmlr, sub_port, 0x00, 8, 8);
+
+enum mlxsw_reg_spmlr_learn_mode {
+ MLXSW_REG_SPMLR_LEARN_MODE_DISABLE = 0,
+ MLXSW_REG_SPMLR_LEARN_MODE_ENABLE = 2,
+ MLXSW_REG_SPMLR_LEARN_MODE_SEC = 3,
+};
+
+/* reg_spmlr_learn_mode
+ * Learning mode on the port.
+ * 0 - Learning disabled.
+ * 2 - Learning enabled.
+ * 3 - Security mode.
+ *
+ * In security mode the switch does not learn MACs on the port, but uses the
+ * SMAC to see if it exists on another ingress port. If so, the packet is
+ * classified as a bad packet and is discarded unless the software registers
+ * to receive port security error packets usign HPKT.
+ */
+MLXSW_ITEM32(reg, spmlr, learn_mode, 0x04, 30, 2);
+
+static inline void mlxsw_reg_spmlr_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_spmlr_learn_mode mode)
+{
+ MLXSW_REG_ZERO(spmlr, payload);
+ mlxsw_reg_spmlr_local_port_set(payload, local_port);
+ mlxsw_reg_spmlr_sub_port_set(payload, 0);
+ mlxsw_reg_spmlr_learn_mode_set(payload, mode);
+}
+
+/* SVFA - Switch VID to FID Allocation Register
+ * --------------------------------------------
+ * Controls the VID to FID mapping and {Port, VID} to FID mapping for
+ * virtualized ports.
+ */
+#define MLXSW_REG_SVFA_ID 0x201C
+#define MLXSW_REG_SVFA_LEN 0x10
+
+MLXSW_REG_DEFINE(svfa, MLXSW_REG_SVFA_ID, MLXSW_REG_SVFA_LEN);
+
+/* reg_svfa_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, svfa, swid, 0x00, 24, 8);
+
+/* reg_svfa_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: Reserved for 802.1Q FIDs.
+ */
+MLXSW_ITEM32(reg, svfa, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_svfa_mt {
+ MLXSW_REG_SVFA_MT_VID_TO_FID,
+ MLXSW_REG_SVFA_MT_PORT_VID_TO_FID,
+};
+
+/* reg_svfa_mapping_table
+ * Mapping table:
+ * 0 - VID to FID
+ * 1 - {Port, VID} to FID
+ * Access: Index
+ *
+ * Note: Reserved for SwitchX-2.
+ */
+MLXSW_ITEM32(reg, svfa, mapping_table, 0x00, 8, 3);
+
+/* reg_svfa_v
+ * Valid.
+ * Valid if set.
+ * Access: RW
+ *
+ * Note: Reserved for SwitchX-2.
+ */
+MLXSW_ITEM32(reg, svfa, v, 0x00, 0, 1);
+
+/* reg_svfa_fid
+ * Filtering ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, svfa, fid, 0x04, 16, 16);
+
+/* reg_svfa_vid
+ * VLAN ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, svfa, vid, 0x04, 0, 12);
+
+/* reg_svfa_counter_set_type
+ * Counter set type for flow counters.
+ * Access: RW
+ *
+ * Note: Reserved for SwitchX-2.
+ */
+MLXSW_ITEM32(reg, svfa, counter_set_type, 0x08, 24, 8);
+
+/* reg_svfa_counter_index
+ * Counter index for flow counters.
+ * Access: RW
+ *
+ * Note: Reserved for SwitchX-2.
+ */
+MLXSW_ITEM32(reg, svfa, counter_index, 0x08, 0, 24);
+
+static inline void mlxsw_reg_svfa_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_svfa_mt mt, bool valid,
+ u16 fid, u16 vid)
+{
+ MLXSW_REG_ZERO(svfa, payload);
+ local_port = mt == MLXSW_REG_SVFA_MT_VID_TO_FID ? 0 : local_port;
+ mlxsw_reg_svfa_swid_set(payload, 0);
+ mlxsw_reg_svfa_local_port_set(payload, local_port);
+ mlxsw_reg_svfa_mapping_table_set(payload, mt);
+ mlxsw_reg_svfa_v_set(payload, valid);
+ mlxsw_reg_svfa_fid_set(payload, fid);
+ mlxsw_reg_svfa_vid_set(payload, vid);
+}
+
+/* SVPE - Switch Virtual-Port Enabling Register
+ * --------------------------------------------
+ * Enables port virtualization.
+ */
+#define MLXSW_REG_SVPE_ID 0x201E
+#define MLXSW_REG_SVPE_LEN 0x4
+
+MLXSW_REG_DEFINE(svpe, MLXSW_REG_SVPE_ID, MLXSW_REG_SVPE_LEN);
+
+/* reg_svpe_local_port
+ * Local port number
+ * Access: Index
+ *
+ * Note: CPU port is not supported (uses VLAN mode only).
+ */
+MLXSW_ITEM32(reg, svpe, local_port, 0x00, 16, 8);
+
+/* reg_svpe_vp_en
+ * Virtual port enable.
+ * 0 - Disable, VLAN mode (VID to FID).
+ * 1 - Enable, Virtual port mode ({Port, VID} to FID).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, svpe, vp_en, 0x00, 8, 1);
+
+static inline void mlxsw_reg_svpe_pack(char *payload, u8 local_port,
+ bool enable)
+{
+ MLXSW_REG_ZERO(svpe, payload);
+ mlxsw_reg_svpe_local_port_set(payload, local_port);
+ mlxsw_reg_svpe_vp_en_set(payload, enable);
+}
+
+/* SFMR - Switch FID Management Register
+ * -------------------------------------
+ * Creates and configures FIDs.
+ */
+#define MLXSW_REG_SFMR_ID 0x201F
+#define MLXSW_REG_SFMR_LEN 0x18
+
+MLXSW_REG_DEFINE(sfmr, MLXSW_REG_SFMR_ID, MLXSW_REG_SFMR_LEN);
+
+enum mlxsw_reg_sfmr_op {
+ MLXSW_REG_SFMR_OP_CREATE_FID,
+ MLXSW_REG_SFMR_OP_DESTROY_FID,
+};
+
+/* reg_sfmr_op
+ * Operation.
+ * 0 - Create or edit FID.
+ * 1 - Destroy FID.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, sfmr, op, 0x00, 24, 4);
+
+/* reg_sfmr_fid
+ * Filtering ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16);
+
+/* reg_sfmr_fid_offset
+ * FID offset.
+ * Used to point into the flooding table selected by SFGC register if
+ * the table is of type FID-Offset. Otherwise, this field is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfmr, fid_offset, 0x08, 0, 16);
+
+/* reg_sfmr_vtfp
+ * Valid Tunnel Flood Pointer.
+ * If not set, then nve_tunnel_flood_ptr is reserved and considered NULL.
+ * Access: RW
+ *
+ * Note: Reserved for 802.1Q FIDs.
+ */
+MLXSW_ITEM32(reg, sfmr, vtfp, 0x0C, 31, 1);
+
+/* reg_sfmr_nve_tunnel_flood_ptr
+ * Underlay Flooding and BC Pointer.
+ * Used as a pointer to the first entry of the group based link lists of
+ * flooding or BC entries (for NVE tunnels).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sfmr, nve_tunnel_flood_ptr, 0x0C, 0, 24);
+
+/* reg_sfmr_vv
+ * VNI Valid.
+ * If not set, then vni is reserved.
+ * Access: RW
+ *
+ * Note: Reserved for 802.1Q FIDs.
+ */
+MLXSW_ITEM32(reg, sfmr, vv, 0x10, 31, 1);
+
+/* reg_sfmr_vni
+ * Virtual Network Identifier.
+ * Access: RW
+ *
+ * Note: A given VNI can only be assigned to one FID.
+ */
+MLXSW_ITEM32(reg, sfmr, vni, 0x10, 0, 24);
+
+static inline void mlxsw_reg_sfmr_pack(char *payload,
+ enum mlxsw_reg_sfmr_op op, u16 fid,
+ u16 fid_offset)
+{
+ MLXSW_REG_ZERO(sfmr, payload);
+ mlxsw_reg_sfmr_op_set(payload, op);
+ mlxsw_reg_sfmr_fid_set(payload, fid);
+ mlxsw_reg_sfmr_fid_offset_set(payload, fid_offset);
+ mlxsw_reg_sfmr_vtfp_set(payload, false);
+ mlxsw_reg_sfmr_vv_set(payload, false);
+}
+
+/* SPVMLR - Switch Port VLAN MAC Learning Register
+ * -----------------------------------------------
+ * Controls the switch MAC learning policy per {Port, VID}.
+ */
+#define MLXSW_REG_SPVMLR_ID 0x2020
+#define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
+#define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
+#define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
+ MLXSW_REG_SPVMLR_REC_LEN * \
+ MLXSW_REG_SPVMLR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(spvmlr, MLXSW_REG_SPVMLR_ID, MLXSW_REG_SPVMLR_LEN);
+
+/* reg_spvmlr_local_port
+ * Local ingress port.
+ * Access: Index
+ *
+ * Note: CPU port is not supported.
+ */
+MLXSW_ITEM32(reg, spvmlr, local_port, 0x00, 16, 8);
+
+/* reg_spvmlr_num_rec
+ * Number of records to update.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, spvmlr, num_rec, 0x00, 0, 8);
+
+/* reg_spvmlr_rec_learn_enable
+ * 0 - Disable learning for {Port, VID}.
+ * 1 - Enable learning for {Port, VID}.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, spvmlr, rec_learn_enable, MLXSW_REG_SPVMLR_BASE_LEN,
+ 31, 1, MLXSW_REG_SPVMLR_REC_LEN, 0x00, false);
+
+/* reg_spvmlr_rec_vid
+ * VLAN ID to be added/removed from port or for querying.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, spvmlr, rec_vid, MLXSW_REG_SPVMLR_BASE_LEN, 0, 12,
+ MLXSW_REG_SPVMLR_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port,
+ u16 vid_begin, u16 vid_end,
+ bool learn_enable)
+{
+ int num_rec = vid_end - vid_begin + 1;
+ int i;
+
+ WARN_ON(num_rec < 1 || num_rec > MLXSW_REG_SPVMLR_REC_MAX_COUNT);
+
+ MLXSW_REG_ZERO(spvmlr, payload);
+ mlxsw_reg_spvmlr_local_port_set(payload, local_port);
+ mlxsw_reg_spvmlr_num_rec_set(payload, num_rec);
+
+ for (i = 0; i < num_rec; i++) {
+ mlxsw_reg_spvmlr_rec_learn_enable_set(payload, i, learn_enable);
+ mlxsw_reg_spvmlr_rec_vid_set(payload, i, vid_begin + i);
+ }
+}
+
+/* CWTP - Congetion WRED ECN TClass Profile
+ * ----------------------------------------
+ * Configures the profiles for queues of egress port and traffic class
+ */
+#define MLXSW_REG_CWTP_ID 0x2802
+#define MLXSW_REG_CWTP_BASE_LEN 0x28
+#define MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN 0x08
+#define MLXSW_REG_CWTP_LEN 0x40
+
+MLXSW_REG_DEFINE(cwtp, MLXSW_REG_CWTP_ID, MLXSW_REG_CWTP_LEN);
+
+/* reg_cwtp_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtp, local_port, 0, 16, 8);
+
+/* reg_cwtp_traffic_class
+ * Traffic Class to configure
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtp, traffic_class, 32, 0, 8);
+
+/* reg_cwtp_profile_min
+ * Minimum Average Queue Size of the profile in cells.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, cwtp, profile_min, MLXSW_REG_CWTP_BASE_LEN,
+ 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 0, false);
+
+/* reg_cwtp_profile_percent
+ * Percentage of WRED and ECN marking for maximum Average Queue size
+ * Range is 0 to 100, units of integer percentage
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, cwtp, profile_percent, MLXSW_REG_CWTP_BASE_LEN,
+ 24, 7, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false);
+
+/* reg_cwtp_profile_max
+ * Maximum Average Queue size of the profile in cells
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, cwtp, profile_max, MLXSW_REG_CWTP_BASE_LEN,
+ 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false);
+
+#define MLXSW_REG_CWTP_MIN_VALUE 64
+#define MLXSW_REG_CWTP_MAX_PROFILE 2
+#define MLXSW_REG_CWTP_DEFAULT_PROFILE 1
+
+static inline void mlxsw_reg_cwtp_pack(char *payload, u8 local_port,
+ u8 traffic_class)
+{
+ int i;
+
+ MLXSW_REG_ZERO(cwtp, payload);
+ mlxsw_reg_cwtp_local_port_set(payload, local_port);
+ mlxsw_reg_cwtp_traffic_class_set(payload, traffic_class);
+
+ for (i = 0; i <= MLXSW_REG_CWTP_MAX_PROFILE; i++) {
+ mlxsw_reg_cwtp_profile_min_set(payload, i,
+ MLXSW_REG_CWTP_MIN_VALUE);
+ mlxsw_reg_cwtp_profile_max_set(payload, i,
+ MLXSW_REG_CWTP_MIN_VALUE);
+ }
+}
+
+#define MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile) (profile - 1)
+
+static inline void
+mlxsw_reg_cwtp_profile_pack(char *payload, u8 profile, u32 min, u32 max,
+ u32 probability)
+{
+ u8 index = MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile);
+
+ mlxsw_reg_cwtp_profile_min_set(payload, index, min);
+ mlxsw_reg_cwtp_profile_max_set(payload, index, max);
+ mlxsw_reg_cwtp_profile_percent_set(payload, index, probability);
+}
+
+/* CWTPM - Congestion WRED ECN TClass and Pool Mapping
+ * ---------------------------------------------------
+ * The CWTPM register maps each egress port and traffic class to profile num.
+ */
+#define MLXSW_REG_CWTPM_ID 0x2803
+#define MLXSW_REG_CWTPM_LEN 0x44
+
+MLXSW_REG_DEFINE(cwtpm, MLXSW_REG_CWTPM_ID, MLXSW_REG_CWTPM_LEN);
+
+/* reg_cwtpm_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtpm, local_port, 0, 16, 8);
+
+/* reg_cwtpm_traffic_class
+ * Traffic Class to configure
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtpm, traffic_class, 32, 0, 8);
+
+/* reg_cwtpm_ew
+ * Control enablement of WRED for traffic class:
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ew, 36, 1, 1);
+
+/* reg_cwtpm_ee
+ * Control enablement of ECN for traffic class:
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ee, 36, 0, 1);
+
+/* reg_cwtpm_tcp_g
+ * TCP Green Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, tcp_g, 52, 0, 2);
+
+/* reg_cwtpm_tcp_y
+ * TCP Yellow Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, tcp_y, 56, 16, 2);
+
+/* reg_cwtpm_tcp_r
+ * TCP Red Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, tcp_r, 56, 0, 2);
+
+/* reg_cwtpm_ntcp_g
+ * Non-TCP Green Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ntcp_g, 60, 0, 2);
+
+/* reg_cwtpm_ntcp_y
+ * Non-TCP Yellow Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ntcp_y, 64, 16, 2);
+
+/* reg_cwtpm_ntcp_r
+ * Non-TCP Red Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ntcp_r, 64, 0, 2);
+
+#define MLXSW_REG_CWTPM_RESET_PROFILE 0
+
+static inline void mlxsw_reg_cwtpm_pack(char *payload, u8 local_port,
+ u8 traffic_class, u8 profile,
+ bool wred, bool ecn)
+{
+ MLXSW_REG_ZERO(cwtpm, payload);
+ mlxsw_reg_cwtpm_local_port_set(payload, local_port);
+ mlxsw_reg_cwtpm_traffic_class_set(payload, traffic_class);
+ mlxsw_reg_cwtpm_ew_set(payload, wred);
+ mlxsw_reg_cwtpm_ee_set(payload, ecn);
+ mlxsw_reg_cwtpm_tcp_g_set(payload, profile);
+ mlxsw_reg_cwtpm_tcp_y_set(payload, profile);
+ mlxsw_reg_cwtpm_tcp_r_set(payload, profile);
+ mlxsw_reg_cwtpm_ntcp_g_set(payload, profile);
+ mlxsw_reg_cwtpm_ntcp_y_set(payload, profile);
+ mlxsw_reg_cwtpm_ntcp_r_set(payload, profile);
+}
+
+/* PGCR - Policy-Engine General Configuration Register
+ * ---------------------------------------------------
+ * This register configures general Policy-Engine settings.
+ */
+#define MLXSW_REG_PGCR_ID 0x3001
+#define MLXSW_REG_PGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(pgcr, MLXSW_REG_PGCR_ID, MLXSW_REG_PGCR_LEN);
+
+/* reg_pgcr_default_action_pointer_base
+ * Default action pointer base. Each region has a default action pointer
+ * which is equal to default_action_pointer_base + region_id.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pgcr, default_action_pointer_base, 0x1C, 0, 24);
+
+static inline void mlxsw_reg_pgcr_pack(char *payload, u32 pointer_base)
+{
+ MLXSW_REG_ZERO(pgcr, payload);
+ mlxsw_reg_pgcr_default_action_pointer_base_set(payload, pointer_base);
+}
+
+/* PPBT - Policy-Engine Port Binding Table
+ * ---------------------------------------
+ * This register is used for configuration of the Port Binding Table.
+ */
+#define MLXSW_REG_PPBT_ID 0x3002
+#define MLXSW_REG_PPBT_LEN 0x14
+
+MLXSW_REG_DEFINE(ppbt, MLXSW_REG_PPBT_ID, MLXSW_REG_PPBT_LEN);
+
+enum mlxsw_reg_pxbt_e {
+ MLXSW_REG_PXBT_E_IACL,
+ MLXSW_REG_PXBT_E_EACL,
+};
+
+/* reg_ppbt_e
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppbt, e, 0x00, 31, 1);
+
+enum mlxsw_reg_pxbt_op {
+ MLXSW_REG_PXBT_OP_BIND,
+ MLXSW_REG_PXBT_OP_UNBIND,
+};
+
+/* reg_ppbt_op
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ppbt, op, 0x00, 28, 3);
+
+/* reg_ppbt_local_port
+ * Local port. Not including CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppbt, local_port, 0x00, 16, 8);
+
+/* reg_ppbt_g
+ * group - When set, the binding is of an ACL group. When cleared,
+ * the binding is of an ACL.
+ * Must be set to 1 for Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ppbt, g, 0x10, 31, 1);
+
+/* reg_ppbt_acl_info
+ * ACL/ACL group identifier. If the g bit is set, this field should hold
+ * the acl_group_id, else it should hold the acl_id.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ppbt, acl_info, 0x10, 0, 16);
+
+static inline void mlxsw_reg_ppbt_pack(char *payload, enum mlxsw_reg_pxbt_e e,
+ enum mlxsw_reg_pxbt_op op,
+ u8 local_port, u16 acl_info)
+{
+ MLXSW_REG_ZERO(ppbt, payload);
+ mlxsw_reg_ppbt_e_set(payload, e);
+ mlxsw_reg_ppbt_op_set(payload, op);
+ mlxsw_reg_ppbt_local_port_set(payload, local_port);
+ mlxsw_reg_ppbt_g_set(payload, true);
+ mlxsw_reg_ppbt_acl_info_set(payload, acl_info);
+}
+
+/* PACL - Policy-Engine ACL Register
+ * ---------------------------------
+ * This register is used for configuration of the ACL.
+ */
+#define MLXSW_REG_PACL_ID 0x3004
+#define MLXSW_REG_PACL_LEN 0x70
+
+MLXSW_REG_DEFINE(pacl, MLXSW_REG_PACL_ID, MLXSW_REG_PACL_LEN);
+
+/* reg_pacl_v
+ * Valid. Setting the v bit makes the ACL valid. It should not be cleared
+ * while the ACL is bounded to either a port, VLAN or ACL rule.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pacl, v, 0x00, 24, 1);
+
+/* reg_pacl_acl_id
+ * An identifier representing the ACL (managed by software)
+ * Range 0 .. cap_max_acl_regions - 1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pacl, acl_id, 0x08, 0, 16);
+
+#define MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN 16
+
+/* reg_pacl_tcam_region_info
+ * Opaque object that represents a TCAM region.
+ * Obtained through PTAR register.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, pacl, tcam_region_info, 0x30,
+ MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+static inline void mlxsw_reg_pacl_pack(char *payload, u16 acl_id,
+ bool valid, const char *tcam_region_info)
+{
+ MLXSW_REG_ZERO(pacl, payload);
+ mlxsw_reg_pacl_acl_id_set(payload, acl_id);
+ mlxsw_reg_pacl_v_set(payload, valid);
+ mlxsw_reg_pacl_tcam_region_info_memcpy_to(payload, tcam_region_info);
+}
+
+/* PAGT - Policy-Engine ACL Group Table
+ * ------------------------------------
+ * This register is used for configuration of the ACL Group Table.
+ */
+#define MLXSW_REG_PAGT_ID 0x3005
+#define MLXSW_REG_PAGT_BASE_LEN 0x30
+#define MLXSW_REG_PAGT_ACL_LEN 4
+#define MLXSW_REG_PAGT_ACL_MAX_NUM 16
+#define MLXSW_REG_PAGT_LEN (MLXSW_REG_PAGT_BASE_LEN + \
+ MLXSW_REG_PAGT_ACL_MAX_NUM * MLXSW_REG_PAGT_ACL_LEN)
+
+MLXSW_REG_DEFINE(pagt, MLXSW_REG_PAGT_ID, MLXSW_REG_PAGT_LEN);
+
+/* reg_pagt_size
+ * Number of ACLs in the group.
+ * Size 0 invalidates a group.
+ * Range 0 .. cap_max_acl_group_size (hard coded to 16 for now)
+ * Total number of ACLs in all groups must be lower or equal
+ * to cap_max_acl_tot_groups
+ * Note: a group which is binded must not be invalidated
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pagt, size, 0x00, 0, 8);
+
+/* reg_pagt_acl_group_id
+ * An identifier (numbered from 0..cap_max_acl_groups-1) representing
+ * the ACL Group identifier (managed by software).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16);
+
+/* reg_pagt_acl_id
+ * ACL identifier
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pagt, acl_id, 0x30, 0, 16, 0x04, 0x00, false);
+
+static inline void mlxsw_reg_pagt_pack(char *payload, u16 acl_group_id)
+{
+ MLXSW_REG_ZERO(pagt, payload);
+ mlxsw_reg_pagt_acl_group_id_set(payload, acl_group_id);
+}
+
+static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index,
+ u16 acl_id)
+{
+ u8 size = mlxsw_reg_pagt_size_get(payload);
+
+ if (index >= size)
+ mlxsw_reg_pagt_size_set(payload, index + 1);
+ mlxsw_reg_pagt_acl_id_set(payload, index, acl_id);
+}
+
+/* PTAR - Policy-Engine TCAM Allocation Register
+ * ---------------------------------------------
+ * This register is used for allocation of regions in the TCAM.
+ * Note: Query method is not supported on this register.
+ */
+#define MLXSW_REG_PTAR_ID 0x3006
+#define MLXSW_REG_PTAR_BASE_LEN 0x20
+#define MLXSW_REG_PTAR_KEY_ID_LEN 1
+#define MLXSW_REG_PTAR_KEY_ID_MAX_NUM 16
+#define MLXSW_REG_PTAR_LEN (MLXSW_REG_PTAR_BASE_LEN + \
+ MLXSW_REG_PTAR_KEY_ID_MAX_NUM * MLXSW_REG_PTAR_KEY_ID_LEN)
+
+MLXSW_REG_DEFINE(ptar, MLXSW_REG_PTAR_ID, MLXSW_REG_PTAR_LEN);
+
+enum mlxsw_reg_ptar_op {
+ /* allocate a TCAM region */
+ MLXSW_REG_PTAR_OP_ALLOC,
+ /* resize a TCAM region */
+ MLXSW_REG_PTAR_OP_RESIZE,
+ /* deallocate TCAM region */
+ MLXSW_REG_PTAR_OP_FREE,
+ /* test allocation */
+ MLXSW_REG_PTAR_OP_TEST,
+};
+
+/* reg_ptar_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ptar, op, 0x00, 28, 4);
+
+/* reg_ptar_action_set_type
+ * Type of action set to be used on this region.
+ * For Spectrum and Spectrum-2, this is always type 2 - "flexible"
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptar, action_set_type, 0x00, 16, 8);
+
+enum mlxsw_reg_ptar_key_type {
+ MLXSW_REG_PTAR_KEY_TYPE_FLEX = 0x50, /* Spetrum */
+ MLXSW_REG_PTAR_KEY_TYPE_FLEX2 = 0x51, /* Spectrum-2 */
+};
+
+/* reg_ptar_key_type
+ * TCAM key type for the region.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptar, key_type, 0x00, 0, 8);
+
+/* reg_ptar_region_size
+ * TCAM region size. When allocating/resizing this is the requested size,
+ * the response is the actual size. Note that actual size may be
+ * larger than requested.
+ * Allowed range 1 .. cap_max_rules-1
+ * Reserved during op deallocate.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptar, region_size, 0x04, 0, 16);
+
+/* reg_ptar_region_id
+ * Region identifier
+ * Range 0 .. cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptar, region_id, 0x08, 0, 16);
+
+/* reg_ptar_tcam_region_info
+ * Opaque object that represents the TCAM region.
+ * Returned when allocating a region.
+ * Provided by software for ACL generation and region deallocation and resize.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ptar, tcam_region_info, 0x10,
+ MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+/* reg_ptar_flexible_key_id
+ * Identifier of the Flexible Key.
+ * Only valid if key_type == "FLEX_KEY"
+ * The key size will be rounded up to one of the following values:
+ * 9B, 18B, 36B, 54B.
+ * This field is reserved for in resize operation.
+ * Access: WO
+ */
+MLXSW_ITEM8_INDEXED(reg, ptar, flexible_key_id, 0x20, 0, 8,
+ MLXSW_REG_PTAR_KEY_ID_LEN, 0x00, false);
+
+static inline void mlxsw_reg_ptar_pack(char *payload, enum mlxsw_reg_ptar_op op,
+ enum mlxsw_reg_ptar_key_type key_type,
+ u16 region_size, u16 region_id,
+ const char *tcam_region_info)
+{
+ MLXSW_REG_ZERO(ptar, payload);
+ mlxsw_reg_ptar_op_set(payload, op);
+ mlxsw_reg_ptar_action_set_type_set(payload, 2); /* "flexible" */
+ mlxsw_reg_ptar_key_type_set(payload, key_type);
+ mlxsw_reg_ptar_region_size_set(payload, region_size);
+ mlxsw_reg_ptar_region_id_set(payload, region_id);
+ mlxsw_reg_ptar_tcam_region_info_memcpy_to(payload, tcam_region_info);
+}
+
+static inline void mlxsw_reg_ptar_key_id_pack(char *payload, int index,
+ u16 key_id)
+{
+ mlxsw_reg_ptar_flexible_key_id_set(payload, index, key_id);
+}
+
+static inline void mlxsw_reg_ptar_unpack(char *payload, char *tcam_region_info)
+{
+ mlxsw_reg_ptar_tcam_region_info_memcpy_from(payload, tcam_region_info);
+}
+
+/* PPBS - Policy-Engine Policy Based Switching Register
+ * ----------------------------------------------------
+ * This register retrieves and sets Policy Based Switching Table entries.
+ */
+#define MLXSW_REG_PPBS_ID 0x300C
+#define MLXSW_REG_PPBS_LEN 0x14
+
+MLXSW_REG_DEFINE(ppbs, MLXSW_REG_PPBS_ID, MLXSW_REG_PPBS_LEN);
+
+/* reg_ppbs_pbs_ptr
+ * Index into the PBS table.
+ * For Spectrum, the index points to the KVD Linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppbs, pbs_ptr, 0x08, 0, 24);
+
+/* reg_ppbs_system_port
+ * Unique port identifier for the final destination of the packet.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ppbs, system_port, 0x10, 0, 16);
+
+static inline void mlxsw_reg_ppbs_pack(char *payload, u32 pbs_ptr,
+ u16 system_port)
+{
+ MLXSW_REG_ZERO(ppbs, payload);
+ mlxsw_reg_ppbs_pbs_ptr_set(payload, pbs_ptr);
+ mlxsw_reg_ppbs_system_port_set(payload, system_port);
+}
+
+/* PRCR - Policy-Engine Rules Copy Register
+ * ----------------------------------------
+ * This register is used for accessing rules within a TCAM region.
+ */
+#define MLXSW_REG_PRCR_ID 0x300D
+#define MLXSW_REG_PRCR_LEN 0x40
+
+MLXSW_REG_DEFINE(prcr, MLXSW_REG_PRCR_ID, MLXSW_REG_PRCR_LEN);
+
+enum mlxsw_reg_prcr_op {
+ /* Move rules. Moves the rules from "tcam_region_info" starting
+ * at offset "offset" to "dest_tcam_region_info"
+ * at offset "dest_offset."
+ */
+ MLXSW_REG_PRCR_OP_MOVE,
+ /* Copy rules. Copies the rules from "tcam_region_info" starting
+ * at offset "offset" to "dest_tcam_region_info"
+ * at offset "dest_offset."
+ */
+ MLXSW_REG_PRCR_OP_COPY,
+};
+
+/* reg_prcr_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, prcr, op, 0x00, 28, 4);
+
+/* reg_prcr_offset
+ * Offset within the source region to copy/move from.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, prcr, offset, 0x00, 0, 16);
+
+/* reg_prcr_size
+ * The number of rules to copy/move.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, prcr, size, 0x04, 0, 16);
+
+/* reg_prcr_tcam_region_info
+ * Opaque object that represents the source TCAM region.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF(reg, prcr, tcam_region_info, 0x10,
+ MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+/* reg_prcr_dest_offset
+ * Offset within the source region to copy/move to.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, prcr, dest_offset, 0x20, 0, 16);
+
+/* reg_prcr_dest_tcam_region_info
+ * Opaque object that represents the destination TCAM region.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF(reg, prcr, dest_tcam_region_info, 0x30,
+ MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+static inline void mlxsw_reg_prcr_pack(char *payload, enum mlxsw_reg_prcr_op op,
+ const char *src_tcam_region_info,
+ u16 src_offset,
+ const char *dest_tcam_region_info,
+ u16 dest_offset, u16 size)
+{
+ MLXSW_REG_ZERO(prcr, payload);
+ mlxsw_reg_prcr_op_set(payload, op);
+ mlxsw_reg_prcr_offset_set(payload, src_offset);
+ mlxsw_reg_prcr_size_set(payload, size);
+ mlxsw_reg_prcr_tcam_region_info_memcpy_to(payload,
+ src_tcam_region_info);
+ mlxsw_reg_prcr_dest_offset_set(payload, dest_offset);
+ mlxsw_reg_prcr_dest_tcam_region_info_memcpy_to(payload,
+ dest_tcam_region_info);
+}
+
+/* PEFA - Policy-Engine Extended Flexible Action Register
+ * ------------------------------------------------------
+ * This register is used for accessing an extended flexible action entry
+ * in the central KVD Linear Database.
+ */
+#define MLXSW_REG_PEFA_ID 0x300F
+#define MLXSW_REG_PEFA_LEN 0xB0
+
+MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN);
+
+/* reg_pefa_index
+ * Index in the KVD Linear Centralized Database.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24);
+
+/* reg_pefa_a
+ * Index in the KVD Linear Centralized Database.
+ * Activity
+ * For a new entry: set if ca=0, clear if ca=1
+ * Set if a packet lookup has hit on the specific entry
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pefa, a, 0x04, 29, 1);
+
+/* reg_pefa_ca
+ * Clear activity
+ * When write: activity is according to this field
+ * When read: after reading the activity is cleared according to ca
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, pefa, ca, 0x04, 24, 1);
+
+#define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8
+
+/* reg_pefa_flex_action_set
+ * Action-set to perform when rule is matched.
+ * Must be zero padded if action set is shorter.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN);
+
+static inline void mlxsw_reg_pefa_pack(char *payload, u32 index, bool ca,
+ const char *flex_action_set)
+{
+ MLXSW_REG_ZERO(pefa, payload);
+ mlxsw_reg_pefa_index_set(payload, index);
+ mlxsw_reg_pefa_ca_set(payload, ca);
+ if (flex_action_set)
+ mlxsw_reg_pefa_flex_action_set_memcpy_to(payload,
+ flex_action_set);
+}
+
+static inline void mlxsw_reg_pefa_unpack(char *payload, bool *p_a)
+{
+ *p_a = mlxsw_reg_pefa_a_get(payload);
+}
+
+/* PTCE-V2 - Policy-Engine TCAM Entry Register Version 2
+ * -----------------------------------------------------
+ * This register is used for accessing rules within a TCAM region.
+ * It is a new version of PTCE in order to support wider key,
+ * mask and action within a TCAM region. This register is not supported
+ * by SwitchX and SwitchX-2.
+ */
+#define MLXSW_REG_PTCE2_ID 0x3017
+#define MLXSW_REG_PTCE2_LEN 0x1D8
+
+MLXSW_REG_DEFINE(ptce2, MLXSW_REG_PTCE2_ID, MLXSW_REG_PTCE2_LEN);
+
+/* reg_ptce2_v
+ * Valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce2, v, 0x00, 31, 1);
+
+/* reg_ptce2_a
+ * Activity. Set if a packet lookup has hit on the specific entry.
+ * To clear the "a" bit, use "clear activity" op or "clear on read" op.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptce2, a, 0x00, 30, 1);
+
+enum mlxsw_reg_ptce2_op {
+ /* Read operation. */
+ MLXSW_REG_PTCE2_OP_QUERY_READ = 0,
+ /* clear on read operation. Used to read entry
+ * and clear Activity bit.
+ */
+ MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ = 1,
+ /* Write operation. Used to write a new entry to the table.
+ * All R/W fields are relevant for new entry. Activity bit is set
+ * for new entries - Note write with v = 0 will delete the entry.
+ */
+ MLXSW_REG_PTCE2_OP_WRITE_WRITE = 0,
+ /* Update action. Only action set will be updated. */
+ MLXSW_REG_PTCE2_OP_WRITE_UPDATE = 1,
+ /* Clear activity. A bit is cleared for the entry. */
+ MLXSW_REG_PTCE2_OP_WRITE_CLEAR_ACTIVITY = 2,
+};
+
+/* reg_ptce2_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ptce2, op, 0x00, 20, 3);
+
+/* reg_ptce2_offset
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16);
+
+/* reg_ptce2_priority
+ * Priority of the rule, higher values win. The range is 1..cap_kvd_size-1.
+ * Note: priority does not have to be unique per rule.
+ * Within a region, higher priority should have lower offset (no limitation
+ * between regions in a multi-region).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce2, priority, 0x04, 0, 24);
+
+/* reg_ptce2_tcam_region_info
+ * Opaque object that represents the TCAM region.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF(reg, ptce2, tcam_region_info, 0x10,
+ MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+#define MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN 96
+
+/* reg_ptce2_flex_key_blocks
+ * ACL Key.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ptce2, flex_key_blocks, 0x20,
+ MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+
+/* reg_ptce2_mask
+ * mask- in the same size as key. A bit that is set directs the TCAM
+ * to compare the corresponding bit in key. A bit that is clear directs
+ * the TCAM to ignore the corresponding bit in key.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80,
+ MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+
+/* reg_ptce2_flex_action_set
+ * ACL action set.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0,
+ MLXSW_REG_FLEX_ACTION_SET_LEN);
+
+static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid,
+ enum mlxsw_reg_ptce2_op op,
+ const char *tcam_region_info,
+ u16 offset, u32 priority)
+{
+ MLXSW_REG_ZERO(ptce2, payload);
+ mlxsw_reg_ptce2_v_set(payload, valid);
+ mlxsw_reg_ptce2_op_set(payload, op);
+ mlxsw_reg_ptce2_offset_set(payload, offset);
+ mlxsw_reg_ptce2_priority_set(payload, priority);
+ mlxsw_reg_ptce2_tcam_region_info_memcpy_to(payload, tcam_region_info);
+}
+
+/* PERPT - Policy-Engine ERP Table Register
+ * ----------------------------------------
+ * This register adds and removes eRPs from the eRP table.
+ */
+#define MLXSW_REG_PERPT_ID 0x3021
+#define MLXSW_REG_PERPT_LEN 0x80
+
+MLXSW_REG_DEFINE(perpt, MLXSW_REG_PERPT_ID, MLXSW_REG_PERPT_LEN);
+
+/* reg_perpt_erpt_bank
+ * eRP table bank.
+ * Range 0 .. cap_max_erp_table_banks - 1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, perpt, erpt_bank, 0x00, 16, 4);
+
+/* reg_perpt_erpt_index
+ * Index to eRP table within the eRP bank.
+ * Range is 0 .. cap_max_erp_table_bank_size - 1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, perpt, erpt_index, 0x00, 0, 8);
+
+enum mlxsw_reg_perpt_key_size {
+ MLXSW_REG_PERPT_KEY_SIZE_2KB,
+ MLXSW_REG_PERPT_KEY_SIZE_4KB,
+ MLXSW_REG_PERPT_KEY_SIZE_8KB,
+ MLXSW_REG_PERPT_KEY_SIZE_12KB,
+};
+
+/* reg_perpt_key_size
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, key_size, 0x04, 0, 4);
+
+/* reg_perpt_bf_bypass
+ * 0 - The eRP is used only if bloom filter state is set for the given
+ * rule.
+ * 1 - The eRP is used regardless of bloom filter state.
+ * The bypass is an OR condition of region_id or eRP. See PERCR.bf_bypass
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, perpt, bf_bypass, 0x08, 8, 1);
+
+/* reg_perpt_erp_id
+ * eRP ID for use by the rules.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, perpt, erp_id, 0x08, 0, 4);
+
+/* reg_perpt_erpt_base_bank
+ * Base eRP table bank, points to head of erp_vector
+ * Range is 0 .. cap_max_erp_table_banks - 1
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, erpt_base_bank, 0x0C, 16, 4);
+
+/* reg_perpt_erpt_base_index
+ * Base index to eRP table within the eRP bank
+ * Range is 0 .. cap_max_erp_table_bank_size - 1
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, erpt_base_index, 0x0C, 0, 8);
+
+/* reg_perpt_erp_index_in_vector
+ * eRP index in the vector.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, perpt, erp_index_in_vector, 0x10, 0, 4);
+
+/* reg_perpt_erp_vector
+ * eRP vector.
+ * Access: OP
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, perpt, erp_vector, 0x14, 4, 1);
+
+/* reg_perpt_mask
+ * Mask
+ * 0 - A-TCAM will ignore the bit in key
+ * 1 - A-TCAM will compare the bit in key
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, perpt, mask, 0x20, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+
+static inline void mlxsw_reg_perpt_erp_vector_pack(char *payload,
+ unsigned long *erp_vector,
+ unsigned long size)
+{
+ unsigned long bit;
+
+ for_each_set_bit(bit, erp_vector, size)
+ mlxsw_reg_perpt_erp_vector_set(payload, bit, true);
+}
+
+static inline void
+mlxsw_reg_perpt_pack(char *payload, u8 erpt_bank, u8 erpt_index,
+ enum mlxsw_reg_perpt_key_size key_size, u8 erp_id,
+ u8 erpt_base_bank, u8 erpt_base_index, u8 erp_index,
+ char *mask)
+{
+ MLXSW_REG_ZERO(perpt, payload);
+ mlxsw_reg_perpt_erpt_bank_set(payload, erpt_bank);
+ mlxsw_reg_perpt_erpt_index_set(payload, erpt_index);
+ mlxsw_reg_perpt_key_size_set(payload, key_size);
+ mlxsw_reg_perpt_bf_bypass_set(payload, true);
+ mlxsw_reg_perpt_erp_id_set(payload, erp_id);
+ mlxsw_reg_perpt_erpt_base_bank_set(payload, erpt_base_bank);
+ mlxsw_reg_perpt_erpt_base_index_set(payload, erpt_base_index);
+ mlxsw_reg_perpt_erp_index_in_vector_set(payload, erp_index);
+ mlxsw_reg_perpt_mask_memcpy_to(payload, mask);
+}
+
+/* PERAR - Policy-Engine Region Association Register
+ * -------------------------------------------------
+ * This register associates a hw region for region_id's. Changing on the fly
+ * is supported by the device.
+ */
+#define MLXSW_REG_PERAR_ID 0x3026
+#define MLXSW_REG_PERAR_LEN 0x08
+
+MLXSW_REG_DEFINE(perar, MLXSW_REG_PERAR_ID, MLXSW_REG_PERAR_LEN);
+
+/* reg_perar_region_id
+ * Region identifier
+ * Range 0 .. cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, perar, region_id, 0x00, 0, 16);
+
+static inline unsigned int
+mlxsw_reg_perar_hw_regions_needed(unsigned int block_num)
+{
+ return DIV_ROUND_UP(block_num, 4);
+}
+
+/* reg_perar_hw_region
+ * HW Region
+ * Range 0 .. cap_max_regions-1
+ * Default: hw_region = region_id
+ * For a 8 key block region, 2 consecutive regions are used
+ * For a 12 key block region, 3 consecutive regions are used
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, perar, hw_region, 0x04, 0, 16);
+
+static inline void mlxsw_reg_perar_pack(char *payload, u16 region_id,
+ u16 hw_region)
+{
+ MLXSW_REG_ZERO(perar, payload);
+ mlxsw_reg_perar_region_id_set(payload, region_id);
+ mlxsw_reg_perar_hw_region_set(payload, hw_region);
+}
+
+/* PTCE-V3 - Policy-Engine TCAM Entry Register Version 3
+ * -----------------------------------------------------
+ * This register is a new version of PTCE-V2 in order to support the
+ * A-TCAM. This register is not supported by SwitchX/-2 and Spectrum.
+ */
+#define MLXSW_REG_PTCE3_ID 0x3027
+#define MLXSW_REG_PTCE3_LEN 0xF0
+
+MLXSW_REG_DEFINE(ptce3, MLXSW_REG_PTCE3_ID, MLXSW_REG_PTCE3_LEN);
+
+/* reg_ptce3_v
+ * Valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, v, 0x00, 31, 1);
+
+enum mlxsw_reg_ptce3_op {
+ /* Write operation. Used to write a new entry to the table.
+ * All R/W fields are relevant for new entry. Activity bit is set
+ * for new entries. Write with v = 0 will delete the entry. Must
+ * not be used if an entry exists.
+ */
+ MLXSW_REG_PTCE3_OP_WRITE_WRITE = 0,
+ /* Update operation */
+ MLXSW_REG_PTCE3_OP_WRITE_UPDATE = 1,
+ /* Read operation */
+ MLXSW_REG_PTCE3_OP_QUERY_READ = 0,
+};
+
+/* reg_ptce3_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ptce3, op, 0x00, 20, 3);
+
+/* reg_ptce3_priority
+ * Priority of the rule. Higher values win.
+ * For Spectrum-2 range is 1..cap_kvd_size - 1
+ * Note: Priority does not have to be unique per rule.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, priority, 0x04, 0, 24);
+
+/* reg_ptce3_tcam_region_info
+ * Opaque object that represents the TCAM region.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF(reg, ptce3, tcam_region_info, 0x10,
+ MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN);
+
+/* reg_ptce3_flex2_key_blocks
+ * ACL key. The key must be masked according to eRP (if exists) or
+ * according to master mask.
+ * Access: Index
+ */
+MLXSW_ITEM_BUF(reg, ptce3, flex2_key_blocks, 0x20,
+ MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+
+/* reg_ptce3_erp_id
+ * eRP ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, erp_id, 0x80, 0, 4);
+
+/* reg_ptce3_delta_start
+ * Start point of delta_value and delta_mask, in bits. Must not exceed
+ * num_key_blocks * 36 - 8. Reserved when delta_mask = 0.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, delta_start, 0x84, 0, 10);
+
+/* reg_ptce3_delta_mask
+ * Delta mask.
+ * 0 - Ignore relevant bit in delta_value
+ * 1 - Compare relevant bit in delta_value
+ * Delta mask must not be set for reserved fields in the key blocks.
+ * Note: No delta when no eRPs. Thus, for regions with
+ * PERERP.erpt_pointer_valid = 0 the delta mask must be 0.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, delta_mask, 0x88, 16, 8);
+
+/* reg_ptce3_delta_value
+ * Delta value.
+ * Bits which are masked by delta_mask must be 0.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptce3, delta_value, 0x88, 0, 8);
+
+/* reg_ptce3_prune_vector
+ * Pruning vector relative to the PERPT.erp_id.
+ * Used for reducing lookups.
+ * 0 - NEED: Do a lookup using the eRP.
+ * 1 - PRUNE: Do not perform a lookup using the eRP.
+ * Maybe be modified by PEAPBL and PEAPBM.
+ * Note: In Spectrum-2, a region of 8 key blocks must be set to either
+ * all 1's or all 0's.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, ptce3, prune_vector, 0x90, 4, 1);
+
+/* reg_ptce3_prune_ctcam
+ * Pruning on C-TCAM. Used for reducing lookups.
+ * 0 - NEED: Do a lookup in the C-TCAM.
+ * 1 - PRUNE: Do not perform a lookup in the C-TCAM.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, prune_ctcam, 0x94, 31, 1);
+
+/* reg_ptce3_large_exists
+ * Large entry key ID exists.
+ * Within the region:
+ * 0 - SINGLE: The large_entry_key_id is not currently in use.
+ * For rule insert: The MSB of the key (blocks 6..11) will be added.
+ * For rule delete: The MSB of the key will be removed.
+ * 1 - NON_SINGLE: The large_entry_key_id is currently in use.
+ * For rule insert: The MSB of the key (blocks 6..11) will not be added.
+ * For rule delete: The MSB of the key will not be removed.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ptce3, large_exists, 0x98, 31, 1);
+
+/* reg_ptce3_large_entry_key_id
+ * Large entry key ID.
+ * A key for 12 key blocks rules. Reserved when region has less than 12 key
+ * blocks. Must be different for different keys which have the same common
+ * 6 key blocks (MSB, blocks 6..11) key within a region.
+ * Range is 0..cap_max_pe_large_key_id - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, large_entry_key_id, 0x98, 0, 24);
+
+/* reg_ptce3_action_pointer
+ * Pointer to action.
+ * Range is 0..cap_max_kvd_action_sets - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce3, action_pointer, 0xA0, 0, 24);
+
+static inline void mlxsw_reg_ptce3_pack(char *payload, bool valid,
+ enum mlxsw_reg_ptce3_op op,
+ u32 priority,
+ const char *tcam_region_info,
+ const char *key, u8 erp_id,
+ bool large_exists, u32 lkey_id,
+ u32 action_pointer)
+{
+ MLXSW_REG_ZERO(ptce3, payload);
+ mlxsw_reg_ptce3_v_set(payload, valid);
+ mlxsw_reg_ptce3_op_set(payload, op);
+ mlxsw_reg_ptce3_priority_set(payload, priority);
+ mlxsw_reg_ptce3_tcam_region_info_memcpy_to(payload, tcam_region_info);
+ mlxsw_reg_ptce3_flex2_key_blocks_memcpy_to(payload, key);
+ mlxsw_reg_ptce3_erp_id_set(payload, erp_id);
+ mlxsw_reg_ptce3_large_exists_set(payload, large_exists);
+ mlxsw_reg_ptce3_large_entry_key_id_set(payload, lkey_id);
+ mlxsw_reg_ptce3_action_pointer_set(payload, action_pointer);
+}
+
+/* PERCR - Policy-Engine Region Configuration Register
+ * ---------------------------------------------------
+ * This register configures the region parameters. The region_id must be
+ * allocated.
+ */
+#define MLXSW_REG_PERCR_ID 0x302A
+#define MLXSW_REG_PERCR_LEN 0x80
+
+MLXSW_REG_DEFINE(percr, MLXSW_REG_PERCR_ID, MLXSW_REG_PERCR_LEN);
+
+/* reg_percr_region_id
+ * Region identifier.
+ * Range 0..cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, percr, region_id, 0x00, 0, 16);
+
+/* reg_percr_atcam_ignore_prune
+ * Ignore prune_vector by other A-TCAM rules. Used e.g., for a new rule.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, percr, atcam_ignore_prune, 0x04, 25, 1);
+
+/* reg_percr_ctcam_ignore_prune
+ * Ignore prune_ctcam by other A-TCAM rules. Used e.g., for a new rule.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, percr, ctcam_ignore_prune, 0x04, 24, 1);
+
+/* reg_percr_bf_bypass
+ * Bloom filter bypass.
+ * 0 - Bloom filter is used (default)
+ * 1 - Bloom filter is bypassed. The bypass is an OR condition of
+ * region_id or eRP. See PERPT.bf_bypass
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, percr, bf_bypass, 0x04, 16, 1);
+
+/* reg_percr_master_mask
+ * Master mask. Logical OR mask of all masks of all rules of a region
+ * (both A-TCAM and C-TCAM). When there are no eRPs
+ * (erpt_pointer_valid = 0), then this provides the mask.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, percr, master_mask, 0x20, 96);
+
+static inline void mlxsw_reg_percr_pack(char *payload, u16 region_id)
+{
+ MLXSW_REG_ZERO(percr, payload);
+ mlxsw_reg_percr_region_id_set(payload, region_id);
+ mlxsw_reg_percr_atcam_ignore_prune_set(payload, false);
+ mlxsw_reg_percr_ctcam_ignore_prune_set(payload, false);
+ mlxsw_reg_percr_bf_bypass_set(payload, true);
+}
+
+/* PERERP - Policy-Engine Region eRP Register
+ * ------------------------------------------
+ * This register configures the region eRP. The region_id must be
+ * allocated.
+ */
+#define MLXSW_REG_PERERP_ID 0x302B
+#define MLXSW_REG_PERERP_LEN 0x1C
+
+MLXSW_REG_DEFINE(pererp, MLXSW_REG_PERERP_ID, MLXSW_REG_PERERP_LEN);
+
+/* reg_pererp_region_id
+ * Region identifier.
+ * Range 0..cap_max_regions-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pererp, region_id, 0x00, 0, 16);
+
+/* reg_pererp_ctcam_le
+ * C-TCAM lookup enable. Reserved when erpt_pointer_valid = 0.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, ctcam_le, 0x04, 28, 1);
+
+/* reg_pererp_erpt_pointer_valid
+ * erpt_pointer is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, erpt_pointer_valid, 0x10, 31, 1);
+
+/* reg_pererp_erpt_bank_pointer
+ * Pointer to eRP table bank. May be modified at any time.
+ * Range 0..cap_max_erp_table_banks-1
+ * Reserved when erpt_pointer_valid = 0
+ */
+MLXSW_ITEM32(reg, pererp, erpt_bank_pointer, 0x10, 16, 4);
+
+/* reg_pererp_erpt_pointer
+ * Pointer to eRP table within the eRP bank. Can be changed for an
+ * existing region.
+ * Range 0..cap_max_erp_table_size-1
+ * Reserved when erpt_pointer_valid = 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, erpt_pointer, 0x10, 0, 8);
+
+/* reg_pererp_erpt_vector
+ * Vector of allowed eRP indexes starting from erpt_pointer within the
+ * erpt_bank_pointer. Next entries will be in next bank.
+ * Note that eRP index is used and not eRP ID.
+ * Reserved when erpt_pointer_valid = 0
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, pererp, erpt_vector, 0x14, 4, 1);
+
+/* reg_pererp_master_rp_id
+ * Master RP ID. When there are no eRPs, then this provides the eRP ID
+ * for the lookup. Can be changed for an existing region.
+ * Reserved when erpt_pointer_valid = 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pererp, master_rp_id, 0x18, 0, 4);
+
+static inline void mlxsw_reg_pererp_erp_vector_pack(char *payload,
+ unsigned long *erp_vector,
+ unsigned long size)
+{
+ unsigned long bit;
+
+ for_each_set_bit(bit, erp_vector, size)
+ mlxsw_reg_pererp_erpt_vector_set(payload, bit, true);
+}
+
+static inline void mlxsw_reg_pererp_pack(char *payload, u16 region_id,
+ bool ctcam_le, bool erpt_pointer_valid,
+ u8 erpt_bank_pointer, u8 erpt_pointer,
+ u8 master_rp_id)
+{
+ MLXSW_REG_ZERO(pererp, payload);
+ mlxsw_reg_pererp_region_id_set(payload, region_id);
+ mlxsw_reg_pererp_ctcam_le_set(payload, ctcam_le);
+ mlxsw_reg_pererp_erpt_pointer_valid_set(payload, erpt_pointer_valid);
+ mlxsw_reg_pererp_erpt_bank_pointer_set(payload, erpt_bank_pointer);
+ mlxsw_reg_pererp_erpt_pointer_set(payload, erpt_pointer);
+ mlxsw_reg_pererp_master_rp_id_set(payload, master_rp_id);
+}
+
+/* IEDR - Infrastructure Entry Delete Register
+ * ----------------------------------------------------
+ * This register is used for deleting entries from the entry tables.
+ * It is legitimate to attempt to delete a nonexisting entry (the device will
+ * respond as a good flow).
+ */
+#define MLXSW_REG_IEDR_ID 0x3804
+#define MLXSW_REG_IEDR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_IEDR_REC_LEN 0x8 /* record length */
+#define MLXSW_REG_IEDR_REC_MAX_COUNT 64
+#define MLXSW_REG_IEDR_LEN (MLXSW_REG_IEDR_BASE_LEN + \
+ MLXSW_REG_IEDR_REC_LEN * \
+ MLXSW_REG_IEDR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(iedr, MLXSW_REG_IEDR_ID, MLXSW_REG_IEDR_LEN);
+
+/* reg_iedr_num_rec
+ * Number of records.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, iedr, num_rec, 0x00, 0, 8);
+
+/* reg_iedr_rec_type
+ * Resource type.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_type, MLXSW_REG_IEDR_BASE_LEN, 24, 8,
+ MLXSW_REG_IEDR_REC_LEN, 0x00, false);
+
+/* reg_iedr_rec_size
+ * Size of entries do be deleted. The unit is 1 entry, regardless of entry type.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_size, MLXSW_REG_IEDR_BASE_LEN, 0, 11,
+ MLXSW_REG_IEDR_REC_LEN, 0x00, false);
+
+/* reg_iedr_rec_index_start
+ * Resource index start.
+ * Access: OP
+ */
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_index_start, MLXSW_REG_IEDR_BASE_LEN, 0, 24,
+ MLXSW_REG_IEDR_REC_LEN, 0x04, false);
+
+static inline void mlxsw_reg_iedr_pack(char *payload)
+{
+ MLXSW_REG_ZERO(iedr, payload);
+}
+
+static inline void mlxsw_reg_iedr_rec_pack(char *payload, int rec_index,
+ u8 rec_type, u16 rec_size,
+ u32 rec_index_start)
+{
+ u8 num_rec = mlxsw_reg_iedr_num_rec_get(payload);
+
+ if (rec_index >= num_rec)
+ mlxsw_reg_iedr_num_rec_set(payload, rec_index + 1);
+ mlxsw_reg_iedr_rec_type_set(payload, rec_index, rec_type);
+ mlxsw_reg_iedr_rec_size_set(payload, rec_index, rec_size);
+ mlxsw_reg_iedr_rec_index_start_set(payload, rec_index, rec_index_start);
+}
+
+/* QPTS - QoS Priority Trust State Register
+ * ----------------------------------------
+ * This register controls the port policy to calculate the switch priority and
+ * packet color based on incoming packet fields.
+ */
+#define MLXSW_REG_QPTS_ID 0x4002
+#define MLXSW_REG_QPTS_LEN 0x8
+
+MLXSW_REG_DEFINE(qpts, MLXSW_REG_QPTS_ID, MLXSW_REG_QPTS_LEN);
+
+/* reg_qpts_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is supported.
+ */
+MLXSW_ITEM32(reg, qpts, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_qpts_trust_state {
+ MLXSW_REG_QPTS_TRUST_STATE_PCP = 1,
+ MLXSW_REG_QPTS_TRUST_STATE_DSCP = 2, /* For MPLS, trust EXP. */
+};
+
+/* reg_qpts_trust_state
+ * Trust state for a given port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpts, trust_state, 0x04, 0, 3);
+
+static inline void mlxsw_reg_qpts_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_qpts_trust_state ts)
+{
+ MLXSW_REG_ZERO(qpts, payload);
+
+ mlxsw_reg_qpts_local_port_set(payload, local_port);
+ mlxsw_reg_qpts_trust_state_set(payload, ts);
+}
+
+/* QPCR - QoS Policer Configuration Register
+ * -----------------------------------------
+ * The QPCR register is used to create policers - that limit
+ * the rate of bytes or packets via some trap group.
+ */
+#define MLXSW_REG_QPCR_ID 0x4004
+#define MLXSW_REG_QPCR_LEN 0x28
+
+MLXSW_REG_DEFINE(qpcr, MLXSW_REG_QPCR_ID, MLXSW_REG_QPCR_LEN);
+
+enum mlxsw_reg_qpcr_g {
+ MLXSW_REG_QPCR_G_GLOBAL = 2,
+ MLXSW_REG_QPCR_G_STORM_CONTROL = 3,
+};
+
+/* reg_qpcr_g
+ * The policer type.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, g, 0x00, 14, 2);
+
+/* reg_qpcr_pid
+ * Policer ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, pid, 0x00, 0, 14);
+
+/* reg_qpcr_color_aware
+ * Is the policer aware of colors.
+ * Must be 0 (unaware) for cpu port.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, color_aware, 0x04, 15, 1);
+
+/* reg_qpcr_bytes
+ * Is policer limit is for bytes per sec or packets per sec.
+ * 0 - packets
+ * 1 - bytes
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, bytes, 0x04, 14, 1);
+
+enum mlxsw_reg_qpcr_ir_units {
+ MLXSW_REG_QPCR_IR_UNITS_M,
+ MLXSW_REG_QPCR_IR_UNITS_K,
+};
+
+/* reg_qpcr_ir_units
+ * Policer's units for cir and eir fields (for bytes limits only)
+ * 1 - 10^3
+ * 0 - 10^6
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, qpcr, ir_units, 0x04, 12, 1);
+
+enum mlxsw_reg_qpcr_rate_type {
+ MLXSW_REG_QPCR_RATE_TYPE_SINGLE = 1,
+ MLXSW_REG_QPCR_RATE_TYPE_DOUBLE = 2,
+};
+
+/* reg_qpcr_rate_type
+ * Policer can have one limit (single rate) or 2 limits with specific operation
+ * for packets that exceed the lower rate but not the upper one.
+ * (For cpu port must be single rate)
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, rate_type, 0x04, 8, 2);
+
+/* reg_qpc_cbs
+ * Policer's committed burst size.
+ * The policer is working with time slices of 50 nano sec. By default every
+ * slice is granted the proportionate share of the committed rate. If we want to
+ * allow a slice to exceed that share (while still keeping the rate per sec) we
+ * can allow burst. The burst size is between the default proportionate share
+ * (and no lower than 8) to 32Gb. (Even though giving a number higher than the
+ * committed rate will result in exceeding the rate). The burst size must be a
+ * log of 2 and will be determined by 2^cbs.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cbs, 0x08, 24, 6);
+
+/* reg_qpcr_cir
+ * Policer's committed rate.
+ * The rate used for sungle rate, the lower rate for double rate.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cir, 0x0C, 0, 32);
+
+/* reg_qpcr_eir
+ * Policer's exceed rate.
+ * The higher rate for double rate, reserved for single rate.
+ * Lower rate for double rate policer.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, eir, 0x10, 0, 32);
+
+#define MLXSW_REG_QPCR_DOUBLE_RATE_ACTION 2
+
+/* reg_qpcr_exceed_action.
+ * What to do with packets between the 2 limits for double rate.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, exceed_action, 0x14, 0, 4);
+
+enum mlxsw_reg_qpcr_action {
+ /* Discard */
+ MLXSW_REG_QPCR_ACTION_DISCARD = 1,
+ /* Forward and set color to red.
+ * If the packet is intended to cpu port, it will be dropped.
+ */
+ MLXSW_REG_QPCR_ACTION_FORWARD = 2,
+};
+
+/* reg_qpcr_violate_action
+ * What to do with packets that cross the cir limit (for single rate) or the eir
+ * limit (for double rate).
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, violate_action, 0x18, 0, 4);
+
+static inline void mlxsw_reg_qpcr_pack(char *payload, u16 pid,
+ enum mlxsw_reg_qpcr_ir_units ir_units,
+ bool bytes, u32 cir, u16 cbs)
+{
+ MLXSW_REG_ZERO(qpcr, payload);
+ mlxsw_reg_qpcr_pid_set(payload, pid);
+ mlxsw_reg_qpcr_g_set(payload, MLXSW_REG_QPCR_G_GLOBAL);
+ mlxsw_reg_qpcr_rate_type_set(payload, MLXSW_REG_QPCR_RATE_TYPE_SINGLE);
+ mlxsw_reg_qpcr_violate_action_set(payload,
+ MLXSW_REG_QPCR_ACTION_DISCARD);
+ mlxsw_reg_qpcr_cir_set(payload, cir);
+ mlxsw_reg_qpcr_ir_units_set(payload, ir_units);
+ mlxsw_reg_qpcr_bytes_set(payload, bytes);
+ mlxsw_reg_qpcr_cbs_set(payload, cbs);
+}
+
+/* QTCT - QoS Switch Traffic Class Table
+ * -------------------------------------
+ * Configures the mapping between the packet switch priority and the
+ * traffic class on the transmit port.
+ */
+#define MLXSW_REG_QTCT_ID 0x400A
+#define MLXSW_REG_QTCT_LEN 0x08
+
+MLXSW_REG_DEFINE(qtct, MLXSW_REG_QTCT_ID, MLXSW_REG_QTCT_LEN);
+
+/* reg_qtct_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is not supported.
+ */
+MLXSW_ITEM32(reg, qtct, local_port, 0x00, 16, 8);
+
+/* reg_qtct_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qtct, sub_port, 0x00, 8, 8);
+
+/* reg_qtct_switch_prio
+ * Switch priority.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qtct, switch_prio, 0x00, 0, 4);
+
+/* reg_qtct_tclass
+ * Traffic class.
+ * Default values:
+ * switch_prio 0 : tclass 1
+ * switch_prio 1 : tclass 0
+ * switch_prio i : tclass i, for i > 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qtct, tclass, 0x04, 0, 4);
+
+static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
+ u8 switch_prio, u8 tclass)
+{
+ MLXSW_REG_ZERO(qtct, payload);
+ mlxsw_reg_qtct_local_port_set(payload, local_port);
+ mlxsw_reg_qtct_switch_prio_set(payload, switch_prio);
+ mlxsw_reg_qtct_tclass_set(payload, tclass);
+}
+
+/* QEEC - QoS ETS Element Configuration Register
+ * ---------------------------------------------
+ * Configures the ETS elements.
+ */
+#define MLXSW_REG_QEEC_ID 0x400D
+#define MLXSW_REG_QEEC_LEN 0x20
+
+MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
+
+/* reg_qeec_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is supported.
+ */
+MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_qeec_hr {
+ MLXSW_REG_QEEC_HIERARCY_PORT,
+ MLXSW_REG_QEEC_HIERARCY_GROUP,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+};
+
+/* reg_qeec_element_hierarchy
+ * 0 - Port
+ * 1 - Group
+ * 2 - Subgroup
+ * 3 - Traffic Class
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qeec, element_hierarchy, 0x04, 16, 4);
+
+/* reg_qeec_element_index
+ * The index of the element in the hierarchy.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
+
+/* reg_qeec_next_element_index
+ * The index of the next (lower) element in the hierarchy.
+ * Access: RW
+ *
+ * Note: Reserved for element_hierarchy 0.
+ */
+MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
+
+/* reg_qeec_mise
+ * Min shaper configuration enable. Enables configuration of the min
+ * shaper on this ETS element
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+
+enum {
+ MLXSW_REG_QEEC_BYTES_MODE,
+ MLXSW_REG_QEEC_PACKETS_MODE,
+};
+
+/* reg_qeec_pb
+ * Packets or bytes mode.
+ * 0 - Bytes mode
+ * 1 - Packets mode
+ * Access: RW
+ *
+ * Note: Used for max shaper configuration. For Spectrum, packets mode
+ * is supported only for traffic classes of CPU port.
+ */
+MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
+
+/* The smallest permitted min shaper rate. */
+#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */
+
+/* reg_qeec_min_shaper_rate
+ * Min shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
+
+/* reg_qeec_mase
+ * Max shaper configuration enable. Enables configuration of the max
+ * shaper on this ETS element.
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1);
+
+/* A large max rate will disable the max shaper. */
+#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */
+
+/* reg_qeec_max_shaper_rate
+ * Max shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28);
+
+/* reg_qeec_de
+ * DWRR configuration enable. Enables configuration of the dwrr and
+ * dwrr_weight.
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, de, 0x18, 31, 1);
+
+/* reg_qeec_dwrr
+ * Transmission selection algorithm to use on the link going down from
+ * the ETS element.
+ * 0 - Strict priority
+ * 1 - DWRR
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1);
+
+/* reg_qeec_dwrr_weight
+ * DWRR weight on the link going down from the ETS element. The
+ * percentage of bandwidth guaranteed to an ETS element within
+ * its hierarchy. The sum of all weights across all ETS elements
+ * within one hierarchy should be equal to 100. Reserved when
+ * transmission selection algorithm is strict priority.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8);
+
+static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index)
+{
+ MLXSW_REG_ZERO(qeec, payload);
+ mlxsw_reg_qeec_local_port_set(payload, local_port);
+ mlxsw_reg_qeec_element_hierarchy_set(payload, hr);
+ mlxsw_reg_qeec_element_index_set(payload, index);
+ mlxsw_reg_qeec_next_element_index_set(payload, next_index);
+}
+
+/* QRWE - QoS ReWrite Enable
+ * -------------------------
+ * This register configures the rewrite enable per receive port.
+ */
+#define MLXSW_REG_QRWE_ID 0x400F
+#define MLXSW_REG_QRWE_LEN 0x08
+
+MLXSW_REG_DEFINE(qrwe, MLXSW_REG_QRWE_ID, MLXSW_REG_QRWE_LEN);
+
+/* reg_qrwe_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is supported. No support for router port.
+ */
+MLXSW_ITEM32(reg, qrwe, local_port, 0x00, 16, 8);
+
+/* reg_qrwe_dscp
+ * Whether to enable DSCP rewrite (default is 0, don't rewrite).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qrwe, dscp, 0x04, 1, 1);
+
+/* reg_qrwe_pcp
+ * Whether to enable PCP and DEI rewrite (default is 0, don't rewrite).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qrwe, pcp, 0x04, 0, 1);
+
+static inline void mlxsw_reg_qrwe_pack(char *payload, u8 local_port,
+ bool rewrite_pcp, bool rewrite_dscp)
+{
+ MLXSW_REG_ZERO(qrwe, payload);
+ mlxsw_reg_qrwe_local_port_set(payload, local_port);
+ mlxsw_reg_qrwe_pcp_set(payload, rewrite_pcp);
+ mlxsw_reg_qrwe_dscp_set(payload, rewrite_dscp);
+}
+
+/* QPDSM - QoS Priority to DSCP Mapping
+ * ------------------------------------
+ * QoS Priority to DSCP Mapping Register
+ */
+#define MLXSW_REG_QPDSM_ID 0x4011
+#define MLXSW_REG_QPDSM_BASE_LEN 0x04 /* base length, without records */
+#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN 0x4 /* record length */
+#define MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT 16
+#define MLXSW_REG_QPDSM_LEN (MLXSW_REG_QPDSM_BASE_LEN + \
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN * \
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(qpdsm, MLXSW_REG_QPDSM_ID, MLXSW_REG_QPDSM_LEN);
+
+/* reg_qpdsm_local_port
+ * Local Port. Supported for data packets from CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpdsm, local_port, 0x00, 16, 8);
+
+/* reg_qpdsm_prio_entry_color0_e
+ * Enable update of the entry for color 0 and a given port.
+ * Access: WO
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_e,
+ MLXSW_REG_QPDSM_BASE_LEN, 31, 1,
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color0_dscp
+ * DSCP field in the outer label of the packet for color 0 and a given port.
+ * Reserved when e=0.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color0_dscp,
+ MLXSW_REG_QPDSM_BASE_LEN, 24, 6,
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color1_e
+ * Enable update of the entry for color 1 and a given port.
+ * Access: WO
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_e,
+ MLXSW_REG_QPDSM_BASE_LEN, 23, 1,
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color1_dscp
+ * DSCP field in the outer label of the packet for color 1 and a given port.
+ * Reserved when e=0.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color1_dscp,
+ MLXSW_REG_QPDSM_BASE_LEN, 16, 6,
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color2_e
+ * Enable update of the entry for color 2 and a given port.
+ * Access: WO
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_e,
+ MLXSW_REG_QPDSM_BASE_LEN, 15, 1,
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdsm_prio_entry_color2_dscp
+ * DSCP field in the outer label of the packet for color 2 and a given port.
+ * Reserved when e=0.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, qpdsm, prio_entry_color2_dscp,
+ MLXSW_REG_QPDSM_BASE_LEN, 8, 6,
+ MLXSW_REG_QPDSM_PRIO_ENTRY_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_qpdsm_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(qpdsm, payload);
+ mlxsw_reg_qpdsm_local_port_set(payload, local_port);
+}
+
+static inline void
+mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp)
+{
+ mlxsw_reg_qpdsm_prio_entry_color0_e_set(payload, prio, 1);
+ mlxsw_reg_qpdsm_prio_entry_color0_dscp_set(payload, prio, dscp);
+ mlxsw_reg_qpdsm_prio_entry_color1_e_set(payload, prio, 1);
+ mlxsw_reg_qpdsm_prio_entry_color1_dscp_set(payload, prio, dscp);
+ mlxsw_reg_qpdsm_prio_entry_color2_e_set(payload, prio, 1);
+ mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp);
+}
+
+/* QPDPM - QoS Port DSCP to Priority Mapping Register
+ * --------------------------------------------------
+ * This register controls the mapping from DSCP field to
+ * Switch Priority for IP packets.
+ */
+#define MLXSW_REG_QPDPM_ID 0x4013
+#define MLXSW_REG_QPDPM_BASE_LEN 0x4 /* base length, without records */
+#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN 0x2 /* record length */
+#define MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT 64
+#define MLXSW_REG_QPDPM_LEN (MLXSW_REG_QPDPM_BASE_LEN + \
+ MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN * \
+ MLXSW_REG_QPDPM_DSCP_ENTRY_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(qpdpm, MLXSW_REG_QPDPM_ID, MLXSW_REG_QPDPM_LEN);
+
+/* reg_qpdpm_local_port
+ * Local Port. Supported for data packets from CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpdpm, local_port, 0x00, 16, 8);
+
+/* reg_qpdpm_dscp_e
+ * Enable update of the specific entry. When cleared, the switch_prio and color
+ * fields are ignored and the previous switch_prio and color values are
+ * preserved.
+ * Access: WO
+ */
+MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_e, MLXSW_REG_QPDPM_BASE_LEN, 15, 1,
+ MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_qpdpm_dscp_prio
+ * The new Switch Priority value for the relevant DSCP value.
+ * Access: RW
+ */
+MLXSW_ITEM16_INDEXED(reg, qpdpm, dscp_entry_prio,
+ MLXSW_REG_QPDPM_BASE_LEN, 0, 4,
+ MLXSW_REG_QPDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_qpdpm_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(qpdpm, payload);
+ mlxsw_reg_qpdpm_local_port_set(payload, local_port);
+}
+
+static inline void
+mlxsw_reg_qpdpm_dscp_pack(char *payload, unsigned short dscp, u8 prio)
+{
+ mlxsw_reg_qpdpm_dscp_entry_e_set(payload, dscp, 1);
+ mlxsw_reg_qpdpm_dscp_entry_prio_set(payload, dscp, prio);
+}
+
+/* QTCTM - QoS Switch Traffic Class Table is Multicast-Aware Register
+ * ------------------------------------------------------------------
+ * This register configures if the Switch Priority to Traffic Class mapping is
+ * based on Multicast packet indication. If so, then multicast packets will get
+ * a Traffic Class that is plus (cap_max_tclass_data/2) the value configured by
+ * QTCT.
+ * By default, Switch Priority to Traffic Class mapping is not based on
+ * Multicast packet indication.
+ */
+#define MLXSW_REG_QTCTM_ID 0x401A
+#define MLXSW_REG_QTCTM_LEN 0x08
+
+MLXSW_REG_DEFINE(qtctm, MLXSW_REG_QTCTM_ID, MLXSW_REG_QTCTM_LEN);
+
+/* reg_qtctm_local_port
+ * Local port number.
+ * No support for CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qtctm, local_port, 0x00, 16, 8);
+
+/* reg_qtctm_mc
+ * Multicast Mode
+ * Whether Switch Priority to Traffic Class mapping is based on Multicast packet
+ * indication (default is 0, not based on Multicast packet indication).
+ */
+MLXSW_ITEM32(reg, qtctm, mc, 0x04, 0, 1);
+
+static inline void
+mlxsw_reg_qtctm_pack(char *payload, u8 local_port, bool mc)
+{
+ MLXSW_REG_ZERO(qtctm, payload);
+ mlxsw_reg_qtctm_local_port_set(payload, local_port);
+ mlxsw_reg_qtctm_mc_set(payload, mc);
+}
+
+/* PMLP - Ports Module to Local Port Register
+ * ------------------------------------------
+ * Configures the assignment of modules to local ports.
+ */
+#define MLXSW_REG_PMLP_ID 0x5002
+#define MLXSW_REG_PMLP_LEN 0x40
+
+MLXSW_REG_DEFINE(pmlp, MLXSW_REG_PMLP_ID, MLXSW_REG_PMLP_LEN);
+
+/* reg_pmlp_rxtx
+ * 0 - Tx value is used for both Tx and Rx.
+ * 1 - Rx value is taken from a separte field.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmlp, rxtx, 0x00, 31, 1);
+
+/* reg_pmlp_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmlp, local_port, 0x00, 16, 8);
+
+/* reg_pmlp_width
+ * 0 - Unmap local port.
+ * 1 - Lane 0 is used.
+ * 2 - Lanes 0 and 1 are used.
+ * 4 - Lanes 0, 1, 2 and 3 are used.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmlp, width, 0x00, 0, 8);
+
+/* reg_pmlp_module
+ * Module number.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pmlp, module, 0x04, 0, 8, 0x04, 0x00, false);
+
+/* reg_pmlp_tx_lane
+ * Tx Lane. When rxtx field is cleared, this field is used for Rx as well.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pmlp, tx_lane, 0x04, 16, 2, 0x04, 0x00, false);
+
+/* reg_pmlp_rx_lane
+ * Rx Lane. When rxtx field is cleared, this field is ignored and Rx lane is
+ * equal to Tx lane.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pmlp, rx_lane, 0x04, 24, 2, 0x04, 0x00, false);
+
+static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(pmlp, payload);
+ mlxsw_reg_pmlp_local_port_set(payload, local_port);
+}
+
+/* PMTU - Port MTU Register
+ * ------------------------
+ * Configures and reports the port MTU.
+ */
+#define MLXSW_REG_PMTU_ID 0x5003
+#define MLXSW_REG_PMTU_LEN 0x10
+
+MLXSW_REG_DEFINE(pmtu, MLXSW_REG_PMTU_ID, MLXSW_REG_PMTU_LEN);
+
+/* reg_pmtu_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmtu, local_port, 0x00, 16, 8);
+
+/* reg_pmtu_max_mtu
+ * Maximum MTU.
+ * When port type (e.g. Ethernet) is configured, the relevant MTU is
+ * reported, otherwise the minimum between the max_mtu of the different
+ * types is reported.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pmtu, max_mtu, 0x04, 16, 16);
+
+/* reg_pmtu_admin_mtu
+ * MTU value to set port to. Must be smaller or equal to max_mtu.
+ * Note: If port type is Infiniband, then port must be disabled, when its
+ * MTU is set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmtu, admin_mtu, 0x08, 16, 16);
+
+/* reg_pmtu_oper_mtu
+ * The actual MTU configured on the port. Packets exceeding this size
+ * will be dropped.
+ * Note: In Ethernet and FC oper_mtu == admin_mtu, however, in Infiniband
+ * oper_mtu might be smaller than admin_mtu.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pmtu, oper_mtu, 0x0C, 16, 16);
+
+static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port,
+ u16 new_mtu)
+{
+ MLXSW_REG_ZERO(pmtu, payload);
+ mlxsw_reg_pmtu_local_port_set(payload, local_port);
+ mlxsw_reg_pmtu_max_mtu_set(payload, 0);
+ mlxsw_reg_pmtu_admin_mtu_set(payload, new_mtu);
+ mlxsw_reg_pmtu_oper_mtu_set(payload, 0);
+}
+
+/* PTYS - Port Type and Speed Register
+ * -----------------------------------
+ * Configures and reports the port speed type.
+ *
+ * Note: When set while the link is up, the changes will not take effect
+ * until the port transitions from down to up state.
+ */
+#define MLXSW_REG_PTYS_ID 0x5004
+#define MLXSW_REG_PTYS_LEN 0x40
+
+MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN);
+
+/* an_disable_admin
+ * Auto negotiation disable administrative configuration
+ * 0 - Device doesn't support AN disable.
+ * 1 - Device supports AN disable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, an_disable_admin, 0x00, 30, 1);
+
+/* reg_ptys_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8);
+
+#define MLXSW_REG_PTYS_PROTO_MASK_IB BIT(0)
+#define MLXSW_REG_PTYS_PROTO_MASK_ETH BIT(2)
+
+/* reg_ptys_proto_mask
+ * Protocol mask. Indicates which protocol is used.
+ * 0 - Infiniband.
+ * 1 - Fibre Channel.
+ * 2 - Ethernet.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3);
+
+enum {
+ MLXSW_REG_PTYS_AN_STATUS_NA,
+ MLXSW_REG_PTYS_AN_STATUS_OK,
+ MLXSW_REG_PTYS_AN_STATUS_FAIL,
+};
+
+/* reg_ptys_an_status
+ * Autonegotiation status.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
+
+#define MLXSW_REG_PTYS_ETH_SPEED_SGMII BIT(0)
+#define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX BIT(1)
+#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2)
+#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 BIT(3)
+#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR BIT(4)
+#define MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2 BIT(5)
+#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 BIT(6)
+#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 BIT(7)
+#define MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4 BIT(8)
+#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR BIT(12)
+#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR BIT(13)
+#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14)
+#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 BIT(15)
+#define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4 BIT(16)
+#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2 BIT(18)
+#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 BIT(19)
+#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20)
+#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21)
+#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22)
+#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23)
+#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX BIT(24)
+#define MLXSW_REG_PTYS_ETH_SPEED_100BASE_T BIT(25)
+#define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T BIT(26)
+#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27)
+#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28)
+#define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29)
+#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 BIT(30)
+#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2 BIT(31)
+
+/* reg_ptys_eth_proto_cap
+ * Ethernet port supported speeds and protocols.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, eth_proto_cap, 0x0C, 0, 32);
+
+/* reg_ptys_ib_link_width_cap
+ * IB port supported widths.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_cap, 0x10, 16, 16);
+
+#define MLXSW_REG_PTYS_IB_SPEED_SDR BIT(0)
+#define MLXSW_REG_PTYS_IB_SPEED_DDR BIT(1)
+#define MLXSW_REG_PTYS_IB_SPEED_QDR BIT(2)
+#define MLXSW_REG_PTYS_IB_SPEED_FDR10 BIT(3)
+#define MLXSW_REG_PTYS_IB_SPEED_FDR BIT(4)
+#define MLXSW_REG_PTYS_IB_SPEED_EDR BIT(5)
+
+/* reg_ptys_ib_proto_cap
+ * IB port supported speeds and protocols.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_cap, 0x10, 0, 16);
+
+/* reg_ptys_eth_proto_admin
+ * Speed and protocol to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32);
+
+/* reg_ptys_ib_link_width_admin
+ * IB width to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_admin, 0x1C, 16, 16);
+
+/* reg_ptys_ib_proto_admin
+ * IB speeds and protocols to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_admin, 0x1C, 0, 16);
+
+/* reg_ptys_eth_proto_oper
+ * The current speed and protocol configured for the port.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
+
+/* reg_ptys_ib_link_width_oper
+ * The current IB width to set port to.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_oper, 0x28, 16, 16);
+
+/* reg_ptys_ib_proto_oper
+ * The current IB speed and protocol.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
+
+/* reg_ptys_eth_proto_lp_advertise
+ * The protocols that were advertised by the link partner during
+ * autonegotiation.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
+
+static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
+ u32 proto_admin, bool autoneg)
+{
+ MLXSW_REG_ZERO(ptys, payload);
+ mlxsw_reg_ptys_local_port_set(payload, local_port);
+ mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH);
+ mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin);
+ mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg);
+}
+
+static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
+ u32 *p_eth_proto_cap,
+ u32 *p_eth_proto_adm,
+ u32 *p_eth_proto_oper)
+{
+ if (p_eth_proto_cap)
+ *p_eth_proto_cap = mlxsw_reg_ptys_eth_proto_cap_get(payload);
+ if (p_eth_proto_adm)
+ *p_eth_proto_adm = mlxsw_reg_ptys_eth_proto_admin_get(payload);
+ if (p_eth_proto_oper)
+ *p_eth_proto_oper = mlxsw_reg_ptys_eth_proto_oper_get(payload);
+}
+
+static inline void mlxsw_reg_ptys_ib_pack(char *payload, u8 local_port,
+ u16 proto_admin, u16 link_width)
+{
+ MLXSW_REG_ZERO(ptys, payload);
+ mlxsw_reg_ptys_local_port_set(payload, local_port);
+ mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_IB);
+ mlxsw_reg_ptys_ib_proto_admin_set(payload, proto_admin);
+ mlxsw_reg_ptys_ib_link_width_admin_set(payload, link_width);
+}
+
+static inline void mlxsw_reg_ptys_ib_unpack(char *payload, u16 *p_ib_proto_cap,
+ u16 *p_ib_link_width_cap,
+ u16 *p_ib_proto_oper,
+ u16 *p_ib_link_width_oper)
+{
+ if (p_ib_proto_cap)
+ *p_ib_proto_cap = mlxsw_reg_ptys_ib_proto_cap_get(payload);
+ if (p_ib_link_width_cap)
+ *p_ib_link_width_cap =
+ mlxsw_reg_ptys_ib_link_width_cap_get(payload);
+ if (p_ib_proto_oper)
+ *p_ib_proto_oper = mlxsw_reg_ptys_ib_proto_oper_get(payload);
+ if (p_ib_link_width_oper)
+ *p_ib_link_width_oper =
+ mlxsw_reg_ptys_ib_link_width_oper_get(payload);
+}
+
+/* PPAD - Port Physical Address Register
+ * -------------------------------------
+ * The PPAD register configures the per port physical MAC address.
+ */
+#define MLXSW_REG_PPAD_ID 0x5005
+#define MLXSW_REG_PPAD_LEN 0x10
+
+MLXSW_REG_DEFINE(ppad, MLXSW_REG_PPAD_ID, MLXSW_REG_PPAD_LEN);
+
+/* reg_ppad_single_base_mac
+ * 0: base_mac, local port should be 0 and mac[7:0] is
+ * reserved. HW will set incremental
+ * 1: single_mac - mac of the local_port
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ppad, single_base_mac, 0x00, 28, 1);
+
+/* reg_ppad_local_port
+ * port number, if single_base_mac = 0 then local_port is reserved
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ppad, local_port, 0x00, 16, 8);
+
+/* reg_ppad_mac
+ * If single_base_mac = 0 - base MAC address, mac[7:0] is reserved.
+ * If single_base_mac = 1 - the per port MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ppad, mac, 0x02, 6);
+
+static inline void mlxsw_reg_ppad_pack(char *payload, bool single_base_mac,
+ u8 local_port)
+{
+ MLXSW_REG_ZERO(ppad, payload);
+ mlxsw_reg_ppad_single_base_mac_set(payload, !!single_base_mac);
+ mlxsw_reg_ppad_local_port_set(payload, local_port);
+}
+
+/* PAOS - Ports Administrative and Operational Status Register
+ * -----------------------------------------------------------
+ * Configures and retrieves per port administrative and operational status.
+ */
+#define MLXSW_REG_PAOS_ID 0x5006
+#define MLXSW_REG_PAOS_LEN 0x10
+
+MLXSW_REG_DEFINE(paos, MLXSW_REG_PAOS_ID, MLXSW_REG_PAOS_LEN);
+
+/* reg_paos_swid
+ * Switch partition ID with which to associate the port.
+ * Note: while external ports uses unique local port numbers (and thus swid is
+ * redundant), router ports use the same local port number where swid is the
+ * only indication for the relevant port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, paos, swid, 0x00, 24, 8);
+
+/* reg_paos_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, paos, local_port, 0x00, 16, 8);
+
+/* reg_paos_admin_status
+ * Port administrative state (the desired state of the port):
+ * 1 - Up.
+ * 2 - Down.
+ * 3 - Up once. This means that in case of link failure, the port won't go
+ * into polling mode, but will wait to be re-enabled by software.
+ * 4 - Disabled by system. Can only be set by hardware.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, paos, admin_status, 0x00, 8, 4);
+
+/* reg_paos_oper_status
+ * Port operational state (the current state):
+ * 1 - Up.
+ * 2 - Down.
+ * 3 - Down by port failure. This means that the device will not let the
+ * port up again until explicitly specified by software.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, paos, oper_status, 0x00, 0, 4);
+
+/* reg_paos_ase
+ * Admin state update enabled.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, paos, ase, 0x04, 31, 1);
+
+/* reg_paos_ee
+ * Event update enable. If this bit is set, event generation will be
+ * updated based on the e field.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, paos, ee, 0x04, 30, 1);
+
+/* reg_paos_e
+ * Event generation on operational state change:
+ * 0 - Do not generate event.
+ * 1 - Generate Event.
+ * 2 - Generate Single Event.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, paos, e, 0x04, 0, 2);
+
+static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port,
+ enum mlxsw_port_admin_status status)
+{
+ MLXSW_REG_ZERO(paos, payload);
+ mlxsw_reg_paos_swid_set(payload, 0);
+ mlxsw_reg_paos_local_port_set(payload, local_port);
+ mlxsw_reg_paos_admin_status_set(payload, status);
+ mlxsw_reg_paos_oper_status_set(payload, 0);
+ mlxsw_reg_paos_ase_set(payload, 1);
+ mlxsw_reg_paos_ee_set(payload, 1);
+ mlxsw_reg_paos_e_set(payload, 1);
+}
+
+/* PFCC - Ports Flow Control Configuration Register
+ * ------------------------------------------------
+ * Configures and retrieves the per port flow control configuration.
+ */
+#define MLXSW_REG_PFCC_ID 0x5007
+#define MLXSW_REG_PFCC_LEN 0x20
+
+MLXSW_REG_DEFINE(pfcc, MLXSW_REG_PFCC_ID, MLXSW_REG_PFCC_LEN);
+
+/* reg_pfcc_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pfcc, local_port, 0x00, 16, 8);
+
+/* reg_pfcc_pnat
+ * Port number access type. Determines the way local_port is interpreted:
+ * 0 - Local port number.
+ * 1 - IB / label port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pfcc, pnat, 0x00, 14, 2);
+
+/* reg_pfcc_shl_cap
+ * Send to higher layers capabilities:
+ * 0 - No capability of sending Pause and PFC frames to higher layers.
+ * 1 - Device has capability of sending Pause and PFC frames to higher
+ * layers.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pfcc, shl_cap, 0x00, 1, 1);
+
+/* reg_pfcc_shl_opr
+ * Send to higher layers operation:
+ * 0 - Pause and PFC frames are handled by the port (default).
+ * 1 - Pause and PFC frames are handled by the port and also sent to
+ * higher layers. Only valid if shl_cap = 1.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, shl_opr, 0x00, 0, 1);
+
+/* reg_pfcc_ppan
+ * Pause policy auto negotiation.
+ * 0 - Disabled. Generate / ignore Pause frames based on pptx / pprtx.
+ * 1 - Enabled. When auto-negotiation is performed, set the Pause policy
+ * based on the auto-negotiation resolution.
+ * Access: RW
+ *
+ * Note: The auto-negotiation advertisement is set according to pptx and
+ * pprtx. When PFC is set on Tx / Rx, ppan must be set to 0.
+ */
+MLXSW_ITEM32(reg, pfcc, ppan, 0x04, 28, 4);
+
+/* reg_pfcc_prio_mask_tx
+ * Bit per priority indicating if Tx flow control policy should be
+ * updated based on bit pfctx.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, pfcc, prio_mask_tx, 0x04, 16, 8);
+
+/* reg_pfcc_prio_mask_rx
+ * Bit per priority indicating if Rx flow control policy should be
+ * updated based on bit pfcrx.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, pfcc, prio_mask_rx, 0x04, 0, 8);
+
+/* reg_pfcc_pptx
+ * Admin Pause policy on Tx.
+ * 0 - Never generate Pause frames (default).
+ * 1 - Generate Pause frames according to Rx buffer threshold.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, pptx, 0x08, 31, 1);
+
+/* reg_pfcc_aptx
+ * Active (operational) Pause policy on Tx.
+ * 0 - Never generate Pause frames.
+ * 1 - Generate Pause frames according to Rx buffer threshold.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pfcc, aptx, 0x08, 30, 1);
+
+/* reg_pfcc_pfctx
+ * Priority based flow control policy on Tx[7:0]. Per-priority bit mask:
+ * 0 - Never generate priority Pause frames on the specified priority
+ * (default).
+ * 1 - Generate priority Pause frames according to Rx buffer threshold on
+ * the specified priority.
+ * Access: RW
+ *
+ * Note: pfctx and pptx must be mutually exclusive.
+ */
+MLXSW_ITEM32(reg, pfcc, pfctx, 0x08, 16, 8);
+
+/* reg_pfcc_pprx
+ * Admin Pause policy on Rx.
+ * 0 - Ignore received Pause frames (default).
+ * 1 - Respect received Pause frames.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, pprx, 0x0C, 31, 1);
+
+/* reg_pfcc_aprx
+ * Active (operational) Pause policy on Rx.
+ * 0 - Ignore received Pause frames.
+ * 1 - Respect received Pause frames.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pfcc, aprx, 0x0C, 30, 1);
+
+/* reg_pfcc_pfcrx
+ * Priority based flow control policy on Rx[7:0]. Per-priority bit mask:
+ * 0 - Ignore incoming priority Pause frames on the specified priority
+ * (default).
+ * 1 - Respect incoming priority Pause frames on the specified priority.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, pfcrx, 0x0C, 16, 8);
+
+#define MLXSW_REG_PFCC_ALL_PRIO 0xFF
+
+static inline void mlxsw_reg_pfcc_prio_pack(char *payload, u8 pfc_en)
+{
+ mlxsw_reg_pfcc_prio_mask_tx_set(payload, MLXSW_REG_PFCC_ALL_PRIO);
+ mlxsw_reg_pfcc_prio_mask_rx_set(payload, MLXSW_REG_PFCC_ALL_PRIO);
+ mlxsw_reg_pfcc_pfctx_set(payload, pfc_en);
+ mlxsw_reg_pfcc_pfcrx_set(payload, pfc_en);
+}
+
+static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(pfcc, payload);
+ mlxsw_reg_pfcc_local_port_set(payload, local_port);
+}
+
+/* PPCNT - Ports Performance Counters Register
+ * -------------------------------------------
+ * The PPCNT register retrieves per port performance counters.
+ */
+#define MLXSW_REG_PPCNT_ID 0x5008
+#define MLXSW_REG_PPCNT_LEN 0x100
+#define MLXSW_REG_PPCNT_COUNTERS_OFFSET 0x08
+
+MLXSW_REG_DEFINE(ppcnt, MLXSW_REG_PPCNT_ID, MLXSW_REG_PPCNT_LEN);
+
+/* reg_ppcnt_swid
+ * For HCA: must be always 0.
+ * Switch partition ID to associate port with.
+ * Switch partitions are numbered from 0 to 7 inclusively.
+ * Switch partition 254 indicates stacking ports.
+ * Switch partition 255 indicates all switch partitions.
+ * Only valid on Set() operation with local_port=255.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppcnt, swid, 0x00, 24, 8);
+
+/* reg_ppcnt_local_port
+ * Local port number.
+ * 255 indicates all ports on the device, and is only allowed
+ * for Set() operation.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppcnt, local_port, 0x00, 16, 8);
+
+/* reg_ppcnt_pnat
+ * Port number access type:
+ * 0 - Local port number
+ * 1 - IB port number
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2);
+
+enum mlxsw_reg_ppcnt_grp {
+ MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0,
+ MLXSW_REG_PPCNT_RFC_2819_CNT = 0x2,
+ MLXSW_REG_PPCNT_EXT_CNT = 0x5,
+ MLXSW_REG_PPCNT_PRIO_CNT = 0x10,
+ MLXSW_REG_PPCNT_TC_CNT = 0x11,
+ MLXSW_REG_PPCNT_TC_CONG_TC = 0x13,
+};
+
+/* reg_ppcnt_grp
+ * Performance counter group.
+ * Group 63 indicates all groups. Only valid on Set() operation with
+ * clr bit set.
+ * 0x0: IEEE 802.3 Counters
+ * 0x1: RFC 2863 Counters
+ * 0x2: RFC 2819 Counters
+ * 0x3: RFC 3635 Counters
+ * 0x5: Ethernet Extended Counters
+ * 0x8: Link Level Retransmission Counters
+ * 0x10: Per Priority Counters
+ * 0x11: Per Traffic Class Counters
+ * 0x12: Physical Layer Counters
+ * 0x13: Per Traffic Class Congestion Counters
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppcnt, grp, 0x00, 0, 6);
+
+/* reg_ppcnt_clr
+ * Clear counters. Setting the clr bit will reset the counter value
+ * for all counters in the counter group. This bit can be set
+ * for both Set() and Get() operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ppcnt, clr, 0x04, 31, 1);
+
+/* reg_ppcnt_prio_tc
+ * Priority for counter set that support per priority, valid values: 0-7.
+ * Traffic class for counter set that support per traffic class,
+ * valid values: 0- cap_max_tclass-1 .
+ * For HCA: cap_max_tclass is always 8.
+ * Otherwise must be 0.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5);
+
+/* Ethernet IEEE 802.3 Counter Group */
+
+/* reg_ppcnt_a_frames_transmitted_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_frames_transmitted_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
+
+/* reg_ppcnt_a_frames_received_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_frames_received_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
+
+/* reg_ppcnt_a_frame_check_sequence_errors
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_frame_check_sequence_errors,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64);
+
+/* reg_ppcnt_a_alignment_errors
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_alignment_errors,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64);
+
+/* reg_ppcnt_a_octets_transmitted_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_octets_transmitted_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64);
+
+/* reg_ppcnt_a_octets_received_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_octets_received_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64);
+
+/* reg_ppcnt_a_multicast_frames_xmitted_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_xmitted_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64);
+
+/* reg_ppcnt_a_broadcast_frames_xmitted_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_xmitted_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64);
+
+/* reg_ppcnt_a_multicast_frames_received_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_received_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64);
+
+/* reg_ppcnt_a_broadcast_frames_received_ok
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_received_ok,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64);
+
+/* reg_ppcnt_a_in_range_length_errors
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_in_range_length_errors,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64);
+
+/* reg_ppcnt_a_out_of_range_length_field
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_out_of_range_length_field,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64);
+
+/* reg_ppcnt_a_frame_too_long_errors
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_frame_too_long_errors,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
+
+/* reg_ppcnt_a_symbol_error_during_carrier
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_symbol_error_during_carrier,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64);
+
+/* reg_ppcnt_a_mac_control_frames_transmitted
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_transmitted,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
+
+/* reg_ppcnt_a_mac_control_frames_received
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_received,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64);
+
+/* reg_ppcnt_a_unsupported_opcodes_received
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_unsupported_opcodes_received,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64);
+
+/* reg_ppcnt_a_pause_mac_ctrl_frames_received
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64);
+
+/* reg_ppcnt_a_pause_mac_ctrl_frames_transmitted
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64);
+
+/* Ethernet RFC 2819 Counter Group */
+
+/* reg_ppcnt_ether_stats_pkts64octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts64octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts65to127octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts65to127octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts128to255octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts128to255octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts256to511octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts256to511octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts512to1023octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts512to1023octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts1024to1518octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts1024to1518octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts1519to2047octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts1519to2047octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts2048to4095octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts2048to4095octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts4096to8191octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts4096to8191octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x98, 0, 64);
+
+/* reg_ppcnt_ether_stats_pkts8192to10239octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts8192to10239octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0xA0, 0, 64);
+
+/* Ethernet Extended Counter Group Counters */
+
+/* reg_ppcnt_ecn_marked
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ecn_marked,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
+
+/* Ethernet Per Priority Group Counters */
+
+/* reg_ppcnt_rx_octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
+
+/* reg_ppcnt_rx_frames
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_frames,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64);
+
+/* reg_ppcnt_tx_octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_octets,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64);
+
+/* reg_ppcnt_tx_frames
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_frames,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64);
+
+/* reg_ppcnt_rx_pause
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_pause,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64);
+
+/* reg_ppcnt_rx_pause_duration
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_pause_duration,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64);
+
+/* reg_ppcnt_tx_pause
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_pause,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
+
+/* reg_ppcnt_tx_pause_duration
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_pause_duration,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64);
+
+/* reg_ppcnt_rx_pause_transition
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_pause_transition,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
+
+/* Ethernet Per Traffic Group Counters */
+
+/* reg_ppcnt_tc_transmit_queue
+ * Contains the transmit queue depth in cells of traffic class
+ * selected by prio_tc and the port selected by local_port.
+ * The field cannot be cleared.
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
+
+/* reg_ppcnt_tc_no_buffer_discard_uc
+ * The number of unicast packets dropped due to lack of shared
+ * buffer resources.
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
+
+/* Ethernet Per Traffic Class Congestion Group Counters */
+
+/* reg_ppcnt_wred_discard
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, wred_discard,
+ MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
+
+static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_ppcnt_grp grp,
+ u8 prio_tc)
+{
+ MLXSW_REG_ZERO(ppcnt, payload);
+ mlxsw_reg_ppcnt_swid_set(payload, 0);
+ mlxsw_reg_ppcnt_local_port_set(payload, local_port);
+ mlxsw_reg_ppcnt_pnat_set(payload, 0);
+ mlxsw_reg_ppcnt_grp_set(payload, grp);
+ mlxsw_reg_ppcnt_clr_set(payload, 0);
+ mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc);
+}
+
+/* PLIB - Port Local to InfiniBand Port
+ * ------------------------------------
+ * The PLIB register performs mapping from Local Port into InfiniBand Port.
+ */
+#define MLXSW_REG_PLIB_ID 0x500A
+#define MLXSW_REG_PLIB_LEN 0x10
+
+MLXSW_REG_DEFINE(plib, MLXSW_REG_PLIB_ID, MLXSW_REG_PLIB_LEN);
+
+/* reg_plib_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, plib, local_port, 0x00, 16, 8);
+
+/* reg_plib_ib_port
+ * InfiniBand port remapping for local_port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, plib, ib_port, 0x00, 0, 8);
+
+/* PPTB - Port Prio To Buffer Register
+ * -----------------------------------
+ * Configures the switch priority to buffer table.
+ */
+#define MLXSW_REG_PPTB_ID 0x500B
+#define MLXSW_REG_PPTB_LEN 0x10
+
+MLXSW_REG_DEFINE(pptb, MLXSW_REG_PPTB_ID, MLXSW_REG_PPTB_LEN);
+
+enum {
+ MLXSW_REG_PPTB_MM_UM,
+ MLXSW_REG_PPTB_MM_UNICAST,
+ MLXSW_REG_PPTB_MM_MULTICAST,
+};
+
+/* reg_pptb_mm
+ * Mapping mode.
+ * 0 - Map both unicast and multicast packets to the same buffer.
+ * 1 - Map only unicast packets.
+ * 2 - Map only multicast packets.
+ * Access: Index
+ *
+ * Note: SwitchX-2 only supports the first option.
+ */
+MLXSW_ITEM32(reg, pptb, mm, 0x00, 28, 2);
+
+/* reg_pptb_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pptb, local_port, 0x00, 16, 8);
+
+/* reg_pptb_um
+ * Enables the update of the untagged_buf field.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pptb, um, 0x00, 8, 1);
+
+/* reg_pptb_pm
+ * Enables the update of the prio_to_buff field.
+ * Bit <i> is a flag for updating the mapping for switch priority <i>.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pptb, pm, 0x00, 0, 8);
+
+/* reg_pptb_prio_to_buff
+ * Mapping of switch priority <i> to one of the allocated receive port
+ * buffers.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff, 0x04, 0x04, 4);
+
+/* reg_pptb_pm_msb
+ * Enables the update of the prio_to_buff field.
+ * Bit <i> is a flag for updating the mapping for switch priority <i+8>.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8);
+
+/* reg_pptb_untagged_buff
+ * Mapping of untagged frames to one of the allocated receive port buffers.
+ * Access: RW
+ *
+ * Note: In SwitchX-2 this field must be mapped to buffer 8. Reserved for
+ * Spectrum, as it maps untagged packets based on the default switch priority.
+ */
+MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4);
+
+/* reg_pptb_prio_to_buff_msb
+ * Mapping of switch priority <i+8> to one of the allocated receive port
+ * buffers.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff_msb, 0x0C, 0x04, 4);
+
+#define MLXSW_REG_PPTB_ALL_PRIO 0xFF
+
+static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(pptb, payload);
+ mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM);
+ mlxsw_reg_pptb_local_port_set(payload, local_port);
+ mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO);
+ mlxsw_reg_pptb_pm_msb_set(payload, MLXSW_REG_PPTB_ALL_PRIO);
+}
+
+static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio,
+ u8 buff)
+{
+ mlxsw_reg_pptb_prio_to_buff_set(payload, prio, buff);
+ mlxsw_reg_pptb_prio_to_buff_msb_set(payload, prio, buff);
+}
+
+/* PBMC - Port Buffer Management Control Register
+ * ----------------------------------------------
+ * The PBMC register configures and retrieves the port packet buffer
+ * allocation for different Prios, and the Pause threshold management.
+ */
+#define MLXSW_REG_PBMC_ID 0x500C
+#define MLXSW_REG_PBMC_LEN 0x6C
+
+MLXSW_REG_DEFINE(pbmc, MLXSW_REG_PBMC_ID, MLXSW_REG_PBMC_LEN);
+
+/* reg_pbmc_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pbmc, local_port, 0x00, 16, 8);
+
+/* reg_pbmc_xoff_timer_value
+ * When device generates a pause frame, it uses this value as the pause
+ * timer (time for the peer port to pause in quota-512 bit time).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pbmc, xoff_timer_value, 0x04, 16, 16);
+
+/* reg_pbmc_xoff_refresh
+ * The time before a new pause frame should be sent to refresh the pause RW
+ * state. Using the same units as xoff_timer_value above (in quota-512 bit
+ * time).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pbmc, xoff_refresh, 0x04, 0, 16);
+
+#define MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX 11
+
+/* reg_pbmc_buf_lossy
+ * The field indicates if the buffer is lossy.
+ * 0 - Lossless
+ * 1 - Lossy
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pbmc, buf_lossy, 0x0C, 25, 1, 0x08, 0x00, false);
+
+/* reg_pbmc_buf_epsb
+ * Eligible for Port Shared buffer.
+ * If epsb is set, packets assigned to buffer are allowed to insert the port
+ * shared buffer.
+ * When buf_lossy is MLXSW_REG_PBMC_LOSSY_LOSSY this field is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pbmc, buf_epsb, 0x0C, 24, 1, 0x08, 0x00, false);
+
+/* reg_pbmc_buf_size
+ * The part of the packet buffer array is allocated for the specific buffer.
+ * Units are represented in cells.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, pbmc, buf_size, 0x0C, 0, 16, 0x08, 0x00, false);
+
+/* reg_pbmc_buf_xoff_threshold
+ * Once the amount of data in the buffer goes above this value, device
+ * starts sending PFC frames for all priorities associated with the
+ * buffer. Units are represented in cells. Reserved in case of lossy
+ * buffer.
+ * Access: RW
+ *
+ * Note: In Spectrum, reserved for buffer[9].
+ */
+MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xoff_threshold, 0x0C, 16, 16,
+ 0x08, 0x04, false);
+
+/* reg_pbmc_buf_xon_threshold
+ * When the amount of data in the buffer goes below this value, device
+ * stops sending PFC frames for the priorities associated with the
+ * buffer. Units are represented in cells. Reserved in case of lossy
+ * buffer.
+ * Access: RW
+ *
+ * Note: In Spectrum, reserved for buffer[9].
+ */
+MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xon_threshold, 0x0C, 0, 16,
+ 0x08, 0x04, false);
+
+static inline void mlxsw_reg_pbmc_pack(char *payload, u8 local_port,
+ u16 xoff_timer_value, u16 xoff_refresh)
+{
+ MLXSW_REG_ZERO(pbmc, payload);
+ mlxsw_reg_pbmc_local_port_set(payload, local_port);
+ mlxsw_reg_pbmc_xoff_timer_value_set(payload, xoff_timer_value);
+ mlxsw_reg_pbmc_xoff_refresh_set(payload, xoff_refresh);
+}
+
+static inline void mlxsw_reg_pbmc_lossy_buffer_pack(char *payload,
+ int buf_index,
+ u16 size)
+{
+ mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 1);
+ mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0);
+ mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size);
+}
+
+static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload,
+ int buf_index, u16 size,
+ u16 threshold)
+{
+ mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 0);
+ mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0);
+ mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size);
+ mlxsw_reg_pbmc_buf_xoff_threshold_set(payload, buf_index, threshold);
+ mlxsw_reg_pbmc_buf_xon_threshold_set(payload, buf_index, threshold);
+}
+
+/* PSPA - Port Switch Partition Allocation
+ * ---------------------------------------
+ * Controls the association of a port with a switch partition and enables
+ * configuring ports as stacking ports.
+ */
+#define MLXSW_REG_PSPA_ID 0x500D
+#define MLXSW_REG_PSPA_LEN 0x8
+
+MLXSW_REG_DEFINE(pspa, MLXSW_REG_PSPA_ID, MLXSW_REG_PSPA_LEN);
+
+/* reg_pspa_swid
+ * Switch partition ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pspa, swid, 0x00, 24, 8);
+
+/* reg_pspa_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pspa, local_port, 0x00, 16, 8);
+
+/* reg_pspa_sub_port
+ * Virtual port within the local port. Set to 0 when virtual ports are
+ * disabled on the local port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pspa, sub_port, 0x00, 8, 8);
+
+static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port)
+{
+ MLXSW_REG_ZERO(pspa, payload);
+ mlxsw_reg_pspa_swid_set(payload, swid);
+ mlxsw_reg_pspa_local_port_set(payload, local_port);
+ mlxsw_reg_pspa_sub_port_set(payload, 0);
+}
+
+/* HTGT - Host Trap Group Table
+ * ----------------------------
+ * Configures the properties for forwarding to CPU.
+ */
+#define MLXSW_REG_HTGT_ID 0x7002
+#define MLXSW_REG_HTGT_LEN 0x20
+
+MLXSW_REG_DEFINE(htgt, MLXSW_REG_HTGT_ID, MLXSW_REG_HTGT_LEN);
+
+/* reg_htgt_swid
+ * Switch partition ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, htgt, swid, 0x00, 24, 8);
+
+#define MLXSW_REG_HTGT_PATH_TYPE_LOCAL 0x0 /* For locally attached CPU */
+
+/* reg_htgt_type
+ * CPU path type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
+
+enum mlxsw_reg_htgt_trap_group {
+ MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
+ MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
+};
+
+/* reg_htgt_trap_group
+ * Trap group number. User defined number specifying which trap groups
+ * should be forwarded to the CPU. The mapping between trap IDs and trap
+ * groups is configured using HPKT register.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, htgt, trap_group, 0x00, 0, 8);
+
+enum {
+ MLXSW_REG_HTGT_POLICER_DISABLE,
+ MLXSW_REG_HTGT_POLICER_ENABLE,
+};
+
+/* reg_htgt_pide
+ * Enable policer ID specified using 'pid' field.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, htgt, pide, 0x04, 15, 1);
+
+#define MLXSW_REG_HTGT_INVALID_POLICER 0xff
+
+/* reg_htgt_pid
+ * Policer ID for the trap group.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, htgt, pid, 0x04, 0, 8);
+
+#define MLXSW_REG_HTGT_TRAP_TO_CPU 0x0
+
+/* reg_htgt_mirror_action
+ * Mirror action to use.
+ * 0 - Trap to CPU.
+ * 1 - Trap to CPU and mirror to a mirroring agent.
+ * 2 - Mirror to a mirroring agent and do not trap to CPU.
+ * Access: RW
+ *
+ * Note: Mirroring to a mirroring agent is only supported in Spectrum.
+ */
+MLXSW_ITEM32(reg, htgt, mirror_action, 0x08, 8, 2);
+
+/* reg_htgt_mirroring_agent
+ * Mirroring agent.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, htgt, mirroring_agent, 0x08, 0, 3);
+
+#define MLXSW_REG_HTGT_DEFAULT_PRIORITY 0
+
+/* reg_htgt_priority
+ * Trap group priority.
+ * In case a packet matches multiple classification rules, the packet will
+ * only be trapped once, based on the trap ID associated with the group (via
+ * register HPKT) with the highest priority.
+ * Supported values are 0-7, with 7 represnting the highest priority.
+ * Access: RW
+ *
+ * Note: In SwitchX-2 this field is ignored and the priority value is replaced
+ * by the 'trap_group' field.
+ */
+MLXSW_ITEM32(reg, htgt, priority, 0x0C, 0, 4);
+
+#define MLXSW_REG_HTGT_DEFAULT_TC 7
+
+/* reg_htgt_local_path_cpu_tclass
+ * CPU ingress traffic class for the trap group.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, htgt, local_path_cpu_tclass, 0x10, 16, 6);
+
+enum mlxsw_reg_htgt_local_path_rdq {
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL = 0x13,
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX = 0x14,
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD = 0x15,
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD = 0x15,
+};
+/* reg_htgt_local_path_rdq
+ * Receive descriptor queue (RDQ) to use for the trap group.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, htgt, local_path_rdq, 0x10, 0, 6);
+
+static inline void mlxsw_reg_htgt_pack(char *payload, u8 group, u8 policer_id,
+ u8 priority, u8 tc)
+{
+ MLXSW_REG_ZERO(htgt, payload);
+
+ if (policer_id == MLXSW_REG_HTGT_INVALID_POLICER) {
+ mlxsw_reg_htgt_pide_set(payload,
+ MLXSW_REG_HTGT_POLICER_DISABLE);
+ } else {
+ mlxsw_reg_htgt_pide_set(payload,
+ MLXSW_REG_HTGT_POLICER_ENABLE);
+ mlxsw_reg_htgt_pid_set(payload, policer_id);
+ }
+
+ mlxsw_reg_htgt_type_set(payload, MLXSW_REG_HTGT_PATH_TYPE_LOCAL);
+ mlxsw_reg_htgt_trap_group_set(payload, group);
+ mlxsw_reg_htgt_mirror_action_set(payload, MLXSW_REG_HTGT_TRAP_TO_CPU);
+ mlxsw_reg_htgt_mirroring_agent_set(payload, 0);
+ mlxsw_reg_htgt_priority_set(payload, priority);
+ mlxsw_reg_htgt_local_path_cpu_tclass_set(payload, tc);
+ mlxsw_reg_htgt_local_path_rdq_set(payload, group);
+}
+
+/* HPKT - Host Packet Trap
+ * -----------------------
+ * Configures trap IDs inside trap groups.
+ */
+#define MLXSW_REG_HPKT_ID 0x7003
+#define MLXSW_REG_HPKT_LEN 0x10
+
+MLXSW_REG_DEFINE(hpkt, MLXSW_REG_HPKT_ID, MLXSW_REG_HPKT_LEN);
+
+enum {
+ MLXSW_REG_HPKT_ACK_NOT_REQUIRED,
+ MLXSW_REG_HPKT_ACK_REQUIRED,
+};
+
+/* reg_hpkt_ack
+ * Require acknowledgements from the host for events.
+ * If set, then the device will wait for the event it sent to be acknowledged
+ * by the host. This option is only relevant for event trap IDs.
+ * Access: RW
+ *
+ * Note: Currently not supported by firmware.
+ */
+MLXSW_ITEM32(reg, hpkt, ack, 0x00, 24, 1);
+
+enum mlxsw_reg_hpkt_action {
+ MLXSW_REG_HPKT_ACTION_FORWARD,
+ MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
+ MLXSW_REG_HPKT_ACTION_MIRROR_TO_CPU,
+ MLXSW_REG_HPKT_ACTION_DISCARD,
+ MLXSW_REG_HPKT_ACTION_SOFT_DISCARD,
+ MLXSW_REG_HPKT_ACTION_TRAP_AND_SOFT_DISCARD,
+};
+
+/* reg_hpkt_action
+ * Action to perform on packet when trapped.
+ * 0 - No action. Forward to CPU based on switching rules.
+ * 1 - Trap to CPU (CPU receives sole copy).
+ * 2 - Mirror to CPU (CPU receives a replica of the packet).
+ * 3 - Discard.
+ * 4 - Soft discard (allow other traps to act on the packet).
+ * 5 - Trap and soft discard (allow other traps to overwrite this trap).
+ * Access: RW
+ *
+ * Note: Must be set to 0 (forward) for event trap IDs, as they are already
+ * addressed to the CPU.
+ */
+MLXSW_ITEM32(reg, hpkt, action, 0x00, 20, 3);
+
+/* reg_hpkt_trap_group
+ * Trap group to associate the trap with.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, hpkt, trap_group, 0x00, 12, 6);
+
+/* reg_hpkt_trap_id
+ * Trap ID.
+ * Access: Index
+ *
+ * Note: A trap ID can only be associated with a single trap group. The device
+ * will associate the trap ID with the last trap group configured.
+ */
+MLXSW_ITEM32(reg, hpkt, trap_id, 0x00, 0, 9);
+
+enum {
+ MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT,
+ MLXSW_REG_HPKT_CTRL_PACKET_NO_BUFFER,
+ MLXSW_REG_HPKT_CTRL_PACKET_USE_BUFFER,
+};
+
+/* reg_hpkt_ctrl
+ * Configure dedicated buffer resources for control packets.
+ * Ignored by SwitchX-2.
+ * 0 - Keep factory defaults.
+ * 1 - Do not use control buffer for this trap ID.
+ * 2 - Use control buffer for this trap ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, hpkt, ctrl, 0x04, 16, 2);
+
+static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id,
+ enum mlxsw_reg_htgt_trap_group trap_group,
+ bool is_ctrl)
+{
+ MLXSW_REG_ZERO(hpkt, payload);
+ mlxsw_reg_hpkt_ack_set(payload, MLXSW_REG_HPKT_ACK_NOT_REQUIRED);
+ mlxsw_reg_hpkt_action_set(payload, action);
+ mlxsw_reg_hpkt_trap_group_set(payload, trap_group);
+ mlxsw_reg_hpkt_trap_id_set(payload, trap_id);
+ mlxsw_reg_hpkt_ctrl_set(payload, is_ctrl ?
+ MLXSW_REG_HPKT_CTRL_PACKET_USE_BUFFER :
+ MLXSW_REG_HPKT_CTRL_PACKET_NO_BUFFER);
+}
+
+/* RGCR - Router General Configuration Register
+ * --------------------------------------------
+ * The register is used for setting up the router configuration.
+ */
+#define MLXSW_REG_RGCR_ID 0x8001
+#define MLXSW_REG_RGCR_LEN 0x28
+
+MLXSW_REG_DEFINE(rgcr, MLXSW_REG_RGCR_ID, MLXSW_REG_RGCR_LEN);
+
+/* reg_rgcr_ipv4_en
+ * IPv4 router enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rgcr, ipv4_en, 0x00, 31, 1);
+
+/* reg_rgcr_ipv6_en
+ * IPv6 router enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rgcr, ipv6_en, 0x00, 30, 1);
+
+/* reg_rgcr_max_router_interfaces
+ * Defines the maximum number of active router interfaces for all virtual
+ * routers.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rgcr, max_router_interfaces, 0x10, 0, 16);
+
+/* reg_rgcr_usp
+ * Update switch priority and packet color.
+ * 0 - Preserve the value of Switch Priority and packet color.
+ * 1 - Recalculate the value of Switch Priority and packet color.
+ * Access: RW
+ *
+ * Note: Not supported by SwitchX and SwitchX-2.
+ */
+MLXSW_ITEM32(reg, rgcr, usp, 0x18, 20, 1);
+
+/* reg_rgcr_pcp_rw
+ * Indicates how to handle the pcp_rewrite_en value:
+ * 0 - Preserve the value of pcp_rewrite_en.
+ * 2 - Disable PCP rewrite.
+ * 3 - Enable PCP rewrite.
+ * Access: RW
+ *
+ * Note: Not supported by SwitchX and SwitchX-2.
+ */
+MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2);
+
+/* reg_rgcr_activity_dis
+ * Activity disable:
+ * 0 - Activity will be set when an entry is hit (default).
+ * 1 - Activity will not be set when an entry is hit.
+ *
+ * Bit 0 - Disable activity bit in Router Algorithmic LPM Unicast Entry
+ * (RALUE).
+ * Bit 1 - Disable activity bit in Router Algorithmic LPM Unicast Host
+ * Entry (RAUHT).
+ * Bits 2:7 are reserved.
+ * Access: RW
+ *
+ * Note: Not supported by SwitchX, SwitchX-2 and Switch-IB.
+ */
+MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8);
+
+static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en,
+ bool ipv6_en)
+{
+ MLXSW_REG_ZERO(rgcr, payload);
+ mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en);
+ mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en);
+}
+
+/* RITR - Router Interface Table Register
+ * --------------------------------------
+ * The register is used to configure the router interface table.
+ */
+#define MLXSW_REG_RITR_ID 0x8002
+#define MLXSW_REG_RITR_LEN 0x40
+
+MLXSW_REG_DEFINE(ritr, MLXSW_REG_RITR_ID, MLXSW_REG_RITR_LEN);
+
+/* reg_ritr_enable
+ * Enables routing on the router interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, enable, 0x00, 31, 1);
+
+/* reg_ritr_ipv4
+ * IPv4 routing enable. Enables routing of IPv4 traffic on the router
+ * interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1);
+
+/* reg_ritr_ipv6
+ * IPv6 routing enable. Enables routing of IPv6 traffic on the router
+ * interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
+
+/* reg_ritr_ipv4_mc
+ * IPv4 multicast routing enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1);
+
+/* reg_ritr_ipv6_mc
+ * IPv6 multicast routing enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_mc, 0x00, 26, 1);
+
+enum mlxsw_reg_ritr_if_type {
+ /* VLAN interface. */
+ MLXSW_REG_RITR_VLAN_IF,
+ /* FID interface. */
+ MLXSW_REG_RITR_FID_IF,
+ /* Sub-port interface. */
+ MLXSW_REG_RITR_SP_IF,
+ /* Loopback Interface. */
+ MLXSW_REG_RITR_LOOPBACK_IF,
+};
+
+/* reg_ritr_type
+ * Router interface type as per enum mlxsw_reg_ritr_if_type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3);
+
+enum {
+ MLXSW_REG_RITR_RIF_CREATE,
+ MLXSW_REG_RITR_RIF_DEL,
+};
+
+/* reg_ritr_op
+ * Opcode:
+ * 0 - Create or edit RIF.
+ * 1 - Delete RIF.
+ * Reserved for SwitchX-2. For Spectrum, editing of interface properties
+ * is not supported. An interface must be deleted and re-created in order
+ * to update properties.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ritr, op, 0x00, 20, 2);
+
+/* reg_ritr_rif
+ * Router interface index. A pointer to the Router Interface Table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ritr, rif, 0x00, 0, 16);
+
+/* reg_ritr_ipv4_fe
+ * IPv4 Forwarding Enable.
+ * Enables routing of IPv4 traffic on the router interface. When disabled,
+ * forwarding is blocked but local traffic (traps and IP2ME) will be enabled.
+ * Not supported in SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1);
+
+/* reg_ritr_ipv6_fe
+ * IPv6 Forwarding Enable.
+ * Enables routing of IPv6 traffic on the router interface. When disabled,
+ * forwarding is blocked but local traffic (traps and IP2ME) will be enabled.
+ * Not supported in SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
+
+/* reg_ritr_ipv4_mc_fe
+ * IPv4 Multicast Forwarding Enable.
+ * When disabled, forwarding is blocked but local traffic (traps and IP to me)
+ * will be enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1);
+
+/* reg_ritr_ipv6_mc_fe
+ * IPv6 Multicast Forwarding Enable.
+ * When disabled, forwarding is blocked but local traffic (traps and IP to me)
+ * will be enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_mc_fe, 0x04, 26, 1);
+
+/* reg_ritr_lb_en
+ * Loop-back filter enable for unicast packets.
+ * If the flag is set then loop-back filter for unicast packets is
+ * implemented on the RIF. Multicast packets are always subject to
+ * loop-back filtering.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1);
+
+/* reg_ritr_virtual_router
+ * Virtual router ID associated with the router interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, virtual_router, 0x04, 0, 16);
+
+/* reg_ritr_mtu
+ * Router interface MTU.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, mtu, 0x34, 0, 16);
+
+/* reg_ritr_if_swid
+ * Switch partition ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8);
+
+/* reg_ritr_if_mac
+ * Router interface MAC address.
+ * In Spectrum, all MAC addresses must have the same 38 MSBits.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6);
+
+/* reg_ritr_if_vrrp_id_ipv6
+ * VRRP ID for IPv6
+ * Note: Reserved for RIF types other than VLAN, FID and Sub-port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv6, 0x1C, 8, 8);
+
+/* reg_ritr_if_vrrp_id_ipv4
+ * VRRP ID for IPv4
+ * Note: Reserved for RIF types other than VLAN, FID and Sub-port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, if_vrrp_id_ipv4, 0x1C, 0, 8);
+
+/* VLAN Interface */
+
+/* reg_ritr_vlan_if_vid
+ * VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, vlan_if_vid, 0x08, 0, 12);
+
+/* FID Interface */
+
+/* reg_ritr_fid_if_fid
+ * Filtering ID. Used to connect a bridge to the router. Only FIDs from
+ * the vFID range are supported.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16);
+
+static inline void mlxsw_reg_ritr_fid_set(char *payload,
+ enum mlxsw_reg_ritr_if_type rif_type,
+ u16 fid)
+{
+ if (rif_type == MLXSW_REG_RITR_FID_IF)
+ mlxsw_reg_ritr_fid_if_fid_set(payload, fid);
+ else
+ mlxsw_reg_ritr_vlan_if_vid_set(payload, fid);
+}
+
+/* Sub-port Interface */
+
+/* reg_ritr_sp_if_lag
+ * LAG indication. When this bit is set the system_port field holds the
+ * LAG identifier.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1);
+
+/* reg_ritr_sp_system_port
+ * Port unique indentifier. When lag bit is set, this field holds the
+ * lag_id in bits 0:9.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16);
+
+/* reg_ritr_sp_if_vid
+ * VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12);
+
+/* Loopback Interface */
+
+enum mlxsw_reg_ritr_loopback_protocol {
+ /* IPinIP IPv4 underlay Unicast */
+ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4,
+ /* IPinIP IPv6 underlay Unicast */
+ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6,
+};
+
+/* reg_ritr_loopback_protocol
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4);
+
+enum mlxsw_reg_ritr_loopback_ipip_type {
+ /* Tunnel is IPinIP. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP,
+ /* Tunnel is GRE, no key. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP,
+ /* Tunnel is GRE, with a key. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP,
+};
+
+/* reg_ritr_loopback_ipip_type
+ * Encapsulation type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4);
+
+enum mlxsw_reg_ritr_loopback_ipip_options {
+ /* The key is defined by gre_key. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
+};
+
+/* reg_ritr_loopback_ipip_options
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4);
+
+/* reg_ritr_loopback_ipip_uvr
+ * Underlay Virtual Router ID.
+ * Range is 0..cap_max_virtual_routers-1.
+ * Reserved for Spectrum-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16);
+
+/* reg_ritr_loopback_ipip_usip*
+ * Encapsulation Underlay source IP.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16);
+MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32);
+
+/* reg_ritr_loopback_ipip_gre_key
+ * GRE Key.
+ * Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32);
+
+/* Shared between ingress/egress */
+enum mlxsw_reg_ritr_counter_set_type {
+ /* No Count. */
+ MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT = 0x0,
+ /* Basic. Used for router interfaces, counting the following:
+ * - Error and Discard counters.
+ * - Unicast, Multicast and Broadcast counters. Sharing the
+ * same set of counters for the different type of traffic
+ * (IPv4, IPv6 and mpls).
+ */
+ MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC = 0x9,
+};
+
+/* reg_ritr_ingress_counter_index
+ * Counter Index for flow counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ingress_counter_index, 0x38, 0, 24);
+
+/* reg_ritr_ingress_counter_set_type
+ * Igress Counter Set Type for router interface counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ingress_counter_set_type, 0x38, 24, 8);
+
+/* reg_ritr_egress_counter_index
+ * Counter Index for flow counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, egress_counter_index, 0x3C, 0, 24);
+
+/* reg_ritr_egress_counter_set_type
+ * Egress Counter Set Type for router interface counter.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, egress_counter_set_type, 0x3C, 24, 8);
+
+static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index,
+ bool enable, bool egress)
+{
+ enum mlxsw_reg_ritr_counter_set_type set_type;
+
+ if (enable)
+ set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC;
+ else
+ set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT;
+ mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type);
+
+ if (egress)
+ mlxsw_reg_ritr_egress_counter_index_set(payload, index);
+ else
+ mlxsw_reg_ritr_ingress_counter_index_set(payload, index);
+}
+
+static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif)
+{
+ MLXSW_REG_ZERO(ritr, payload);
+ mlxsw_reg_ritr_rif_set(payload, rif);
+}
+
+static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag,
+ u16 system_port, u16 vid)
+{
+ mlxsw_reg_ritr_sp_if_lag_set(payload, lag);
+ mlxsw_reg_ritr_sp_if_system_port_set(payload, system_port);
+ mlxsw_reg_ritr_sp_if_vid_set(payload, vid);
+}
+
+static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
+ enum mlxsw_reg_ritr_if_type type,
+ u16 rif, u16 vr_id, u16 mtu)
+{
+ bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL;
+
+ MLXSW_REG_ZERO(ritr, payload);
+ mlxsw_reg_ritr_enable_set(payload, enable);
+ mlxsw_reg_ritr_ipv4_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_set(payload, 1);
+ mlxsw_reg_ritr_ipv4_mc_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_mc_set(payload, 1);
+ mlxsw_reg_ritr_type_set(payload, type);
+ mlxsw_reg_ritr_op_set(payload, op);
+ mlxsw_reg_ritr_rif_set(payload, rif);
+ mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
+ mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_mc_fe_set(payload, 1);
+ mlxsw_reg_ritr_lb_en_set(payload, 1);
+ mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
+ mlxsw_reg_ritr_mtu_set(payload, mtu);
+}
+
+static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac)
+{
+ mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
+}
+
+static inline void
+mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
+ enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
+ enum mlxsw_reg_ritr_loopback_ipip_options options,
+ u16 uvr_id, u32 gre_key)
+{
+ mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type);
+ mlxsw_reg_ritr_loopback_ipip_options_set(payload, options);
+ mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id);
+ mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key);
+}
+
+static inline void
+mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
+ enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
+ enum mlxsw_reg_ritr_loopback_ipip_options options,
+ u16 uvr_id, u32 usip, u32 gre_key)
+{
+ mlxsw_reg_ritr_loopback_protocol_set(payload,
+ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4);
+ mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options,
+ uvr_id, gre_key);
+ mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
+}
+
+/* RTAR - Router TCAM Allocation Register
+ * --------------------------------------
+ * This register is used for allocation of regions in the TCAM table.
+ */
+#define MLXSW_REG_RTAR_ID 0x8004
+#define MLXSW_REG_RTAR_LEN 0x20
+
+MLXSW_REG_DEFINE(rtar, MLXSW_REG_RTAR_ID, MLXSW_REG_RTAR_LEN);
+
+enum mlxsw_reg_rtar_op {
+ MLXSW_REG_RTAR_OP_ALLOCATE,
+ MLXSW_REG_RTAR_OP_RESIZE,
+ MLXSW_REG_RTAR_OP_DEALLOCATE,
+};
+
+/* reg_rtar_op
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, op, 0x00, 28, 4);
+
+enum mlxsw_reg_rtar_key_type {
+ MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST = 1,
+ MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST = 3
+};
+
+/* reg_rtar_key_type
+ * TCAM key type for the region.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, key_type, 0x00, 0, 8);
+
+/* reg_rtar_region_size
+ * TCAM region size. When allocating/resizing this is the requested
+ * size, the response is the actual size.
+ * Note: Actual size may be larger than requested.
+ * Reserved for op = Deallocate
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, region_size, 0x04, 0, 16);
+
+static inline void mlxsw_reg_rtar_pack(char *payload,
+ enum mlxsw_reg_rtar_op op,
+ enum mlxsw_reg_rtar_key_type key_type,
+ u16 region_size)
+{
+ MLXSW_REG_ZERO(rtar, payload);
+ mlxsw_reg_rtar_op_set(payload, op);
+ mlxsw_reg_rtar_key_type_set(payload, key_type);
+ mlxsw_reg_rtar_region_size_set(payload, region_size);
+}
+
+/* RATR - Router Adjacency Table Register
+ * --------------------------------------
+ * The RATR register is used to configure the Router Adjacency (next-hop)
+ * Table.
+ */
+#define MLXSW_REG_RATR_ID 0x8008
+#define MLXSW_REG_RATR_LEN 0x2C
+
+MLXSW_REG_DEFINE(ratr, MLXSW_REG_RATR_ID, MLXSW_REG_RATR_LEN);
+
+enum mlxsw_reg_ratr_op {
+ /* Read */
+ MLXSW_REG_RATR_OP_QUERY_READ = 0,
+ /* Read and clear activity */
+ MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2,
+ /* Write Adjacency entry */
+ MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1,
+ /* Write Adjacency entry only if the activity is cleared.
+ * The write may not succeed if the activity is set. There is not
+ * direct feedback if the write has succeeded or not, however
+ * the get will reveal the actual entry (SW can compare the get
+ * response to the set command).
+ */
+ MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3,
+};
+
+/* reg_ratr_op
+ * Note that Write operation may also be used for updating
+ * counter_set_type and counter_index. In this case all other
+ * fields must not be updated.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4);
+
+/* reg_ratr_v
+ * Valid bit. Indicates if the adjacency entry is valid.
+ * Note: the device may need some time before reusing an invalidated
+ * entry. During this time the entry can not be reused. It is
+ * recommended to use another entry before reusing an invalidated
+ * entry (e.g. software can put it at the end of the list for
+ * reusing). Trying to access an invalidated entry not yet cleared
+ * by the device results with failure indicating "Try Again" status.
+ * When valid is '0' then egress_router_interface,trap_action,
+ * adjacency_parameters and counters are reserved
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1);
+
+/* reg_ratr_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on
+ * the specific entry. To clear the a bit, use "clear activity".
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1);
+
+enum mlxsw_reg_ratr_type {
+ /* Ethernet */
+ MLXSW_REG_RATR_TYPE_ETHERNET,
+ /* IPoIB Unicast without GRH.
+ * Reserved for Spectrum.
+ */
+ MLXSW_REG_RATR_TYPE_IPOIB_UC,
+ /* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast
+ * adjacency).
+ * Reserved for Spectrum.
+ */
+ MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH,
+ /* IPoIB Multicast.
+ * Reserved for Spectrum.
+ */
+ MLXSW_REG_RATR_TYPE_IPOIB_MC,
+ /* MPLS.
+ * Reserved for SwitchX/-2.
+ */
+ MLXSW_REG_RATR_TYPE_MPLS,
+ /* IPinIP Encap.
+ * Reserved for SwitchX/-2.
+ */
+ MLXSW_REG_RATR_TYPE_IPIP,
+};
+
+/* reg_ratr_type
+ * Adjacency entry type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4);
+
+/* reg_ratr_adjacency_index_low
+ * Bits 15:0 of index into the adjacency table.
+ * For SwitchX and SwitchX-2, the adjacency table is linear and
+ * used for adjacency entries only.
+ * For Spectrum, the index is to the KVD linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16);
+
+/* reg_ratr_egress_router_interface
+ * Range is 0 .. cap_max_router_interfaces - 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16);
+
+enum mlxsw_reg_ratr_trap_action {
+ MLXSW_REG_RATR_TRAP_ACTION_NOP,
+ MLXSW_REG_RATR_TRAP_ACTION_TRAP,
+ MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU,
+ MLXSW_REG_RATR_TRAP_ACTION_MIRROR,
+ MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS,
+};
+
+/* reg_ratr_trap_action
+ * see mlxsw_reg_ratr_trap_action
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
+
+/* reg_ratr_adjacency_index_high
+ * Bits 23:16 of the adjacency_index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
+
+enum mlxsw_reg_ratr_trap_id {
+ MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0,
+ MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1,
+};
+
+/* reg_ratr_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERROR
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8);
+
+/* reg_ratr_eth_destination_mac
+ * MAC address of the destination next-hop.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6);
+
+enum mlxsw_reg_ratr_ipip_type {
+ /* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. */
+ MLXSW_REG_RATR_IPIP_TYPE_IPV4,
+ /* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */
+ MLXSW_REG_RATR_IPIP_TYPE_IPV6,
+};
+
+/* reg_ratr_ipip_type
+ * Underlay destination ip type.
+ * Note: the type field must match the protocol of the router interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4);
+
+/* reg_ratr_ipip_ipv4_udip
+ * Underlay ipv4 dip.
+ * Reserved when ipip_type is IPv6.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32);
+
+/* reg_ratr_ipip_ipv6_ptr
+ * Pointer to IPv6 underlay destination ip address.
+ * For Spectrum: Pointer to KVD linear space.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
+
+enum mlxsw_reg_flow_counter_set_type {
+ /* No count */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+ /* Count packets and bytes */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
+ /* Count only packets */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05,
+};
+
+/* reg_ratr_counter_set_type
+ * Counter set type for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8);
+
+/* reg_ratr_counter_index
+ * Counter index for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24);
+
+static inline void
+mlxsw_reg_ratr_pack(char *payload,
+ enum mlxsw_reg_ratr_op op, bool valid,
+ enum mlxsw_reg_ratr_type type,
+ u32 adjacency_index, u16 egress_rif)
+{
+ MLXSW_REG_ZERO(ratr, payload);
+ mlxsw_reg_ratr_op_set(payload, op);
+ mlxsw_reg_ratr_v_set(payload, valid);
+ mlxsw_reg_ratr_type_set(payload, type);
+ mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
+ mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
+ mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
+}
+
+static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload,
+ const char *dest_mac)
+{
+ mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac);
+}
+
+static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip)
+{
+ mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4);
+ mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip);
+}
+
+static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index,
+ bool counter_enable)
+{
+ enum mlxsw_reg_flow_counter_set_type set_type;
+
+ if (counter_enable)
+ set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES;
+ else
+ set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT;
+
+ mlxsw_reg_ratr_counter_index_set(payload, counter_index);
+ mlxsw_reg_ratr_counter_set_type_set(payload, set_type);
+}
+
+/* RDPM - Router DSCP to Priority Mapping
+ * --------------------------------------
+ * Controls the mapping from DSCP field to switch priority on routed packets
+ */
+#define MLXSW_REG_RDPM_ID 0x8009
+#define MLXSW_REG_RDPM_BASE_LEN 0x00
+#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN 0x01
+#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT 64
+#define MLXSW_REG_RDPM_LEN 0x40
+#define MLXSW_REG_RDPM_LAST_ENTRY (MLXSW_REG_RDPM_BASE_LEN + \
+ MLXSW_REG_RDPM_LEN - \
+ MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN)
+
+MLXSW_REG_DEFINE(rdpm, MLXSW_REG_RDPM_ID, MLXSW_REG_RDPM_LEN);
+
+/* reg_dscp_entry_e
+ * Enable update of the specific entry
+ * Access: Index
+ */
+MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_e, MLXSW_REG_RDPM_LAST_ENTRY, 7, 1,
+ -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_dscp_entry_prio
+ * Switch Priority
+ * Access: RW
+ */
+MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_prio, MLXSW_REG_RDPM_LAST_ENTRY, 0, 4,
+ -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_rdpm_pack(char *payload, unsigned short index,
+ u8 prio)
+{
+ mlxsw_reg_rdpm_dscp_entry_e_set(payload, index, 1);
+ mlxsw_reg_rdpm_dscp_entry_prio_set(payload, index, prio);
+}
+
+/* RICNT - Router Interface Counter Register
+ * -----------------------------------------
+ * The RICNT register retrieves per port performance counters
+ */
+#define MLXSW_REG_RICNT_ID 0x800B
+#define MLXSW_REG_RICNT_LEN 0x100
+
+MLXSW_REG_DEFINE(ricnt, MLXSW_REG_RICNT_ID, MLXSW_REG_RICNT_LEN);
+
+/* reg_ricnt_counter_index
+ * Counter index
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ricnt, counter_index, 0x04, 0, 24);
+
+enum mlxsw_reg_ricnt_counter_set_type {
+ /* No Count. */
+ MLXSW_REG_RICNT_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+ /* Basic. Used for router interfaces, counting the following:
+ * - Error and Discard counters.
+ * - Unicast, Multicast and Broadcast counters. Sharing the
+ * same set of counters for the different type of traffic
+ * (IPv4, IPv6 and mpls).
+ */
+ MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC = 0x09,
+};
+
+/* reg_ricnt_counter_set_type
+ * Counter Set Type for router interface counter
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ricnt, counter_set_type, 0x04, 24, 8);
+
+enum mlxsw_reg_ricnt_opcode {
+ /* Nop. Supported only for read access*/
+ MLXSW_REG_RICNT_OPCODE_NOP = 0x00,
+ /* Clear. Setting the clr bit will reset the counter value for
+ * all counters of the specified Router Interface.
+ */
+ MLXSW_REG_RICNT_OPCODE_CLEAR = 0x08,
+};
+
+/* reg_ricnt_opcode
+ * Opcode
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ricnt, op, 0x00, 28, 4);
+
+/* reg_ricnt_good_unicast_packets
+ * good unicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_unicast_packets, 0x08, 0, 64);
+
+/* reg_ricnt_good_multicast_packets
+ * good multicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_multicast_packets, 0x10, 0, 64);
+
+/* reg_ricnt_good_broadcast_packets
+ * good broadcast packets
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_broadcast_packets, 0x18, 0, 64);
+
+/* reg_ricnt_good_unicast_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for good unicast frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_unicast_bytes, 0x20, 0, 64);
+
+/* reg_ricnt_good_multicast_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for good multicast frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_multicast_bytes, 0x28, 0, 64);
+
+/* reg_ritr_good_broadcast_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for good broadcast frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, good_broadcast_bytes, 0x30, 0, 64);
+
+/* reg_ricnt_error_packets
+ * A count of errored frames that do not pass the router checks.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, error_packets, 0x38, 0, 64);
+
+/* reg_ricnt_discrad_packets
+ * A count of non-errored frames that do not pass the router checks.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, discard_packets, 0x40, 0, 64);
+
+/* reg_ricnt_error_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for errored frames.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, error_bytes, 0x48, 0, 64);
+
+/* reg_ricnt_discard_bytes
+ * A count of L3 data and padding octets not including L2 headers
+ * for non-errored frames that do not pass the router checks.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, ricnt, discard_bytes, 0x50, 0, 64);
+
+static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index,
+ enum mlxsw_reg_ricnt_opcode op)
+{
+ MLXSW_REG_ZERO(ricnt, payload);
+ mlxsw_reg_ricnt_op_set(payload, op);
+ mlxsw_reg_ricnt_counter_index_set(payload, index);
+ mlxsw_reg_ricnt_counter_set_type_set(payload,
+ MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC);
+}
+
+/* RRCR - Router Rules Copy Register Layout
+ * ----------------------------------------
+ * This register is used for moving and copying route entry rules.
+ */
+#define MLXSW_REG_RRCR_ID 0x800F
+#define MLXSW_REG_RRCR_LEN 0x24
+
+MLXSW_REG_DEFINE(rrcr, MLXSW_REG_RRCR_ID, MLXSW_REG_RRCR_LEN);
+
+enum mlxsw_reg_rrcr_op {
+ /* Move rules */
+ MLXSW_REG_RRCR_OP_MOVE,
+ /* Copy rules */
+ MLXSW_REG_RRCR_OP_COPY,
+};
+
+/* reg_rrcr_op
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rrcr, op, 0x00, 28, 4);
+
+/* reg_rrcr_offset
+ * Offset within the region from which to copy/move.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, offset, 0x00, 0, 16);
+
+/* reg_rrcr_size
+ * The number of rules to copy/move.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rrcr, size, 0x04, 0, 16);
+
+/* reg_rrcr_table_id
+ * Identifier of the table on which to perform the operation. Encoding is the
+ * same as in RTAR.key_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, table_id, 0x10, 0, 4);
+
+/* reg_rrcr_dest_offset
+ * Offset within the region to which to copy/move
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, dest_offset, 0x20, 0, 16);
+
+static inline void mlxsw_reg_rrcr_pack(char *payload, enum mlxsw_reg_rrcr_op op,
+ u16 offset, u16 size,
+ enum mlxsw_reg_rtar_key_type table_id,
+ u16 dest_offset)
+{
+ MLXSW_REG_ZERO(rrcr, payload);
+ mlxsw_reg_rrcr_op_set(payload, op);
+ mlxsw_reg_rrcr_offset_set(payload, offset);
+ mlxsw_reg_rrcr_size_set(payload, size);
+ mlxsw_reg_rrcr_table_id_set(payload, table_id);
+ mlxsw_reg_rrcr_dest_offset_set(payload, dest_offset);
+}
+
+/* RALTA - Router Algorithmic LPM Tree Allocation Register
+ * -------------------------------------------------------
+ * RALTA is used to allocate the LPM trees of the SHSPM method.
+ */
+#define MLXSW_REG_RALTA_ID 0x8010
+#define MLXSW_REG_RALTA_LEN 0x04
+
+MLXSW_REG_DEFINE(ralta, MLXSW_REG_RALTA_ID, MLXSW_REG_RALTA_LEN);
+
+/* reg_ralta_op
+ * opcode (valid for Write, must be 0 on Read)
+ * 0 - allocate a tree
+ * 1 - deallocate a tree
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ralta, op, 0x00, 28, 2);
+
+enum mlxsw_reg_ralxx_protocol {
+ MLXSW_REG_RALXX_PROTOCOL_IPV4,
+ MLXSW_REG_RALXX_PROTOCOL_IPV6,
+};
+
+/* reg_ralta_protocol
+ * Protocol.
+ * Deallocation opcode: Reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralta, protocol, 0x00, 24, 4);
+
+/* reg_ralta_tree_id
+ * An identifier (numbered from 1..cap_shspm_max_trees-1) representing
+ * the tree identifier (managed by software).
+ * Note that tree_id 0 is allocated for a default-route tree.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralta, tree_id, 0x00, 0, 8);
+
+static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc,
+ enum mlxsw_reg_ralxx_protocol protocol,
+ u8 tree_id)
+{
+ MLXSW_REG_ZERO(ralta, payload);
+ mlxsw_reg_ralta_op_set(payload, !alloc);
+ mlxsw_reg_ralta_protocol_set(payload, protocol);
+ mlxsw_reg_ralta_tree_id_set(payload, tree_id);
+}
+
+/* RALST - Router Algorithmic LPM Structure Tree Register
+ * ------------------------------------------------------
+ * RALST is used to set and query the structure of an LPM tree.
+ * The structure of the tree must be sorted as a sorted binary tree, while
+ * each node is a bin that is tagged as the length of the prefixes the lookup
+ * will refer to. Therefore, bin X refers to a set of entries with prefixes
+ * of X bits to match with the destination address. The bin 0 indicates
+ * the default action, when there is no match of any prefix.
+ */
+#define MLXSW_REG_RALST_ID 0x8011
+#define MLXSW_REG_RALST_LEN 0x104
+
+MLXSW_REG_DEFINE(ralst, MLXSW_REG_RALST_ID, MLXSW_REG_RALST_LEN);
+
+/* reg_ralst_root_bin
+ * The bin number of the root bin.
+ * 0<root_bin=<(length of IP address)
+ * For a default-route tree configure 0xff
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralst, root_bin, 0x00, 16, 8);
+
+/* reg_ralst_tree_id
+ * Tree identifier numbered from 1..(cap_shspm_max_trees-1).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralst, tree_id, 0x00, 0, 8);
+
+#define MLXSW_REG_RALST_BIN_NO_CHILD 0xff
+#define MLXSW_REG_RALST_BIN_OFFSET 0x04
+#define MLXSW_REG_RALST_BIN_COUNT 128
+
+/* reg_ralst_left_child_bin
+ * Holding the children of the bin according to the stored tree's structure.
+ * For trees composed of less than 4 blocks, the bins in excess are reserved.
+ * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff
+ * Access: RW
+ */
+MLXSW_ITEM16_INDEXED(reg, ralst, left_child_bin, 0x04, 8, 8, 0x02, 0x00, false);
+
+/* reg_ralst_right_child_bin
+ * Holding the children of the bin according to the stored tree's structure.
+ * For trees composed of less than 4 blocks, the bins in excess are reserved.
+ * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff
+ * Access: RW
+ */
+MLXSW_ITEM16_INDEXED(reg, ralst, right_child_bin, 0x04, 0, 8, 0x02, 0x00,
+ false);
+
+static inline void mlxsw_reg_ralst_pack(char *payload, u8 root_bin, u8 tree_id)
+{
+ MLXSW_REG_ZERO(ralst, payload);
+
+ /* Initialize all bins to have no left or right child */
+ memset(payload + MLXSW_REG_RALST_BIN_OFFSET,
+ MLXSW_REG_RALST_BIN_NO_CHILD, MLXSW_REG_RALST_BIN_COUNT * 2);
+
+ mlxsw_reg_ralst_root_bin_set(payload, root_bin);
+ mlxsw_reg_ralst_tree_id_set(payload, tree_id);
+}
+
+static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number,
+ u8 left_child_bin,
+ u8 right_child_bin)
+{
+ int bin_index = bin_number - 1;
+
+ mlxsw_reg_ralst_left_child_bin_set(payload, bin_index, left_child_bin);
+ mlxsw_reg_ralst_right_child_bin_set(payload, bin_index,
+ right_child_bin);
+}
+
+/* RALTB - Router Algorithmic LPM Tree Binding Register
+ * ----------------------------------------------------
+ * RALTB is used to bind virtual router and protocol to an allocated LPM tree.
+ */
+#define MLXSW_REG_RALTB_ID 0x8012
+#define MLXSW_REG_RALTB_LEN 0x04
+
+MLXSW_REG_DEFINE(raltb, MLXSW_REG_RALTB_ID, MLXSW_REG_RALTB_LEN);
+
+/* reg_raltb_virtual_router
+ * Virtual Router ID
+ * Range is 0..cap_max_virtual_routers-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raltb, virtual_router, 0x00, 16, 16);
+
+/* reg_raltb_protocol
+ * Protocol.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raltb, protocol, 0x00, 12, 4);
+
+/* reg_raltb_tree_id
+ * Tree to be used for the {virtual_router, protocol}
+ * Tree identifier numbered from 1..(cap_shspm_max_trees-1).
+ * By default, all Unicast IPv4 and IPv6 are bound to tree_id 0.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, raltb, tree_id, 0x00, 0, 8);
+
+static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router,
+ enum mlxsw_reg_ralxx_protocol protocol,
+ u8 tree_id)
+{
+ MLXSW_REG_ZERO(raltb, payload);
+ mlxsw_reg_raltb_virtual_router_set(payload, virtual_router);
+ mlxsw_reg_raltb_protocol_set(payload, protocol);
+ mlxsw_reg_raltb_tree_id_set(payload, tree_id);
+}
+
+/* RALUE - Router Algorithmic LPM Unicast Entry Register
+ * -----------------------------------------------------
+ * RALUE is used to configure and query LPM entries that serve
+ * the Unicast protocols.
+ */
+#define MLXSW_REG_RALUE_ID 0x8013
+#define MLXSW_REG_RALUE_LEN 0x38
+
+MLXSW_REG_DEFINE(ralue, MLXSW_REG_RALUE_ID, MLXSW_REG_RALUE_LEN);
+
+/* reg_ralue_protocol
+ * Protocol.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, protocol, 0x00, 24, 4);
+
+enum mlxsw_reg_ralue_op {
+ /* Read operation. If entry doesn't exist, the operation fails. */
+ MLXSW_REG_RALUE_OP_QUERY_READ = 0,
+ /* Clear on read operation. Used to read entry and
+ * clear Activity bit.
+ */
+ MLXSW_REG_RALUE_OP_QUERY_CLEAR = 1,
+ /* Write operation. Used to write a new entry to the table. All RW
+ * fields are written for new entry. Activity bit is set
+ * for new entries.
+ */
+ MLXSW_REG_RALUE_OP_WRITE_WRITE = 0,
+ /* Update operation. Used to update an existing route entry and
+ * only update the RW fields that are detailed in the field
+ * op_u_mask. If entry doesn't exist, the operation fails.
+ */
+ MLXSW_REG_RALUE_OP_WRITE_UPDATE = 1,
+ /* Clear activity. The Activity bit (the field a) is cleared
+ * for the entry.
+ */
+ MLXSW_REG_RALUE_OP_WRITE_CLEAR = 2,
+ /* Delete operation. Used to delete an existing entry. If entry
+ * doesn't exist, the operation fails.
+ */
+ MLXSW_REG_RALUE_OP_WRITE_DELETE = 3,
+};
+
+/* reg_ralue_op
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, ralue, op, 0x00, 20, 3);
+
+/* reg_ralue_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry, only if the entry is a route. To clear the a bit, use
+ * "clear activity" op.
+ * Enabled by activity_dis in RGCR
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ralue, a, 0x00, 16, 1);
+
+/* reg_ralue_virtual_router
+ * Virtual Router ID
+ * Range is 0..cap_max_virtual_routers-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, virtual_router, 0x04, 16, 16);
+
+#define MLXSW_REG_RALUE_OP_U_MASK_ENTRY_TYPE BIT(0)
+#define MLXSW_REG_RALUE_OP_U_MASK_BMP_LEN BIT(1)
+#define MLXSW_REG_RALUE_OP_U_MASK_ACTION BIT(2)
+
+/* reg_ralue_op_u_mask
+ * opcode update mask.
+ * On read operation, this field is reserved.
+ * This field is valid for update opcode, otherwise - reserved.
+ * This field is a bitmask of the fields that should be updated.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, ralue, op_u_mask, 0x04, 8, 3);
+
+/* reg_ralue_prefix_len
+ * Number of bits in the prefix of the LPM route.
+ * Note that for IPv6 prefixes, if prefix_len>64 the entry consumes
+ * two entries in the physical HW table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8);
+
+/* reg_ralue_dip*
+ * The prefix of the route or of the marker that the object of the LPM
+ * is compared with. The most significant bits of the dip are the prefix.
+ * The least significant bits must be '0' if the prefix_len is smaller
+ * than 128 for IPv6 or smaller than 32 for IPv4.
+ * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32);
+MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16);
+
+enum mlxsw_reg_ralue_entry_type {
+ MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1,
+ MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY = 2,
+ MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_AND_ROUTE_ENTRY = 3,
+};
+
+/* reg_ralue_entry_type
+ * Entry type.
+ * Note - for Marker entries, the action_type and action fields are reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, entry_type, 0x1C, 30, 2);
+
+/* reg_ralue_bmp_len
+ * The best match prefix length in the case that there is no match for
+ * longer prefixes.
+ * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len
+ * Note for any update operation with entry_type modification this
+ * field must be set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, bmp_len, 0x1C, 16, 8);
+
+enum mlxsw_reg_ralue_action_type {
+ MLXSW_REG_RALUE_ACTION_TYPE_REMOTE,
+ MLXSW_REG_RALUE_ACTION_TYPE_LOCAL,
+ MLXSW_REG_RALUE_ACTION_TYPE_IP2ME,
+};
+
+/* reg_ralue_action_type
+ * Action Type
+ * Indicates how the IP address is connected.
+ * It can be connected to a local subnet through local_erif or can be
+ * on a remote subnet connected through a next-hop router,
+ * or transmitted to the CPU.
+ * Reserved when entry_type = MARKER_ENTRY
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, action_type, 0x1C, 0, 2);
+
+enum mlxsw_reg_ralue_trap_action {
+ MLXSW_REG_RALUE_TRAP_ACTION_NOP,
+ MLXSW_REG_RALUE_TRAP_ACTION_TRAP,
+ MLXSW_REG_RALUE_TRAP_ACTION_MIRROR_TO_CPU,
+ MLXSW_REG_RALUE_TRAP_ACTION_MIRROR,
+ MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR,
+};
+
+/* reg_ralue_trap_action
+ * Trap action.
+ * For IP2ME action, only NOP and MIRROR are possible.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, trap_action, 0x20, 28, 4);
+
+/* reg_ralue_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap ID is RTR_INGRESS0 or RTR_INGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERROR, trap_id is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, trap_id, 0x20, 0, 9);
+
+/* reg_ralue_adjacency_index
+ * Points to the first entry of the group-based ECMP.
+ * Only relevant in case of REMOTE action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, adjacency_index, 0x24, 0, 24);
+
+/* reg_ralue_ecmp_size
+ * Amount of sequential entries starting
+ * from the adjacency_index (the number of ECMPs).
+ * The valid range is 1-64, 512, 1024, 2048 and 4096.
+ * Reserved when trap_action is TRAP or DISCARD_ERROR.
+ * Only relevant in case of REMOTE action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13);
+
+/* reg_ralue_local_erif
+ * Egress Router Interface.
+ * Only relevant in case of LOCAL action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16);
+
+/* reg_ralue_ip2me_v
+ * Valid bit for the tunnel_ptr field.
+ * If valid = 0 then trap to CPU as IP2ME trap ID.
+ * If valid = 1 and the packet format allows NVE or IPinIP tunnel
+ * decapsulation then tunnel decapsulation is done.
+ * If valid = 1 and packet format does not allow NVE or IPinIP tunnel
+ * decapsulation then trap as IP2ME trap ID.
+ * Only relevant in case of IP2ME action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1);
+
+/* reg_ralue_ip2me_tunnel_ptr
+ * Tunnel Pointer for NVE or IPinIP tunnel decapsulation.
+ * For Spectrum, pointer to KVD Linear.
+ * Only relevant in case of IP2ME action.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24);
+
+static inline void mlxsw_reg_ralue_pack(char *payload,
+ enum mlxsw_reg_ralxx_protocol protocol,
+ enum mlxsw_reg_ralue_op op,
+ u16 virtual_router, u8 prefix_len)
+{
+ MLXSW_REG_ZERO(ralue, payload);
+ mlxsw_reg_ralue_protocol_set(payload, protocol);
+ mlxsw_reg_ralue_op_set(payload, op);
+ mlxsw_reg_ralue_virtual_router_set(payload, virtual_router);
+ mlxsw_reg_ralue_prefix_len_set(payload, prefix_len);
+ mlxsw_reg_ralue_entry_type_set(payload,
+ MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY);
+ mlxsw_reg_ralue_bmp_len_set(payload, prefix_len);
+}
+
+static inline void mlxsw_reg_ralue_pack4(char *payload,
+ enum mlxsw_reg_ralxx_protocol protocol,
+ enum mlxsw_reg_ralue_op op,
+ u16 virtual_router, u8 prefix_len,
+ u32 dip)
+{
+ mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
+ mlxsw_reg_ralue_dip4_set(payload, dip);
+}
+
+static inline void mlxsw_reg_ralue_pack6(char *payload,
+ enum mlxsw_reg_ralxx_protocol protocol,
+ enum mlxsw_reg_ralue_op op,
+ u16 virtual_router, u8 prefix_len,
+ const void *dip)
+{
+ mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
+ mlxsw_reg_ralue_dip6_memcpy_to(payload, dip);
+}
+
+static inline void
+mlxsw_reg_ralue_act_remote_pack(char *payload,
+ enum mlxsw_reg_ralue_trap_action trap_action,
+ u16 trap_id, u32 adjacency_index, u16 ecmp_size)
+{
+ mlxsw_reg_ralue_action_type_set(payload,
+ MLXSW_REG_RALUE_ACTION_TYPE_REMOTE);
+ mlxsw_reg_ralue_trap_action_set(payload, trap_action);
+ mlxsw_reg_ralue_trap_id_set(payload, trap_id);
+ mlxsw_reg_ralue_adjacency_index_set(payload, adjacency_index);
+ mlxsw_reg_ralue_ecmp_size_set(payload, ecmp_size);
+}
+
+static inline void
+mlxsw_reg_ralue_act_local_pack(char *payload,
+ enum mlxsw_reg_ralue_trap_action trap_action,
+ u16 trap_id, u16 local_erif)
+{
+ mlxsw_reg_ralue_action_type_set(payload,
+ MLXSW_REG_RALUE_ACTION_TYPE_LOCAL);
+ mlxsw_reg_ralue_trap_action_set(payload, trap_action);
+ mlxsw_reg_ralue_trap_id_set(payload, trap_id);
+ mlxsw_reg_ralue_local_erif_set(payload, local_erif);
+}
+
+static inline void
+mlxsw_reg_ralue_act_ip2me_pack(char *payload)
+{
+ mlxsw_reg_ralue_action_type_set(payload,
+ MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
+}
+
+static inline void
+mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr)
+{
+ mlxsw_reg_ralue_action_type_set(payload,
+ MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
+ mlxsw_reg_ralue_ip2me_v_set(payload, 1);
+ mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr);
+}
+
+/* RAUHT - Router Algorithmic LPM Unicast Host Table Register
+ * ----------------------------------------------------------
+ * The RAUHT register is used to configure and query the Unicast Host table in
+ * devices that implement the Algorithmic LPM.
+ */
+#define MLXSW_REG_RAUHT_ID 0x8014
+#define MLXSW_REG_RAUHT_LEN 0x74
+
+MLXSW_REG_DEFINE(rauht, MLXSW_REG_RAUHT_ID, MLXSW_REG_RAUHT_LEN);
+
+enum mlxsw_reg_rauht_type {
+ MLXSW_REG_RAUHT_TYPE_IPV4,
+ MLXSW_REG_RAUHT_TYPE_IPV6,
+};
+
+/* reg_rauht_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2);
+
+enum mlxsw_reg_rauht_op {
+ MLXSW_REG_RAUHT_OP_QUERY_READ = 0,
+ /* Read operation */
+ MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1,
+ /* Clear on read operation. Used to read entry and clear
+ * activity bit.
+ */
+ MLXSW_REG_RAUHT_OP_WRITE_ADD = 0,
+ /* Add. Used to write a new entry to the table. All R/W fields are
+ * relevant for new entry. Activity bit is set for new entries.
+ */
+ MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1,
+ /* Update action. Used to update an existing route entry and
+ * only update the following fields:
+ * trap_action, trap_id, mac, counter_set_type, counter_index
+ */
+ MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2,
+ /* Clear activity. A bit is cleared for the entry. */
+ MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3,
+ /* Delete entry */
+ MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4,
+ /* Delete all host entries on a RIF. In this command, dip
+ * field is reserved.
+ */
+};
+
+/* reg_rauht_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3);
+
+/* reg_rauht_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on
+ * the specific entry.
+ * To clear the a bit, use "clear activity" op.
+ * Enabled by activity_dis in RGCR
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1);
+
+/* reg_rauht_rif
+ * Router Interface
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16);
+
+/* reg_rauht_dip*
+ * Destination address.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32);
+MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16);
+
+enum mlxsw_reg_rauht_trap_action {
+ MLXSW_REG_RAUHT_TRAP_ACTION_NOP,
+ MLXSW_REG_RAUHT_TRAP_ACTION_TRAP,
+ MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU,
+ MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR,
+ MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS,
+};
+
+/* reg_rauht_trap_action
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4);
+
+enum mlxsw_reg_rauht_trap_id {
+ MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0,
+ MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1,
+};
+
+/* reg_rauht_trap_id
+ * Trap ID to be reported to CPU.
+ * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
+ * For trap_action of NOP, MIRROR and DISCARD_ERROR,
+ * trap_id is reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9);
+
+/* reg_rauht_counter_set_type
+ * Counter set type for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8);
+
+/* reg_rauht_counter_index
+ * Counter index for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24);
+
+/* reg_rauht_mac
+ * MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6);
+
+static inline void mlxsw_reg_rauht_pack(char *payload,
+ enum mlxsw_reg_rauht_op op, u16 rif,
+ const char *mac)
+{
+ MLXSW_REG_ZERO(rauht, payload);
+ mlxsw_reg_rauht_op_set(payload, op);
+ mlxsw_reg_rauht_rif_set(payload, rif);
+ mlxsw_reg_rauht_mac_memcpy_to(payload, mac);
+}
+
+static inline void mlxsw_reg_rauht_pack4(char *payload,
+ enum mlxsw_reg_rauht_op op, u16 rif,
+ const char *mac, u32 dip)
+{
+ mlxsw_reg_rauht_pack(payload, op, rif, mac);
+ mlxsw_reg_rauht_dip4_set(payload, dip);
+}
+
+static inline void mlxsw_reg_rauht_pack6(char *payload,
+ enum mlxsw_reg_rauht_op op, u16 rif,
+ const char *mac, const char *dip)
+{
+ mlxsw_reg_rauht_pack(payload, op, rif, mac);
+ mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6);
+ mlxsw_reg_rauht_dip6_memcpy_to(payload, dip);
+}
+
+static inline void mlxsw_reg_rauht_pack_counter(char *payload,
+ u64 counter_index)
+{
+ mlxsw_reg_rauht_counter_index_set(payload, counter_index);
+ mlxsw_reg_rauht_counter_set_type_set(payload,
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
+}
+
+/* RALEU - Router Algorithmic LPM ECMP Update Register
+ * ---------------------------------------------------
+ * The register enables updating the ECMP section in the action for multiple
+ * LPM Unicast entries in a single operation. The update is executed to
+ * all entries of a {virtual router, protocol} tuple using the same ECMP group.
+ */
+#define MLXSW_REG_RALEU_ID 0x8015
+#define MLXSW_REG_RALEU_LEN 0x28
+
+MLXSW_REG_DEFINE(raleu, MLXSW_REG_RALEU_ID, MLXSW_REG_RALEU_LEN);
+
+/* reg_raleu_protocol
+ * Protocol.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4);
+
+/* reg_raleu_virtual_router
+ * Virtual Router ID
+ * Range is 0..cap_max_virtual_routers-1
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16);
+
+/* reg_raleu_adjacency_index
+ * Adjacency Index used for matching on the existing entries.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24);
+
+/* reg_raleu_ecmp_size
+ * ECMP Size used for matching on the existing entries.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13);
+
+/* reg_raleu_new_adjacency_index
+ * New Adjacency Index.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24);
+
+/* reg_raleu_new_ecmp_size
+ * New ECMP Size.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13);
+
+static inline void mlxsw_reg_raleu_pack(char *payload,
+ enum mlxsw_reg_ralxx_protocol protocol,
+ u16 virtual_router,
+ u32 adjacency_index, u16 ecmp_size,
+ u32 new_adjacency_index,
+ u16 new_ecmp_size)
+{
+ MLXSW_REG_ZERO(raleu, payload);
+ mlxsw_reg_raleu_protocol_set(payload, protocol);
+ mlxsw_reg_raleu_virtual_router_set(payload, virtual_router);
+ mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index);
+ mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size);
+ mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index);
+ mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size);
+}
+
+/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register
+ * ----------------------------------------------------------------
+ * The RAUHTD register allows dumping entries from the Router Unicast Host
+ * Table. For a given session an entry is dumped no more than one time. The
+ * first RAUHTD access after reset is a new session. A session ends when the
+ * num_rec response is smaller than num_rec request or for IPv4 when the
+ * num_entries is smaller than 4. The clear activity affect the current session
+ * or the last session if a new session has not started.
+ */
+#define MLXSW_REG_RAUHTD_ID 0x8018
+#define MLXSW_REG_RAUHTD_BASE_LEN 0x20
+#define MLXSW_REG_RAUHTD_REC_LEN 0x20
+#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32
+#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \
+ MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN)
+#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4
+
+MLXSW_REG_DEFINE(rauhtd, MLXSW_REG_RAUHTD_ID, MLXSW_REG_RAUHTD_LEN);
+
+#define MLXSW_REG_RAUHTD_FILTER_A BIT(0)
+#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3)
+
+/* reg_rauhtd_filter_fields
+ * if a bit is '0' then the relevant field is ignored and dump is done
+ * regardless of the field value
+ * Bit0 - filter by activity: entry_a
+ * Bit3 - filter by entry rip: entry_rif
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8);
+
+enum mlxsw_reg_rauhtd_op {
+ MLXSW_REG_RAUHTD_OP_DUMP,
+ MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR,
+};
+
+/* reg_rauhtd_op
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2);
+
+/* reg_rauhtd_num_rec
+ * At request: number of records requested
+ * At response: number of records dumped
+ * For IPv4, each record has 4 entries at request and up to 4 entries
+ * at response
+ * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8);
+
+/* reg_rauhtd_entry_a
+ * Dump only if activity has value of entry_a
+ * Reserved if filter_fields bit0 is '0'
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1);
+
+enum mlxsw_reg_rauhtd_type {
+ MLXSW_REG_RAUHTD_TYPE_IPV4,
+ MLXSW_REG_RAUHTD_TYPE_IPV6,
+};
+
+/* reg_rauhtd_type
+ * Dump only if record type is:
+ * 0 - IPv4
+ * 1 - IPv6
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4);
+
+/* reg_rauhtd_entry_rif
+ * Dump only if RIF has value of entry_rif
+ * Reserved if filter_fields bit3 is '0'
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_rauhtd_pack(char *payload,
+ enum mlxsw_reg_rauhtd_type type)
+{
+ MLXSW_REG_ZERO(rauhtd, payload);
+ mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A);
+ mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR);
+ mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM);
+ mlxsw_reg_rauhtd_entry_a_set(payload, 1);
+ mlxsw_reg_rauhtd_type_set(payload, type);
+}
+
+/* reg_rauhtd_ipv4_rec_num_entries
+ * Number of valid entries in this record:
+ * 0 - 1 valid entry
+ * 1 - 2 valid entries
+ * 2 - 3 valid entries
+ * 3 - 4 valid entries
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries,
+ MLXSW_REG_RAUHTD_BASE_LEN, 28, 2,
+ MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
+
+/* reg_rauhtd_rec_type
+ * Record type.
+ * 0 - IPv4
+ * 1 - IPv6
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2,
+ MLXSW_REG_RAUHTD_REC_LEN, 0x00, false);
+
+#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8
+
+/* reg_rauhtd_ipv4_ent_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
+ MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv4_ent_rif
+ * Router interface.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+ 16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv4_ent_dip
+ * Destination IPv4 address.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+ 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false);
+
+#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20
+
+/* reg_rauhtd_ipv6_ent_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
+ MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_rif
+ * Router interface.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+ 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_dip
+ * Destination IPv6 address.
+ * Access: RO
+ */
+MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN,
+ 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10);
+
+static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
+ int ent_index, u16 *p_rif,
+ u32 *p_dip)
+{
+ *p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index);
+ *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index);
+}
+
+static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload,
+ int rec_index, u16 *p_rif,
+ char *p_dip)
+{
+ *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index);
+ mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip);
+}
+
+/* RTDP - Routing Tunnel Decap Properties Register
+ * -----------------------------------------------
+ * The RTDP register is used for configuring the tunnel decap properties of NVE
+ * and IPinIP.
+ */
+#define MLXSW_REG_RTDP_ID 0x8020
+#define MLXSW_REG_RTDP_LEN 0x44
+
+MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN);
+
+enum mlxsw_reg_rtdp_type {
+ MLXSW_REG_RTDP_TYPE_NVE,
+ MLXSW_REG_RTDP_TYPE_IPIP,
+};
+
+/* reg_rtdp_type
+ * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4);
+
+/* reg_rtdp_tunnel_index
+ * Index to the Decap entry.
+ * For Spectrum, Index to KVD Linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
+
+/* IPinIP */
+
+/* reg_rtdp_ipip_irif
+ * Ingress Router Interface for the overlay router
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16);
+
+enum mlxsw_reg_rtdp_ipip_sip_check {
+ /* No sip checks. */
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO,
+ /* Filter packet if underlay is not IPv4 or if underlay SIP does not
+ * equal ipv4_usip.
+ */
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+ /* Filter packet if underlay is not IPv6 or if underlay SIP does not
+ * equal ipv6_usip.
+ */
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3,
+};
+
+/* reg_rtdp_ipip_sip_check
+ * SIP check to perform. If decapsulation failed due to these configurations
+ * then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3);
+
+/* If set, allow decapsulation of IPinIP (without GRE). */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP BIT(0)
+/* If set, allow decapsulation of IPinGREinIP without a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE BIT(1)
+/* If set, allow decapsulation of IPinGREinIP with a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY BIT(2)
+
+/* reg_rtdp_ipip_type_check
+ * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due to
+ * these configurations then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3);
+
+/* reg_rtdp_ipip_gre_key_check
+ * Whether GRE key should be checked. When check is enabled:
+ * - A packet received as IPinIP (without GRE) will always pass.
+ * - A packet received as IPinGREinIP without a key will not pass the check.
+ * - A packet received as IPinGREinIP with a key will pass the check only if the
+ * key in the packet is equal to expected_gre_key.
+ * If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1);
+
+/* reg_rtdp_ipip_ipv4_usip
+ * Underlay IPv4 address for ipv4 source address check.
+ * Reserved when sip_check is not '1'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32);
+
+/* reg_rtdp_ipip_ipv6_usip_ptr
+ * This field is valid when sip_check is "sipv6 check explicitly". This is a
+ * pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index
+ * is to the KVD linear.
+ * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24);
+
+/* reg_rtdp_ipip_expected_gre_key
+ * GRE key for checking.
+ * Reserved when gre_key_check is '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32);
+
+static inline void mlxsw_reg_rtdp_pack(char *payload,
+ enum mlxsw_reg_rtdp_type type,
+ u32 tunnel_index)
+{
+ MLXSW_REG_ZERO(rtdp, payload);
+ mlxsw_reg_rtdp_type_set(payload, type);
+ mlxsw_reg_rtdp_tunnel_index_set(payload, tunnel_index);
+}
+
+static inline void
+mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
+ enum mlxsw_reg_rtdp_ipip_sip_check sip_check,
+ unsigned int type_check, bool gre_key_check,
+ u32 ipv4_usip, u32 expected_gre_key)
+{
+ mlxsw_reg_rtdp_ipip_irif_set(payload, irif);
+ mlxsw_reg_rtdp_ipip_sip_check_set(payload, sip_check);
+ mlxsw_reg_rtdp_ipip_type_check_set(payload, type_check);
+ mlxsw_reg_rtdp_ipip_gre_key_check_set(payload, gre_key_check);
+ mlxsw_reg_rtdp_ipip_ipv4_usip_set(payload, ipv4_usip);
+ mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);
+}
+
+/* RIGR-V2 - Router Interface Group Register Version 2
+ * ---------------------------------------------------
+ * The RIGR_V2 register is used to add, remove and query egress interface list
+ * of a multicast forwarding entry.
+ */
+#define MLXSW_REG_RIGR2_ID 0x8023
+#define MLXSW_REG_RIGR2_LEN 0xB0
+
+#define MLXSW_REG_RIGR2_MAX_ERIFS 32
+
+MLXSW_REG_DEFINE(rigr2, MLXSW_REG_RIGR2_ID, MLXSW_REG_RIGR2_LEN);
+
+/* reg_rigr2_rigr_index
+ * KVD Linear index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rigr2, rigr_index, 0x04, 0, 24);
+
+/* reg_rigr2_vnext
+ * Next RIGR Index is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, vnext, 0x08, 31, 1);
+
+/* reg_rigr2_next_rigr_index
+ * Next RIGR Index. The index is to the KVD linear.
+ * Reserved when vnxet = '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, next_rigr_index, 0x08, 0, 24);
+
+/* reg_rigr2_vrmid
+ * RMID Index is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, vrmid, 0x20, 31, 1);
+
+/* reg_rigr2_rmid_index
+ * RMID Index.
+ * Range 0 .. max_mid - 1
+ * Reserved when vrmid = '0'.
+ * The index is to the Port Group Table (PGT)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, rmid_index, 0x20, 0, 16);
+
+/* reg_rigr2_erif_entry_v
+ * Egress Router Interface is valid.
+ * Note that low-entries must be set if high-entries are set. For
+ * example: if erif_entry[2].v is set then erif_entry[1].v and
+ * erif_entry[0].v must be set.
+ * Index can be from 0 to cap_mc_erif_list_entries-1
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_v, 0x24, 31, 1, 4, 0, false);
+
+/* reg_rigr2_erif_entry_erif
+ * Egress Router Interface.
+ * Valid range is from 0 to cap_max_router_interfaces - 1
+ * Index can be from 0 to MLXSW_REG_RIGR2_MAX_ERIFS - 1
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_erif, 0x24, 0, 16, 4, 0, false);
+
+static inline void mlxsw_reg_rigr2_pack(char *payload, u32 rigr_index,
+ bool vnext, u32 next_rigr_index)
+{
+ MLXSW_REG_ZERO(rigr2, payload);
+ mlxsw_reg_rigr2_rigr_index_set(payload, rigr_index);
+ mlxsw_reg_rigr2_vnext_set(payload, vnext);
+ mlxsw_reg_rigr2_next_rigr_index_set(payload, next_rigr_index);
+ mlxsw_reg_rigr2_vrmid_set(payload, 0);
+ mlxsw_reg_rigr2_rmid_index_set(payload, 0);
+}
+
+static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index,
+ bool v, u16 erif)
+{
+ mlxsw_reg_rigr2_erif_entry_v_set(payload, index, v);
+ mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif);
+}
+
+/* RECR-V2 - Router ECMP Configuration Version 2 Register
+ * ------------------------------------------------------
+ */
+#define MLXSW_REG_RECR2_ID 0x8025
+#define MLXSW_REG_RECR2_LEN 0x38
+
+MLXSW_REG_DEFINE(recr2, MLXSW_REG_RECR2_ID, MLXSW_REG_RECR2_LEN);
+
+/* reg_recr2_pp
+ * Per-port configuration
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, recr2, pp, 0x00, 24, 1);
+
+/* reg_recr2_sh
+ * Symmetric hash
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, recr2, sh, 0x00, 8, 1);
+
+/* reg_recr2_seed
+ * Seed
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, recr2, seed, 0x08, 0, 32);
+
+enum {
+ /* Enable IPv4 fields if packet is not TCP and not UDP */
+ MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP = 3,
+ /* Enable IPv4 fields if packet is TCP or UDP */
+ MLXSW_REG_RECR2_IPV4_EN_TCP_UDP = 4,
+ /* Enable IPv6 fields if packet is not TCP and not UDP */
+ MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP = 5,
+ /* Enable IPv6 fields if packet is TCP or UDP */
+ MLXSW_REG_RECR2_IPV6_EN_TCP_UDP = 6,
+ /* Enable TCP/UDP header fields if packet is IPv4 */
+ MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7,
+ /* Enable TCP/UDP header fields if packet is IPv6 */
+ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8,
+};
+
+/* reg_recr2_outer_header_enables
+ * Bit mask where each bit enables a specific layer to be included in
+ * the hash calculation.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_enables, 0x10, 0x04, 1);
+
+enum {
+ /* IPv4 Source IP */
+ MLXSW_REG_RECR2_IPV4_SIP0 = 9,
+ MLXSW_REG_RECR2_IPV4_SIP3 = 12,
+ /* IPv4 Destination IP */
+ MLXSW_REG_RECR2_IPV4_DIP0 = 13,
+ MLXSW_REG_RECR2_IPV4_DIP3 = 16,
+ /* IP Protocol */
+ MLXSW_REG_RECR2_IPV4_PROTOCOL = 17,
+ /* IPv6 Source IP */
+ MLXSW_REG_RECR2_IPV6_SIP0_7 = 21,
+ MLXSW_REG_RECR2_IPV6_SIP8 = 29,
+ MLXSW_REG_RECR2_IPV6_SIP15 = 36,
+ /* IPv6 Destination IP */
+ MLXSW_REG_RECR2_IPV6_DIP0_7 = 37,
+ MLXSW_REG_RECR2_IPV6_DIP8 = 45,
+ MLXSW_REG_RECR2_IPV6_DIP15 = 52,
+ /* IPv6 Next Header */
+ MLXSW_REG_RECR2_IPV6_NEXT_HEADER = 53,
+ /* IPv6 Flow Label */
+ MLXSW_REG_RECR2_IPV6_FLOW_LABEL = 57,
+ /* TCP/UDP Source Port */
+ MLXSW_REG_RECR2_TCP_UDP_SPORT = 74,
+ /* TCP/UDP Destination Port */
+ MLXSW_REG_RECR2_TCP_UDP_DPORT = 75,
+};
+
+/* reg_recr2_outer_header_fields_enable
+ * Packet fields to enable for ECMP hash subject to outer_header_enable.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1);
+
+static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload)
+{
+ int i;
+
+ for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++)
+ mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+ true);
+}
+
+static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload)
+{
+ int i;
+
+ for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++)
+ mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+ true);
+}
+
+static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload)
+{
+ int i = MLXSW_REG_RECR2_IPV6_SIP0_7;
+
+ mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true);
+
+ i = MLXSW_REG_RECR2_IPV6_SIP8;
+ for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++)
+ mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+ true);
+}
+
+static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload)
+{
+ int i = MLXSW_REG_RECR2_IPV6_DIP0_7;
+
+ mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true);
+
+ i = MLXSW_REG_RECR2_IPV6_DIP8;
+ for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++)
+ mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+ true);
+}
+
+static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed)
+{
+ MLXSW_REG_ZERO(recr2, payload);
+ mlxsw_reg_recr2_pp_set(payload, false);
+ mlxsw_reg_recr2_sh_set(payload, true);
+ mlxsw_reg_recr2_seed_set(payload, seed);
+}
+
+/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register
+ * --------------------------------------------------------------
+ * The RMFT_V2 register is used to configure and query the multicast table.
+ */
+#define MLXSW_REG_RMFT2_ID 0x8027
+#define MLXSW_REG_RMFT2_LEN 0x174
+
+MLXSW_REG_DEFINE(rmft2, MLXSW_REG_RMFT2_ID, MLXSW_REG_RMFT2_LEN);
+
+/* reg_rmft2_v
+ * Valid
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, v, 0x00, 31, 1);
+
+enum mlxsw_reg_rmft2_type {
+ MLXSW_REG_RMFT2_TYPE_IPV4,
+ MLXSW_REG_RMFT2_TYPE_IPV6
+};
+
+/* reg_rmft2_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rmft2, type, 0x00, 28, 2);
+
+enum mlxsw_sp_reg_rmft2_op {
+ /* For Write:
+ * Write operation. Used to write a new entry to the table. All RW
+ * fields are relevant for new entry. Activity bit is set for new
+ * entries - Note write with v (Valid) 0 will delete the entry.
+ * For Query:
+ * Read operation
+ */
+ MLXSW_REG_RMFT2_OP_READ_WRITE,
+};
+
+/* reg_rmft2_op
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rmft2, op, 0x00, 20, 2);
+
+/* reg_rmft2_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the specific
+ * entry.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, rmft2, a, 0x00, 16, 1);
+
+/* reg_rmft2_offset
+ * Offset within the multicast forwarding table to write to.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rmft2, offset, 0x00, 0, 16);
+
+/* reg_rmft2_virtual_router
+ * Virtual Router ID. Range from 0..cap_max_virtual_routers-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, virtual_router, 0x04, 0, 16);
+
+enum mlxsw_reg_rmft2_irif_mask {
+ MLXSW_REG_RMFT2_IRIF_MASK_IGNORE,
+ MLXSW_REG_RMFT2_IRIF_MASK_COMPARE
+};
+
+/* reg_rmft2_irif_mask
+ * Ingress RIF mask.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1);
+
+/* reg_rmft2_irif
+ * Ingress RIF index.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16);
+
+/* reg_rmft2_dip{4,6}
+ * Destination IPv4/6 address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rmft2, dip6, 0x10, 16);
+MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32);
+
+/* reg_rmft2_dip{4,6}_mask
+ * A bit that is set directs the TCAM to compare the corresponding bit in key. A
+ * bit that is clear directs the TCAM to ignore the corresponding bit in key.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rmft2, dip6_mask, 0x20, 16);
+MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32);
+
+/* reg_rmft2_sip{4,6}
+ * Source IPv4/6 address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rmft2, sip6, 0x30, 16);
+MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32);
+
+/* reg_rmft2_sip{4,6}_mask
+ * A bit that is set directs the TCAM to compare the corresponding bit in key. A
+ * bit that is clear directs the TCAM to ignore the corresponding bit in key.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rmft2, sip6_mask, 0x40, 16);
+MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32);
+
+/* reg_rmft2_flexible_action_set
+ * ACL action set. The only supported action types in this field and in any
+ * action-set pointed from here are as follows:
+ * 00h: ACTION_NULL
+ * 01h: ACTION_MAC_TTL, only TTL configuration is supported.
+ * 03h: ACTION_TRAP
+ * 06h: ACTION_QOS
+ * 08h: ACTION_POLICING_MONITORING
+ * 10h: ACTION_ROUTER_MC
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80,
+ MLXSW_REG_FLEX_ACTION_SET_LEN);
+
+static inline void
+mlxsw_reg_rmft2_common_pack(char *payload, bool v, u16 offset,
+ u16 virtual_router,
+ enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+ const char *flex_action_set)
+{
+ MLXSW_REG_ZERO(rmft2, payload);
+ mlxsw_reg_rmft2_v_set(payload, v);
+ mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE);
+ mlxsw_reg_rmft2_offset_set(payload, offset);
+ mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router);
+ mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask);
+ mlxsw_reg_rmft2_irif_set(payload, irif);
+ if (flex_action_set)
+ mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload,
+ flex_action_set);
+}
+
+static inline void
+mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+ enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+ u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask,
+ const char *flexible_action_set)
+{
+ mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router,
+ irif_mask, irif, flexible_action_set);
+ mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4);
+ mlxsw_reg_rmft2_dip4_set(payload, dip4);
+ mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask);
+ mlxsw_reg_rmft2_sip4_set(payload, sip4);
+ mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask);
+}
+
+static inline void
+mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+ enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+ struct in6_addr dip6, struct in6_addr dip6_mask,
+ struct in6_addr sip6, struct in6_addr sip6_mask,
+ const char *flexible_action_set)
+{
+ mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router,
+ irif_mask, irif, flexible_action_set);
+ mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV6);
+ mlxsw_reg_rmft2_dip6_memcpy_to(payload, (void *)&dip6);
+ mlxsw_reg_rmft2_dip6_mask_memcpy_to(payload, (void *)&dip6_mask);
+ mlxsw_reg_rmft2_sip6_memcpy_to(payload, (void *)&sip6);
+ mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void *)&sip6_mask);
+}
+
+/* MFCR - Management Fan Control Register
+ * --------------------------------------
+ * This register controls the settings of the Fan Speed PWM mechanism.
+ */
+#define MLXSW_REG_MFCR_ID 0x9001
+#define MLXSW_REG_MFCR_LEN 0x08
+
+MLXSW_REG_DEFINE(mfcr, MLXSW_REG_MFCR_ID, MLXSW_REG_MFCR_LEN);
+
+enum mlxsw_reg_mfcr_pwm_frequency {
+ MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00,
+ MLXSW_REG_MFCR_PWM_FEQ_14_7HZ = 0x01,
+ MLXSW_REG_MFCR_PWM_FEQ_22_1HZ = 0x02,
+ MLXSW_REG_MFCR_PWM_FEQ_1_4KHZ = 0x40,
+ MLXSW_REG_MFCR_PWM_FEQ_5KHZ = 0x41,
+ MLXSW_REG_MFCR_PWM_FEQ_20KHZ = 0x42,
+ MLXSW_REG_MFCR_PWM_FEQ_22_5KHZ = 0x43,
+ MLXSW_REG_MFCR_PWM_FEQ_25KHZ = 0x44,
+};
+
+/* reg_mfcr_pwm_frequency
+ * Controls the frequency of the PWM signal.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 7);
+
+#define MLXSW_MFCR_TACHOS_MAX 10
+
+/* reg_mfcr_tacho_active
+ * Indicates which of the tachometer is active (bit per tachometer).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mfcr, tacho_active, 0x04, 16, MLXSW_MFCR_TACHOS_MAX);
+
+#define MLXSW_MFCR_PWMS_MAX 5
+
+/* reg_mfcr_pwm_active
+ * Indicates which of the PWM control is active (bit per PWM).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mfcr, pwm_active, 0x04, 0, MLXSW_MFCR_PWMS_MAX);
+
+static inline void
+mlxsw_reg_mfcr_pack(char *payload,
+ enum mlxsw_reg_mfcr_pwm_frequency pwm_frequency)
+{
+ MLXSW_REG_ZERO(mfcr, payload);
+ mlxsw_reg_mfcr_pwm_frequency_set(payload, pwm_frequency);
+}
+
+static inline void
+mlxsw_reg_mfcr_unpack(char *payload,
+ enum mlxsw_reg_mfcr_pwm_frequency *p_pwm_frequency,
+ u16 *p_tacho_active, u8 *p_pwm_active)
+{
+ *p_pwm_frequency = mlxsw_reg_mfcr_pwm_frequency_get(payload);
+ *p_tacho_active = mlxsw_reg_mfcr_tacho_active_get(payload);
+ *p_pwm_active = mlxsw_reg_mfcr_pwm_active_get(payload);
+}
+
+/* MFSC - Management Fan Speed Control Register
+ * --------------------------------------------
+ * This register controls the settings of the Fan Speed PWM mechanism.
+ */
+#define MLXSW_REG_MFSC_ID 0x9002
+#define MLXSW_REG_MFSC_LEN 0x08
+
+MLXSW_REG_DEFINE(mfsc, MLXSW_REG_MFSC_ID, MLXSW_REG_MFSC_LEN);
+
+/* reg_mfsc_pwm
+ * Fan pwm to control / monitor.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mfsc, pwm, 0x00, 24, 3);
+
+/* reg_mfsc_pwm_duty_cycle
+ * Controls the duty cycle of the PWM. Value range from 0..255 to
+ * represent duty cycle of 0%...100%.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsc, pwm_duty_cycle, 0x04, 0, 8);
+
+static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm,
+ u8 pwm_duty_cycle)
+{
+ MLXSW_REG_ZERO(mfsc, payload);
+ mlxsw_reg_mfsc_pwm_set(payload, pwm);
+ mlxsw_reg_mfsc_pwm_duty_cycle_set(payload, pwm_duty_cycle);
+}
+
+/* MFSM - Management Fan Speed Measurement
+ * ---------------------------------------
+ * This register controls the settings of the Tacho measurements and
+ * enables reading the Tachometer measurements.
+ */
+#define MLXSW_REG_MFSM_ID 0x9003
+#define MLXSW_REG_MFSM_LEN 0x08
+
+MLXSW_REG_DEFINE(mfsm, MLXSW_REG_MFSM_ID, MLXSW_REG_MFSM_LEN);
+
+/* reg_mfsm_tacho
+ * Fan tachometer index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mfsm, tacho, 0x00, 24, 4);
+
+/* reg_mfsm_rpm
+ * Fan speed (round per minute).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mfsm, rpm, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho)
+{
+ MLXSW_REG_ZERO(mfsm, payload);
+ mlxsw_reg_mfsm_tacho_set(payload, tacho);
+}
+
+/* MFSL - Management Fan Speed Limit Register
+ * ------------------------------------------
+ * The Fan Speed Limit register is used to configure the fan speed
+ * event / interrupt notification mechanism. Fan speed threshold are
+ * defined for both under-speed and over-speed.
+ */
+#define MLXSW_REG_MFSL_ID 0x9004
+#define MLXSW_REG_MFSL_LEN 0x0C
+
+MLXSW_REG_DEFINE(mfsl, MLXSW_REG_MFSL_ID, MLXSW_REG_MFSL_LEN);
+
+/* reg_mfsl_tacho
+ * Fan tachometer index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mfsl, tacho, 0x00, 24, 4);
+
+/* reg_mfsl_tach_min
+ * Tachometer minimum value (minimum RPM).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsl, tach_min, 0x04, 0, 16);
+
+/* reg_mfsl_tach_max
+ * Tachometer maximum value (maximum RPM).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsl, tach_max, 0x08, 0, 16);
+
+static inline void mlxsw_reg_mfsl_pack(char *payload, u8 tacho,
+ u16 tach_min, u16 tach_max)
+{
+ MLXSW_REG_ZERO(mfsl, payload);
+ mlxsw_reg_mfsl_tacho_set(payload, tacho);
+ mlxsw_reg_mfsl_tach_min_set(payload, tach_min);
+ mlxsw_reg_mfsl_tach_max_set(payload, tach_max);
+}
+
+static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho,
+ u16 *p_tach_min, u16 *p_tach_max)
+{
+ if (p_tach_min)
+ *p_tach_min = mlxsw_reg_mfsl_tach_min_get(payload);
+
+ if (p_tach_max)
+ *p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload);
+}
+
+/* MTCAP - Management Temperature Capabilities
+ * -------------------------------------------
+ * This register exposes the capabilities of the device and
+ * system temperature sensing.
+ */
+#define MLXSW_REG_MTCAP_ID 0x9009
+#define MLXSW_REG_MTCAP_LEN 0x08
+
+MLXSW_REG_DEFINE(mtcap, MLXSW_REG_MTCAP_ID, MLXSW_REG_MTCAP_LEN);
+
+/* reg_mtcap_sensor_count
+ * Number of sensors supported by the device.
+ * This includes the QSFP module sensors (if exists in the QSFP module).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7);
+
+/* MTMP - Management Temperature
+ * -----------------------------
+ * This register controls the settings of the temperature measurements
+ * and enables reading the temperature measurements. Note that temperature
+ * is in 0.125 degrees Celsius.
+ */
+#define MLXSW_REG_MTMP_ID 0x900A
+#define MLXSW_REG_MTMP_LEN 0x20
+
+MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN);
+
+/* reg_mtmp_sensor_index
+ * Sensors index to access.
+ * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially
+ * (module 0 is mapped to sensor_index 64).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7);
+
+/* Convert to milli degrees Celsius */
+#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125)
+
+/* reg_mtmp_temperature
+ * Temperature reading from the sensor. Reading is in 0.125 Celsius
+ * degrees units.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtmp, temperature, 0x04, 0, 16);
+
+/* reg_mtmp_mte
+ * Max Temperature Enable - enables measuring the max temperature on a sensor.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtmp, mte, 0x08, 31, 1);
+
+/* reg_mtmp_mtr
+ * Max Temperature Reset - clears the value of the max temperature register.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1);
+
+/* reg_mtmp_max_temperature
+ * The highest measured temperature from the sensor.
+ * When the bit mte is cleared, the field max_temperature is reserved.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16);
+
+/* reg_mtmp_tee
+ * Temperature Event Enable.
+ * 0 - Do not generate event
+ * 1 - Generate event
+ * 2 - Generate single event
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2);
+
+#define MLXSW_REG_MTMP_THRESH_HI 0x348 /* 105 Celsius */
+
+/* reg_mtmp_temperature_threshold_hi
+ * High threshold for Temperature Warning Event. In 0.125 Celsius.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtmp, temperature_threshold_hi, 0x0C, 0, 16);
+
+/* reg_mtmp_temperature_threshold_lo
+ * Low threshold for Temperature Warning Event. In 0.125 Celsius.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16);
+
+#define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8
+
+/* reg_mtmp_sensor_name
+ * Sensor Name
+ * Access: RO
+ */
+MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE);
+
+static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index,
+ bool max_temp_enable,
+ bool max_temp_reset)
+{
+ MLXSW_REG_ZERO(mtmp, payload);
+ mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index);
+ mlxsw_reg_mtmp_mte_set(payload, max_temp_enable);
+ mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset);
+ mlxsw_reg_mtmp_temperature_threshold_hi_set(payload,
+ MLXSW_REG_MTMP_THRESH_HI);
+}
+
+static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp,
+ unsigned int *p_max_temp,
+ char *sensor_name)
+{
+ u16 temp;
+
+ if (p_temp) {
+ temp = mlxsw_reg_mtmp_temperature_get(payload);
+ *p_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
+ }
+ if (p_max_temp) {
+ temp = mlxsw_reg_mtmp_max_temperature_get(payload);
+ *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
+ }
+ if (sensor_name)
+ mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name);
+}
+
+/* MCIA - Management Cable Info Access
+ * -----------------------------------
+ * MCIA register is used to access the SFP+ and QSFP connector's EPROM.
+ */
+
+#define MLXSW_REG_MCIA_ID 0x9014
+#define MLXSW_REG_MCIA_LEN 0x40
+
+MLXSW_REG_DEFINE(mcia, MLXSW_REG_MCIA_ID, MLXSW_REG_MCIA_LEN);
+
+/* reg_mcia_l
+ * Lock bit. Setting this bit will lock the access to the specific
+ * cable. Used for updating a full page in a cable EPROM. Any access
+ * other then subsequence writes will fail while the port is locked.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcia, l, 0x00, 31, 1);
+
+/* reg_mcia_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcia, module, 0x00, 16, 8);
+
+/* reg_mcia_status
+ * Module status.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mcia, status, 0x00, 0, 8);
+
+/* reg_mcia_i2c_device_address
+ * I2C device address.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcia, i2c_device_address, 0x04, 24, 8);
+
+/* reg_mcia_page_number
+ * Page number.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcia, page_number, 0x04, 16, 8);
+
+/* reg_mcia_device_address
+ * Device address.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16);
+
+/* reg_mcia_size
+ * Number of bytes to read/write (up to 48 bytes).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
+
+#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48
+
+/* reg_mcia_eeprom
+ * Bytes to read/write.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
+
+static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock,
+ u8 page_number, u16 device_addr,
+ u8 size, u8 i2c_device_addr)
+{
+ MLXSW_REG_ZERO(mcia, payload);
+ mlxsw_reg_mcia_module_set(payload, module);
+ mlxsw_reg_mcia_l_set(payload, lock);
+ mlxsw_reg_mcia_page_number_set(payload, page_number);
+ mlxsw_reg_mcia_device_address_set(payload, device_addr);
+ mlxsw_reg_mcia_size_set(payload, size);
+ mlxsw_reg_mcia_i2c_device_address_set(payload, i2c_device_addr);
+}
+
+/* MPAT - Monitoring Port Analyzer Table
+ * -------------------------------------
+ * MPAT Register is used to query and configure the Switch PortAnalyzer Table.
+ * For an enabled analyzer, all fields except e (enable) cannot be modified.
+ */
+#define MLXSW_REG_MPAT_ID 0x901A
+#define MLXSW_REG_MPAT_LEN 0x78
+
+MLXSW_REG_DEFINE(mpat, MLXSW_REG_MPAT_ID, MLXSW_REG_MPAT_LEN);
+
+/* reg_mpat_pa_id
+ * Port Analyzer ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mpat, pa_id, 0x00, 28, 4);
+
+/* reg_mpat_system_port
+ * A unique port identifier for the final destination of the packet.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, system_port, 0x00, 0, 16);
+
+/* reg_mpat_e
+ * Enable. Indicating the Port Analyzer is enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, e, 0x04, 31, 1);
+
+/* reg_mpat_qos
+ * Quality Of Service Mode.
+ * 0: CONFIGURED - QoS parameters (Switch Priority, and encapsulation
+ * PCP, DEI, DSCP or VL) are configured.
+ * 1: MAINTAIN - QoS parameters (Switch Priority, Color) are the
+ * same as in the original packet that has triggered the mirroring. For
+ * SPAN also the pcp,dei are maintained.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
+
+/* reg_mpat_be
+ * Best effort mode. Indicates mirroring traffic should not cause packet
+ * drop or back pressure, but will discard the mirrored packets. Mirrored
+ * packets will be forwarded on a best effort manner.
+ * 0: Do not discard mirrored packets
+ * 1: Discard mirrored packets if causing congestion
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
+
+enum mlxsw_reg_mpat_span_type {
+ /* Local SPAN Ethernet.
+ * The original packet is not encapsulated.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+ /* Remote SPAN Ethernet VLAN.
+ * The packet is forwarded to the monitoring port on the monitoring
+ * VLAN.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH = 0x1,
+
+ /* Encapsulated Remote SPAN Ethernet L3 GRE.
+ * The packet is encapsulated with GRE header.
+ */
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
+static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
+ u16 system_port, bool e,
+ enum mlxsw_reg_mpat_span_type span_type)
+{
+ MLXSW_REG_ZERO(mpat, payload);
+ mlxsw_reg_mpat_pa_id_set(payload, pa_id);
+ mlxsw_reg_mpat_system_port_set(payload, system_port);
+ mlxsw_reg_mpat_e_set(payload, e);
+ mlxsw_reg_mpat_qos_set(payload, 1);
+ mlxsw_reg_mpat_be_set(payload, 1);
+ mlxsw_reg_mpat_span_type_set(payload, span_type);
+}
+
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+ mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+ enum mlxsw_reg_mpat_eth_rspan_version version,
+ const char *mac,
+ bool tp)
+{
+ mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+ mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+ mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+ const char *smac,
+ u32 sip, u32 dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+ mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+ mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+ const char *smac,
+ struct in6_addr sip, struct in6_addr dip)
+{
+ mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+ mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+ mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+ MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+ mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+ mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
+}
+
+/* MPAR - Monitoring Port Analyzer Register
+ * ----------------------------------------
+ * MPAR register is used to query and configure the port analyzer port mirroring
+ * properties.
+ */
+#define MLXSW_REG_MPAR_ID 0x901B
+#define MLXSW_REG_MPAR_LEN 0x08
+
+MLXSW_REG_DEFINE(mpar, MLXSW_REG_MPAR_ID, MLXSW_REG_MPAR_LEN);
+
+/* reg_mpar_local_port
+ * The local port to mirror the packets from.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mpar, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_mpar_i_e {
+ MLXSW_REG_MPAR_TYPE_EGRESS,
+ MLXSW_REG_MPAR_TYPE_INGRESS,
+};
+
+/* reg_mpar_i_e
+ * Ingress/Egress
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mpar, i_e, 0x00, 0, 4);
+
+/* reg_mpar_enable
+ * Enable mirroring
+ * By default, port mirroring is disabled for all ports.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpar, enable, 0x04, 31, 1);
+
+/* reg_mpar_pa_id
+ * Port Analyzer ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpar, pa_id, 0x04, 0, 4);
+
+static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_mpar_i_e i_e,
+ bool enable, u8 pa_id)
+{
+ MLXSW_REG_ZERO(mpar, payload);
+ mlxsw_reg_mpar_local_port_set(payload, local_port);
+ mlxsw_reg_mpar_enable_set(payload, enable);
+ mlxsw_reg_mpar_i_e_set(payload, i_e);
+ mlxsw_reg_mpar_pa_id_set(payload, pa_id);
+}
+
+/* MRSR - Management Reset and Shutdown Register
+ * ---------------------------------------------
+ * MRSR register is used to reset or shutdown the switch or
+ * the entire system (when applicable).
+ */
+#define MLXSW_REG_MRSR_ID 0x9023
+#define MLXSW_REG_MRSR_LEN 0x08
+
+MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
+
+/* reg_mrsr_command
+ * Reset/shutdown command
+ * 0 - do nothing
+ * 1 - software reset
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4);
+
+static inline void mlxsw_reg_mrsr_pack(char *payload)
+{
+ MLXSW_REG_ZERO(mrsr, payload);
+ mlxsw_reg_mrsr_command_set(payload, 1);
+}
+
+/* MLCR - Management LED Control Register
+ * --------------------------------------
+ * Controls the system LEDs.
+ */
+#define MLXSW_REG_MLCR_ID 0x902B
+#define MLXSW_REG_MLCR_LEN 0x0C
+
+MLXSW_REG_DEFINE(mlcr, MLXSW_REG_MLCR_ID, MLXSW_REG_MLCR_LEN);
+
+/* reg_mlcr_local_port
+ * Local port number.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mlcr, local_port, 0x00, 16, 8);
+
+#define MLXSW_REG_MLCR_DURATION_MAX 0xFFFF
+
+/* reg_mlcr_beacon_duration
+ * Duration of the beacon to be active, in seconds.
+ * 0x0 - Will turn off the beacon.
+ * 0xFFFF - Will turn on the beacon until explicitly turned off.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mlcr, beacon_duration, 0x04, 0, 16);
+
+/* reg_mlcr_beacon_remain
+ * Remaining duration of the beacon, in seconds.
+ * 0xFFFF indicates an infinite amount of time.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mlcr, beacon_remain, 0x08, 0, 16);
+
+static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port,
+ bool active)
+{
+ MLXSW_REG_ZERO(mlcr, payload);
+ mlxsw_reg_mlcr_local_port_set(payload, local_port);
+ mlxsw_reg_mlcr_beacon_duration_set(payload, active ?
+ MLXSW_REG_MLCR_DURATION_MAX : 0);
+}
+
+/* MCQI - Management Component Query Information
+ * ---------------------------------------------
+ * This register allows querying information about firmware components.
+ */
+#define MLXSW_REG_MCQI_ID 0x9061
+#define MLXSW_REG_MCQI_BASE_LEN 0x18
+#define MLXSW_REG_MCQI_CAP_LEN 0x14
+#define MLXSW_REG_MCQI_LEN (MLXSW_REG_MCQI_BASE_LEN + MLXSW_REG_MCQI_CAP_LEN)
+
+MLXSW_REG_DEFINE(mcqi, MLXSW_REG_MCQI_ID, MLXSW_REG_MCQI_LEN);
+
+/* reg_mcqi_component_index
+ * Index of the accessed component.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcqi, component_index, 0x00, 0, 16);
+
+enum mlxfw_reg_mcqi_info_type {
+ MLXSW_REG_MCQI_INFO_TYPE_CAPABILITIES,
+};
+
+/* reg_mcqi_info_type
+ * Component properties set.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcqi, info_type, 0x08, 0, 5);
+
+/* reg_mcqi_offset
+ * The requested/returned data offset from the section start, given in bytes.
+ * Must be DWORD aligned.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcqi, offset, 0x10, 0, 32);
+
+/* reg_mcqi_data_size
+ * The requested/returned data size, given in bytes. If data_size is not DWORD
+ * aligned, the last bytes are zero padded.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcqi, data_size, 0x14, 0, 16);
+
+/* reg_mcqi_cap_max_component_size
+ * Maximum size for this component, given in bytes.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mcqi, cap_max_component_size, 0x20, 0, 32);
+
+/* reg_mcqi_cap_log_mcda_word_size
+ * Log 2 of the access word size in bytes. Read and write access must be aligned
+ * to the word size. Write access must be done for an integer number of words.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mcqi, cap_log_mcda_word_size, 0x24, 28, 4);
+
+/* reg_mcqi_cap_mcda_max_write_size
+ * Maximal write size for MCDA register
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mcqi, cap_mcda_max_write_size, 0x24, 0, 16);
+
+static inline void mlxsw_reg_mcqi_pack(char *payload, u16 component_index)
+{
+ MLXSW_REG_ZERO(mcqi, payload);
+ mlxsw_reg_mcqi_component_index_set(payload, component_index);
+ mlxsw_reg_mcqi_info_type_set(payload,
+ MLXSW_REG_MCQI_INFO_TYPE_CAPABILITIES);
+ mlxsw_reg_mcqi_offset_set(payload, 0);
+ mlxsw_reg_mcqi_data_size_set(payload, MLXSW_REG_MCQI_CAP_LEN);
+}
+
+static inline void mlxsw_reg_mcqi_unpack(char *payload,
+ u32 *p_cap_max_component_size,
+ u8 *p_cap_log_mcda_word_size,
+ u16 *p_cap_mcda_max_write_size)
+{
+ *p_cap_max_component_size =
+ mlxsw_reg_mcqi_cap_max_component_size_get(payload);
+ *p_cap_log_mcda_word_size =
+ mlxsw_reg_mcqi_cap_log_mcda_word_size_get(payload);
+ *p_cap_mcda_max_write_size =
+ mlxsw_reg_mcqi_cap_mcda_max_write_size_get(payload);
+}
+
+/* MCC - Management Component Control
+ * ----------------------------------
+ * Controls the firmware component and updates the FSM.
+ */
+#define MLXSW_REG_MCC_ID 0x9062
+#define MLXSW_REG_MCC_LEN 0x1C
+
+MLXSW_REG_DEFINE(mcc, MLXSW_REG_MCC_ID, MLXSW_REG_MCC_LEN);
+
+enum mlxsw_reg_mcc_instruction {
+ MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01,
+ MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02,
+ MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT = 0x03,
+ MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT = 0x04,
+ MLXSW_REG_MCC_INSTRUCTION_ACTIVATE = 0x06,
+ MLXSW_REG_MCC_INSTRUCTION_CANCEL = 0x08,
+};
+
+/* reg_mcc_instruction
+ * Command to be executed by the FSM.
+ * Applicable for write operation only.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcc, instruction, 0x00, 0, 8);
+
+/* reg_mcc_component_index
+ * Index of the accessed component. Applicable only for commands that
+ * refer to components. Otherwise, this field is reserved.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mcc, component_index, 0x04, 0, 16);
+
+/* reg_mcc_update_handle
+ * Token representing the current flow executed by the FSM.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mcc, update_handle, 0x08, 0, 24);
+
+/* reg_mcc_error_code
+ * Indicates the successful completion of the instruction, or the reason it
+ * failed
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mcc, error_code, 0x0C, 8, 8);
+
+/* reg_mcc_control_state
+ * Current FSM state
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mcc, control_state, 0x0C, 0, 4);
+
+/* reg_mcc_component_size
+ * Component size in bytes. Valid for UPDATE_COMPONENT instruction. Specifying
+ * the size may shorten the update time. Value 0x0 means that size is
+ * unspecified.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mcc, component_size, 0x10, 0, 32);
+
+static inline void mlxsw_reg_mcc_pack(char *payload,
+ enum mlxsw_reg_mcc_instruction instr,
+ u16 component_index, u32 update_handle,
+ u32 component_size)
+{
+ MLXSW_REG_ZERO(mcc, payload);
+ mlxsw_reg_mcc_instruction_set(payload, instr);
+ mlxsw_reg_mcc_component_index_set(payload, component_index);
+ mlxsw_reg_mcc_update_handle_set(payload, update_handle);
+ mlxsw_reg_mcc_component_size_set(payload, component_size);
+}
+
+static inline void mlxsw_reg_mcc_unpack(char *payload, u32 *p_update_handle,
+ u8 *p_error_code, u8 *p_control_state)
+{
+ if (p_update_handle)
+ *p_update_handle = mlxsw_reg_mcc_update_handle_get(payload);
+ if (p_error_code)
+ *p_error_code = mlxsw_reg_mcc_error_code_get(payload);
+ if (p_control_state)
+ *p_control_state = mlxsw_reg_mcc_control_state_get(payload);
+}
+
+/* MCDA - Management Component Data Access
+ * ---------------------------------------
+ * This register allows reading and writing a firmware component.
+ */
+#define MLXSW_REG_MCDA_ID 0x9063
+#define MLXSW_REG_MCDA_BASE_LEN 0x10
+#define MLXSW_REG_MCDA_MAX_DATA_LEN 0x80
+#define MLXSW_REG_MCDA_LEN \
+ (MLXSW_REG_MCDA_BASE_LEN + MLXSW_REG_MCDA_MAX_DATA_LEN)
+
+MLXSW_REG_DEFINE(mcda, MLXSW_REG_MCDA_ID, MLXSW_REG_MCDA_LEN);
+
+/* reg_mcda_update_handle
+ * Token representing the current flow executed by the FSM.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcda, update_handle, 0x00, 0, 24);
+
+/* reg_mcda_offset
+ * Offset of accessed address relative to component start. Accesses must be in
+ * accordance to log_mcda_word_size in MCQI reg.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcda, offset, 0x04, 0, 32);
+
+/* reg_mcda_size
+ * Size of the data accessed, given in bytes.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mcda, size, 0x08, 0, 16);
+
+/* reg_mcda_data
+ * Data block accessed.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, mcda, data, 0x10, 0, 32, 4, 0, false);
+
+static inline void mlxsw_reg_mcda_pack(char *payload, u32 update_handle,
+ u32 offset, u16 size, u8 *data)
+{
+ int i;
+
+ MLXSW_REG_ZERO(mcda, payload);
+ mlxsw_reg_mcda_update_handle_set(payload, update_handle);
+ mlxsw_reg_mcda_offset_set(payload, offset);
+ mlxsw_reg_mcda_size_set(payload, size);
+
+ for (i = 0; i < size / 4; i++)
+ mlxsw_reg_mcda_data_set(payload, i, *(u32 *) &data[i * 4]);
+}
+
+/* MPSC - Monitoring Packet Sampling Configuration Register
+ * --------------------------------------------------------
+ * MPSC Register is used to configure the Packet Sampling mechanism.
+ */
+#define MLXSW_REG_MPSC_ID 0x9080
+#define MLXSW_REG_MPSC_LEN 0x1C
+
+MLXSW_REG_DEFINE(mpsc, MLXSW_REG_MPSC_ID, MLXSW_REG_MPSC_LEN);
+
+/* reg_mpsc_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mpsc, local_port, 0x00, 16, 8);
+
+/* reg_mpsc_e
+ * Enable sampling on port local_port
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpsc, e, 0x04, 30, 1);
+
+#define MLXSW_REG_MPSC_RATE_MAX 3500000000UL
+
+/* reg_mpsc_rate
+ * Sampling rate = 1 out of rate packets (with randomization around
+ * the point). Valid values are: 1 to MLXSW_REG_MPSC_RATE_MAX
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpsc, rate, 0x08, 0, 32);
+
+static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e,
+ u32 rate)
+{
+ MLXSW_REG_ZERO(mpsc, payload);
+ mlxsw_reg_mpsc_local_port_set(payload, local_port);
+ mlxsw_reg_mpsc_e_set(payload, e);
+ mlxsw_reg_mpsc_rate_set(payload, rate);
+}
+
+/* MGPC - Monitoring General Purpose Counter Set Register
+ * The MGPC register retrieves and sets the General Purpose Counter Set.
+ */
+#define MLXSW_REG_MGPC_ID 0x9081
+#define MLXSW_REG_MGPC_LEN 0x18
+
+MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN);
+
+/* reg_mgpc_counter_set_type
+ * Counter set type.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mgpc, counter_set_type, 0x00, 24, 8);
+
+/* reg_mgpc_counter_index
+ * Counter index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mgpc, counter_index, 0x00, 0, 24);
+
+enum mlxsw_reg_mgpc_opcode {
+ /* Nop */
+ MLXSW_REG_MGPC_OPCODE_NOP = 0x00,
+ /* Clear counters */
+ MLXSW_REG_MGPC_OPCODE_CLEAR = 0x08,
+};
+
+/* reg_mgpc_opcode
+ * Opcode.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mgpc, opcode, 0x04, 28, 4);
+
+/* reg_mgpc_byte_counter
+ * Byte counter value.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mgpc, byte_counter, 0x08, 0, 64);
+
+/* reg_mgpc_packet_counter
+ * Packet counter value.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64);
+
+static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index,
+ enum mlxsw_reg_mgpc_opcode opcode,
+ enum mlxsw_reg_flow_counter_set_type set_type)
+{
+ MLXSW_REG_ZERO(mgpc, payload);
+ mlxsw_reg_mgpc_counter_index_set(payload, counter_index);
+ mlxsw_reg_mgpc_counter_set_type_set(payload, set_type);
+ mlxsw_reg_mgpc_opcode_set(payload, opcode);
+}
+
+/* TIGCR - Tunneling IPinIP General Configuration Register
+ * -------------------------------------------------------
+ * The TIGCR register is used for setting up the IPinIP Tunnel configuration.
+ */
+#define MLXSW_REG_TIGCR_ID 0xA801
+#define MLXSW_REG_TIGCR_LEN 0x10
+
+MLXSW_REG_DEFINE(tigcr, MLXSW_REG_TIGCR_ID, MLXSW_REG_TIGCR_LEN);
+
+/* reg_tigcr_ipip_ttlc
+ * For IPinIP Tunnel encapsulation: whether to copy the ttl from the packet
+ * header.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tigcr, ttlc, 0x04, 8, 1);
+
+/* reg_tigcr_ipip_ttl_uc
+ * The TTL for IPinIP Tunnel encapsulation of unicast packets if
+ * reg_tigcr_ipip_ttlc is unset.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tigcr, ttl_uc, 0x04, 0, 8);
+
+static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc)
+{
+ MLXSW_REG_ZERO(tigcr, payload);
+ mlxsw_reg_tigcr_ttlc_set(payload, ttlc);
+ mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc);
+}
+
+/* SBPR - Shared Buffer Pools Register
+ * -----------------------------------
+ * The SBPR configures and retrieves the shared buffer pools and configuration.
+ */
+#define MLXSW_REG_SBPR_ID 0xB001
+#define MLXSW_REG_SBPR_LEN 0x14
+
+MLXSW_REG_DEFINE(sbpr, MLXSW_REG_SBPR_ID, MLXSW_REG_SBPR_LEN);
+
+/* shared direstion enum for SBPR, SBCM, SBPM */
+enum mlxsw_reg_sbxx_dir {
+ MLXSW_REG_SBXX_DIR_INGRESS,
+ MLXSW_REG_SBXX_DIR_EGRESS,
+};
+
+/* reg_sbpr_dir
+ * Direction.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbpr, dir, 0x00, 24, 2);
+
+/* reg_sbpr_pool
+ * Pool index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4);
+
+/* reg_sbpr_size
+ * Pool size in buffer cells.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24);
+
+enum mlxsw_reg_sbpr_mode {
+ MLXSW_REG_SBPR_MODE_STATIC,
+ MLXSW_REG_SBPR_MODE_DYNAMIC,
+};
+
+/* reg_sbpr_mode
+ * Pool quota calculation mode.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4);
+
+static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool,
+ enum mlxsw_reg_sbxx_dir dir,
+ enum mlxsw_reg_sbpr_mode mode, u32 size)
+{
+ MLXSW_REG_ZERO(sbpr, payload);
+ mlxsw_reg_sbpr_pool_set(payload, pool);
+ mlxsw_reg_sbpr_dir_set(payload, dir);
+ mlxsw_reg_sbpr_mode_set(payload, mode);
+ mlxsw_reg_sbpr_size_set(payload, size);
+}
+
+/* SBCM - Shared Buffer Class Management Register
+ * ----------------------------------------------
+ * The SBCM register configures and retrieves the shared buffer allocation
+ * and configuration according to Port-PG, including the binding to pool
+ * and definition of the associated quota.
+ */
+#define MLXSW_REG_SBCM_ID 0xB002
+#define MLXSW_REG_SBCM_LEN 0x28
+
+MLXSW_REG_DEFINE(sbcm, MLXSW_REG_SBCM_ID, MLXSW_REG_SBCM_LEN);
+
+/* reg_sbcm_local_port
+ * Local port number.
+ * For Ingress: excludes CPU port and Router port
+ * For Egress: excludes IP Router
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbcm, local_port, 0x00, 16, 8);
+
+/* reg_sbcm_pg_buff
+ * PG buffer - Port PG (dir=ingress) / traffic class (dir=egress)
+ * For PG buffer: range is 0..cap_max_pg_buffers - 1
+ * For traffic class: range is 0..cap_max_tclass - 1
+ * Note that when traffic class is in MC aware mode then the traffic
+ * classes which are MC aware cannot be configured.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbcm, pg_buff, 0x00, 8, 6);
+
+/* reg_sbcm_dir
+ * Direction.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2);
+
+/* reg_sbcm_min_buff
+ * Minimum buffer size for the limiter, in cells.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24);
+
+/* shared max_buff limits for dynamic threshold for SBCM, SBPM */
+#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1
+#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14
+
+/* reg_sbcm_max_buff
+ * When the pool associated to the port-pg/tclass is configured to
+ * static, Maximum buffer size for the limiter configured in cells.
+ * When the pool associated to the port-pg/tclass is configured to
+ * dynamic, the max_buff holds the "alpha" parameter, supporting
+ * the following values:
+ * 0: 0
+ * i: (1/128)*2^(i-1), for i=1..14
+ * 0xFF: Infinity
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24);
+
+/* reg_sbcm_pool
+ * Association of the port-priority to a pool.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4);
+
+static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff,
+ enum mlxsw_reg_sbxx_dir dir,
+ u32 min_buff, u32 max_buff, u8 pool)
+{
+ MLXSW_REG_ZERO(sbcm, payload);
+ mlxsw_reg_sbcm_local_port_set(payload, local_port);
+ mlxsw_reg_sbcm_pg_buff_set(payload, pg_buff);
+ mlxsw_reg_sbcm_dir_set(payload, dir);
+ mlxsw_reg_sbcm_min_buff_set(payload, min_buff);
+ mlxsw_reg_sbcm_max_buff_set(payload, max_buff);
+ mlxsw_reg_sbcm_pool_set(payload, pool);
+}
+
+/* SBPM - Shared Buffer Port Management Register
+ * ---------------------------------------------
+ * The SBPM register configures and retrieves the shared buffer allocation
+ * and configuration according to Port-Pool, including the definition
+ * of the associated quota.
+ */
+#define MLXSW_REG_SBPM_ID 0xB003
+#define MLXSW_REG_SBPM_LEN 0x28
+
+MLXSW_REG_DEFINE(sbpm, MLXSW_REG_SBPM_ID, MLXSW_REG_SBPM_LEN);
+
+/* reg_sbpm_local_port
+ * Local port number.
+ * For Ingress: excludes CPU port and Router port
+ * For Egress: excludes IP Router
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbpm, local_port, 0x00, 16, 8);
+
+/* reg_sbpm_pool
+ * The pool associated to quota counting on the local_port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4);
+
+/* reg_sbpm_dir
+ * Direction.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2);
+
+/* reg_sbpm_buff_occupancy
+ * Current buffer occupancy in cells.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, sbpm, buff_occupancy, 0x10, 0, 24);
+
+/* reg_sbpm_clr
+ * Clear Max Buffer Occupancy
+ * When this bit is set, max_buff_occupancy field is cleared (and a
+ * new max value is tracked from the time the clear was performed).
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, sbpm, clr, 0x14, 31, 1);
+
+/* reg_sbpm_max_buff_occupancy
+ * Maximum value of buffer occupancy in cells monitored. Cleared by
+ * writing to the clr field.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, sbpm, max_buff_occupancy, 0x14, 0, 24);
+
+/* reg_sbpm_min_buff
+ * Minimum buffer size for the limiter, in cells.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24);
+
+/* reg_sbpm_max_buff
+ * When the pool associated to the port-pg/tclass is configured to
+ * static, Maximum buffer size for the limiter configured in cells.
+ * When the pool associated to the port-pg/tclass is configured to
+ * dynamic, the max_buff holds the "alpha" parameter, supporting
+ * the following values:
+ * 0: 0
+ * i: (1/128)*2^(i-1), for i=1..14
+ * 0xFF: Infinity
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24);
+
+static inline void mlxsw_reg_sbpm_pack(char *payload, u8 local_port, u8 pool,
+ enum mlxsw_reg_sbxx_dir dir, bool clr,
+ u32 min_buff, u32 max_buff)
+{
+ MLXSW_REG_ZERO(sbpm, payload);
+ mlxsw_reg_sbpm_local_port_set(payload, local_port);
+ mlxsw_reg_sbpm_pool_set(payload, pool);
+ mlxsw_reg_sbpm_dir_set(payload, dir);
+ mlxsw_reg_sbpm_clr_set(payload, clr);
+ mlxsw_reg_sbpm_min_buff_set(payload, min_buff);
+ mlxsw_reg_sbpm_max_buff_set(payload, max_buff);
+}
+
+static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy,
+ u32 *p_max_buff_occupancy)
+{
+ *p_buff_occupancy = mlxsw_reg_sbpm_buff_occupancy_get(payload);
+ *p_max_buff_occupancy = mlxsw_reg_sbpm_max_buff_occupancy_get(payload);
+}
+
+/* SBMM - Shared Buffer Multicast Management Register
+ * --------------------------------------------------
+ * The SBMM register configures and retrieves the shared buffer allocation
+ * and configuration for MC packets according to Switch-Priority, including
+ * the binding to pool and definition of the associated quota.
+ */
+#define MLXSW_REG_SBMM_ID 0xB004
+#define MLXSW_REG_SBMM_LEN 0x28
+
+MLXSW_REG_DEFINE(sbmm, MLXSW_REG_SBMM_ID, MLXSW_REG_SBMM_LEN);
+
+/* reg_sbmm_prio
+ * Switch Priority.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbmm, prio, 0x00, 8, 4);
+
+/* reg_sbmm_min_buff
+ * Minimum buffer size for the limiter, in cells.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbmm, min_buff, 0x18, 0, 24);
+
+/* reg_sbmm_max_buff
+ * When the pool associated to the port-pg/tclass is configured to
+ * static, Maximum buffer size for the limiter configured in cells.
+ * When the pool associated to the port-pg/tclass is configured to
+ * dynamic, the max_buff holds the "alpha" parameter, supporting
+ * the following values:
+ * 0: 0
+ * i: (1/128)*2^(i-1), for i=1..14
+ * 0xFF: Infinity
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbmm, max_buff, 0x1C, 0, 24);
+
+/* reg_sbmm_pool
+ * Association of the port-priority to a pool.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbmm, pool, 0x24, 0, 4);
+
+static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff,
+ u32 max_buff, u8 pool)
+{
+ MLXSW_REG_ZERO(sbmm, payload);
+ mlxsw_reg_sbmm_prio_set(payload, prio);
+ mlxsw_reg_sbmm_min_buff_set(payload, min_buff);
+ mlxsw_reg_sbmm_max_buff_set(payload, max_buff);
+ mlxsw_reg_sbmm_pool_set(payload, pool);
+}
+
+/* SBSR - Shared Buffer Status Register
+ * ------------------------------------
+ * The SBSR register retrieves the shared buffer occupancy according to
+ * Port-Pool. Note that this register enables reading a large amount of data.
+ * It is the user's responsibility to limit the amount of data to ensure the
+ * response can match the maximum transfer unit. In case the response exceeds
+ * the maximum transport unit, it will be truncated with no special notice.
+ */
+#define MLXSW_REG_SBSR_ID 0xB005
+#define MLXSW_REG_SBSR_BASE_LEN 0x5C /* base length, without records */
+#define MLXSW_REG_SBSR_REC_LEN 0x8 /* record length */
+#define MLXSW_REG_SBSR_REC_MAX_COUNT 120
+#define MLXSW_REG_SBSR_LEN (MLXSW_REG_SBSR_BASE_LEN + \
+ MLXSW_REG_SBSR_REC_LEN * \
+ MLXSW_REG_SBSR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(sbsr, MLXSW_REG_SBSR_ID, MLXSW_REG_SBSR_LEN);
+
+/* reg_sbsr_clr
+ * Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy
+ * field is cleared (and a new max value is tracked from the time the clear
+ * was performed).
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, sbsr, clr, 0x00, 31, 1);
+
+/* reg_sbsr_ingress_port_mask
+ * Bit vector for all ingress network ports.
+ * Indicates which of the ports (for which the relevant bit is set)
+ * are affected by the set operation. Configuration of any other port
+ * does not change.
+ * Access: Index
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, sbsr, ingress_port_mask, 0x10, 0x20, 1);
+
+/* reg_sbsr_pg_buff_mask
+ * Bit vector for all switch priority groups.
+ * Indicates which of the priorities (for which the relevant bit is set)
+ * are affected by the set operation. Configuration of any other priority
+ * does not change.
+ * Range is 0..cap_max_pg_buffers - 1
+ * Access: Index
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, sbsr, pg_buff_mask, 0x30, 0x4, 1);
+
+/* reg_sbsr_egress_port_mask
+ * Bit vector for all egress network ports.
+ * Indicates which of the ports (for which the relevant bit is set)
+ * are affected by the set operation. Configuration of any other port
+ * does not change.
+ * Access: Index
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, sbsr, egress_port_mask, 0x34, 0x20, 1);
+
+/* reg_sbsr_tclass_mask
+ * Bit vector for all traffic classes.
+ * Indicates which of the traffic classes (for which the relevant bit is
+ * set) are affected by the set operation. Configuration of any other
+ * traffic class does not change.
+ * Range is 0..cap_max_tclass - 1
+ * Access: Index
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, sbsr, tclass_mask, 0x54, 0x8, 1);
+
+static inline void mlxsw_reg_sbsr_pack(char *payload, bool clr)
+{
+ MLXSW_REG_ZERO(sbsr, payload);
+ mlxsw_reg_sbsr_clr_set(payload, clr);
+}
+
+/* reg_sbsr_rec_buff_occupancy
+ * Current buffer occupancy in cells.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sbsr, rec_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN,
+ 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x00, false);
+
+/* reg_sbsr_rec_max_buff_occupancy
+ * Maximum value of buffer occupancy in cells monitored. Cleared by
+ * writing to the clr field.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, sbsr, rec_max_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN,
+ 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x04, false);
+
+static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index,
+ u32 *p_buff_occupancy,
+ u32 *p_max_buff_occupancy)
+{
+ *p_buff_occupancy =
+ mlxsw_reg_sbsr_rec_buff_occupancy_get(payload, rec_index);
+ *p_max_buff_occupancy =
+ mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index);
+}
+
+/* SBIB - Shared Buffer Internal Buffer Register
+ * ---------------------------------------------
+ * The SBIB register configures per port buffers for internal use. The internal
+ * buffers consume memory on the port buffers (note that the port buffers are
+ * used also by PBMC).
+ *
+ * For Spectrum this is used for egress mirroring.
+ */
+#define MLXSW_REG_SBIB_ID 0xB006
+#define MLXSW_REG_SBIB_LEN 0x10
+
+MLXSW_REG_DEFINE(sbib, MLXSW_REG_SBIB_ID, MLXSW_REG_SBIB_LEN);
+
+/* reg_sbib_local_port
+ * Local port number
+ * Not supported for CPU port and router port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, sbib, local_port, 0x00, 16, 8);
+
+/* reg_sbib_buff_size
+ * Units represented in cells
+ * Allowed range is 0 to (cap_max_headroom_size - 1)
+ * Default is 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbib, buff_size, 0x08, 0, 24);
+
+static inline void mlxsw_reg_sbib_pack(char *payload, u8 local_port,
+ u32 buff_size)
+{
+ MLXSW_REG_ZERO(sbib, payload);
+ mlxsw_reg_sbib_local_port_set(payload, local_port);
+ mlxsw_reg_sbib_buff_size_set(payload, buff_size);
+}
+
+static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
+ MLXSW_REG(sgcr),
+ MLXSW_REG(spad),
+ MLXSW_REG(smid),
+ MLXSW_REG(sspr),
+ MLXSW_REG(sfdat),
+ MLXSW_REG(sfd),
+ MLXSW_REG(sfn),
+ MLXSW_REG(spms),
+ MLXSW_REG(spvid),
+ MLXSW_REG(spvm),
+ MLXSW_REG(spaft),
+ MLXSW_REG(sfgc),
+ MLXSW_REG(sftr),
+ MLXSW_REG(sfdf),
+ MLXSW_REG(sldr),
+ MLXSW_REG(slcr),
+ MLXSW_REG(slcor),
+ MLXSW_REG(spmlr),
+ MLXSW_REG(svfa),
+ MLXSW_REG(svpe),
+ MLXSW_REG(sfmr),
+ MLXSW_REG(spvmlr),
+ MLXSW_REG(cwtp),
+ MLXSW_REG(cwtpm),
+ MLXSW_REG(pgcr),
+ MLXSW_REG(ppbt),
+ MLXSW_REG(pacl),
+ MLXSW_REG(pagt),
+ MLXSW_REG(ptar),
+ MLXSW_REG(ppbs),
+ MLXSW_REG(prcr),
+ MLXSW_REG(pefa),
+ MLXSW_REG(ptce2),
+ MLXSW_REG(perpt),
+ MLXSW_REG(perar),
+ MLXSW_REG(ptce3),
+ MLXSW_REG(percr),
+ MLXSW_REG(pererp),
+ MLXSW_REG(iedr),
+ MLXSW_REG(qpts),
+ MLXSW_REG(qpcr),
+ MLXSW_REG(qtct),
+ MLXSW_REG(qeec),
+ MLXSW_REG(qrwe),
+ MLXSW_REG(qpdsm),
+ MLXSW_REG(qpdpm),
+ MLXSW_REG(qtctm),
+ MLXSW_REG(pmlp),
+ MLXSW_REG(pmtu),
+ MLXSW_REG(ptys),
+ MLXSW_REG(ppad),
+ MLXSW_REG(paos),
+ MLXSW_REG(pfcc),
+ MLXSW_REG(ppcnt),
+ MLXSW_REG(plib),
+ MLXSW_REG(pptb),
+ MLXSW_REG(pbmc),
+ MLXSW_REG(pspa),
+ MLXSW_REG(htgt),
+ MLXSW_REG(hpkt),
+ MLXSW_REG(rgcr),
+ MLXSW_REG(ritr),
+ MLXSW_REG(rtar),
+ MLXSW_REG(ratr),
+ MLXSW_REG(rtdp),
+ MLXSW_REG(rdpm),
+ MLXSW_REG(ricnt),
+ MLXSW_REG(rrcr),
+ MLXSW_REG(ralta),
+ MLXSW_REG(ralst),
+ MLXSW_REG(raltb),
+ MLXSW_REG(ralue),
+ MLXSW_REG(rauht),
+ MLXSW_REG(raleu),
+ MLXSW_REG(rauhtd),
+ MLXSW_REG(rigr2),
+ MLXSW_REG(recr2),
+ MLXSW_REG(rmft2),
+ MLXSW_REG(mfcr),
+ MLXSW_REG(mfsc),
+ MLXSW_REG(mfsm),
+ MLXSW_REG(mfsl),
+ MLXSW_REG(mtcap),
+ MLXSW_REG(mtmp),
+ MLXSW_REG(mcia),
+ MLXSW_REG(mpat),
+ MLXSW_REG(mpar),
+ MLXSW_REG(mrsr),
+ MLXSW_REG(mlcr),
+ MLXSW_REG(mpsc),
+ MLXSW_REG(mcqi),
+ MLXSW_REG(mcc),
+ MLXSW_REG(mcda),
+ MLXSW_REG(mgpc),
+ MLXSW_REG(tigcr),
+ MLXSW_REG(sbpr),
+ MLXSW_REG(sbcm),
+ MLXSW_REG(sbpm),
+ MLXSW_REG(sbmm),
+ MLXSW_REG(sbsr),
+ MLXSW_REG(sbib),
+};
+
+static inline const char *mlxsw_reg_id_str(u16 reg_id)
+{
+ const struct mlxsw_reg_info *reg_info;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_reg_infos); i++) {
+ reg_info = mlxsw_reg_infos[i];
+ if (reg_info->id == reg_id)
+ return reg_info->name;
+ }
+ return "*UNKNOWN*";
+}
+
+/* PUDE - Port Up / Down Event
+ * ---------------------------
+ * Reports the operational state change of a port.
+ */
+#define MLXSW_REG_PUDE_LEN 0x10
+
+/* reg_pude_swid
+ * Switch partition ID with which to associate the port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pude, swid, 0x00, 24, 8);
+
+/* reg_pude_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pude, local_port, 0x00, 16, 8);
+
+/* reg_pude_admin_status
+ * Port administrative state (the desired state).
+ * 1 - Up.
+ * 2 - Down.
+ * 3 - Up once. This means that in case of link failure, the port won't go
+ * into polling mode, but will wait to be re-enabled by software.
+ * 4 - Disabled by system. Can only be set by hardware.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pude, admin_status, 0x00, 8, 4);
+
+/* reg_pude_oper_status
+ * Port operatioanl state.
+ * 1 - Up.
+ * 2 - Down.
+ * 3 - Down by port failure. This means that the device will not let the
+ * port up again until explicitly specified by software.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pude, oper_status, 0x00, 0, 4);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
new file mode 100644
index 000000000..79a31de7c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_RESOURCES_H
+#define _MLXSW_RESOURCES_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+enum mlxsw_res_id {
+ MLXSW_RES_ID_KVD_SIZE,
+ MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
+ MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
+ MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE,
+ MLXSW_RES_ID_MAX_KVD_ACTION_SETS,
+ MLXSW_RES_ID_MAX_TRAP_GROUPS,
+ MLXSW_RES_ID_CQE_V0,
+ MLXSW_RES_ID_CQE_V1,
+ MLXSW_RES_ID_CQE_V2,
+ MLXSW_RES_ID_COUNTER_POOL_SIZE,
+ MLXSW_RES_ID_MAX_SPAN,
+ MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES,
+ MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC,
+ MLXSW_RES_ID_MAX_SYSTEM_PORT,
+ MLXSW_RES_ID_MAX_LAG,
+ MLXSW_RES_ID_MAX_LAG_MEMBERS,
+ MLXSW_RES_ID_MAX_BUFFER_SIZE,
+ MLXSW_RES_ID_CELL_SIZE,
+ MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS,
+ MLXSW_RES_ID_ACL_MAX_TCAM_RULES,
+ MLXSW_RES_ID_ACL_MAX_REGIONS,
+ MLXSW_RES_ID_ACL_MAX_GROUPS,
+ MLXSW_RES_ID_ACL_MAX_GROUP_SIZE,
+ MLXSW_RES_ID_ACL_FLEX_KEYS,
+ MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE,
+ MLXSW_RES_ID_ACL_ACTIONS_PER_SET,
+ MLXSW_RES_ID_ACL_MAX_ERPT_BANKS,
+ MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE,
+ MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID,
+ MLXSW_RES_ID_ACL_ERPT_ENTRIES_2KB,
+ MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB,
+ MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB,
+ MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB,
+ MLXSW_RES_ID_MAX_CPU_POLICERS,
+ MLXSW_RES_ID_MAX_VRS,
+ MLXSW_RES_ID_MAX_RIFS,
+ MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES,
+ MLXSW_RES_ID_MAX_LPM_TREES,
+
+ /* Internal resources.
+ * Determined by the SW, not queried from the HW.
+ */
+ MLXSW_RES_ID_KVD_SINGLE_SIZE,
+ MLXSW_RES_ID_KVD_DOUBLE_SIZE,
+ MLXSW_RES_ID_KVD_LINEAR_SIZE,
+
+ __MLXSW_RES_ID_MAX,
+};
+
+static u16 mlxsw_res_ids[] = {
+ [MLXSW_RES_ID_KVD_SIZE] = 0x1001,
+ [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
+ [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
+ [MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE] = 0x1005,
+ [MLXSW_RES_ID_MAX_KVD_ACTION_SETS] = 0x1007,
+ [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
+ [MLXSW_RES_ID_CQE_V0] = 0x2210,
+ [MLXSW_RES_ID_CQE_V1] = 0x2211,
+ [MLXSW_RES_ID_CQE_V2] = 0x2212,
+ [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410,
+ [MLXSW_RES_ID_MAX_SPAN] = 0x2420,
+ [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443,
+ [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449,
+ [MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502,
+ [MLXSW_RES_ID_MAX_LAG] = 0x2520,
+ [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
+ [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802, /* Bytes */
+ [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */
+ [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901,
+ [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902,
+ [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903,
+ [MLXSW_RES_ID_ACL_MAX_GROUPS] = 0x2904,
+ [MLXSW_RES_ID_ACL_MAX_GROUP_SIZE] = 0x2905,
+ [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910,
+ [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911,
+ [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912,
+ [MLXSW_RES_ID_ACL_MAX_ERPT_BANKS] = 0x2940,
+ [MLXSW_RES_ID_ACL_MAX_ERPT_BANK_SIZE] = 0x2941,
+ [MLXSW_RES_ID_ACL_MAX_LARGE_KEY_ID] = 0x2942,
+ [MLXSW_RES_ID_ACL_ERPT_ENTRIES_2KB] = 0x2950,
+ [MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB] = 0x2951,
+ [MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB] = 0x2952,
+ [MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB] = 0x2953,
+ [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
+ [MLXSW_RES_ID_MAX_VRS] = 0x2C01,
+ [MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
+ [MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10,
+ [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30,
+};
+
+struct mlxsw_res {
+ bool valid[__MLXSW_RES_ID_MAX];
+ u64 values[__MLXSW_RES_ID_MAX];
+};
+
+static inline bool mlxsw_res_valid(struct mlxsw_res *res,
+ enum mlxsw_res_id res_id)
+{
+ return res->valid[res_id];
+}
+
+#define MLXSW_RES_VALID(res, short_res_id) \
+ mlxsw_res_valid(res, MLXSW_RES_ID_##short_res_id)
+
+static inline u64 mlxsw_res_get(struct mlxsw_res *res,
+ enum mlxsw_res_id res_id)
+{
+ if (WARN_ON(!res->valid[res_id]))
+ return 0;
+ return res->values[res_id];
+}
+
+#define MLXSW_RES_GET(res, short_res_id) \
+ mlxsw_res_get(res, MLXSW_RES_ID_##short_res_id)
+
+static inline void mlxsw_res_set(struct mlxsw_res *res,
+ enum mlxsw_res_id res_id, u64 value)
+{
+ res->valid[res_id] = true;
+ res->values[res_id] = value;
+}
+
+#define MLXSW_RES_SET(res, short_res_id, value) \
+ mlxsw_res_set(res, MLXSW_RES_ID_##short_res_id, value)
+
+static inline void mlxsw_res_parse(struct mlxsw_res *res, u16 id, u64 value)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_res_ids); i++) {
+ if (mlxsw_res_ids[i] == id) {
+ mlxsw_res_set(res, i, value);
+ return;
+ }
+ }
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
new file mode 100644
index 000000000..1019c9efe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -0,0 +1,5154 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <linux/if_bridge.h>
+#include <linux/workqueue.h>
+#include <linux/jiffies.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/dcbnl.h>
+#include <linux/inetdevice.h>
+#include <linux/netlink.h>
+#include <net/switchdev.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/netevent.h>
+#include <net/tc_act/tc_sample.h>
+#include <net/addrconf.h>
+
+#include "spectrum.h"
+#include "pci.h"
+#include "core.h"
+#include "reg.h"
+#include "port.h"
+#include "trap.h"
+#include "txheader.h"
+#include "spectrum_cnt.h"
+#include "spectrum_dpipe.h"
+#include "spectrum_acl_flex_actions.h"
+#include "spectrum_span.h"
+#include "../mlxfw/mlxfw.h"
+
+#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
+
+#define MLXSW_SP1_FWREV_MAJOR 13
+#define MLXSW_SP1_FWREV_MINOR 1703
+#define MLXSW_SP1_FWREV_SUBMINOR 4
+#define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
+
+static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
+ .major = MLXSW_SP1_FWREV_MAJOR,
+ .minor = MLXSW_SP1_FWREV_MINOR,
+ .subminor = MLXSW_SP1_FWREV_SUBMINOR,
+ .can_reset_minor = MLXSW_SP1_FWREV_CAN_RESET_MINOR,
+};
+
+#define MLXSW_SP1_FW_FILENAME \
+ "mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \
+ "." __stringify(MLXSW_SP1_FWREV_MINOR) \
+ "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2"
+
+static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum";
+static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2";
+static const char mlxsw_sp_driver_version[] = "1.0";
+
+/* tx_hdr_version
+ * Tx header version.
+ * Must be set to 1.
+ */
+MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4);
+
+/* tx_hdr_ctl
+ * Packet control type.
+ * 0 - Ethernet control (e.g. EMADs, LACP)
+ * 1 - Ethernet data
+ */
+MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2);
+
+/* tx_hdr_proto
+ * Packet protocol type. Must be set to 1 (Ethernet).
+ */
+MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3);
+
+/* tx_hdr_rx_is_router
+ * Packet is sent from the router. Valid for data packets only.
+ */
+MLXSW_ITEM32(tx, hdr, rx_is_router, 0x00, 19, 1);
+
+/* tx_hdr_fid_valid
+ * Indicates if the 'fid' field is valid and should be used for
+ * forwarding lookup. Valid for data packets only.
+ */
+MLXSW_ITEM32(tx, hdr, fid_valid, 0x00, 16, 1);
+
+/* tx_hdr_swid
+ * Switch partition ID. Must be set to 0.
+ */
+MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3);
+
+/* tx_hdr_control_tclass
+ * Indicates if the packet should use the control TClass and not one
+ * of the data TClasses.
+ */
+MLXSW_ITEM32(tx, hdr, control_tclass, 0x00, 6, 1);
+
+/* tx_hdr_etclass
+ * Egress TClass to be used on the egress device on the egress port.
+ */
+MLXSW_ITEM32(tx, hdr, etclass, 0x00, 0, 4);
+
+/* tx_hdr_port_mid
+ * Destination local port for unicast packets.
+ * Destination multicast ID for multicast packets.
+ *
+ * Control packets are directed to a specific egress port, while data
+ * packets are transmitted through the CPU port (0) into the switch partition,
+ * where forwarding rules are applied.
+ */
+MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16);
+
+/* tx_hdr_fid
+ * Forwarding ID used for L2 forwarding lookup. Valid only if 'fid_valid' is
+ * set, otherwise calculated based on the packet's VID using VID to FID mapping.
+ * Valid for data packets only.
+ */
+MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16);
+
+/* tx_hdr_type
+ * 0 - Data packets
+ * 6 - Control packets
+ */
+MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
+
+struct mlxsw_sp_mlxfw_dev {
+ struct mlxfw_dev mlxfw_dev;
+ struct mlxsw_sp *mlxsw_sp;
+};
+
+static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
+ u16 component_index, u32 *p_max_size,
+ u8 *p_align_bits, u16 *p_max_write_size)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcqi_pl[MLXSW_REG_MCQI_LEN];
+ int err;
+
+ mlxsw_reg_mcqi_pack(mcqi_pl, component_index);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcqi), mcqi_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mcqi_unpack(mcqi_pl, p_max_size, p_align_bits,
+ p_max_write_size);
+
+ *p_align_bits = max_t(u8, *p_align_bits, 2);
+ *p_max_write_size = min_t(u16, *p_max_write_size,
+ MLXSW_REG_MCDA_MAX_DATA_LEN);
+ return 0;
+}
+
+static int mlxsw_sp_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcc_pl[MLXSW_REG_MCC_LEN];
+ u8 control_state;
+ int err;
+
+ mlxsw_reg_mcc_pack(mcc_pl, 0, 0, 0, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mcc_unpack(mcc_pl, fwhandle, NULL, &control_state);
+ if (control_state != MLXFW_FSM_STATE_IDLE)
+ return -EBUSY;
+
+ mlxsw_reg_mcc_pack(mcc_pl,
+ MLXSW_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE,
+ 0, *fwhandle, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+}
+
+static int mlxsw_sp_fsm_component_update(struct mlxfw_dev *mlxfw_dev,
+ u32 fwhandle, u16 component_index,
+ u32 component_size)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcc_pl[MLXSW_REG_MCC_LEN];
+
+ mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_UPDATE_COMPONENT,
+ component_index, fwhandle, component_size);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+}
+
+static int mlxsw_sp_fsm_block_download(struct mlxfw_dev *mlxfw_dev,
+ u32 fwhandle, u8 *data, u16 size,
+ u32 offset)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcda_pl[MLXSW_REG_MCDA_LEN];
+
+ mlxsw_reg_mcda_pack(mcda_pl, fwhandle, offset, size, data);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcda), mcda_pl);
+}
+
+static int mlxsw_sp_fsm_component_verify(struct mlxfw_dev *mlxfw_dev,
+ u32 fwhandle, u16 component_index)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcc_pl[MLXSW_REG_MCC_LEN];
+
+ mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_VERIFY_COMPONENT,
+ component_index, fwhandle, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+}
+
+static int mlxsw_sp_fsm_activate(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcc_pl[MLXSW_REG_MCC_LEN];
+
+ mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_ACTIVATE, 0,
+ fwhandle, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+}
+
+static int mlxsw_sp_fsm_query_state(struct mlxfw_dev *mlxfw_dev, u32 fwhandle,
+ enum mlxfw_fsm_state *fsm_state,
+ enum mlxfw_fsm_state_err *fsm_state_err)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcc_pl[MLXSW_REG_MCC_LEN];
+ u8 control_state;
+ u8 error_code;
+ int err;
+
+ mlxsw_reg_mcc_pack(mcc_pl, 0, 0, fwhandle, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mcc_unpack(mcc_pl, NULL, &error_code, &control_state);
+ *fsm_state = control_state;
+ *fsm_state_err = min_t(enum mlxfw_fsm_state_err, error_code,
+ MLXFW_FSM_STATE_ERR_MAX);
+ return 0;
+}
+
+static void mlxsw_sp_fsm_cancel(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcc_pl[MLXSW_REG_MCC_LEN];
+
+ mlxsw_reg_mcc_pack(mcc_pl, MLXSW_REG_MCC_INSTRUCTION_CANCEL, 0,
+ fwhandle, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+}
+
+static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle)
+{
+ struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev =
+ container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp;
+ char mcc_pl[MLXSW_REG_MCC_LEN];
+
+ mlxsw_reg_mcc_pack(mcc_pl,
+ MLXSW_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE, 0,
+ fwhandle, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl);
+}
+
+static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = {
+ .component_query = mlxsw_sp_component_query,
+ .fsm_lock = mlxsw_sp_fsm_lock,
+ .fsm_component_update = mlxsw_sp_fsm_component_update,
+ .fsm_block_download = mlxsw_sp_fsm_block_download,
+ .fsm_component_verify = mlxsw_sp_fsm_component_verify,
+ .fsm_activate = mlxsw_sp_fsm_activate,
+ .fsm_query_state = mlxsw_sp_fsm_query_state,
+ .fsm_cancel = mlxsw_sp_fsm_cancel,
+ .fsm_release = mlxsw_sp_fsm_release
+};
+
+static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
+ const struct firmware *firmware)
+{
+ struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = {
+ .mlxfw_dev = {
+ .ops = &mlxsw_sp_mlxfw_dev_ops,
+ .psid = mlxsw_sp->bus_info->psid,
+ .psid_size = strlen(mlxsw_sp->bus_info->psid),
+ },
+ .mlxsw_sp = mlxsw_sp
+ };
+ int err;
+
+ mlxsw_core_fw_flash_start(mlxsw_sp->core);
+ err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware);
+ mlxsw_core_fw_flash_end(mlxsw_sp->core);
+
+ return err;
+}
+
+static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
+{
+ const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev;
+ const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev;
+ const char *fw_filename = mlxsw_sp->fw_filename;
+ const struct firmware *firmware;
+ int err;
+
+ /* Don't check if driver does not require it */
+ if (!req_rev || !fw_filename)
+ return 0;
+
+ /* Validate driver & FW are compatible */
+ if (rev->major != req_rev->major) {
+ WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n",
+ rev->major, req_rev->major);
+ return -EINVAL;
+ }
+ if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
+ MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) &&
+ (rev->minor > req_rev->minor ||
+ (rev->minor == req_rev->minor &&
+ rev->subminor >= req_rev->subminor)))
+ return 0;
+
+ dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
+ rev->major, rev->minor, rev->subminor);
+ dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n",
+ fw_filename);
+
+ err = request_firmware_direct(&firmware, fw_filename,
+ mlxsw_sp->bus_info->dev);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n",
+ fw_filename);
+ return err;
+ }
+
+ err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware);
+ release_firmware(firmware);
+ if (err)
+ dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n");
+
+ /* On FW flash success, tell the caller FW reset is needed
+ * if current FW supports it.
+ */
+ if (rev->minor >= req_rev->can_reset_minor)
+ return err ? err : -EAGAIN;
+ else
+ return 0;
+}
+
+int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
+ unsigned int counter_index, u64 *packets,
+ u64 *bytes)
+{
+ char mgpc_pl[MLXSW_REG_MGPC_LEN];
+ int err;
+
+ mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
+ if (err)
+ return err;
+ if (packets)
+ *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl);
+ if (bytes)
+ *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl);
+ return 0;
+}
+
+static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp,
+ unsigned int counter_index)
+{
+ char mgpc_pl[MLXSW_REG_MGPC_LEN];
+
+ mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR,
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
+}
+
+int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ unsigned int *p_counter_index)
+{
+ int err;
+
+ err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+ p_counter_index);
+ if (err)
+ return err;
+ err = mlxsw_sp_flow_counter_clear(mlxsw_sp, *p_counter_index);
+ if (err)
+ goto err_counter_clear;
+ return 0;
+
+err_counter_clear:
+ mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+ *p_counter_index);
+ return err;
+}
+
+void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
+ unsigned int counter_index)
+{
+ mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+ counter_index);
+}
+
+static void mlxsw_sp_txhdr_construct(struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info)
+{
+ char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
+
+ memset(txhdr, 0, MLXSW_TXHDR_LEN);
+
+ mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
+ mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
+ mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
+ mlxsw_tx_hdr_swid_set(txhdr, 0);
+ mlxsw_tx_hdr_control_tclass_set(txhdr, 1);
+ mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port);
+ mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
+}
+
+enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state)
+{
+ switch (state) {
+ case BR_STATE_FORWARDING:
+ return MLXSW_REG_SPMS_STATE_FORWARDING;
+ case BR_STATE_LEARNING:
+ return MLXSW_REG_SPMS_STATE_LEARNING;
+ case BR_STATE_LISTENING: /* fall-through */
+ case BR_STATE_DISABLED: /* fall-through */
+ case BR_STATE_BLOCKING:
+ return MLXSW_REG_SPMS_STATE_DISCARDING;
+ default:
+ BUG();
+ }
+}
+
+int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
+ u8 state)
+{
+ enum mlxsw_reg_spms_state spms_state = mlxsw_sp_stp_spms_state(state);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char *spms_pl;
+ int err;
+
+ spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL);
+ if (!spms_pl)
+ return -ENOMEM;
+ mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state);
+
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl);
+ kfree(spms_pl);
+ return err;
+}
+
+static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
+{
+ char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
+ int err;
+
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(spad), spad_pl);
+ if (err)
+ return err;
+ mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sp->base_mac);
+ return 0;
+}
+
+static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ bool enable, u32 rate)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char mpsc_pl[MLXSW_REG_MPSC_LEN];
+
+ mlxsw_reg_mpsc_pack(mpsc_pl, mlxsw_sp_port->local_port, enable, rate);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpsc), mpsc_pl);
+}
+
+static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ bool is_up)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char paos_pl[MLXSW_REG_PAOS_LEN];
+
+ mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port,
+ is_up ? MLXSW_PORT_ADMIN_STATUS_UP :
+ MLXSW_PORT_ADMIN_STATUS_DOWN);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(paos), paos_pl);
+}
+
+static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ unsigned char *addr)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char ppad_pl[MLXSW_REG_PPAD_LEN];
+
+ mlxsw_reg_ppad_pack(ppad_pl, true, mlxsw_sp_port->local_port);
+ mlxsw_reg_ppad_mac_memcpy_to(ppad_pl, addr);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppad), ppad_pl);
+}
+
+static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ unsigned char *addr = mlxsw_sp_port->dev->dev_addr;
+
+ ether_addr_copy(addr, mlxsw_sp->base_mac);
+ addr[ETH_ALEN - 1] += mlxsw_sp_port->local_port;
+ return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr);
+}
+
+static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char pmtu_pl[MLXSW_REG_PMTU_LEN];
+ int max_mtu;
+ int err;
+
+ mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
+ mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
+ if (err)
+ return err;
+ max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
+
+ if (mtu > max_mtu)
+ return -EINVAL;
+
+ mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sp_port->local_port, mtu);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
+}
+
+static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char pspa_pl[MLXSW_REG_PSPA_LEN];
+
+ mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl);
+}
+
+int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char svpe_pl[MLXSW_REG_SVPE_LEN];
+
+ mlxsw_reg_svpe_pack(svpe_pl, mlxsw_sp_port->local_port, enable);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svpe), svpe_pl);
+}
+
+int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
+ bool learn_enable)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char *spvmlr_pl;
+ int err;
+
+ spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL);
+ if (!spvmlr_pl)
+ return -ENOMEM;
+ mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid,
+ learn_enable);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl);
+ kfree(spvmlr_pl);
+ return err;
+}
+
+static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char spvid_pl[MLXSW_REG_SPVID_LEN];
+
+ mlxsw_reg_spvid_pack(spvid_pl, mlxsw_sp_port->local_port, vid);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl);
+}
+
+static int mlxsw_sp_port_allow_untagged_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ bool allow)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char spaft_pl[MLXSW_REG_SPAFT_LEN];
+
+ mlxsw_reg_spaft_pack(spaft_pl, mlxsw_sp_port->local_port, allow);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spaft), spaft_pl);
+}
+
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ int err;
+
+ if (!vid) {
+ err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, false);
+ if (err)
+ return err;
+ } else {
+ err = __mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid);
+ if (err)
+ return err;
+ err = mlxsw_sp_port_allow_untagged_set(mlxsw_sp_port, true);
+ if (err)
+ goto err_port_allow_untagged_set;
+ }
+
+ mlxsw_sp_port->pvid = vid;
+ return 0;
+
+err_port_allow_untagged_set:
+ __mlxsw_sp_port_pvid_set(mlxsw_sp_port, mlxsw_sp_port->pvid);
+ return err;
+}
+
+static int
+mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char sspr_pl[MLXSW_REG_SSPR_LEN];
+
+ mlxsw_reg_sspr_pack(sspr_pl, mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl);
+}
+
+static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
+ u8 local_port, u8 *p_module,
+ u8 *p_width, u8 *p_lane)
+{
+ char pmlp_pl[MLXSW_REG_PMLP_LEN];
+ int err;
+
+ mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl);
+ if (err)
+ return err;
+ *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+ *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+ *p_lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0);
+ return 0;
+}
+
+static int mlxsw_sp_port_module_map(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 module, u8 width, u8 lane)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char pmlp_pl[MLXSW_REG_PMLP_LEN];
+ int i;
+
+ mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_pmlp_width_set(pmlp_pl, width);
+ for (i = 0; i < width; i++) {
+ mlxsw_reg_pmlp_module_set(pmlp_pl, i, module);
+ mlxsw_reg_pmlp_tx_lane_set(pmlp_pl, i, lane + i); /* Rx & Tx */
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl);
+}
+
+static int mlxsw_sp_port_module_unmap(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char pmlp_pl[MLXSW_REG_PMLP_LEN];
+
+ mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_pmlp_width_set(pmlp_pl, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmlp), pmlp_pl);
+}
+
+static int mlxsw_sp_port_open(struct net_device *dev)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err;
+
+ err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
+ if (err)
+ return err;
+ netif_start_queue(dev);
+ return 0;
+}
+
+static int mlxsw_sp_port_stop(struct net_device *dev)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ netif_stop_queue(dev);
+ return mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
+}
+
+static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_port_pcpu_stats *pcpu_stats;
+ const struct mlxsw_tx_info tx_info = {
+ .local_port = mlxsw_sp_port->local_port,
+ .is_emad = false,
+ };
+ u64 len;
+ int err;
+
+ if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info))
+ return NETDEV_TX_BUSY;
+
+ if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) {
+ struct sk_buff *skb_orig = skb;
+
+ skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN);
+ if (!skb) {
+ this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped);
+ dev_kfree_skb_any(skb_orig);
+ return NETDEV_TX_OK;
+ }
+ dev_consume_skb_any(skb_orig);
+ }
+
+ if (eth_skb_pad(skb)) {
+ this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped);
+ return NETDEV_TX_OK;
+ }
+
+ mlxsw_sp_txhdr_construct(skb, &tx_info);
+ /* TX header is consumed by HW on the way so we shouldn't count its
+ * bytes as being sent.
+ */
+ len = skb->len - MLXSW_TXHDR_LEN;
+
+ /* Due to a race we might fail here because of a full queue. In that
+ * unlikely case we simply drop the packet.
+ */
+ err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &tx_info);
+
+ if (!err) {
+ pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats);
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->tx_packets++;
+ pcpu_stats->tx_bytes += len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+ } else {
+ this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped);
+ dev_kfree_skb_any(skb);
+ }
+ return NETDEV_TX_OK;
+}
+
+static void mlxsw_sp_set_rx_mode(struct net_device *dev)
+{
+}
+
+static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct sockaddr *addr = p;
+ int err;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ err = mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr->sa_data);
+ if (err)
+ return err;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
+static u16 mlxsw_sp_pg_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp,
+ int mtu)
+{
+ return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu);
+}
+
+#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */
+
+static u16 mlxsw_sp_pfc_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu,
+ u16 delay)
+{
+ delay = mlxsw_sp_bytes_cells(mlxsw_sp, DIV_ROUND_UP(delay,
+ BITS_PER_BYTE));
+ return MLXSW_SP_CELL_FACTOR * delay + mlxsw_sp_bytes_cells(mlxsw_sp,
+ mtu);
+}
+
+/* Maximum delay buffer needed in case of PAUSE frames, in bytes.
+ * Assumes 100m cable and maximum MTU.
+ */
+#define MLXSW_SP_PAUSE_DELAY 58752
+
+static u16 mlxsw_sp_pg_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu,
+ u16 delay, bool pfc, bool pause)
+{
+ if (pfc)
+ return mlxsw_sp_pfc_delay_get(mlxsw_sp, mtu, delay);
+ else if (pause)
+ return mlxsw_sp_bytes_cells(mlxsw_sp, MLXSW_SP_PAUSE_DELAY);
+ else
+ return 0;
+}
+
+static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres,
+ bool lossy)
+{
+ if (lossy)
+ mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size);
+ else
+ mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size,
+ thres);
+}
+
+int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
+ u8 *prio_tc, bool pause_en,
+ struct ieee_pfc *my_pfc)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0;
+ u16 delay = !!my_pfc ? my_pfc->delay : 0;
+ char pbmc_pl[MLXSW_REG_PBMC_LEN];
+ int i, j, err;
+
+ mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+ if (err)
+ return err;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ bool configure = false;
+ bool pfc = false;
+ u16 thres_cells;
+ u16 delay_cells;
+ bool lossy;
+
+ for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) {
+ if (prio_tc[j] == i) {
+ pfc = pfc_en & BIT(j);
+ configure = true;
+ break;
+ }
+ }
+
+ if (!configure)
+ continue;
+
+ lossy = !(pfc || pause_en);
+ thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu);
+ delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay,
+ pfc, pause_en);
+ mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres_cells + delay_cells,
+ thres_cells, lossy);
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+}
+
+static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ int mtu, bool pause_en)
+{
+ u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0};
+ bool dcb_en = !!mlxsw_sp_port->dcb.ets;
+ struct ieee_pfc *my_pfc;
+ u8 *prio_tc;
+
+ prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc;
+ my_pfc = dcb_en ? mlxsw_sp_port->dcb.pfc : NULL;
+
+ return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc,
+ pause_en, my_pfc);
+}
+
+static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
+ int err;
+
+ err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en);
+ if (err)
+ return err;
+ err = mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, mtu);
+ if (err)
+ goto err_span_port_mtu_update;
+ err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu);
+ if (err)
+ goto err_port_mtu_set;
+ dev->mtu = mtu;
+ return 0;
+
+err_port_mtu_set:
+ mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, dev->mtu);
+err_span_port_mtu_update:
+ mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en);
+ return err;
+}
+
+static int
+mlxsw_sp_port_get_sw_stats64(const struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp_port_pcpu_stats *p;
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ u32 tx_dropped = 0;
+ unsigned int start;
+ int i;
+
+ for_each_possible_cpu(i) {
+ p = per_cpu_ptr(mlxsw_sp_port->pcpu_stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&p->syncp);
+ rx_packets = p->rx_packets;
+ rx_bytes = p->rx_bytes;
+ tx_packets = p->tx_packets;
+ tx_bytes = p->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ stats->tx_packets += tx_packets;
+ stats->tx_bytes += tx_bytes;
+ /* tx_dropped is u32, updated without syncp protection. */
+ tx_dropped += p->tx_dropped;
+ }
+ stats->tx_dropped = tx_dropped;
+ return 0;
+}
+
+static bool mlxsw_sp_port_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ return true;
+ }
+
+ return false;
+}
+
+static int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev,
+ void *sp)
+{
+ switch (attr_id) {
+ case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+ return mlxsw_sp_port_get_sw_stats64(dev, sp);
+ }
+
+ return -EINVAL;
+}
+
+static int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp,
+ int prio, char *ppcnt_pl)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+ mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio);
+ return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl);
+}
+
+static int mlxsw_sp_port_get_hw_stats(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+ int err;
+
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT,
+ 0, ppcnt_pl);
+ if (err)
+ goto out;
+
+ stats->tx_packets =
+ mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl);
+ stats->rx_packets =
+ mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl);
+ stats->tx_bytes =
+ mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl);
+ stats->rx_bytes =
+ mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl);
+ stats->multicast =
+ mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl);
+
+ stats->rx_crc_errors =
+ mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl);
+ stats->rx_frame_errors =
+ mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl);
+
+ stats->rx_length_errors = (
+ mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl) +
+ mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl) +
+ mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl));
+
+ stats->rx_errors = (stats->rx_crc_errors +
+ stats->rx_frame_errors + stats->rx_length_errors);
+
+out:
+ return err;
+}
+
+static void
+mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
+ struct mlxsw_sp_port_xstats *xstats)
+{
+ char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+ int err, i;
+
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_EXT_CNT, 0,
+ ppcnt_pl);
+ if (!err)
+ xstats->ecn = mlxsw_reg_ppcnt_ecn_marked_get(ppcnt_pl);
+
+ for (i = 0; i < TC_MAX_QUEUE; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev,
+ MLXSW_REG_PPCNT_TC_CONG_TC,
+ i, ppcnt_pl);
+ if (!err)
+ xstats->wred_drop[i] =
+ mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl);
+
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->backlog[i] =
+ mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl);
+ xstats->tail_drop[i] =
+ mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
+ }
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+ i, ppcnt_pl);
+ if (err)
+ continue;
+
+ xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+ xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+ }
+}
+
+static void update_stats_cache(struct work_struct *work)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port =
+ container_of(work, struct mlxsw_sp_port,
+ periodic_hw_stats.update_dw.work);
+
+ if (!netif_carrier_ok(mlxsw_sp_port->dev))
+ /* Note: mlxsw_sp_port_down_wipe_counters() clears the cache as
+ * necessary when port goes down.
+ */
+ goto out;
+
+ mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev,
+ &mlxsw_sp_port->periodic_hw_stats.stats);
+ mlxsw_sp_port_get_hw_xstats(mlxsw_sp_port->dev,
+ &mlxsw_sp_port->periodic_hw_stats.xstats);
+
+out:
+ mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw,
+ MLXSW_HW_STATS_UPDATE_TIME);
+}
+
+/* Return the stats from a cache that is updated periodically,
+ * as this function might get called in an atomic context.
+ */
+static void
+mlxsw_sp_port_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ memcpy(stats, &mlxsw_sp_port->periodic_hw_stats.stats, sizeof(*stats));
+}
+
+static int __mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 vid_begin, u16 vid_end,
+ bool is_member, bool untagged)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char *spvm_pl;
+ int err;
+
+ spvm_pl = kmalloc(MLXSW_REG_SPVM_LEN, GFP_KERNEL);
+ if (!spvm_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_spvm_pack(spvm_pl, mlxsw_sp_port->local_port, vid_begin,
+ vid_end, is_member, untagged);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvm), spvm_pl);
+ kfree(spvm_pl);
+ return err;
+}
+
+int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
+ u16 vid_end, bool is_member, bool untagged)
+{
+ u16 vid, vid_e;
+ int err;
+
+ for (vid = vid_begin; vid <= vid_end;
+ vid += MLXSW_REG_SPVM_REC_MAX_COUNT) {
+ vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1),
+ vid_end);
+
+ err = __mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e,
+ is_member, untagged);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlxsw_sp_port_vlan_flush(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, *tmp;
+
+ list_for_each_entry_safe(mlxsw_sp_port_vlan, tmp,
+ &mlxsw_sp_port->vlans_list, list)
+ mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+}
+
+static struct mlxsw_sp_port_vlan *
+mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ bool untagged = vid == 1;
+ int err;
+
+ err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, untagged);
+ if (err)
+ return ERR_PTR(err);
+
+ mlxsw_sp_port_vlan = kzalloc(sizeof(*mlxsw_sp_port_vlan), GFP_KERNEL);
+ if (!mlxsw_sp_port_vlan) {
+ err = -ENOMEM;
+ goto err_port_vlan_alloc;
+ }
+
+ mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port;
+ mlxsw_sp_port_vlan->ref_count = 1;
+ mlxsw_sp_port_vlan->vid = vid;
+ list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list);
+
+ return mlxsw_sp_port_vlan;
+
+err_port_vlan_alloc:
+ mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+
+ list_del(&mlxsw_sp_port_vlan->list);
+ kfree(mlxsw_sp_port_vlan);
+ mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
+}
+
+struct mlxsw_sp_port_vlan *
+mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+ if (mlxsw_sp_port_vlan) {
+ mlxsw_sp_port_vlan->ref_count++;
+ return mlxsw_sp_port_vlan;
+ }
+
+ return mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid);
+}
+
+void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+{
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+
+ if (--mlxsw_sp_port_vlan->ref_count != 0)
+ return;
+
+ if (mlxsw_sp_port_vlan->bridge_port)
+ mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
+ else if (fid)
+ mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
+
+ mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
+}
+
+static int mlxsw_sp_port_add_vid(struct net_device *dev,
+ __be16 __always_unused proto, u16 vid)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ /* VLAN 0 is added to HW filter when device goes up, but it is
+ * reserved in our case, so simply return.
+ */
+ if (!vid)
+ return 0;
+
+ return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid));
+}
+
+static int mlxsw_sp_port_kill_vid(struct net_device *dev,
+ __be16 __always_unused proto, u16 vid)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ /* VLAN 0 is removed from HW filter when device goes down, but
+ * it is reserved in our case, so simply return.
+ */
+ if (!vid)
+ return 0;
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+ if (!mlxsw_sp_port_vlan)
+ return 0;
+ mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+
+ return 0;
+}
+
+static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name,
+ size_t len)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ return mlxsw_core_port_get_phys_port_name(mlxsw_sp_port->mlxsw_sp->core,
+ mlxsw_sp_port->local_port,
+ name, len);
+}
+
+static struct mlxsw_sp_port_mall_tc_entry *
+mlxsw_sp_port_mall_tc_entry_find(struct mlxsw_sp_port *port,
+ unsigned long cookie) {
+ struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
+
+ list_for_each_entry(mall_tc_entry, &port->mall_tc_list, list)
+ if (mall_tc_entry->cookie == cookie)
+ return mall_tc_entry;
+
+ return NULL;
+}
+
+static int
+mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_port_mall_mirror_tc_entry *mirror,
+ const struct tc_action *a,
+ bool ingress)
+{
+ enum mlxsw_sp_span_type span_type;
+ struct net_device *to_dev;
+
+ to_dev = tcf_mirred_dev(a);
+ if (!to_dev) {
+ netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
+ return -EINVAL;
+ }
+
+ mirror->ingress = ingress;
+ span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
+ return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
+ true, &mirror->span_id);
+}
+
+static void
+mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_port_mall_mirror_tc_entry *mirror)
+{
+ enum mlxsw_sp_span_type span_type;
+
+ span_type = mirror->ingress ?
+ MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
+ mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
+ span_type, true);
+}
+
+static int
+mlxsw_sp_port_add_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_cls_matchall_offload *cls,
+ const struct tc_action *a,
+ bool ingress)
+{
+ int err;
+
+ if (!mlxsw_sp_port->sample)
+ return -EOPNOTSUPP;
+ if (rtnl_dereference(mlxsw_sp_port->sample->psample_group)) {
+ netdev_err(mlxsw_sp_port->dev, "sample already active\n");
+ return -EEXIST;
+ }
+ if (tcf_sample_rate(a) > MLXSW_REG_MPSC_RATE_MAX) {
+ netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ rcu_assign_pointer(mlxsw_sp_port->sample->psample_group,
+ tcf_sample_psample_group(a));
+ mlxsw_sp_port->sample->truncate = tcf_sample_truncate(a);
+ mlxsw_sp_port->sample->trunc_size = tcf_sample_trunc_size(a);
+ mlxsw_sp_port->sample->rate = tcf_sample_rate(a);
+
+ err = mlxsw_sp_port_sample_set(mlxsw_sp_port, true, tcf_sample_rate(a));
+ if (err)
+ goto err_port_sample_set;
+ return 0;
+
+err_port_sample_set:
+ RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
+ return err;
+}
+
+static void
+mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ if (!mlxsw_sp_port->sample)
+ return;
+
+ mlxsw_sp_port_sample_set(mlxsw_sp_port, false, 1);
+ RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
+}
+
+static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_cls_matchall_offload *f,
+ bool ingress)
+{
+ struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
+ __be16 protocol = f->common.protocol;
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+ int err;
+
+ if (!tcf_exts_has_one_action(f->exts)) {
+ netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
+ if (!mall_tc_entry)
+ return -ENOMEM;
+ mall_tc_entry->cookie = f->cookie;
+
+ a = tcf_exts_first_action(f->exts);
+
+ if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
+ struct mlxsw_sp_port_mall_mirror_tc_entry *mirror;
+
+ mall_tc_entry->type = MLXSW_SP_PORT_MALL_MIRROR;
+ mirror = &mall_tc_entry->mirror;
+ err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port,
+ mirror, a, ingress);
+ } else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) {
+ mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE;
+ err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f,
+ a, ingress);
+ } else {
+ err = -EOPNOTSUPP;
+ }
+
+ if (err)
+ goto err_add_action;
+
+ list_add_tail(&mall_tc_entry->list, &mlxsw_sp_port->mall_tc_list);
+ return 0;
+
+err_add_action:
+ kfree(mall_tc_entry);
+ return err;
+}
+
+static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_cls_matchall_offload *f)
+{
+ struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
+
+ mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port,
+ f->cookie);
+ if (!mall_tc_entry) {
+ netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
+ return;
+ }
+ list_del(&mall_tc_entry->list);
+
+ switch (mall_tc_entry->type) {
+ case MLXSW_SP_PORT_MALL_MIRROR:
+ mlxsw_sp_port_del_cls_matchall_mirror(mlxsw_sp_port,
+ &mall_tc_entry->mirror);
+ break;
+ case MLXSW_SP_PORT_MALL_SAMPLE:
+ mlxsw_sp_port_del_cls_matchall_sample(mlxsw_sp_port);
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ kfree(mall_tc_entry);
+}
+
+static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_cls_matchall_offload *f,
+ bool ingress)
+{
+ switch (f->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
+ ingress);
+ case TC_CLSMATCHALL_DESTROY:
+ mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_acl_block *acl_block,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_acl_block_mlxsw_sp(acl_block);
+
+ switch (f->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlxsw_sp_flower_replace(mlxsw_sp, acl_block, f);
+ case TC_CLSFLOWER_DESTROY:
+ mlxsw_sp_flower_destroy(mlxsw_sp, acl_block, f);
+ return 0;
+ case TC_CLSFLOWER_STATS:
+ return mlxsw_sp_flower_stats(mlxsw_sp, acl_block, f);
+ case TC_CLSFLOWER_TMPLT_CREATE:
+ return mlxsw_sp_flower_tmplt_create(mlxsw_sp, acl_block, f);
+ case TC_CLSFLOWER_TMPLT_DESTROY:
+ mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, acl_block, f);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlxsw_sp_setup_tc_block_cb_matchall(enum tc_setup_type type,
+ void *type_data,
+ void *cb_priv, bool ingress)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ if (!tc_cls_can_offload_and_chain0(mlxsw_sp_port->dev,
+ type_data))
+ return -EOPNOTSUPP;
+
+ return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data,
+ ingress);
+ case TC_SETUP_CLSFLOWER:
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlxsw_sp_setup_tc_block_cb_matchall_ig(enum tc_setup_type type,
+ void *type_data,
+ void *cb_priv)
+{
+ return mlxsw_sp_setup_tc_block_cb_matchall(type, type_data,
+ cb_priv, true);
+}
+
+static int mlxsw_sp_setup_tc_block_cb_matchall_eg(enum tc_setup_type type,
+ void *type_data,
+ void *cb_priv)
+{
+ return mlxsw_sp_setup_tc_block_cb_matchall(type, type_data,
+ cb_priv, false);
+}
+
+static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ struct mlxsw_sp_acl_block *acl_block = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ return 0;
+ case TC_SETUP_CLSFLOWER:
+ if (mlxsw_sp_acl_block_disabled(acl_block))
+ return -EOPNOTSUPP;
+
+ return mlxsw_sp_setup_tc_cls_flower(acl_block, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tcf_block *block, bool ingress,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_acl_block *acl_block;
+ struct tcf_block_cb *block_cb;
+ int err;
+
+ block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower,
+ mlxsw_sp);
+ if (!block_cb) {
+ acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, block->net);
+ if (!acl_block)
+ return -ENOMEM;
+ block_cb = __tcf_block_cb_register(block,
+ mlxsw_sp_setup_tc_block_cb_flower,
+ mlxsw_sp, acl_block, extack);
+ if (IS_ERR(block_cb)) {
+ err = PTR_ERR(block_cb);
+ goto err_cb_register;
+ }
+ } else {
+ acl_block = tcf_block_cb_priv(block_cb);
+ }
+ tcf_block_cb_incref(block_cb);
+ err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block,
+ mlxsw_sp_port, ingress);
+ if (err)
+ goto err_block_bind;
+
+ if (ingress)
+ mlxsw_sp_port->ing_acl_block = acl_block;
+ else
+ mlxsw_sp_port->eg_acl_block = acl_block;
+
+ return 0;
+
+err_block_bind:
+ if (!tcf_block_cb_decref(block_cb)) {
+ __tcf_block_cb_unregister(block, block_cb);
+err_cb_register:
+ mlxsw_sp_acl_block_destroy(acl_block);
+ }
+ return err;
+}
+
+static void
+mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tcf_block *block, bool ingress)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_acl_block *acl_block;
+ struct tcf_block_cb *block_cb;
+ int err;
+
+ block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower,
+ mlxsw_sp);
+ if (!block_cb)
+ return;
+
+ if (ingress)
+ mlxsw_sp_port->ing_acl_block = NULL;
+ else
+ mlxsw_sp_port->eg_acl_block = NULL;
+
+ acl_block = tcf_block_cb_priv(block_cb);
+ err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block,
+ mlxsw_sp_port, ingress);
+ if (!err && !tcf_block_cb_decref(block_cb)) {
+ __tcf_block_cb_unregister(block, block_cb);
+ mlxsw_sp_acl_block_destroy(acl_block);
+ }
+}
+
+static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_block_offload *f)
+{
+ tc_setup_cb_t *cb;
+ bool ingress;
+ int err;
+
+ if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+ cb = mlxsw_sp_setup_tc_block_cb_matchall_ig;
+ ingress = true;
+ } else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+ cb = mlxsw_sp_setup_tc_block_cb_matchall_eg;
+ ingress = false;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
+ mlxsw_sp_port, f->extack);
+ if (err)
+ return err;
+ err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port,
+ f->block, ingress,
+ f->extack);
+ if (err) {
+ tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+ return err;
+ }
+ return 0;
+ case TC_BLOCK_UNBIND:
+ mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port,
+ f->block, ingress);
+ tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data);
+ case TC_SETUP_QDISC_RED:
+ return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data);
+ case TC_SETUP_QDISC_PRIO:
+ return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+
+static int mlxsw_sp_feature_hw_tc(struct net_device *dev, bool enable)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ if (!enable) {
+ if (mlxsw_sp_acl_block_rule_count(mlxsw_sp_port->ing_acl_block) ||
+ mlxsw_sp_acl_block_rule_count(mlxsw_sp_port->eg_acl_block) ||
+ !list_empty(&mlxsw_sp_port->mall_tc_list)) {
+ netdev_err(dev, "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+ return -EINVAL;
+ }
+ mlxsw_sp_acl_block_disable_inc(mlxsw_sp_port->ing_acl_block);
+ mlxsw_sp_acl_block_disable_inc(mlxsw_sp_port->eg_acl_block);
+ } else {
+ mlxsw_sp_acl_block_disable_dec(mlxsw_sp_port->ing_acl_block);
+ mlxsw_sp_acl_block_disable_dec(mlxsw_sp_port->eg_acl_block);
+ }
+ return 0;
+}
+
+typedef int (*mlxsw_sp_feature_handler)(struct net_device *dev, bool enable);
+
+static int mlxsw_sp_handle_feature(struct net_device *dev,
+ netdev_features_t wanted_features,
+ netdev_features_t feature,
+ mlxsw_sp_feature_handler feature_handler)
+{
+ netdev_features_t changes = wanted_features ^ dev->features;
+ bool enable = !!(wanted_features & feature);
+ int err;
+
+ if (!(changes & feature))
+ return 0;
+
+ err = feature_handler(dev, enable);
+ if (err) {
+ netdev_err(dev, "%s feature %pNF failed, err %d\n",
+ enable ? "Enable" : "Disable", &feature, err);
+ return err;
+ }
+
+ if (enable)
+ dev->features |= feature;
+ else
+ dev->features &= ~feature;
+
+ return 0;
+}
+static int mlxsw_sp_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ return mlxsw_sp_handle_feature(dev, features, NETIF_F_HW_TC,
+ mlxsw_sp_feature_hw_tc);
+}
+
+static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
+ .ndo_open = mlxsw_sp_port_open,
+ .ndo_stop = mlxsw_sp_port_stop,
+ .ndo_start_xmit = mlxsw_sp_port_xmit,
+ .ndo_setup_tc = mlxsw_sp_setup_tc,
+ .ndo_set_rx_mode = mlxsw_sp_set_rx_mode,
+ .ndo_set_mac_address = mlxsw_sp_port_set_mac_address,
+ .ndo_change_mtu = mlxsw_sp_port_change_mtu,
+ .ndo_get_stats64 = mlxsw_sp_port_get_stats64,
+ .ndo_has_offload_stats = mlxsw_sp_port_has_offload_stats,
+ .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats,
+ .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid,
+ .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid,
+ .ndo_get_phys_port_name = mlxsw_sp_port_get_phys_port_name,
+ .ndo_set_features = mlxsw_sp_set_features,
+};
+
+static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+ strlcpy(drvinfo->driver, mlxsw_sp->bus_info->device_kind,
+ sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, mlxsw_sp_driver_version,
+ sizeof(drvinfo->version));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d",
+ mlxsw_sp->bus_info->fw_rev.major,
+ mlxsw_sp->bus_info->fw_rev.minor,
+ mlxsw_sp->bus_info->fw_rev.subminor);
+ strlcpy(drvinfo->bus_info, mlxsw_sp->bus_info->device_name,
+ sizeof(drvinfo->bus_info));
+}
+
+static void mlxsw_sp_port_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ pause->rx_pause = mlxsw_sp_port->link.rx_pause;
+ pause->tx_pause = mlxsw_sp_port->link.tx_pause;
+}
+
+static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ethtool_pauseparam *pause)
+{
+ char pfcc_pl[MLXSW_REG_PFCC_LEN];
+
+ mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_pfcc_pprx_set(pfcc_pl, pause->rx_pause);
+ mlxsw_reg_pfcc_pptx_set(pfcc_pl, pause->tx_pause);
+
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
+ pfcc_pl);
+}
+
+static int mlxsw_sp_port_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ bool pause_en = pause->tx_pause || pause->rx_pause;
+ int err;
+
+ if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) {
+ netdev_err(dev, "PFC already enabled on port\n");
+ return -EINVAL;
+ }
+
+ if (pause->autoneg) {
+ netdev_err(dev, "PAUSE frames autonegotiation isn't supported\n");
+ return -EINVAL;
+ }
+
+ err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en);
+ if (err) {
+ netdev_err(dev, "Failed to configure port's headroom\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_pause_set(mlxsw_sp_port, pause);
+ if (err) {
+ netdev_err(dev, "Failed to set PAUSE parameters\n");
+ goto err_port_pause_configure;
+ }
+
+ mlxsw_sp_port->link.rx_pause = pause->rx_pause;
+ mlxsw_sp_port->link.tx_pause = pause->tx_pause;
+
+ return 0;
+
+err_port_pause_configure:
+ pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
+ mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en);
+ return err;
+}
+
+struct mlxsw_sp_port_hw_stats {
+ char str[ETH_GSTRING_LEN];
+ u64 (*getter)(const char *payload);
+ bool cells_bytes;
+};
+
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = {
+ {
+ .str = "a_frames_transmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get,
+ },
+ {
+ .str = "a_frames_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_frames_received_ok_get,
+ },
+ {
+ .str = "a_frame_check_sequence_errors",
+ .getter = mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get,
+ },
+ {
+ .str = "a_alignment_errors",
+ .getter = mlxsw_reg_ppcnt_a_alignment_errors_get,
+ },
+ {
+ .str = "a_octets_transmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_octets_transmitted_ok_get,
+ },
+ {
+ .str = "a_octets_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_octets_received_ok_get,
+ },
+ {
+ .str = "a_multicast_frames_xmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get,
+ },
+ {
+ .str = "a_broadcast_frames_xmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get,
+ },
+ {
+ .str = "a_multicast_frames_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get,
+ },
+ {
+ .str = "a_broadcast_frames_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get,
+ },
+ {
+ .str = "a_in_range_length_errors",
+ .getter = mlxsw_reg_ppcnt_a_in_range_length_errors_get,
+ },
+ {
+ .str = "a_out_of_range_length_field",
+ .getter = mlxsw_reg_ppcnt_a_out_of_range_length_field_get,
+ },
+ {
+ .str = "a_frame_too_long_errors",
+ .getter = mlxsw_reg_ppcnt_a_frame_too_long_errors_get,
+ },
+ {
+ .str = "a_symbol_error_during_carrier",
+ .getter = mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get,
+ },
+ {
+ .str = "a_mac_control_frames_transmitted",
+ .getter = mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get,
+ },
+ {
+ .str = "a_mac_control_frames_received",
+ .getter = mlxsw_reg_ppcnt_a_mac_control_frames_received_get,
+ },
+ {
+ .str = "a_unsupported_opcodes_received",
+ .getter = mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get,
+ },
+ {
+ .str = "a_pause_mac_ctrl_frames_received",
+ .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_received_get,
+ },
+ {
+ .str = "a_pause_mac_ctrl_frames_xmitted",
+ .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_transmitted_get,
+ },
+};
+
+#define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats)
+
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2819_stats[] = {
+ {
+ .str = "ether_pkts64octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get,
+ },
+ {
+ .str = "ether_pkts65to127octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts65to127octets_get,
+ },
+ {
+ .str = "ether_pkts128to255octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts128to255octets_get,
+ },
+ {
+ .str = "ether_pkts256to511octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts256to511octets_get,
+ },
+ {
+ .str = "ether_pkts512to1023octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts512to1023octets_get,
+ },
+ {
+ .str = "ether_pkts1024to1518octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts1024to1518octets_get,
+ },
+ {
+ .str = "ether_pkts1519to2047octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts1519to2047octets_get,
+ },
+ {
+ .str = "ether_pkts2048to4095octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts2048to4095octets_get,
+ },
+ {
+ .str = "ether_pkts4096to8191octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts4096to8191octets_get,
+ },
+ {
+ .str = "ether_pkts8192to10239octets",
+ .getter = mlxsw_reg_ppcnt_ether_stats_pkts8192to10239octets_get,
+ },
+};
+
+#define MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN \
+ ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2819_stats)
+
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = {
+ {
+ .str = "rx_octets_prio",
+ .getter = mlxsw_reg_ppcnt_rx_octets_get,
+ },
+ {
+ .str = "rx_frames_prio",
+ .getter = mlxsw_reg_ppcnt_rx_frames_get,
+ },
+ {
+ .str = "tx_octets_prio",
+ .getter = mlxsw_reg_ppcnt_tx_octets_get,
+ },
+ {
+ .str = "tx_frames_prio",
+ .getter = mlxsw_reg_ppcnt_tx_frames_get,
+ },
+ {
+ .str = "rx_pause_prio",
+ .getter = mlxsw_reg_ppcnt_rx_pause_get,
+ },
+ {
+ .str = "rx_pause_duration_prio",
+ .getter = mlxsw_reg_ppcnt_rx_pause_duration_get,
+ },
+ {
+ .str = "tx_pause_prio",
+ .getter = mlxsw_reg_ppcnt_tx_pause_get,
+ },
+ {
+ .str = "tx_pause_duration_prio",
+ .getter = mlxsw_reg_ppcnt_tx_pause_duration_get,
+ },
+};
+
+#define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats)
+
+static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = {
+ {
+ .str = "tc_transmit_queue_tc",
+ .getter = mlxsw_reg_ppcnt_tc_transmit_queue_get,
+ .cells_bytes = true,
+ },
+ {
+ .str = "tc_no_buffer_discard_uc_tc",
+ .getter = mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get,
+ },
+};
+
+#define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats)
+
+#define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \
+ MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \
+ (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \
+ IEEE_8021QAZ_MAX_TCS) + \
+ (MLXSW_SP_PORT_HW_TC_STATS_LEN * \
+ TC_MAX_QUEUE))
+
+static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_HW_PRIO_STATS_LEN; i++) {
+ snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d",
+ mlxsw_sp_port_hw_prio_stats[i].str, prio);
+ *p += ETH_GSTRING_LEN;
+ }
+}
+
+static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_HW_TC_STATS_LEN; i++) {
+ snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d",
+ mlxsw_sp_port_hw_tc_stats[i].str, tc);
+ *p += ETH_GSTRING_LEN;
+ }
+}
+
+static void mlxsw_sp_port_get_strings(struct net_device *dev,
+ u32 stringset, u8 *data)
+{
+ u8 *p = data;
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) {
+ memcpy(p, mlxsw_sp_port_hw_stats[i].str,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; i++) {
+ memcpy(p, mlxsw_sp_port_hw_rfc_2819_stats[i].str,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port_get_prio_strings(&p, i);
+
+ for (i = 0; i < TC_MAX_QUEUE; i++)
+ mlxsw_sp_port_get_tc_strings(&p, i);
+
+ break;
+ }
+}
+
+static int mlxsw_sp_port_set_phys_id(struct net_device *dev,
+ enum ethtool_phys_id_state state)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char mlcr_pl[MLXSW_REG_MLCR_LEN];
+ bool active;
+
+ switch (state) {
+ case ETHTOOL_ID_ACTIVE:
+ active = true;
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ active = false;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ mlxsw_reg_mlcr_pack(mlcr_pl, mlxsw_sp_port->local_port, active);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl);
+}
+
+static int
+mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats,
+ int *p_len, enum mlxsw_reg_ppcnt_grp grp)
+{
+ switch (grp) {
+ case MLXSW_REG_PPCNT_IEEE_8023_CNT:
+ *p_hw_stats = mlxsw_sp_port_hw_stats;
+ *p_len = MLXSW_SP_PORT_HW_STATS_LEN;
+ break;
+ case MLXSW_REG_PPCNT_RFC_2819_CNT:
+ *p_hw_stats = mlxsw_sp_port_hw_rfc_2819_stats;
+ *p_len = MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN;
+ break;
+ case MLXSW_REG_PPCNT_PRIO_CNT:
+ *p_hw_stats = mlxsw_sp_port_hw_prio_stats;
+ *p_len = MLXSW_SP_PORT_HW_PRIO_STATS_LEN;
+ break;
+ case MLXSW_REG_PPCNT_TC_CNT:
+ *p_hw_stats = mlxsw_sp_port_hw_tc_stats;
+ *p_len = MLXSW_SP_PORT_HW_TC_STATS_LEN;
+ break;
+ default:
+ WARN_ON(1);
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static void __mlxsw_sp_port_get_stats(struct net_device *dev,
+ enum mlxsw_reg_ppcnt_grp grp, int prio,
+ u64 *data, int data_index)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_port_hw_stats *hw_stats;
+ char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+ int i, len;
+ int err;
+
+ err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp);
+ if (err)
+ return;
+ mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl);
+ for (i = 0; i < len; i++) {
+ data[data_index + i] = hw_stats[i].getter(ppcnt_pl);
+ if (!hw_stats[i].cells_bytes)
+ continue;
+ data[data_index + i] = mlxsw_sp_cells_bytes(mlxsw_sp,
+ data[data_index + i]);
+ }
+}
+
+static void mlxsw_sp_port_get_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ int i, data_index = 0;
+
+ /* IEEE 802.3 Counters */
+ __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, 0,
+ data, data_index);
+ data_index = MLXSW_SP_PORT_HW_STATS_LEN;
+
+ /* RFC 2819 Counters */
+ __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, 0,
+ data, data_index);
+ data_index += MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN;
+
+ /* Per-Priority Counters */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_PRIO_CNT, i,
+ data, data_index);
+ data_index += MLXSW_SP_PORT_HW_PRIO_STATS_LEN;
+ }
+
+ /* Per-TC Counters */
+ for (i = 0; i < TC_MAX_QUEUE; i++) {
+ __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_TC_CNT, i,
+ data, data_index);
+ data_index += MLXSW_SP_PORT_HW_TC_STATS_LEN;
+ }
+}
+
+static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return MLXSW_SP_PORT_ETHTOOL_STATS_LEN;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+struct mlxsw_sp_port_link_mode {
+ enum ethtool_link_mode_bit_indices mask_ethtool;
+ u32 mask;
+ u32 speed;
+};
+
+static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = {
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
+ .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ .speed = SPEED_100,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
+ MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
+ .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+ .speed = SPEED_1000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T,
+ .mask_ethtool = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ .speed = SPEED_10000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+ .speed = SPEED_10000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR,
+ .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+ .speed = SPEED_10000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2,
+ .mask_ethtool = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT,
+ .speed = SPEED_20000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+ .speed = SPEED_40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+ .speed = SPEED_40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+ .speed = SPEED_40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
+ .speed = SPEED_40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR,
+ .mask_ethtool = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+ .speed = SPEED_25000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR,
+ .mask_ethtool = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+ .speed = SPEED_25000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
+ .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+ .speed = SPEED_25000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
+ .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+ .speed = SPEED_25000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2,
+ .mask_ethtool = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+ .speed = SPEED_50000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2,
+ .mask_ethtool = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+ .speed = SPEED_50000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2,
+ .mask_ethtool = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+ .speed = SPEED_50000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT,
+ .speed = SPEED_56000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT,
+ .speed = SPEED_56000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT,
+ .speed = SPEED_56000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT,
+ .speed = SPEED_56000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+ .speed = SPEED_100000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+ .speed = SPEED_100000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+ .speed = SPEED_100000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
+ .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
+ .speed = SPEED_100000,
+ },
+};
+
+#define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode)
+
+static void
+mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto,
+ struct ethtool_link_ksettings *cmd)
+{
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_SGMII))
+ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX))
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane);
+}
+
+static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask)
+ __set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+ mode);
+ }
+}
+
+static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto,
+ struct ethtool_link_ksettings *cmd)
+{
+ u32 speed = SPEED_UNKNOWN;
+ u8 duplex = DUPLEX_UNKNOWN;
+ int i;
+
+ if (!carrier_ok)
+ goto out;
+
+ for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) {
+ speed = mlxsw_sp_port_link_mode[i].speed;
+ duplex = DUPLEX_FULL;
+ break;
+ }
+ }
+out:
+ cmd->base.speed = speed;
+ cmd->base.duplex = duplex;
+}
+
+static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto)
+{
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_SGMII))
+ return PORT_FIBRE;
+
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4))
+ return PORT_DA;
+
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4))
+ return PORT_NONE;
+
+ return PORT_OTHER;
+}
+
+static u32
+mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd)
+{
+ u32 ptys_proto = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
+ if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+ cmd->link_modes.advertising))
+ ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
+ }
+ return ptys_proto;
+}
+
+static u32 mlxsw_sp_to_ptys_speed(u32 speed)
+{
+ u32 ptys_proto = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
+ if (speed == mlxsw_sp_port_link_mode[i].speed)
+ ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
+ }
+ return ptys_proto;
+}
+
+static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed)
+{
+ u32 ptys_proto = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
+ if (mlxsw_sp_port_link_mode[i].speed <= upper_speed)
+ ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
+ }
+ return ptys_proto;
+}
+
+static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap,
+ struct ethtool_link_ksettings *cmd)
+{
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause);
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+
+ mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd);
+ mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported);
+}
+
+static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg,
+ struct ethtool_link_ksettings *cmd)
+{
+ if (!autoneg)
+ return;
+
+ ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+ mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising);
+}
+
+static void
+mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status,
+ struct ethtool_link_ksettings *cmd)
+{
+ if (autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp)
+ return;
+
+ ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg);
+ mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising);
+}
+
+static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp;
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u8 autoneg_status;
+ bool autoneg;
+ int err;
+
+ autoneg = mlxsw_sp_port->link.autoneg;
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+ if (err)
+ return err;
+ mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+ &eth_proto_oper);
+
+ mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd);
+
+ mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd);
+
+ eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl);
+ autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl);
+ mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd);
+
+ cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+ cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper);
+ mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper,
+ cmd);
+
+ return 0;
+}
+
+static int
+mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 eth_proto_cap, eth_proto_new;
+ bool autoneg;
+ int err;
+
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+ if (err)
+ return err;
+ mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
+
+ autoneg = cmd->base.autoneg == AUTONEG_ENABLE;
+ if (!autoneg && cmd->base.speed == SPEED_56000) {
+ netdev_err(dev, "56G not supported with autoneg off\n");
+ return -EINVAL;
+ }
+ eth_proto_new = autoneg ?
+ mlxsw_sp_to_ptys_advert_link(cmd) :
+ mlxsw_sp_to_ptys_speed(cmd->base.speed);
+
+ eth_proto_new = eth_proto_new & eth_proto_cap;
+ if (!eth_proto_new) {
+ netdev_err(dev, "No supported speed requested\n");
+ return -EINVAL;
+ }
+
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
+ eth_proto_new, autoneg);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+ if (err)
+ return err;
+
+ mlxsw_sp_port->link.autoneg = autoneg;
+
+ if (!netif_running(dev))
+ return 0;
+
+ mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
+ mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
+
+ return 0;
+}
+
+static int mlxsw_sp_flash_device(struct net_device *dev,
+ struct ethtool_flash *flash)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ const struct firmware *firmware;
+ int err;
+
+ if (flash->region != ETHTOOL_FLASH_ALL_REGIONS)
+ return -EOPNOTSUPP;
+
+ dev_hold(dev);
+ rtnl_unlock();
+
+ err = request_firmware_direct(&firmware, flash->data, &dev->dev);
+ if (err)
+ goto out;
+ err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware);
+ release_firmware(firmware);
+out:
+ rtnl_lock();
+ dev_put(dev);
+ return err;
+}
+
+#define MLXSW_SP_I2C_ADDR_LOW 0x50
+#define MLXSW_SP_I2C_ADDR_HIGH 0x51
+#define MLXSW_SP_EEPROM_PAGE_LENGTH 256
+
+static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 offset, u16 size, void *data,
+ unsigned int *p_read_size)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE];
+ char mcia_pl[MLXSW_REG_MCIA_LEN];
+ u16 i2c_addr;
+ int status;
+ int err;
+
+ size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
+
+ if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH &&
+ offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH)
+ /* Cross pages read, read until offset 256 in low page */
+ size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset;
+
+ i2c_addr = MLXSW_SP_I2C_ADDR_LOW;
+ if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) {
+ i2c_addr = MLXSW_SP_I2C_ADDR_HIGH;
+ offset -= MLXSW_SP_EEPROM_PAGE_LENGTH;
+ }
+
+ mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module,
+ 0, 0, offset, size, i2c_addr);
+
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl);
+ if (err)
+ return err;
+
+ status = mlxsw_reg_mcia_status_get(mcia_pl);
+ if (status)
+ return -EIO;
+
+ mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp);
+ memcpy(data, eeprom_tmp, size);
+ *p_read_size = size;
+
+ return 0;
+}
+
+enum mlxsw_sp_eeprom_module_info_rev_id {
+ MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00,
+ MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01,
+ MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03,
+};
+
+enum mlxsw_sp_eeprom_module_info_id {
+ MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP = 0x03,
+ MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP = 0x0C,
+ MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D,
+ MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11,
+};
+
+enum mlxsw_sp_eeprom_module_info {
+ MLXSW_SP_EEPROM_MODULE_INFO_ID,
+ MLXSW_SP_EEPROM_MODULE_INFO_REV_ID,
+ MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
+};
+
+static int mlxsw_sp_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
+ u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE];
+ u8 module_rev_id, module_id;
+ unsigned int read_size;
+ int err;
+
+ err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0,
+ MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
+ module_info, &read_size);
+ if (err)
+ return err;
+
+ if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE)
+ return -EIO;
+
+ module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID];
+ module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID];
+
+ switch (module_id) {
+ case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+ case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS:
+ case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28:
+ if (module_id == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 ||
+ module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) {
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ }
+ break;
+ case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP:
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
+ int offset = ee->offset;
+ unsigned int read_size;
+ int i = 0;
+ int err;
+
+ if (!ee->len)
+ return -EINVAL;
+
+ memset(data, 0, ee->len);
+
+ while (i < ee->len) {
+ err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, offset,
+ ee->len - i, data + i,
+ &read_size);
+ if (err) {
+ netdev_err(mlxsw_sp_port->dev, "Eeprom query failed\n");
+ return err;
+ }
+
+ i += read_size;
+ offset += read_size;
+ }
+
+ return 0;
+}
+
+static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
+ .get_drvinfo = mlxsw_sp_port_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_pauseparam = mlxsw_sp_port_get_pauseparam,
+ .set_pauseparam = mlxsw_sp_port_set_pauseparam,
+ .get_strings = mlxsw_sp_port_get_strings,
+ .set_phys_id = mlxsw_sp_port_set_phys_id,
+ .get_ethtool_stats = mlxsw_sp_port_get_stats,
+ .get_sset_count = mlxsw_sp_port_get_sset_count,
+ .get_link_ksettings = mlxsw_sp_port_get_link_ksettings,
+ .set_link_ksettings = mlxsw_sp_port_set_link_ksettings,
+ .flash_device = mlxsw_sp_flash_device,
+ .get_module_info = mlxsw_sp_get_module_info,
+ .get_module_eeprom = mlxsw_sp_get_module_eeprom,
+};
+
+static int
+mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u32 upper_speed = MLXSW_SP_PORT_BASE_SPEED * width;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 eth_proto_admin;
+
+ eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
+ eth_proto_admin, mlxsw_sp_port->link.autoneg);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
+ bool dwrr, u8 dwrr_weight)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+ mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+ next_index);
+ mlxsw_reg_qeec_de_set(qeec_pl, true);
+ mlxsw_reg_qeec_dwrr_set(qeec_pl, dwrr);
+ mlxsw_reg_qeec_dwrr_weight_set(qeec_pl, dwrr_weight);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index, u32 maxrate)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+ mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+ next_index);
+ mlxsw_reg_qeec_mase_set(qeec_pl, true);
+ mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index, u32 minrate)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+ mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+ next_index);
+ mlxsw_reg_qeec_mise_set(qeec_pl, true);
+ mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 switch_prio, u8 tclass)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qtct_pl[MLXSW_REG_QTCT_LEN];
+
+ mlxsw_reg_qtct_pack(qtct_pl, mlxsw_sp_port->local_port, switch_prio,
+ tclass);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtct), qtct_pl);
+}
+
+static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err, i;
+
+ /* Setup the elements hierarcy, so that each TC is linked to
+ * one subgroup, which are all member in the same group.
+ */
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false,
+ 0);
+ if (err)
+ return err;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ 0, false, 0);
+ if (err)
+ return err;
+ }
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC, i, i,
+ false, 0);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i + 8, i,
+ true, 100);
+ if (err)
+ return err;
+ }
+
+ /* Make sure the max shaper is disabled in all hierarchies that
+ * support it.
+ */
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ i, 0,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
+ }
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i, i,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i + 8, i,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
+ }
+
+ /* Configure the min shaper for multicast TCs. */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i + 8, i,
+ MLXSW_REG_QEEC_MIS_MIN);
+ if (err)
+ return err;
+ }
+
+ /* Map all priorities to traffic class 0. */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qtctm_pl[MLXSW_REG_QTCTM_LEN];
+
+ mlxsw_reg_qtctm_pack(qtctm_pl, mlxsw_sp_port->local_port, enable);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtctm), qtctm_pl);
+}
+
+static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ bool split, u8 module, u8 width, u8 lane)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct net_device *dev;
+ int err;
+
+ err = mlxsw_core_port_init(mlxsw_sp->core, local_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n",
+ local_port);
+ return err;
+ }
+
+ dev = alloc_etherdev(sizeof(struct mlxsw_sp_port));
+ if (!dev) {
+ err = -ENOMEM;
+ goto err_alloc_etherdev;
+ }
+ SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev);
+ mlxsw_sp_port = netdev_priv(dev);
+ mlxsw_sp_port->dev = dev;
+ mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
+ mlxsw_sp_port->local_port = local_port;
+ mlxsw_sp_port->pvid = 1;
+ mlxsw_sp_port->split = split;
+ mlxsw_sp_port->mapping.module = module;
+ mlxsw_sp_port->mapping.width = width;
+ mlxsw_sp_port->mapping.lane = lane;
+ mlxsw_sp_port->link.autoneg = 1;
+ INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list);
+ INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list);
+
+ mlxsw_sp_port->pcpu_stats =
+ netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats);
+ if (!mlxsw_sp_port->pcpu_stats) {
+ err = -ENOMEM;
+ goto err_alloc_stats;
+ }
+
+ mlxsw_sp_port->sample = kzalloc(sizeof(*mlxsw_sp_port->sample),
+ GFP_KERNEL);
+ if (!mlxsw_sp_port->sample) {
+ err = -ENOMEM;
+ goto err_alloc_sample;
+ }
+
+ INIT_DELAYED_WORK(&mlxsw_sp_port->periodic_hw_stats.update_dw,
+ &update_stats_cache);
+
+ dev->netdev_ops = &mlxsw_sp_port_netdev_ops;
+ dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops;
+
+ err = mlxsw_sp_port_module_map(mlxsw_sp_port, module, width, lane);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to map module\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_module_map;
+ }
+
+ err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_swid_set;
+ }
+
+ err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n",
+ mlxsw_sp_port->local_port);
+ goto err_dev_addr_init;
+ }
+
+ netif_carrier_off(dev);
+
+ dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG |
+ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
+ dev->hw_features |= NETIF_F_HW_TC;
+
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
+
+ /* Each packet needs to have a Tx header (metadata) on top all other
+ * headers.
+ */
+ dev->needed_headroom = MLXSW_TXHDR_LEN;
+
+ err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set system port mapping\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_system_port_mapping_set;
+ }
+
+ err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_speed_by_width_set;
+ }
+
+ err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, ETH_DATA_LEN);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set MTU\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_mtu_set;
+ }
+
+ err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
+ if (err)
+ goto err_port_admin_status_set;
+
+ err = mlxsw_sp_port_buffers_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize buffers\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_buffers_init;
+ }
+
+ err = mlxsw_sp_port_ets_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize ETS\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_ets_init;
+ }
+
+ err = mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, true);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize TC MC mode\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_tc_mc_mode;
+ }
+
+ /* ETS and buffers must be initialized before DCB. */
+ err = mlxsw_sp_port_dcb_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize DCB\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_dcb_init;
+ }
+
+ err = mlxsw_sp_port_fids_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize FIDs\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_fids_init;
+ }
+
+ err = mlxsw_sp_tc_qdisc_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize TC qdiscs\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_qdiscs_init;
+ }
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
+ if (IS_ERR(mlxsw_sp_port_vlan)) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n",
+ mlxsw_sp_port->local_port);
+ err = PTR_ERR(mlxsw_sp_port_vlan);
+ goto err_port_vlan_get;
+ }
+
+ mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
+ mlxsw_sp->ports[local_port] = mlxsw_sp_port;
+ err = register_netdev(dev);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n",
+ mlxsw_sp_port->local_port);
+ goto err_register_netdev;
+ }
+
+ mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port,
+ mlxsw_sp_port, dev, module + 1,
+ mlxsw_sp_port->split, lane / width);
+ mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0);
+ return 0;
+
+err_register_netdev:
+ mlxsw_sp->ports[local_port] = NULL;
+ mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+ mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+err_port_vlan_get:
+ mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
+err_port_qdiscs_init:
+ mlxsw_sp_port_fids_fini(mlxsw_sp_port);
+err_port_fids_init:
+ mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
+err_port_dcb_init:
+ mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false);
+err_port_tc_mc_mode:
+err_port_ets_init:
+err_port_buffers_init:
+err_port_admin_status_set:
+err_port_mtu_set:
+err_port_speed_by_width_set:
+err_port_system_port_mapping_set:
+err_dev_addr_init:
+ mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+ mlxsw_sp_port_module_unmap(mlxsw_sp_port);
+err_port_module_map:
+ kfree(mlxsw_sp_port->sample);
+err_alloc_sample:
+ free_percpu(mlxsw_sp_port->pcpu_stats);
+err_alloc_stats:
+ free_netdev(dev);
+err_alloc_etherdev:
+ mlxsw_core_port_fini(mlxsw_sp->core, local_port);
+ return err;
+}
+
+static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
+
+ cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw);
+ mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
+ unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
+ mlxsw_sp->ports[local_port] = NULL;
+ mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+ mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
+ mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
+ mlxsw_sp_port_fids_fini(mlxsw_sp_port);
+ mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
+ mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false);
+ mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
+ mlxsw_sp_port_module_unmap(mlxsw_sp_port);
+ kfree(mlxsw_sp_port->sample);
+ free_percpu(mlxsw_sp_port->pcpu_stats);
+ WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vlans_list));
+ free_netdev(mlxsw_sp_port->dev);
+ mlxsw_core_port_fini(mlxsw_sp->core, local_port);
+}
+
+static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{
+ return mlxsw_sp->ports[local_port] != NULL;
+}
+
+static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
+ if (mlxsw_sp_port_created(mlxsw_sp, i))
+ mlxsw_sp_port_remove(mlxsw_sp, i);
+ kfree(mlxsw_sp->port_to_module);
+ kfree(mlxsw_sp->ports);
+ mlxsw_sp->ports = NULL;
+}
+
+static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+ u8 module, width, lane;
+ size_t alloc_size;
+ int i;
+ int err;
+
+ alloc_size = sizeof(struct mlxsw_sp_port *) * max_ports;
+ mlxsw_sp->ports = kzalloc(alloc_size, GFP_KERNEL);
+ if (!mlxsw_sp->ports)
+ return -ENOMEM;
+
+ mlxsw_sp->port_to_module = kmalloc_array(max_ports, sizeof(int),
+ GFP_KERNEL);
+ if (!mlxsw_sp->port_to_module) {
+ err = -ENOMEM;
+ goto err_port_to_module_alloc;
+ }
+
+ for (i = 1; i < max_ports; i++) {
+ /* Mark as invalid */
+ mlxsw_sp->port_to_module[i] = -1;
+
+ err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module,
+ &width, &lane);
+ if (err)
+ goto err_port_module_info_get;
+ if (!width)
+ continue;
+ mlxsw_sp->port_to_module[i] = module;
+ err = mlxsw_sp_port_create(mlxsw_sp, i, false,
+ module, width, lane);
+ if (err)
+ goto err_port_create;
+ }
+ return 0;
+
+err_port_create:
+err_port_module_info_get:
+ for (i--; i >= 1; i--)
+ if (mlxsw_sp_port_created(mlxsw_sp, i))
+ mlxsw_sp_port_remove(mlxsw_sp, i);
+ kfree(mlxsw_sp->port_to_module);
+err_port_to_module_alloc:
+ kfree(mlxsw_sp->ports);
+ mlxsw_sp->ports = NULL;
+ return err;
+}
+
+static u8 mlxsw_sp_cluster_base_port_get(u8 local_port)
+{
+ u8 offset = (local_port - 1) % MLXSW_SP_PORTS_PER_CLUSTER_MAX;
+
+ return local_port - offset;
+}
+
+static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
+ u8 module, unsigned int count)
+{
+ u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count;
+ int err, i;
+
+ for (i = 0; i < count; i++) {
+ err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true,
+ module, width, i * width);
+ if (err)
+ goto err_port_create;
+ }
+
+ return 0;
+
+err_port_create:
+ for (i--; i >= 0; i--)
+ if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+ mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+ return err;
+}
+
+static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp,
+ u8 base_port, unsigned int count)
+{
+ u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH;
+ int i;
+
+ /* Split by four means we need to re-create two ports, otherwise
+ * only one.
+ */
+ count = count / 2;
+
+ for (i = 0; i < count; i++) {
+ local_port = base_port + i * 2;
+ if (mlxsw_sp->port_to_module[local_port] < 0)
+ continue;
+ module = mlxsw_sp->port_to_module[local_port];
+
+ mlxsw_sp_port_create(mlxsw_sp, local_port, false, module,
+ width, 0);
+ }
+}
+
+static struct mlxsw_sp_port *
+mlxsw_sp_port_get_by_local_port(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{
+ if (mlxsw_sp->ports && mlxsw_sp->ports[local_port])
+ return mlxsw_sp->ports[local_port];
+ return NULL;
+}
+
+static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
+ unsigned int count,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u8 module, cur_width, base_port;
+ int i;
+ int err;
+
+ mlxsw_sp_port = mlxsw_sp_port_get_by_local_port(mlxsw_sp, local_port);
+ if (!mlxsw_sp_port) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
+ local_port);
+ NL_SET_ERR_MSG_MOD(extack, "Port number does not exist");
+ return -EINVAL;
+ }
+
+ module = mlxsw_sp_port->mapping.module;
+ cur_width = mlxsw_sp_port->mapping.width;
+
+ if (count != 2 && count != 4) {
+ netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n");
+ NL_SET_ERR_MSG_MOD(extack, "Port can only be split into 2 or 4 ports");
+ return -EINVAL;
+ }
+
+ if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) {
+ netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n");
+ NL_SET_ERR_MSG_MOD(extack, "Port cannot be split further");
+ return -EINVAL;
+ }
+
+ /* Make sure we have enough slave (even) ports for the split. */
+ if (count == 2) {
+ base_port = local_port;
+ if (mlxsw_sp->ports[base_port + 1]) {
+ netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n");
+ NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration");
+ return -EINVAL;
+ }
+ } else {
+ base_port = mlxsw_sp_cluster_base_port_get(local_port);
+ if (mlxsw_sp->ports[base_port + 1] ||
+ mlxsw_sp->ports[base_port + 3]) {
+ netdev_err(mlxsw_sp_port->dev, "Invalid split configuration\n");
+ NL_SET_ERR_MSG_MOD(extack, "Invalid split configuration");
+ return -EINVAL;
+ }
+ }
+
+ for (i = 0; i < count; i++)
+ if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+ mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+
+ err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n");
+ goto err_port_split_create;
+ }
+
+ return 0;
+
+err_port_split_create:
+ mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
+ return err;
+}
+
+static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u8 cur_width, base_port;
+ unsigned int count;
+ int i;
+
+ mlxsw_sp_port = mlxsw_sp_port_get_by_local_port(mlxsw_sp, local_port);
+ if (!mlxsw_sp_port) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port number \"%d\" does not exist\n",
+ local_port);
+ NL_SET_ERR_MSG_MOD(extack, "Port number does not exist");
+ return -EINVAL;
+ }
+
+ if (!mlxsw_sp_port->split) {
+ netdev_err(mlxsw_sp_port->dev, "Port was not split\n");
+ NL_SET_ERR_MSG_MOD(extack, "Port was not split");
+ return -EINVAL;
+ }
+
+ cur_width = mlxsw_sp_port->mapping.width;
+ count = cur_width == 1 ? 4 : 2;
+
+ base_port = mlxsw_sp_cluster_base_port_get(local_port);
+
+ /* Determine which ports to remove. */
+ if (count == 2 && local_port >= base_port + 2)
+ base_port = base_port + 2;
+
+ for (i = 0; i < count; i++)
+ if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+ mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+
+ mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
+
+ return 0;
+}
+
+static void
+mlxsw_sp_port_down_wipe_counters(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int i;
+
+ for (i = 0; i < TC_MAX_QUEUE; i++)
+ mlxsw_sp_port->periodic_hw_stats.xstats.backlog[i] = 0;
+}
+
+static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
+ char *pude_pl, void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ enum mlxsw_reg_pude_oper_status status;
+ u8 local_port;
+
+ local_port = mlxsw_reg_pude_local_port_get(pude_pl);
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port)
+ return;
+
+ status = mlxsw_reg_pude_oper_status_get(pude_pl);
+ if (status == MLXSW_PORT_OPER_STATUS_UP) {
+ netdev_info(mlxsw_sp_port->dev, "link up\n");
+ netif_carrier_on(mlxsw_sp_port->dev);
+ } else {
+ netdev_info(mlxsw_sp_port->dev, "link down\n");
+ netif_carrier_off(mlxsw_sp_port->dev);
+ mlxsw_sp_port_down_wipe_counters(mlxsw_sp_port);
+ }
+}
+
+static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+ u8 local_port, void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ struct mlxsw_sp_port_pcpu_stats *pcpu_stats;
+
+ if (unlikely(!mlxsw_sp_port)) {
+ dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: skb received for non-existent port\n",
+ local_port);
+ return;
+ }
+
+ skb->dev = mlxsw_sp_port->dev;
+
+ pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats);
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->rx_packets++;
+ pcpu_stats->rx_bytes += skb->len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ netif_receive_skb(skb);
+}
+
+static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
+ void *priv)
+{
+ skb->offload_fwd_mark = 1;
+ return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
+}
+
+static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb,
+ u8 local_port, void *priv)
+{
+ skb->offload_mr_fwd_mark = 1;
+ skb->offload_fwd_mark = 1;
+ return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
+}
+
+static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
+ void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ struct psample_group *psample_group;
+ u32 size;
+
+ if (unlikely(!mlxsw_sp_port)) {
+ dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n",
+ local_port);
+ goto out;
+ }
+ if (unlikely(!mlxsw_sp_port->sample)) {
+ dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received on unsupported port\n",
+ local_port);
+ goto out;
+ }
+
+ size = mlxsw_sp_port->sample->truncate ?
+ mlxsw_sp_port->sample->trunc_size : skb->len;
+
+ rcu_read_lock();
+ psample_group = rcu_dereference(mlxsw_sp_port->sample->psample_group);
+ if (!psample_group)
+ goto out_unlock;
+ psample_sample_packet(psample_group, skb, size,
+ mlxsw_sp_port->dev->ifindex, 0,
+ mlxsw_sp_port->sample->rate);
+out_unlock:
+ rcu_read_unlock();
+out:
+ consume_skb(skb);
+}
+
+#define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
+ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \
+ _is_ctrl, SP_##_trap_group, DISCARD)
+
+#define MLXSW_SP_RXL_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
+ MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \
+ _is_ctrl, SP_##_trap_group, DISCARD)
+
+#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
+ MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \
+ _is_ctrl, SP_##_trap_group, DISCARD)
+
+#define MLXSW_SP_EVENTL(_func, _trap_id) \
+ MLXSW_EVENTL(_func, _trap_id, SP_EVENT)
+
+static const struct mlxsw_listener mlxsw_sp_listener[] = {
+ /* Events */
+ MLXSW_SP_EVENTL(mlxsw_sp_pude_event_func, PUDE),
+ /* L2 traps */
+ MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
+ MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
+ MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true),
+ MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
+ MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false),
+ MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false),
+ MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, IGMP, false),
+ MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, TRAP_TO_CPU, IGMP, false),
+ MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, IGMP, false),
+ MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
+ MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
+ MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
+ MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD,
+ false),
+ MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+ false),
+ MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD,
+ false),
+ MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+ false),
+ /* L3 traps */
+ MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
+ MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
+ false),
+ MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP,
+ false),
+ MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
+ MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
+ MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false),
+ MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
+ MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
+ MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
+ MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
+ false),
+ MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false),
+ MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
+ MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+ /* PKT Sample trap */
+ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
+ false, SP_IP2ME, DISCARD),
+ /* ACL trap */
+ MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false),
+ /* Multicast Router Traps */
+ MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
+ MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
+ MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
+ MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
+ MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
+};
+
+static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
+{
+ char qpcr_pl[MLXSW_REG_QPCR_LEN];
+ enum mlxsw_reg_qpcr_ir_units ir_units;
+ int max_cpu_policers;
+ bool is_bytes;
+ u8 burst_size;
+ u32 rate;
+ int i, err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_CPU_POLICERS))
+ return -EIO;
+
+ max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS);
+
+ ir_units = MLXSW_REG_QPCR_IR_UNITS_M;
+ for (i = 0; i < max_cpu_policers; i++) {
+ is_bytes = false;
+ switch (i) {
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
+ rate = 128;
+ burst_size = 7;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
+ rate = 16 * 1024;
+ burst_size = 10;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
+ rate = 1024;
+ burst_size = 7;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+ rate = 4 * 1024;
+ burst_size = 4;
+ break;
+ default:
+ continue;
+ }
+
+ mlxsw_reg_qpcr_pack(qpcr_pl, i, ir_units, is_bytes, rate,
+ burst_size);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpcr), qpcr_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+ char htgt_pl[MLXSW_REG_HTGT_LEN];
+ enum mlxsw_reg_htgt_trap_group i;
+ int max_cpu_policers;
+ int max_trap_groups;
+ u8 priority, tc;
+ u16 policer_id;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_TRAP_GROUPS))
+ return -EIO;
+
+ max_trap_groups = MLXSW_CORE_RES_GET(mlxsw_core, MAX_TRAP_GROUPS);
+ max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS);
+
+ for (i = 0; i < max_trap_groups; i++) {
+ policer_id = i;
+ switch (i) {
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+ priority = 5;
+ tc = 5;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
+ priority = 4;
+ tc = 4;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
+ priority = 3;
+ tc = 3;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
+ priority = 2;
+ tc = 2;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
+ priority = 1;
+ tc = 1;
+ break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT:
+ priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY;
+ tc = MLXSW_REG_HTGT_DEFAULT_TC;
+ policer_id = MLXSW_REG_HTGT_INVALID_POLICER;
+ break;
+ default:
+ continue;
+ }
+
+ if (max_cpu_policers <= policer_id &&
+ policer_id != MLXSW_REG_HTGT_INVALID_POLICER)
+ return -EIO;
+
+ mlxsw_reg_htgt_pack(htgt_pl, i, policer_id, priority, tc);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+ int err;
+
+ err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+ err = mlxsw_core_trap_register(mlxsw_sp->core,
+ &mlxsw_sp_listener[i],
+ mlxsw_sp);
+ if (err)
+ goto err_listener_register;
+
+ }
+ return 0;
+
+err_listener_register:
+ for (i--; i >= 0; i--) {
+ mlxsw_core_trap_unregister(mlxsw_sp->core,
+ &mlxsw_sp_listener[i],
+ mlxsw_sp);
+ }
+ return err;
+}
+
+static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+ mlxsw_core_trap_unregister(mlxsw_sp->core,
+ &mlxsw_sp_listener[i],
+ mlxsw_sp);
+ }
+}
+
+static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
+{
+ char slcr_pl[MLXSW_REG_SLCR_LEN];
+ int err;
+
+ mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
+ MLXSW_REG_SLCR_LAG_HASH_DMAC |
+ MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
+ MLXSW_REG_SLCR_LAG_HASH_VLANID |
+ MLXSW_REG_SLCR_LAG_HASH_SIP |
+ MLXSW_REG_SLCR_LAG_HASH_DIP |
+ MLXSW_REG_SLCR_LAG_HASH_SPORT |
+ MLXSW_REG_SLCR_LAG_HASH_DPORT |
+ MLXSW_REG_SLCR_LAG_HASH_IPPROTO);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
+ if (err)
+ return err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG_MEMBERS))
+ return -EIO;
+
+ mlxsw_sp->lags = kcalloc(MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LAG),
+ sizeof(struct mlxsw_sp_upper),
+ GFP_KERNEL);
+ if (!mlxsw_sp->lags)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ kfree(mlxsw_sp->lags);
+}
+
+static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+ char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
+static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
+
+static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ int err;
+
+ mlxsw_sp->core = mlxsw_core;
+ mlxsw_sp->bus_info = mlxsw_bus_info;
+
+ err = mlxsw_sp_fw_rev_validate(mlxsw_sp);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_base_mac_get(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to get base mac\n");
+ return err;
+ }
+
+ err = mlxsw_sp_kvdl_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize KVDL\n");
+ return err;
+ }
+
+ err = mlxsw_sp_fids_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n");
+ goto err_fids_init;
+ }
+
+ err = mlxsw_sp_traps_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps\n");
+ goto err_traps_init;
+ }
+
+ err = mlxsw_sp_buffers_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize buffers\n");
+ goto err_buffers_init;
+ }
+
+ err = mlxsw_sp_lag_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n");
+ goto err_lag_init;
+ }
+
+ /* Initialize SPAN before router and switchdev, so that those components
+ * can call mlxsw_sp_span_respin().
+ */
+ err = mlxsw_sp_span_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+ goto err_span_init;
+ }
+
+ err = mlxsw_sp_switchdev_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n");
+ goto err_switchdev_init;
+ }
+
+ err = mlxsw_sp_counter_pool_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n");
+ goto err_counter_pool_init;
+ }
+
+ err = mlxsw_sp_afa_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL actions\n");
+ goto err_afa_init;
+ }
+
+ err = mlxsw_sp_router_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
+ goto err_router_init;
+ }
+
+ /* Initialize netdevice notifier after router and SPAN is initialized,
+ * so that the event handler can use router structures and call SPAN
+ * respin.
+ */
+ mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
+ err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n");
+ goto err_netdev_notifier;
+ }
+
+ err = mlxsw_sp_acl_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
+ goto err_acl_init;
+ }
+
+ err = mlxsw_sp_dpipe_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n");
+ goto err_dpipe_init;
+ }
+
+ err = mlxsw_sp_ports_create(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
+ goto err_ports_create;
+ }
+
+ return 0;
+
+err_ports_create:
+ mlxsw_sp_dpipe_fini(mlxsw_sp);
+err_dpipe_init:
+ mlxsw_sp_acl_fini(mlxsw_sp);
+err_acl_init:
+ unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+err_netdev_notifier:
+ mlxsw_sp_router_fini(mlxsw_sp);
+err_router_init:
+ mlxsw_sp_afa_fini(mlxsw_sp);
+err_afa_init:
+ mlxsw_sp_counter_pool_fini(mlxsw_sp);
+err_counter_pool_init:
+ mlxsw_sp_switchdev_fini(mlxsw_sp);
+err_switchdev_init:
+ mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
+ mlxsw_sp_lag_fini(mlxsw_sp);
+err_lag_init:
+ mlxsw_sp_buffers_fini(mlxsw_sp);
+err_buffers_init:
+ mlxsw_sp_traps_fini(mlxsw_sp);
+err_traps_init:
+ mlxsw_sp_fids_fini(mlxsw_sp);
+err_fids_init:
+ mlxsw_sp_kvdl_fini(mlxsw_sp);
+ return err;
+}
+
+static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+ mlxsw_sp->req_rev = &mlxsw_sp1_fw_rev;
+ mlxsw_sp->fw_filename = MLXSW_SP1_FW_FILENAME;
+ mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops;
+ mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops;
+ mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
+ mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
+ mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
+
+ return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+}
+
+static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+ mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
+ mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
+ mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
+ mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
+ mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+
+ return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
+}
+
+static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+
+ mlxsw_sp_ports_remove(mlxsw_sp);
+ mlxsw_sp_dpipe_fini(mlxsw_sp);
+ mlxsw_sp_acl_fini(mlxsw_sp);
+ unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+ mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_afa_fini(mlxsw_sp);
+ mlxsw_sp_counter_pool_fini(mlxsw_sp);
+ mlxsw_sp_switchdev_fini(mlxsw_sp);
+ mlxsw_sp_span_fini(mlxsw_sp);
+ mlxsw_sp_lag_fini(mlxsw_sp);
+ mlxsw_sp_buffers_fini(mlxsw_sp);
+ mlxsw_sp_traps_fini(mlxsw_sp);
+ mlxsw_sp_fids_fini(mlxsw_sp);
+ mlxsw_sp_kvdl_fini(mlxsw_sp);
+}
+
+static const struct mlxsw_config_profile mlxsw_sp1_config_profile = {
+ .used_max_mid = 1,
+ .max_mid = MLXSW_SP_MID_MAX,
+ .used_flood_tables = 1,
+ .used_flood_mode = 1,
+ .flood_mode = 3,
+ .max_fid_offset_flood_tables = 3,
+ .fid_offset_flood_table_size = VLAN_N_VID - 1,
+ .max_fid_flood_tables = 3,
+ .fid_flood_table_size = MLXSW_SP_FID_8021D_MAX,
+ .used_max_ib_mc = 1,
+ .max_ib_mc = 0,
+ .used_max_pkey = 1,
+ .max_pkey = 0,
+ .used_kvd_sizes = 1,
+ .kvd_hash_single_parts = 59,
+ .kvd_hash_double_parts = 41,
+ .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE,
+ .swid_config = {
+ {
+ .used_type = 1,
+ .type = MLXSW_PORT_SWID_TYPE_ETH,
+ }
+ },
+};
+
+static const struct mlxsw_config_profile mlxsw_sp2_config_profile = {
+ .used_max_mid = 1,
+ .max_mid = MLXSW_SP_MID_MAX,
+ .used_flood_tables = 1,
+ .used_flood_mode = 1,
+ .flood_mode = 3,
+ .max_fid_offset_flood_tables = 3,
+ .fid_offset_flood_table_size = VLAN_N_VID - 1,
+ .max_fid_flood_tables = 3,
+ .fid_flood_table_size = MLXSW_SP_FID_8021D_MAX,
+ .used_max_ib_mc = 1,
+ .max_ib_mc = 0,
+ .used_max_pkey = 1,
+ .max_pkey = 0,
+ .swid_config = {
+ {
+ .used_type = 1,
+ .type = MLXSW_PORT_SWID_TYPE_ETH,
+ }
+ },
+};
+
+static void
+mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
+ struct devlink_resource_size_params *kvd_size_params,
+ struct devlink_resource_size_params *linear_size_params,
+ struct devlink_resource_size_params *hash_double_size_params,
+ struct devlink_resource_size_params *hash_single_size_params)
+{
+ u32 single_size_min = MLXSW_CORE_RES_GET(mlxsw_core,
+ KVD_SINGLE_MIN_SIZE);
+ u32 double_size_min = MLXSW_CORE_RES_GET(mlxsw_core,
+ KVD_DOUBLE_MIN_SIZE);
+ u32 kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
+ u32 linear_size_min = 0;
+
+ devlink_resource_size_params_init(kvd_size_params, kvd_size, kvd_size,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ devlink_resource_size_params_init(linear_size_params, linear_size_min,
+ kvd_size - single_size_min -
+ double_size_min,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ devlink_resource_size_params_init(hash_double_size_params,
+ double_size_min,
+ kvd_size - single_size_min -
+ linear_size_min,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ devlink_resource_size_params_init(hash_single_size_params,
+ single_size_min,
+ kvd_size - double_size_min -
+ linear_size_min,
+ MLXSW_SP_KVD_GRANULARITY,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+}
+
+static int mlxsw_sp1_resources_kvd_register(struct mlxsw_core *mlxsw_core)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ struct devlink_resource_size_params hash_single_size_params;
+ struct devlink_resource_size_params hash_double_size_params;
+ struct devlink_resource_size_params linear_size_params;
+ struct devlink_resource_size_params kvd_size_params;
+ u32 kvd_size, single_size, double_size, linear_size;
+ const struct mlxsw_config_profile *profile;
+ int err;
+
+ profile = &mlxsw_sp1_config_profile;
+ if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SIZE))
+ return -EIO;
+
+ mlxsw_sp_resource_size_params_prepare(mlxsw_core, &kvd_size_params,
+ &linear_size_params,
+ &hash_double_size_params,
+ &hash_single_size_params);
+
+ kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
+ kvd_size, MLXSW_SP_RESOURCE_KVD,
+ DEVLINK_RESOURCE_ID_PARENT_TOP,
+ &kvd_size_params);
+ if (err)
+ return err;
+
+ linear_size = profile->kvd_linear_size;
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR,
+ linear_size,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ MLXSW_SP_RESOURCE_KVD,
+ &linear_size_params);
+ if (err)
+ return err;
+
+ err = mlxsw_sp1_kvdl_resources_register(mlxsw_core);
+ if (err)
+ return err;
+
+ double_size = kvd_size - linear_size;
+ double_size *= profile->kvd_hash_double_parts;
+ double_size /= profile->kvd_hash_double_parts +
+ profile->kvd_hash_single_parts;
+ double_size = rounddown(double_size, MLXSW_SP_KVD_GRANULARITY);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE,
+ double_size,
+ MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ MLXSW_SP_RESOURCE_KVD,
+ &hash_double_size_params);
+ if (err)
+ return err;
+
+ single_size = kvd_size - double_size - linear_size;
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE,
+ single_size,
+ MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
+ MLXSW_SP_RESOURCE_KVD,
+ &hash_single_size_params);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_sp1_resources_kvd_register(mlxsw_core);
+}
+
+static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
+{
+ return 0;
+}
+
+static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_config_profile *profile,
+ u64 *p_single_size, u64 *p_double_size,
+ u64 *p_linear_size)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ u32 double_size;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SINGLE_MIN_SIZE) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_core, KVD_DOUBLE_MIN_SIZE))
+ return -EIO;
+
+ /* The hash part is what left of the kvd without the
+ * linear part. It is split to the single size and
+ * double size by the parts ratio from the profile.
+ * Both sizes must be a multiplications of the
+ * granularity from the profile. In case the user
+ * provided the sizes they are obtained via devlink.
+ */
+ err = devlink_resource_size_get(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ p_linear_size);
+ if (err)
+ *p_linear_size = profile->kvd_linear_size;
+
+ err = devlink_resource_size_get(devlink,
+ MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ p_double_size);
+ if (err) {
+ double_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+ *p_linear_size;
+ double_size *= profile->kvd_hash_double_parts;
+ double_size /= profile->kvd_hash_double_parts +
+ profile->kvd_hash_single_parts;
+ *p_double_size = rounddown(double_size,
+ MLXSW_SP_KVD_GRANULARITY);
+ }
+
+ err = devlink_resource_size_get(devlink,
+ MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
+ p_single_size);
+ if (err)
+ *p_single_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+ *p_double_size - *p_linear_size;
+
+ /* Check results are legal. */
+ if (*p_single_size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) ||
+ *p_double_size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE) ||
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) < *p_linear_size)
+ return -EIO;
+
+ return 0;
+}
+
+static struct mlxsw_driver mlxsw_sp1_driver = {
+ .kind = mlxsw_sp1_driver_name,
+ .priv_size = sizeof(struct mlxsw_sp),
+ .init = mlxsw_sp1_init,
+ .fini = mlxsw_sp_fini,
+ .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
+ .port_split = mlxsw_sp_port_split,
+ .port_unsplit = mlxsw_sp_port_unsplit,
+ .sb_pool_get = mlxsw_sp_sb_pool_get,
+ .sb_pool_set = mlxsw_sp_sb_pool_set,
+ .sb_port_pool_get = mlxsw_sp_sb_port_pool_get,
+ .sb_port_pool_set = mlxsw_sp_sb_port_pool_set,
+ .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot,
+ .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
+ .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
+ .txhdr_construct = mlxsw_sp_txhdr_construct,
+ .resources_register = mlxsw_sp1_resources_register,
+ .kvd_sizes_get = mlxsw_sp_kvd_sizes_get,
+ .txhdr_len = MLXSW_TXHDR_LEN,
+ .profile = &mlxsw_sp1_config_profile,
+ .res_query_enabled = true,
+};
+
+static struct mlxsw_driver mlxsw_sp2_driver = {
+ .kind = mlxsw_sp2_driver_name,
+ .priv_size = sizeof(struct mlxsw_sp),
+ .init = mlxsw_sp2_init,
+ .fini = mlxsw_sp_fini,
+ .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
+ .port_split = mlxsw_sp_port_split,
+ .port_unsplit = mlxsw_sp_port_unsplit,
+ .sb_pool_get = mlxsw_sp_sb_pool_get,
+ .sb_pool_set = mlxsw_sp_sb_pool_set,
+ .sb_port_pool_get = mlxsw_sp_sb_port_pool_get,
+ .sb_port_pool_set = mlxsw_sp_sb_port_pool_set,
+ .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot,
+ .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
+ .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
+ .txhdr_construct = mlxsw_sp_txhdr_construct,
+ .resources_register = mlxsw_sp2_resources_register,
+ .txhdr_len = MLXSW_TXHDR_LEN,
+ .profile = &mlxsw_sp2_config_profile,
+ .res_query_enabled = true,
+};
+
+bool mlxsw_sp_port_dev_check(const struct net_device *dev)
+{
+ return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
+}
+
+static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data)
+{
+ struct mlxsw_sp_port **p_mlxsw_sp_port = data;
+ int ret = 0;
+
+ if (mlxsw_sp_port_dev_check(lower_dev)) {
+ *p_mlxsw_sp_port = netdev_priv(lower_dev);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ if (mlxsw_sp_port_dev_check(dev))
+ return netdev_priv(dev);
+
+ mlxsw_sp_port = NULL;
+ netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &mlxsw_sp_port);
+
+ return mlxsw_sp_port;
+}
+
+struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
+ return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL;
+}
+
+struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ if (mlxsw_sp_port_dev_check(dev))
+ return netdev_priv(dev);
+
+ mlxsw_sp_port = NULL;
+ netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk,
+ &mlxsw_sp_port);
+
+ return mlxsw_sp_port;
+}
+
+struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ rcu_read_lock();
+ mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev);
+ if (mlxsw_sp_port)
+ dev_hold(mlxsw_sp_port->dev);
+ rcu_read_unlock();
+ return mlxsw_sp_port;
+}
+
+void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ dev_put(mlxsw_sp_port->dev);
+}
+
+static void
+mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ struct net_device *br_dev = netdev_master_upper_dev_get(lag_dev);
+ struct net_device *upper_dev;
+ struct list_head *iter;
+
+ if (netif_is_bridge_port(lag_dev))
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, br_dev);
+
+ netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) {
+ if (!netif_is_bridge_port(upper_dev))
+ continue;
+ br_dev = netdev_master_upper_dev_get(upper_dev);
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, br_dev);
+ }
+}
+
+static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id, u8 port_index)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id, port_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char slcor_pl[MLXSW_REG_SLCOR_LEN];
+
+ mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port,
+ lag_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl);
+}
+
+static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *lag_dev,
+ u16 *p_lag_id)
+{
+ struct mlxsw_sp_upper *lag;
+ int free_lag_id = -1;
+ u64 max_lag;
+ int i;
+
+ max_lag = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LAG);
+ for (i = 0; i < max_lag; i++) {
+ lag = mlxsw_sp_lag_get(mlxsw_sp, i);
+ if (lag->ref_count) {
+ if (lag->dev == lag_dev) {
+ *p_lag_id = i;
+ return 0;
+ }
+ } else if (free_lag_id < 0) {
+ free_lag_id = i;
+ }
+ }
+ if (free_lag_id < 0)
+ return -EBUSY;
+ *p_lag_id = free_lag_id;
+ return 0;
+}
+
+static bool
+mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *lag_dev,
+ struct netdev_lag_upper_info *lag_upper_info,
+ struct netlink_ext_ack *extack)
+{
+ u16 lag_id;
+
+ if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
+ return false;
+ }
+ if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
+ return false;
+ }
+ return true;
+}
+
+static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
+ u16 lag_id, u8 *p_port_index)
+{
+ u64 max_lag_members;
+ int i;
+
+ max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_LAG_MEMBERS);
+ for (i = 0; i < max_lag_members; i++) {
+ if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) {
+ *p_port_index = i;
+ return 0;
+ }
+ }
+ return -EBUSY;
+}
+
+static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct mlxsw_sp_upper *lag;
+ u16 lag_id;
+ u8 port_index;
+ int err;
+
+ err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id);
+ if (err)
+ return err;
+ lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
+ if (!lag->ref_count) {
+ err = mlxsw_sp_lag_create(mlxsw_sp, lag_id);
+ if (err)
+ return err;
+ lag->dev = lag_dev;
+ }
+
+ err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index);
+ if (err)
+ return err;
+ err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index);
+ if (err)
+ goto err_col_port_add;
+
+ mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index,
+ mlxsw_sp_port->local_port);
+ mlxsw_sp_port->lag_id = lag_id;
+ mlxsw_sp_port->lagged = 1;
+ lag->ref_count++;
+
+ /* Port is no longer usable as a router interface */
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1);
+ if (mlxsw_sp_port_vlan->fid)
+ mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
+
+ return 0;
+
+err_col_port_add:
+ if (!lag->ref_count)
+ mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+ return err;
+}
+
+static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *lag_dev)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u16 lag_id = mlxsw_sp_port->lag_id;
+ struct mlxsw_sp_upper *lag;
+
+ if (!mlxsw_sp_port->lagged)
+ return;
+ lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id);
+ WARN_ON(lag->ref_count == 0);
+
+ mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id);
+
+ /* Any VLANs configured on the port are no longer valid */
+ mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
+ /* Make the LAG and its directly linked uppers leave bridges they
+ * are memeber in
+ */
+ mlxsw_sp_port_lag_uppers_cleanup(mlxsw_sp_port, lag_dev);
+
+ if (lag->ref_count == 1)
+ mlxsw_sp_lag_destroy(mlxsw_sp, lag_id);
+
+ mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id,
+ mlxsw_sp_port->local_port);
+ mlxsw_sp_port->lagged = 0;
+ lag->ref_count--;
+
+ mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
+ /* Make sure untagged frames are allowed to ingress */
+ mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+}
+
+static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id,
+ mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 lag_id)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char sldr_pl[MLXSW_REG_SLDR_LEN];
+
+ mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id,
+ mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl);
+}
+
+static int
+mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port,
+ mlxsw_sp_port->lag_id);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+ if (err)
+ goto err_dist_port_add;
+
+ return 0;
+
+err_dist_port_add:
+ mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+ return err;
+}
+
+static int
+mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port,
+ mlxsw_sp_port->lag_id);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port,
+ mlxsw_sp_port->lag_id);
+ if (err)
+ goto err_col_port_disable;
+
+ return 0;
+
+err_col_port_disable:
+ mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id);
+ return err;
+}
+
+static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netdev_lag_lower_state_info *info)
+{
+ if (info->tx_enabled)
+ return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port);
+ else
+ return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
+}
+
+static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ enum mlxsw_reg_spms_state spms_state;
+ char *spms_pl;
+ u16 vid;
+ int err;
+
+ spms_state = enable ? MLXSW_REG_SPMS_STATE_FORWARDING :
+ MLXSW_REG_SPMS_STATE_DISCARDING;
+
+ spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL);
+ if (!spms_pl)
+ return -ENOMEM;
+ mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port);
+
+ for (vid = 0; vid < VLAN_N_VID; vid++)
+ mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state);
+
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl);
+ kfree(spms_pl);
+ return err;
+}
+
+static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ u16 vid = 1;
+ int err;
+
+ err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true);
+ if (err)
+ return err;
+ err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true);
+ if (err)
+ goto err_port_stp_set;
+ err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
+ true, false);
+ if (err)
+ goto err_port_vlan_set;
+
+ for (; vid <= VLAN_N_VID - 1; vid++) {
+ err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
+ vid, false);
+ if (err)
+ goto err_vid_learning_set;
+ }
+
+ return 0;
+
+err_vid_learning_set:
+ for (vid--; vid >= 1; vid--)
+ mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
+err_port_vlan_set:
+ mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
+err_port_stp_set:
+ mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
+ return err;
+}
+
+static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ u16 vid;
+
+ for (vid = VLAN_N_VID - 1; vid >= 1; vid--)
+ mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
+ vid, true);
+
+ mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
+ false, false);
+ mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
+ mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
+}
+
+static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
+ struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper_dev;
+ struct mlxsw_sp *mlxsw_sp;
+ int err = 0;
+
+ mlxsw_sp_port = netdev_priv(dev);
+ mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ info = ptr;
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ upper_dev = info->upper_dev;
+ if (!is_vlan_dev(upper_dev) &&
+ !netif_is_lag_master(upper_dev) &&
+ !netif_is_bridge_master(upper_dev) &&
+ !netif_is_ovs_master(upper_dev) &&
+ !netif_is_macvlan(upper_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+ return -EINVAL;
+ }
+ if (!info->linking)
+ break;
+ if (netdev_has_any_upper_dev(upper_dev) &&
+ (!netif_is_bridge_master(upper_dev) ||
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+ upper_dev))) {
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
+ return -EINVAL;
+ }
+ if (netif_is_lag_master(upper_dev) &&
+ !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
+ info->upper_info, extack))
+ return -EINVAL;
+ if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN");
+ return -EINVAL;
+ }
+ if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
+ !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
+ return -EINVAL;
+ }
+ if (netif_is_macvlan(upper_dev) &&
+ !mlxsw_sp_rif_find_by_dev(mlxsw_sp, lower_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+ return -EOPNOTSUPP;
+ }
+ if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
+ return -EINVAL;
+ }
+ if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
+ return -EINVAL;
+ }
+ if (is_vlan_dev(upper_dev) &&
+ vlan_dev_vlan_id(upper_dev) == 1) {
+ NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic");
+ return -EINVAL;
+ }
+ break;
+ case NETDEV_CHANGEUPPER:
+ upper_dev = info->upper_dev;
+ if (netif_is_bridge_master(upper_dev)) {
+ if (info->linking)
+ err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
+ lower_dev,
+ upper_dev,
+ extack);
+ else
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
+ lower_dev,
+ upper_dev);
+ } else if (netif_is_lag_master(upper_dev)) {
+ if (info->linking) {
+ err = mlxsw_sp_port_lag_join(mlxsw_sp_port,
+ upper_dev);
+ } else {
+ mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port);
+ mlxsw_sp_port_lag_leave(mlxsw_sp_port,
+ upper_dev);
+ }
+ } else if (netif_is_ovs_master(upper_dev)) {
+ if (info->linking)
+ err = mlxsw_sp_port_ovs_join(mlxsw_sp_port);
+ else
+ mlxsw_sp_port_ovs_leave(mlxsw_sp_port);
+ } else if (netif_is_macvlan(upper_dev)) {
+ if (!info->linking)
+ mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
+ } else if (is_vlan_dev(upper_dev)) {
+ struct net_device *br_dev;
+
+ if (!netif_is_bridge_port(upper_dev))
+ break;
+ if (info->linking)
+ break;
+ br_dev = netdev_master_upper_dev_get(upper_dev);
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev,
+ br_dev);
+ }
+ break;
+ }
+
+ return err;
+}
+
+static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_changelowerstate_info *info;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int err;
+
+ mlxsw_sp_port = netdev_priv(dev);
+ info = ptr;
+
+ switch (event) {
+ case NETDEV_CHANGELOWERSTATE:
+ if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) {
+ err = mlxsw_sp_port_lag_changed(mlxsw_sp_port,
+ info->lower_state_info);
+ if (err)
+ netdev_err(dev, "Failed to reflect link aggregation lower state change\n");
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_netdevice_port_event(struct net_device *lower_dev,
+ struct net_device *port_dev,
+ unsigned long event, void *ptr)
+{
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ case NETDEV_CHANGEUPPER:
+ return mlxsw_sp_netdevice_port_upper_event(lower_dev, port_dev,
+ event, ptr);
+ case NETDEV_CHANGELOWERSTATE:
+ return mlxsw_sp_netdevice_port_lower_event(port_dev, event,
+ ptr);
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev;
+ struct list_head *iter;
+ int ret;
+
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ if (mlxsw_sp_port_dev_check(dev)) {
+ ret = mlxsw_sp_netdevice_port_event(lag_dev, dev, event,
+ ptr);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
+ struct net_device *dev,
+ unsigned long event, void *ptr,
+ u16 vid)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper_dev;
+ int err = 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ upper_dev = info->upper_dev;
+ if (!netif_is_bridge_master(upper_dev) &&
+ !netif_is_macvlan(upper_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+ return -EINVAL;
+ }
+ if (!info->linking)
+ break;
+ if (netdev_has_any_upper_dev(upper_dev) &&
+ (!netif_is_bridge_master(upper_dev) ||
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+ upper_dev))) {
+ NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
+ return -EINVAL;
+ }
+ if (netif_is_macvlan(upper_dev) &&
+ !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+ return -EOPNOTSUPP;
+ }
+ break;
+ case NETDEV_CHANGEUPPER:
+ upper_dev = info->upper_dev;
+ if (netif_is_bridge_master(upper_dev)) {
+ if (info->linking)
+ err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
+ vlan_dev,
+ upper_dev,
+ extack);
+ else
+ mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
+ vlan_dev,
+ upper_dev);
+ } else if (netif_is_macvlan(upper_dev)) {
+ if (!info->linking)
+ mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
+ } else {
+ err = -EINVAL;
+ WARN_ON(1);
+ }
+ break;
+ }
+
+ return err;
+}
+
+static int mlxsw_sp_netdevice_lag_port_vlan_event(struct net_device *vlan_dev,
+ struct net_device *lag_dev,
+ unsigned long event,
+ void *ptr, u16 vid)
+{
+ struct net_device *dev;
+ struct list_head *iter;
+ int ret;
+
+ netdev_for_each_lower_dev(lag_dev, dev, iter) {
+ if (mlxsw_sp_port_dev_check(dev)) {
+ ret = mlxsw_sp_netdevice_port_vlan_event(vlan_dev, dev,
+ event, ptr,
+ vid);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev,
+ unsigned long event, void *ptr)
+{
+ struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
+ u16 vid = vlan_dev_vlan_id(vlan_dev);
+
+ if (mlxsw_sp_port_dev_check(real_dev))
+ return mlxsw_sp_netdevice_port_vlan_event(vlan_dev, real_dev,
+ event, ptr, vid);
+ else if (netif_is_lag_master(real_dev))
+ return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev,
+ real_dev, event,
+ ptr, vid);
+
+ return 0;
+}
+
+static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
+ unsigned long event, void *ptr)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper_dev;
+
+ if (!mlxsw_sp)
+ return 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ upper_dev = info->upper_dev;
+ if (!is_vlan_dev(upper_dev) && !netif_is_macvlan(upper_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+ return -EOPNOTSUPP;
+ }
+ if (!info->linking)
+ break;
+ if (netif_is_macvlan(upper_dev) &&
+ !mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+ return -EOPNOTSUPP;
+ }
+ break;
+ case NETDEV_CHANGEUPPER:
+ upper_dev = info->upper_dev;
+ if (info->linking)
+ break;
+ if (is_vlan_dev(upper_dev))
+ mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev);
+ if (netif_is_macvlan(upper_dev))
+ mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);
+ break;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_netdevice_macvlan_event(struct net_device *macvlan_dev,
+ unsigned long event, void *ptr)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct netlink_ext_ack *extack;
+
+ if (!mlxsw_sp || event != NETDEV_PRECHANGEUPPER)
+ return 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ /* VRF enslavement is handled in mlxsw_sp_netdevice_vrf_event() */
+ NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
+
+ return -EOPNOTSUPP;
+}
+
+static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *info = ptr;
+
+ if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER)
+ return false;
+ return netif_is_l3_master(info->upper_dev);
+}
+
+static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp_span_entry *span_entry;
+ struct mlxsw_sp *mlxsw_sp;
+ int err = 0;
+
+ mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ if (event == NETDEV_UNREGISTER) {
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+ if (span_entry)
+ mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+ }
+ mlxsw_sp_span_respin(mlxsw_sp);
+
+ if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
+ err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
+ event, ptr);
+ else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
+ err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
+ event, ptr);
+ else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
+ err = mlxsw_sp_netdevice_router_port_event(dev);
+ else if (mlxsw_sp_is_vrf_event(event, ptr))
+ err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
+ else if (mlxsw_sp_port_dev_check(dev))
+ err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr);
+ else if (netif_is_lag_master(dev))
+ err = mlxsw_sp_netdevice_lag_event(dev, event, ptr);
+ else if (is_vlan_dev(dev))
+ err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr);
+ else if (netif_is_bridge_master(dev))
+ err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr);
+ else if (netif_is_macvlan(dev))
+ err = mlxsw_sp_netdevice_macvlan_event(dev, event, ptr);
+
+ return notifier_from_errno(err);
+}
+
+static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = {
+ .notifier_call = mlxsw_sp_inetaddr_valid_event,
+};
+
+static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
+ .notifier_call = mlxsw_sp_inetaddr_event,
+};
+
+static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = {
+ .notifier_call = mlxsw_sp_inet6addr_valid_event,
+};
+
+static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
+ .notifier_call = mlxsw_sp_inet6addr_event,
+};
+
+static const struct pci_device_id mlxsw_sp1_pci_id_table[] = {
+ {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
+ {0, },
+};
+
+static struct pci_driver mlxsw_sp1_pci_driver = {
+ .name = mlxsw_sp1_driver_name,
+ .id_table = mlxsw_sp1_pci_id_table,
+};
+
+static const struct pci_device_id mlxsw_sp2_pci_id_table[] = {
+ {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM2), 0},
+ {0, },
+};
+
+static struct pci_driver mlxsw_sp2_pci_driver = {
+ .name = mlxsw_sp2_driver_name,
+ .id_table = mlxsw_sp2_pci_id_table,
+};
+
+static int __init mlxsw_sp_module_init(void)
+{
+ int err;
+
+ register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
+ register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+ register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
+ register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
+
+ err = mlxsw_core_driver_register(&mlxsw_sp1_driver);
+ if (err)
+ goto err_sp1_core_driver_register;
+
+ err = mlxsw_core_driver_register(&mlxsw_sp2_driver);
+ if (err)
+ goto err_sp2_core_driver_register;
+
+ err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver);
+ if (err)
+ goto err_sp1_pci_driver_register;
+
+ err = mlxsw_pci_driver_register(&mlxsw_sp2_pci_driver);
+ if (err)
+ goto err_sp2_pci_driver_register;
+
+ return 0;
+
+err_sp2_pci_driver_register:
+ mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
+err_sp1_pci_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
+err_sp2_core_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
+err_sp1_core_driver_register:
+ unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
+ unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
+ unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+ unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
+ return err;
+}
+
+static void __exit mlxsw_sp_module_exit(void)
+{
+ mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
+ mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
+ mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
+ mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
+ unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
+ unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
+ unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+ unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
+}
+
+module_init(mlxsw_sp_module_init);
+module_exit(mlxsw_sp_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox Spectrum driver");
+MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table);
+MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
new file mode 100644
index 000000000..3cdb7aca9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -0,0 +1,728 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_H
+#define _MLXSW_SPECTRUM_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <linux/bitops.h>
+#include <linux/if_vlan.h>
+#include <linux/list.h>
+#include <linux/dcbnl.h>
+#include <linux/in6.h>
+#include <linux/notifier.h>
+#include <net/psample.h>
+#include <net/pkt_cls.h>
+#include <net/red.h>
+
+#include "port.h"
+#include "core.h"
+#include "core_acl_flex_keys.h"
+#include "core_acl_flex_actions.h"
+#include "reg.h"
+
+#define MLXSW_SP_FID_8021D_MAX 1024
+
+#define MLXSW_SP_MID_MAX 7000
+
+#define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4
+
+#define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */
+
+#define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */
+#define MLXSW_SP_KVD_GRANULARITY 128
+
+#define MLXSW_SP_RESOURCE_NAME_KVD "kvd"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
+#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
+#define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
+
+enum mlxsw_sp_resource_id {
+ MLXSW_SP_RESOURCE_KVD = 1,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+};
+
+struct mlxsw_sp_port;
+struct mlxsw_sp_rif;
+struct mlxsw_sp_span_entry;
+
+struct mlxsw_sp_upper {
+ struct net_device *dev;
+ unsigned int ref_count;
+};
+
+enum mlxsw_sp_rif_type {
+ MLXSW_SP_RIF_TYPE_SUBPORT,
+ MLXSW_SP_RIF_TYPE_VLAN,
+ MLXSW_SP_RIF_TYPE_FID,
+ MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */
+ MLXSW_SP_RIF_TYPE_MAX,
+};
+
+enum mlxsw_sp_fid_type {
+ MLXSW_SP_FID_TYPE_8021Q,
+ MLXSW_SP_FID_TYPE_8021D,
+ MLXSW_SP_FID_TYPE_RFID,
+ MLXSW_SP_FID_TYPE_DUMMY,
+ MLXSW_SP_FID_TYPE_MAX,
+};
+
+struct mlxsw_sp_mid {
+ struct list_head list;
+ unsigned char addr[ETH_ALEN];
+ u16 fid;
+ u16 mid;
+ bool in_hw;
+ unsigned long *ports_in_mid; /* bits array */
+};
+
+enum mlxsw_sp_port_mall_action_type {
+ MLXSW_SP_PORT_MALL_MIRROR,
+ MLXSW_SP_PORT_MALL_SAMPLE,
+};
+
+struct mlxsw_sp_port_mall_mirror_tc_entry {
+ int span_id;
+ bool ingress;
+};
+
+struct mlxsw_sp_port_mall_tc_entry {
+ struct list_head list;
+ unsigned long cookie;
+ enum mlxsw_sp_port_mall_action_type type;
+ union {
+ struct mlxsw_sp_port_mall_mirror_tc_entry mirror;
+ };
+};
+
+struct mlxsw_sp_sb;
+struct mlxsw_sp_bridge;
+struct mlxsw_sp_router;
+struct mlxsw_sp_mr;
+struct mlxsw_sp_acl;
+struct mlxsw_sp_counter_pool;
+struct mlxsw_sp_fid_core;
+struct mlxsw_sp_kvdl;
+struct mlxsw_sp_kvdl_ops;
+struct mlxsw_sp_mr_tcam_ops;
+struct mlxsw_sp_acl_tcam_ops;
+
+struct mlxsw_sp {
+ struct mlxsw_sp_port **ports;
+ struct mlxsw_core *core;
+ const struct mlxsw_bus_info *bus_info;
+ unsigned char base_mac[ETH_ALEN];
+ struct mlxsw_sp_upper *lags;
+ int *port_to_module;
+ struct mlxsw_sp_sb *sb;
+ struct mlxsw_sp_bridge *bridge;
+ struct mlxsw_sp_router *router;
+ struct mlxsw_sp_mr *mr;
+ struct mlxsw_afa *afa;
+ struct mlxsw_sp_acl *acl;
+ struct mlxsw_sp_fid_core *fid_core;
+ struct mlxsw_sp_kvdl *kvdl;
+ struct notifier_block netdevice_nb;
+
+ struct mlxsw_sp_counter_pool *counter_pool;
+ struct {
+ struct mlxsw_sp_span_entry *entries;
+ int entries_count;
+ } span;
+ const struct mlxsw_fw_rev *req_rev;
+ const char *fw_filename;
+ const struct mlxsw_sp_kvdl_ops *kvdl_ops;
+ const struct mlxsw_afa_ops *afa_ops;
+ const struct mlxsw_afk_ops *afk_ops;
+ const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
+ const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
+};
+
+static inline struct mlxsw_sp_upper *
+mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id)
+{
+ return &mlxsw_sp->lags[lag_id];
+}
+
+struct mlxsw_sp_port_pcpu_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+ u32 tx_dropped;
+};
+
+struct mlxsw_sp_port_sample {
+ struct psample_group __rcu *psample_group;
+ u32 trunc_size;
+ u32 rate;
+ bool truncate;
+};
+
+struct mlxsw_sp_bridge_port;
+struct mlxsw_sp_fid;
+
+struct mlxsw_sp_port_vlan {
+ struct list_head list;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct mlxsw_sp_fid *fid;
+ unsigned int ref_count;
+ u16 vid;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ struct list_head bridge_vlan_node;
+};
+
+/* No need an internal lock; At worse - miss a single periodic iteration */
+struct mlxsw_sp_port_xstats {
+ u64 ecn;
+ u64 wred_drop[TC_MAX_QUEUE];
+ u64 tail_drop[TC_MAX_QUEUE];
+ u64 backlog[TC_MAX_QUEUE];
+ u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+ u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
+};
+
+struct mlxsw_sp_port {
+ struct net_device *dev;
+ struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats;
+ struct mlxsw_sp *mlxsw_sp;
+ u8 local_port;
+ u8 lagged:1,
+ split:1;
+ u16 pvid;
+ u16 lag_id;
+ struct {
+ u8 tx_pause:1,
+ rx_pause:1,
+ autoneg:1;
+ } link;
+ struct {
+ struct ieee_ets *ets;
+ struct ieee_maxrate *maxrate;
+ struct ieee_pfc *pfc;
+ enum mlxsw_reg_qpts_trust_state trust_state;
+ } dcb;
+ struct {
+ u8 module;
+ u8 width;
+ u8 lane;
+ } mapping;
+ /* TC handles */
+ struct list_head mall_tc_list;
+ struct {
+ #define MLXSW_HW_STATS_UPDATE_TIME HZ
+ struct rtnl_link_stats64 stats;
+ struct mlxsw_sp_port_xstats xstats;
+ struct delayed_work update_dw;
+ } periodic_hw_stats;
+ struct mlxsw_sp_port_sample *sample;
+ struct list_head vlans_list;
+ struct mlxsw_sp_qdisc *root_qdisc;
+ struct mlxsw_sp_qdisc *tclass_qdiscs;
+ unsigned acl_rule_count;
+ struct mlxsw_sp_acl_block *ing_acl_block;
+ struct mlxsw_sp_acl_block *eg_acl_block;
+};
+
+static inline bool
+mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ return mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause;
+}
+
+static inline struct mlxsw_sp_port *
+mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u8 local_port;
+
+ local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core,
+ lag_id, port_index);
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL;
+}
+
+static inline struct mlxsw_sp_port_vlan *
+mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 vid)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list,
+ list) {
+ if (mlxsw_sp_port_vlan->vid == vid)
+ return mlxsw_sp_port_vlan;
+ }
+
+ return NULL;
+}
+
+enum mlxsw_sp_flood_type {
+ MLXSW_SP_FLOOD_TYPE_UC,
+ MLXSW_SP_FLOOD_TYPE_BC,
+ MLXSW_SP_FLOOD_TYPE_MC,
+};
+
+/* spectrum_buffers.c */
+int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info);
+int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index, u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type);
+int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold);
+int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold);
+int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold);
+int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold);
+int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index);
+int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index);
+int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_cur, u32 *p_max);
+int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max);
+u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells);
+u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes);
+
+/* spectrum_switchdev.c */
+int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
+ bool adding);
+void
+mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
+int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *brport_dev,
+ struct net_device *br_dev,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *brport_dev,
+ struct net_device *br_dev);
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev);
+
+/* spectrum.c */
+int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
+ bool dwrr, u8 dwrr_weight);
+int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 switch_prio, u8 tclass);
+int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
+ u8 *prio_tc, bool pause_en,
+ struct ieee_pfc *my_pfc);
+int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index, u32 maxrate);
+enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state);
+int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
+ u8 state);
+int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable);
+int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
+ bool learn_enable);
+int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
+struct mlxsw_sp_port_vlan *
+mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
+void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
+int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
+ u16 vid_end, bool is_member, bool untagged);
+int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
+ unsigned int counter_index, u64 *packets,
+ u64 *bytes);
+int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ unsigned int *p_counter_index);
+void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
+ unsigned int counter_index);
+bool mlxsw_sp_port_dev_check(const struct net_device *dev);
+struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev);
+struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
+struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
+void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
+struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
+
+/* spectrum_dcb.c */
+#ifdef CONFIG_MLXSW_SPECTRUM_DCB
+int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+#else
+static inline int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ return 0;
+}
+static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{}
+#endif
+
+/* spectrum_router.c */
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
+void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *macvlan_dev);
+int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
+int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
+int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
+int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
+ struct netdev_notifier_changeupper_info *info);
+bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev);
+bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev);
+int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev,
+ unsigned long event,
+ struct netdev_notifier_info *info);
+int
+mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev,
+ unsigned long event,
+ struct netdev_notifier_info *info);
+void
+mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
+void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
+void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev);
+
+/* spectrum_kvdl.c */
+enum mlxsw_sp_kvdl_entry_type {
+ MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+ MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+ MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+ MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+};
+
+static inline unsigned int
+mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type)
+{
+ switch (type) {
+ case MLXSW_SP_KVDL_ENTRY_TYPE_ADJ: /* fall through */
+ case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */
+ case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */
+ case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */
+ default:
+ return 1;
+ }
+}
+
+struct mlxsw_sp_kvdl_ops {
+ size_t priv_size;
+ int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+ void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+ int (*alloc)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count, u32 *p_entry_index);
+ void (*free)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count, int entry_index);
+ int (*alloc_size_query)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ unsigned int *p_alloc_count);
+ int (*resources_register)(struct mlxsw_sp *mlxsw_sp, void *priv);
+};
+
+int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count, u32 *p_entry_index);
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count, int entry_index);
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ unsigned int *p_alloc_count);
+
+/* spectrum1_kvdl.c */
+extern const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops;
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
+
+/* spectrum2_kvdl.c */
+extern const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops;
+
+struct mlxsw_sp_acl_rule_info {
+ unsigned int priority;
+ struct mlxsw_afk_element_values values;
+ struct mlxsw_afa_block *act_block;
+ unsigned int counter_index;
+};
+
+struct mlxsw_sp_acl_block;
+struct mlxsw_sp_acl_ruleset;
+
+/* spectrum_acl.c */
+enum mlxsw_sp_acl_profile {
+ MLXSW_SP_ACL_PROFILE_FLOWER,
+};
+
+struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
+struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block);
+unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block);
+void mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block);
+void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block);
+bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block);
+struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp,
+ struct net *net);
+void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block);
+int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress);
+int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress);
+bool mlxsw_sp_acl_block_is_egress_bound(struct mlxsw_sp_acl_block *block);
+struct mlxsw_sp_acl_ruleset *
+mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block, u32 chain_index,
+ enum mlxsw_sp_acl_profile profile);
+struct mlxsw_sp_acl_ruleset *
+mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block, u32 chain_index,
+ enum mlxsw_sp_acl_profile profile,
+ struct mlxsw_afk_element_usage *tmplt_elusage);
+void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset);
+u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset);
+
+struct mlxsw_sp_acl_rule_info *
+mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl);
+void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei);
+void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
+ unsigned int priority);
+void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
+ enum mlxsw_afk_element element,
+ u32 key_value, u32 mask_value);
+void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
+ enum mlxsw_afk_element element,
+ const char *key_value,
+ const char *mask_value, unsigned int len);
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+ u16 group_id);
+int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct mlxsw_sp_acl_block *block,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack);
+int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack);
+int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u32 action, u16 vid, u16 proto, u8 prio,
+ struct netlink_ext_ack *extack);
+int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct netlink_ext_ack *extack);
+int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u16 fid, struct netlink_ext_ack *extack);
+
+struct mlxsw_sp_acl_rule;
+
+struct mlxsw_sp_acl_rule *
+mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset,
+ unsigned long cookie,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule);
+int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule);
+void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule);
+struct mlxsw_sp_acl_rule *
+mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset,
+ unsigned long cookie);
+struct mlxsw_sp_acl_rule_info *
+mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule);
+int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule,
+ u64 *packets, u64 *bytes, u64 *last_use);
+
+struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp);
+
+/* spectrum_acl_tcam.c */
+struct mlxsw_sp_acl_tcam;
+struct mlxsw_sp_acl_tcam_region;
+
+struct mlxsw_sp_acl_tcam_ops {
+ enum mlxsw_reg_ptar_key_type key_type;
+ size_t priv_size;
+ int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ struct mlxsw_sp_acl_tcam *tcam);
+ void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+ size_t region_priv_size;
+ int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+ void *tcam_priv,
+ struct mlxsw_sp_acl_tcam_region *region);
+ void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv);
+ int (*region_associate)(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region);
+ size_t chunk_priv_size;
+ void (*chunk_init)(void *region_priv, void *chunk_priv,
+ unsigned int priority);
+ void (*chunk_fini)(void *chunk_priv);
+ size_t entry_priv_size;
+ int (*entry_add)(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *chunk_priv,
+ void *entry_priv,
+ struct mlxsw_sp_acl_rule_info *rulei);
+ void (*entry_del)(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *chunk_priv,
+ void *entry_priv);
+ int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *entry_priv,
+ bool *activity);
+};
+
+/* spectrum1_acl_tcam.c */
+extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops;
+
+/* spectrum2_acl_tcam.c */
+extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops;
+
+/* spectrum_acl_flex_actions.c */
+extern const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops;
+extern const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops;
+
+/* spectrum_acl_flex_keys.c */
+extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
+extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
+
+/* spectrum_flower.c */
+int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f);
+void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f);
+int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f);
+int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f);
+void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f);
+
+/* spectrum_qdisc.c */
+int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_red_qopt_offload *p);
+int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_prio_qopt_offload *p);
+
+/* spectrum_fid.c */
+int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_flood_type packet_type, u8 local_port,
+ bool member);
+int mlxsw_sp_fid_port_vid_map(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
+void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
+enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid);
+u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid);
+enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid);
+void mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif);
+enum mlxsw_sp_rif_type
+mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_fid_type type);
+u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid);
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid);
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
+ int br_ifindex);
+struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
+ u16 rif_index);
+struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid);
+int mlxsw_sp_port_fids_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp);
+
+/* spectrum_mr.c */
+enum mlxsw_sp_mr_route_prio {
+ MLXSW_SP_MR_ROUTE_PRIO_SG,
+ MLXSW_SP_MR_ROUTE_PRIO_STARG,
+ MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+ __MLXSW_SP_MR_ROUTE_PRIO_MAX
+};
+
+#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
+
+struct mlxsw_sp_mr_route_key;
+
+struct mlxsw_sp_mr_tcam_ops {
+ size_t priv_size;
+ int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+ void (*fini)(void *priv);
+ size_t route_priv_size;
+ int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block,
+ enum mlxsw_sp_mr_route_prio prio);
+ void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key);
+ int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block);
+};
+
+/* spectrum1_mr_tcam.c */
+extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
+
+/* spectrum2_mr_tcam.c */
+extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
new file mode 100644
index 000000000..2a9eac900
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+struct mlxsw_sp1_acl_tcam_region {
+ struct mlxsw_sp_acl_ctcam_region cregion;
+ struct mlxsw_sp_acl_tcam_region *region;
+ struct {
+ struct mlxsw_sp_acl_ctcam_chunk cchunk;
+ struct mlxsw_sp_acl_ctcam_entry centry;
+ struct mlxsw_sp_acl_rule_info *rulei;
+ } catchall;
+};
+
+struct mlxsw_sp1_acl_tcam_chunk {
+ struct mlxsw_sp_acl_ctcam_chunk cchunk;
+};
+
+struct mlxsw_sp1_acl_tcam_entry {
+ struct mlxsw_sp_acl_ctcam_entry centry;
+};
+
+static int
+mlxsw_sp1_acl_ctcam_region_entry_insert(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry,
+ const char *mask)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp1_acl_ctcam_region_entry_remove(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+}
+
+static const struct mlxsw_sp_acl_ctcam_region_ops
+mlxsw_sp1_acl_ctcam_region_ops = {
+ .entry_insert = mlxsw_sp1_acl_ctcam_region_entry_insert,
+ .entry_remove = mlxsw_sp1_acl_ctcam_region_entry_remove,
+};
+
+static int mlxsw_sp1_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
+ struct mlxsw_sp_acl_tcam *tcam)
+{
+ return 0;
+}
+
+static void mlxsw_sp1_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+}
+
+static int
+mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_acl_tcam_region *region)
+{
+ struct mlxsw_sp_acl_rule_info *rulei;
+ int err;
+
+ mlxsw_sp_acl_ctcam_chunk_init(&region->cregion,
+ &region->catchall.cchunk,
+ MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
+ rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
+ if (IS_ERR(rulei)) {
+ err = PTR_ERR(rulei);
+ goto err_rulei_create;
+ }
+ err = mlxsw_sp_acl_rulei_act_continue(rulei);
+ if (WARN_ON(err))
+ goto err_rulei_act_continue;
+ err = mlxsw_sp_acl_rulei_commit(rulei);
+ if (err)
+ goto err_rulei_commit;
+ err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+ &region->catchall.cchunk,
+ &region->catchall.centry,
+ rulei, false);
+ if (err)
+ goto err_entry_add;
+ region->catchall.rulei = rulei;
+ return 0;
+
+err_entry_add:
+err_rulei_commit:
+err_rulei_act_continue:
+ mlxsw_sp_acl_rulei_destroy(rulei);
+err_rulei_create:
+ mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+ return err;
+}
+
+static void
+mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_acl_tcam_region *region)
+{
+ struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei;
+
+ mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+ &region->catchall.cchunk,
+ &region->catchall.centry);
+ mlxsw_sp_acl_rulei_destroy(rulei);
+ mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+ void *tcam_priv,
+ struct mlxsw_sp_acl_tcam_region *_region)
+{
+ struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+ int err;
+
+ err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &region->cregion,
+ _region,
+ &mlxsw_sp1_acl_ctcam_region_ops);
+ if (err)
+ return err;
+ err = mlxsw_sp1_acl_ctcam_region_catchall_add(mlxsw_sp, region);
+ if (err)
+ goto err_catchall_add;
+ region->region = _region;
+ return 0;
+
+err_catchall_add:
+ mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+ return err;
+}
+
+static void
+mlxsw_sp1_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv)
+{
+ struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+
+ mlxsw_sp1_acl_ctcam_region_catchall_del(mlxsw_sp, region);
+ mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ return 0;
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
+ unsigned int priority)
+{
+ struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+ mlxsw_sp_acl_ctcam_chunk_init(&region->cregion, &chunk->cchunk,
+ priority);
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_fini(void *chunk_priv)
+{
+ struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+ mlxsw_sp_acl_ctcam_chunk_fini(&chunk->cchunk);
+}
+
+static int mlxsw_sp1_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *chunk_priv,
+ void *entry_priv,
+ struct mlxsw_sp_acl_rule_info *rulei)
+{
+ struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+ struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+ return mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+ &chunk->cchunk, &entry->centry,
+ rulei, false);
+}
+
+static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *chunk_priv,
+ void *entry_priv)
+{
+ struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+ struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+ mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+ &chunk->cchunk, &entry->centry);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *_region,
+ unsigned int offset,
+ bool *activity)
+{
+ char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+ int err;
+
+ mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
+ _region->tcam_region_info, offset, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+ if (err)
+ return err;
+ *activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
+ return 0;
+}
+
+static int
+mlxsw_sp1_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *entry_priv,
+ bool *activity)
+{
+ struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+ unsigned int offset;
+
+ offset = mlxsw_sp_acl_ctcam_entry_offset(&entry->centry);
+ return mlxsw_sp1_acl_tcam_region_entry_activity_get(mlxsw_sp,
+ region->region,
+ offset, activity);
+}
+
+const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops = {
+ .key_type = MLXSW_REG_PTAR_KEY_TYPE_FLEX,
+ .priv_size = 0,
+ .init = mlxsw_sp1_acl_tcam_init,
+ .fini = mlxsw_sp1_acl_tcam_fini,
+ .region_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_region),
+ .region_init = mlxsw_sp1_acl_tcam_region_init,
+ .region_fini = mlxsw_sp1_acl_tcam_region_fini,
+ .region_associate = mlxsw_sp1_acl_tcam_region_associate,
+ .chunk_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_chunk),
+ .chunk_init = mlxsw_sp1_acl_tcam_chunk_init,
+ .chunk_fini = mlxsw_sp1_acl_tcam_chunk_fini,
+ .entry_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_entry),
+ .entry_add = mlxsw_sp1_acl_tcam_entry_add,
+ .entry_del = mlxsw_sp1_acl_tcam_entry_del,
+ .entry_activity_get = mlxsw_sp1_acl_tcam_entry_activity_get,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
new file mode 100644
index 000000000..09ee0a807
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+
+#define MLXSW_SP1_KVDL_SINGLE_BASE 0
+#define MLXSW_SP1_KVDL_SINGLE_SIZE 16384
+#define MLXSW_SP1_KVDL_SINGLE_END \
+ (MLXSW_SP1_KVDL_SINGLE_SIZE + MLXSW_SP1_KVDL_SINGLE_BASE - 1)
+
+#define MLXSW_SP1_KVDL_CHUNKS_BASE \
+ (MLXSW_SP1_KVDL_SINGLE_BASE + MLXSW_SP1_KVDL_SINGLE_SIZE)
+#define MLXSW_SP1_KVDL_CHUNKS_SIZE 49152
+#define MLXSW_SP1_KVDL_CHUNKS_END \
+ (MLXSW_SP1_KVDL_CHUNKS_SIZE + MLXSW_SP1_KVDL_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE \
+ (MLXSW_SP1_KVDL_CHUNKS_BASE + MLXSW_SP1_KVDL_CHUNKS_SIZE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE \
+ (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_END \
+ (MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE 1
+#define MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE 32
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
+
+struct mlxsw_sp1_kvdl_part_info {
+ unsigned int part_index;
+ unsigned int start_index;
+ unsigned int end_index;
+ unsigned int alloc_size;
+ enum mlxsw_sp_resource_id resource_id;
+};
+
+enum mlxsw_sp1_kvdl_part_id {
+ MLXSW_SP1_KVDL_PART_ID_SINGLE,
+ MLXSW_SP1_KVDL_PART_ID_CHUNKS,
+ MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS,
+};
+
+#define MLXSW_SP1_KVDL_PART_INFO(id) \
+[MLXSW_SP1_KVDL_PART_ID_##id] = { \
+ .start_index = MLXSW_SP1_KVDL_##id##_BASE, \
+ .end_index = MLXSW_SP1_KVDL_##id##_END, \
+ .alloc_size = MLXSW_SP1_KVDL_##id##_ALLOC_SIZE, \
+ .resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id, \
+}
+
+static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = {
+ MLXSW_SP1_KVDL_PART_INFO(SINGLE),
+ MLXSW_SP1_KVDL_PART_INFO(CHUNKS),
+ MLXSW_SP1_KVDL_PART_INFO(LARGE_CHUNKS),
+};
+
+#define MLXSW_SP1_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp1_kvdl_parts_info)
+
+struct mlxsw_sp1_kvdl_part {
+ struct mlxsw_sp1_kvdl_part_info info;
+ unsigned long usage[0]; /* Entries */
+};
+
+struct mlxsw_sp1_kvdl {
+ struct mlxsw_sp1_kvdl_part *parts[MLXSW_SP1_KVDL_PARTS_INFO_LEN];
+};
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_alloc_size_part(struct mlxsw_sp1_kvdl *kvdl,
+ unsigned int alloc_size)
+{
+ struct mlxsw_sp1_kvdl_part *part, *min_part = NULL;
+ int i;
+
+ for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+ part = kvdl->parts[i];
+ if (alloc_size <= part->info.alloc_size &&
+ (!min_part ||
+ part->info.alloc_size <= min_part->info.alloc_size))
+ min_part = part;
+ }
+
+ return min_part ?: ERR_PTR(-ENOBUFS);
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_index_part(struct mlxsw_sp1_kvdl *kvdl, u32 kvdl_index)
+{
+ struct mlxsw_sp1_kvdl_part *part;
+ int i;
+
+ for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+ part = kvdl->parts[i];
+ if (kvdl_index >= part->info.start_index &&
+ kvdl_index <= part->info.end_index)
+ return part;
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
+static u32
+mlxsw_sp1_kvdl_to_kvdl_index(const struct mlxsw_sp1_kvdl_part_info *info,
+ unsigned int entry_index)
+{
+ return info->start_index + entry_index * info->alloc_size;
+}
+
+static unsigned int
+mlxsw_sp1_kvdl_to_entry_index(const struct mlxsw_sp1_kvdl_part_info *info,
+ u32 kvdl_index)
+{
+ return (kvdl_index - info->start_index) / info->alloc_size;
+}
+
+static int mlxsw_sp1_kvdl_part_alloc(struct mlxsw_sp1_kvdl_part *part,
+ u32 *p_kvdl_index)
+{
+ const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+ unsigned int entry_index, nr_entries;
+
+ nr_entries = (info->end_index - info->start_index + 1) /
+ info->alloc_size;
+ entry_index = find_first_zero_bit(part->usage, nr_entries);
+ if (entry_index == nr_entries)
+ return -ENOBUFS;
+ __set_bit(entry_index, part->usage);
+
+ *p_kvdl_index = mlxsw_sp1_kvdl_to_kvdl_index(info, entry_index);
+
+ return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_free(struct mlxsw_sp1_kvdl_part *part,
+ u32 kvdl_index)
+{
+ const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+ unsigned int entry_index;
+
+ entry_index = mlxsw_sp1_kvdl_to_entry_index(info, kvdl_index);
+ __clear_bit(entry_index, part->usage);
+}
+
+static int mlxsw_sp1_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ u32 *p_entry_index)
+{
+ struct mlxsw_sp1_kvdl *kvdl = priv;
+ struct mlxsw_sp1_kvdl_part *part;
+
+ /* Find partition with smallest allocation size satisfying the
+ * requested size.
+ */
+ part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+ if (IS_ERR(part))
+ return PTR_ERR(part);
+
+ return mlxsw_sp1_kvdl_part_alloc(part, p_entry_index);
+}
+
+static void mlxsw_sp1_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count, int entry_index)
+{
+ struct mlxsw_sp1_kvdl *kvdl = priv;
+ struct mlxsw_sp1_kvdl_part *part;
+
+ part = mlxsw_sp1_kvdl_index_part(kvdl, entry_index);
+ if (IS_ERR(part))
+ return;
+ mlxsw_sp1_kvdl_part_free(part, entry_index);
+}
+
+static int mlxsw_sp1_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+ void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ unsigned int *p_alloc_size)
+{
+ struct mlxsw_sp1_kvdl *kvdl = priv;
+ struct mlxsw_sp1_kvdl_part *part;
+
+ part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+ if (IS_ERR(part))
+ return PTR_ERR(part);
+
+ *p_alloc_size = part->info.alloc_size;
+
+ return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_update(struct mlxsw_sp1_kvdl_part *part,
+ struct mlxsw_sp1_kvdl_part *part_prev,
+ unsigned int size)
+{
+ if (!part_prev) {
+ part->info.end_index = size - 1;
+ } else {
+ part->info.start_index = part_prev->info.end_index + 1;
+ part->info.end_index = part->info.start_index + size - 1;
+ }
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp1_kvdl_part_info *info,
+ struct mlxsw_sp1_kvdl_part *part_prev)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ struct mlxsw_sp1_kvdl_part *part;
+ bool need_update = true;
+ unsigned int nr_entries;
+ size_t usage_size;
+ u64 resource_size;
+ int err;
+
+ err = devlink_resource_size_get(devlink, info->resource_id,
+ &resource_size);
+ if (err) {
+ need_update = false;
+ resource_size = info->end_index - info->start_index + 1;
+ }
+
+ nr_entries = div_u64(resource_size, info->alloc_size);
+ usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
+ part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+ if (!part)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&part->info, info, sizeof(part->info));
+
+ if (need_update)
+ mlxsw_sp1_kvdl_part_update(part, part_prev, resource_size);
+ return part;
+}
+
+static void mlxsw_sp1_kvdl_part_fini(struct mlxsw_sp1_kvdl_part *part)
+{
+ kfree(part);
+}
+
+static int mlxsw_sp1_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_kvdl *kvdl)
+{
+ const struct mlxsw_sp1_kvdl_part_info *info;
+ struct mlxsw_sp1_kvdl_part *part_prev = NULL;
+ int err, i;
+
+ for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+ info = &mlxsw_sp1_kvdl_parts_info[i];
+ kvdl->parts[i] = mlxsw_sp1_kvdl_part_init(mlxsw_sp, info,
+ part_prev);
+ if (IS_ERR(kvdl->parts[i])) {
+ err = PTR_ERR(kvdl->parts[i]);
+ goto err_kvdl_part_init;
+ }
+ part_prev = kvdl->parts[i];
+ }
+ return 0;
+
+err_kvdl_part_init:
+ for (i--; i >= 0; i--)
+ mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+ return err;
+}
+
+static void mlxsw_sp1_kvdl_parts_fini(struct mlxsw_sp1_kvdl *kvdl)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+ mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+}
+
+static u64 mlxsw_sp1_kvdl_part_occ(struct mlxsw_sp1_kvdl_part *part)
+{
+ const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+ unsigned int nr_entries;
+ int bit = -1;
+ u64 occ = 0;
+
+ nr_entries = (info->end_index -
+ info->start_index + 1) /
+ info->alloc_size;
+ while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
+ < nr_entries)
+ occ += info->alloc_size;
+ return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_occ_get(void *priv)
+{
+ const struct mlxsw_sp1_kvdl *kvdl = priv;
+ u64 occ = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+ occ += mlxsw_sp1_kvdl_part_occ(kvdl->parts[i]);
+
+ return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_single_occ_get(void *priv)
+{
+ const struct mlxsw_sp1_kvdl *kvdl = priv;
+ struct mlxsw_sp1_kvdl_part *part;
+
+ part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_SINGLE];
+ return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_chunks_occ_get(void *priv)
+{
+ const struct mlxsw_sp1_kvdl *kvdl = priv;
+ struct mlxsw_sp1_kvdl_part *part;
+
+ part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_CHUNKS];
+ return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_large_chunks_occ_get(void *priv)
+{
+ const struct mlxsw_sp1_kvdl *kvdl = priv;
+ struct mlxsw_sp1_kvdl_part *part;
+
+ part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS];
+ return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static int mlxsw_sp1_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ struct mlxsw_sp1_kvdl *kvdl = priv;
+ int err;
+
+ err = mlxsw_sp1_kvdl_parts_init(mlxsw_sp, kvdl);
+ if (err)
+ return err;
+ devlink_resource_occ_get_register(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ mlxsw_sp1_kvdl_occ_get,
+ kvdl);
+ devlink_resource_occ_get_register(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ mlxsw_sp1_kvdl_single_occ_get,
+ kvdl);
+ devlink_resource_occ_get_register(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ mlxsw_sp1_kvdl_chunks_occ_get,
+ kvdl);
+ devlink_resource_occ_get_register(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+ mlxsw_sp1_kvdl_large_chunks_occ_get,
+ kvdl);
+ return 0;
+}
+
+static void mlxsw_sp1_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ struct mlxsw_sp1_kvdl *kvdl = priv;
+
+ devlink_resource_occ_get_unregister(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
+ devlink_resource_occ_get_unregister(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
+ devlink_resource_occ_get_unregister(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
+ devlink_resource_occ_get_unregister(devlink,
+ MLXSW_SP_RESOURCE_KVD_LINEAR);
+ mlxsw_sp1_kvdl_parts_fini(kvdl);
+}
+
+const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops = {
+ .priv_size = sizeof(struct mlxsw_sp1_kvdl),
+ .init = mlxsw_sp1_kvdl_init,
+ .fini = mlxsw_sp1_kvdl_fini,
+ .alloc = mlxsw_sp1_kvdl_alloc,
+ .free = mlxsw_sp1_kvdl_free,
+ .alloc_size_query = mlxsw_sp1_kvdl_alloc_size_query,
+};
+
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ static struct devlink_resource_size_params size_params;
+ u32 kvdl_max_size;
+ int err;
+
+ kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+ MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+ devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+ MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+ MLXSW_SP1_KVDL_SINGLE_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &size_params);
+ if (err)
+ return err;
+
+ devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+ MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+ MLXSW_SP1_KVDL_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &size_params);
+ if (err)
+ return err;
+
+ devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+ MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
+ DEVLINK_RESOURCE_UNIT_ENTRY);
+ err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+ MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE,
+ MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ &size_params);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
new file mode 100644
index 000000000..c8c675369
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_mr.h"
+
+struct mlxsw_sp1_mr_tcam_region {
+ struct mlxsw_sp *mlxsw_sp;
+ enum mlxsw_reg_rtar_key_type rtar_key_type;
+ struct parman *parman;
+ struct parman_prio *parman_prios;
+};
+
+struct mlxsw_sp1_mr_tcam {
+ struct mlxsw_sp1_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
+};
+
+struct mlxsw_sp1_mr_tcam_route {
+ struct parman_item parman_item;
+ struct parman_prio *parman_prio;
+};
+
+static int mlxsw_sp1_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
+ struct parman_item *parman_item,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block)
+{
+ char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+ switch (key->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
+ key->vrid,
+ MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+ ntohl(key->group.addr4),
+ ntohl(key->group_mask.addr4),
+ ntohl(key->source.addr4),
+ ntohl(key->source_mask.addr4),
+ mlxsw_afa_block_first_set(afa_block));
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
+ key->vrid,
+ MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+ key->group.addr6,
+ key->group_mask.addr6,
+ key->source.addr6,
+ key->source_mask.addr6,
+ mlxsw_afa_block_first_set(afa_block));
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp,
+ struct parman_item *parman_item,
+ struct mlxsw_sp_mr_route_key *key)
+{
+ struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
+ char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+ switch (key->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
+ key->vrid, 0, 0, 0, 0, 0, 0, NULL);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
+ key->vrid, 0, 0, zero_addr, zero_addr,
+ zero_addr, zero_addr, NULL);
+ break;
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static struct mlxsw_sp1_mr_tcam_region *
+mlxsw_sp1_mr_tcam_protocol_region(struct mlxsw_sp1_mr_tcam *mr_tcam,
+ enum mlxsw_sp_l3proto proto)
+{
+ return &mr_tcam->tcam_regions[proto];
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_parman_item_add(struct mlxsw_sp1_mr_tcam *mr_tcam,
+ struct mlxsw_sp1_mr_tcam_route *route,
+ struct mlxsw_sp_mr_route_key *key,
+ enum mlxsw_sp_mr_route_prio prio)
+{
+ struct mlxsw_sp1_mr_tcam_region *tcam_region;
+ int err;
+
+ tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+ err = parman_item_add(tcam_region->parman,
+ &tcam_region->parman_prios[prio],
+ &route->parman_item);
+ if (err)
+ return err;
+
+ route->parman_prio = &tcam_region->parman_prios[prio];
+ return 0;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_parman_item_remove(struct mlxsw_sp1_mr_tcam *mr_tcam,
+ struct mlxsw_sp1_mr_tcam_route *route,
+ struct mlxsw_sp_mr_route_key *key)
+{
+ struct mlxsw_sp1_mr_tcam_region *tcam_region;
+
+ tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+ parman_item_remove(tcam_region->parman,
+ route->parman_prio, &route->parman_item);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block,
+ enum mlxsw_sp_mr_route_prio prio)
+{
+ struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+ int err;
+
+ err = mlxsw_sp1_mr_tcam_route_parman_item_add(mr_tcam, route,
+ key, prio);
+ if (err)
+ return err;
+
+ err = mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+ key, afa_block);
+ if (err)
+ goto err_route_replace;
+ return 0;
+
+err_route_replace:
+ mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+ return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key)
+{
+ struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+
+ mlxsw_sp1_mr_tcam_route_remove(mlxsw_sp, &route->parman_item, key);
+ mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block)
+{
+ struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+
+ return mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+ key, afa_block);
+}
+
+#define MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP 16
+
+static int
+mlxsw_sp1_mr_tcam_region_alloc(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
+ mr_tcam_region->rtar_key_type,
+ MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_free(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
+ mr_tcam_region->rtar_key_type, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_region_parman_resize(void *priv,
+ unsigned long new_count)
+{
+ struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rtar_pl[MLXSW_REG_RTAR_LEN];
+ u64 max_tcam_rules;
+
+ max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+ if (new_count > max_tcam_rules)
+ return -EINVAL;
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
+ mr_tcam_region->rtar_key_type, new_count);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void mlxsw_sp1_mr_tcam_region_parman_move(void *priv,
+ unsigned long from_index,
+ unsigned long to_index,
+ unsigned long count)
+{
+ struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rrcr_pl[MLXSW_REG_RRCR_LEN];
+
+ mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
+ from_index, count,
+ mr_tcam_region->rtar_key_type, to_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
+}
+
+static const struct parman_ops mlxsw_sp1_mr_tcam_region_parman_ops = {
+ .base_count = MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT,
+ .resize_step = MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP,
+ .resize = mlxsw_sp1_mr_tcam_region_parman_resize,
+ .move = mlxsw_sp1_mr_tcam_region_parman_move,
+ .algo = PARMAN_ALGO_TYPE_LSORT,
+};
+
+static int
+mlxsw_sp1_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp1_mr_tcam_region *mr_tcam_region,
+ enum mlxsw_reg_rtar_key_type rtar_key_type)
+{
+ struct parman_prio *parman_prios;
+ struct parman *parman;
+ int err;
+ int i;
+
+ mr_tcam_region->rtar_key_type = rtar_key_type;
+ mr_tcam_region->mlxsw_sp = mlxsw_sp;
+
+ err = mlxsw_sp1_mr_tcam_region_alloc(mr_tcam_region);
+ if (err)
+ return err;
+
+ parman = parman_create(&mlxsw_sp1_mr_tcam_region_parman_ops,
+ mr_tcam_region);
+ if (!parman) {
+ err = -ENOMEM;
+ goto err_parman_create;
+ }
+ mr_tcam_region->parman = parman;
+
+ parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
+ sizeof(*parman_prios), GFP_KERNEL);
+ if (!parman_prios) {
+ err = -ENOMEM;
+ goto err_parman_prios_alloc;
+ }
+ mr_tcam_region->parman_prios = parman_prios;
+
+ for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+ parman_prio_init(mr_tcam_region->parman,
+ &mr_tcam_region->parman_prios[i], i);
+ return 0;
+
+err_parman_prios_alloc:
+ parman_destroy(parman);
+err_parman_create:
+ mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+ return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_fini(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+ parman_prio_fini(&mr_tcam_region->parman_prios[i]);
+ kfree(mr_tcam_region->parman_prios);
+ parman_destroy(mr_tcam_region->parman);
+ mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+}
+
+static int mlxsw_sp1_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+ struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+ u32 rtar_key;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+ return -EIO;
+
+ rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
+ err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+ &region[MLXSW_SP_L3_PROTO_IPV4],
+ rtar_key);
+ if (err)
+ return err;
+
+ rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
+ err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+ &region[MLXSW_SP_L3_PROTO_IPV6],
+ rtar_key);
+ if (err)
+ goto err_ipv6_region_init;
+
+ return 0;
+
+err_ipv6_region_init:
+ mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+ return err;
+}
+
+static void mlxsw_sp1_mr_tcam_fini(void *priv)
+{
+ struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+ struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+
+ mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
+ mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+}
+
+const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops = {
+ .priv_size = sizeof(struct mlxsw_sp1_mr_tcam),
+ .init = mlxsw_sp1_mr_tcam_init,
+ .fini = mlxsw_sp1_mr_tcam_fini,
+ .route_priv_size = sizeof(struct mlxsw_sp1_mr_tcam_route),
+ .route_create = mlxsw_sp1_mr_tcam_route_create,
+ .route_destroy = mlxsw_sp1_mr_tcam_route_destroy,
+ .route_update = mlxsw_sp1_mr_tcam_route_update,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
new file mode 100644
index 000000000..ffd4b055f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+#include "core_acl_flex_actions.h"
+
+struct mlxsw_sp2_acl_tcam {
+ struct mlxsw_sp_acl_atcam atcam;
+ u32 kvdl_index;
+ unsigned int kvdl_count;
+};
+
+struct mlxsw_sp2_acl_tcam_region {
+ struct mlxsw_sp_acl_atcam_region aregion;
+ struct mlxsw_sp_acl_tcam_region *region;
+};
+
+struct mlxsw_sp2_acl_tcam_chunk {
+ struct mlxsw_sp_acl_atcam_chunk achunk;
+};
+
+struct mlxsw_sp2_acl_tcam_entry {
+ struct mlxsw_sp_acl_atcam_entry aentry;
+ struct mlxsw_afa_block *act_block;
+};
+
+static int
+mlxsw_sp2_acl_ctcam_region_entry_insert(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry,
+ const char *mask)
+{
+ struct mlxsw_sp_acl_atcam_region *aregion;
+ struct mlxsw_sp_acl_atcam_entry *aentry;
+ struct mlxsw_sp_acl_erp *erp;
+
+ aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion);
+ aentry = mlxsw_sp_acl_tcam_centry_aentry(centry);
+
+ erp = mlxsw_sp_acl_erp_get(aregion, mask, true);
+ if (IS_ERR(erp))
+ return PTR_ERR(erp);
+ aentry->erp = erp;
+
+ return 0;
+}
+
+static void
+mlxsw_sp2_acl_ctcam_region_entry_remove(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+ struct mlxsw_sp_acl_atcam_region *aregion;
+ struct mlxsw_sp_acl_atcam_entry *aentry;
+
+ aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion);
+ aentry = mlxsw_sp_acl_tcam_centry_aentry(centry);
+
+ mlxsw_sp_acl_erp_put(aregion, aentry->erp);
+}
+
+static const struct mlxsw_sp_acl_ctcam_region_ops
+mlxsw_sp2_acl_ctcam_region_ops = {
+ .entry_insert = mlxsw_sp2_acl_ctcam_region_entry_insert,
+ .entry_remove = mlxsw_sp2_acl_ctcam_region_entry_remove,
+};
+
+static int mlxsw_sp2_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
+ struct mlxsw_sp_acl_tcam *_tcam)
+{
+ struct mlxsw_sp2_acl_tcam *tcam = priv;
+ struct mlxsw_afa_block *afa_block;
+ char pefa_pl[MLXSW_REG_PEFA_LEN];
+ char pgcr_pl[MLXSW_REG_PGCR_LEN];
+ char *enc_actions;
+ int i;
+ int err;
+
+ tcam->kvdl_count = _tcam->max_regions;
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+ tcam->kvdl_count, &tcam->kvdl_index);
+ if (err)
+ return err;
+
+ /* Create flex action block, set default action (continue)
+ * but don't commit. We need just the current set encoding
+ * to be written using PEFA register to all indexes for all regions.
+ */
+ afa_block = mlxsw_afa_block_create(mlxsw_sp->afa);
+ if (IS_ERR(afa_block)) {
+ err = PTR_ERR(afa_block);
+ goto err_afa_block;
+ }
+ err = mlxsw_afa_block_continue(afa_block);
+ if (WARN_ON(err))
+ goto err_afa_block_continue;
+ enc_actions = mlxsw_afa_block_cur_set(afa_block);
+
+ for (i = 0; i < tcam->kvdl_count; i++) {
+ mlxsw_reg_pefa_pack(pefa_pl, tcam->kvdl_index + i,
+ true, enc_actions);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
+ if (err)
+ goto err_pefa_write;
+ }
+ mlxsw_reg_pgcr_pack(pgcr_pl, tcam->kvdl_index);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pgcr), pgcr_pl);
+ if (err)
+ goto err_pgcr_write;
+
+ err = mlxsw_sp_acl_atcam_init(mlxsw_sp, &tcam->atcam);
+ if (err)
+ goto err_atcam_init;
+
+ mlxsw_afa_block_destroy(afa_block);
+ return 0;
+
+err_atcam_init:
+err_pgcr_write:
+err_pefa_write:
+err_afa_block_continue:
+ mlxsw_afa_block_destroy(afa_block);
+err_afa_block:
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+ tcam->kvdl_count, tcam->kvdl_index);
+ return err;
+}
+
+static void mlxsw_sp2_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ struct mlxsw_sp2_acl_tcam *tcam = priv;
+
+ mlxsw_sp_acl_atcam_fini(mlxsw_sp, &tcam->atcam);
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+ tcam->kvdl_count, tcam->kvdl_index);
+}
+
+static int
+mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+ void *tcam_priv,
+ struct mlxsw_sp_acl_tcam_region *_region)
+{
+ struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp2_acl_tcam *tcam = tcam_priv;
+
+ region->region = _region;
+
+ return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam,
+ &region->aregion, _region,
+ &mlxsw_sp2_acl_ctcam_region_ops);
+}
+
+static void
+mlxsw_sp2_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv)
+{
+ struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+
+ mlxsw_sp_acl_atcam_region_fini(&region->aregion);
+}
+
+static int
+mlxsw_sp2_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id);
+}
+
+static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
+ unsigned int priority)
+{
+ struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+
+ mlxsw_sp_acl_atcam_chunk_init(&region->aregion, &chunk->achunk,
+ priority);
+}
+
+static void mlxsw_sp2_acl_tcam_chunk_fini(void *chunk_priv)
+{
+ struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+
+ mlxsw_sp_acl_atcam_chunk_fini(&chunk->achunk);
+}
+
+static int mlxsw_sp2_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *chunk_priv,
+ void *entry_priv,
+ struct mlxsw_sp_acl_rule_info *rulei)
+{
+ struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+ struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv;
+
+ entry->act_block = rulei->act_block;
+ return mlxsw_sp_acl_atcam_entry_add(mlxsw_sp, &region->aregion,
+ &chunk->achunk, &entry->aentry,
+ rulei);
+}
+
+static void mlxsw_sp2_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *chunk_priv,
+ void *entry_priv)
+{
+ struct mlxsw_sp2_acl_tcam_region *region = region_priv;
+ struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv;
+ struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv;
+
+ mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, &region->aregion, &chunk->achunk,
+ &entry->aentry);
+}
+
+static int
+mlxsw_sp2_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+ void *region_priv, void *entry_priv,
+ bool *activity)
+{
+ struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv;
+
+ return mlxsw_afa_block_activity_get(entry->act_block, activity);
+}
+
+const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops = {
+ .key_type = MLXSW_REG_PTAR_KEY_TYPE_FLEX2,
+ .priv_size = sizeof(struct mlxsw_sp2_acl_tcam),
+ .init = mlxsw_sp2_acl_tcam_init,
+ .fini = mlxsw_sp2_acl_tcam_fini,
+ .region_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_region),
+ .region_init = mlxsw_sp2_acl_tcam_region_init,
+ .region_fini = mlxsw_sp2_acl_tcam_region_fini,
+ .region_associate = mlxsw_sp2_acl_tcam_region_associate,
+ .chunk_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_chunk),
+ .chunk_init = mlxsw_sp2_acl_tcam_chunk_init,
+ .chunk_fini = mlxsw_sp2_acl_tcam_chunk_fini,
+ .entry_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_entry),
+ .entry_add = mlxsw_sp2_acl_tcam_entry_add,
+ .entry_del = mlxsw_sp2_acl_tcam_entry_del,
+ .entry_activity_get = mlxsw_sp2_acl_tcam_entry_activity_get,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
new file mode 100644
index 000000000..68c8b148b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+#include "core.h"
+#include "reg.h"
+#include "resources.h"
+
+struct mlxsw_sp2_kvdl_part_info {
+ u8 res_type;
+ /* For each defined partititon we need to know how many
+ * usage bits we need and how many indexes there are
+ * represented by a single bit. This could be got from FW
+ * querying appropriate resources. So have the resource
+ * ids for for this purpose in partition definition.
+ */
+ enum mlxsw_res_id usage_bit_count_res_id;
+ enum mlxsw_res_id index_range_res_id;
+};
+
+#define MLXSW_SP2_KVDL_PART_INFO(_entry_type, _res_type, \
+ _usage_bit_count_res_id, _index_range_res_id) \
+[MLXSW_SP_KVDL_ENTRY_TYPE_##_entry_type] = { \
+ .res_type = _res_type, \
+ .usage_bit_count_res_id = MLXSW_RES_ID_##_usage_bit_count_res_id, \
+ .index_range_res_id = MLXSW_RES_ID_##_index_range_res_id, \
+}
+
+static const struct mlxsw_sp2_kvdl_part_info mlxsw_sp2_kvdl_parts_info[] = {
+ MLXSW_SP2_KVDL_PART_INFO(ADJ, 0x21, KVD_SIZE, MAX_KVD_LINEAR_RANGE),
+ MLXSW_SP2_KVDL_PART_INFO(ACTSET, 0x23, MAX_KVD_ACTION_SETS,
+ MAX_KVD_ACTION_SETS),
+ MLXSW_SP2_KVDL_PART_INFO(PBS, 0x24, KVD_SIZE, KVD_SIZE),
+ MLXSW_SP2_KVDL_PART_INFO(MCRIGR, 0x26, KVD_SIZE, KVD_SIZE),
+};
+
+#define MLXSW_SP2_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp2_kvdl_parts_info)
+
+struct mlxsw_sp2_kvdl_part {
+ const struct mlxsw_sp2_kvdl_part_info *info;
+ unsigned int usage_bit_count;
+ unsigned int indexes_per_usage_bit;
+ unsigned int last_allocated_bit;
+ unsigned long usage[0]; /* Usage bits */
+};
+
+struct mlxsw_sp2_kvdl {
+ struct mlxsw_sp2_kvdl_part *parts[MLXSW_SP2_KVDL_PARTS_INFO_LEN];
+};
+
+static int mlxsw_sp2_kvdl_part_find_zero_bits(struct mlxsw_sp2_kvdl_part *part,
+ unsigned int bit_count,
+ unsigned int *p_bit)
+{
+ unsigned int start_bit;
+ unsigned int bit;
+ unsigned int i;
+ bool wrap = false;
+
+ start_bit = part->last_allocated_bit + 1;
+ if (start_bit == part->usage_bit_count)
+ start_bit = 0;
+ bit = start_bit;
+again:
+ bit = find_next_zero_bit(part->usage, part->usage_bit_count, bit);
+ if (!wrap && bit + bit_count >= part->usage_bit_count) {
+ wrap = true;
+ bit = 0;
+ goto again;
+ }
+ if (wrap && bit + bit_count >= start_bit)
+ return -ENOBUFS;
+ for (i = 0; i < bit_count; i++) {
+ if (test_bit(bit + i, part->usage)) {
+ bit += bit_count;
+ goto again;
+ }
+ }
+ *p_bit = bit;
+ return 0;
+}
+
+static int mlxsw_sp2_kvdl_part_alloc(struct mlxsw_sp2_kvdl_part *part,
+ unsigned int size,
+ u32 *p_kvdl_index)
+{
+ unsigned int bit_count;
+ unsigned int bit;
+ unsigned int i;
+ int err;
+
+ bit_count = DIV_ROUND_UP(size, part->indexes_per_usage_bit);
+ err = mlxsw_sp2_kvdl_part_find_zero_bits(part, bit_count, &bit);
+ if (err)
+ return err;
+ for (i = 0; i < bit_count; i++)
+ __set_bit(bit + i, part->usage);
+ *p_kvdl_index = bit * part->indexes_per_usage_bit;
+ return 0;
+}
+
+static int mlxsw_sp2_kvdl_rec_del(struct mlxsw_sp *mlxsw_sp, u8 res_type,
+ u16 size, u32 kvdl_index)
+{
+ char *iedr_pl;
+ int err;
+
+ iedr_pl = kmalloc(MLXSW_REG_IEDR_LEN, GFP_KERNEL);
+ if (!iedr_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_iedr_pack(iedr_pl);
+ mlxsw_reg_iedr_rec_pack(iedr_pl, 0, res_type, size, kvdl_index);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(iedr), iedr_pl);
+ kfree(iedr_pl);
+ return err;
+}
+
+static void mlxsw_sp2_kvdl_part_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp2_kvdl_part *part,
+ unsigned int size, u32 kvdl_index)
+{
+ unsigned int bit_count;
+ unsigned int bit;
+ unsigned int i;
+ int err;
+
+ /* We need to ask FW to delete previously used KVD linear index */
+ err = mlxsw_sp2_kvdl_rec_del(mlxsw_sp, part->info->res_type,
+ size, kvdl_index);
+ if (err)
+ return;
+
+ bit_count = DIV_ROUND_UP(size, part->indexes_per_usage_bit);
+ bit = kvdl_index / part->indexes_per_usage_bit;
+ for (i = 0; i < bit_count; i++)
+ __clear_bit(bit + i, part->usage);
+}
+
+static int mlxsw_sp2_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ u32 *p_entry_index)
+{
+ unsigned int size = entry_count * mlxsw_sp_kvdl_entry_size(type);
+ struct mlxsw_sp2_kvdl *kvdl = priv;
+ struct mlxsw_sp2_kvdl_part *part = kvdl->parts[type];
+
+ return mlxsw_sp2_kvdl_part_alloc(part, size, p_entry_index);
+}
+
+static void mlxsw_sp2_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ int entry_index)
+{
+ unsigned int size = entry_count * mlxsw_sp_kvdl_entry_size(type);
+ struct mlxsw_sp2_kvdl *kvdl = priv;
+ struct mlxsw_sp2_kvdl_part *part = kvdl->parts[type];
+
+ return mlxsw_sp2_kvdl_part_free(mlxsw_sp, part, size, entry_index);
+}
+
+static int mlxsw_sp2_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+ void *priv,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ unsigned int *p_alloc_count)
+{
+ *p_alloc_count = entry_count;
+ return 0;
+}
+
+static struct mlxsw_sp2_kvdl_part *
+mlxsw_sp2_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp2_kvdl_part_info *info)
+{
+ unsigned int indexes_per_usage_bit;
+ struct mlxsw_sp2_kvdl_part *part;
+ unsigned int index_range;
+ unsigned int usage_bit_count;
+ size_t usage_size;
+
+ if (!mlxsw_core_res_valid(mlxsw_sp->core,
+ info->usage_bit_count_res_id) ||
+ !mlxsw_core_res_valid(mlxsw_sp->core,
+ info->index_range_res_id))
+ return ERR_PTR(-EIO);
+ usage_bit_count = mlxsw_core_res_get(mlxsw_sp->core,
+ info->usage_bit_count_res_id);
+ index_range = mlxsw_core_res_get(mlxsw_sp->core,
+ info->index_range_res_id);
+
+ /* For some partitions, one usage bit represents a group of indexes.
+ * That's why we compute the number of indexes per usage bit here,
+ * according to queried resources.
+ */
+ indexes_per_usage_bit = index_range / usage_bit_count;
+
+ usage_size = BITS_TO_LONGS(usage_bit_count) * sizeof(unsigned long);
+ part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+ if (!part)
+ return ERR_PTR(-ENOMEM);
+ part->info = info;
+ part->usage_bit_count = usage_bit_count;
+ part->indexes_per_usage_bit = indexes_per_usage_bit;
+ part->last_allocated_bit = usage_bit_count - 1;
+ return part;
+}
+
+static void mlxsw_sp2_kvdl_part_fini(struct mlxsw_sp2_kvdl_part *part)
+{
+ kfree(part);
+}
+
+static int mlxsw_sp2_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp2_kvdl *kvdl)
+{
+ const struct mlxsw_sp2_kvdl_part_info *info;
+ int i;
+ int err;
+
+ for (i = 0; i < MLXSW_SP2_KVDL_PARTS_INFO_LEN; i++) {
+ info = &mlxsw_sp2_kvdl_parts_info[i];
+ kvdl->parts[i] = mlxsw_sp2_kvdl_part_init(mlxsw_sp, info);
+ if (IS_ERR(kvdl->parts[i])) {
+ err = PTR_ERR(kvdl->parts[i]);
+ goto err_kvdl_part_init;
+ }
+ }
+ return 0;
+
+err_kvdl_part_init:
+ for (i--; i >= 0; i--)
+ mlxsw_sp2_kvdl_part_fini(kvdl->parts[i]);
+ return err;
+}
+
+static void mlxsw_sp2_kvdl_parts_fini(struct mlxsw_sp2_kvdl *kvdl)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP2_KVDL_PARTS_INFO_LEN; i++)
+ mlxsw_sp2_kvdl_part_fini(kvdl->parts[i]);
+}
+
+static int mlxsw_sp2_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ struct mlxsw_sp2_kvdl *kvdl = priv;
+
+ return mlxsw_sp2_kvdl_parts_init(mlxsw_sp, kvdl);
+}
+
+static void mlxsw_sp2_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ struct mlxsw_sp2_kvdl *kvdl = priv;
+
+ mlxsw_sp2_kvdl_parts_fini(kvdl);
+}
+
+const struct mlxsw_sp_kvdl_ops mlxsw_sp2_kvdl_ops = {
+ .priv_size = sizeof(struct mlxsw_sp2_kvdl),
+ .init = mlxsw_sp2_kvdl_init,
+ .fini = mlxsw_sp2_kvdl_fini,
+ .alloc = mlxsw_sp2_kvdl_alloc,
+ .free = mlxsw_sp2_kvdl_free,
+ .alloc_size_query = mlxsw_sp2_kvdl_alloc_size_query,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
new file mode 100644
index 000000000..4dd624781
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+
+#include "core_acl_flex_actions.h"
+#include "spectrum.h"
+#include "spectrum_mr.h"
+
+static int
+mlxsw_sp2_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block,
+ enum mlxsw_sp_mr_route_prio prio)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp2_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key)
+{
+}
+
+static int
+mlxsw_sp2_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block)
+{
+ return 0;
+}
+
+static int mlxsw_sp2_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ return 0;
+}
+
+static void mlxsw_sp2_mr_tcam_fini(void *priv)
+{
+}
+
+const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops = {
+ .init = mlxsw_sp2_mr_tcam_init,
+ .fini = mlxsw_sp2_mr_tcam_fini,
+ .route_create = mlxsw_sp2_mr_tcam_route_create,
+ .route_destroy = mlxsw_sp2_mr_tcam_route_destroy,
+ .route_update = mlxsw_sp2_mr_tcam_route_update,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
new file mode 100644
index 000000000..c99f5542d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -0,0 +1,891 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "reg.h"
+#include "core.h"
+#include "resources.h"
+#include "spectrum.h"
+#include "core_acl_flex_keys.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_acl_tcam.h"
+
+struct mlxsw_sp_acl {
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_afk *afk;
+ struct mlxsw_sp_fid *dummy_fid;
+ struct rhashtable ruleset_ht;
+ struct list_head rules;
+ struct {
+ struct delayed_work dw;
+ unsigned long interval; /* ms */
+#define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000
+ } rule_activity_update;
+ struct mlxsw_sp_acl_tcam tcam;
+};
+
+struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl)
+{
+ return acl->afk;
+}
+
+struct mlxsw_sp_acl_block_binding {
+ struct list_head list;
+ struct net_device *dev;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ bool ingress;
+};
+
+struct mlxsw_sp_acl_block {
+ struct list_head binding_list;
+ struct mlxsw_sp_acl_ruleset *ruleset_zero;
+ struct mlxsw_sp *mlxsw_sp;
+ unsigned int rule_count;
+ unsigned int disable_count;
+};
+
+struct mlxsw_sp_acl_ruleset_ht_key {
+ struct mlxsw_sp_acl_block *block;
+ u32 chain_index;
+ const struct mlxsw_sp_acl_profile_ops *ops;
+};
+
+struct mlxsw_sp_acl_ruleset {
+ struct rhash_head ht_node; /* Member of acl HT */
+ struct mlxsw_sp_acl_ruleset_ht_key ht_key;
+ struct rhashtable rule_ht;
+ unsigned int ref_count;
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+struct mlxsw_sp_acl_rule {
+ struct rhash_head ht_node; /* Member of rule HT */
+ struct list_head list;
+ unsigned long cookie; /* HT key */
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ struct mlxsw_sp_acl_rule_info *rulei;
+ u64 last_used;
+ u64 last_packets;
+ u64 last_bytes;
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_ruleset_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_acl_ruleset_ht_key),
+ .key_offset = offsetof(struct mlxsw_sp_acl_ruleset, ht_key),
+ .head_offset = offsetof(struct mlxsw_sp_acl_ruleset, ht_node),
+ .automatic_shrinking = true,
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_rule_ht_params = {
+ .key_len = sizeof(unsigned long),
+ .key_offset = offsetof(struct mlxsw_sp_acl_rule, cookie),
+ .head_offset = offsetof(struct mlxsw_sp_acl_rule, ht_node),
+ .automatic_shrinking = true,
+};
+
+struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp)
+{
+ return mlxsw_sp->acl->dummy_fid;
+}
+
+struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block)
+{
+ return block->mlxsw_sp;
+}
+
+unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block)
+{
+ return block ? block->rule_count : 0;
+}
+
+void mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block)
+{
+ if (block)
+ block->disable_count++;
+}
+
+void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block)
+{
+ if (block)
+ block->disable_count--;
+}
+
+bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block)
+{
+ return block->disable_count;
+}
+
+bool mlxsw_sp_acl_block_is_egress_bound(struct mlxsw_sp_acl_block *block)
+{
+ struct mlxsw_sp_acl_block_binding *binding;
+
+ list_for_each_entry(binding, &block->binding_list, list) {
+ if (!binding->ingress)
+ return true;
+ }
+ return false;
+}
+
+static bool
+mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ /* We hold a reference on ruleset ourselves */
+ return ruleset->ref_count == 2;
+}
+
+static int
+mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_acl_block_binding *binding)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero;
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+
+ return ops->ruleset_bind(mlxsw_sp, ruleset->priv,
+ binding->mlxsw_sp_port, binding->ingress);
+}
+
+static void
+mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_acl_block_binding *binding)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero;
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+
+ ops->ruleset_unbind(mlxsw_sp, ruleset->priv,
+ binding->mlxsw_sp_port, binding->ingress);
+}
+
+static bool mlxsw_sp_acl_ruleset_block_bound(struct mlxsw_sp_acl_block *block)
+{
+ return block->ruleset_zero;
+}
+
+static int
+mlxsw_sp_acl_ruleset_block_bind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset,
+ struct mlxsw_sp_acl_block *block)
+{
+ struct mlxsw_sp_acl_block_binding *binding;
+ int err;
+
+ block->ruleset_zero = ruleset;
+ list_for_each_entry(binding, &block->binding_list, list) {
+ err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, block, binding);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ list_for_each_entry_continue_reverse(binding, &block->binding_list,
+ list)
+ mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding);
+ block->ruleset_zero = NULL;
+
+ return err;
+}
+
+static void
+mlxsw_sp_acl_ruleset_block_unbind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset,
+ struct mlxsw_sp_acl_block *block)
+{
+ struct mlxsw_sp_acl_block_binding *binding;
+
+ list_for_each_entry(binding, &block->binding_list, list)
+ mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding);
+ block->ruleset_zero = NULL;
+}
+
+struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp,
+ struct net *net)
+{
+ struct mlxsw_sp_acl_block *block;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return NULL;
+ INIT_LIST_HEAD(&block->binding_list);
+ block->mlxsw_sp = mlxsw_sp;
+ return block;
+}
+
+void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block)
+{
+ WARN_ON(!list_empty(&block->binding_list));
+ kfree(block);
+}
+
+static struct mlxsw_sp_acl_block_binding *
+mlxsw_sp_acl_block_lookup(struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_port *mlxsw_sp_port, bool ingress)
+{
+ struct mlxsw_sp_acl_block_binding *binding;
+
+ list_for_each_entry(binding, &block->binding_list, list)
+ if (binding->mlxsw_sp_port == mlxsw_sp_port &&
+ binding->ingress == ingress)
+ return binding;
+ return NULL;
+}
+
+int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress)
+{
+ struct mlxsw_sp_acl_block_binding *binding;
+ int err;
+
+ if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress)))
+ return -EEXIST;
+
+ binding = kzalloc(sizeof(*binding), GFP_KERNEL);
+ if (!binding)
+ return -ENOMEM;
+ binding->mlxsw_sp_port = mlxsw_sp_port;
+ binding->ingress = ingress;
+
+ if (mlxsw_sp_acl_ruleset_block_bound(block)) {
+ err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, block, binding);
+ if (err)
+ goto err_ruleset_bind;
+ }
+
+ list_add(&binding->list, &block->binding_list);
+ return 0;
+
+err_ruleset_bind:
+ kfree(binding);
+ return err;
+}
+
+int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress)
+{
+ struct mlxsw_sp_acl_block_binding *binding;
+
+ binding = mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress);
+ if (!binding)
+ return -ENOENT;
+
+ list_del(&binding->list);
+
+ if (mlxsw_sp_acl_ruleset_block_bound(block))
+ mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding);
+
+ kfree(binding);
+ return 0;
+}
+
+static struct mlxsw_sp_acl_ruleset *
+mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block, u32 chain_index,
+ const struct mlxsw_sp_acl_profile_ops *ops,
+ struct mlxsw_afk_element_usage *tmplt_elusage)
+{
+ struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ size_t alloc_size;
+ int err;
+
+ alloc_size = sizeof(*ruleset) + ops->ruleset_priv_size;
+ ruleset = kzalloc(alloc_size, GFP_KERNEL);
+ if (!ruleset)
+ return ERR_PTR(-ENOMEM);
+ ruleset->ref_count = 1;
+ ruleset->ht_key.block = block;
+ ruleset->ht_key.chain_index = chain_index;
+ ruleset->ht_key.ops = ops;
+
+ err = rhashtable_init(&ruleset->rule_ht, &mlxsw_sp_acl_rule_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
+ err = ops->ruleset_add(mlxsw_sp, &acl->tcam, ruleset->priv,
+ tmplt_elusage);
+ if (err)
+ goto err_ops_ruleset_add;
+
+ err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node,
+ mlxsw_sp_acl_ruleset_ht_params);
+ if (err)
+ goto err_ht_insert;
+
+ return ruleset;
+
+err_ht_insert:
+ ops->ruleset_del(mlxsw_sp, ruleset->priv);
+err_ops_ruleset_add:
+ rhashtable_destroy(&ruleset->rule_ht);
+err_rhashtable_init:
+ kfree(ruleset);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+ struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+
+ rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
+ mlxsw_sp_acl_ruleset_ht_params);
+ ops->ruleset_del(mlxsw_sp, ruleset->priv);
+ rhashtable_destroy(&ruleset->rule_ht);
+ kfree(ruleset);
+}
+
+static void mlxsw_sp_acl_ruleset_ref_inc(struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ ruleset->ref_count++;
+}
+
+static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ if (--ruleset->ref_count)
+ return;
+ mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset);
+}
+
+static struct mlxsw_sp_acl_ruleset *
+__mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl,
+ struct mlxsw_sp_acl_block *block, u32 chain_index,
+ const struct mlxsw_sp_acl_profile_ops *ops)
+{
+ struct mlxsw_sp_acl_ruleset_ht_key ht_key;
+
+ memset(&ht_key, 0, sizeof(ht_key));
+ ht_key.block = block;
+ ht_key.chain_index = chain_index;
+ ht_key.ops = ops;
+ return rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key,
+ mlxsw_sp_acl_ruleset_ht_params);
+}
+
+struct mlxsw_sp_acl_ruleset *
+mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block, u32 chain_index,
+ enum mlxsw_sp_acl_profile profile)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops;
+ struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+ struct mlxsw_sp_acl_ruleset *ruleset;
+
+ ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
+ if (!ops)
+ return ERR_PTR(-EINVAL);
+ ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops);
+ if (!ruleset)
+ return ERR_PTR(-ENOENT);
+ return ruleset;
+}
+
+struct mlxsw_sp_acl_ruleset *
+mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block, u32 chain_index,
+ enum mlxsw_sp_acl_profile profile,
+ struct mlxsw_afk_element_usage *tmplt_elusage)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops;
+ struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+ struct mlxsw_sp_acl_ruleset *ruleset;
+
+ ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
+ if (!ops)
+ return ERR_PTR(-EINVAL);
+
+ ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops);
+ if (ruleset) {
+ mlxsw_sp_acl_ruleset_ref_inc(ruleset);
+ return ruleset;
+ }
+ return mlxsw_sp_acl_ruleset_create(mlxsw_sp, block, chain_index, ops,
+ tmplt_elusage);
+}
+
+void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset);
+}
+
+u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+
+ return ops->ruleset_group_id(ruleset->priv);
+}
+
+struct mlxsw_sp_acl_rule_info *
+mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl)
+{
+ struct mlxsw_sp_acl_rule_info *rulei;
+ int err;
+
+ rulei = kzalloc(sizeof(*rulei), GFP_KERNEL);
+ if (!rulei)
+ return ERR_PTR(-ENOMEM);
+
+ rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa);
+ if (IS_ERR(rulei->act_block)) {
+ err = PTR_ERR(rulei->act_block);
+ goto err_afa_block_create;
+ }
+ return rulei;
+
+err_afa_block_create:
+ kfree(rulei);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei)
+{
+ mlxsw_afa_block_destroy(rulei->act_block);
+ kfree(rulei);
+}
+
+int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei)
+{
+ return mlxsw_afa_block_commit(rulei->act_block);
+}
+
+void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
+ unsigned int priority)
+{
+ rulei->priority = priority >> 16;
+}
+
+void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
+ enum mlxsw_afk_element element,
+ u32 key_value, u32 mask_value)
+{
+ mlxsw_afk_values_add_u32(&rulei->values, element,
+ key_value, mask_value);
+}
+
+void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
+ enum mlxsw_afk_element element,
+ const char *key_value,
+ const char *mask_value, unsigned int len)
+{
+ mlxsw_afk_values_add_buf(&rulei->values, element,
+ key_value, mask_value, len);
+}
+
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei)
+{
+ return mlxsw_afa_block_continue(rulei->act_block);
+}
+
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+ u16 group_id)
+{
+ return mlxsw_afa_block_jump(rulei->act_block, group_id);
+}
+
+int mlxsw_sp_acl_rulei_act_terminate(struct mlxsw_sp_acl_rule_info *rulei)
+{
+ return mlxsw_afa_block_terminate(rulei->act_block);
+}
+
+int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
+{
+ return mlxsw_afa_block_append_drop(rulei->act_block);
+}
+
+int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei)
+{
+ return mlxsw_afa_block_append_trap(rulei->act_block,
+ MLXSW_TRAP_ID_ACL0);
+}
+
+int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u8 local_port;
+ bool in_port;
+
+ if (out_dev) {
+ if (!mlxsw_sp_port_dev_check(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid output device");
+ return -EINVAL;
+ }
+ mlxsw_sp_port = netdev_priv(out_dev);
+ if (mlxsw_sp_port->mlxsw_sp != mlxsw_sp) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid output device");
+ return -EINVAL;
+ }
+ local_port = mlxsw_sp_port->local_port;
+ in_port = false;
+ } else {
+ /* If out_dev is NULL, the caller wants to
+ * set forward to ingress port.
+ */
+ local_port = 0;
+ in_port = true;
+ }
+ return mlxsw_afa_block_append_fwd(rulei->act_block,
+ local_port, in_port, extack);
+}
+
+int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct mlxsw_sp_acl_block *block,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_acl_block_binding *binding;
+ struct mlxsw_sp_port *in_port;
+
+ if (!list_is_singular(&block->binding_list)) {
+ NL_SET_ERR_MSG_MOD(extack, "Only a single mirror source is allowed");
+ return -EOPNOTSUPP;
+ }
+ binding = list_first_entry(&block->binding_list,
+ struct mlxsw_sp_acl_block_binding, list);
+ in_port = binding->mlxsw_sp_port;
+
+ return mlxsw_afa_block_append_mirror(rulei->act_block,
+ in_port->local_port,
+ out_dev,
+ binding->ingress,
+ extack);
+}
+
+int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u32 action, u16 vid, u16 proto, u8 prio,
+ struct netlink_ext_ack *extack)
+{
+ u8 ethertype;
+
+ if (action == TCA_VLAN_ACT_MODIFY) {
+ switch (proto) {
+ case ETH_P_8021Q:
+ ethertype = 0;
+ break;
+ case ETH_P_8021AD:
+ ethertype = 1;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported VLAN protocol");
+ dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN protocol %#04x\n",
+ proto);
+ return -EINVAL;
+ }
+
+ return mlxsw_afa_block_append_vlan_modify(rulei->act_block,
+ vid, prio, ethertype,
+ extack);
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported VLAN action");
+ dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN action\n");
+ return -EINVAL;
+ }
+}
+
+int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct netlink_ext_ack *extack)
+{
+ return mlxsw_afa_block_append_counter(rulei->act_block,
+ &rulei->counter_index, extack);
+}
+
+int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u16 fid, struct netlink_ext_ack *extack)
+{
+ return mlxsw_afa_block_append_fid_set(rulei->act_block, fid, extack);
+}
+
+struct mlxsw_sp_acl_rule *
+mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset,
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+ struct mlxsw_sp_acl_rule *rule;
+ int err;
+
+ mlxsw_sp_acl_ruleset_ref_inc(ruleset);
+ rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp),
+ GFP_KERNEL);
+ if (!rule) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
+ rule->cookie = cookie;
+ rule->ruleset = ruleset;
+
+ rule->rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
+ if (IS_ERR(rule->rulei)) {
+ err = PTR_ERR(rule->rulei);
+ goto err_rulei_create;
+ }
+
+ return rule;
+
+err_rulei_create:
+ kfree(rule);
+err_alloc:
+ mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
+
+ mlxsw_sp_acl_rulei_destroy(rule->rulei);
+ kfree(rule);
+ mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset);
+}
+
+int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+ int err;
+
+ err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei);
+ if (err)
+ return err;
+
+ err = rhashtable_insert_fast(&ruleset->rule_ht, &rule->ht_node,
+ mlxsw_sp_acl_rule_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset)) {
+ /* We only need ruleset with chain index 0, the implicit
+ * one, to be directly bound to device. The rest of the
+ * rulesets are bound by "Goto action set".
+ */
+ err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
+ if (err)
+ goto err_ruleset_block_bind;
+ }
+
+ list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
+ ruleset->ht_key.block->rule_count++;
+ return 0;
+
+err_ruleset_block_bind:
+ rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
+ mlxsw_sp_acl_rule_ht_params);
+err_rhashtable_insert:
+ ops->rule_del(mlxsw_sp, rule->priv);
+ return err;
+}
+
+void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+
+ ruleset->ht_key.block->rule_count--;
+ list_del(&rule->list);
+ if (!ruleset->ht_key.chain_index &&
+ mlxsw_sp_acl_ruleset_is_singular(ruleset))
+ mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset,
+ ruleset->ht_key.block);
+ rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
+ mlxsw_sp_acl_rule_ht_params);
+ ops->rule_del(mlxsw_sp, rule->priv);
+}
+
+struct mlxsw_sp_acl_rule *
+mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ruleset *ruleset,
+ unsigned long cookie)
+{
+ return rhashtable_lookup_fast(&ruleset->rule_ht, &cookie,
+ mlxsw_sp_acl_rule_ht_params);
+}
+
+struct mlxsw_sp_acl_rule_info *
+mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule)
+{
+ return rule->rulei;
+}
+
+static int mlxsw_sp_acl_rule_activity_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+ bool active;
+ int err;
+
+ err = ops->rule_activity_get(mlxsw_sp, rule->priv, &active);
+ if (err)
+ return err;
+ if (active)
+ rule->last_used = jiffies;
+ return 0;
+}
+
+static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl)
+{
+ struct mlxsw_sp_acl_rule *rule;
+ int err;
+
+ /* Protect internal structures from changes */
+ rtnl_lock();
+ list_for_each_entry(rule, &acl->rules, list) {
+ err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp,
+ rule);
+ if (err)
+ goto err_rule_update;
+ }
+ rtnl_unlock();
+ return 0;
+
+err_rule_update:
+ rtnl_unlock();
+ return err;
+}
+
+static void mlxsw_sp_acl_rule_activity_work_schedule(struct mlxsw_sp_acl *acl)
+{
+ unsigned long interval = acl->rule_activity_update.interval;
+
+ mlxsw_core_schedule_dw(&acl->rule_activity_update.dw,
+ msecs_to_jiffies(interval));
+}
+
+static void mlxsw_sp_acl_rul_activity_update_work(struct work_struct *work)
+{
+ struct mlxsw_sp_acl *acl = container_of(work, struct mlxsw_sp_acl,
+ rule_activity_update.dw.work);
+ int err;
+
+ err = mlxsw_sp_acl_rules_activity_update(acl);
+ if (err)
+ dev_err(acl->mlxsw_sp->bus_info->dev, "Could not update acl activity");
+
+ mlxsw_sp_acl_rule_activity_work_schedule(acl);
+}
+
+int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule *rule,
+ u64 *packets, u64 *bytes, u64 *last_use)
+
+{
+ struct mlxsw_sp_acl_rule_info *rulei;
+ u64 current_packets;
+ u64 current_bytes;
+ int err;
+
+ rulei = mlxsw_sp_acl_rule_rulei(rule);
+ err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index,
+ &current_packets, &current_bytes);
+ if (err)
+ return err;
+
+ *packets = current_packets - rule->last_packets;
+ *bytes = current_bytes - rule->last_bytes;
+ *last_use = rule->last_used;
+
+ rule->last_bytes = current_bytes;
+ rule->last_packets = current_packets;
+
+ return 0;
+}
+
+int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_fid *fid;
+ struct mlxsw_sp_acl *acl;
+ size_t alloc_size;
+ int err;
+
+ alloc_size = sizeof(*acl) + mlxsw_sp_acl_tcam_priv_size(mlxsw_sp);
+ acl = kzalloc(alloc_size, GFP_KERNEL);
+ if (!acl)
+ return -ENOMEM;
+ mlxsw_sp->acl = acl;
+ acl->mlxsw_sp = mlxsw_sp;
+ acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_FLEX_KEYS),
+ mlxsw_sp->afk_ops);
+ if (!acl->afk) {
+ err = -ENOMEM;
+ goto err_afk_create;
+ }
+
+ err = rhashtable_init(&acl->ruleset_ht,
+ &mlxsw_sp_acl_ruleset_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
+ fid = mlxsw_sp_fid_dummy_get(mlxsw_sp);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ goto err_fid_get;
+ }
+ acl->dummy_fid = fid;
+
+ INIT_LIST_HEAD(&acl->rules);
+ err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam);
+ if (err)
+ goto err_acl_ops_init;
+
+ /* Create the delayed work for the rule activity_update */
+ INIT_DELAYED_WORK(&acl->rule_activity_update.dw,
+ mlxsw_sp_acl_rul_activity_update_work);
+ acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS;
+ mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0);
+ return 0;
+
+err_acl_ops_init:
+ mlxsw_sp_fid_put(fid);
+err_fid_get:
+ rhashtable_destroy(&acl->ruleset_ht);
+err_rhashtable_init:
+ mlxsw_afk_destroy(acl->afk);
+err_afk_create:
+ kfree(acl);
+ return err;
+}
+
+void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+
+ cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw);
+ mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam);
+ WARN_ON(!list_empty(&acl->rules));
+ mlxsw_sp_fid_put(acl->dummy_fid);
+ rhashtable_destroy(&acl->ruleset_ht);
+ mlxsw_afk_destroy(acl->afk);
+ kfree(acl);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
new file mode 100644
index 000000000..2dda028f9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/refcount.h>
+#include <linux/rhashtable.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+#include "core_acl_flex_keys.h"
+
+#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START 6
+#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END 11
+
+struct mlxsw_sp_acl_atcam_lkey_id_ht_key {
+ char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* MSB blocks */
+ u8 erp_id;
+};
+
+struct mlxsw_sp_acl_atcam_lkey_id {
+ struct rhash_head ht_node;
+ struct mlxsw_sp_acl_atcam_lkey_id_ht_key ht_key;
+ refcount_t refcnt;
+ u32 id;
+};
+
+struct mlxsw_sp_acl_atcam_region_ops {
+ int (*init)(struct mlxsw_sp_acl_atcam_region *aregion);
+ void (*fini)(struct mlxsw_sp_acl_atcam_region *aregion);
+ struct mlxsw_sp_acl_atcam_lkey_id *
+ (*lkey_id_get)(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_rule_info *rulei, u8 erp_id);
+ void (*lkey_id_put)(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id);
+};
+
+struct mlxsw_sp_acl_atcam_region_generic {
+ struct mlxsw_sp_acl_atcam_lkey_id dummy_lkey_id;
+};
+
+struct mlxsw_sp_acl_atcam_region_12kb {
+ struct rhashtable lkey_ht;
+ unsigned int max_lkey_id;
+ unsigned long *used_lkey_id;
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_atcam_lkey_id_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_acl_atcam_lkey_id_ht_key),
+ .key_offset = offsetof(struct mlxsw_sp_acl_atcam_lkey_id, ht_key),
+ .head_offset = offsetof(struct mlxsw_sp_acl_atcam_lkey_id, ht_node),
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_atcam_entries_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_acl_atcam_entry_ht_key),
+ .key_offset = offsetof(struct mlxsw_sp_acl_atcam_entry, ht_key),
+ .head_offset = offsetof(struct mlxsw_sp_acl_atcam_entry, ht_node),
+};
+
+static bool
+mlxsw_sp_acl_atcam_is_centry(const struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ return mlxsw_sp_acl_erp_is_ctcam_erp(aentry->erp);
+}
+
+static int
+mlxsw_sp_acl_atcam_region_generic_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp_acl_atcam_region_generic *region_generic;
+
+ region_generic = kzalloc(sizeof(*region_generic), GFP_KERNEL);
+ if (!region_generic)
+ return -ENOMEM;
+
+ refcount_set(&region_generic->dummy_lkey_id.refcnt, 1);
+ aregion->priv = region_generic;
+
+ return 0;
+}
+
+static void
+mlxsw_sp_acl_atcam_region_generic_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ kfree(aregion->priv);
+}
+
+static struct mlxsw_sp_acl_atcam_lkey_id *
+mlxsw_sp_acl_atcam_generic_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u8 erp_id)
+{
+ struct mlxsw_sp_acl_atcam_region_generic *region_generic;
+
+ region_generic = aregion->priv;
+ return &region_generic->dummy_lkey_id;
+}
+
+static void
+mlxsw_sp_acl_atcam_generic_lkey_id_put(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id)
+{
+}
+
+static const struct mlxsw_sp_acl_atcam_region_ops
+mlxsw_sp_acl_atcam_region_generic_ops = {
+ .init = mlxsw_sp_acl_atcam_region_generic_init,
+ .fini = mlxsw_sp_acl_atcam_region_generic_fini,
+ .lkey_id_get = mlxsw_sp_acl_atcam_generic_lkey_id_get,
+ .lkey_id_put = mlxsw_sp_acl_atcam_generic_lkey_id_put,
+};
+
+static int
+mlxsw_sp_acl_atcam_region_12kb_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+ struct mlxsw_sp_acl_atcam_region_12kb *region_12kb;
+ size_t alloc_size;
+ u64 max_lkey_id;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_LARGE_KEY_ID))
+ return -EIO;
+
+ max_lkey_id = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_LARGE_KEY_ID);
+ region_12kb = kzalloc(sizeof(*region_12kb), GFP_KERNEL);
+ if (!region_12kb)
+ return -ENOMEM;
+
+ alloc_size = BITS_TO_LONGS(max_lkey_id) * sizeof(unsigned long);
+ region_12kb->used_lkey_id = kzalloc(alloc_size, GFP_KERNEL);
+ if (!region_12kb->used_lkey_id) {
+ err = -ENOMEM;
+ goto err_used_lkey_id_alloc;
+ }
+
+ err = rhashtable_init(&region_12kb->lkey_ht,
+ &mlxsw_sp_acl_atcam_lkey_id_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
+ region_12kb->max_lkey_id = max_lkey_id;
+ aregion->priv = region_12kb;
+
+ return 0;
+
+err_rhashtable_init:
+ kfree(region_12kb->used_lkey_id);
+err_used_lkey_id_alloc:
+ kfree(region_12kb);
+ return err;
+}
+
+static void
+mlxsw_sp_acl_atcam_region_12kb_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+
+ rhashtable_destroy(&region_12kb->lkey_ht);
+ kfree(region_12kb->used_lkey_id);
+ kfree(region_12kb);
+}
+
+static struct mlxsw_sp_acl_atcam_lkey_id *
+mlxsw_sp_acl_atcam_lkey_id_create(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_lkey_id_ht_key *ht_key)
+{
+ struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+ u32 id;
+ int err;
+
+ id = find_first_zero_bit(region_12kb->used_lkey_id,
+ region_12kb->max_lkey_id);
+ if (id < region_12kb->max_lkey_id)
+ __set_bit(id, region_12kb->used_lkey_id);
+ else
+ return ERR_PTR(-ENOBUFS);
+
+ lkey_id = kzalloc(sizeof(*lkey_id), GFP_KERNEL);
+ if (!lkey_id) {
+ err = -ENOMEM;
+ goto err_lkey_id_alloc;
+ }
+
+ lkey_id->id = id;
+ memcpy(&lkey_id->ht_key, ht_key, sizeof(*ht_key));
+ refcount_set(&lkey_id->refcnt, 1);
+
+ err = rhashtable_insert_fast(&region_12kb->lkey_ht,
+ &lkey_id->ht_node,
+ mlxsw_sp_acl_atcam_lkey_id_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ return lkey_id;
+
+err_rhashtable_insert:
+ kfree(lkey_id);
+err_lkey_id_alloc:
+ __clear_bit(id, region_12kb->used_lkey_id);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_atcam_lkey_id_destroy(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id)
+{
+ struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+ u32 id = lkey_id->id;
+
+ rhashtable_remove_fast(&region_12kb->lkey_ht, &lkey_id->ht_node,
+ mlxsw_sp_acl_atcam_lkey_id_ht_params);
+ kfree(lkey_id);
+ __clear_bit(id, region_12kb->used_lkey_id);
+}
+
+static struct mlxsw_sp_acl_atcam_lkey_id *
+mlxsw_sp_acl_atcam_12kb_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u8 erp_id)
+{
+ struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv;
+ struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+ struct mlxsw_sp_acl_atcam_lkey_id_ht_key ht_key = {{ 0 } };
+ struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+ struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+
+ mlxsw_afk_encode(afk, region->key_info, &rulei->values, ht_key.enc_key,
+ NULL, MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START,
+ MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END);
+ ht_key.erp_id = erp_id;
+ lkey_id = rhashtable_lookup_fast(&region_12kb->lkey_ht, &ht_key,
+ mlxsw_sp_acl_atcam_lkey_id_ht_params);
+ if (lkey_id) {
+ refcount_inc(&lkey_id->refcnt);
+ return lkey_id;
+ }
+
+ return mlxsw_sp_acl_atcam_lkey_id_create(aregion, &ht_key);
+}
+
+static void
+mlxsw_sp_acl_atcam_12kb_lkey_id_put(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id)
+{
+ if (refcount_dec_and_test(&lkey_id->refcnt))
+ mlxsw_sp_acl_atcam_lkey_id_destroy(aregion, lkey_id);
+}
+
+static const struct mlxsw_sp_acl_atcam_region_ops
+mlxsw_sp_acl_atcam_region_12kb_ops = {
+ .init = mlxsw_sp_acl_atcam_region_12kb_init,
+ .fini = mlxsw_sp_acl_atcam_region_12kb_fini,
+ .lkey_id_get = mlxsw_sp_acl_atcam_12kb_lkey_id_get,
+ .lkey_id_put = mlxsw_sp_acl_atcam_12kb_lkey_id_put,
+};
+
+static const struct mlxsw_sp_acl_atcam_region_ops *
+mlxsw_sp_acl_atcam_region_ops_arr[] = {
+ [MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB] =
+ &mlxsw_sp_acl_atcam_region_generic_ops,
+ [MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB] =
+ &mlxsw_sp_acl_atcam_region_generic_ops,
+ [MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB] =
+ &mlxsw_sp_acl_atcam_region_generic_ops,
+ [MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB] =
+ &mlxsw_sp_acl_atcam_region_12kb_ops,
+};
+
+int mlxsw_sp_acl_atcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+ u16 region_id)
+{
+ char perar_pl[MLXSW_REG_PERAR_LEN];
+ /* For now, just assume that every region has 12 key blocks */
+ u16 hw_region = region_id * 3;
+ u64 max_regions;
+
+ max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS);
+ if (hw_region >= max_regions)
+ return -ENOBUFS;
+
+ mlxsw_reg_perar_pack(perar_pl, region_id, hw_region);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perar), perar_pl);
+}
+
+static void
+mlxsw_sp_acl_atcam_region_type_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+ enum mlxsw_sp_acl_atcam_region_type region_type;
+ unsigned int blocks_count;
+
+ /* We already know the blocks count can not exceed the maximum
+ * blocks count.
+ */
+ blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
+ if (blocks_count <= 2)
+ region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB;
+ else if (blocks_count <= 4)
+ region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB;
+ else if (blocks_count <= 8)
+ region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB;
+ else
+ region_type = MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB;
+
+ aregion->type = region_type;
+ aregion->ops = mlxsw_sp_acl_atcam_region_ops_arr[region_type];
+}
+
+int
+mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_tcam_region *region,
+ const struct mlxsw_sp_acl_ctcam_region_ops *ops)
+{
+ int err;
+
+ aregion->region = region;
+ aregion->atcam = atcam;
+ mlxsw_sp_acl_atcam_region_type_init(aregion);
+
+ err = rhashtable_init(&aregion->entries_ht,
+ &mlxsw_sp_acl_atcam_entries_ht_params);
+ if (err)
+ return err;
+ err = aregion->ops->init(aregion);
+ if (err)
+ goto err_ops_init;
+ err = mlxsw_sp_acl_erp_region_init(aregion);
+ if (err)
+ goto err_erp_region_init;
+ err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion,
+ region, ops);
+ if (err)
+ goto err_ctcam_region_init;
+
+ return 0;
+
+err_ctcam_region_init:
+ mlxsw_sp_acl_erp_region_fini(aregion);
+err_erp_region_init:
+ aregion->ops->fini(aregion);
+err_ops_init:
+ rhashtable_destroy(&aregion->entries_ht);
+ return err;
+}
+
+void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ mlxsw_sp_acl_ctcam_region_fini(&aregion->cregion);
+ mlxsw_sp_acl_erp_region_fini(aregion);
+ aregion->ops->fini(aregion);
+ rhashtable_destroy(&aregion->entries_ht);
+}
+
+void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_chunk *achunk,
+ unsigned int priority)
+{
+ mlxsw_sp_acl_ctcam_chunk_init(&aregion->cregion, &achunk->cchunk,
+ priority);
+}
+
+void mlxsw_sp_acl_atcam_chunk_fini(struct mlxsw_sp_acl_atcam_chunk *achunk)
+{
+ mlxsw_sp_acl_ctcam_chunk_fini(&achunk->cchunk);
+}
+
+static int
+mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ struct mlxsw_sp_acl_rule_info *rulei)
+{
+ struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+ u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp);
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+ char ptce3_pl[MLXSW_REG_PTCE3_LEN];
+ u32 kvdl_index, priority;
+ int err;
+
+ err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, true);
+ if (err)
+ return err;
+
+ lkey_id = aregion->ops->lkey_id_get(aregion, rulei, erp_id);
+ if (IS_ERR(lkey_id))
+ return PTR_ERR(lkey_id);
+ aentry->lkey_id = lkey_id;
+
+ kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block);
+ mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_WRITE,
+ priority, region->tcam_region_info,
+ aentry->ht_key.enc_key, erp_id,
+ refcount_read(&lkey_id->refcnt) != 1, lkey_id->id,
+ kvdl_index);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl);
+ if (err)
+ goto err_ptce3_write;
+
+ return 0;
+
+err_ptce3_write:
+ aregion->ops->lkey_id_put(aregion, lkey_id);
+ return err;
+}
+
+static void
+mlxsw_sp_acl_atcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id;
+ struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+ u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp);
+ char ptce3_pl[MLXSW_REG_PTCE3_LEN];
+
+ mlxsw_reg_ptce3_pack(ptce3_pl, false, MLXSW_REG_PTCE3_OP_WRITE_WRITE, 0,
+ region->tcam_region_info, aentry->ht_key.enc_key,
+ erp_id, refcount_read(&lkey_id->refcnt) != 1,
+ lkey_id->id, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl);
+ aregion->ops->lkey_id_put(aregion, lkey_id);
+}
+
+static int
+__mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ struct mlxsw_sp_acl_rule_info *rulei)
+{
+ struct mlxsw_sp_acl_tcam_region *region = aregion->region;
+ char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 };
+ struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+ struct mlxsw_sp_acl_erp *erp;
+ unsigned int blocks_count;
+ int err;
+
+ blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
+ mlxsw_afk_encode(afk, region->key_info, &rulei->values,
+ aentry->ht_key.enc_key, mask, 0, blocks_count - 1);
+
+ erp = mlxsw_sp_acl_erp_get(aregion, mask, false);
+ if (IS_ERR(erp))
+ return PTR_ERR(erp);
+ aentry->erp = erp;
+ aentry->ht_key.erp_id = mlxsw_sp_acl_erp_id(erp);
+
+ /* We can't insert identical rules into the A-TCAM, so fail and
+ * let the rule spill into C-TCAM
+ */
+ err = rhashtable_lookup_insert_fast(&aregion->entries_ht,
+ &aentry->ht_node,
+ mlxsw_sp_acl_atcam_entries_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ err = mlxsw_sp_acl_atcam_region_entry_insert(mlxsw_sp, aregion, aentry,
+ rulei);
+ if (err)
+ goto err_rule_insert;
+
+ return 0;
+
+err_rule_insert:
+ rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
+ mlxsw_sp_acl_atcam_entries_ht_params);
+err_rhashtable_insert:
+ mlxsw_sp_acl_erp_put(aregion, erp);
+ return err;
+}
+
+static void
+__mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ mlxsw_sp_acl_atcam_region_entry_remove(mlxsw_sp, aregion, aentry);
+ rhashtable_remove_fast(&aregion->entries_ht, &aentry->ht_node,
+ mlxsw_sp_acl_atcam_entries_ht_params);
+ mlxsw_sp_acl_erp_put(aregion, aentry->erp);
+}
+
+int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_chunk *achunk,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ struct mlxsw_sp_acl_rule_info *rulei)
+{
+ int err;
+
+ err = __mlxsw_sp_acl_atcam_entry_add(mlxsw_sp, aregion, aentry, rulei);
+ if (!err)
+ return 0;
+
+ /* It is possible we failed to add the rule to the A-TCAM due to
+ * exceeded number of masks. Try to spill into C-TCAM.
+ */
+ err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &aregion->cregion,
+ &achunk->cchunk, &aentry->centry,
+ rulei, true);
+ if (!err)
+ return 0;
+
+ return err;
+}
+
+void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_chunk *achunk,
+ struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+ if (mlxsw_sp_acl_atcam_is_centry(aentry))
+ mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &aregion->cregion,
+ &achunk->cchunk, &aentry->centry);
+ else
+ __mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, aregion, aentry);
+}
+
+int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam)
+{
+ return mlxsw_sp_acl_erps_init(mlxsw_sp, atcam);
+}
+
+void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam)
+{
+ mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
new file mode 100644
index 000000000..1dcf152b2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+static int
+mlxsw_sp_acl_ctcam_region_resize(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region,
+ u16 new_size)
+{
+ char ptar_pl[MLXSW_REG_PTAR_LEN];
+
+ mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE,
+ region->key_type, new_size, region->id,
+ region->tcam_region_info);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_move(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region,
+ u16 src_offset, u16 dst_offset, u16 size)
+{
+ char prcr_pl[MLXSW_REG_PRCR_LEN];
+
+ mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE,
+ region->tcam_region_info, src_offset,
+ region->tcam_region_info, dst_offset, size);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl);
+}
+
+static int
+mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ bool fillup_priority)
+{
+ struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+ struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+ char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+ unsigned int blocks_count;
+ char *act_set;
+ u32 priority;
+ char *mask;
+ char *key;
+ int err;
+
+ err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority,
+ fillup_priority);
+ if (err)
+ return err;
+
+ mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+ region->tcam_region_info,
+ centry->parman_item.index, priority);
+ key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
+ mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
+ blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info);
+ mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask, 0,
+ blocks_count - 1);
+
+ err = cregion->ops->entry_insert(cregion, centry, mask);
+ if (err)
+ return err;
+
+ /* Only the first action set belongs here, the rest is in KVD */
+ act_set = mlxsw_afa_block_first_set(rulei->act_block);
+ mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
+
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+ if (err)
+ goto err_ptce2_write;
+
+ return 0;
+
+err_ptce2_write:
+ cregion->ops->entry_remove(cregion, centry);
+ return err;
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+ char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+
+ mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+ cregion->region->tcam_region_info,
+ centry->parman_item.index, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+ cregion->ops->entry_remove(cregion, centry);
+}
+
+static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv,
+ unsigned long new_count)
+{
+ struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+ struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+ struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+ u64 max_tcam_rules;
+
+ max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+ if (new_count > max_tcam_rules)
+ return -EINVAL;
+ return mlxsw_sp_acl_ctcam_region_resize(mlxsw_sp, region, new_count);
+}
+
+static void mlxsw_sp_acl_ctcam_region_parman_move(void *priv,
+ unsigned long from_index,
+ unsigned long to_index,
+ unsigned long count)
+{
+ struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+ struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+ struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+
+ mlxsw_sp_acl_ctcam_region_move(mlxsw_sp, region,
+ from_index, to_index, count);
+}
+
+static const struct parman_ops mlxsw_sp_acl_ctcam_region_parman_ops = {
+ .base_count = MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
+ .resize_step = MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP,
+ .resize = mlxsw_sp_acl_ctcam_region_parman_resize,
+ .move = mlxsw_sp_acl_ctcam_region_parman_move,
+ .algo = PARMAN_ALGO_TYPE_LSORT,
+};
+
+int
+mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_tcam_region *region,
+ const struct mlxsw_sp_acl_ctcam_region_ops *ops)
+{
+ cregion->region = region;
+ cregion->ops = ops;
+ cregion->parman = parman_create(&mlxsw_sp_acl_ctcam_region_parman_ops,
+ cregion);
+ if (!cregion->parman)
+ return -ENOMEM;
+ return 0;
+}
+
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion)
+{
+ parman_destroy(cregion->parman);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+ unsigned int priority)
+{
+ parman_prio_init(cregion->parman, &cchunk->parman_prio, priority);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk)
+{
+ parman_prio_fini(&cchunk->parman_prio);
+}
+
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+ struct mlxsw_sp_acl_ctcam_entry *centry,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ bool fillup_priority)
+{
+ int err;
+
+ err = parman_item_add(cregion->parman, &cchunk->parman_prio,
+ &centry->parman_item);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_acl_ctcam_region_entry_insert(mlxsw_sp, cregion, centry,
+ rulei, fillup_priority);
+ if (err)
+ goto err_rule_insert;
+ return 0;
+
+err_rule_insert:
+ parman_item_remove(cregion->parman, &cchunk->parman_prio,
+ &centry->parman_item);
+ return err;
+}
+
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+ struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+ mlxsw_sp_acl_ctcam_region_entry_remove(mlxsw_sp, cregion, centry);
+ parman_item_remove(cregion->parman, &cchunk->parman_prio,
+ &centry->parman_item);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
new file mode 100644
index 000000000..0a4fd3c86
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
@@ -0,0 +1,1168 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/bitmap.h>
+#include <linux/errno.h>
+#include <linux/genalloc.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/rhashtable.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+
+#include "core.h"
+#include "reg.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+/* gen_pool_alloc() returns 0 when allocation fails, so use an offset */
+#define MLXSW_SP_ACL_ERP_GENALLOC_OFFSET 0x100
+#define MLXSW_SP_ACL_ERP_MAX_PER_REGION 16
+
+struct mlxsw_sp_acl_erp_core {
+ unsigned int erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX + 1];
+ struct gen_pool *erp_tables;
+ struct mlxsw_sp *mlxsw_sp;
+ unsigned int num_erp_banks;
+};
+
+struct mlxsw_sp_acl_erp_key {
+ char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN];
+ bool ctcam;
+};
+
+struct mlxsw_sp_acl_erp {
+ struct mlxsw_sp_acl_erp_key key;
+ u8 id;
+ u8 index;
+ refcount_t refcnt;
+ DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN);
+ struct list_head list;
+ struct rhash_head ht_node;
+ struct mlxsw_sp_acl_erp_table *erp_table;
+};
+
+struct mlxsw_sp_acl_erp_master_mask {
+ DECLARE_BITMAP(bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN);
+ unsigned int count[MLXSW_SP_ACL_TCAM_MASK_LEN];
+};
+
+struct mlxsw_sp_acl_erp_table {
+ struct mlxsw_sp_acl_erp_master_mask master_mask;
+ DECLARE_BITMAP(erp_id_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+ DECLARE_BITMAP(erp_index_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+ struct list_head atcam_erps_list;
+ struct rhashtable erp_ht;
+ struct mlxsw_sp_acl_erp_core *erp_core;
+ struct mlxsw_sp_acl_atcam_region *aregion;
+ const struct mlxsw_sp_acl_erp_table_ops *ops;
+ unsigned long base_index;
+ unsigned int num_atcam_erps;
+ unsigned int num_max_atcam_erps;
+ unsigned int num_ctcam_erps;
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_erp_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_acl_erp_key),
+ .key_offset = offsetof(struct mlxsw_sp_acl_erp, key),
+ .head_offset = offsetof(struct mlxsw_sp_acl_erp, ht_node),
+};
+
+struct mlxsw_sp_acl_erp_table_ops {
+ struct mlxsw_sp_acl_erp *
+ (*erp_create)(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key);
+ void (*erp_destroy)(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp);
+};
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key);
+static void
+mlxsw_sp_acl_erp_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp);
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_second_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key);
+static void
+mlxsw_sp_acl_erp_second_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp);
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_first_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key);
+static void
+mlxsw_sp_acl_erp_first_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp);
+static void
+mlxsw_sp_acl_erp_no_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp);
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_multiple_masks_ops = {
+ .erp_create = mlxsw_sp_acl_erp_mask_create,
+ .erp_destroy = mlxsw_sp_acl_erp_mask_destroy,
+};
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_two_masks_ops = {
+ .erp_create = mlxsw_sp_acl_erp_mask_create,
+ .erp_destroy = mlxsw_sp_acl_erp_second_mask_destroy,
+};
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_single_mask_ops = {
+ .erp_create = mlxsw_sp_acl_erp_second_mask_create,
+ .erp_destroy = mlxsw_sp_acl_erp_first_mask_destroy,
+};
+
+static const struct mlxsw_sp_acl_erp_table_ops erp_no_mask_ops = {
+ .erp_create = mlxsw_sp_acl_erp_first_mask_create,
+ .erp_destroy = mlxsw_sp_acl_erp_no_mask_destroy,
+};
+
+bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp)
+{
+ return erp->key.ctcam;
+}
+
+u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp)
+{
+ return erp->id;
+}
+
+static unsigned int
+mlxsw_sp_acl_erp_table_entry_size(const struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion;
+ struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+
+ return erp_core->erpt_entries_size[aregion->type];
+}
+
+static int mlxsw_sp_acl_erp_id_get(struct mlxsw_sp_acl_erp_table *erp_table,
+ u8 *p_id)
+{
+ u8 id;
+
+ id = find_first_zero_bit(erp_table->erp_id_bitmap,
+ MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+ if (id < MLXSW_SP_ACL_ERP_MAX_PER_REGION) {
+ __set_bit(id, erp_table->erp_id_bitmap);
+ *p_id = id;
+ return 0;
+ }
+
+ return -ENOBUFS;
+}
+
+static void mlxsw_sp_acl_erp_id_put(struct mlxsw_sp_acl_erp_table *erp_table,
+ u8 id)
+{
+ __clear_bit(id, erp_table->erp_id_bitmap);
+}
+
+static void
+mlxsw_sp_acl_erp_master_mask_bit_set(unsigned long bit,
+ struct mlxsw_sp_acl_erp_master_mask *mask)
+{
+ if (mask->count[bit]++ == 0)
+ __set_bit(bit, mask->bitmap);
+}
+
+static void
+mlxsw_sp_acl_erp_master_mask_bit_clear(unsigned long bit,
+ struct mlxsw_sp_acl_erp_master_mask *mask)
+{
+ if (--mask->count[bit] == 0)
+ __clear_bit(bit, mask->bitmap);
+}
+
+static int
+mlxsw_sp_acl_erp_master_mask_update(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+ struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+ char percr_pl[MLXSW_REG_PERCR_LEN];
+ char *master_mask;
+
+ mlxsw_reg_percr_pack(percr_pl, region->id);
+ master_mask = mlxsw_reg_percr_master_mask_data(percr_pl);
+ bitmap_to_arr32((u32 *) master_mask, erp_table->master_mask.bitmap,
+ MLXSW_SP_ACL_TCAM_MASK_LEN);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(percr), percr_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_master_mask_set(struct mlxsw_sp_acl_erp_table *erp_table,
+ const struct mlxsw_sp_acl_erp *erp)
+{
+ unsigned long bit;
+ int err;
+
+ for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+ mlxsw_sp_acl_erp_master_mask_bit_set(bit,
+ &erp_table->master_mask);
+
+ err = mlxsw_sp_acl_erp_master_mask_update(erp_table);
+ if (err)
+ goto err_master_mask_update;
+
+ return 0;
+
+err_master_mask_update:
+ for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+ mlxsw_sp_acl_erp_master_mask_bit_clear(bit,
+ &erp_table->master_mask);
+ return err;
+}
+
+static int
+mlxsw_sp_acl_erp_master_mask_clear(struct mlxsw_sp_acl_erp_table *erp_table,
+ const struct mlxsw_sp_acl_erp *erp)
+{
+ unsigned long bit;
+ int err;
+
+ for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+ mlxsw_sp_acl_erp_master_mask_bit_clear(bit,
+ &erp_table->master_mask);
+
+ err = mlxsw_sp_acl_erp_master_mask_update(erp_table);
+ if (err)
+ goto err_master_mask_update;
+
+ return 0;
+
+err_master_mask_update:
+ for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN)
+ mlxsw_sp_acl_erp_master_mask_bit_set(bit,
+ &erp_table->master_mask);
+ return err;
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_generic_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key)
+{
+ struct mlxsw_sp_acl_erp *erp;
+ int err;
+
+ erp = kzalloc(sizeof(*erp), GFP_KERNEL);
+ if (!erp)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlxsw_sp_acl_erp_id_get(erp_table, &erp->id);
+ if (err)
+ goto err_erp_id_get;
+
+ memcpy(&erp->key, key, sizeof(*key));
+ bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask,
+ MLXSW_SP_ACL_TCAM_MASK_LEN);
+ list_add(&erp->list, &erp_table->atcam_erps_list);
+ refcount_set(&erp->refcnt, 1);
+ erp_table->num_atcam_erps++;
+ erp->erp_table = erp_table;
+
+ err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp);
+ if (err)
+ goto err_master_mask_set;
+
+ err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node,
+ mlxsw_sp_acl_erp_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ return erp;
+
+err_rhashtable_insert:
+ mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+err_master_mask_set:
+ erp_table->num_atcam_erps--;
+ list_del(&erp->list);
+ mlxsw_sp_acl_erp_id_put(erp_table, erp->id);
+err_erp_id_get:
+ kfree(erp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_generic_destroy(struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+
+ rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
+ mlxsw_sp_acl_erp_ht_params);
+ mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+ erp_table->num_atcam_erps--;
+ list_del(&erp->list);
+ mlxsw_sp_acl_erp_id_put(erp_table, erp->id);
+ kfree(erp);
+}
+
+static int
+mlxsw_sp_acl_erp_table_alloc(struct mlxsw_sp_acl_erp_core *erp_core,
+ unsigned int num_erps,
+ enum mlxsw_sp_acl_atcam_region_type region_type,
+ unsigned long *p_index)
+{
+ unsigned int num_rows, entry_size;
+
+ /* We only allow allocations of entire rows */
+ if (num_erps % erp_core->num_erp_banks != 0)
+ return -EINVAL;
+
+ entry_size = erp_core->erpt_entries_size[region_type];
+ num_rows = num_erps / erp_core->num_erp_banks;
+
+ *p_index = gen_pool_alloc(erp_core->erp_tables, num_rows * entry_size);
+ if (*p_index == 0)
+ return -ENOBUFS;
+ *p_index -= MLXSW_SP_ACL_ERP_GENALLOC_OFFSET;
+
+ return 0;
+}
+
+static void
+mlxsw_sp_acl_erp_table_free(struct mlxsw_sp_acl_erp_core *erp_core,
+ unsigned int num_erps,
+ enum mlxsw_sp_acl_atcam_region_type region_type,
+ unsigned long index)
+{
+ unsigned long base_index;
+ unsigned int entry_size;
+ size_t size;
+
+ entry_size = erp_core->erpt_entries_size[region_type];
+ base_index = index + MLXSW_SP_ACL_ERP_GENALLOC_OFFSET;
+ size = num_erps / erp_core->num_erp_banks * entry_size;
+ gen_pool_free(erp_core->erp_tables, base_index, size);
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_table_master_rp(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ if (!list_is_singular(&erp_table->atcam_erps_list))
+ return NULL;
+
+ return list_first_entry(&erp_table->atcam_erps_list,
+ struct mlxsw_sp_acl_erp, list);
+}
+
+static int mlxsw_sp_acl_erp_index_get(struct mlxsw_sp_acl_erp_table *erp_table,
+ u8 *p_index)
+{
+ u8 index;
+
+ index = find_first_zero_bit(erp_table->erp_index_bitmap,
+ erp_table->num_max_atcam_erps);
+ if (index < erp_table->num_max_atcam_erps) {
+ __set_bit(index, erp_table->erp_index_bitmap);
+ *p_index = index;
+ return 0;
+ }
+
+ return -ENOBUFS;
+}
+
+static void mlxsw_sp_acl_erp_index_put(struct mlxsw_sp_acl_erp_table *erp_table,
+ u8 index)
+{
+ __clear_bit(index, erp_table->erp_index_bitmap);
+}
+
+static void
+mlxsw_sp_acl_erp_table_locate(const struct mlxsw_sp_acl_erp_table *erp_table,
+ const struct mlxsw_sp_acl_erp *erp,
+ u8 *p_erpt_bank, u8 *p_erpt_index)
+{
+ unsigned int entry_size = mlxsw_sp_acl_erp_table_entry_size(erp_table);
+ struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+ unsigned int row;
+
+ *p_erpt_bank = erp->index % erp_core->num_erp_banks;
+ row = erp->index / erp_core->num_erp_banks;
+ *p_erpt_index = erp_table->base_index + row * entry_size;
+}
+
+static int
+mlxsw_sp_acl_erp_table_erp_add(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+ enum mlxsw_reg_perpt_key_size key_size;
+ char perpt_pl[MLXSW_REG_PERPT_LEN];
+ u8 erpt_bank, erpt_index;
+
+ mlxsw_sp_acl_erp_table_locate(erp_table, erp, &erpt_bank, &erpt_index);
+ key_size = (enum mlxsw_reg_perpt_key_size) erp_table->aregion->type;
+ mlxsw_reg_perpt_pack(perpt_pl, erpt_bank, erpt_index, key_size, erp->id,
+ 0, erp_table->base_index, erp->index,
+ erp->key.mask);
+ mlxsw_reg_perpt_erp_vector_pack(perpt_pl, erp_table->erp_index_bitmap,
+ MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+ mlxsw_reg_perpt_erp_vector_set(perpt_pl, erp->index, true);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perpt), perpt_pl);
+}
+
+static void mlxsw_sp_acl_erp_table_erp_del(struct mlxsw_sp_acl_erp *erp)
+{
+ char empty_mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 };
+ struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+ struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+ enum mlxsw_reg_perpt_key_size key_size;
+ char perpt_pl[MLXSW_REG_PERPT_LEN];
+ u8 erpt_bank, erpt_index;
+
+ mlxsw_sp_acl_erp_table_locate(erp_table, erp, &erpt_bank, &erpt_index);
+ key_size = (enum mlxsw_reg_perpt_key_size) erp_table->aregion->type;
+ mlxsw_reg_perpt_pack(perpt_pl, erpt_bank, erpt_index, key_size, erp->id,
+ 0, erp_table->base_index, erp->index, empty_mask);
+ mlxsw_reg_perpt_erp_vector_pack(perpt_pl, erp_table->erp_index_bitmap,
+ MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+ mlxsw_reg_perpt_erp_vector_set(perpt_pl, erp->index, false);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(perpt), perpt_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_table_enable(struct mlxsw_sp_acl_erp_table *erp_table,
+ bool ctcam_le)
+{
+ struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+ struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+ char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+ mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0,
+ erp_table->base_index, 0);
+ mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap,
+ MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static void
+mlxsw_sp_acl_erp_table_disable(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+ struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+ char pererp_pl[MLXSW_REG_PERERP_LEN];
+ struct mlxsw_sp_acl_erp *master_rp;
+
+ master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
+ /* It is possible we do not have a master RP when we disable the
+ * table when there are no rules in the A-TCAM and the last C-TCAM
+ * rule is deleted
+ */
+ mlxsw_reg_pererp_pack(pererp_pl, region->id, false, false, 0, 0,
+ master_rp ? master_rp->id : 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_table_relocate(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ struct mlxsw_sp_acl_erp *erp;
+ int err;
+
+ list_for_each_entry(erp, &erp_table->atcam_erps_list, list) {
+ err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp);
+ if (err)
+ goto err_table_erp_add;
+ }
+
+ return 0;
+
+err_table_erp_add:
+ list_for_each_entry_continue_reverse(erp, &erp_table->atcam_erps_list,
+ list)
+ mlxsw_sp_acl_erp_table_erp_del(erp);
+ return err;
+}
+
+static int
+mlxsw_sp_acl_erp_table_expand(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ unsigned int num_erps, old_num_erps = erp_table->num_max_atcam_erps;
+ struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+ unsigned long old_base_index = erp_table->base_index;
+ bool ctcam_le = erp_table->num_ctcam_erps > 0;
+ int err;
+
+ if (erp_table->num_atcam_erps < erp_table->num_max_atcam_erps)
+ return 0;
+
+ if (erp_table->num_max_atcam_erps == MLXSW_SP_ACL_ERP_MAX_PER_REGION)
+ return -ENOBUFS;
+
+ num_erps = old_num_erps + erp_core->num_erp_banks;
+ err = mlxsw_sp_acl_erp_table_alloc(erp_core, num_erps,
+ erp_table->aregion->type,
+ &erp_table->base_index);
+ if (err)
+ return err;
+ erp_table->num_max_atcam_erps = num_erps;
+
+ err = mlxsw_sp_acl_erp_table_relocate(erp_table);
+ if (err)
+ goto err_table_relocate;
+
+ err = mlxsw_sp_acl_erp_table_enable(erp_table, ctcam_le);
+ if (err)
+ goto err_table_enable;
+
+ mlxsw_sp_acl_erp_table_free(erp_core, old_num_erps,
+ erp_table->aregion->type, old_base_index);
+
+ return 0;
+
+err_table_enable:
+err_table_relocate:
+ erp_table->num_max_atcam_erps = old_num_erps;
+ mlxsw_sp_acl_erp_table_free(erp_core, num_erps,
+ erp_table->aregion->type,
+ erp_table->base_index);
+ erp_table->base_index = old_base_index;
+ return err;
+}
+
+static int
+mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+ struct mlxsw_sp_acl_erp *master_rp;
+ int err;
+
+ /* Initially, allocate a single eRP row. Expand later as needed */
+ err = mlxsw_sp_acl_erp_table_alloc(erp_core, erp_core->num_erp_banks,
+ erp_table->aregion->type,
+ &erp_table->base_index);
+ if (err)
+ return err;
+ erp_table->num_max_atcam_erps = erp_core->num_erp_banks;
+
+ /* Transition the sole RP currently configured (the master RP)
+ * to the eRP table
+ */
+ master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
+ if (!master_rp) {
+ err = -EINVAL;
+ goto err_table_master_rp;
+ }
+
+ /* Maintain the same eRP bank for the master RP, so that we
+ * wouldn't need to update the bloom filter
+ */
+ master_rp->index = master_rp->index % erp_core->num_erp_banks;
+ __set_bit(master_rp->index, erp_table->erp_index_bitmap);
+
+ err = mlxsw_sp_acl_erp_table_erp_add(erp_table, master_rp);
+ if (err)
+ goto err_table_master_rp_add;
+
+ err = mlxsw_sp_acl_erp_table_enable(erp_table, false);
+ if (err)
+ goto err_table_enable;
+
+ return 0;
+
+err_table_enable:
+ mlxsw_sp_acl_erp_table_erp_del(master_rp);
+err_table_master_rp_add:
+ __clear_bit(master_rp->index, erp_table->erp_index_bitmap);
+err_table_master_rp:
+ mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps,
+ erp_table->aregion->type,
+ erp_table->base_index);
+ return err;
+}
+
+static void
+mlxsw_sp_acl_erp_region_master_mask_trans(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core;
+ struct mlxsw_sp_acl_erp *master_rp;
+
+ mlxsw_sp_acl_erp_table_disable(erp_table);
+ master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table);
+ if (!master_rp)
+ return;
+ mlxsw_sp_acl_erp_table_erp_del(master_rp);
+ __clear_bit(master_rp->index, erp_table->erp_index_bitmap);
+ mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps,
+ erp_table->aregion->type,
+ erp_table->base_index);
+}
+
+static int
+mlxsw_sp_acl_erp_region_erp_add(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+ struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+ bool ctcam_le = erp_table->num_ctcam_erps > 0;
+ char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+ mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0,
+ erp_table->base_index, 0);
+ mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap,
+ MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+ mlxsw_reg_pererp_erpt_vector_set(pererp_pl, erp->index, true);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static void mlxsw_sp_acl_erp_region_erp_del(struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+ struct mlxsw_sp_acl_tcam_region *region = erp_table->aregion->region;
+ struct mlxsw_sp *mlxsw_sp = erp_table->erp_core->mlxsw_sp;
+ bool ctcam_le = erp_table->num_ctcam_erps > 0;
+ char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+ mlxsw_reg_pererp_pack(pererp_pl, region->id, ctcam_le, true, 0,
+ erp_table->base_index, 0);
+ mlxsw_reg_pererp_erp_vector_pack(pererp_pl, erp_table->erp_index_bitmap,
+ MLXSW_SP_ACL_ERP_MAX_PER_REGION);
+ mlxsw_reg_pererp_erpt_vector_set(pererp_pl, erp->index, false);
+
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_region_ctcam_enable(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ /* No need to re-enable lookup in the C-TCAM */
+ if (erp_table->num_ctcam_erps > 1)
+ return 0;
+
+ return mlxsw_sp_acl_erp_table_enable(erp_table, true);
+}
+
+static void
+mlxsw_sp_acl_erp_region_ctcam_disable(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ /* Only disable C-TCAM lookup when last C-TCAM eRP is deleted */
+ if (erp_table->num_ctcam_erps > 1)
+ return;
+
+ mlxsw_sp_acl_erp_table_enable(erp_table, false);
+}
+
+static void
+mlxsw_sp_acl_erp_ctcam_table_ops_set(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ switch (erp_table->num_atcam_erps) {
+ case 2:
+ /* Keep using the eRP table, but correctly set the
+ * operations pointer so that when an A-TCAM eRP is
+ * deleted we will transition to use the master mask
+ */
+ erp_table->ops = &erp_two_masks_ops;
+ break;
+ case 1:
+ /* We only kept the eRP table because we had C-TCAM
+ * eRPs in use. Now that the last C-TCAM eRP is gone we
+ * can stop using the table and transition to use the
+ * master mask
+ */
+ mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+ erp_table->ops = &erp_single_mask_ops;
+ break;
+ case 0:
+ /* There are no more eRPs of any kind used by the region
+ * so free its eRP table and transition to initial state
+ */
+ mlxsw_sp_acl_erp_table_disable(erp_table);
+ mlxsw_sp_acl_erp_table_free(erp_table->erp_core,
+ erp_table->num_max_atcam_erps,
+ erp_table->aregion->type,
+ erp_table->base_index);
+ erp_table->ops = &erp_no_mask_ops;
+ break;
+ default:
+ break;
+ }
+}
+
+static struct mlxsw_sp_acl_erp *
+__mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key)
+{
+ struct mlxsw_sp_acl_erp *erp;
+ int err;
+
+ erp = kzalloc(sizeof(*erp), GFP_KERNEL);
+ if (!erp)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&erp->key, key, sizeof(*key));
+ bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask,
+ MLXSW_SP_ACL_TCAM_MASK_LEN);
+ refcount_set(&erp->refcnt, 1);
+ erp_table->num_ctcam_erps++;
+ erp->erp_table = erp_table;
+
+ err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp);
+ if (err)
+ goto err_master_mask_set;
+
+ err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node,
+ mlxsw_sp_acl_erp_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ err = mlxsw_sp_acl_erp_region_ctcam_enable(erp_table);
+ if (err)
+ goto err_erp_region_ctcam_enable;
+
+ /* When C-TCAM is used, the eRP table must be used */
+ erp_table->ops = &erp_multiple_masks_ops;
+
+ return erp;
+
+err_erp_region_ctcam_enable:
+ rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
+ mlxsw_sp_acl_erp_ht_params);
+err_rhashtable_insert:
+ mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+err_master_mask_set:
+ erp_table->num_ctcam_erps--;
+ kfree(erp);
+ return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key)
+{
+ struct mlxsw_sp_acl_erp *erp;
+ int err;
+
+ /* There is a special situation where we need to spill rules
+ * into the C-TCAM, yet the region is still using a master
+ * mask and thus not performing a lookup in the C-TCAM. This
+ * can happen when two rules that only differ in priority - and
+ * thus sharing the same key - are programmed. In this case
+ * we transition the region to use an eRP table
+ */
+ err = mlxsw_sp_acl_erp_region_table_trans(erp_table);
+ if (err)
+ return ERR_PTR(err);
+
+ erp = __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
+ if (IS_ERR(erp)) {
+ err = PTR_ERR(erp);
+ goto err_erp_create;
+ }
+
+ return erp;
+
+err_erp_create:
+ mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_ctcam_mask_destroy(struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table;
+
+ mlxsw_sp_acl_erp_region_ctcam_disable(erp_table);
+ rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node,
+ mlxsw_sp_acl_erp_ht_params);
+ mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp);
+ erp_table->num_ctcam_erps--;
+ kfree(erp);
+
+ /* Once the last C-TCAM eRP was destroyed, the state we
+ * transition to depends on the number of A-TCAM eRPs currently
+ * in use
+ */
+ if (erp_table->num_ctcam_erps > 0)
+ return;
+ mlxsw_sp_acl_erp_ctcam_table_ops_set(erp_table);
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key)
+{
+ struct mlxsw_sp_acl_erp *erp;
+ int err;
+
+ if (key->ctcam)
+ return __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
+
+ /* Expand the eRP table for the new eRP, if needed */
+ err = mlxsw_sp_acl_erp_table_expand(erp_table);
+ if (err)
+ return ERR_PTR(err);
+
+ erp = mlxsw_sp_acl_erp_generic_create(erp_table, key);
+ if (IS_ERR(erp))
+ return erp;
+
+ err = mlxsw_sp_acl_erp_index_get(erp_table, &erp->index);
+ if (err)
+ goto err_erp_index_get;
+
+ err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp);
+ if (err)
+ goto err_table_erp_add;
+
+ err = mlxsw_sp_acl_erp_region_erp_add(erp_table, erp);
+ if (err)
+ goto err_region_erp_add;
+
+ erp_table->ops = &erp_multiple_masks_ops;
+
+ return erp;
+
+err_region_erp_add:
+ mlxsw_sp_acl_erp_table_erp_del(erp);
+err_table_erp_add:
+ mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+err_erp_index_get:
+ mlxsw_sp_acl_erp_generic_destroy(erp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ if (erp->key.ctcam)
+ return mlxsw_sp_acl_erp_ctcam_mask_destroy(erp);
+
+ mlxsw_sp_acl_erp_region_erp_del(erp);
+ mlxsw_sp_acl_erp_table_erp_del(erp);
+ mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+ mlxsw_sp_acl_erp_generic_destroy(erp);
+
+ if (erp_table->num_atcam_erps == 2 && erp_table->num_ctcam_erps == 0)
+ erp_table->ops = &erp_two_masks_ops;
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_second_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key)
+{
+ struct mlxsw_sp_acl_erp *erp;
+ int err;
+
+ if (key->ctcam)
+ return mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key);
+
+ /* Transition to use eRP table instead of master mask */
+ err = mlxsw_sp_acl_erp_region_table_trans(erp_table);
+ if (err)
+ return ERR_PTR(err);
+
+ erp = mlxsw_sp_acl_erp_generic_create(erp_table, key);
+ if (IS_ERR(erp)) {
+ err = PTR_ERR(erp);
+ goto err_erp_create;
+ }
+
+ err = mlxsw_sp_acl_erp_index_get(erp_table, &erp->index);
+ if (err)
+ goto err_erp_index_get;
+
+ err = mlxsw_sp_acl_erp_table_erp_add(erp_table, erp);
+ if (err)
+ goto err_table_erp_add;
+
+ err = mlxsw_sp_acl_erp_region_erp_add(erp_table, erp);
+ if (err)
+ goto err_region_erp_add;
+
+ erp_table->ops = &erp_two_masks_ops;
+
+ return erp;
+
+err_region_erp_add:
+ mlxsw_sp_acl_erp_table_erp_del(erp);
+err_table_erp_add:
+ mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+err_erp_index_get:
+ mlxsw_sp_acl_erp_generic_destroy(erp);
+err_erp_create:
+ mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_second_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ if (erp->key.ctcam)
+ return mlxsw_sp_acl_erp_ctcam_mask_destroy(erp);
+
+ mlxsw_sp_acl_erp_region_erp_del(erp);
+ mlxsw_sp_acl_erp_table_erp_del(erp);
+ mlxsw_sp_acl_erp_index_put(erp_table, erp->index);
+ mlxsw_sp_acl_erp_generic_destroy(erp);
+ /* Transition to use master mask instead of eRP table */
+ mlxsw_sp_acl_erp_region_master_mask_trans(erp_table);
+
+ erp_table->ops = &erp_single_mask_ops;
+}
+
+static struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_first_mask_create(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp_key *key)
+{
+ struct mlxsw_sp_acl_erp *erp;
+
+ if (key->ctcam)
+ return ERR_PTR(-EINVAL);
+
+ erp = mlxsw_sp_acl_erp_generic_create(erp_table, key);
+ if (IS_ERR(erp))
+ return erp;
+
+ erp_table->ops = &erp_single_mask_ops;
+
+ return erp;
+}
+
+static void
+mlxsw_sp_acl_erp_first_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ mlxsw_sp_acl_erp_generic_destroy(erp);
+ erp_table->ops = &erp_no_mask_ops;
+}
+
+static void
+mlxsw_sp_acl_erp_no_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ WARN_ON(1);
+}
+
+struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion,
+ const char *mask, bool ctcam)
+{
+ struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+ struct mlxsw_sp_acl_erp_key key;
+ struct mlxsw_sp_acl_erp *erp;
+
+ /* eRPs are allocated from a shared resource, but currently all
+ * allocations are done under RTNL.
+ */
+ ASSERT_RTNL();
+
+ memcpy(key.mask, mask, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN);
+ key.ctcam = ctcam;
+ erp = rhashtable_lookup_fast(&erp_table->erp_ht, &key,
+ mlxsw_sp_acl_erp_ht_params);
+ if (erp) {
+ refcount_inc(&erp->refcnt);
+ return erp;
+ }
+
+ return erp_table->ops->erp_create(erp_table, &key);
+}
+
+void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_erp *erp)
+{
+ struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
+
+ ASSERT_RTNL();
+
+ if (!refcount_dec_and_test(&erp->refcnt))
+ return;
+
+ erp_table->ops->erp_destroy(erp_table, erp);
+}
+
+static struct mlxsw_sp_acl_erp_table *
+mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp_acl_erp_table *erp_table;
+ int err;
+
+ erp_table = kzalloc(sizeof(*erp_table), GFP_KERNEL);
+ if (!erp_table)
+ return ERR_PTR(-ENOMEM);
+
+ err = rhashtable_init(&erp_table->erp_ht, &mlxsw_sp_acl_erp_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
+ erp_table->erp_core = aregion->atcam->erp_core;
+ erp_table->ops = &erp_no_mask_ops;
+ INIT_LIST_HEAD(&erp_table->atcam_erps_list);
+ erp_table->aregion = aregion;
+
+ return erp_table;
+
+err_rhashtable_init:
+ kfree(erp_table);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_erp_table_destroy(struct mlxsw_sp_acl_erp_table *erp_table)
+{
+ WARN_ON(!list_empty(&erp_table->atcam_erps_list));
+ rhashtable_destroy(&erp_table->erp_ht);
+ kfree(erp_table);
+}
+
+static int
+mlxsw_sp_acl_erp_master_mask_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+ char percr_pl[MLXSW_REG_PERCR_LEN];
+
+ mlxsw_reg_percr_pack(percr_pl, aregion->region->id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(percr), percr_pl);
+}
+
+static int
+mlxsw_sp_acl_erp_region_param_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp;
+ char pererp_pl[MLXSW_REG_PERERP_LEN];
+
+ mlxsw_reg_pererp_pack(pererp_pl, aregion->region->id, false, false, 0,
+ 0, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl);
+}
+
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ struct mlxsw_sp_acl_erp_table *erp_table;
+ int err;
+
+ erp_table = mlxsw_sp_acl_erp_table_create(aregion);
+ if (IS_ERR(erp_table))
+ return PTR_ERR(erp_table);
+ aregion->erp_table = erp_table;
+
+ /* Initialize the region's master mask to all zeroes */
+ err = mlxsw_sp_acl_erp_master_mask_init(aregion);
+ if (err)
+ goto err_erp_master_mask_init;
+
+ /* Initialize the region to not use the eRP table */
+ err = mlxsw_sp_acl_erp_region_param_init(aregion);
+ if (err)
+ goto err_erp_region_param_init;
+
+ return 0;
+
+err_erp_region_param_init:
+err_erp_master_mask_init:
+ mlxsw_sp_acl_erp_table_destroy(erp_table);
+ return err;
+}
+
+void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion)
+{
+ mlxsw_sp_acl_erp_table_destroy(aregion->erp_table);
+}
+
+static int
+mlxsw_sp_acl_erp_tables_sizes_query(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_erp_core *erp_core)
+{
+ unsigned int size;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_2KB) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_4KB) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_8KB) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_ERPT_ENTRIES_12KB))
+ return -EIO;
+
+ size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_2KB);
+ erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB] = size;
+
+ size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_4KB);
+ erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB] = size;
+
+ size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_8KB);
+ erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB] = size;
+
+ size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_ERPT_ENTRIES_12KB);
+ erp_core->erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB] = size;
+
+ return 0;
+}
+
+static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_erp_core *erp_core)
+{
+ unsigned int erpt_bank_size;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_ERPT_BANK_SIZE) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_ERPT_BANKS))
+ return -EIO;
+ erpt_bank_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_MAX_ERPT_BANK_SIZE);
+ erp_core->num_erp_banks = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_MAX_ERPT_BANKS);
+
+ erp_core->erp_tables = gen_pool_create(0, -1);
+ if (!erp_core->erp_tables)
+ return -ENOMEM;
+ gen_pool_set_algo(erp_core->erp_tables, gen_pool_best_fit, NULL);
+
+ err = gen_pool_add(erp_core->erp_tables,
+ MLXSW_SP_ACL_ERP_GENALLOC_OFFSET, erpt_bank_size,
+ -1);
+ if (err)
+ goto err_gen_pool_add;
+
+ /* Different regions require masks of different sizes */
+ err = mlxsw_sp_acl_erp_tables_sizes_query(mlxsw_sp, erp_core);
+ if (err)
+ goto err_erp_tables_sizes_query;
+
+ return 0;
+
+err_erp_tables_sizes_query:
+err_gen_pool_add:
+ gen_pool_destroy(erp_core->erp_tables);
+ return err;
+}
+
+static void mlxsw_sp_acl_erp_tables_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_erp_core *erp_core)
+{
+ gen_pool_destroy(erp_core->erp_tables);
+}
+
+int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam)
+{
+ struct mlxsw_sp_acl_erp_core *erp_core;
+ int err;
+
+ erp_core = kzalloc(sizeof(*erp_core), GFP_KERNEL);
+ if (!erp_core)
+ return -ENOMEM;
+ erp_core->mlxsw_sp = mlxsw_sp;
+ atcam->erp_core = erp_core;
+
+ err = mlxsw_sp_acl_erp_tables_init(mlxsw_sp, erp_core);
+ if (err)
+ goto err_erp_tables_init;
+
+ return 0;
+
+err_erp_tables_init:
+ kfree(erp_core);
+ return err;
+}
+
+void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam)
+{
+ mlxsw_sp_acl_erp_tables_fini(mlxsw_sp, atcam->erp_core);
+ kfree(atcam->erp_core);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
new file mode 100644
index 000000000..e47d1d286
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include "spectrum_acl_flex_actions.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_span.h"
+
+static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
+ char *enc_actions, bool is_first, bool ca)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ char pefa_pl[MLXSW_REG_PEFA_LEN];
+ u32 kvdl_index;
+ int err;
+
+ /* The first action set of a TCAM entry is stored directly in TCAM,
+ * not KVD linear area.
+ */
+ if (is_first)
+ return 0;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+ 1, &kvdl_index);
+ if (err)
+ return err;
+ mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, ca, enc_actions);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
+ if (err)
+ goto err_pefa_write;
+ *p_kvdl_index = kvdl_index;
+ return 0;
+
+err_pefa_write:
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+ 1, kvdl_index);
+ return err;
+}
+
+static int mlxsw_sp1_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
+ char *enc_actions, bool is_first)
+{
+ return mlxsw_sp_act_kvdl_set_add(priv, p_kvdl_index, enc_actions,
+ is_first, false);
+}
+
+static int mlxsw_sp2_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
+ char *enc_actions, bool is_first)
+{
+ return mlxsw_sp_act_kvdl_set_add(priv, p_kvdl_index, enc_actions,
+ is_first, true);
+}
+
+static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
+ bool is_first)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ if (is_first)
+ return;
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+ 1, kvdl_index);
+}
+
+static int mlxsw_sp1_act_kvdl_set_activity_get(void *priv, u32 kvdl_index,
+ bool *activity)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlxsw_sp2_act_kvdl_set_activity_get(void *priv, u32 kvdl_index,
+ bool *activity)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ char pefa_pl[MLXSW_REG_PEFA_LEN];
+ int err;
+
+ mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, true, NULL);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
+ if (err)
+ return err;
+ mlxsw_reg_pefa_unpack(pefa_pl, activity);
+ return 0;
+}
+
+static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
+ u8 local_port)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ char ppbs_pl[MLXSW_REG_PPBS_LEN];
+ u32 kvdl_index;
+ int err;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+ 1, &kvdl_index);
+ if (err)
+ return err;
+ mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl);
+ if (err)
+ goto err_ppbs_write;
+ *p_kvdl_index = kvdl_index;
+ return 0;
+
+err_ppbs_write:
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+ 1, kvdl_index);
+ return err;
+}
+
+static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+ 1, kvdl_index);
+}
+
+static int
+mlxsw_sp_act_counter_index_get(void *priv, unsigned int *p_counter_index)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_flow_counter_alloc(mlxsw_sp, p_counter_index);
+}
+
+static void
+mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp_flow_counter_free(mlxsw_sp, counter_index);
+}
+
+static int
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+ const struct net_device *out_dev,
+ bool ingress, int *p_span_id)
+{
+ struct mlxsw_sp_port *in_port;
+ struct mlxsw_sp *mlxsw_sp = priv;
+ enum mlxsw_sp_span_type type;
+
+ type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
+ in_port = mlxsw_sp->ports[local_in_port];
+
+ return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
+ false, p_span_id);
+}
+
+static void
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ struct mlxsw_sp_port *in_port;
+ enum mlxsw_sp_span_type type;
+
+ type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
+ in_port = mlxsw_sp->ports[local_in_port];
+
+ mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
+}
+
+const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
+ .kvdl_set_add = mlxsw_sp1_act_kvdl_set_add,
+ .kvdl_set_del = mlxsw_sp_act_kvdl_set_del,
+ .kvdl_set_activity_get = mlxsw_sp1_act_kvdl_set_activity_get,
+ .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add,
+ .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del,
+ .counter_index_get = mlxsw_sp_act_counter_index_get,
+ .counter_index_put = mlxsw_sp_act_counter_index_put,
+ .mirror_add = mlxsw_sp_act_mirror_add,
+ .mirror_del = mlxsw_sp_act_mirror_del,
+};
+
+const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = {
+ .kvdl_set_add = mlxsw_sp2_act_kvdl_set_add,
+ .kvdl_set_del = mlxsw_sp_act_kvdl_set_del,
+ .kvdl_set_activity_get = mlxsw_sp2_act_kvdl_set_activity_get,
+ .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add,
+ .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del,
+ .counter_index_get = mlxsw_sp_act_counter_index_get,
+ .counter_index_put = mlxsw_sp_act_counter_index_put,
+ .mirror_add = mlxsw_sp_act_mirror_add,
+ .mirror_del = mlxsw_sp_act_mirror_del,
+ .dummy_first_set = true,
+};
+
+int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_ACTIONS_PER_SET),
+ mlxsw_sp->afa_ops, mlxsw_sp);
+ return PTR_ERR_OR_ZERO(mlxsw_sp->afa);
+}
+
+void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_afa_destroy(mlxsw_sp->afa);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
new file mode 100644
index 000000000..fe436d816
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_ACL_FLEX_ACTIONS_H
+#define _MLXSW_SPECTRUM_ACL_FLEX_ACTIONS_H
+
+#include "spectrum.h"
+
+int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
new file mode 100644
index 000000000..d409b09ba
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "spectrum.h"
+#include "item.h"
+#include "core_acl_flex_keys.h"
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x00, 2),
+ MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+ MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x00, 2),
+ MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3),
+ MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x02, 2),
+ MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
+ MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12),
+ MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16),
+ MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_dip[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_ex1[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_64_95, 0x04, 4),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_32_63, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_96_127, 0x00, 4),
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_64_95, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16),
+};
+
+static const struct mlxsw_afk_block mlxsw_sp1_afk_blocks[] = {
+ MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_l2_dmac),
+ MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_l2_smac),
+ MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex),
+ MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip),
+ MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip),
+ MLXSW_AFK_BLOCK(0x32, mlxsw_sp_afk_element_info_ipv4),
+ MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex),
+ MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip),
+ MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1),
+ MLXSW_AFK_BLOCK(0x62, mlxsw_sp_afk_element_info_ipv6_sip),
+ MLXSW_AFK_BLOCK(0x63, mlxsw_sp_afk_element_info_ipv6_sip_ex),
+ MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type),
+};
+
+#define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16
+
+static void mlxsw_sp1_afk_encode_block(char *block, int block_index,
+ char *output)
+{
+ unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE;
+ char *output_indexed = output + offset;
+
+ memcpy(output_indexed, block, MLXSW_SP1_AFK_KEY_BLOCK_SIZE);
+}
+
+const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = {
+ .blocks = mlxsw_sp1_afk_blocks,
+ .blocks_count = ARRAY_SIZE(mlxsw_sp1_afk_blocks),
+ .encode_block = mlxsw_sp1_afk_encode_block,
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_1[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_2[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SMAC_32_47, 0x04, 2),
+ MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x06, 2),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_3[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x00, 0, 3),
+ MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
+ MLXSW_AFK_ELEMENT_INST_BUF(DMAC_32_47, 0x06, 2),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_4[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x00, 0, 3),
+ MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
+ MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x04, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12),
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x04, 0, 8), /* RX_ACL_SYSTEM_PORT */
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_1[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x04, 0, 6),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 6, 2),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 8, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x04, 16, 8),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_1[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_64_95, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_96_127, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_3[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_32_63, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_4[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_64_95, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_5[] = {
+ MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_96_127, 0x04, 4),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_0[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x04, 16, 16),
+ MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x04, 0, 16),
+};
+
+static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l4_2[] = {
+ MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x04, 16, 9), /* TCP_CONTROL + TCP_ECN */
+};
+
+static const struct mlxsw_afk_block mlxsw_sp2_afk_blocks[] = {
+ MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_mac_0),
+ MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_mac_1),
+ MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_mac_2),
+ MLXSW_AFK_BLOCK(0x13, mlxsw_sp_afk_element_info_mac_3),
+ MLXSW_AFK_BLOCK(0x14, mlxsw_sp_afk_element_info_mac_4),
+ MLXSW_AFK_BLOCK(0x15, mlxsw_sp_afk_element_info_mac_5),
+ MLXSW_AFK_BLOCK(0x38, mlxsw_sp_afk_element_info_ipv4_0),
+ MLXSW_AFK_BLOCK(0x39, mlxsw_sp_afk_element_info_ipv4_1),
+ MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2),
+ MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0),
+ MLXSW_AFK_BLOCK(0x41, mlxsw_sp_afk_element_info_ipv6_1),
+ MLXSW_AFK_BLOCK(0x42, mlxsw_sp_afk_element_info_ipv6_2),
+ MLXSW_AFK_BLOCK(0x43, mlxsw_sp_afk_element_info_ipv6_3),
+ MLXSW_AFK_BLOCK(0x44, mlxsw_sp_afk_element_info_ipv6_4),
+ MLXSW_AFK_BLOCK(0x45, mlxsw_sp_afk_element_info_ipv6_5),
+ MLXSW_AFK_BLOCK(0x90, mlxsw_sp_afk_element_info_l4_0),
+ MLXSW_AFK_BLOCK(0x92, mlxsw_sp_afk_element_info_l4_2),
+};
+
+#define MLXSW_SP2_AFK_BITS_PER_BLOCK 36
+
+/* A block in Spectrum-2 is of the following form:
+ *
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | | | | | | | | | | | | | | | | | | | | | | | | | | | | |35|34|33|32|
+ * +-----------------------------------------------------------------------------------------------+
+ * |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ */
+MLXSW_ITEM64(sp2_afk, block, value, 0x00, 0, MLXSW_SP2_AFK_BITS_PER_BLOCK);
+
+/* The key / mask block layout in Spectrum-2 is of the following form:
+ *
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * | | | | | | | | | | | | | | | | | block11_high |
+ * +-----------------------------------------------------------------------------------------------+
+ * | block11_low | block10_high |
+ * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ * ...
+ */
+
+struct mlxsw_sp2_afk_block_layout {
+ unsigned short offset;
+ struct mlxsw_item item;
+};
+
+#define MLXSW_SP2_AFK_BLOCK_LAYOUT(_block, _offset, _shift) \
+ { \
+ .offset = _offset, \
+ { \
+ .shift = _shift, \
+ .size = {.bits = MLXSW_SP2_AFK_BITS_PER_BLOCK}, \
+ .name = #_block, \
+ } \
+ } \
+
+static const struct mlxsw_sp2_afk_block_layout mlxsw_sp2_afk_blocks_layout[] = {
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block0, 0x30, 0),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block1, 0x2C, 4),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block2, 0x28, 8),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block3, 0x24, 12),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block4, 0x20, 16),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block5, 0x1C, 20),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block6, 0x18, 24),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block7, 0x14, 28),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block8, 0x0C, 0),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block9, 0x08, 4),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block10, 0x04, 8),
+ MLXSW_SP2_AFK_BLOCK_LAYOUT(block11, 0x00, 12),
+};
+
+static void mlxsw_sp2_afk_encode_block(char *block, int block_index,
+ char *output)
+{
+ u64 block_value = mlxsw_sp2_afk_block_value_get(block);
+ const struct mlxsw_sp2_afk_block_layout *block_layout;
+
+ if (WARN_ON(block_index < 0 ||
+ block_index >= ARRAY_SIZE(mlxsw_sp2_afk_blocks_layout)))
+ return;
+
+ block_layout = &mlxsw_sp2_afk_blocks_layout[block_index];
+ __mlxsw_item_set64(output + block_layout->offset,
+ &block_layout->item, 0, block_value);
+}
+
+const struct mlxsw_afk_ops mlxsw_sp2_afk_ops = {
+ .blocks = mlxsw_sp2_afk_blocks,
+ .blocks_count = ARRAY_SIZE(mlxsw_sp2_afk_blocks),
+ .encode_block = mlxsw_sp2_afk_encode_block,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
new file mode 100644
index 000000000..30931a2c0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -0,0 +1,973 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+
+#include "reg.h"
+#include "core.h"
+#include "resources.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+#include "core_acl_flex_keys.h"
+
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+ return ops->priv_size;
+}
+
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+ u64 max_tcam_regions;
+ u64 max_regions;
+ u64 max_groups;
+ size_t alloc_size;
+ int err;
+
+ max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_MAX_TCAM_REGIONS);
+ max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS);
+
+ /* Use 1:1 mapping between ACL region and TCAM region */
+ if (max_tcam_regions < max_regions)
+ max_regions = max_tcam_regions;
+
+ alloc_size = sizeof(tcam->used_regions[0]) * BITS_TO_LONGS(max_regions);
+ tcam->used_regions = kzalloc(alloc_size, GFP_KERNEL);
+ if (!tcam->used_regions)
+ return -ENOMEM;
+ tcam->max_regions = max_regions;
+
+ max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS);
+ alloc_size = sizeof(tcam->used_groups[0]) * BITS_TO_LONGS(max_groups);
+ tcam->used_groups = kzalloc(alloc_size, GFP_KERNEL);
+ if (!tcam->used_groups) {
+ err = -ENOMEM;
+ goto err_alloc_used_groups;
+ }
+ tcam->max_groups = max_groups;
+ tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_MAX_GROUP_SIZE);
+
+ err = ops->init(mlxsw_sp, tcam->priv, tcam);
+ if (err)
+ goto err_tcam_init;
+
+ return 0;
+
+err_tcam_init:
+ kfree(tcam->used_groups);
+err_alloc_used_groups:
+ kfree(tcam->used_regions);
+ return err;
+}
+
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+ ops->fini(mlxsw_sp, tcam->priv);
+ kfree(tcam->used_groups);
+ kfree(tcam->used_regions);
+}
+
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u32 *priority, bool fillup_priority)
+{
+ u64 max_priority;
+
+ if (!fillup_priority) {
+ *priority = 0;
+ return 0;
+ }
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE))
+ return -EIO;
+
+ /* Priority range is 1..cap_kvd_size-1. */
+ max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE) - 1;
+ if (rulei->priority >= max_priority)
+ return -EINVAL;
+
+ /* Unlike in TC, in HW, higher number means higher priority. */
+ *priority = max_priority - rulei->priority;
+ return 0;
+}
+
+static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam,
+ u16 *p_id)
+{
+ u16 id;
+
+ id = find_first_zero_bit(tcam->used_regions, tcam->max_regions);
+ if (id < tcam->max_regions) {
+ __set_bit(id, tcam->used_regions);
+ *p_id = id;
+ return 0;
+ }
+ return -ENOBUFS;
+}
+
+static void mlxsw_sp_acl_tcam_region_id_put(struct mlxsw_sp_acl_tcam *tcam,
+ u16 id)
+{
+ __clear_bit(id, tcam->used_regions);
+}
+
+static int mlxsw_sp_acl_tcam_group_id_get(struct mlxsw_sp_acl_tcam *tcam,
+ u16 *p_id)
+{
+ u16 id;
+
+ id = find_first_zero_bit(tcam->used_groups, tcam->max_groups);
+ if (id < tcam->max_groups) {
+ __set_bit(id, tcam->used_groups);
+ *p_id = id;
+ return 0;
+ }
+ return -ENOBUFS;
+}
+
+static void mlxsw_sp_acl_tcam_group_id_put(struct mlxsw_sp_acl_tcam *tcam,
+ u16 id)
+{
+ __clear_bit(id, tcam->used_groups);
+}
+
+struct mlxsw_sp_acl_tcam_pattern {
+ const enum mlxsw_afk_element *elements;
+ unsigned int elements_count;
+};
+
+struct mlxsw_sp_acl_tcam_group {
+ struct mlxsw_sp_acl_tcam *tcam;
+ u16 id;
+ struct list_head region_list;
+ unsigned int region_count;
+ struct rhashtable chunk_ht;
+ struct mlxsw_sp_acl_tcam_group_ops *ops;
+ const struct mlxsw_sp_acl_tcam_pattern *patterns;
+ unsigned int patterns_count;
+ bool tmplt_elusage_set;
+ struct mlxsw_afk_element_usage tmplt_elusage;
+};
+
+struct mlxsw_sp_acl_tcam_chunk {
+ struct list_head list; /* Member of a TCAM region */
+ struct rhash_head ht_node; /* Member of a chunk HT */
+ unsigned int priority; /* Priority within the region and group */
+ struct mlxsw_sp_acl_tcam_group *group;
+ struct mlxsw_sp_acl_tcam_region *region;
+ unsigned int ref_count;
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+struct mlxsw_sp_acl_tcam_entry {
+ struct mlxsw_sp_acl_tcam_chunk *chunk;
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = {
+ .key_len = sizeof(unsigned int),
+ .key_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, priority),
+ .head_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, ht_node),
+ .automatic_shrinking = true,
+};
+
+static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group)
+{
+ struct mlxsw_sp_acl_tcam_region *region;
+ char pagt_pl[MLXSW_REG_PAGT_LEN];
+ int acl_index = 0;
+
+ mlxsw_reg_pagt_pack(pagt_pl, group->id);
+ list_for_each_entry(region, &group->region_list, list)
+ mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, region->id);
+ mlxsw_reg_pagt_size_set(pagt_pl, acl_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl);
+}
+
+static int
+mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam,
+ struct mlxsw_sp_acl_tcam_group *group,
+ const struct mlxsw_sp_acl_tcam_pattern *patterns,
+ unsigned int patterns_count,
+ struct mlxsw_afk_element_usage *tmplt_elusage)
+{
+ int err;
+
+ group->tcam = tcam;
+ group->patterns = patterns;
+ group->patterns_count = patterns_count;
+ if (tmplt_elusage) {
+ group->tmplt_elusage_set = true;
+ memcpy(&group->tmplt_elusage, tmplt_elusage,
+ sizeof(group->tmplt_elusage));
+ }
+ INIT_LIST_HEAD(&group->region_list);
+ err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id);
+ if (err)
+ return err;
+
+ err = rhashtable_init(&group->chunk_ht,
+ &mlxsw_sp_acl_tcam_chunk_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
+ return 0;
+
+err_rhashtable_init:
+ mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
+ return err;
+}
+
+static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group)
+{
+ struct mlxsw_sp_acl_tcam *tcam = group->tcam;
+
+ rhashtable_destroy(&group->chunk_ht);
+ mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
+ WARN_ON(!list_empty(&group->region_list));
+}
+
+static int
+mlxsw_sp_acl_tcam_group_bind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress)
+{
+ char ppbt_pl[MLXSW_REG_PPBT_LEN];
+
+ mlxsw_reg_ppbt_pack(ppbt_pl, ingress ? MLXSW_REG_PXBT_E_IACL :
+ MLXSW_REG_PXBT_E_EACL,
+ MLXSW_REG_PXBT_OP_BIND, mlxsw_sp_port->local_port,
+ group->id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl);
+}
+
+static void
+mlxsw_sp_acl_tcam_group_unbind(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress)
+{
+ char ppbt_pl[MLXSW_REG_PPBT_LEN];
+
+ mlxsw_reg_ppbt_pack(ppbt_pl, ingress ? MLXSW_REG_PXBT_E_IACL :
+ MLXSW_REG_PXBT_E_EACL,
+ MLXSW_REG_PXBT_OP_UNBIND, mlxsw_sp_port->local_port,
+ group->id);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl);
+}
+
+static u16
+mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group)
+{
+ return group->id;
+}
+
+static unsigned int
+mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region)
+{
+ struct mlxsw_sp_acl_tcam_chunk *chunk;
+
+ if (list_empty(&region->chunk_list))
+ return 0;
+ /* As a priority of a region, return priority of the first chunk */
+ chunk = list_first_entry(&region->chunk_list, typeof(*chunk), list);
+ return chunk->priority;
+}
+
+static unsigned int
+mlxsw_sp_acl_tcam_region_max_prio(struct mlxsw_sp_acl_tcam_region *region)
+{
+ struct mlxsw_sp_acl_tcam_chunk *chunk;
+
+ if (list_empty(&region->chunk_list))
+ return 0;
+ chunk = list_last_entry(&region->chunk_list, typeof(*chunk), list);
+ return chunk->priority;
+}
+
+static void
+mlxsw_sp_acl_tcam_group_list_add(struct mlxsw_sp_acl_tcam_group *group,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ struct mlxsw_sp_acl_tcam_region *region2;
+ struct list_head *pos;
+
+ /* Position the region inside the list according to priority */
+ list_for_each(pos, &group->region_list) {
+ region2 = list_entry(pos, typeof(*region2), list);
+ if (mlxsw_sp_acl_tcam_region_prio(region2) >
+ mlxsw_sp_acl_tcam_region_prio(region))
+ break;
+ }
+ list_add_tail(&region->list, pos);
+ group->region_count++;
+}
+
+static void
+mlxsw_sp_acl_tcam_group_list_del(struct mlxsw_sp_acl_tcam_group *group,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ group->region_count--;
+ list_del(&region->list);
+}
+
+static int
+mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ int err;
+
+ if (group->region_count == group->tcam->max_group_size)
+ return -ENOBUFS;
+
+ mlxsw_sp_acl_tcam_group_list_add(group, region);
+
+ err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+ if (err)
+ goto err_group_update;
+ region->group = group;
+
+ return 0;
+
+err_group_update:
+ mlxsw_sp_acl_tcam_group_list_del(group, region);
+ mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+ return err;
+}
+
+static void
+mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ struct mlxsw_sp_acl_tcam_group *group = region->group;
+
+ mlxsw_sp_acl_tcam_group_list_del(group, region);
+ mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
+}
+
+static struct mlxsw_sp_acl_tcam_region *
+mlxsw_sp_acl_tcam_group_region_find(struct mlxsw_sp_acl_tcam_group *group,
+ unsigned int priority,
+ struct mlxsw_afk_element_usage *elusage,
+ bool *p_need_split)
+{
+ struct mlxsw_sp_acl_tcam_region *region, *region2;
+ struct list_head *pos;
+ bool issubset;
+
+ list_for_each(pos, &group->region_list) {
+ region = list_entry(pos, typeof(*region), list);
+
+ /* First, check if the requested priority does not rather belong
+ * under some of the next regions.
+ */
+ if (pos->next != &group->region_list) { /* not last */
+ region2 = list_entry(pos->next, typeof(*region2), list);
+ if (priority >= mlxsw_sp_acl_tcam_region_prio(region2))
+ continue;
+ }
+
+ issubset = mlxsw_afk_key_info_subset(region->key_info, elusage);
+
+ /* If requested element usage would not fit and the priority
+ * is lower than the currently inspected region we cannot
+ * use this region, so return NULL to indicate new region has
+ * to be created.
+ */
+ if (!issubset &&
+ priority < mlxsw_sp_acl_tcam_region_prio(region))
+ return NULL;
+
+ /* If requested element usage would not fit and the priority
+ * is higher than the currently inspected region we cannot
+ * use this region. There is still some hope that the next
+ * region would be the fit. So let it be processed and
+ * eventually break at the check right above this.
+ */
+ if (!issubset &&
+ priority > mlxsw_sp_acl_tcam_region_max_prio(region))
+ continue;
+
+ /* Indicate if the region needs to be split in order to add
+ * the requested priority. Split is needed when requested
+ * element usage won't fit into the found region.
+ */
+ *p_need_split = !issubset;
+ return region;
+ }
+ return NULL; /* New region has to be created. */
+}
+
+static void
+mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group,
+ struct mlxsw_afk_element_usage *elusage,
+ struct mlxsw_afk_element_usage *out)
+{
+ const struct mlxsw_sp_acl_tcam_pattern *pattern;
+ int i;
+
+ /* In case the template is set, we don't have to look up the pattern
+ * and just use the template.
+ */
+ if (group->tmplt_elusage_set) {
+ memcpy(out, &group->tmplt_elusage, sizeof(*out));
+ WARN_ON(!mlxsw_afk_element_usage_subset(elusage, out));
+ return;
+ }
+
+ for (i = 0; i < group->patterns_count; i++) {
+ pattern = &group->patterns[i];
+ mlxsw_afk_element_usage_fill(out, pattern->elements,
+ pattern->elements_count);
+ if (mlxsw_afk_element_usage_subset(elusage, out))
+ return;
+ }
+ memcpy(out, elusage, sizeof(*out));
+}
+
+static int
+mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ struct mlxsw_afk_key_info *key_info = region->key_info;
+ char ptar_pl[MLXSW_REG_PTAR_LEN];
+ unsigned int encodings_count;
+ int i;
+ int err;
+
+ mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_ALLOC,
+ region->key_type,
+ MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
+ region->id, region->tcam_region_info);
+ encodings_count = mlxsw_afk_key_info_blocks_count_get(key_info);
+ for (i = 0; i < encodings_count; i++) {
+ u16 encoding;
+
+ encoding = mlxsw_afk_key_info_block_encoding_get(key_info, i);
+ mlxsw_reg_ptar_key_id_pack(ptar_pl, i, encoding);
+ }
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
+ if (err)
+ return err;
+ mlxsw_reg_ptar_unpack(ptar_pl, region->tcam_region_info);
+ return 0;
+}
+
+static void
+mlxsw_sp_acl_tcam_region_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ char ptar_pl[MLXSW_REG_PTAR_LEN];
+
+ mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_FREE,
+ region->key_type, 0, region->id,
+ region->tcam_region_info);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
+}
+
+static int
+mlxsw_sp_acl_tcam_region_enable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ char pacl_pl[MLXSW_REG_PACL_LEN];
+
+ mlxsw_reg_pacl_pack(pacl_pl, region->id, true,
+ region->tcam_region_info);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl);
+}
+
+static void
+mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ char pacl_pl[MLXSW_REG_PACL_LEN];
+
+ mlxsw_reg_pacl_pack(pacl_pl, region->id, false,
+ region->tcam_region_info);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl);
+}
+
+static struct mlxsw_sp_acl_tcam_region *
+mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+ struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+ struct mlxsw_sp_acl_tcam_region *region;
+ int err;
+
+ region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL);
+ if (!region)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&region->chunk_list);
+ region->mlxsw_sp = mlxsw_sp;
+
+ region->key_info = mlxsw_afk_key_info_get(afk, elusage);
+ if (IS_ERR(region->key_info)) {
+ err = PTR_ERR(region->key_info);
+ goto err_key_info_get;
+ }
+
+ err = mlxsw_sp_acl_tcam_region_id_get(tcam, &region->id);
+ if (err)
+ goto err_region_id_get;
+
+ err = ops->region_associate(mlxsw_sp, region);
+ if (err)
+ goto err_tcam_region_associate;
+
+ region->key_type = ops->key_type;
+ err = mlxsw_sp_acl_tcam_region_alloc(mlxsw_sp, region);
+ if (err)
+ goto err_tcam_region_alloc;
+
+ err = mlxsw_sp_acl_tcam_region_enable(mlxsw_sp, region);
+ if (err)
+ goto err_tcam_region_enable;
+
+ err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, region);
+ if (err)
+ goto err_tcam_region_init;
+
+ return region;
+
+err_tcam_region_init:
+ mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
+err_tcam_region_enable:
+ mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
+err_tcam_region_alloc:
+err_tcam_region_associate:
+ mlxsw_sp_acl_tcam_region_id_put(tcam, region->id);
+err_region_id_get:
+ mlxsw_afk_key_info_put(region->key_info);
+err_key_info_get:
+ kfree(region);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_region *region)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+ ops->region_fini(mlxsw_sp, region->priv);
+ mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
+ mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
+ mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id);
+ mlxsw_afk_key_info_put(region->key_info);
+ kfree(region);
+}
+
+static int
+mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group,
+ unsigned int priority,
+ struct mlxsw_afk_element_usage *elusage,
+ struct mlxsw_sp_acl_tcam_chunk *chunk)
+{
+ struct mlxsw_sp_acl_tcam_region *region;
+ bool region_created = false;
+ bool need_split;
+ int err;
+
+ region = mlxsw_sp_acl_tcam_group_region_find(group, priority, elusage,
+ &need_split);
+ if (region && need_split) {
+ /* According to priority, the chunk should belong to an
+ * existing region. However, this chunk needs elements
+ * that region does not contain. We need to split the existing
+ * region into two and create a new region for this chunk
+ * in between. This is not supported now.
+ */
+ return -EOPNOTSUPP;
+ }
+ if (!region) {
+ struct mlxsw_afk_element_usage region_elusage;
+
+ mlxsw_sp_acl_tcam_group_use_patterns(group, elusage,
+ &region_elusage);
+ region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, group->tcam,
+ &region_elusage);
+ if (IS_ERR(region))
+ return PTR_ERR(region);
+ region_created = true;
+ }
+
+ chunk->region = region;
+ list_add_tail(&chunk->list, &region->chunk_list);
+
+ if (!region_created)
+ return 0;
+
+ err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, group, region);
+ if (err)
+ goto err_group_region_attach;
+
+ return 0;
+
+err_group_region_attach:
+ mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region);
+ return err;
+}
+
+static void
+mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_chunk *chunk)
+{
+ struct mlxsw_sp_acl_tcam_region *region = chunk->region;
+
+ list_del(&chunk->list);
+ if (list_empty(&region->chunk_list)) {
+ mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, region);
+ mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region);
+ }
+}
+
+static struct mlxsw_sp_acl_tcam_chunk *
+mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group,
+ unsigned int priority,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+ struct mlxsw_sp_acl_tcam_chunk *chunk;
+ int err;
+
+ if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
+ return ERR_PTR(-EINVAL);
+
+ chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL);
+ if (!chunk)
+ return ERR_PTR(-ENOMEM);
+ chunk->priority = priority;
+ chunk->group = group;
+ chunk->ref_count = 1;
+
+ err = mlxsw_sp_acl_tcam_chunk_assoc(mlxsw_sp, group, priority,
+ elusage, chunk);
+ if (err)
+ goto err_chunk_assoc;
+
+ ops->chunk_init(chunk->region->priv, chunk->priv, priority);
+
+ err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node,
+ mlxsw_sp_acl_tcam_chunk_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ return chunk;
+
+err_rhashtable_insert:
+ ops->chunk_fini(chunk->priv);
+ mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
+err_chunk_assoc:
+ kfree(chunk);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_chunk *chunk)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+ struct mlxsw_sp_acl_tcam_group *group = chunk->group;
+
+ rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node,
+ mlxsw_sp_acl_tcam_chunk_ht_params);
+ ops->chunk_fini(chunk->priv);
+ mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
+ kfree(chunk);
+}
+
+static struct mlxsw_sp_acl_tcam_chunk *
+mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group,
+ unsigned int priority,
+ struct mlxsw_afk_element_usage *elusage)
+{
+ struct mlxsw_sp_acl_tcam_chunk *chunk;
+
+ chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority,
+ mlxsw_sp_acl_tcam_chunk_ht_params);
+ if (chunk) {
+ if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->region->key_info,
+ elusage)))
+ return ERR_PTR(-EINVAL);
+ chunk->ref_count++;
+ return chunk;
+ }
+ return mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, group,
+ priority, elusage);
+}
+
+static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_chunk *chunk)
+{
+ if (--chunk->ref_count)
+ return;
+ mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk);
+}
+
+static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+ return ops->entry_priv_size;
+}
+
+static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_group *group,
+ struct mlxsw_sp_acl_tcam_entry *entry,
+ struct mlxsw_sp_acl_rule_info *rulei)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+ struct mlxsw_sp_acl_tcam_chunk *chunk;
+ struct mlxsw_sp_acl_tcam_region *region;
+ int err;
+
+ chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, group, rulei->priority,
+ &rulei->values.elusage);
+ if (IS_ERR(chunk))
+ return PTR_ERR(chunk);
+
+ region = chunk->region;
+
+ err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv,
+ entry->priv, rulei);
+ if (err)
+ goto err_entry_add;
+ entry->chunk = chunk;
+
+ return 0;
+
+err_entry_add:
+ mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
+ return err;
+}
+
+static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_entry *entry)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+ struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
+ struct mlxsw_sp_acl_tcam_region *region = chunk->region;
+
+ ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv);
+ mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
+}
+
+static int
+mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam_entry *entry,
+ bool *activity)
+{
+ const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+ struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
+ struct mlxsw_sp_acl_tcam_region *region = chunk->region;
+
+ return ops->entry_activity_get(mlxsw_sp, region->priv,
+ entry->priv, activity);
+}
+
+static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
+ MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
+ MLXSW_AFK_ELEMENT_DMAC_32_47,
+ MLXSW_AFK_ELEMENT_DMAC_0_31,
+ MLXSW_AFK_ELEMENT_SMAC_32_47,
+ MLXSW_AFK_ELEMENT_SMAC_0_31,
+ MLXSW_AFK_ELEMENT_ETHERTYPE,
+ MLXSW_AFK_ELEMENT_IP_PROTO,
+ MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+ MLXSW_AFK_ELEMENT_DST_IP_0_31,
+ MLXSW_AFK_ELEMENT_DST_L4_PORT,
+ MLXSW_AFK_ELEMENT_SRC_L4_PORT,
+ MLXSW_AFK_ELEMENT_VID,
+ MLXSW_AFK_ELEMENT_PCP,
+ MLXSW_AFK_ELEMENT_TCP_FLAGS,
+ MLXSW_AFK_ELEMENT_IP_TTL_,
+ MLXSW_AFK_ELEMENT_IP_ECN,
+ MLXSW_AFK_ELEMENT_IP_DSCP,
+};
+
+static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = {
+ MLXSW_AFK_ELEMENT_ETHERTYPE,
+ MLXSW_AFK_ELEMENT_IP_PROTO,
+ MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+ MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+ MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+ MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+ MLXSW_AFK_ELEMENT_DST_IP_96_127,
+ MLXSW_AFK_ELEMENT_DST_IP_64_95,
+ MLXSW_AFK_ELEMENT_DST_IP_32_63,
+ MLXSW_AFK_ELEMENT_DST_IP_0_31,
+ MLXSW_AFK_ELEMENT_DST_L4_PORT,
+ MLXSW_AFK_ELEMENT_SRC_L4_PORT,
+};
+
+static const struct mlxsw_sp_acl_tcam_pattern mlxsw_sp_acl_tcam_patterns[] = {
+ {
+ .elements = mlxsw_sp_acl_tcam_pattern_ipv4,
+ .elements_count = ARRAY_SIZE(mlxsw_sp_acl_tcam_pattern_ipv4),
+ },
+ {
+ .elements = mlxsw_sp_acl_tcam_pattern_ipv6,
+ .elements_count = ARRAY_SIZE(mlxsw_sp_acl_tcam_pattern_ipv6),
+ },
+};
+
+#define MLXSW_SP_ACL_TCAM_PATTERNS_COUNT \
+ ARRAY_SIZE(mlxsw_sp_acl_tcam_patterns)
+
+struct mlxsw_sp_acl_tcam_flower_ruleset {
+ struct mlxsw_sp_acl_tcam_group group;
+};
+
+struct mlxsw_sp_acl_tcam_flower_rule {
+ struct mlxsw_sp_acl_tcam_entry entry;
+};
+
+static int
+mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam,
+ void *ruleset_priv,
+ struct mlxsw_afk_element_usage *tmplt_elusage)
+{
+ struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
+
+ return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group,
+ mlxsw_sp_acl_tcam_patterns,
+ MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
+ tmplt_elusage);
+}
+
+static void
+mlxsw_sp_acl_tcam_flower_ruleset_del(struct mlxsw_sp *mlxsw_sp,
+ void *ruleset_priv)
+{
+ struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
+
+ mlxsw_sp_acl_tcam_group_del(mlxsw_sp, &ruleset->group);
+}
+
+static int
+mlxsw_sp_acl_tcam_flower_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
+ void *ruleset_priv,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress)
+{
+ struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
+
+ return mlxsw_sp_acl_tcam_group_bind(mlxsw_sp, &ruleset->group,
+ mlxsw_sp_port, ingress);
+}
+
+static void
+mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
+ void *ruleset_priv,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress)
+{
+ struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
+
+ mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group,
+ mlxsw_sp_port, ingress);
+}
+
+static u16
+mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv)
+{
+ struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
+
+ return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
+}
+
+static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+ return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) +
+ mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
+}
+
+static int
+mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
+ void *ruleset_priv, void *rule_priv,
+ struct mlxsw_sp_acl_rule_info *rulei)
+{
+ struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
+ struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
+
+ return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group,
+ &rule->entry, rulei);
+}
+
+static void
+mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv)
+{
+ struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
+
+ mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry);
+}
+
+static int
+mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp,
+ void *rule_priv, bool *activity)
+{
+ struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
+
+ return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry,
+ activity);
+}
+
+static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
+ .ruleset_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_ruleset),
+ .ruleset_add = mlxsw_sp_acl_tcam_flower_ruleset_add,
+ .ruleset_del = mlxsw_sp_acl_tcam_flower_ruleset_del,
+ .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind,
+ .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind,
+ .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id,
+ .rule_priv_size = mlxsw_sp_acl_tcam_flower_rule_priv_size,
+ .rule_add = mlxsw_sp_acl_tcam_flower_rule_add,
+ .rule_del = mlxsw_sp_acl_tcam_flower_rule_del,
+ .rule_activity_get = mlxsw_sp_acl_tcam_flower_rule_activity_get,
+};
+
+static const struct mlxsw_sp_acl_profile_ops *
+mlxsw_sp_acl_tcam_profile_ops_arr[] = {
+ [MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops,
+};
+
+const struct mlxsw_sp_acl_profile_ops *
+mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_acl_profile profile)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops;
+
+ if (WARN_ON(profile >= ARRAY_SIZE(mlxsw_sp_acl_tcam_profile_ops_arr)))
+ return NULL;
+ ops = mlxsw_sp_acl_tcam_profile_ops_arr[profile];
+ if (WARN_ON(!ops))
+ return NULL;
+ return ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
new file mode 100644
index 000000000..219a4e26c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_ACL_TCAM_H
+#define _MLXSW_SPECTRUM_ACL_TCAM_H
+
+#include <linux/list.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_keys.h"
+
+struct mlxsw_sp_acl_tcam {
+ unsigned long *used_regions; /* bit array */
+ unsigned int max_regions;
+ unsigned long *used_groups; /* bit array */
+ unsigned int max_groups;
+ unsigned int max_group_size;
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam);
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam);
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ u32 *priority, bool fillup_priority);
+
+struct mlxsw_sp_acl_profile_ops {
+ size_t ruleset_priv_size;
+ int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_tcam *tcam, void *ruleset_priv,
+ struct mlxsw_afk_element_usage *tmplt_elusage);
+ void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
+ int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress);
+ void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ bool ingress);
+ u16 (*ruleset_group_id)(void *ruleset_priv);
+ size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp);
+ int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
+ void *ruleset_priv, void *rule_priv,
+ struct mlxsw_sp_acl_rule_info *rulei);
+ void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
+ int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
+ bool *activity);
+};
+
+const struct mlxsw_sp_acl_profile_ops *
+mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_acl_profile profile);
+
+#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16
+
+#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
+
+#define MLXSW_SP_ACL_TCAM_MASK_LEN \
+ (MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE)
+
+struct mlxsw_sp_acl_tcam_group;
+
+struct mlxsw_sp_acl_tcam_region {
+ struct list_head list; /* Member of a TCAM group */
+ struct list_head chunk_list; /* List of chunks under this region */
+ struct mlxsw_sp_acl_tcam_group *group;
+ enum mlxsw_reg_ptar_key_type key_type;
+ u16 id; /* ACL ID and region ID - they are same */
+ char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
+ struct mlxsw_afk_key_info *key_info;
+ struct mlxsw_sp *mlxsw_sp;
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+struct mlxsw_sp_acl_ctcam_region {
+ struct parman *parman;
+ const struct mlxsw_sp_acl_ctcam_region_ops *ops;
+ struct mlxsw_sp_acl_tcam_region *region;
+};
+
+struct mlxsw_sp_acl_ctcam_chunk {
+ struct parman_prio parman_prio;
+};
+
+struct mlxsw_sp_acl_ctcam_entry {
+ struct parman_item parman_item;
+};
+
+struct mlxsw_sp_acl_ctcam_region_ops {
+ int (*entry_insert)(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry,
+ const char *mask);
+ void (*entry_remove)(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_entry *centry);
+};
+
+int
+mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_tcam_region *region,
+ const struct mlxsw_sp_acl_ctcam_region_ops *ops);
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion);
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+ unsigned int priority);
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk);
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+ struct mlxsw_sp_acl_ctcam_entry *centry,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ bool fillup_priority);
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_ctcam_region *cregion,
+ struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+ struct mlxsw_sp_acl_ctcam_entry *centry);
+static inline unsigned int
+mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+ return centry->parman_item.index;
+}
+
+enum mlxsw_sp_acl_atcam_region_type {
+ MLXSW_SP_ACL_ATCAM_REGION_TYPE_2KB,
+ MLXSW_SP_ACL_ATCAM_REGION_TYPE_4KB,
+ MLXSW_SP_ACL_ATCAM_REGION_TYPE_8KB,
+ MLXSW_SP_ACL_ATCAM_REGION_TYPE_12KB,
+ __MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX,
+};
+
+#define MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX \
+ (__MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX - 1)
+
+struct mlxsw_sp_acl_atcam {
+ struct mlxsw_sp_acl_erp_core *erp_core;
+};
+
+struct mlxsw_sp_acl_atcam_region {
+ struct rhashtable entries_ht; /* A-TCAM only */
+ struct mlxsw_sp_acl_ctcam_region cregion;
+ const struct mlxsw_sp_acl_atcam_region_ops *ops;
+ struct mlxsw_sp_acl_tcam_region *region;
+ struct mlxsw_sp_acl_atcam *atcam;
+ enum mlxsw_sp_acl_atcam_region_type type;
+ struct mlxsw_sp_acl_erp_table *erp_table;
+ void *priv;
+};
+
+struct mlxsw_sp_acl_atcam_entry_ht_key {
+ char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key */
+ u8 erp_id;
+};
+
+struct mlxsw_sp_acl_atcam_chunk {
+ struct mlxsw_sp_acl_ctcam_chunk cchunk;
+};
+
+struct mlxsw_sp_acl_atcam_entry {
+ struct rhash_head ht_node;
+ struct mlxsw_sp_acl_atcam_entry_ht_key ht_key;
+ struct mlxsw_sp_acl_ctcam_entry centry;
+ struct mlxsw_sp_acl_atcam_lkey_id *lkey_id;
+ struct mlxsw_sp_acl_erp *erp;
+};
+
+static inline struct mlxsw_sp_acl_atcam_region *
+mlxsw_sp_acl_tcam_cregion_aregion(struct mlxsw_sp_acl_ctcam_region *cregion)
+{
+ return container_of(cregion, struct mlxsw_sp_acl_atcam_region, cregion);
+}
+
+static inline struct mlxsw_sp_acl_atcam_entry *
+mlxsw_sp_acl_tcam_centry_aentry(struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+ return container_of(centry, struct mlxsw_sp_acl_atcam_entry, centry);
+}
+
+int mlxsw_sp_acl_atcam_region_associate(struct mlxsw_sp *mlxsw_sp,
+ u16 region_id);
+int
+mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_tcam_region *region,
+ const struct mlxsw_sp_acl_ctcam_region_ops *ops);
+void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_chunk *achunk,
+ unsigned int priority);
+void mlxsw_sp_acl_atcam_chunk_fini(struct mlxsw_sp_acl_atcam_chunk *achunk);
+int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_chunk *achunk,
+ struct mlxsw_sp_acl_atcam_entry *aentry,
+ struct mlxsw_sp_acl_rule_info *rulei);
+void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_atcam_chunk *achunk,
+ struct mlxsw_sp_acl_atcam_entry *aentry);
+int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam);
+void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam);
+
+struct mlxsw_sp_acl_erp;
+
+bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp);
+u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp);
+struct mlxsw_sp_acl_erp *
+mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion,
+ const char *mask, bool ctcam);
+void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion,
+ struct mlxsw_sp_acl_erp *erp);
+int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion);
+void mlxsw_sp_acl_erp_region_fini(struct mlxsw_sp_acl_atcam_region *aregion);
+int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam);
+void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_atcam *atcam);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
new file mode 100644
index 000000000..3589432d1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -0,0 +1,1024 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/dcbnl.h>
+#include <linux/if_ether.h>
+#include <linux/list.h>
+
+#include "spectrum.h"
+#include "core.h"
+#include "port.h"
+#include "reg.h"
+
+struct mlxsw_sp_sb_pr {
+ enum mlxsw_reg_sbpr_mode mode;
+ u32 size;
+};
+
+struct mlxsw_cp_sb_occ {
+ u32 cur;
+ u32 max;
+};
+
+struct mlxsw_sp_sb_cm {
+ u32 min_buff;
+ u32 max_buff;
+ u8 pool;
+ struct mlxsw_cp_sb_occ occ;
+};
+
+struct mlxsw_sp_sb_pm {
+ u32 min_buff;
+ u32 max_buff;
+ struct mlxsw_cp_sb_occ occ;
+};
+
+#define MLXSW_SP_SB_POOL_COUNT 4
+#define MLXSW_SP_SB_TC_COUNT 8
+
+struct mlxsw_sp_sb_port {
+ struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT];
+ struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT];
+};
+
+struct mlxsw_sp_sb {
+ struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT];
+ struct mlxsw_sp_sb_port *ports;
+ u32 cell_size;
+};
+
+u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells)
+{
+ return mlxsw_sp->sb->cell_size * cells;
+}
+
+u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes)
+{
+ return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size);
+}
+
+static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp,
+ u8 pool,
+ enum mlxsw_reg_sbxx_dir dir)
+{
+ return &mlxsw_sp->sb->prs[dir][pool];
+}
+
+static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp,
+ u8 local_port, u8 pg_buff,
+ enum mlxsw_reg_sbxx_dir dir)
+{
+ return &mlxsw_sp->sb->ports[local_port].cms[dir][pg_buff];
+}
+
+static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp,
+ u8 local_port, u8 pool,
+ enum mlxsw_reg_sbxx_dir dir)
+{
+ return &mlxsw_sp->sb->ports[local_port].pms[dir][pool];
+}
+
+static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool,
+ enum mlxsw_reg_sbxx_dir dir,
+ enum mlxsw_reg_sbpr_mode mode, u32 size)
+{
+ char sbpr_pl[MLXSW_REG_SBPR_LEN];
+ struct mlxsw_sp_sb_pr *pr;
+ int err;
+
+ mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl);
+ if (err)
+ return err;
+
+ pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+ pr->mode = mode;
+ pr->size = size;
+ return 0;
+}
+
+static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ u8 pg_buff, enum mlxsw_reg_sbxx_dir dir,
+ u32 min_buff, u32 max_buff, u8 pool)
+{
+ char sbcm_pl[MLXSW_REG_SBCM_LEN];
+ int err;
+
+ mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir,
+ min_buff, max_buff, pool);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl);
+ if (err)
+ return err;
+ if (pg_buff < MLXSW_SP_SB_TC_COUNT) {
+ struct mlxsw_sp_sb_cm *cm;
+
+ cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir);
+ cm->min_buff = min_buff;
+ cm->max_buff = max_buff;
+ cm->pool = pool;
+ }
+ return 0;
+}
+
+static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ u8 pool, enum mlxsw_reg_sbxx_dir dir,
+ u32 min_buff, u32 max_buff)
+{
+ char sbpm_pl[MLXSW_REG_SBPM_LEN];
+ struct mlxsw_sp_sb_pm *pm;
+ int err;
+
+ mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false,
+ min_buff, max_buff);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl);
+ if (err)
+ return err;
+
+ pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
+ pm->min_buff = min_buff;
+ pm->max_buff = max_buff;
+ return 0;
+}
+
+static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ u8 pool, enum mlxsw_reg_sbxx_dir dir,
+ struct list_head *bulk_list)
+{
+ char sbpm_pl[MLXSW_REG_SBPM_LEN];
+
+ mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0);
+ return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
+ bulk_list, NULL, 0);
+}
+
+static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core,
+ char *sbpm_pl, size_t sbpm_pl_len,
+ unsigned long cb_priv)
+{
+ struct mlxsw_sp_sb_pm *pm = (struct mlxsw_sp_sb_pm *) cb_priv;
+
+ mlxsw_reg_sbpm_unpack(sbpm_pl, &pm->occ.cur, &pm->occ.max);
+}
+
+static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ u8 pool, enum mlxsw_reg_sbxx_dir dir,
+ struct list_head *bulk_list)
+{
+ char sbpm_pl[MLXSW_REG_SBPM_LEN];
+ struct mlxsw_sp_sb_pm *pm;
+
+ pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
+ mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0);
+ return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
+ bulk_list,
+ mlxsw_sp_sb_pm_occ_query_cb,
+ (unsigned long) pm);
+}
+
+static const u16 mlxsw_sp_pbs[] = {
+ [0] = 2 * ETH_FRAME_LEN,
+ [9] = 2 * MLXSW_PORT_MAX_MTU,
+};
+
+#define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs)
+#define MLXSW_SP_PB_UNUSED 8
+
+static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char pbmc_pl[MLXSW_REG_PBMC_LEN];
+ int i;
+
+ mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port,
+ 0xffff, 0xffff / 2);
+ for (i = 0; i < MLXSW_SP_PBS_LEN; i++) {
+ u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp_pbs[i]);
+
+ if (i == MLXSW_SP_PB_UNUSED)
+ continue;
+ mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size);
+ }
+ mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl,
+ MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+}
+
+static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ char pptb_pl[MLXSW_REG_PPTB_LEN];
+ int i;
+
+ mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, 0);
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
+ pptb_pl);
+}
+
+static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = mlxsw_sp_port_pb_init(mlxsw_sp_port);
+ if (err)
+ return err;
+ return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port);
+}
+
+static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+
+ mlxsw_sp->sb->ports = kcalloc(max_ports,
+ sizeof(struct mlxsw_sp_sb_port),
+ GFP_KERNEL);
+ if (!mlxsw_sp->sb->ports)
+ return -ENOMEM;
+ return 0;
+}
+
+static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ kfree(mlxsw_sp->sb->ports);
+}
+
+#define MLXSW_SP_SB_PR_INGRESS_SIZE 12440000
+#define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000)
+#define MLXSW_SP_SB_PR_EGRESS_SIZE 13232000
+
+#define MLXSW_SP_SB_PR(_mode, _size) \
+ { \
+ .mode = _mode, \
+ .size = _size, \
+ }
+
+static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = {
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP_SB_PR_INGRESS_SIZE),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
+ MLXSW_SP_SB_PR_INGRESS_MNG_SIZE),
+};
+
+#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress)
+
+static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = {
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+};
+
+#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress)
+
+static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_reg_sbxx_dir dir,
+ const struct mlxsw_sp_sb_pr *prs,
+ size_t prs_len)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < prs_len; i++) {
+ u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size);
+
+ err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS,
+ mlxsw_sp_sb_prs_ingress,
+ MLXSW_SP_SB_PRS_INGRESS_LEN);
+ if (err)
+ return err;
+ return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS,
+ mlxsw_sp_sb_prs_egress,
+ MLXSW_SP_SB_PRS_EGRESS_LEN);
+}
+
+#define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \
+ { \
+ .min_buff = _min_buff, \
+ .max_buff = _max_buff, \
+ .pool = _pool, \
+ }
+
+static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
+ MLXSW_SP_SB_CM(10000, 8, 0),
+ MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
+ MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
+ MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
+ MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
+ MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
+ MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
+ MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0),
+ MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */
+ MLXSW_SP_SB_CM(20000, 1, 3),
+};
+
+#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress)
+
+static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(1500, 9, 0),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(0, 140000, 15),
+ MLXSW_SP_SB_CM(1, 0xff, 0),
+};
+
+#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress)
+
+#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0)
+
+static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+ MLXSW_SP_CPU_PORT_SB_CM,
+};
+
+#define MLXSW_SP_CPU_PORT_SB_MCS_LEN \
+ ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms)
+
+static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ enum mlxsw_reg_sbxx_dir dir,
+ const struct mlxsw_sp_sb_cm *cms,
+ size_t cms_len)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < cms_len; i++) {
+ const struct mlxsw_sp_sb_cm *cm;
+ u32 min_buff;
+
+ if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS)
+ continue; /* PG number 8 does not exist, skip it */
+ cm = &cms[i];
+ /* All pools are initialized using dynamic thresholds,
+ * therefore 'max_buff' isn't specified in cells.
+ */
+ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff);
+ err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir,
+ min_buff, cm->max_buff, cm->pool);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_port->local_port,
+ MLXSW_REG_SBXX_DIR_INGRESS,
+ mlxsw_sp_sb_cms_ingress,
+ MLXSW_SP_SB_CMS_INGRESS_LEN);
+ if (err)
+ return err;
+ return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_port->local_port,
+ MLXSW_REG_SBXX_DIR_EGRESS,
+ mlxsw_sp_sb_cms_egress,
+ MLXSW_SP_SB_CMS_EGRESS_LEN);
+}
+
+static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS,
+ mlxsw_sp_cpu_port_sb_cms,
+ MLXSW_SP_CPU_PORT_SB_MCS_LEN);
+}
+
+#define MLXSW_SP_SB_PM(_min_buff, _max_buff) \
+ { \
+ .min_buff = _min_buff, \
+ .max_buff = _max_buff, \
+ }
+
+static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = {
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
+};
+
+#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress)
+
+static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = {
+ MLXSW_SP_SB_PM(0, 7),
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+};
+
+#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress)
+
+static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ enum mlxsw_reg_sbxx_dir dir,
+ const struct mlxsw_sp_sb_pm *pms,
+ size_t pms_len)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < pms_len; i++) {
+ const struct mlxsw_sp_sb_pm *pm;
+
+ pm = &pms[i];
+ err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir,
+ pm->min_buff, pm->max_buff);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_port->local_port,
+ MLXSW_REG_SBXX_DIR_INGRESS,
+ mlxsw_sp_sb_pms_ingress,
+ MLXSW_SP_SB_PMS_INGRESS_LEN);
+ if (err)
+ return err;
+ return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_port->local_port,
+ MLXSW_REG_SBXX_DIR_EGRESS,
+ mlxsw_sp_sb_pms_egress,
+ MLXSW_SP_SB_PMS_EGRESS_LEN);
+}
+
+struct mlxsw_sp_sb_mm {
+ u32 min_buff;
+ u32 max_buff;
+ u8 pool;
+};
+
+#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \
+ { \
+ .min_buff = _min_buff, \
+ .max_buff = _max_buff, \
+ .pool = _pool, \
+ }
+
+static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = {
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(20000, 0xff, 0),
+};
+
+#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms)
+
+static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
+{
+ char sbmm_pl[MLXSW_REG_SBMM_LEN];
+ int i;
+ int err;
+
+ for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) {
+ const struct mlxsw_sp_sb_mm *mc;
+ u32 min_buff;
+
+ mc = &mlxsw_sp_sb_mms[i];
+ /* All pools are initialized using dynamic thresholds,
+ * therefore 'max_buff' isn't specified in cells.
+ */
+ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff);
+ mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff,
+ mc->pool);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
+{
+ u64 sb_size;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
+ return -EIO;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE))
+ return -EIO;
+ sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE);
+
+ mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL);
+ if (!mlxsw_sp->sb)
+ return -ENOMEM;
+ mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE);
+
+ err = mlxsw_sp_sb_ports_init(mlxsw_sp);
+ if (err)
+ goto err_sb_ports_init;
+ err = mlxsw_sp_sb_prs_init(mlxsw_sp);
+ if (err)
+ goto err_sb_prs_init;
+ err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp);
+ if (err)
+ goto err_sb_cpu_port_sb_cms_init;
+ err = mlxsw_sp_sb_mms_init(mlxsw_sp);
+ if (err)
+ goto err_sb_mms_init;
+ err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size,
+ MLXSW_SP_SB_POOL_COUNT,
+ MLXSW_SP_SB_POOL_COUNT,
+ MLXSW_SP_SB_TC_COUNT,
+ MLXSW_SP_SB_TC_COUNT);
+ if (err)
+ goto err_devlink_sb_register;
+
+ return 0;
+
+err_devlink_sb_register:
+err_sb_mms_init:
+err_sb_cpu_port_sb_cms_init:
+err_sb_prs_init:
+ mlxsw_sp_sb_ports_fini(mlxsw_sp);
+err_sb_ports_init:
+ kfree(mlxsw_sp->sb);
+ return err;
+}
+
+void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0);
+ mlxsw_sp_sb_ports_fini(mlxsw_sp);
+ kfree(mlxsw_sp->sb);
+}
+
+int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = mlxsw_sp_port_headroom_init(mlxsw_sp_port);
+ if (err)
+ return err;
+ err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port);
+ if (err)
+ return err;
+ err = mlxsw_sp_port_sb_pms_init(mlxsw_sp_port);
+
+ return err;
+}
+
+static u8 pool_get(u16 pool_index)
+{
+ return pool_index % MLXSW_SP_SB_POOL_COUNT;
+}
+
+static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir)
+{
+ u16 pool_index;
+
+ pool_index = pool;
+ if (dir == MLXSW_REG_SBXX_DIR_EGRESS)
+ pool_index += MLXSW_SP_SB_POOL_COUNT;
+ return pool_index;
+}
+
+static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index)
+{
+ return pool_index < MLXSW_SP_SB_POOL_COUNT ?
+ MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS;
+}
+
+int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ u8 pool = pool_get(pool_index);
+ enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
+ struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+
+ pool_info->pool_type = (enum devlink_sb_pool_type) dir;
+ pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size);
+ pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode;
+ return 0;
+}
+
+int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index, u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size);
+ u8 pool = pool_get(pool_index);
+ enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
+ enum mlxsw_reg_sbpr_mode mode;
+
+ if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
+ return -EINVAL;
+
+ mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
+ return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size);
+}
+
+#define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */
+
+static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool,
+ enum mlxsw_reg_sbxx_dir dir, u32 max_buff)
+{
+ struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+
+ if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC)
+ return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
+ return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff);
+}
+
+static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool,
+ enum mlxsw_reg_sbxx_dir dir, u32 threshold,
+ u32 *p_max_buff)
+{
+ struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+
+ if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) {
+ int val;
+
+ val = threshold + MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
+ if (val < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN ||
+ val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX)
+ return -EINVAL;
+ *p_max_buff = val;
+ } else {
+ *p_max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, threshold);
+ }
+ return 0;
+}
+
+int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port =
+ mlxsw_core_port_driver_priv(mlxsw_core_port);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u8 pool = pool_get(pool_index);
+ enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
+ struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
+ pool, dir);
+
+ *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir,
+ pm->max_buff);
+ return 0;
+}
+
+int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port =
+ mlxsw_core_port_driver_priv(mlxsw_core_port);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u8 pool = pool_get(pool_index);
+ enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
+ u32 max_buff;
+ int err;
+
+ err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
+ threshold, &max_buff);
+ if (err)
+ return err;
+
+ return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir,
+ 0, max_buff);
+}
+
+int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port =
+ mlxsw_core_port_driver_priv(mlxsw_core_port);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u8 pg_buff = tc_index;
+ enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
+ struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
+ pg_buff, dir);
+
+ *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir,
+ cm->max_buff);
+ *p_pool_index = pool_index_get(cm->pool, dir);
+ return 0;
+}
+
+int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port =
+ mlxsw_core_port_driver_priv(mlxsw_core_port);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u8 pg_buff = tc_index;
+ enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
+ u8 pool = pool_get(pool_index);
+ u32 max_buff;
+ int err;
+
+ if (dir != dir_get(pool_index))
+ return -EINVAL;
+
+ err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
+ threshold, &max_buff);
+ if (err)
+ return err;
+
+ return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir,
+ 0, max_buff, pool);
+}
+
+#define MASKED_COUNT_MAX \
+ (MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2))
+
+struct mlxsw_sp_sb_sr_occ_query_cb_ctx {
+ u8 masked_count;
+ u8 local_port_1;
+};
+
+static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
+ char *sbsr_pl, size_t sbsr_pl_len,
+ unsigned long cb_priv)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx;
+ u8 masked_count;
+ u8 local_port;
+ int rec_index = 0;
+ struct mlxsw_sp_sb_cm *cm;
+ int i;
+
+ memcpy(&cb_ctx, &cb_priv, sizeof(cb_ctx));
+
+ masked_count = 0;
+ for (local_port = cb_ctx.local_port_1;
+ local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
+ if (!mlxsw_sp->ports[local_port])
+ continue;
+ for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
+ MLXSW_REG_SBXX_DIR_INGRESS);
+ mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
+ &cm->occ.cur, &cm->occ.max);
+ }
+ if (++masked_count == cb_ctx.masked_count)
+ break;
+ }
+ masked_count = 0;
+ for (local_port = cb_ctx.local_port_1;
+ local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
+ if (!mlxsw_sp->ports[local_port])
+ continue;
+ for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
+ MLXSW_REG_SBXX_DIR_EGRESS);
+ mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
+ &cm->occ.cur, &cm->occ.max);
+ }
+ if (++masked_count == cb_ctx.masked_count)
+ break;
+ }
+}
+
+int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx;
+ unsigned long cb_priv;
+ LIST_HEAD(bulk_list);
+ char *sbsr_pl;
+ u8 masked_count;
+ u8 local_port_1;
+ u8 local_port = 0;
+ int i;
+ int err;
+ int err2;
+
+ sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL);
+ if (!sbsr_pl)
+ return -ENOMEM;
+
+next_batch:
+ local_port++;
+ local_port_1 = local_port;
+ masked_count = 0;
+ mlxsw_reg_sbsr_pack(sbsr_pl, false);
+ for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
+ mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
+ }
+ for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
+ if (!mlxsw_sp->ports[local_port])
+ continue;
+ mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
+ mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
+ for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
+ err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
+ MLXSW_REG_SBXX_DIR_INGRESS,
+ &bulk_list);
+ if (err)
+ goto out;
+ err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
+ MLXSW_REG_SBXX_DIR_EGRESS,
+ &bulk_list);
+ if (err)
+ goto out;
+ }
+ if (++masked_count == MASKED_COUNT_MAX)
+ goto do_query;
+ }
+
+do_query:
+ cb_ctx.masked_count = masked_count;
+ cb_ctx.local_port_1 = local_port_1;
+ memcpy(&cb_priv, &cb_ctx, sizeof(cb_ctx));
+ err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl,
+ &bulk_list, mlxsw_sp_sb_sr_occ_query_cb,
+ cb_priv);
+ if (err)
+ goto out;
+ if (local_port < mlxsw_core_max_ports(mlxsw_core))
+ goto next_batch;
+
+out:
+ err2 = mlxsw_reg_trans_bulk_wait(&bulk_list);
+ if (!err)
+ err = err2;
+ kfree(sbsr_pl);
+ return err;
+}
+
+int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
+ unsigned int sb_index)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+ LIST_HEAD(bulk_list);
+ char *sbsr_pl;
+ unsigned int masked_count;
+ u8 local_port = 0;
+ int i;
+ int err;
+ int err2;
+
+ sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL);
+ if (!sbsr_pl)
+ return -ENOMEM;
+
+next_batch:
+ local_port++;
+ masked_count = 0;
+ mlxsw_reg_sbsr_pack(sbsr_pl, true);
+ for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
+ mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
+ }
+ for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
+ if (!mlxsw_sp->ports[local_port])
+ continue;
+ mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
+ mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
+ for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
+ err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
+ MLXSW_REG_SBXX_DIR_INGRESS,
+ &bulk_list);
+ if (err)
+ goto out;
+ err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
+ MLXSW_REG_SBXX_DIR_EGRESS,
+ &bulk_list);
+ if (err)
+ goto out;
+ }
+ if (++masked_count == MASKED_COUNT_MAX)
+ goto do_query;
+ }
+
+do_query:
+ err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl,
+ &bulk_list, NULL, 0);
+ if (err)
+ goto out;
+ if (local_port < mlxsw_core_max_ports(mlxsw_core))
+ goto next_batch;
+
+out:
+ err2 = mlxsw_reg_trans_bulk_wait(&bulk_list);
+ if (!err)
+ err = err2;
+ kfree(sbsr_pl);
+ return err;
+}
+
+int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_cur, u32 *p_max)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port =
+ mlxsw_core_port_driver_priv(mlxsw_core_port);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u8 pool = pool_get(pool_index);
+ enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
+ struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
+ pool, dir);
+
+ *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur);
+ *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max);
+ return 0;
+}
+
+int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port =
+ mlxsw_core_port_driver_priv(mlxsw_core_port);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u8 pg_buff = tc_index;
+ enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
+ struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
+ pg_buff, dir);
+
+ *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.cur);
+ *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.max);
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c
new file mode 100644
index 000000000..83c2e1e5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum_cnt.h"
+
+#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096
+
+struct mlxsw_sp_counter_sub_pool {
+ unsigned int base_index;
+ unsigned int size;
+ unsigned int entry_size;
+ unsigned int bank_count;
+};
+
+struct mlxsw_sp_counter_pool {
+ unsigned int pool_size;
+ unsigned long *usage; /* Usage bitmap */
+ struct mlxsw_sp_counter_sub_pool *sub_pools;
+};
+
+static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = {
+ [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = {
+ .bank_count = 6,
+ },
+ [MLXSW_SP_COUNTER_SUB_POOL_RIF] = {
+ .bank_count = 2,
+ }
+};
+
+static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned int total_bank_config = 0;
+ unsigned int pool_size;
+ int i;
+
+ pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE);
+ /* Check config is valid, no bank over subscription */
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++)
+ total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count;
+ if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1)
+ return -EINVAL;
+ return 0;
+}
+
+static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_counter_sub_pool *sub_pool;
+
+ /* Prepare generic flow pool*/
+ sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW];
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_PACKETS_BYTES))
+ return -EIO;
+ sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ COUNTER_SIZE_PACKETS_BYTES);
+ /* Prepare erif pool*/
+ sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF];
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC))
+ return -EIO;
+ sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ COUNTER_SIZE_ROUTER_BASIC);
+ return 0;
+}
+
+int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_counter_sub_pool *sub_pool;
+ struct mlxsw_sp_counter_pool *pool;
+ unsigned int base_index;
+ unsigned int map_size;
+ int i;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE))
+ return -EIO;
+
+ err = mlxsw_sp_counter_pool_validate(mlxsw_sp);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp);
+ if (err)
+ return err;
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return -ENOMEM;
+
+ pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE);
+ map_size = BITS_TO_LONGS(pool->pool_size) * sizeof(unsigned long);
+
+ pool->usage = kzalloc(map_size, GFP_KERNEL);
+ if (!pool->usage) {
+ err = -ENOMEM;
+ goto err_usage_alloc;
+ }
+
+ pool->sub_pools = mlxsw_sp_counter_sub_pools;
+ /* Allocation is based on bank count which should be
+ * specified for each sub pool statically.
+ */
+ base_index = 0;
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) {
+ sub_pool = &pool->sub_pools[i];
+ sub_pool->size = sub_pool->bank_count *
+ MLXSW_SP_COUNTER_POOL_BANK_SIZE;
+ sub_pool->base_index = base_index;
+ base_index += sub_pool->size;
+ /* The last bank can't be fully used */
+ if (sub_pool->base_index + sub_pool->size > pool->pool_size)
+ sub_pool->size = pool->pool_size - sub_pool->base_index;
+ }
+
+ mlxsw_sp->counter_pool = pool;
+ return 0;
+
+err_usage_alloc:
+ kfree(pool);
+ return err;
+}
+
+void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool;
+
+ WARN_ON(find_first_bit(pool->usage, pool->pool_size) !=
+ pool->pool_size);
+ kfree(pool->usage);
+ kfree(pool);
+}
+
+int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+ unsigned int *p_counter_index)
+{
+ struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool;
+ struct mlxsw_sp_counter_sub_pool *sub_pool;
+ unsigned int entry_index;
+ unsigned int stop_index;
+ int i;
+
+ sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id];
+ stop_index = sub_pool->base_index + sub_pool->size;
+ entry_index = sub_pool->base_index;
+
+ entry_index = find_next_zero_bit(pool->usage, stop_index, entry_index);
+ if (entry_index == stop_index)
+ return -ENOBUFS;
+ /* The sub-pools can contain non-integer number of entries
+ * so we must check for overflow
+ */
+ if (entry_index + sub_pool->entry_size > stop_index)
+ return -ENOBUFS;
+ for (i = 0; i < sub_pool->entry_size; i++)
+ __set_bit(entry_index + i, pool->usage);
+
+ *p_counter_index = entry_index;
+ return 0;
+}
+
+void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+ unsigned int counter_index)
+{
+ struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool;
+ struct mlxsw_sp_counter_sub_pool *sub_pool;
+ int i;
+
+ if (WARN_ON(counter_index >= pool->pool_size))
+ return;
+ sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id];
+ for (i = 0; i < sub_pool->entry_size; i++)
+ __clear_bit(counter_index + i, pool->usage);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
new file mode 100644
index 000000000..b7eb3674e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_CNT_H
+#define _MLXSW_SPECTRUM_CNT_H
+
+#include "spectrum.h"
+
+enum mlxsw_sp_counter_sub_pool_id {
+ MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ MLXSW_SP_COUNTER_SUB_POOL_FLOW,
+};
+
+int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+ unsigned int *p_counter_index);
+void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_counter_sub_pool_id sub_pool_id,
+ unsigned int counter_index);
+int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
new file mode 100644
index 000000000..bf51ed949
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <net/dcbnl.h>
+
+#include "spectrum.h"
+#include "reg.h"
+
+static u8 mlxsw_sp_dcbnl_getdcbx(struct net_device __always_unused *dev)
+{
+ return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+}
+
+static u8 mlxsw_sp_dcbnl_setdcbx(struct net_device __always_unused *dev,
+ u8 mode)
+{
+ return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_getets(struct net_device *dev,
+ struct ieee_ets *ets)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ memcpy(ets, mlxsw_sp_port->dcb.ets, sizeof(*ets));
+
+ return 0;
+}
+
+static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_ets *ets)
+{
+ struct net_device *dev = mlxsw_sp_port->dev;
+ bool has_ets_tc = false;
+ int i, tx_bw_sum = 0;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ has_ets_tc = true;
+ tx_bw_sum += ets->tc_tx_bw[i];
+ break;
+ default:
+ netdev_err(dev, "Only strict priority and ETS are supported\n");
+ return -EINVAL;
+ }
+
+ if (ets->prio_tc[i] >= IEEE_8021QAZ_MAX_TCS) {
+ netdev_err(dev, "Invalid TC\n");
+ return -EINVAL;
+ }
+ }
+
+ if (has_ets_tc && tx_bw_sum != 100) {
+ netdev_err(dev, "Total ETS bandwidth should equal 100\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 *prio_tc)
+{
+ char pptb_pl[MLXSW_REG_PPTB_LEN];
+ int i;
+
+ mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, prio_tc[i]);
+
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
+ pptb_pl);
+}
+
+static bool mlxsw_sp_ets_has_pg(u8 *prio_tc, u8 pg)
+{
+ int i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (prio_tc[i] == pg)
+ return true;
+ return false;
+}
+
+static int mlxsw_sp_port_pg_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 *old_prio_tc, u8 *new_prio_tc)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char pbmc_pl[MLXSW_REG_PBMC_LEN];
+ int err, i;
+
+ mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+ if (err)
+ return err;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ u8 pg = old_prio_tc[i];
+
+ if (!mlxsw_sp_ets_has_pg(new_prio_tc, pg))
+ mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg, 0);
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+}
+
+static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_ets *ets)
+{
+ bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
+ struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets;
+ struct net_device *dev = mlxsw_sp_port->dev;
+ int err;
+
+ /* Create the required PGs, but don't destroy existing ones, as
+ * traffic is still directed to them.
+ */
+ err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
+ ets->prio_tc, pause_en,
+ mlxsw_sp_port->dcb.pfc);
+ if (err) {
+ netdev_err(dev, "Failed to configure port's headroom\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_pg_prio_map(mlxsw_sp_port, ets->prio_tc);
+ if (err) {
+ netdev_err(dev, "Failed to set PG-priority mapping\n");
+ goto err_port_prio_pg_map;
+ }
+
+ err = mlxsw_sp_port_pg_destroy(mlxsw_sp_port, my_ets->prio_tc,
+ ets->prio_tc);
+ if (err)
+ netdev_warn(dev, "Failed to remove ununsed PGs\n");
+
+ return 0;
+
+err_port_prio_pg_map:
+ mlxsw_sp_port_pg_destroy(mlxsw_sp_port, ets->prio_tc, my_ets->prio_tc);
+ return err;
+}
+
+static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_ets *ets)
+{
+ struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets;
+ struct net_device *dev = mlxsw_sp_port->dev;
+ int i, err;
+
+ /* Egress configuration. */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ bool dwrr = ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS;
+ u8 weight = ets->tc_tx_bw[i];
+
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ 0, dwrr, weight);
+ if (err) {
+ netdev_err(dev, "Failed to link subgroup ETS element %d to group\n",
+ i);
+ goto err_port_ets_set;
+ }
+ }
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
+ ets->prio_tc[i]);
+ if (err) {
+ netdev_err(dev, "Failed to map prio %d to TC %d\n", i,
+ ets->prio_tc[i]);
+ goto err_port_prio_tc_set;
+ }
+ }
+
+ /* Ingress configuration. */
+ err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, ets);
+ if (err)
+ goto err_port_headroom_set;
+
+ return 0;
+
+err_port_headroom_set:
+ i = IEEE_8021QAZ_MAX_TCS;
+err_port_prio_tc_set:
+ for (i--; i >= 0; i--)
+ mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, my_ets->prio_tc[i]);
+ i = IEEE_8021QAZ_MAX_TCS;
+err_port_ets_set:
+ for (i--; i >= 0; i--) {
+ bool dwrr = my_ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS;
+ u8 weight = my_ets->tc_tx_bw[i];
+
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ 0, dwrr, weight);
+ }
+ return err;
+}
+
+static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev,
+ struct ieee_ets *ets)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err;
+
+ err = mlxsw_sp_port_ets_validate(mlxsw_sp_port, ets);
+ if (err)
+ return err;
+
+ err = __mlxsw_sp_dcbnl_ieee_setets(mlxsw_sp_port, ets);
+ if (err)
+ return err;
+
+ memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets));
+ mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
+
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_app_validate(struct net_device *dev,
+ struct dcb_app *app)
+{
+ if (app->priority >= IEEE_8021QAZ_MAX_TCS) {
+ netdev_err(dev, "APP entry with priority value %u is invalid\n",
+ app->priority);
+ return -EINVAL;
+ }
+
+ switch (app->selector) {
+ case IEEE_8021QAZ_APP_SEL_DSCP:
+ if (app->protocol >= 64) {
+ netdev_err(dev, "DSCP APP entry with protocol value %u is invalid\n",
+ app->protocol);
+ return -EINVAL;
+ }
+ break;
+
+ case IEEE_8021QAZ_APP_SEL_ETHERTYPE:
+ if (app->protocol) {
+ netdev_err(dev, "EtherType APP entries with protocol value != 0 not supported\n");
+ return -EINVAL;
+ }
+ break;
+
+ default:
+ netdev_err(dev, "APP entries with selector %u not supported\n",
+ app->selector);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static u8
+mlxsw_sp_port_dcb_app_default_prio(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ u8 prio_mask;
+
+ prio_mask = dcb_ieee_getapp_default_prio_mask(mlxsw_sp_port->dev);
+ if (prio_mask)
+ /* Take the highest configured priority. */
+ return fls(prio_mask) - 1;
+
+ return 0;
+}
+
+static void
+mlxsw_sp_port_dcb_app_dscp_prio_map(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 default_prio,
+ struct dcb_ieee_app_dscp_map *map)
+{
+ int i;
+
+ dcb_ieee_getapp_dscp_prio_mask_map(mlxsw_sp_port->dev, map);
+ for (i = 0; i < ARRAY_SIZE(map->map); ++i) {
+ if (map->map[i])
+ map->map[i] = fls(map->map[i]) - 1;
+ else
+ map->map[i] = default_prio;
+ }
+}
+
+static bool
+mlxsw_sp_port_dcb_app_prio_dscp_map(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct dcb_ieee_app_prio_map *map)
+{
+ bool have_dscp = false;
+ int i;
+
+ dcb_ieee_getapp_prio_dscp_mask_map(mlxsw_sp_port->dev, map);
+ for (i = 0; i < ARRAY_SIZE(map->map); ++i) {
+ if (map->map[i]) {
+ map->map[i] = fls64(map->map[i]) - 1;
+ have_dscp = true;
+ }
+ }
+
+ return have_dscp;
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qpts(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qpts_trust_state ts)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qpts_pl[MLXSW_REG_QPTS_LEN];
+
+ mlxsw_reg_qpts_pack(qpts_pl, mlxsw_sp_port->local_port, ts);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpts), qpts_pl);
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qrwe(struct mlxsw_sp_port *mlxsw_sp_port,
+ bool rewrite_dscp)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qrwe_pl[MLXSW_REG_QRWE_LEN];
+
+ mlxsw_reg_qrwe_pack(qrwe_pl, mlxsw_sp_port->local_port,
+ false, rewrite_dscp);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qrwe), qrwe_pl);
+}
+
+static int
+mlxsw_sp_port_dcb_toggle_trust(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qpts_trust_state ts)
+{
+ bool rewrite_dscp = ts == MLXSW_REG_QPTS_TRUST_STATE_DSCP;
+ int err;
+
+ if (mlxsw_sp_port->dcb.trust_state == ts)
+ return 0;
+
+ err = mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port, ts);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_port_dcb_app_update_qrwe(mlxsw_sp_port, rewrite_dscp);
+ if (err)
+ goto err_update_qrwe;
+
+ mlxsw_sp_port->dcb.trust_state = ts;
+ return 0;
+
+err_update_qrwe:
+ mlxsw_sp_port_dcb_app_update_qpts(mlxsw_sp_port,
+ mlxsw_sp_port->dcb.trust_state);
+ return err;
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct dcb_ieee_app_dscp_map *map)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qpdpm_pl[MLXSW_REG_QPDPM_LEN];
+ short int i;
+
+ mlxsw_reg_qpdpm_pack(qpdpm_pl, mlxsw_sp_port->local_port);
+ for (i = 0; i < ARRAY_SIZE(map->map); ++i)
+ mlxsw_reg_qpdpm_dscp_pack(qpdpm_pl, i, map->map[i]);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdpm), qpdpm_pl);
+}
+
+static int
+mlxsw_sp_port_dcb_app_update_qpdsm(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct dcb_ieee_app_prio_map *map)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qpdsm_pl[MLXSW_REG_QPDSM_LEN];
+ short int i;
+
+ mlxsw_reg_qpdsm_pack(qpdsm_pl, mlxsw_sp_port->local_port);
+ for (i = 0; i < ARRAY_SIZE(map->map); ++i)
+ mlxsw_reg_qpdsm_prio_pack(qpdsm_pl, i, map->map[i]);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdsm), qpdsm_pl);
+}
+
+static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct dcb_ieee_app_prio_map prio_map;
+ struct dcb_ieee_app_dscp_map dscp_map;
+ u8 default_prio;
+ bool have_dscp;
+ int err;
+
+ default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port);
+ have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port,
+ &prio_map);
+
+ mlxsw_sp_port_dcb_app_dscp_prio_map(mlxsw_sp_port, default_prio,
+ &dscp_map);
+ err = mlxsw_sp_port_dcb_app_update_qpdpm(mlxsw_sp_port,
+ &dscp_map);
+ if (err) {
+ netdev_err(mlxsw_sp_port->dev, "Couldn't configure priority map\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_dcb_app_update_qpdsm(mlxsw_sp_port,
+ &prio_map);
+ if (err) {
+ netdev_err(mlxsw_sp_port->dev, "Couldn't configure DSCP rewrite map\n");
+ return err;
+ }
+
+ if (!have_dscp) {
+ err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
+ MLXSW_REG_QPTS_TRUST_STATE_PCP);
+ if (err)
+ netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L2\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_dcb_toggle_trust(mlxsw_sp_port,
+ MLXSW_REG_QPTS_TRUST_STATE_DSCP);
+ if (err) {
+ /* A failure to set trust DSCP means that the QPDPM and QPDSM
+ * maps installed above are not in effect. And since we are here
+ * attempting to set trust DSCP, we couldn't have attempted to
+ * switch trust to PCP. Thus no cleanup is necessary.
+ */
+ netdev_err(mlxsw_sp_port->dev, "Couldn't switch to trust L3\n");
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_setapp(struct net_device *dev,
+ struct dcb_app *app)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err;
+
+ err = mlxsw_sp_dcbnl_app_validate(dev, app);
+ if (err)
+ return err;
+
+ err = dcb_ieee_setapp(dev, app);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port);
+ if (err)
+ goto err_update;
+
+ return 0;
+
+err_update:
+ dcb_ieee_delapp(dev, app);
+ return err;
+}
+
+static int mlxsw_sp_dcbnl_ieee_delapp(struct net_device *dev,
+ struct dcb_app *app)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err;
+
+ err = dcb_ieee_delapp(dev, app);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_port_dcb_app_update(mlxsw_sp_port);
+ if (err)
+ netdev_err(dev, "Failed to update DCB APP configuration\n");
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ memcpy(maxrate, mlxsw_sp_port->dcb.maxrate, sizeof(*maxrate));
+
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct ieee_maxrate *my_maxrate = mlxsw_sp_port->dcb.maxrate;
+ int err, i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ i, 0,
+ maxrate->tc_maxrate[i]);
+ if (err) {
+ netdev_err(dev, "Failed to set maxrate for TC %d\n", i);
+ goto err_port_ets_maxrate_set;
+ }
+ }
+
+ memcpy(mlxsw_sp_port->dcb.maxrate, maxrate, sizeof(*maxrate));
+
+ return 0;
+
+err_port_ets_maxrate_set:
+ for (i--; i >= 0; i--)
+ mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ i, 0, my_maxrate->tc_maxrate[i]);
+ return err;
+}
+
+static int mlxsw_sp_port_pfc_cnt_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 prio)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct ieee_pfc *my_pfc = mlxsw_sp_port->dcb.pfc;
+ char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+ int err;
+
+ mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port,
+ MLXSW_REG_PPCNT_PRIO_CNT, prio);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl);
+ if (err)
+ return err;
+
+ my_pfc->requests[prio] = mlxsw_reg_ppcnt_tx_pause_get(ppcnt_pl);
+ my_pfc->indications[prio] = mlxsw_reg_ppcnt_rx_pause_get(ppcnt_pl);
+
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_getpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err, i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_pfc_cnt_get(mlxsw_sp_port, i);
+ if (err) {
+ netdev_err(dev, "Failed to get PFC count for priority %d\n",
+ i);
+ return err;
+ }
+ }
+
+ memcpy(pfc, mlxsw_sp_port->dcb.pfc, sizeof(*pfc));
+
+ return 0;
+}
+
+static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_pfc *pfc)
+{
+ char pfcc_pl[MLXSW_REG_PFCC_LEN];
+
+ mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause);
+ mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause);
+ mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);
+
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
+ pfcc_pl);
+}
+
+static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
+ int err;
+
+ if (pause_en && pfc->pfc_en) {
+ netdev_err(dev, "PAUSE frames already enabled on port\n");
+ return -EINVAL;
+ }
+
+ err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
+ mlxsw_sp_port->dcb.ets->prio_tc,
+ pause_en, pfc);
+ if (err) {
+ netdev_err(dev, "Failed to configure port's headroom for PFC\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_pfc_set(mlxsw_sp_port, pfc);
+ if (err) {
+ netdev_err(dev, "Failed to configure PFC\n");
+ goto err_port_pfc_set;
+ }
+
+ memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc));
+ mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
+
+ return 0;
+
+err_port_pfc_set:
+ __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
+ mlxsw_sp_port->dcb.ets->prio_tc, pause_en,
+ mlxsw_sp_port->dcb.pfc);
+ return err;
+}
+
+static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = {
+ .ieee_getets = mlxsw_sp_dcbnl_ieee_getets,
+ .ieee_setets = mlxsw_sp_dcbnl_ieee_setets,
+ .ieee_getmaxrate = mlxsw_sp_dcbnl_ieee_getmaxrate,
+ .ieee_setmaxrate = mlxsw_sp_dcbnl_ieee_setmaxrate,
+ .ieee_getpfc = mlxsw_sp_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlxsw_sp_dcbnl_ieee_setpfc,
+ .ieee_setapp = mlxsw_sp_dcbnl_ieee_setapp,
+ .ieee_delapp = mlxsw_sp_dcbnl_ieee_delapp,
+
+ .getdcbx = mlxsw_sp_dcbnl_getdcbx,
+ .setdcbx = mlxsw_sp_dcbnl_setdcbx,
+};
+
+static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port->dcb.ets = kzalloc(sizeof(*mlxsw_sp_port->dcb.ets),
+ GFP_KERNEL);
+ if (!mlxsw_sp_port->dcb.ets)
+ return -ENOMEM;
+
+ mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
+
+ return 0;
+}
+
+static void mlxsw_sp_port_ets_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ kfree(mlxsw_sp_port->dcb.ets);
+}
+
+static int mlxsw_sp_port_maxrate_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int i;
+
+ mlxsw_sp_port->dcb.maxrate = kmalloc(sizeof(*mlxsw_sp_port->dcb.maxrate),
+ GFP_KERNEL);
+ if (!mlxsw_sp_port->dcb.maxrate)
+ return -ENOMEM;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->dcb.maxrate->tc_maxrate[i] = MLXSW_REG_QEEC_MAS_DIS;
+
+ return 0;
+}
+
+static void mlxsw_sp_port_maxrate_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ kfree(mlxsw_sp_port->dcb.maxrate);
+}
+
+static int mlxsw_sp_port_pfc_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port->dcb.pfc = kzalloc(sizeof(*mlxsw_sp_port->dcb.pfc),
+ GFP_KERNEL);
+ if (!mlxsw_sp_port->dcb.pfc)
+ return -ENOMEM;
+
+ mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
+
+ return 0;
+}
+
+static void mlxsw_sp_port_pfc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ kfree(mlxsw_sp_port->dcb.pfc);
+}
+
+int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = mlxsw_sp_port_ets_init(mlxsw_sp_port);
+ if (err)
+ return err;
+ err = mlxsw_sp_port_maxrate_init(mlxsw_sp_port);
+ if (err)
+ goto err_port_maxrate_init;
+ err = mlxsw_sp_port_pfc_init(mlxsw_sp_port);
+ if (err)
+ goto err_port_pfc_init;
+
+ mlxsw_sp_port->dcb.trust_state = MLXSW_REG_QPTS_TRUST_STATE_PCP;
+ mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops;
+
+ return 0;
+
+err_port_pfc_init:
+ mlxsw_sp_port_maxrate_fini(mlxsw_sp_port);
+err_port_maxrate_init:
+ mlxsw_sp_port_ets_fini(mlxsw_sp_port);
+ return err;
+}
+
+void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port_pfc_fini(mlxsw_sp_port);
+ mlxsw_sp_port_maxrate_fini(mlxsw_sp_port);
+ mlxsw_sp_port_ets_fini(mlxsw_sp_port);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
new file mode 100644
index 000000000..4fe193c4f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -0,0 +1,1307 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <net/devlink.h>
+
+#include "spectrum.h"
+#include "spectrum_dpipe.h"
+#include "spectrum_router.h"
+
+enum mlxsw_sp_field_metadata_id {
+ MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
+ MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
+ MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+ MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
+ MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE,
+ MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
+};
+
+static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = {
+ {
+ .name = "erif_port",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
+ .bitwidth = 32,
+ .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
+ },
+ {
+ .name = "l3_forward",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
+ .bitwidth = 1,
+ },
+ {
+ .name = "l3_drop",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+ .bitwidth = 1,
+ },
+ {
+ .name = "adj_index",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
+ .bitwidth = 32,
+ },
+ {
+ .name = "adj_size",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE,
+ .bitwidth = 32,
+ },
+ {
+ .name = "adj_hash_index",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
+ .bitwidth = 32,
+ },
+};
+
+enum mlxsw_sp_dpipe_header_id {
+ MLXSW_SP_DPIPE_HEADER_METADATA,
+};
+
+static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = {
+ .name = "mlxsw_meta",
+ .id = MLXSW_SP_DPIPE_HEADER_METADATA,
+ .fields = mlxsw_sp_dpipe_fields_metadata,
+ .fields_count = ARRAY_SIZE(mlxsw_sp_dpipe_fields_metadata),
+};
+
+static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = {
+ &mlxsw_sp_dpipe_header_metadata,
+ &devlink_dpipe_header_ethernet,
+ &devlink_dpipe_header_ipv4,
+ &devlink_dpipe_header_ipv6,
+};
+
+static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = {
+ .headers = mlxsw_dpipe_headers,
+ .headers_count = ARRAY_SIZE(mlxsw_dpipe_headers),
+};
+
+static int mlxsw_sp_dpipe_table_erif_actions_dump(void *priv,
+ struct sk_buff *skb)
+{
+ struct devlink_dpipe_action action = {0};
+ int err;
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &mlxsw_sp_dpipe_header_metadata;
+ action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD;
+
+ err = devlink_dpipe_action_put(skb, &action);
+ if (err)
+ return err;
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &mlxsw_sp_dpipe_header_metadata;
+ action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP;
+
+ return devlink_dpipe_action_put(skb, &action);
+}
+
+static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv,
+ struct sk_buff *skb)
+{
+ struct devlink_dpipe_match match = {0};
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+ return devlink_dpipe_match_put(skb, &match);
+}
+
+static void
+mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match,
+ struct devlink_dpipe_action *action)
+{
+ action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action->header = &mlxsw_sp_dpipe_header_metadata;
+ action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD;
+
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+}
+
+static int mlxsw_sp_erif_entry_prepare(struct devlink_dpipe_entry *entry,
+ struct devlink_dpipe_value *match_value,
+ struct devlink_dpipe_match *match,
+ struct devlink_dpipe_value *action_value,
+ struct devlink_dpipe_action *action)
+{
+ entry->match_values = match_value;
+ entry->match_values_count = 1;
+
+ entry->action_values = action_value;
+ entry->action_values_count = 1;
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ action_value->action = action;
+ action_value->value_size = sizeof(u32);
+ action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+ if (!action_value->value)
+ goto err_action_alloc;
+ return 0;
+
+err_action_alloc:
+ kfree(match_value->value);
+ return -ENOMEM;
+}
+
+static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp,
+ struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_rif *rif,
+ bool counters_enabled)
+{
+ u32 *action_value;
+ u32 *rif_value;
+ u64 cnt;
+ int err;
+
+ /* Set Match RIF index */
+ rif_value = entry->match_values->value;
+ *rif_value = mlxsw_sp_rif_index(rif);
+ entry->match_values->mapping_value = mlxsw_sp_rif_dev_ifindex(rif);
+ entry->match_values->mapping_valid = true;
+
+ /* Set Action Forwarding */
+ action_value = entry->action_values->value;
+ *action_value = 1;
+
+ entry->counter_valid = false;
+ entry->counter = 0;
+ entry->index = mlxsw_sp_rif_index(rif);
+
+ if (!counters_enabled)
+ return 0;
+
+ err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
+ MLXSW_SP_RIF_COUNTER_EGRESS,
+ &cnt);
+ if (!err) {
+ entry->counter = cnt;
+ entry->counter_valid = true;
+ }
+ return 0;
+}
+
+static int
+mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink_dpipe_value match_value, action_value;
+ struct devlink_dpipe_action action = {0};
+ struct devlink_dpipe_match match = {0};
+ struct devlink_dpipe_entry entry = {0};
+ struct mlxsw_sp *mlxsw_sp = priv;
+ unsigned int rif_count;
+ int i, j;
+ int err;
+
+ memset(&match_value, 0, sizeof(match_value));
+ memset(&action_value, 0, sizeof(action_value));
+
+ mlxsw_sp_erif_match_action_prepare(&match, &action);
+ err = mlxsw_sp_erif_entry_prepare(&entry, &match_value, &match,
+ &action_value, &action);
+ if (err)
+ return err;
+
+ rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+ rtnl_lock();
+ i = 0;
+start_again:
+ err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+ if (err)
+ goto err_ctx_prepare;
+ j = 0;
+ for (; i < rif_count; i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+
+ if (!rif)
+ continue;
+ err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif,
+ counters_enabled);
+ if (err)
+ goto err_entry_get;
+ err = devlink_dpipe_entry_ctx_append(dump_ctx, &entry);
+ if (err) {
+ if (err == -EMSGSIZE) {
+ if (!j)
+ goto err_entry_append;
+ break;
+ }
+ goto err_entry_append;
+ }
+ j++;
+ }
+
+ devlink_dpipe_entry_ctx_close(dump_ctx);
+ if (i != rif_count)
+ goto start_again;
+ rtnl_unlock();
+
+ devlink_dpipe_entry_clear(&entry);
+ return 0;
+err_entry_append:
+err_entry_get:
+err_ctx_prepare:
+ rtnl_unlock();
+ devlink_dpipe_entry_clear(&entry);
+ return err;
+}
+
+static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ int i;
+
+ rtnl_lock();
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+
+ if (!rif)
+ continue;
+ if (enable)
+ mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
+ MLXSW_SP_RIF_COUNTER_EGRESS);
+ else
+ mlxsw_sp_rif_counter_free(mlxsw_sp, rif,
+ MLXSW_SP_RIF_COUNTER_EGRESS);
+ }
+ rtnl_unlock();
+ return 0;
+}
+
+static u64 mlxsw_sp_dpipe_table_erif_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = {
+ .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump,
+ .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump,
+ .entries_dump = mlxsw_sp_dpipe_table_erif_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_erif_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_erif_size_get,
+};
+
+static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ return devlink_dpipe_table_register(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ERIF,
+ &mlxsw_sp_erif_ops,
+ mlxsw_sp, false);
+}
+
+static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF);
+}
+
+static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type)
+{
+ struct devlink_dpipe_match match = {0};
+ int err;
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+ err = devlink_dpipe_match_put(skb, &match);
+ if (err)
+ return err;
+
+ switch (type) {
+ case AF_INET:
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &devlink_dpipe_header_ipv4;
+ match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP;
+ break;
+ case AF_INET6:
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &devlink_dpipe_header_ipv6;
+ match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP;
+ break;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ return devlink_dpipe_match_put(skb, &match);
+}
+
+static int
+mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb)
+{
+ return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET);
+}
+
+static int
+mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb)
+{
+ struct devlink_dpipe_action action = {0};
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &devlink_dpipe_header_ethernet;
+ action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+ return devlink_dpipe_action_put(skb, &action);
+}
+
+enum mlxsw_sp_dpipe_table_host_match {
+ MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF,
+ MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP,
+ MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT,
+};
+
+static void
+mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_action *action,
+ int type)
+{
+ struct devlink_dpipe_match *match;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ switch (type) {
+ case AF_INET:
+ match->header = &devlink_dpipe_header_ipv4;
+ match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP;
+ break;
+ case AF_INET6:
+ match->header = &devlink_dpipe_header_ipv6;
+ match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP;
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action->header = &devlink_dpipe_header_ethernet;
+ action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+}
+
+static int
+mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry,
+ struct devlink_dpipe_value *match_values,
+ struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_value *action_value,
+ struct devlink_dpipe_action *action,
+ int type)
+{
+ struct devlink_dpipe_value *match_value;
+ struct devlink_dpipe_match *match;
+
+ entry->match_values = match_values;
+ entry->match_values_count = MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT;
+
+ entry->action_values = action_value;
+ entry->action_values_count = 1;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+
+ match_value->match = match;
+ switch (type) {
+ case AF_INET:
+ match_value->value_size = sizeof(u32);
+ break;
+ case AF_INET6:
+ match_value->value_size = sizeof(struct in6_addr);
+ break;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ action_value->action = action;
+ action_value->value_size = sizeof(u64);
+ action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+ if (!action_value->value)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void
+__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_rif *rif,
+ unsigned char *ha, void *dip)
+{
+ struct devlink_dpipe_value *value;
+ u32 *rif_value;
+ u8 *ha_value;
+
+ /* Set Match RIF index */
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+
+ rif_value = value->value;
+ *rif_value = mlxsw_sp_rif_index(rif);
+ value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif);
+ value->mapping_valid = true;
+
+ /* Set Match DIP */
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+ memcpy(value->value, dip, value->value_size);
+
+ /* Set Action DMAC */
+ value = entry->action_values;
+ ha_value = value->value;
+ ether_addr_copy(ha_value, ha);
+}
+
+static void
+mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ struct mlxsw_sp_rif *rif)
+{
+ unsigned char *ha;
+ u32 dip;
+
+ ha = mlxsw_sp_neigh_entry_ha(neigh_entry);
+ dip = mlxsw_sp_neigh4_entry_dip(neigh_entry);
+ __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip);
+}
+
+static void
+mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ struct mlxsw_sp_rif *rif)
+{
+ struct in6_addr *dip;
+ unsigned char *ha;
+
+ ha = mlxsw_sp_neigh_entry_ha(neigh_entry);
+ dip = mlxsw_sp_neigh6_entry_dip(neigh_entry);
+
+ __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip);
+}
+
+static void
+mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp,
+ struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ struct mlxsw_sp_rif *rif,
+ int type)
+{
+ int err;
+
+ switch (type) {
+ case AF_INET:
+ mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif);
+ break;
+ case AF_INET6:
+ mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif);
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry,
+ &entry->counter);
+ if (!err)
+ entry->counter_valid = true;
+}
+
+static int
+mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
+ struct devlink_dpipe_entry *entry,
+ bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx,
+ int type)
+{
+ int rif_neigh_count = 0;
+ int rif_neigh_skip = 0;
+ int neigh_count = 0;
+ int rif_count;
+ int i, j;
+ int err;
+
+ rtnl_lock();
+ i = 0;
+ rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+start_again:
+ err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+ if (err)
+ goto err_ctx_prepare;
+ j = 0;
+ rif_neigh_skip = rif_neigh_count;
+ for (; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ if (!rif)
+ continue;
+
+ rif_neigh_count = 0;
+ mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
+ int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+ if (neigh_type != type)
+ continue;
+
+ if (neigh_type == AF_INET6 &&
+ mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ continue;
+
+ if (rif_neigh_count < rif_neigh_skip)
+ goto skip;
+
+ mlxsw_sp_dpipe_table_host_entry_fill(mlxsw_sp, entry,
+ neigh_entry, rif,
+ type);
+ entry->index = neigh_count;
+ err = devlink_dpipe_entry_ctx_append(dump_ctx, entry);
+ if (err) {
+ if (err == -EMSGSIZE) {
+ if (!j)
+ goto err_entry_append;
+ else
+ goto out;
+ }
+ goto err_entry_append;
+ }
+ neigh_count++;
+ j++;
+skip:
+ rif_neigh_count++;
+ }
+ rif_neigh_skip = 0;
+ }
+out:
+ devlink_dpipe_entry_ctx_close(dump_ctx);
+ if (i != rif_count)
+ goto start_again;
+
+ rtnl_unlock();
+ return 0;
+
+err_ctx_prepare:
+err_entry_append:
+ rtnl_unlock();
+ return err;
+}
+
+static int
+mlxsw_sp_dpipe_table_host_entries_dump(struct mlxsw_sp *mlxsw_sp,
+ bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx,
+ int type)
+{
+ struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT];
+ struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT];
+ struct devlink_dpipe_value action_value;
+ struct devlink_dpipe_action action = {0};
+ struct devlink_dpipe_entry entry = {0};
+ int err;
+
+ memset(matches, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT *
+ sizeof(matches[0]));
+ memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT *
+ sizeof(match_values[0]));
+ memset(&action_value, 0, sizeof(action_value));
+
+ mlxsw_sp_dpipe_table_host_match_action_prepare(matches, &action, type);
+ err = mlxsw_sp_dpipe_table_host_entry_prepare(&entry, match_values,
+ matches, &action_value,
+ &action, type);
+ if (err)
+ goto out;
+
+ err = mlxsw_sp_dpipe_table_host_entries_get(mlxsw_sp, &entry,
+ counters_enabled, dump_ctx,
+ type);
+out:
+ devlink_dpipe_entry_clear(&entry);
+ return err;
+}
+
+static int
+mlxsw_sp_dpipe_table_host4_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp,
+ counters_enabled,
+ dump_ctx, AF_INET);
+}
+
+static void
+mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
+ bool enable, int type)
+{
+ int i;
+
+ rtnl_lock();
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ if (!rif)
+ continue;
+ mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
+ int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+ if (neigh_type != type)
+ continue;
+
+ if (neigh_type == AF_INET6 &&
+ mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ continue;
+
+ mlxsw_sp_neigh_entry_counter_update(mlxsw_sp,
+ neigh_entry,
+ enable);
+ }
+ }
+ rtnl_unlock();
+}
+
+static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET);
+ return 0;
+}
+
+static u64
+mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
+{
+ u64 size = 0;
+ int i;
+
+ rtnl_lock();
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ if (!rif)
+ continue;
+ mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
+ int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+ if (neigh_type != type)
+ continue;
+
+ if (neigh_type == AF_INET6 &&
+ mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ continue;
+
+ size++;
+ }
+ }
+ rtnl_unlock();
+
+ return size;
+}
+
+static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET);
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_host4_ops = {
+ .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump,
+ .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump,
+ .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_host4_size_get,
+};
+
+#define MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST4 1
+
+static int mlxsw_sp_dpipe_host4_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ int err;
+
+ err = devlink_dpipe_table_register(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST4,
+ &mlxsw_sp_host4_ops,
+ mlxsw_sp, false);
+ if (err)
+ return err;
+
+ err = devlink_dpipe_table_resource_set(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST4,
+ MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
+ MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST4);
+ if (err)
+ goto err_resource_set;
+
+ return 0;
+
+err_resource_set:
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST4);
+ return err;
+}
+
+static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST4);
+}
+
+static int
+mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb)
+{
+ return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6);
+}
+
+static int
+mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp,
+ counters_enabled,
+ dump_ctx, AF_INET6);
+}
+
+static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6);
+ return 0;
+}
+
+static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6);
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = {
+ .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump,
+ .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump,
+ .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_host6_size_get,
+};
+
+#define MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST6 2
+
+static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ int err;
+
+ err = devlink_dpipe_table_register(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST6,
+ &mlxsw_sp_host6_ops,
+ mlxsw_sp, false);
+ if (err)
+ return err;
+
+ err = devlink_dpipe_table_resource_set(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST6,
+ MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+ MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_HOST6);
+ if (err)
+ goto err_resource_set;
+
+ return 0;
+
+err_resource_set:
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST6);
+ return err;
+}
+
+static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST6);
+}
+
+static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv,
+ struct sk_buff *skb)
+{
+ struct devlink_dpipe_match match = {0};
+ int err;
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
+
+ err = devlink_dpipe_match_put(skb, &match);
+ if (err)
+ return err;
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE;
+
+ err = devlink_dpipe_match_put(skb, &match);
+ if (err)
+ return err;
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX;
+
+ return devlink_dpipe_match_put(skb, &match);
+}
+
+static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv,
+ struct sk_buff *skb)
+{
+ struct devlink_dpipe_action action = {0};
+ int err;
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &devlink_dpipe_header_ethernet;
+ action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+ err = devlink_dpipe_action_put(skb, &action);
+ if (err)
+ return err;
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &mlxsw_sp_dpipe_header_metadata;
+ action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+ return devlink_dpipe_action_put(skb, &action);
+}
+
+static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ u64 size = 0;
+
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
+ if (mlxsw_sp_nexthop_offload(nh) &&
+ !mlxsw_sp_nexthop_group_has_ipip(nh))
+ size++;
+ return size;
+}
+
+enum mlxsw_sp_dpipe_table_adj_match {
+ MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX,
+ MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE,
+ MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX,
+ MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT,
+};
+
+enum mlxsw_sp_dpipe_table_adj_action {
+ MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC,
+ MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT,
+ MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT,
+};
+
+static void
+mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_action *actions)
+{
+ struct devlink_dpipe_action *action;
+ struct devlink_dpipe_match *match;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+ action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action->header = &devlink_dpipe_header_ethernet;
+ action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+ action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action->header = &mlxsw_sp_dpipe_header_metadata;
+ action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry,
+ struct devlink_dpipe_value *match_values,
+ struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_value *action_values,
+ struct devlink_dpipe_action *actions)
+{ struct devlink_dpipe_value *action_value;
+ struct devlink_dpipe_value *match_value;
+ struct devlink_dpipe_action *action;
+ struct devlink_dpipe_match *match;
+
+ entry->match_values = match_values;
+ entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT;
+
+ entry->action_values = action_values;
+ entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+ action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+
+ action_value->action = action;
+ action_value->value_size = sizeof(u64);
+ action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+ if (!action_value->value)
+ return -ENOMEM;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+ action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+
+ action_value->action = action;
+ action_value->value_size = sizeof(u32);
+ action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+ if (!action_value->value)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void
+__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry,
+ u32 adj_index, u32 adj_size,
+ u32 adj_hash_index, unsigned char *ha,
+ struct mlxsw_sp_rif *rif)
+{
+ struct devlink_dpipe_value *value;
+ u32 *p_rif_value;
+ u32 *p_index;
+
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+ p_index = value->value;
+ *p_index = adj_index;
+
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+ p_index = value->value;
+ *p_index = adj_size;
+
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+ p_index = value->value;
+ *p_index = adj_hash_index;
+
+ value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+ ether_addr_copy(value->value, ha);
+
+ value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+ p_rif_value = value->value;
+ *p_rif_value = mlxsw_sp_rif_index(rif);
+ value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif);
+ value->mapping_valid = true;
+}
+
+static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh,
+ struct devlink_dpipe_entry *entry)
+{
+ struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh);
+ unsigned char *ha = mlxsw_sp_nexthop_ha(nh);
+ u32 adj_hash_index = 0;
+ u32 adj_index = 0;
+ u32 adj_size = 0;
+ int err;
+
+ mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, &adj_hash_index);
+ __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, adj_size,
+ adj_hash_index, ha, rif);
+ err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter);
+ if (!err)
+ entry->counter_valid = true;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
+ struct devlink_dpipe_entry *entry,
+ bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int entry_index = 0;
+ int nh_count_max;
+ int nh_count = 0;
+ int nh_skip;
+ int j;
+ int err;
+
+ rtnl_lock();
+ nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
+start_again:
+ err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+ if (err)
+ goto err_ctx_prepare;
+ j = 0;
+ nh_skip = nh_count;
+ nh_count = 0;
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+ if (!mlxsw_sp_nexthop_offload(nh) ||
+ mlxsw_sp_nexthop_group_has_ipip(nh))
+ continue;
+
+ if (nh_count < nh_skip)
+ goto skip;
+
+ mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry);
+ entry->index = entry_index;
+ err = devlink_dpipe_entry_ctx_append(dump_ctx, entry);
+ if (err) {
+ if (err == -EMSGSIZE) {
+ if (!j)
+ goto err_entry_append;
+ break;
+ }
+ goto err_entry_append;
+ }
+ entry_index++;
+ j++;
+skip:
+ nh_count++;
+ }
+
+ devlink_dpipe_entry_ctx_close(dump_ctx);
+ if (nh_count != nh_count_max)
+ goto start_again;
+ rtnl_unlock();
+
+ return 0;
+
+err_ctx_prepare:
+err_entry_append:
+ rtnl_unlock();
+ return err;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT];
+ struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT];
+ struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT];
+ struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT];
+ struct devlink_dpipe_entry entry = {0};
+ struct mlxsw_sp *mlxsw_sp = priv;
+ int err;
+
+ memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT *
+ sizeof(matches[0]));
+ memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT *
+ sizeof(match_values[0]));
+ memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT *
+ sizeof(actions[0]));
+ memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT *
+ sizeof(action_values[0]));
+
+ mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions);
+ err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry,
+ match_values, matches,
+ action_values, actions);
+ if (err)
+ goto out;
+
+ err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry,
+ counters_enabled, dump_ctx);
+out:
+ devlink_dpipe_entry_clear(&entry);
+ return err;
+}
+
+static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ struct mlxsw_sp_nexthop *nh;
+ u32 adj_hash_index = 0;
+ u32 adj_index = 0;
+ u32 adj_size = 0;
+
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+ if (!mlxsw_sp_nexthop_offload(nh) ||
+ mlxsw_sp_nexthop_group_has_ipip(nh))
+ continue;
+
+ mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
+ &adj_hash_index);
+ if (enable)
+ mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+ else
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_update(mlxsw_sp,
+ adj_index + adj_hash_index, nh);
+ }
+ return 0;
+}
+
+static u64
+mlxsw_sp_dpipe_table_adj_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ u64 size;
+
+ rtnl_lock();
+ size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
+ rtnl_unlock();
+
+ return size;
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = {
+ .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump,
+ .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump,
+ .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_adj_size_get,
+};
+
+#define MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_ADJ 1
+
+static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ int err;
+
+ err = devlink_dpipe_table_register(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ,
+ &mlxsw_sp_dpipe_table_adj_ops,
+ mlxsw_sp, false);
+ if (err)
+ return err;
+
+ err = devlink_dpipe_table_resource_set(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ,
+ MLXSW_SP_RESOURCE_KVD_LINEAR,
+ MLXSW_SP_DPIPE_TABLE_RESOURCE_UNIT_ADJ);
+ if (err)
+ goto err_resource_set;
+
+ return 0;
+
+err_resource_set:
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ);
+ return err;
+}
+
+static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ);
+}
+
+int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ int err;
+
+ err = devlink_dpipe_headers_register(devlink,
+ &mlxsw_sp_dpipe_headers);
+ if (err)
+ return err;
+ err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp);
+ if (err)
+ goto err_erif_table_init;
+
+ err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp);
+ if (err)
+ goto err_host4_table_init;
+
+ err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp);
+ if (err)
+ goto err_host6_table_init;
+
+ err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp);
+ if (err)
+ goto err_adj_table_init;
+
+ return 0;
+err_adj_table_init:
+ mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp);
+err_host6_table_init:
+ mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
+err_host4_table_init:
+ mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp);
+err_erif_table_init:
+ devlink_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core));
+ return err;
+}
+
+void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp);
+ mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp);
+ mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
+ mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp);
+ devlink_dpipe_headers_unregister(devlink);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
new file mode 100644
index 000000000..e68957623
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_PIPELINE_H_
+#define _MLXSW_PIPELINE_H_
+
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+
+int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp);
+
+#else
+
+static inline int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return 0;
+}
+
+static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
+{
+}
+
+#endif
+
+#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif"
+#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4"
+#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6"
+#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj"
+
+#endif /* _MLXSW_PIPELINE_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
new file mode 100644
index 000000000..562c4429e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -0,0 +1,961 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/if_vlan.h>
+#include <linux/if_bridge.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+
+#include "spectrum.h"
+#include "reg.h"
+
+struct mlxsw_sp_fid_family;
+
+struct mlxsw_sp_fid_core {
+ struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
+ unsigned int *port_fid_mappings;
+};
+
+struct mlxsw_sp_fid {
+ struct list_head list;
+ struct mlxsw_sp_rif *rif;
+ unsigned int ref_count;
+ u16 fid_index;
+ struct mlxsw_sp_fid_family *fid_family;
+};
+
+struct mlxsw_sp_fid_8021q {
+ struct mlxsw_sp_fid common;
+ u16 vid;
+};
+
+struct mlxsw_sp_fid_8021d {
+ struct mlxsw_sp_fid common;
+ int br_ifindex;
+};
+
+struct mlxsw_sp_flood_table {
+ enum mlxsw_sp_flood_type packet_type;
+ enum mlxsw_reg_sfgc_bridge_type bridge_type;
+ enum mlxsw_flood_table_type table_type;
+ int table_index;
+};
+
+struct mlxsw_sp_fid_ops {
+ void (*setup)(struct mlxsw_sp_fid *fid, const void *arg);
+ int (*configure)(struct mlxsw_sp_fid *fid);
+ void (*deconfigure)(struct mlxsw_sp_fid *fid);
+ int (*index_alloc)(struct mlxsw_sp_fid *fid, const void *arg,
+ u16 *p_fid_index);
+ bool (*compare)(const struct mlxsw_sp_fid *fid,
+ const void *arg);
+ u16 (*flood_index)(const struct mlxsw_sp_fid *fid);
+ int (*port_vid_map)(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *port, u16 vid);
+ void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *port, u16 vid);
+};
+
+struct mlxsw_sp_fid_family {
+ enum mlxsw_sp_fid_type type;
+ size_t fid_size;
+ u16 start_index;
+ u16 end_index;
+ struct list_head fids_list;
+ unsigned long *fids_bitmap;
+ const struct mlxsw_sp_flood_table *flood_tables;
+ int nr_flood_tables;
+ enum mlxsw_sp_rif_type rif_type;
+ const struct mlxsw_sp_fid_ops *ops;
+ struct mlxsw_sp *mlxsw_sp;
+};
+
+static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
+ [MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST] = 1,
+};
+
+static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
+ [MLXSW_REG_SFGC_TYPE_BROADCAST] = 1,
+ [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1,
+ [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1,
+ [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1,
+ [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1,
+};
+
+static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
+ [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1,
+};
+
+static const int *mlxsw_sp_packet_type_sfgc_types[] = {
+ [MLXSW_SP_FLOOD_TYPE_UC] = mlxsw_sp_sfgc_uc_packet_types,
+ [MLXSW_SP_FLOOD_TYPE_BC] = mlxsw_sp_sfgc_bc_packet_types,
+ [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types,
+};
+
+static const struct mlxsw_sp_flood_table *
+mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_flood_type packet_type)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ int i;
+
+ for (i = 0; i < fid_family->nr_flood_tables; i++) {
+ if (fid_family->flood_tables[i].packet_type != packet_type)
+ continue;
+ return &fid_family->flood_tables[i];
+ }
+
+ return NULL;
+}
+
+int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_flood_type packet_type, u8 local_port,
+ bool member)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+ const struct mlxsw_sp_flood_table *flood_table;
+ char *sftr_pl;
+ int err;
+
+ if (WARN_ON(!fid_family->flood_tables || !ops->flood_index))
+ return -EINVAL;
+
+ flood_table = mlxsw_sp_fid_flood_table_lookup(fid, packet_type);
+ if (!flood_table)
+ return -ESRCH;
+
+ sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
+ if (!sftr_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_sftr_pack(sftr_pl, flood_table->table_index,
+ ops->flood_index(fid), flood_table->table_type, 1,
+ local_port, member);
+ err = mlxsw_reg_write(fid_family->mlxsw_sp->core, MLXSW_REG(sftr),
+ sftr_pl);
+ kfree(sftr_pl);
+ return err;
+}
+
+int mlxsw_sp_fid_port_vid_map(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ if (WARN_ON(!fid->fid_family->ops->port_vid_map))
+ return -EINVAL;
+ return fid->fid_family->ops->port_vid_map(fid, mlxsw_sp_port, vid);
+}
+
+void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ fid->fid_family->ops->port_vid_unmap(fid, mlxsw_sp_port, vid);
+}
+
+enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid)
+{
+ return fid->fid_family->rif_type;
+}
+
+u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid)
+{
+ return fid->fid_index;
+}
+
+enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid)
+{
+ return fid->fid_family->type;
+}
+
+void mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif)
+{
+ fid->rif = rif;
+}
+
+enum mlxsw_sp_rif_type
+mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_fid_type type)
+{
+ struct mlxsw_sp_fid_core *fid_core = mlxsw_sp->fid_core;
+
+ return fid_core->fid_family_arr[type]->rif_type;
+}
+
+static struct mlxsw_sp_fid_8021q *
+mlxsw_sp_fid_8021q_fid(const struct mlxsw_sp_fid *fid)
+{
+ return container_of(fid, struct mlxsw_sp_fid_8021q, common);
+}
+
+u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid)
+{
+ return mlxsw_sp_fid_8021q_fid(fid)->vid;
+}
+
+static void mlxsw_sp_fid_8021q_setup(struct mlxsw_sp_fid *fid, const void *arg)
+{
+ u16 vid = *(u16 *) arg;
+
+ mlxsw_sp_fid_8021q_fid(fid)->vid = vid;
+}
+
+static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid)
+{
+ return valid ? MLXSW_REG_SFMR_OP_CREATE_FID :
+ MLXSW_REG_SFMR_OP_DESTROY_FID;
+}
+
+static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+ u16 fid_offset, bool valid)
+{
+ char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+ mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid_index,
+ fid_offset);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
+}
+
+static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+ u16 vid, bool valid)
+{
+ enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID;
+ char svfa_pl[MLXSW_REG_SVFA_LEN];
+
+ mlxsw_reg_svfa_pack(svfa_pl, 0, mt, valid, fid_index, vid);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl);
+}
+
+static int __mlxsw_sp_fid_port_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+ u8 local_port, u16 vid, bool valid)
+{
+ enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID;
+ char svfa_pl[MLXSW_REG_SVFA_LEN];
+
+ mlxsw_reg_svfa_pack(svfa_pl, local_port, mt, valid, fid_index, vid);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl);
+}
+
+static int mlxsw_sp_fid_8021q_configure(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
+ struct mlxsw_sp_fid_8021q *fid_8021q;
+ int err;
+
+ err = mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, fid->fid_index, true);
+ if (err)
+ return err;
+
+ fid_8021q = mlxsw_sp_fid_8021q_fid(fid);
+ err = mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid,
+ true);
+ if (err)
+ goto err_fid_map;
+
+ return 0;
+
+err_fid_map:
+ mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false);
+ return err;
+}
+
+static void mlxsw_sp_fid_8021q_deconfigure(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
+ struct mlxsw_sp_fid_8021q *fid_8021q;
+
+ fid_8021q = mlxsw_sp_fid_8021q_fid(fid);
+ mlxsw_sp_fid_vid_map(mlxsw_sp, fid->fid_index, fid_8021q->vid, false);
+ mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, false);
+}
+
+static int mlxsw_sp_fid_8021q_index_alloc(struct mlxsw_sp_fid *fid,
+ const void *arg, u16 *p_fid_index)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ u16 vid = *(u16 *) arg;
+
+ /* Use 1:1 mapping for simplicity although not a must */
+ if (vid < fid_family->start_index || vid > fid_family->end_index)
+ return -EINVAL;
+ *p_fid_index = vid;
+
+ return 0;
+}
+
+static bool
+mlxsw_sp_fid_8021q_compare(const struct mlxsw_sp_fid *fid, const void *arg)
+{
+ u16 vid = *(u16 *) arg;
+
+ return mlxsw_sp_fid_8021q_fid(fid)->vid == vid;
+}
+
+static u16 mlxsw_sp_fid_8021q_flood_index(const struct mlxsw_sp_fid *fid)
+{
+ return fid->fid_index;
+}
+
+static int mlxsw_sp_fid_8021q_port_vid_map(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+
+ /* In case there are no {Port, VID} => FID mappings on the port,
+ * we can use the global VID => FID mapping we created when the
+ * FID was configured.
+ */
+ if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0)
+ return 0;
+ return __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port,
+ vid, true);
+}
+
+static void
+mlxsw_sp_fid_8021q_port_vid_unmap(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+
+ if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 0)
+ return;
+ __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index, local_port, vid,
+ false);
+}
+
+static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021q_ops = {
+ .setup = mlxsw_sp_fid_8021q_setup,
+ .configure = mlxsw_sp_fid_8021q_configure,
+ .deconfigure = mlxsw_sp_fid_8021q_deconfigure,
+ .index_alloc = mlxsw_sp_fid_8021q_index_alloc,
+ .compare = mlxsw_sp_fid_8021q_compare,
+ .flood_index = mlxsw_sp_fid_8021q_flood_index,
+ .port_vid_map = mlxsw_sp_fid_8021q_port_vid_map,
+ .port_vid_unmap = mlxsw_sp_fid_8021q_port_vid_unmap,
+};
+
+static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021q_flood_tables[] = {
+ {
+ .packet_type = MLXSW_SP_FLOOD_TYPE_UC,
+ .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET,
+ .table_index = 0,
+ },
+ {
+ .packet_type = MLXSW_SP_FLOOD_TYPE_MC,
+ .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET,
+ .table_index = 1,
+ },
+ {
+ .packet_type = MLXSW_SP_FLOOD_TYPE_BC,
+ .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFSET,
+ .table_index = 2,
+ },
+};
+
+/* Range and flood configuration must match mlxsw_config_profile */
+static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021q_family = {
+ .type = MLXSW_SP_FID_TYPE_8021Q,
+ .fid_size = sizeof(struct mlxsw_sp_fid_8021q),
+ .start_index = 1,
+ .end_index = VLAN_VID_MASK,
+ .flood_tables = mlxsw_sp_fid_8021q_flood_tables,
+ .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021q_flood_tables),
+ .rif_type = MLXSW_SP_RIF_TYPE_VLAN,
+ .ops = &mlxsw_sp_fid_8021q_ops,
+};
+
+static struct mlxsw_sp_fid_8021d *
+mlxsw_sp_fid_8021d_fid(const struct mlxsw_sp_fid *fid)
+{
+ return container_of(fid, struct mlxsw_sp_fid_8021d, common);
+}
+
+static void mlxsw_sp_fid_8021d_setup(struct mlxsw_sp_fid *fid, const void *arg)
+{
+ int br_ifindex = *(int *) arg;
+
+ mlxsw_sp_fid_8021d_fid(fid)->br_ifindex = br_ifindex;
+}
+
+static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ return mlxsw_sp_fid_op(fid_family->mlxsw_sp, fid->fid_index, 0, true);
+}
+
+static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid)
+{
+ mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
+}
+
+static int mlxsw_sp_fid_8021d_index_alloc(struct mlxsw_sp_fid *fid,
+ const void *arg, u16 *p_fid_index)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ u16 nr_fids, fid_index;
+
+ nr_fids = fid_family->end_index - fid_family->start_index + 1;
+ fid_index = find_first_zero_bit(fid_family->fids_bitmap, nr_fids);
+ if (fid_index == nr_fids)
+ return -ENOBUFS;
+ *p_fid_index = fid_family->start_index + fid_index;
+
+ return 0;
+}
+
+static bool
+mlxsw_sp_fid_8021d_compare(const struct mlxsw_sp_fid *fid, const void *arg)
+{
+ int br_ifindex = *(int *) arg;
+
+ return mlxsw_sp_fid_8021d_fid(fid)->br_ifindex == br_ifindex;
+}
+
+static u16 mlxsw_sp_fid_8021d_flood_index(const struct mlxsw_sp_fid *fid)
+{
+ return fid->fid_index - fid->fid_family->start_index;
+}
+
+static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ int err;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list,
+ list) {
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+
+ if (!fid)
+ continue;
+
+ err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
+ mlxsw_sp_port->local_port,
+ vid, true);
+ if (err)
+ goto err_fid_port_vid_map;
+ }
+
+ err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true);
+ if (err)
+ goto err_port_vp_mode_set;
+
+ return 0;
+
+err_port_vp_mode_set:
+err_fid_port_vid_map:
+ list_for_each_entry_continue_reverse(mlxsw_sp_port_vlan,
+ &mlxsw_sp_port->vlans_list, list) {
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+
+ if (!fid)
+ continue;
+
+ __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
+ mlxsw_sp_port->local_port, vid,
+ false);
+ }
+ return err;
+}
+
+static void mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
+
+ list_for_each_entry_reverse(mlxsw_sp_port_vlan,
+ &mlxsw_sp_port->vlans_list, list) {
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+
+ if (!fid)
+ continue;
+
+ __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
+ mlxsw_sp_port->local_port, vid,
+ false);
+ }
+}
+
+static int mlxsw_sp_fid_8021d_port_vid_map(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ int err;
+
+ err = __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
+ mlxsw_sp_port->local_port, vid, true);
+ if (err)
+ return err;
+
+ if (mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) {
+ err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
+ if (err)
+ goto err_port_vp_mode_trans;
+ }
+
+ return 0;
+
+err_port_vp_mode_trans:
+ mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
+ __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
+ mlxsw_sp_port->local_port, vid, false);
+ return err;
+}
+
+static void
+mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+
+ if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1)
+ mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
+ mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
+ __mlxsw_sp_fid_port_vid_map(mlxsw_sp, fid->fid_index,
+ mlxsw_sp_port->local_port, vid, false);
+}
+
+static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
+ .setup = mlxsw_sp_fid_8021d_setup,
+ .configure = mlxsw_sp_fid_8021d_configure,
+ .deconfigure = mlxsw_sp_fid_8021d_deconfigure,
+ .index_alloc = mlxsw_sp_fid_8021d_index_alloc,
+ .compare = mlxsw_sp_fid_8021d_compare,
+ .flood_index = mlxsw_sp_fid_8021d_flood_index,
+ .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map,
+ .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap,
+};
+
+static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = {
+ {
+ .packet_type = MLXSW_SP_FLOOD_TYPE_UC,
+ .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_VFID,
+ .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID,
+ .table_index = 0,
+ },
+ {
+ .packet_type = MLXSW_SP_FLOOD_TYPE_MC,
+ .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_VFID,
+ .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID,
+ .table_index = 1,
+ },
+ {
+ .packet_type = MLXSW_SP_FLOOD_TYPE_BC,
+ .bridge_type = MLXSW_REG_SFGC_BRIDGE_TYPE_VFID,
+ .table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID,
+ .table_index = 2,
+ },
+};
+
+/* Range and flood configuration must match mlxsw_config_profile */
+static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021d_family = {
+ .type = MLXSW_SP_FID_TYPE_8021D,
+ .fid_size = sizeof(struct mlxsw_sp_fid_8021d),
+ .start_index = VLAN_N_VID,
+ .end_index = VLAN_N_VID + MLXSW_SP_FID_8021D_MAX - 1,
+ .flood_tables = mlxsw_sp_fid_8021d_flood_tables,
+ .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables),
+ .rif_type = MLXSW_SP_RIF_TYPE_FID,
+ .ops = &mlxsw_sp_fid_8021d_ops,
+};
+
+static int mlxsw_sp_fid_rfid_configure(struct mlxsw_sp_fid *fid)
+{
+ /* rFIDs are allocated by the device during init */
+ return 0;
+}
+
+static void mlxsw_sp_fid_rfid_deconfigure(struct mlxsw_sp_fid *fid)
+{
+}
+
+static int mlxsw_sp_fid_rfid_index_alloc(struct mlxsw_sp_fid *fid,
+ const void *arg, u16 *p_fid_index)
+{
+ u16 rif_index = *(u16 *) arg;
+
+ *p_fid_index = fid->fid_family->start_index + rif_index;
+
+ return 0;
+}
+
+static bool mlxsw_sp_fid_rfid_compare(const struct mlxsw_sp_fid *fid,
+ const void *arg)
+{
+ u16 rif_index = *(u16 *) arg;
+
+ return fid->fid_index == rif_index + fid->fid_family->start_index;
+}
+
+static int mlxsw_sp_fid_rfid_port_vid_map(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+ int err;
+
+ /* We only need to transition the port to virtual mode since
+ * {Port, VID} => FID is done by the firmware upon RIF creation.
+ */
+ if (mlxsw_sp->fid_core->port_fid_mappings[local_port]++ == 0) {
+ err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
+ if (err)
+ goto err_port_vp_mode_trans;
+ }
+
+ return 0;
+
+err_port_vp_mode_trans:
+ mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
+ return err;
+}
+
+static void
+mlxsw_sp_fid_rfid_port_vid_unmap(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_port *mlxsw_sp_port, u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 local_port = mlxsw_sp_port->local_port;
+
+ if (mlxsw_sp->fid_core->port_fid_mappings[local_port] == 1)
+ mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
+ mlxsw_sp->fid_core->port_fid_mappings[local_port]--;
+}
+
+static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_rfid_ops = {
+ .configure = mlxsw_sp_fid_rfid_configure,
+ .deconfigure = mlxsw_sp_fid_rfid_deconfigure,
+ .index_alloc = mlxsw_sp_fid_rfid_index_alloc,
+ .compare = mlxsw_sp_fid_rfid_compare,
+ .port_vid_map = mlxsw_sp_fid_rfid_port_vid_map,
+ .port_vid_unmap = mlxsw_sp_fid_rfid_port_vid_unmap,
+};
+
+#define MLXSW_SP_RFID_BASE (15 * 1024)
+#define MLXSW_SP_RFID_MAX 1024
+
+static const struct mlxsw_sp_fid_family mlxsw_sp_fid_rfid_family = {
+ .type = MLXSW_SP_FID_TYPE_RFID,
+ .fid_size = sizeof(struct mlxsw_sp_fid),
+ .start_index = MLXSW_SP_RFID_BASE,
+ .end_index = MLXSW_SP_RFID_BASE + MLXSW_SP_RFID_MAX - 1,
+ .rif_type = MLXSW_SP_RIF_TYPE_SUBPORT,
+ .ops = &mlxsw_sp_fid_rfid_ops,
+};
+
+static int mlxsw_sp_fid_dummy_configure(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp;
+
+ return mlxsw_sp_fid_op(mlxsw_sp, fid->fid_index, 0, true);
+}
+
+static void mlxsw_sp_fid_dummy_deconfigure(struct mlxsw_sp_fid *fid)
+{
+ mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
+}
+
+static int mlxsw_sp_fid_dummy_index_alloc(struct mlxsw_sp_fid *fid,
+ const void *arg, u16 *p_fid_index)
+{
+ *p_fid_index = fid->fid_family->start_index;
+
+ return 0;
+}
+
+static bool mlxsw_sp_fid_dummy_compare(const struct mlxsw_sp_fid *fid,
+ const void *arg)
+{
+ return true;
+}
+
+static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = {
+ .configure = mlxsw_sp_fid_dummy_configure,
+ .deconfigure = mlxsw_sp_fid_dummy_deconfigure,
+ .index_alloc = mlxsw_sp_fid_dummy_index_alloc,
+ .compare = mlxsw_sp_fid_dummy_compare,
+};
+
+static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = {
+ .type = MLXSW_SP_FID_TYPE_DUMMY,
+ .fid_size = sizeof(struct mlxsw_sp_fid),
+ .start_index = VLAN_N_VID - 1,
+ .end_index = VLAN_N_VID - 1,
+ .ops = &mlxsw_sp_fid_dummy_ops,
+};
+
+static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = {
+ [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_family,
+ [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp_fid_8021d_family,
+ [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family,
+ [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family,
+};
+
+static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_fid_type type,
+ const void *arg)
+{
+ struct mlxsw_sp_fid_family *fid_family;
+ struct mlxsw_sp_fid *fid;
+ u16 fid_index;
+ int err;
+
+ fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
+ list_for_each_entry(fid, &fid_family->fids_list, list) {
+ if (!fid->fid_family->ops->compare(fid, arg))
+ continue;
+ fid->ref_count++;
+ return fid;
+ }
+
+ fid = kzalloc(fid_family->fid_size, GFP_KERNEL);
+ if (!fid)
+ return ERR_PTR(-ENOMEM);
+ fid->fid_family = fid_family;
+
+ err = fid->fid_family->ops->index_alloc(fid, arg, &fid_index);
+ if (err)
+ goto err_index_alloc;
+ fid->fid_index = fid_index;
+ __set_bit(fid_index - fid_family->start_index, fid_family->fids_bitmap);
+
+ if (fid->fid_family->ops->setup)
+ fid->fid_family->ops->setup(fid, arg);
+
+ err = fid->fid_family->ops->configure(fid);
+ if (err)
+ goto err_configure;
+
+ list_add(&fid->list, &fid_family->fids_list);
+ fid->ref_count++;
+ return fid;
+
+err_configure:
+ __clear_bit(fid_index - fid_family->start_index,
+ fid_family->fids_bitmap);
+err_index_alloc:
+ kfree(fid);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ if (--fid->ref_count == 1 && fid->rif) {
+ /* Destroy the associated RIF and let it drop the last
+ * reference on the FID.
+ */
+ return mlxsw_sp_rif_destroy(fid->rif);
+ } else if (fid->ref_count == 0) {
+ list_del(&fid->list);
+ fid->fid_family->ops->deconfigure(fid);
+ __clear_bit(fid->fid_index - fid_family->start_index,
+ fid_family->fids_bitmap);
+ kfree(fid);
+ }
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid)
+{
+ return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021Q, &vid);
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
+ int br_ifindex)
+{
+ return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex);
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
+ u16 rif_index)
+{
+ return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_RFID, &rif_index);
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp)
+{
+ return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_DUMMY, NULL);
+}
+
+static int
+mlxsw_sp_fid_flood_table_init(struct mlxsw_sp_fid_family *fid_family,
+ const struct mlxsw_sp_flood_table *flood_table)
+{
+ enum mlxsw_sp_flood_type packet_type = flood_table->packet_type;
+ const int *sfgc_packet_types;
+ int i;
+
+ sfgc_packet_types = mlxsw_sp_packet_type_sfgc_types[packet_type];
+ for (i = 0; i < MLXSW_REG_SFGC_TYPE_MAX; i++) {
+ struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+ char sfgc_pl[MLXSW_REG_SFGC_LEN];
+ int err;
+
+ if (!sfgc_packet_types[i])
+ continue;
+ mlxsw_reg_sfgc_pack(sfgc_pl, i, flood_table->bridge_type,
+ flood_table->table_type,
+ flood_table->table_index);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfgc), sfgc_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_sp_fid_flood_tables_init(struct mlxsw_sp_fid_family *fid_family)
+{
+ int i;
+
+ for (i = 0; i < fid_family->nr_flood_tables; i++) {
+ const struct mlxsw_sp_flood_table *flood_table;
+ int err;
+
+ flood_table = &fid_family->flood_tables[i];
+ err = mlxsw_sp_fid_flood_table_init(fid_family, flood_table);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_fid_family_register(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fid_family *tmpl)
+{
+ u16 nr_fids = tmpl->end_index - tmpl->start_index + 1;
+ struct mlxsw_sp_fid_family *fid_family;
+ int err;
+
+ fid_family = kmemdup(tmpl, sizeof(*fid_family), GFP_KERNEL);
+ if (!fid_family)
+ return -ENOMEM;
+
+ fid_family->mlxsw_sp = mlxsw_sp;
+ INIT_LIST_HEAD(&fid_family->fids_list);
+ fid_family->fids_bitmap = kcalloc(BITS_TO_LONGS(nr_fids),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!fid_family->fids_bitmap) {
+ err = -ENOMEM;
+ goto err_alloc_fids_bitmap;
+ }
+
+ if (fid_family->flood_tables) {
+ err = mlxsw_sp_fid_flood_tables_init(fid_family);
+ if (err)
+ goto err_fid_flood_tables_init;
+ }
+
+ mlxsw_sp->fid_core->fid_family_arr[tmpl->type] = fid_family;
+
+ return 0;
+
+err_fid_flood_tables_init:
+ kfree(fid_family->fids_bitmap);
+err_alloc_fids_bitmap:
+ kfree(fid_family);
+ return err;
+}
+
+static void
+mlxsw_sp_fid_family_unregister(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid_family *fid_family)
+{
+ mlxsw_sp->fid_core->fid_family_arr[fid_family->type] = NULL;
+ kfree(fid_family->fids_bitmap);
+ WARN_ON_ONCE(!list_empty(&fid_family->fids_list));
+ kfree(fid_family);
+}
+
+int mlxsw_sp_port_fids_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+ /* Track number of FIDs configured on the port with mapping type
+ * PORT_VID_TO_FID, so that we know when to transition the port
+ * back to non-virtual (VLAN) mode.
+ */
+ mlxsw_sp->fid_core->port_fid_mappings[mlxsw_sp_port->local_port] = 0;
+
+ return mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
+}
+
+void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+ mlxsw_sp->fid_core->port_fid_mappings[mlxsw_sp_port->local_port] = 0;
+}
+
+int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+ struct mlxsw_sp_fid_core *fid_core;
+ int err, i;
+
+ fid_core = kzalloc(sizeof(*mlxsw_sp->fid_core), GFP_KERNEL);
+ if (!fid_core)
+ return -ENOMEM;
+ mlxsw_sp->fid_core = fid_core;
+
+ fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int),
+ GFP_KERNEL);
+ if (!fid_core->port_fid_mappings) {
+ err = -ENOMEM;
+ goto err_alloc_port_fid_mappings;
+ }
+
+ for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++) {
+ err = mlxsw_sp_fid_family_register(mlxsw_sp,
+ mlxsw_sp_fid_family_arr[i]);
+
+ if (err)
+ goto err_fid_ops_register;
+ }
+
+ return 0;
+
+err_fid_ops_register:
+ for (i--; i >= 0; i--) {
+ struct mlxsw_sp_fid_family *fid_family;
+
+ fid_family = fid_core->fid_family_arr[i];
+ mlxsw_sp_fid_family_unregister(mlxsw_sp, fid_family);
+ }
+ kfree(fid_core->port_fid_mappings);
+err_alloc_port_fid_mappings:
+ kfree(fid_core);
+ return err;
+}
+
+void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_fid_core *fid_core = mlxsw_sp->fid_core;
+ int i;
+
+ for (i = 0; i < MLXSW_SP_FID_TYPE_MAX; i++)
+ mlxsw_sp_fid_family_unregister(mlxsw_sp,
+ fid_core->fid_family_arr[i]);
+ kfree(fid_core->port_fid_mappings);
+ kfree(fid_core);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
new file mode 100644
index 000000000..9f4eb3cde
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "spectrum.h"
+#include "core_acl_flex_keys.h"
+
+static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct tcf_exts *exts,
+ struct netlink_ext_ack *extack)
+{
+ const struct tc_action *a;
+ int err, i;
+
+ if (!tcf_exts_has_actions(exts))
+ return 0;
+
+ /* Count action is inserted first */
+ err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack);
+ if (err)
+ return err;
+
+ tcf_exts_for_each_action(i, a, exts) {
+ if (is_tcf_gact_ok(a)) {
+ err = mlxsw_sp_acl_rulei_act_terminate(rulei);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append terminate action");
+ return err;
+ }
+ } else if (is_tcf_gact_shot(a)) {
+ err = mlxsw_sp_acl_rulei_act_drop(rulei);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action");
+ return err;
+ }
+ } else if (is_tcf_gact_trap(a)) {
+ err = mlxsw_sp_acl_rulei_act_trap(rulei);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append trap action");
+ return err;
+ }
+ } else if (is_tcf_gact_goto_chain(a)) {
+ u32 chain_index = tcf_gact_goto_chain_index(a);
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ u16 group_id;
+
+ ruleset = mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, block,
+ chain_index,
+ MLXSW_SP_ACL_PROFILE_FLOWER);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ group_id = mlxsw_sp_acl_ruleset_group_id(ruleset);
+ err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot append jump action");
+ return err;
+ }
+ } else if (is_tcf_mirred_egress_redirect(a)) {
+ struct net_device *out_dev;
+ struct mlxsw_sp_fid *fid;
+ u16 fid_index;
+
+ fid = mlxsw_sp_acl_dummy_fid(mlxsw_sp);
+ fid_index = mlxsw_sp_fid_index(fid);
+ err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei,
+ fid_index, extack);
+ if (err)
+ return err;
+
+ out_dev = tcf_mirred_dev(a);
+ err = mlxsw_sp_acl_rulei_act_fwd(mlxsw_sp, rulei,
+ out_dev, extack);
+ if (err)
+ return err;
+ } else if (is_tcf_mirred_egress_mirror(a)) {
+ struct net_device *out_dev = tcf_mirred_dev(a);
+
+ err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei,
+ block, out_dev,
+ extack);
+ if (err)
+ return err;
+ } else if (is_tcf_vlan(a)) {
+ u16 proto = be16_to_cpu(tcf_vlan_push_proto(a));
+ u32 action = tcf_vlan_action(a);
+ u8 prio = tcf_vlan_push_prio(a);
+ u16 vid = tcf_vlan_push_vid(a);
+
+ err = mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei,
+ action, vid,
+ proto, prio, extack);
+ if (err)
+ return err;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
+ dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n");
+ return -EOPNOTSUPP;
+ }
+ }
+ return 0;
+}
+
+static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
+ struct tc_cls_flower_offload *f)
+{
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+ (char *) &key->src,
+ (char *) &mask->src, 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+ (char *) &key->dst,
+ (char *) &mask->dst, 4);
+}
+
+static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
+ struct tc_cls_flower_offload *f)
+{
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127,
+ &key->src.s6_addr[0x0],
+ &mask->src.s6_addr[0x0], 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95,
+ &key->src.s6_addr[0x4],
+ &mask->src.s6_addr[0x4], 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63,
+ &key->src.s6_addr[0x8],
+ &mask->src.s6_addr[0x8], 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
+ &key->src.s6_addr[0xC],
+ &mask->src.s6_addr[0xC], 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127,
+ &key->dst.s6_addr[0x0],
+ &mask->dst.s6_addr[0x0], 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95,
+ &key->dst.s6_addr[0x4],
+ &mask->dst.s6_addr[0x4], 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63,
+ &key->dst.s6_addr[0x8],
+ &mask->dst.s6_addr[0x8], 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
+ &key->dst.s6_addr[0xC],
+ &mask->dst.s6_addr[0xC], 4);
+}
+
+static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct tc_cls_flower_offload *f,
+ u8 ip_proto)
+{
+ struct flow_dissector_key_ports *key, *mask;
+
+ if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS))
+ return 0;
+
+ if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Only UDP and TCP keys are supported");
+ dev_err(mlxsw_sp->bus_info->dev, "Only UDP and TCP keys are supported\n");
+ return -EINVAL;
+ }
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_L4_PORT,
+ ntohs(key->dst), ntohs(mask->dst));
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_L4_PORT,
+ ntohs(key->src), ntohs(mask->src));
+ return 0;
+}
+
+static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct tc_cls_flower_offload *f,
+ u8 ip_proto)
+{
+ struct flow_dissector_key_tcp *key, *mask;
+
+ if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP))
+ return 0;
+
+ if (ip_proto != IPPROTO_TCP) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "TCP keys supported only for TCP");
+ dev_err(mlxsw_sp->bus_info->dev, "TCP keys supported only for TCP\n");
+ return -EINVAL;
+ }
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ f->key);
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_TCP,
+ f->mask);
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS,
+ ntohs(key->flags), ntohs(mask->flags));
+ return 0;
+}
+
+static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct tc_cls_flower_offload *f,
+ u16 n_proto)
+{
+ struct flow_dissector_key_ip *key, *mask;
+
+ if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP))
+ return 0;
+
+ if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "IP keys supported only for IPv4/6");
+ dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n");
+ return -EINVAL;
+ }
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->key);
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->mask);
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_,
+ key->ttl, mask->ttl);
+
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN,
+ key->tos & 0x3, mask->tos & 0x3);
+
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP,
+ key->tos >> 6, mask->tos >> 6);
+
+ return 0;
+}
+
+static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct tc_cls_flower_offload *f)
+{
+ u16 n_proto_mask = 0;
+ u16 n_proto_key = 0;
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+ int err;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_TCP) |
+ BIT(FLOW_DISSECTOR_KEY_IP) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN))) {
+ dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n");
+ NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported key");
+ return -EOPNOTSUPP;
+ }
+
+ mlxsw_sp_acl_rulei_priority(rulei, f->common.prio);
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->key);
+ addr_type = key->addr_type;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ n_proto_key = ntohs(key->n_proto);
+ n_proto_mask = ntohs(mask->n_proto);
+
+ if (n_proto_key == ETH_P_ALL) {
+ n_proto_key = 0;
+ n_proto_mask = 0;
+ }
+ mlxsw_sp_acl_rulei_keymask_u32(rulei,
+ MLXSW_AFK_ELEMENT_ETHERTYPE,
+ n_proto_key, n_proto_mask);
+
+ ip_proto = key->ip_proto;
+ mlxsw_sp_acl_rulei_keymask_u32(rulei,
+ MLXSW_AFK_ELEMENT_IP_PROTO,
+ key->ip_proto, mask->ip_proto);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+
+ mlxsw_sp_acl_rulei_keymask_buf(rulei,
+ MLXSW_AFK_ELEMENT_DMAC_32_47,
+ key->dst, mask->dst, 2);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei,
+ MLXSW_AFK_ELEMENT_DMAC_0_31,
+ key->dst + 2, mask->dst + 2, 4);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei,
+ MLXSW_AFK_ELEMENT_SMAC_32_47,
+ key->src, mask->src, 2);
+ mlxsw_sp_acl_rulei_keymask_buf(rulei,
+ MLXSW_AFK_ELEMENT_SMAC_0_31,
+ key->src + 2, mask->src + 2, 4);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->key);
+ struct flow_dissector_key_vlan *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ f->mask);
+
+ if (mlxsw_sp_acl_block_is_egress_bound(block)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress");
+ return -EOPNOTSUPP;
+ }
+ if (mask->vlan_id != 0)
+ mlxsw_sp_acl_rulei_keymask_u32(rulei,
+ MLXSW_AFK_ELEMENT_VID,
+ key->vlan_id,
+ mask->vlan_id);
+ if (mask->vlan_priority != 0)
+ mlxsw_sp_acl_rulei_keymask_u32(rulei,
+ MLXSW_AFK_ELEMENT_PCP,
+ key->vlan_priority,
+ mask->vlan_priority);
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
+ mlxsw_sp_flower_parse_ipv4(rulei, f);
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS)
+ mlxsw_sp_flower_parse_ipv6(rulei, f);
+
+ err = mlxsw_sp_flower_parse_ports(mlxsw_sp, rulei, f, ip_proto);
+ if (err)
+ return err;
+ err = mlxsw_sp_flower_parse_tcp(mlxsw_sp, rulei, f, ip_proto);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask);
+ if (err)
+ return err;
+
+ return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, f->exts,
+ f->common.extack);
+}
+
+int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlxsw_sp_acl_rule_info *rulei;
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ struct mlxsw_sp_acl_rule *rule;
+ int err;
+
+ ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
+ f->common.chain_index,
+ MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie,
+ f->common.extack);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ goto err_rule_create;
+ }
+
+ rulei = mlxsw_sp_acl_rule_rulei(rule);
+ err = mlxsw_sp_flower_parse(mlxsw_sp, block, rulei, f);
+ if (err)
+ goto err_flower_parse;
+
+ err = mlxsw_sp_acl_rulei_commit(rulei);
+ if (err)
+ goto err_rulei_commit;
+
+ err = mlxsw_sp_acl_rule_add(mlxsw_sp, rule);
+ if (err)
+ goto err_rule_add;
+
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+ return 0;
+
+err_rule_add:
+err_rulei_commit:
+err_flower_parse:
+ mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
+err_rule_create:
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+ return err;
+}
+
+void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ struct mlxsw_sp_acl_rule *rule;
+
+ ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
+ f->common.chain_index,
+ MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
+ if (IS_ERR(ruleset))
+ return;
+
+ rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
+ if (rule) {
+ mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
+ mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
+ }
+
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+}
+
+int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ struct mlxsw_sp_acl_rule *rule;
+ u64 packets;
+ u64 lastuse;
+ u64 bytes;
+ int err;
+
+ ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
+ f->common.chain_index,
+ MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
+ if (WARN_ON(IS_ERR(ruleset)))
+ return -EINVAL;
+
+ rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
+ if (!rule)
+ return -EINVAL;
+
+ err = mlxsw_sp_acl_rule_get_stats(mlxsw_sp, rule, &packets, &bytes,
+ &lastuse);
+ if (err)
+ goto err_rule_get_stats;
+
+ tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+ return 0;
+
+err_rule_get_stats:
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+ return err;
+}
+
+int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ struct mlxsw_sp_acl_rule_info rulei;
+ int err;
+
+ memset(&rulei, 0, sizeof(rulei));
+ err = mlxsw_sp_flower_parse(mlxsw_sp, block, &rulei, f);
+ if (err)
+ return err;
+ ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
+ f->common.chain_index,
+ MLXSW_SP_ACL_PROFILE_FLOWER,
+ &rulei.values.elusage);
+
+ /* keep the reference to the ruleset */
+ return PTR_ERR_OR_ZERO(ruleset);
+}
+
+void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_block *block,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlxsw_sp_acl_ruleset *ruleset;
+
+ ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
+ f->common.chain_index,
+ MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
+ if (IS_ERR(ruleset))
+ return;
+ /* put the reference to the ruleset kept in create */
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+ mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
new file mode 100644
index 000000000..00db26c96
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
+
+#include "spectrum_ipip.h"
+
+struct ip_tunnel_parm
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+ return tun->parms;
+}
+
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+ struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+ return tun->parms;
+}
+
+static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
+{
+ return !!(parms.i_flags & TUNNEL_KEY);
+}
+
+static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
+{
+ return !!(parms.o_flags & TUNNEL_KEY);
+}
+
+static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
+{
+ return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
+ be32_to_cpu(parms.i_key) : 0;
+}
+
+static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
+{
+ return mlxsw_sp_ipip_parms4_has_okey(parms) ?
+ be32_to_cpu(parms.o_key) : 0;
+}
+
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
+}
+
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
+}
+
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
+}
+
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+ return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
+{
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
+
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_saddr(parms4);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_saddr(parms6);
+ }
+
+ WARN_ON(1);
+ return (union mlxsw_sp_l3addr) {0};
+}
+
+static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
+{
+
+ struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+
+ return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
+}
+
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
+{
+ struct ip_tunnel_parm parms4;
+ struct __ip6_tnl_parm parms6;
+
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ return mlxsw_sp_ipip_parms4_daddr(parms4);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+ return mlxsw_sp_ipip_parms6_daddr(parms6);
+ }
+
+ WARN_ON(1);
+ return (union mlxsw_sp_l3addr) {0};
+}
+
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
+{
+ union mlxsw_sp_l3addr naddr = {0};
+
+ return !memcmp(&addr, &naddr, sizeof(naddr));
+}
+
+static int
+mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+ __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
+ char ratr_pl[MLXSW_REG_RATR_LEN];
+
+ mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+ true, MLXSW_REG_RATR_TYPE_IPIP,
+ adj_index, rif_index);
+ mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
+ u32 tunnel_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+ char rtdp_pl[MLXSW_REG_RTDP_LEN];
+ struct ip_tunnel_parm parms;
+ unsigned int type_check;
+ bool has_ikey;
+ u32 daddr4;
+ u32 ikey;
+
+ parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
+ has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
+ ikey = mlxsw_sp_ipip_parms4_ikey(parms);
+
+ mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
+
+ type_check = has_ikey ?
+ MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
+ MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;
+
+ /* Linux demuxes tunnels based on packet SIP (which must match tunnel
+ * remote IP). Thus configure decap so that it filters out packets that
+ * are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is
+ * generated for packets that fail this criterion. Linux then handles
+ * such packets in slow path and generates ICMP destination unreachable.
+ */
+ daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));
+ mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index,
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+ type_check, has_ikey, daddr4, ikey);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp,
+ u32 dip, u8 prefix_len, u16 ul_vr_id,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+ mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op,
+ ul_vr_id, prefix_len, dip);
+ mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index)
+{
+ u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);
+ __be32 dip;
+ int err;
+
+ err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index,
+ ipip_entry);
+ if (err)
+ return err;
+
+ dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
+ ipip_entry->ol_dev).addr4;
+ return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip),
+ 32, ul_vr_id, op,
+ tunnel_index);
+}
+
+static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
+{
+ union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
+ union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
+
+ /* Tunnels with unset local or remote address are valid in Linux and
+ * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
+ * (NBMA) tunnels. In principle these can be offloaded, but the driver
+ * currently doesn't support this. So punt.
+ */
+ return !mlxsw_sp_l3addr_is_zero(saddr) &&
+ !mlxsw_sp_l3addr_is_zero(daddr);
+}
+
+static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev,
+ enum mlxsw_sp_l3proto ol_proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(ol_dev);
+ __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
+ bool inherit_ttl = tunnel->parms.iph.ttl == 0;
+ bool inherit_tos = tunnel->parms.iph.tos & 0x1;
+
+ return (tunnel->parms.i_flags & ~okflags) == 0 &&
+ (tunnel->parms.o_flags & ~okflags) == 0 &&
+ inherit_ttl && inherit_tos &&
+ mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev);
+}
+
+static struct mlxsw_sp_rif_ipip_lb_config
+mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev)
+{
+ struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
+
+ lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
+ return (struct mlxsw_sp_rif_ipip_lb_config){
+ .lb_ipipt = lb_ipipt,
+ .okey = mlxsw_sp_ipip_parms4_okey(parms),
+ .ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
+ .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
+ ol_dev),
+ };
+}
+
+static int
+mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct netlink_ext_ack *extack)
+{
+ union mlxsw_sp_l3addr old_saddr, new_saddr;
+ union mlxsw_sp_l3addr old_daddr, new_daddr;
+ struct ip_tunnel_parm new_parms;
+ bool update_tunnel = false;
+ bool update_decap = false;
+ bool update_nhs = false;
+ int err = 0;
+
+ new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
+
+ new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
+ old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
+ new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
+ old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
+
+ if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
+ u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+
+ /* Since the local address has changed, if there is another
+ * tunnel with a matching saddr, both need to be demoted.
+ */
+ if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp,
+ MLXSW_SP_L3_PROTO_IPV4,
+ new_saddr, ul_tb_id,
+ ipip_entry)) {
+ mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+ return 0;
+ }
+
+ update_tunnel = true;
+ } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_okey(new_parms)) ||
+ ipip_entry->parms4.link != new_parms.link) {
+ update_tunnel = true;
+ } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
+ update_nhs = true;
+ } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
+ mlxsw_sp_ipip_parms4_ikey(new_parms)) {
+ update_decap = true;
+ }
+
+ if (update_tunnel)
+ err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+ true, true, true,
+ extack);
+ else if (update_nhs)
+ err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+ false, false, true,
+ extack);
+ else if (update_decap)
+ err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+ false, false, false,
+ extack);
+
+ ipip_entry->parms4 = new_parms;
+ return err;
+}
+
+static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
+ .dev_type = ARPHRD_IPGRE,
+ .ul_proto = MLXSW_SP_L3_PROTO_IPV4,
+ .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4,
+ .fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
+ .can_offload = mlxsw_sp_ipip_can_offload_gre4,
+ .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
+ .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4,
+};
+
+const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
+ [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
new file mode 100644
index 000000000..bb5c4d4a5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_IPIP_H_
+#define _MLXSW_IPIP_H_
+
+#include "spectrum_router.h"
+#include <net/ip_fib.h>
+#include <linux/if_tunnel.h>
+
+struct ip_tunnel_parm
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev);
+
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
+enum mlxsw_sp_ipip_type {
+ MLXSW_SP_IPIP_TYPE_GRE4,
+ MLXSW_SP_IPIP_TYPE_MAX,
+};
+
+struct mlxsw_sp_ipip_entry {
+ enum mlxsw_sp_ipip_type ipipt;
+ struct net_device *ol_dev; /* Overlay. */
+ struct mlxsw_sp_rif_ipip_lb *ol_lb;
+ struct mlxsw_sp_fib_entry *decap_fib_entry;
+ struct list_head ipip_list_node;
+ union {
+ struct ip_tunnel_parm parms4;
+ };
+};
+
+struct mlxsw_sp_ipip_ops {
+ int dev_type;
+ enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
+
+ int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry);
+
+ bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev,
+ enum mlxsw_sp_l3proto ol_proto);
+
+ /* Return a configuration for creating an overlay loopback RIF. */
+ struct mlxsw_sp_rif_ipip_lb_config
+ (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev);
+
+ int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index);
+
+ int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct netlink_ext_ack *extack);
+};
+
+extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
+
+#endif /* _MLXSW_IPIP_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
new file mode 100644
index 000000000..1e4cdee7b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "spectrum.h"
+
+struct mlxsw_sp_kvdl {
+ const struct mlxsw_sp_kvdl_ops *kvdl_ops;
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
+{
+ const struct mlxsw_sp_kvdl_ops *kvdl_ops = mlxsw_sp->kvdl_ops;
+ struct mlxsw_sp_kvdl *kvdl;
+ int err;
+
+ kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl) + kvdl_ops->priv_size,
+ GFP_KERNEL);
+ if (!kvdl)
+ return -ENOMEM;
+ kvdl->kvdl_ops = kvdl_ops;
+ mlxsw_sp->kvdl = kvdl;
+
+ err = kvdl_ops->init(mlxsw_sp, kvdl->priv);
+ if (err)
+ goto err_init;
+ return 0;
+
+err_init:
+ kfree(kvdl);
+ return err;
+}
+
+void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+
+ kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv);
+ kfree(kvdl);
+}
+
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count, u32 *p_entry_index)
+{
+ struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+
+ return kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type,
+ entry_count, p_entry_index);
+}
+
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count, int entry_index)
+{
+ struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+
+ kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type,
+ entry_count, entry_index);
+}
+
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_kvdl_entry_type type,
+ unsigned int entry_count,
+ unsigned int *p_alloc_count)
+{
+ struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+
+ return kvdl->kvdl_ops->alloc_size_query(mlxsw_sp, kvdl->priv, type,
+ entry_count, p_alloc_count);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
new file mode 100644
index 000000000..f1874578a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -0,0 +1,1049 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/rhashtable.h>
+#include <net/ipv6.h>
+
+#include "spectrum_mr.h"
+#include "spectrum_router.h"
+
+struct mlxsw_sp_mr {
+ const struct mlxsw_sp_mr_ops *mr_ops;
+ void *catchall_route_priv;
+ struct delayed_work stats_update_dw;
+ struct list_head table_list;
+#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+struct mlxsw_sp_mr_vif;
+struct mlxsw_sp_mr_vif_ops {
+ bool (*is_regular)(const struct mlxsw_sp_mr_vif *vif);
+};
+
+struct mlxsw_sp_mr_vif {
+ struct net_device *dev;
+ const struct mlxsw_sp_rif *rif;
+ unsigned long vif_flags;
+
+ /* A list of route_vif_entry structs that point to routes that the VIF
+ * instance is used as one of the egress VIFs
+ */
+ struct list_head route_evif_list;
+
+ /* A list of route_vif_entry structs that point to routes that the VIF
+ * instance is used as an ingress VIF
+ */
+ struct list_head route_ivif_list;
+
+ /* Protocol specific operations for a VIF */
+ const struct mlxsw_sp_mr_vif_ops *ops;
+};
+
+struct mlxsw_sp_mr_route_vif_entry {
+ struct list_head vif_node;
+ struct list_head route_node;
+ struct mlxsw_sp_mr_vif *mr_vif;
+ struct mlxsw_sp_mr_route *mr_route;
+};
+
+struct mlxsw_sp_mr_table;
+struct mlxsw_sp_mr_table_ops {
+ bool (*is_route_valid)(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mr_mfc *mfc);
+ void (*key_create)(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mr_mfc *mfc);
+ bool (*is_route_starg)(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_mr_route *mr_route);
+};
+
+struct mlxsw_sp_mr_table {
+ struct list_head node;
+ enum mlxsw_sp_l3proto proto;
+ struct mlxsw_sp *mlxsw_sp;
+ u32 vr_id;
+ struct mlxsw_sp_mr_vif vifs[MAXVIFS];
+ struct list_head route_list;
+ struct rhashtable route_ht;
+ const struct mlxsw_sp_mr_table_ops *ops;
+ char catchall_route_priv[0];
+ /* catchall_route_priv has to be always the last item */
+};
+
+struct mlxsw_sp_mr_route {
+ struct list_head node;
+ struct rhash_head ht_node;
+ struct mlxsw_sp_mr_route_key key;
+ enum mlxsw_sp_mr_route_action route_action;
+ u16 min_mtu;
+ struct mr_mfc *mfc;
+ void *route_priv;
+ const struct mlxsw_sp_mr_table *mr_table;
+ /* A list of route_vif_entry structs that point to the egress VIFs */
+ struct list_head evif_list;
+ /* A route_vif_entry struct that point to the ingress VIF */
+ struct mlxsw_sp_mr_route_vif_entry ivif;
+};
+
+static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_mr_route_key),
+ .key_offset = offsetof(struct mlxsw_sp_mr_route, key),
+ .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node),
+ .automatic_shrinking = true,
+};
+
+static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif)
+{
+ return vif->ops->is_regular(vif) && vif->dev && vif->rif;
+}
+
+static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif)
+{
+ return vif->dev;
+}
+
+static bool
+mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
+{
+ vifi_t ivif = mr_route->mfc->mfc_parent;
+
+ return mr_route->mfc->mfc_un.res.ttls[ivif] != 255;
+}
+
+static int
+mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+ int valid_evifs;
+
+ valid_evifs = 0;
+ list_for_each_entry(rve, &mr_route->evif_list, route_node)
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ valid_evifs++;
+ return valid_evifs;
+}
+
+static enum mlxsw_sp_mr_route_action
+mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+
+ /* If the ingress port is not regular and resolved, trap the route */
+ if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+ /* The kernel does not match a (*,G) route that the ingress interface is
+ * not one of the egress interfaces, so trap these kind of routes.
+ */
+ if (mr_route->mr_table->ops->is_route_starg(mr_route->mr_table,
+ mr_route) &&
+ !mlxsw_sp_mr_route_ivif_in_evifs(mr_route))
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+ /* If the route has no valid eVIFs, trap it. */
+ if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route))
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+ /* If one of the eVIFs has no RIF, trap-and-forward the route as there
+ * is some more routing to do in software too.
+ */
+ list_for_each_entry(rve, &mr_route->evif_list, route_node)
+ if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif)
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD;
+
+ return MLXSW_SP_MR_ROUTE_ACTION_FORWARD;
+}
+
+static enum mlxsw_sp_mr_route_prio
+mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route)
+{
+ return mr_route->mr_table->ops->is_route_starg(mr_route->mr_table,
+ mr_route) ?
+ MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG;
+}
+
+static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route,
+ struct mlxsw_sp_mr_vif *mr_vif)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+
+ rve = kzalloc(sizeof(*rve), GFP_KERNEL);
+ if (!rve)
+ return -ENOMEM;
+ rve->mr_route = mr_route;
+ rve->mr_vif = mr_vif;
+ list_add_tail(&rve->route_node, &mr_route->evif_list);
+ list_add_tail(&rve->vif_node, &mr_vif->route_evif_list);
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ list_del(&rve->route_node);
+ list_del(&rve->vif_node);
+ kfree(rve);
+}
+
+static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route,
+ struct mlxsw_sp_mr_vif *mr_vif)
+{
+ mr_route->ivif.mr_route = mr_route;
+ mr_route->ivif.mr_vif = mr_vif;
+ list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list);
+}
+
+static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route)
+{
+ list_del(&mr_route->ivif.vif_node);
+}
+
+static int
+mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route,
+ struct mlxsw_sp_mr_route_info *route_info)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+ u16 *erif_indices;
+ u16 irif_index;
+ u16 erif = 0;
+
+ erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices),
+ GFP_KERNEL);
+ if (!erif_indices)
+ return -ENOMEM;
+
+ list_for_each_entry(rve, &mr_route->evif_list, route_node) {
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+ u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
+
+ erif_indices[erif++] = rifi;
+ }
+ }
+
+ if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
+ irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif);
+ else
+ irif_index = 0;
+
+ route_info->irif_index = irif_index;
+ route_info->erif_indices = erif_indices;
+ route_info->min_mtu = mr_route->min_mtu;
+ route_info->route_action = mr_route->route_action;
+ route_info->erif_num = erif;
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info)
+{
+ kfree(route_info->erif_indices);
+}
+
+static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route,
+ bool replace)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr_route_info route_info;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ int err;
+
+ err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info);
+ if (err)
+ return err;
+
+ if (!replace) {
+ struct mlxsw_sp_mr_route_params route_params;
+
+ mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size,
+ GFP_KERNEL);
+ if (!mr_route->route_priv) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ route_params.key = mr_route->key;
+ route_params.value = route_info;
+ route_params.prio = mlxsw_sp_mr_route_prio(mr_route);
+ err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
+ mr_route->route_priv,
+ &route_params);
+ if (err)
+ kfree(mr_route->route_priv);
+ } else {
+ err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv,
+ &route_info);
+ }
+out:
+ mlxsw_sp_mr_route_info_destroy(&route_info);
+ return err;
+}
+
+static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv);
+ kfree(mr_route->route_priv);
+}
+
+static struct mlxsw_sp_mr_route *
+mlxsw_sp_mr_route_create(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
+ struct mlxsw_sp_mr_route *mr_route;
+ int err = 0;
+ int i;
+
+ /* Allocate and init a new route and fill it with parameters */
+ mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL);
+ if (!mr_route)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&mr_route->evif_list);
+
+ /* Find min_mtu and link iVIF and eVIFs */
+ mr_route->min_mtu = ETH_MAX_MTU;
+ mr_cache_hold(mfc);
+ mr_route->mfc = mfc;
+ mr_table->ops->key_create(mr_table, &mr_route->key, mr_route->mfc);
+
+ mr_route->mr_table = mr_table;
+ for (i = 0; i < MAXVIFS; i++) {
+ if (mfc->mfc_un.res.ttls[i] != 255) {
+ err = mlxsw_sp_mr_route_evif_link(mr_route,
+ &mr_table->vifs[i]);
+ if (err)
+ goto err;
+ if (mr_table->vifs[i].dev &&
+ mr_table->vifs[i].dev->mtu < mr_route->min_mtu)
+ mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
+ }
+ }
+ mlxsw_sp_mr_route_ivif_link(mr_route,
+ &mr_table->vifs[mfc->mfc_parent]);
+
+ mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
+ return mr_route;
+err:
+ mr_cache_put(mfc);
+ list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
+ mlxsw_sp_mr_route_evif_unlink(rve);
+ kfree(mr_route);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
+
+ mlxsw_sp_mr_route_ivif_unlink(mr_route);
+ mr_cache_put(mr_route->mfc);
+ list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
+ mlxsw_sp_mr_route_evif_unlink(rve);
+ kfree(mr_route);
+}
+
+static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
+ bool offload)
+{
+ if (offload)
+ mr_route->mfc->mfc_flags |= MFC_OFFLOAD;
+ else
+ mr_route->mfc->mfc_flags &= ~MFC_OFFLOAD;
+}
+
+static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
+{
+ bool offload;
+
+ offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+ mlxsw_sp_mr_mfc_offload_set(mr_route, offload);
+}
+
+static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ mlxsw_sp_mr_mfc_offload_set(mr_route, false);
+ mlxsw_sp_mr_route_erase(mr_table, mr_route);
+ rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+ list_del(&mr_route->node);
+ mlxsw_sp_mr_route_destroy(mr_table, mr_route);
+}
+
+int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc, bool replace)
+{
+ struct mlxsw_sp_mr_route *mr_orig_route = NULL;
+ struct mlxsw_sp_mr_route *mr_route;
+ int err;
+
+ if (!mr_table->ops->is_route_valid(mr_table, mfc))
+ return -EINVAL;
+
+ /* Create a new route */
+ mr_route = mlxsw_sp_mr_route_create(mr_table, mfc);
+ if (IS_ERR(mr_route))
+ return PTR_ERR(mr_route);
+
+ /* Find any route with a matching key */
+ mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht,
+ &mr_route->key,
+ mlxsw_sp_mr_route_ht_params);
+ if (replace) {
+ /* On replace case, make the route point to the new route_priv.
+ */
+ if (WARN_ON(!mr_orig_route)) {
+ err = -ENOENT;
+ goto err_no_orig_route;
+ }
+ mr_route->route_priv = mr_orig_route->route_priv;
+ } else if (mr_orig_route) {
+ /* On non replace case, if another route with the same key was
+ * found, abort, as duplicate routes are used for proxy routes.
+ */
+ dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+ "Offloading proxy routes is not supported.\n");
+ err = -EINVAL;
+ goto err_duplicate_route;
+ }
+
+ /* Put it in the table data-structures */
+ list_add_tail(&mr_route->node, &mr_table->route_list);
+ err = rhashtable_insert_fast(&mr_table->route_ht,
+ &mr_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ /* Write the route to the hardware */
+ err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
+ if (err)
+ goto err_mr_route_write;
+
+ /* Destroy the original route */
+ if (replace) {
+ rhashtable_remove_fast(&mr_table->route_ht,
+ &mr_orig_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+ list_del(&mr_orig_route->node);
+ mlxsw_sp_mr_route_destroy(mr_table, mr_orig_route);
+ }
+
+ mlxsw_sp_mr_mfc_offload_update(mr_route);
+ return 0;
+
+err_mr_route_write:
+ rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+err_rhashtable_insert:
+ list_del(&mr_route->node);
+err_no_orig_route:
+err_duplicate_route:
+ mlxsw_sp_mr_route_destroy(mr_table, mr_route);
+ return err;
+}
+
+void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc)
+{
+ struct mlxsw_sp_mr_route *mr_route;
+ struct mlxsw_sp_mr_route_key key;
+
+ mr_table->ops->key_create(mr_table, &key, mfc);
+ mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
+ mlxsw_sp_mr_route_ht_params);
+ if (mr_route)
+ __mlxsw_sp_mr_route_del(mr_table, mr_route);
+}
+
+/* Should be called after the VIF struct is updated */
+static int
+mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ enum mlxsw_sp_mr_route_action route_action;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u16 irif_index;
+ int err;
+
+ route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+ if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return 0;
+
+ /* rve->mr_vif->rif is guaranteed to be valid at this stage */
+ irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv,
+ irif_index);
+ if (err)
+ return err;
+
+ err = mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ route_action);
+ if (err)
+ /* No need to rollback here because the iRIF change only takes
+ * place after the action has been updated.
+ */
+ return err;
+
+ rve->mr_route->route_action = route_action;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv,
+ MLXSW_SP_MR_ROUTE_ACTION_TRAP);
+ rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+}
+
+/* Should be called after the RIF struct is updated */
+static int
+mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ enum mlxsw_sp_mr_route_action route_action;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u16 erif_index = 0;
+ int err;
+
+ /* Add the eRIF */
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+ erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ err = mr->mr_ops->route_erif_add(mlxsw_sp,
+ rve->mr_route->route_priv,
+ erif_index);
+ if (err)
+ return err;
+ }
+
+ /* Update the route action, as the new eVIF can be a tunnel or a pimreg
+ * device which will require updating the action.
+ */
+ route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+ if (route_action != rve->mr_route->route_action) {
+ err = mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ route_action);
+ if (err)
+ goto err_route_action_update;
+ }
+
+ /* Update the minimum MTU */
+ if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) {
+ rve->mr_route->min_mtu = rve->mr_vif->dev->mtu;
+ err = mr->mr_ops->route_min_mtu_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ rve->mr_route->min_mtu);
+ if (err)
+ goto err_route_min_mtu_update;
+ }
+
+ rve->mr_route->route_action = route_action;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+ return 0;
+
+err_route_min_mtu_update:
+ if (route_action != rve->mr_route->route_action)
+ mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ rve->mr_route->route_action);
+err_route_action_update:
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
+ erif_index);
+ return err;
+}
+
+/* Should be called before the RIF struct is updated */
+static void
+mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ enum mlxsw_sp_mr_route_action route_action;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u16 rifi;
+
+ /* If the unresolved RIF was not valid, no need to delete it */
+ if (!mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ return;
+
+ /* Update the route action: if there is only one valid eVIF in the
+ * route, set the action to trap as the VIF deletion will lead to zero
+ * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to
+ * determine the route action.
+ */
+ if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1)
+ route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+ else
+ route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+ if (route_action != rve->mr_route->route_action)
+ mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ route_action);
+
+ /* Delete the erif from the route */
+ rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi);
+ rve->mr_route->route_action = route_action;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+}
+
+static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev,
+ struct mlxsw_sp_mr_vif *mr_vif,
+ unsigned long vif_flags,
+ const struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_mr_route_vif_entry *irve, *erve;
+ int err;
+
+ /* Update the VIF */
+ mr_vif->dev = dev;
+ mr_vif->rif = rif;
+ mr_vif->vif_flags = vif_flags;
+
+ /* Update all routes where this VIF is used as an unresolved iRIF */
+ list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) {
+ err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve);
+ if (err)
+ goto err_irif_unresolve;
+ }
+
+ /* Update all routes where this VIF is used as an unresolved eRIF */
+ list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) {
+ err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve);
+ if (err)
+ goto err_erif_unresolve;
+ }
+ return 0;
+
+err_erif_unresolve:
+ list_for_each_entry_continue_reverse(erve, &mr_vif->route_evif_list,
+ vif_node)
+ mlxsw_sp_mr_route_evif_unresolve(mr_table, erve);
+err_irif_unresolve:
+ list_for_each_entry_continue_reverse(irve, &mr_vif->route_ivif_list,
+ vif_node)
+ mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve);
+ mr_vif->rif = NULL;
+ return err;
+}
+
+static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev,
+ struct mlxsw_sp_mr_vif *mr_vif)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+
+ /* Update all routes where this VIF is used as an unresolved eRIF */
+ list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node)
+ mlxsw_sp_mr_route_evif_unresolve(mr_table, rve);
+
+ /* Update all routes where this VIF is used as an unresolved iRIF */
+ list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node)
+ mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve);
+
+ /* Update the VIF */
+ mr_vif->dev = dev;
+ mr_vif->rif = NULL;
+}
+
+int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev, vifi_t vif_index,
+ unsigned long vif_flags, const struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
+
+ if (WARN_ON(vif_index >= MAXVIFS))
+ return -EINVAL;
+ if (mr_vif->dev)
+ return -EEXIST;
+ return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif);
+}
+
+void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index)
+{
+ struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
+
+ if (WARN_ON(vif_index >= MAXVIFS))
+ return;
+ if (WARN_ON(!mr_vif->dev))
+ return;
+ mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif);
+}
+
+static struct mlxsw_sp_mr_vif *
+mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table,
+ const struct net_device *dev)
+{
+ vifi_t vif_index;
+
+ for (vif_index = 0; vif_index < MAXVIFS; vif_index++)
+ if (mr_table->vifs[vif_index].dev == dev)
+ return &mr_table->vifs[vif_index];
+ return NULL;
+}
+
+int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif)
+{
+ const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+ struct mlxsw_sp_mr_vif *mr_vif;
+
+ if (!rif_dev)
+ return 0;
+
+ mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+ if (!mr_vif)
+ return 0;
+ return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif,
+ mr_vif->vif_flags, rif);
+}
+
+void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif)
+{
+ const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+ struct mlxsw_sp_mr_vif *mr_vif;
+
+ if (!rif_dev)
+ return;
+
+ mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+ if (!mr_vif)
+ return;
+ mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif);
+}
+
+void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif, int mtu)
+{
+ const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ struct mlxsw_sp_mr_vif *mr_vif;
+
+ if (!rif_dev)
+ return;
+
+ /* Search for a VIF that use that RIF */
+ mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+ if (!mr_vif)
+ return;
+
+ /* Update all the routes that uses that VIF as eVIF */
+ list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) {
+ if (mtu < rve->mr_route->min_mtu) {
+ rve->mr_route->min_mtu = mtu;
+ mr->mr_ops->route_min_mtu_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ mtu);
+ }
+ }
+}
+
+/* Protocol specific functions */
+static bool
+mlxsw_sp_mr_route4_validate(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mr_mfc *c)
+{
+ struct mfc_cache *mfc = (struct mfc_cache *) c;
+
+ /* If the route is a (*,*) route, abort, as these kind of routes are
+ * used for proxy routes.
+ */
+ if (mfc->mfc_origin == htonl(INADDR_ANY) &&
+ mfc->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+ "Offloading proxy routes is not supported.\n");
+ return false;
+ }
+ return true;
+}
+
+static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mr_mfc *c)
+{
+ const struct mfc_cache *mfc = (struct mfc_cache *) c;
+ bool starg;
+
+ starg = (mfc->mfc_origin == htonl(INADDR_ANY));
+
+ memset(key, 0, sizeof(*key));
+ key->vrid = mr_table->vr_id;
+ key->proto = MLXSW_SP_L3_PROTO_IPV4;
+ key->group.addr4 = mfc->mfc_mcastgrp;
+ key->group_mask.addr4 = htonl(0xffffffff);
+ key->source.addr4 = mfc->mfc_origin;
+ key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff);
+}
+
+static bool mlxsw_sp_mr_route4_starg(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_mr_route *mr_route)
+{
+ return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY);
+}
+
+static bool mlxsw_sp_mr_vif4_is_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+ return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
+}
+
+static bool
+mlxsw_sp_mr_route6_validate(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mr_mfc *c)
+{
+ struct mfc6_cache *mfc = (struct mfc6_cache *) c;
+
+ /* If the route is a (*,*) route, abort, as these kind of routes are
+ * used for proxy routes.
+ */
+ if (ipv6_addr_any(&mfc->mf6c_origin) &&
+ ipv6_addr_any(&mfc->mf6c_mcastgrp)) {
+ dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+ "Offloading proxy routes is not supported.\n");
+ return false;
+ }
+ return true;
+}
+
+static void mlxsw_sp_mr_route6_key(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mr_mfc *c)
+{
+ const struct mfc6_cache *mfc = (struct mfc6_cache *) c;
+
+ memset(key, 0, sizeof(*key));
+ key->vrid = mr_table->vr_id;
+ key->proto = MLXSW_SP_L3_PROTO_IPV6;
+ key->group.addr6 = mfc->mf6c_mcastgrp;
+ memset(&key->group_mask.addr6, 0xff, sizeof(key->group_mask.addr6));
+ key->source.addr6 = mfc->mf6c_origin;
+ if (!ipv6_addr_any(&mfc->mf6c_origin))
+ memset(&key->source_mask.addr6, 0xff,
+ sizeof(key->source_mask.addr6));
+}
+
+static bool mlxsw_sp_mr_route6_starg(const struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_mr_route *mr_route)
+{
+ return ipv6_addr_any(&mr_route->key.source_mask.addr6);
+}
+
+static bool mlxsw_sp_mr_vif6_is_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+ return !(vif->vif_flags & MIFF_REGISTER);
+}
+
+static struct
+mlxsw_sp_mr_vif_ops mlxsw_sp_mr_vif_ops_arr[] = {
+ {
+ .is_regular = mlxsw_sp_mr_vif4_is_regular,
+ },
+ {
+ .is_regular = mlxsw_sp_mr_vif6_is_regular,
+ },
+};
+
+static struct
+mlxsw_sp_mr_table_ops mlxsw_sp_mr_table_ops_arr[] = {
+ {
+ .is_route_valid = mlxsw_sp_mr_route4_validate,
+ .key_create = mlxsw_sp_mr_route4_key,
+ .is_route_starg = mlxsw_sp_mr_route4_starg,
+ },
+ {
+ .is_route_valid = mlxsw_sp_mr_route6_validate,
+ .key_create = mlxsw_sp_mr_route6_key,
+ .is_route_starg = mlxsw_sp_mr_route6_starg,
+ },
+
+};
+
+struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
+ u32 vr_id,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_mr_route_params catchall_route_params = {
+ .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+ .key = {
+ .vrid = vr_id,
+ .proto = proto,
+ },
+ .value = {
+ .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP,
+ }
+ };
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ struct mlxsw_sp_mr_table *mr_table;
+ int err;
+ int i;
+
+ mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size,
+ GFP_KERNEL);
+ if (!mr_table)
+ return ERR_PTR(-ENOMEM);
+
+ mr_table->vr_id = vr_id;
+ mr_table->mlxsw_sp = mlxsw_sp;
+ mr_table->proto = proto;
+ mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto];
+ INIT_LIST_HEAD(&mr_table->route_list);
+
+ err = rhashtable_init(&mr_table->route_ht,
+ &mlxsw_sp_mr_route_ht_params);
+ if (err)
+ goto err_route_rhashtable_init;
+
+ for (i = 0; i < MAXVIFS; i++) {
+ INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list);
+ INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list);
+ mr_table->vifs[i].ops = &mlxsw_sp_mr_vif_ops_arr[proto];
+ }
+
+ err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
+ mr_table->catchall_route_priv,
+ &catchall_route_params);
+ if (err)
+ goto err_ops_route_create;
+ list_add_tail(&mr_table->node, &mr->table_list);
+ return mr_table;
+
+err_ops_route_create:
+ rhashtable_destroy(&mr_table->route_ht);
+err_route_rhashtable_init:
+ kfree(mr_table);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
+ list_del(&mr_table->node);
+ mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
+ &mr_table->catchall_route_priv);
+ rhashtable_destroy(&mr_table->route_ht);
+ kfree(mr_table);
+}
+
+void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
+{
+ struct mlxsw_sp_mr_route *mr_route, *tmp;
+ int i;
+
+ list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
+ __mlxsw_sp_mr_route_del(mr_table, mr_route);
+
+ for (i = 0; i < MAXVIFS; i++) {
+ mr_table->vifs[i].dev = NULL;
+ mr_table->vifs[i].rif = NULL;
+ }
+}
+
+bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table)
+{
+ int i;
+
+ for (i = 0; i < MAXVIFS; i++)
+ if (mr_table->vifs[i].dev)
+ return false;
+ return list_empty(&mr_table->route_list);
+}
+
+static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u64 packets, bytes;
+
+ if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return;
+
+ mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
+ &bytes);
+
+ if (mr_route->mfc->mfc_un.res.pkt != packets)
+ mr_route->mfc->mfc_un.res.lastuse = jiffies;
+ mr_route->mfc->mfc_un.res.pkt = packets;
+ mr_route->mfc->mfc_un.res.bytes = bytes;
+}
+
+static void mlxsw_sp_mr_stats_update(struct work_struct *work)
+{
+ struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr,
+ stats_update_dw.work);
+ struct mlxsw_sp_mr_table *mr_table;
+ struct mlxsw_sp_mr_route *mr_route;
+ unsigned long interval;
+
+ rtnl_lock();
+ list_for_each_entry(mr_table, &mr->table_list, node)
+ list_for_each_entry(mr_route, &mr_table->route_list, node)
+ mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
+ mr_route);
+ rtnl_unlock();
+
+ interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
+ mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
+}
+
+int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_mr_ops *mr_ops)
+{
+ struct mlxsw_sp_mr *mr;
+ unsigned long interval;
+ int err;
+
+ mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL);
+ if (!mr)
+ return -ENOMEM;
+ mr->mr_ops = mr_ops;
+ mlxsw_sp->mr = mr;
+ INIT_LIST_HEAD(&mr->table_list);
+
+ err = mr_ops->init(mlxsw_sp, mr->priv);
+ if (err)
+ goto err;
+
+ /* Create the delayed work for counter updates */
+ INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update);
+ interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
+ mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
+ return 0;
+err:
+ kfree(mr);
+ return err;
+}
+
+void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ cancel_delayed_work_sync(&mr->stats_update_dw);
+ mr->mr_ops->fini(mlxsw_sp, mr->priv);
+ kfree(mr);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
new file mode 100644
index 000000000..3cde3671f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_MCROUTER_H
+#define _MLXSW_SPECTRUM_MCROUTER_H
+
+#include <linux/mroute.h>
+#include <linux/mroute6.h>
+#include "spectrum_router.h"
+#include "spectrum.h"
+
+enum mlxsw_sp_mr_route_action {
+ MLXSW_SP_MR_ROUTE_ACTION_FORWARD,
+ MLXSW_SP_MR_ROUTE_ACTION_TRAP,
+ MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD,
+};
+
+struct mlxsw_sp_mr_route_key {
+ int vrid;
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr group;
+ union mlxsw_sp_l3addr group_mask;
+ union mlxsw_sp_l3addr source;
+ union mlxsw_sp_l3addr source_mask;
+};
+
+struct mlxsw_sp_mr_route_info {
+ enum mlxsw_sp_mr_route_action route_action;
+ u16 irif_index;
+ u16 *erif_indices;
+ size_t erif_num;
+ u16 min_mtu;
+};
+
+struct mlxsw_sp_mr_route_params {
+ struct mlxsw_sp_mr_route_key key;
+ struct mlxsw_sp_mr_route_info value;
+ enum mlxsw_sp_mr_route_prio prio;
+};
+
+struct mlxsw_sp_mr_ops {
+ int priv_size;
+ int route_priv_size;
+ int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+ int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_params *route_params);
+ int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ struct mlxsw_sp_mr_route_info *route_info);
+ int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u64 *packets, u64 *bytes);
+ int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ enum mlxsw_sp_mr_route_action route_action);
+ int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 min_mtu);
+ int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 irif_index);
+ int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 erif_index);
+ int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 erif_index);
+ void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv);
+ void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+};
+
+struct mlxsw_sp_mr;
+struct mlxsw_sp_mr_table;
+
+int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_mr_ops *mr_ops);
+void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc, bool replace);
+void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mr_mfc *mfc);
+int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev, vifi_t vif_index,
+ unsigned long vif_flags,
+ const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index);
+int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif, int mtu);
+struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id,
+ enum mlxsw_sp_l3proto proto);
+void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table);
+void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table);
+bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
new file mode 100644
index 000000000..221aa6a47
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+
+#include "spectrum_mr_tcam.h"
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_mr.h"
+
+struct mlxsw_sp_mr_tcam {
+ void *priv;
+};
+
+/* This struct maps to one RIGR2 register entry */
+struct mlxsw_sp_mr_erif_sublist {
+ struct list_head list;
+ u32 rigr2_kvdl_index;
+ int num_erifs;
+ u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS];
+ bool synced;
+};
+
+struct mlxsw_sp_mr_tcam_erif_list {
+ struct list_head erif_sublists;
+ u32 kvdl_index;
+};
+
+static bool
+mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist)
+{
+ int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MC_ERIF_LIST_ENTRIES);
+
+ return erif_sublist->num_erifs == erif_list_entries;
+}
+
+static void
+mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ INIT_LIST_HEAD(&erif_list->erif_sublists);
+}
+
+static struct mlxsw_sp_mr_erif_sublist *
+mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist;
+ int err;
+
+ erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL);
+ if (!erif_sublist)
+ return ERR_PTR(-ENOMEM);
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+ 1, &erif_sublist->rigr2_kvdl_index);
+ if (err) {
+ kfree(erif_sublist);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&erif_sublist->list, &erif_list->erif_sublists);
+ return erif_sublist;
+}
+
+static void
+mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist)
+{
+ list_del(&erif_sublist->list);
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+ 1, erif_sublist->rigr2_kvdl_index);
+ kfree(erif_sublist);
+}
+
+static int
+mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list,
+ u16 erif_index)
+{
+ struct mlxsw_sp_mr_erif_sublist *sublist;
+
+ /* If either there is no erif_entry or the last one is full, allocate a
+ * new one.
+ */
+ if (list_empty(&erif_list->erif_sublists)) {
+ sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list);
+ if (IS_ERR(sublist))
+ return PTR_ERR(sublist);
+ erif_list->kvdl_index = sublist->rigr2_kvdl_index;
+ } else {
+ sublist = list_last_entry(&erif_list->erif_sublists,
+ struct mlxsw_sp_mr_erif_sublist,
+ list);
+ sublist->synced = false;
+ if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) {
+ sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp,
+ erif_list);
+ if (IS_ERR(sublist))
+ return PTR_ERR(sublist);
+ }
+ }
+
+ /* Add the eRIF to the last entry's last index */
+ sublist->erif_indices[sublist->num_erifs++] = erif_index;
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp;
+
+ list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists,
+ list)
+ mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist);
+}
+
+static int
+mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_sp_mr_erif_sublist *curr_sublist;
+ char rigr2_pl[MLXSW_REG_RIGR2_LEN];
+ int err;
+ int i;
+
+ list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) {
+ if (curr_sublist->synced)
+ continue;
+
+ /* If the sublist is not the last one, pack the next index */
+ if (list_is_last(&curr_sublist->list,
+ &erif_list->erif_sublists)) {
+ mlxsw_reg_rigr2_pack(rigr2_pl,
+ curr_sublist->rigr2_kvdl_index,
+ false, 0);
+ } else {
+ struct mlxsw_sp_mr_erif_sublist *next_sublist;
+
+ next_sublist = list_next_entry(curr_sublist, list);
+ mlxsw_reg_rigr2_pack(rigr2_pl,
+ curr_sublist->rigr2_kvdl_index,
+ true,
+ next_sublist->rigr2_kvdl_index);
+ }
+
+ /* Pack all the erifs */
+ for (i = 0; i < curr_sublist->num_erifs; i++) {
+ u16 erif_index = curr_sublist->erif_indices[i];
+
+ mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true,
+ erif_index);
+ }
+
+ /* Write the entry */
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2),
+ rigr2_pl);
+ if (err)
+ /* No need of a rollback here because this
+ * hardware entry should not be pointed yet.
+ */
+ return err;
+ curr_sublist->synced = true;
+ }
+ return 0;
+}
+
+static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to,
+ struct mlxsw_sp_mr_tcam_erif_list *from)
+{
+ list_splice(&from->erif_sublists, &to->erif_sublists);
+ to->kvdl_index = from->kvdl_index;
+}
+
+struct mlxsw_sp_mr_tcam_route {
+ struct mlxsw_sp_mr_tcam_erif_list erif_list;
+ struct mlxsw_afa_block *afa_block;
+ u32 counter_index;
+ enum mlxsw_sp_mr_route_action action;
+ struct mlxsw_sp_mr_route_key key;
+ u16 irif_index;
+ u16 min_mtu;
+ void *priv;
+};
+
+static struct mlxsw_afa_block *
+mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_mr_route_action route_action,
+ u16 irif_index, u32 counter_index,
+ u16 min_mtu,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ afa_block = mlxsw_afa_block_create(mlxsw_sp->afa);
+ if (IS_ERR(afa_block))
+ return afa_block;
+
+ err = mlxsw_afa_block_append_allocated_counter(afa_block,
+ counter_index);
+ if (err)
+ goto err;
+
+ switch (route_action) {
+ case MLXSW_SP_MR_ROUTE_ACTION_TRAP:
+ err = mlxsw_afa_block_append_trap(afa_block,
+ MLXSW_TRAP_ID_ACL1);
+ if (err)
+ goto err;
+ break;
+ case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD:
+ case MLXSW_SP_MR_ROUTE_ACTION_FORWARD:
+ /* If we are about to append a multicast router action, commit
+ * the erif_list.
+ */
+ err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list);
+ if (err)
+ goto err;
+
+ err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index,
+ min_mtu, false,
+ erif_list->kvdl_index);
+ if (err)
+ goto err;
+
+ if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) {
+ err = mlxsw_afa_block_append_trap_and_forward(afa_block,
+ MLXSW_TRAP_ID_ACL2);
+ if (err)
+ goto err;
+ }
+ break;
+ default:
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = mlxsw_afa_block_commit(afa_block);
+ if (err)
+ goto err;
+ return afa_block;
+err:
+ mlxsw_afa_block_destroy(afa_block);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block)
+{
+ mlxsw_afa_block_destroy(afa_block);
+}
+
+static int
+mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list,
+ struct mlxsw_sp_mr_route_info *route_info)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < route_info->erif_num; i++) {
+ u16 erif_index = route_info->erif_indices[i];
+
+ err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list,
+ erif_index);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
+mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_params *route_params)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+ int err;
+
+ route->key = route_params->key;
+ route->irif_index = route_params->value.irif_index;
+ route->min_mtu = route_params->value.min_mtu;
+ route->action = route_params->value.route_action;
+
+ /* Create the egress RIFs list */
+ mlxsw_sp_mr_erif_list_init(&route->erif_list);
+ err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list,
+ &route_params->value);
+ if (err)
+ goto err_erif_populate;
+
+ /* Create the flow counter */
+ err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index);
+ if (err)
+ goto err_counter_alloc;
+
+ /* Create the flexible action block */
+ route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+ route->action,
+ route->irif_index,
+ route->counter_index,
+ route->min_mtu,
+ &route->erif_list);
+ if (IS_ERR(route->afa_block)) {
+ err = PTR_ERR(route->afa_block);
+ goto err_afa_block_create;
+ }
+
+ route->priv = kzalloc(ops->route_priv_size, GFP_KERNEL);
+ if (!route->priv) {
+ err = -ENOMEM;
+ goto err_route_priv_alloc;
+ }
+
+ /* Write the route to the TCAM */
+ err = ops->route_create(mlxsw_sp, mr_tcam->priv, route->priv,
+ &route->key, route->afa_block,
+ route_params->prio);
+ if (err)
+ goto err_route_create;
+ return 0;
+
+err_route_create:
+ kfree(route->priv);
+err_route_priv_alloc:
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+err_afa_block_create:
+ mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
+err_erif_populate:
+err_counter_alloc:
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+ return err;
+}
+
+static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
+ void *priv, void *route_priv)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+ ops->route_destroy(mlxsw_sp, mr_tcam->priv, route->priv, &route->key);
+ kfree(route->priv);
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+}
+
+static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u64 *packets,
+ u64 *bytes)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index,
+ packets, bytes);
+}
+
+static int
+mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv,
+ enum mlxsw_sp_mr_route_action route_action)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ /* Create a new flexible action block */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action,
+ route->irif_index,
+ route->counter_index,
+ route->min_mtu,
+ &route->erif_list);
+ if (IS_ERR(afa_block))
+ return PTR_ERR(afa_block);
+
+ /* Update the TCAM route entry */
+ err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
+ if (err)
+ goto err;
+
+ /* Delete the old one */
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ route->afa_block = afa_block;
+ route->action = route_action;
+ return 0;
+err:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+ return err;
+}
+
+static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 min_mtu)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ /* Create a new flexible action block */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+ route->action,
+ route->irif_index,
+ route->counter_index,
+ min_mtu,
+ &route->erif_list);
+ if (IS_ERR(afa_block))
+ return PTR_ERR(afa_block);
+
+ /* Update the TCAM route entry */
+ err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
+ if (err)
+ goto err;
+
+ /* Delete the old one */
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ route->afa_block = afa_block;
+ route->min_mtu = min_mtu;
+ return 0;
+err:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+ return err;
+}
+
+static int mlxsw_sp_mr_tcam_route_irif_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 irif_index)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+
+ if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return -EINVAL;
+ route->irif_index = irif_index;
+ return 0;
+}
+
+static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 erif_index)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ int err;
+
+ err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list,
+ erif_index);
+ if (err)
+ return err;
+
+ /* Commit the action only if the route action is not TRAP */
+ if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return mlxsw_sp_mr_erif_list_commit(mlxsw_sp,
+ &route->erif_list);
+ return 0;
+}
+
+static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 erif_index)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist;
+ struct mlxsw_sp_mr_tcam_erif_list erif_list;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+ int i;
+
+ /* Create a copy of the original erif_list without the deleted entry */
+ mlxsw_sp_mr_erif_list_init(&erif_list);
+ list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) {
+ for (i = 0; i < erif_sublist->num_erifs; i++) {
+ u16 curr_erif = erif_sublist->erif_indices[i];
+
+ if (curr_erif == erif_index)
+ continue;
+ err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list,
+ curr_erif);
+ if (err)
+ goto err_erif_list_add;
+ }
+ }
+
+ /* Create the flexible action block pointing to the new erif_list */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action,
+ route->irif_index,
+ route->counter_index,
+ route->min_mtu,
+ &erif_list);
+ if (IS_ERR(afa_block)) {
+ err = PTR_ERR(afa_block);
+ goto err_afa_block_create;
+ }
+
+ /* Update the TCAM route entry */
+ err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
+ if (err)
+ goto err_route_write;
+
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+ route->afa_block = afa_block;
+ mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list);
+ return 0;
+
+err_route_write:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+err_afa_block_create:
+err_erif_list_add:
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list);
+ return err;
+}
+
+static int
+mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ struct mlxsw_sp_mr_route_info *route_info)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_tcam_erif_list erif_list;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ /* Create a new erif_list */
+ mlxsw_sp_mr_erif_list_init(&erif_list);
+ err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info);
+ if (err)
+ goto err_erif_populate;
+
+ /* Create the flexible action block pointing to the new erif_list */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+ route_info->route_action,
+ route_info->irif_index,
+ route->counter_index,
+ route_info->min_mtu,
+ &erif_list);
+ if (IS_ERR(afa_block)) {
+ err = PTR_ERR(afa_block);
+ goto err_afa_block_create;
+ }
+
+ /* Update the TCAM route entry */
+ err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
+ if (err)
+ goto err_route_write;
+
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+ route->afa_block = afa_block;
+ mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list);
+ route->action = route_info->route_action;
+ route->irif_index = route_info->irif_index;
+ route->min_mtu = route_info->min_mtu;
+ return 0;
+
+err_route_write:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+err_afa_block_create:
+err_erif_populate:
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list);
+ return err;
+}
+
+static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES))
+ return -EIO;
+
+ mr_tcam->priv = kzalloc(ops->priv_size, GFP_KERNEL);
+ if (!mr_tcam->priv)
+ return -ENOMEM;
+
+ err = ops->init(mlxsw_sp, mr_tcam->priv);
+ if (err)
+ goto err_init;
+ return 0;
+
+err_init:
+ kfree(mr_tcam->priv);
+ return err;
+}
+
+static void mlxsw_sp_mr_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+ ops->fini(mr_tcam->priv);
+ kfree(mr_tcam->priv);
+}
+
+const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {
+ .priv_size = sizeof(struct mlxsw_sp_mr_tcam),
+ .route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route),
+ .init = mlxsw_sp_mr_tcam_init,
+ .route_create = mlxsw_sp_mr_tcam_route_create,
+ .route_update = mlxsw_sp_mr_tcam_route_update,
+ .route_stats = mlxsw_sp_mr_tcam_route_stats,
+ .route_action_update = mlxsw_sp_mr_tcam_route_action_update,
+ .route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update,
+ .route_irif_update = mlxsw_sp_mr_tcam_route_irif_update,
+ .route_erif_add = mlxsw_sp_mr_tcam_route_erif_add,
+ .route_erif_del = mlxsw_sp_mr_tcam_route_erif_del,
+ .route_destroy = mlxsw_sp_mr_tcam_route_destroy,
+ .fini = mlxsw_sp_mr_tcam_fini,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
new file mode 100644
index 000000000..3c84151b4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H
+#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H
+
+#include "spectrum.h"
+#include "spectrum_mr.h"
+
+extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
new file mode 100644
index 000000000..dc63583c4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/pkt_cls.h>
+#include <net/red.h>
+
+#include "spectrum.h"
+#include "reg.h"
+
+#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+ MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
+
+enum mlxsw_sp_qdisc_type {
+ MLXSW_SP_QDISC_NO_QDISC,
+ MLXSW_SP_QDISC_RED,
+ MLXSW_SP_QDISC_PRIO,
+};
+
+struct mlxsw_sp_qdisc_ops {
+ enum mlxsw_sp_qdisc_type type;
+ int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params);
+ int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
+ int (*destroy)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc);
+ int (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_qopt_offload_stats *stats_ptr);
+ int (*get_xstats)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *xstats_ptr);
+ void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc);
+ /* unoffload - to be used for a qdisc that stops being offloaded without
+ * being destroyed.
+ */
+ void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
+};
+
+struct mlxsw_sp_qdisc {
+ u32 handle;
+ u8 tclass_num;
+ u8 prio_bitmap;
+ union {
+ struct red_stats red;
+ } xstats_base;
+ struct mlxsw_sp_qdisc_stats {
+ u64 tx_bytes;
+ u64 tx_packets;
+ u64 drops;
+ u64 overlimits;
+ u64 backlog;
+ } stats_base;
+
+ struct mlxsw_sp_qdisc_ops *ops;
+};
+
+static bool
+mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
+ enum mlxsw_sp_qdisc_type type)
+{
+ return mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops &&
+ mlxsw_sp_qdisc->ops->type == type &&
+ mlxsw_sp_qdisc->handle == handle;
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+ bool root_only)
+{
+ int tclass, child_index;
+
+ if (parent == TC_H_ROOT)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (root_only || !mlxsw_sp_port->root_qdisc ||
+ !mlxsw_sp_port->root_qdisc->ops ||
+ TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+ TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+ return NULL;
+
+ child_index = TC_H_MIN(parent);
+ tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+ return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+ int i;
+
+ if (mlxsw_sp_port->root_qdisc->handle == handle)
+ return mlxsw_sp_port->root_qdisc;
+
+ if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+ return NULL;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+ return &mlxsw_sp_port->tclass_qdiscs[i];
+
+ return NULL;
+}
+
+static int
+mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+ int err = 0;
+
+ if (!mlxsw_sp_qdisc)
+ return 0;
+
+ if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy)
+ err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port,
+ mlxsw_sp_qdisc);
+
+ mlxsw_sp_qdisc->handle = TC_H_UNSPEC;
+ mlxsw_sp_qdisc->ops = NULL;
+ return err;
+}
+
+static int
+mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct mlxsw_sp_qdisc_ops *ops, void *params)
+{
+ int err;
+
+ if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type)
+ /* In case this location contained a different qdisc of the
+ * same type we can override the old qdisc configuration.
+ * Otherwise, we need to remove the old qdisc before setting the
+ * new one.
+ */
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+ err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params);
+ if (err)
+ goto err_bad_param;
+
+ err = ops->replace(mlxsw_sp_port, mlxsw_sp_qdisc, params);
+ if (err)
+ goto err_config;
+
+ if (mlxsw_sp_qdisc->handle != handle) {
+ mlxsw_sp_qdisc->ops = ops;
+ if (ops->clean_stats)
+ ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc);
+ }
+
+ mlxsw_sp_qdisc->handle = handle;
+ return 0;
+
+err_bad_param:
+err_config:
+ if (mlxsw_sp_qdisc->handle == handle && ops->unoffload)
+ ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+ return err;
+}
+
+static int
+mlxsw_sp_qdisc_get_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_qopt_offload_stats *stats_ptr)
+{
+ if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops &&
+ mlxsw_sp_qdisc->ops->get_stats)
+ return mlxsw_sp_qdisc->ops->get_stats(mlxsw_sp_port,
+ mlxsw_sp_qdisc,
+ stats_ptr);
+
+ return -EOPNOTSUPP;
+}
+
+static int
+mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *xstats_ptr)
+{
+ if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops &&
+ mlxsw_sp_qdisc->ops->get_xstats)
+ return mlxsw_sp_qdisc->ops->get_xstats(mlxsw_sp_port,
+ mlxsw_sp_qdisc,
+ xstats_ptr);
+
+ return -EOPNOTSUPP;
+}
+
+static u64
+mlxsw_sp_xstats_backlog(struct mlxsw_sp_port_xstats *xstats, int tclass_num)
+{
+ return xstats->backlog[tclass_num] +
+ xstats->backlog[tclass_num + 8];
+}
+
+static u64
+mlxsw_sp_xstats_tail_drop(struct mlxsw_sp_port_xstats *xstats, int tclass_num)
+{
+ return xstats->tail_drop[tclass_num] +
+ xstats->tail_drop[tclass_num + 8];
+}
+
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+ u8 prio_bitmap, u64 *tx_packets,
+ u64 *tx_bytes)
+{
+ int i;
+
+ *tx_packets = 0;
+ *tx_bytes = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (prio_bitmap & BIT(i)) {
+ *tx_packets += xstats->tx_packets[i];
+ *tx_bytes += xstats->tx_bytes[i];
+ }
+ }
+}
+
+static int
+mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
+ int tclass_num, u32 min, u32 max,
+ u32 probability, bool is_ecn)
+{
+ char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
+ char cwtp_cmd[MLXSW_REG_CWTP_LEN];
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ int err;
+
+ mlxsw_reg_cwtp_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num);
+ mlxsw_reg_cwtp_profile_pack(cwtp_cmd, MLXSW_REG_CWTP_DEFAULT_PROFILE,
+ roundup(min, MLXSW_REG_CWTP_MIN_VALUE),
+ roundup(max, MLXSW_REG_CWTP_MIN_VALUE),
+ probability);
+
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtp), cwtp_cmd);
+ if (err)
+ return err;
+
+ mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
+ MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
+}
+
+static int
+mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port,
+ int tclass_num)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
+
+ mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
+ MLXSW_REG_CWTPM_RESET_PROFILE, false, false);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
+}
+
+static void
+mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+ u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+ struct mlxsw_sp_qdisc_stats *stats_base;
+ struct mlxsw_sp_port_xstats *xstats;
+ struct red_stats *red_base;
+
+ xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+ stats_base = &mlxsw_sp_qdisc->stats_base;
+ red_base = &mlxsw_sp_qdisc->xstats_base.red;
+
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &stats_base->tx_packets,
+ &stats_base->tx_bytes);
+ red_base->prob_mark = xstats->ecn;
+ red_base->prob_drop = xstats->wred_drop[tclass_num];
+ red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num);
+
+ stats_base->overlimits = red_base->prob_drop + red_base->prob_mark;
+ stats_base->drops = red_base->prob_drop + red_base->pdrop;
+
+ stats_base->backlog = 0;
+}
+
+static int
+mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+ struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+ if (root_qdisc != mlxsw_sp_qdisc)
+ root_qdisc->stats_base.backlog -=
+ mlxsw_sp_qdisc->stats_base.backlog;
+
+ return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
+ mlxsw_sp_qdisc->tclass_num);
+}
+
+static int
+mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct tc_red_qopt_offload_params *p = params;
+
+ if (p->min > p->max) {
+ dev_err(mlxsw_sp->bus_info->dev,
+ "spectrum: RED: min %u is bigger then max %u\n", p->min,
+ p->max);
+ return -EINVAL;
+ }
+ if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+ dev_err(mlxsw_sp->bus_info->dev,
+ "spectrum: RED: max value %u is too big\n", p->max);
+ return -EINVAL;
+ }
+ if (p->min == 0 || p->max == 0) {
+ dev_err(mlxsw_sp->bus_info->dev,
+ "spectrum: RED: 0 value is illegal for min and max\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct tc_red_qopt_offload_params *p = params;
+ u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+ u32 min, max;
+ u64 prob;
+
+ /* calculate probability in percentage */
+ prob = p->probability;
+ prob *= 100;
+ prob = DIV_ROUND_UP(prob, 1 << 16);
+ prob = DIV_ROUND_UP(prob, 1 << 16);
+ min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min);
+ max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max);
+ return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min,
+ max, prob, p->is_ecn);
+}
+
+static void
+mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_red_qopt_offload_params *p = params;
+ u64 backlog;
+
+ backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_qdisc->stats_base.backlog);
+ p->qstats->backlog -= backlog;
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
+}
+
+static int
+mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *xstats_ptr)
+{
+ struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red;
+ u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+ struct mlxsw_sp_port_xstats *xstats;
+ struct red_stats *res = xstats_ptr;
+ int early_drops, marks, pdrops;
+
+ xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+
+ early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop;
+ marks = xstats->ecn - xstats_base->prob_mark;
+ pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) -
+ xstats_base->pdrop;
+
+ res->pdrop += pdrops;
+ res->prob_drop += early_drops;
+ res->prob_mark += marks;
+
+ xstats_base->pdrop += pdrops;
+ xstats_base->prob_drop += early_drops;
+ xstats_base->prob_mark += marks;
+ return 0;
+}
+
+static int
+mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_qopt_offload_stats *stats_ptr)
+{
+ u64 tx_bytes, tx_packets, overlimits, drops, backlog;
+ u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+ struct mlxsw_sp_qdisc_stats *stats_base;
+ struct mlxsw_sp_port_xstats *xstats;
+
+ xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+ stats_base = &mlxsw_sp_qdisc->stats_base;
+
+ mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+ mlxsw_sp_qdisc->prio_bitmap,
+ &tx_packets, &tx_bytes);
+ tx_bytes = tx_bytes - stats_base->tx_bytes;
+ tx_packets = tx_packets - stats_base->tx_packets;
+
+ overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
+ stats_base->overlimits;
+ drops = xstats->wred_drop[tclass_num] +
+ mlxsw_sp_xstats_tail_drop(xstats, tclass_num) -
+ stats_base->drops;
+ backlog = mlxsw_sp_xstats_backlog(xstats, tclass_num);
+
+ _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
+ stats_ptr->qstats->overlimits += overlimits;
+ stats_ptr->qstats->drops += drops;
+ stats_ptr->qstats->backlog +=
+ mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+ backlog) -
+ mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+ stats_base->backlog);
+
+ stats_base->backlog = backlog;
+ stats_base->drops += drops;
+ stats_base->overlimits += overlimits;
+ stats_base->tx_bytes += tx_bytes;
+ stats_base->tx_packets += tx_packets;
+ return 0;
+}
+
+#define MLXSW_SP_PORT_DEFAULT_TCLASS 0
+
+static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = {
+ .type = MLXSW_SP_QDISC_RED,
+ .check_params = mlxsw_sp_qdisc_red_check_params,
+ .replace = mlxsw_sp_qdisc_red_replace,
+ .unoffload = mlxsw_sp_qdisc_red_unoffload,
+ .destroy = mlxsw_sp_qdisc_red_destroy,
+ .get_stats = mlxsw_sp_qdisc_get_red_stats,
+ .get_xstats = mlxsw_sp_qdisc_get_red_xstats,
+ .clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats,
+};
+
+int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_red_qopt_offload *p)
+{
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+ if (!mlxsw_sp_qdisc)
+ return -EOPNOTSUPP;
+
+ if (p->command == TC_RED_REPLACE)
+ return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
+ mlxsw_sp_qdisc,
+ &mlxsw_sp_qdisc_ops_red,
+ &p->set);
+
+ if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
+ MLXSW_SP_QDISC_RED))
+ return -EOPNOTSUPP;
+
+ switch (p->command) {
+ case TC_RED_DESTROY:
+ return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+ case TC_RED_XSTATS:
+ return mlxsw_sp_qdisc_get_xstats(mlxsw_sp_port, mlxsw_sp_qdisc,
+ p->xstats);
+ case TC_RED_STATS:
+ return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->stats);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+ int i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
+ MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[i]);
+ mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_prio_qopt_offload_params *p = params;
+
+ if (p->bands > IEEE_8021QAZ_MAX_TCS)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int
+mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_prio_qopt_offload_params *p = params;
+ struct mlxsw_sp_qdisc *child_qdisc;
+ int tclass, i, band, backlog;
+ u8 old_priomap;
+ int err;
+
+ for (band = 0; band < p->bands; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ old_priomap = child_qdisc->prio_bitmap;
+ child_qdisc->prio_bitmap = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (p->priomap[i] == band) {
+ child_qdisc->prio_bitmap |= BIT(i);
+ if (BIT(i) & old_priomap)
+ continue;
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+ i, tclass);
+ if (err)
+ return err;
+ }
+ }
+ if (old_priomap != child_qdisc->prio_bitmap &&
+ child_qdisc->ops && child_qdisc->ops->clean_stats) {
+ backlog = child_qdisc->stats_base.backlog;
+ child_qdisc->ops->clean_stats(mlxsw_sp_port,
+ child_qdisc);
+ child_qdisc->stats_base.backlog = backlog;
+ }
+ }
+ for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+ tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+ child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+ child_qdisc->prio_bitmap = 0;
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
+ }
+ return 0;
+}
+
+static void
+mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_prio_qopt_offload_params *p = params;
+ u64 backlog;
+
+ backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_qdisc->stats_base.backlog);
+ p->qstats->backlog -= backlog;
+}
+
+static int
+mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_qopt_offload_stats *stats_ptr)
+{
+ u64 tx_bytes, tx_packets, drops = 0, backlog = 0;
+ struct mlxsw_sp_qdisc_stats *stats_base;
+ struct mlxsw_sp_port_xstats *xstats;
+ struct rtnl_link_stats64 *stats;
+ int i;
+
+ xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+ stats = &mlxsw_sp_port->periodic_hw_stats.stats;
+ stats_base = &mlxsw_sp_qdisc->stats_base;
+
+ tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
+ tx_packets = stats->tx_packets - stats_base->tx_packets;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ drops += mlxsw_sp_xstats_tail_drop(xstats, i);
+ drops += xstats->wred_drop[i];
+ backlog += mlxsw_sp_xstats_backlog(xstats, i);
+ }
+ drops = drops - stats_base->drops;
+
+ _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
+ stats_ptr->qstats->drops += drops;
+ stats_ptr->qstats->backlog +=
+ mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+ backlog) -
+ mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+ stats_base->backlog);
+ stats_base->backlog = backlog;
+ stats_base->drops += drops;
+ stats_base->tx_bytes += tx_bytes;
+ stats_base->tx_packets += tx_packets;
+ return 0;
+}
+
+static void
+mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+ struct mlxsw_sp_qdisc_stats *stats_base;
+ struct mlxsw_sp_port_xstats *xstats;
+ struct rtnl_link_stats64 *stats;
+ int i;
+
+ xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+ stats = &mlxsw_sp_port->periodic_hw_stats.stats;
+ stats_base = &mlxsw_sp_qdisc->stats_base;
+
+ stats_base->tx_packets = stats->tx_packets;
+ stats_base->tx_bytes = stats->tx_bytes;
+
+ stats_base->drops = 0;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ stats_base->drops += mlxsw_sp_xstats_tail_drop(xstats, i);
+ stats_base->drops += xstats->wred_drop[i];
+ }
+
+ mlxsw_sp_qdisc->stats_base.backlog = 0;
+}
+
+static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
+ .type = MLXSW_SP_QDISC_PRIO,
+ .check_params = mlxsw_sp_qdisc_prio_check_params,
+ .replace = mlxsw_sp_qdisc_prio_replace,
+ .unoffload = mlxsw_sp_qdisc_prio_unoffload,
+ .destroy = mlxsw_sp_qdisc_prio_destroy,
+ .get_stats = mlxsw_sp_qdisc_get_prio_stats,
+ .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
+};
+
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ struct mlxsw_sp_qdisc *old_qdisc;
+
+ /* Check if the grafted qdisc is already in its "new" location. If so -
+ * nothing needs to be done.
+ */
+ if (p->band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ return 0;
+
+ if (!p->child_handle) {
+ /* This is an invisible FIFO replacing the original Qdisc.
+ * Ignore it--the original Qdisc's destroy will follow.
+ */
+ return 0;
+ }
+
+ /* See if the grafted qdisc is already offloaded on any tclass. If so,
+ * unoffload it.
+ */
+ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+ p->child_handle);
+ if (old_qdisc)
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+ mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+ &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+ return -EOPNOTSUPP;
+}
+
+int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_prio_qopt_offload *p)
+{
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
+ return -EOPNOTSUPP;
+
+ if (p->command == TC_PRIO_REPLACE)
+ return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
+ mlxsw_sp_qdisc,
+ &mlxsw_sp_qdisc_ops_prio,
+ &p->replace_params);
+
+ if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
+ MLXSW_SP_QDISC_PRIO))
+ return -EOPNOTSUPP;
+
+ switch (p->command) {
+ case TC_PRIO_DESTROY:
+ return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+ case TC_PRIO_STATS:
+ return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->stats);
+ case TC_PRIO_GRAFT:
+ return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->graft_params);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+ int i;
+
+ mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_root_qdisc_init;
+
+ mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+ mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
+ mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+
+ mlxsw_sp_qdisc = kcalloc(IEEE_8021QAZ_MAX_TCS,
+ sizeof(*mlxsw_sp_qdisc),
+ GFP_KERNEL);
+ if (!mlxsw_sp_qdisc)
+ goto err_tclass_qdiscs_init;
+
+ mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
+ return 0;
+
+err_tclass_qdiscs_init:
+ kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+ return -ENOMEM;
+}
+
+void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ kfree(mlxsw_sp_port->tclass_qdiscs);
+ kfree(mlxsw_sp_port->root_qdisc);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
new file mode 100644
index 000000000..93d662de1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -0,0 +1,7572 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/rhashtable.h>
+#include <linux/bitops.h>
+#include <linux/in6.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
+#include <linux/socket.h>
+#include <linux/route.h>
+#include <linux/gcd.h>
+#include <linux/random.h>
+#include <linux/if_macvlan.h>
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include <net/arp.h>
+#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
+#include <net/fib_rules.h>
+#include <net/ip_tunnels.h>
+#include <net/l3mdev.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/fib_notifier.h>
+#include <net/switchdev.h>
+
+#include "spectrum.h"
+#include "core.h"
+#include "reg.h"
+#include "spectrum_cnt.h"
+#include "spectrum_dpipe.h"
+#include "spectrum_ipip.h"
+#include "spectrum_mr.h"
+#include "spectrum_mr_tcam.h"
+#include "spectrum_router.h"
+#include "spectrum_span.h"
+
+struct mlxsw_sp_fib;
+struct mlxsw_sp_vr;
+struct mlxsw_sp_lpm_tree;
+struct mlxsw_sp_rif_ops;
+
+struct mlxsw_sp_router {
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif **rifs;
+ struct mlxsw_sp_vr *vrs;
+ struct rhashtable neigh_ht;
+ struct rhashtable nexthop_group_ht;
+ struct rhashtable nexthop_ht;
+ struct list_head nexthop_list;
+ struct {
+ /* One tree for each protocol: IPv4 and IPv6 */
+ struct mlxsw_sp_lpm_tree *proto_trees[2];
+ struct mlxsw_sp_lpm_tree *trees;
+ unsigned int tree_count;
+ } lpm;
+ struct {
+ struct delayed_work dw;
+ unsigned long interval; /* ms */
+ } neighs_update;
+ struct delayed_work nexthop_probe_dw;
+#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
+ struct list_head nexthop_neighs_list;
+ struct list_head ipip_list;
+ bool aborted;
+ struct notifier_block fib_nb;
+ struct notifier_block netevent_nb;
+ const struct mlxsw_sp_rif_ops **rif_ops_arr;
+ const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
+};
+
+struct mlxsw_sp_rif {
+ struct list_head nexthop_list;
+ struct list_head neigh_list;
+ struct net_device *dev;
+ struct mlxsw_sp_fid *fid;
+ unsigned char addr[ETH_ALEN];
+ int mtu;
+ u16 rif_index;
+ u16 vr_id;
+ const struct mlxsw_sp_rif_ops *ops;
+ struct mlxsw_sp *mlxsw_sp;
+
+ unsigned int counter_ingress;
+ bool counter_ingress_valid;
+ unsigned int counter_egress;
+ bool counter_egress_valid;
+};
+
+struct mlxsw_sp_rif_params {
+ struct net_device *dev;
+ union {
+ u16 system_port;
+ u16 lag_id;
+ };
+ u16 vid;
+ bool lag;
+};
+
+struct mlxsw_sp_rif_subport {
+ struct mlxsw_sp_rif common;
+ union {
+ u16 system_port;
+ u16 lag_id;
+ };
+ u16 vid;
+ bool lag;
+};
+
+struct mlxsw_sp_rif_ipip_lb {
+ struct mlxsw_sp_rif common;
+ struct mlxsw_sp_rif_ipip_lb_config lb_config;
+ u16 ul_vr_id; /* Reserved for Spectrum-2. */
+};
+
+struct mlxsw_sp_rif_params_ipip_lb {
+ struct mlxsw_sp_rif_params common;
+ struct mlxsw_sp_rif_ipip_lb_config lb_config;
+};
+
+struct mlxsw_sp_rif_ops {
+ enum mlxsw_sp_rif_type type;
+ size_t rif_size;
+
+ void (*setup)(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params);
+ int (*configure)(struct mlxsw_sp_rif *rif);
+ void (*deconfigure)(struct mlxsw_sp_rif *rif);
+ struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
+ struct netlink_ext_ack *extack);
+ void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
+};
+
+static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
+static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree);
+static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fib *fib,
+ u8 tree_id);
+static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fib *fib);
+
+static unsigned int *
+mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir)
+{
+ switch (dir) {
+ case MLXSW_SP_RIF_COUNTER_EGRESS:
+ return &rif->counter_egress;
+ case MLXSW_SP_RIF_COUNTER_INGRESS:
+ return &rif->counter_ingress;
+ }
+ return NULL;
+}
+
+static bool
+mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir)
+{
+ switch (dir) {
+ case MLXSW_SP_RIF_COUNTER_EGRESS:
+ return rif->counter_egress_valid;
+ case MLXSW_SP_RIF_COUNTER_INGRESS:
+ return rif->counter_ingress_valid;
+ }
+ return false;
+}
+
+static void
+mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir,
+ bool valid)
+{
+ switch (dir) {
+ case MLXSW_SP_RIF_COUNTER_EGRESS:
+ rif->counter_egress_valid = valid;
+ break;
+ case MLXSW_SP_RIF_COUNTER_INGRESS:
+ rif->counter_ingress_valid = valid;
+ break;
+ }
+}
+
+static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+ unsigned int counter_index, bool enable,
+ enum mlxsw_sp_rif_counter_dir dir)
+{
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+ bool is_egress = false;
+ int err;
+
+ if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
+ is_egress = true;
+ mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
+ is_egress);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
+{
+ char ricnt_pl[MLXSW_REG_RICNT_LEN];
+ unsigned int *p_counter_index;
+ bool valid;
+ int err;
+
+ valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
+ if (!valid)
+ return -EINVAL;
+
+ p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
+ if (!p_counter_index)
+ return -EINVAL;
+ mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
+ MLXSW_REG_RICNT_OPCODE_NOP);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
+ if (err)
+ return err;
+ *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
+ return 0;
+}
+
+static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
+ unsigned int counter_index)
+{
+ char ricnt_pl[MLXSW_REG_RICNT_LEN];
+
+ mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
+ MLXSW_REG_RICNT_OPCODE_CLEAR);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
+}
+
+int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir)
+{
+ unsigned int *p_counter_index;
+ int err;
+
+ p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
+ if (!p_counter_index)
+ return -EINVAL;
+ err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ p_counter_index);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
+ if (err)
+ goto err_counter_clear;
+
+ err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
+ *p_counter_index, true, dir);
+ if (err)
+ goto err_counter_edit;
+ mlxsw_sp_rif_counter_valid_set(rif, dir, true);
+ return 0;
+
+err_counter_edit:
+err_counter_clear:
+ mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ *p_counter_index);
+ return err;
+}
+
+void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir)
+{
+ unsigned int *p_counter_index;
+
+ if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
+ return;
+
+ p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
+ if (WARN_ON(!p_counter_index))
+ return;
+ mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
+ *p_counter_index, false, dir);
+ mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ *p_counter_index);
+ mlxsw_sp_rif_counter_valid_set(rif, dir, false);
+}
+
+static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct devlink *devlink;
+
+ devlink = priv_to_devlink(mlxsw_sp->core);
+ if (!devlink_dpipe_table_counter_enabled(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
+ return;
+ mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+}
+
+static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+
+ mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
+}
+
+#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
+
+struct mlxsw_sp_prefix_usage {
+ DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
+};
+
+#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
+ for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
+
+static bool
+mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
+ struct mlxsw_sp_prefix_usage *prefix_usage2)
+{
+ return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
+}
+
+static void
+mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
+ struct mlxsw_sp_prefix_usage *prefix_usage2)
+{
+ memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
+}
+
+static void
+mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
+ unsigned char prefix_len)
+{
+ set_bit(prefix_len, prefix_usage->b);
+}
+
+static void
+mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
+ unsigned char prefix_len)
+{
+ clear_bit(prefix_len, prefix_usage->b);
+}
+
+struct mlxsw_sp_fib_key {
+ unsigned char addr[sizeof(struct in6_addr)];
+ unsigned char prefix_len;
+};
+
+enum mlxsw_sp_fib_entry_type {
+ MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
+ MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
+ MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
+
+ /* This is a special case of local delivery, where a packet should be
+ * decapsulated on reception. Note that there is no corresponding ENCAP,
+ * because that's a type of next hop, not of FIB entry. (There can be
+ * several next hops in a REMOTE entry, and some of them may be
+ * encapsulating entries.)
+ */
+ MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
+};
+
+struct mlxsw_sp_nexthop_group;
+
+struct mlxsw_sp_fib_node {
+ struct list_head entry_list;
+ struct list_head list;
+ struct rhash_head ht_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_fib_key key;
+};
+
+struct mlxsw_sp_fib_entry_decap {
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ u32 tunnel_index;
+};
+
+struct mlxsw_sp_fib_entry {
+ struct list_head list;
+ struct mlxsw_sp_fib_node *fib_node;
+ enum mlxsw_sp_fib_entry_type type;
+ struct list_head nexthop_group_node;
+ struct mlxsw_sp_nexthop_group *nh_group;
+ struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
+};
+
+struct mlxsw_sp_fib4_entry {
+ struct mlxsw_sp_fib_entry common;
+ u32 tb_id;
+ u32 prio;
+ u8 tos;
+ u8 type;
+};
+
+struct mlxsw_sp_fib6_entry {
+ struct mlxsw_sp_fib_entry common;
+ struct list_head rt6_list;
+ unsigned int nrt6;
+};
+
+struct mlxsw_sp_rt6 {
+ struct list_head list;
+ struct fib6_info *rt;
+};
+
+struct mlxsw_sp_lpm_tree {
+ u8 id; /* tree ID */
+ unsigned int ref_count;
+ enum mlxsw_sp_l3proto proto;
+ unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
+ struct mlxsw_sp_prefix_usage prefix_usage;
+};
+
+struct mlxsw_sp_fib {
+ struct rhashtable ht;
+ struct list_head node_list;
+ struct mlxsw_sp_vr *vr;
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+ enum mlxsw_sp_l3proto proto;
+};
+
+struct mlxsw_sp_vr {
+ u16 id; /* virtual router ID */
+ u32 tb_id; /* kernel fib table id */
+ unsigned int rif_count;
+ struct mlxsw_sp_fib *fib4;
+ struct mlxsw_sp_fib *fib6;
+ struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
+};
+
+static const struct rhashtable_params mlxsw_sp_fib_ht_params;
+
+static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_vr *vr,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+ struct mlxsw_sp_fib *fib;
+ int err;
+
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
+ fib = kzalloc(sizeof(*fib), GFP_KERNEL);
+ if (!fib)
+ return ERR_PTR(-ENOMEM);
+ err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+ INIT_LIST_HEAD(&fib->node_list);
+ fib->proto = proto;
+ fib->vr = vr;
+ fib->lpm_tree = lpm_tree;
+ mlxsw_sp_lpm_tree_hold(lpm_tree);
+ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
+ if (err)
+ goto err_lpm_tree_bind;
+ return fib;
+
+err_lpm_tree_bind:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+err_rhashtable_init:
+ kfree(fib);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib)
+{
+ mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
+ WARN_ON(!list_empty(&fib->node_list));
+ rhashtable_destroy(&fib->ht);
+ kfree(fib);
+}
+
+static struct mlxsw_sp_lpm_tree *
+mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
+{
+ static struct mlxsw_sp_lpm_tree *lpm_tree;
+ int i;
+
+ for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
+ lpm_tree = &mlxsw_sp->router->lpm.trees[i];
+ if (lpm_tree->ref_count == 0)
+ return lpm_tree;
+ }
+ return NULL;
+}
+
+static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+ char ralta_pl[MLXSW_REG_RALTA_LEN];
+
+ mlxsw_reg_ralta_pack(ralta_pl, true,
+ (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+ lpm_tree->id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+}
+
+static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+ char ralta_pl[MLXSW_REG_RALTA_LEN];
+
+ mlxsw_reg_ralta_pack(ralta_pl, false,
+ (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+ lpm_tree->id);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+}
+
+static int
+mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_prefix_usage *prefix_usage,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+ char ralst_pl[MLXSW_REG_RALST_LEN];
+ u8 root_bin = 0;
+ u8 prefix;
+ u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
+
+ mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
+ root_bin = prefix;
+
+ mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
+ mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
+ if (prefix == 0)
+ continue;
+ mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
+ MLXSW_REG_RALST_BIN_NO_CHILD);
+ last_prefix = prefix;
+ }
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
+}
+
+static struct mlxsw_sp_lpm_tree *
+mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_prefix_usage *prefix_usage,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+ int err;
+
+ lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
+ if (!lpm_tree)
+ return ERR_PTR(-EBUSY);
+ lpm_tree->proto = proto;
+ err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
+ if (err)
+ return ERR_PTR(err);
+
+ err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
+ lpm_tree);
+ if (err)
+ goto err_left_struct_set;
+ memcpy(&lpm_tree->prefix_usage, prefix_usage,
+ sizeof(lpm_tree->prefix_usage));
+ memset(&lpm_tree->prefix_ref_count, 0,
+ sizeof(lpm_tree->prefix_ref_count));
+ lpm_tree->ref_count = 1;
+ return lpm_tree;
+
+err_left_struct_set:
+ mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+ mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
+}
+
+static struct mlxsw_sp_lpm_tree *
+mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_prefix_usage *prefix_usage,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+ int i;
+
+ for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
+ lpm_tree = &mlxsw_sp->router->lpm.trees[i];
+ if (lpm_tree->ref_count != 0 &&
+ lpm_tree->proto == proto &&
+ mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
+ prefix_usage)) {
+ mlxsw_sp_lpm_tree_hold(lpm_tree);
+ return lpm_tree;
+ }
+ }
+ return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
+}
+
+static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+ lpm_tree->ref_count++;
+}
+
+static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
+{
+ if (--lpm_tree->ref_count == 0)
+ mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
+}
+
+#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
+
+static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+ u64 max_trees;
+ int err, i;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
+ return -EIO;
+
+ max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
+ mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
+ mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
+ sizeof(struct mlxsw_sp_lpm_tree),
+ GFP_KERNEL);
+ if (!mlxsw_sp->router->lpm.trees)
+ return -ENOMEM;
+
+ for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
+ lpm_tree = &mlxsw_sp->router->lpm.trees[i];
+ lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
+ }
+
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(lpm_tree)) {
+ err = PTR_ERR(lpm_tree);
+ goto err_ipv4_tree_get;
+ }
+ mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
+
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(lpm_tree)) {
+ err = PTR_ERR(lpm_tree);
+ goto err_ipv6_tree_get;
+ }
+ mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
+
+ return 0;
+
+err_ipv6_tree_get:
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+err_ipv4_tree_get:
+ kfree(mlxsw_sp->router->lpm.trees);
+ return err;
+}
+
+static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+
+ kfree(mlxsw_sp->router->lpm.trees);
+}
+
+static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
+{
+ return !!vr->fib4 || !!vr->fib6 ||
+ !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
+ !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_vr *vr;
+ int i;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+ vr = &mlxsw_sp->router->vrs[i];
+ if (!mlxsw_sp_vr_is_used(vr))
+ return vr;
+ }
+ return NULL;
+}
+
+static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fib *fib, u8 tree_id)
+{
+ char raltb_pl[MLXSW_REG_RALTB_LEN];
+
+ mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
+ (enum mlxsw_reg_ralxx_protocol) fib->proto,
+ tree_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+}
+
+static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fib *fib)
+{
+ char raltb_pl[MLXSW_REG_RALTB_LEN];
+
+ /* Bind to tree 0 which is default */
+ mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
+ (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+}
+
+static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
+{
+ /* For our purpose, squash main, default and local tables into one */
+ if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
+ tb_id = RT_TABLE_MAIN;
+ return tb_id;
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id)
+{
+ struct mlxsw_sp_vr *vr;
+ int i;
+
+ tb_id = mlxsw_sp_fix_tb_id(tb_id);
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+ vr = &mlxsw_sp->router->vrs[i];
+ if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
+ return vr;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
+ enum mlxsw_sp_l3proto proto)
+{
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return vr->fib4;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ return vr->fib6;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
+ struct mlxsw_sp_fib *fib4;
+ struct mlxsw_sp_fib *fib6;
+ struct mlxsw_sp_vr *vr;
+ int err;
+
+ vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
+ if (!vr) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
+ return ERR_PTR(-EBUSY);
+ }
+ fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(fib4))
+ return ERR_CAST(fib4);
+ fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib6)) {
+ err = PTR_ERR(fib6);
+ goto err_fib6_create;
+ }
+ mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+ MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(mr4_table)) {
+ err = PTR_ERR(mr4_table);
+ goto err_mr4_table_create;
+ }
+ mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(mr6_table)) {
+ err = PTR_ERR(mr6_table);
+ goto err_mr6_table_create;
+ }
+
+ vr->fib4 = fib4;
+ vr->fib6 = fib6;
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
+ vr->tb_id = tb_id;
+ return vr;
+
+err_mr6_table_create:
+ mlxsw_sp_mr_table_destroy(mr4_table);
+err_mr4_table_create:
+ mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
+err_fib6_create:
+ mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_vr *vr)
+{
+ mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
+ mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
+ vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
+ mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
+ vr->fib6 = NULL;
+ mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
+ vr->fib4 = NULL;
+}
+
+static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_vr *vr;
+
+ tb_id = mlxsw_sp_fix_tb_id(tb_id);
+ vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+ if (!vr)
+ vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
+ return vr;
+}
+
+static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
+{
+ if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
+ list_empty(&vr->fib6->node_list) &&
+ mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
+ mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
+ mlxsw_sp_vr_destroy(mlxsw_sp, vr);
+}
+
+static bool
+mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
+ enum mlxsw_sp_l3proto proto, u8 tree_id)
+{
+ struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
+
+ if (!mlxsw_sp_vr_is_used(vr))
+ return false;
+ if (fib->lpm_tree->id == tree_id)
+ return true;
+ return false;
+}
+
+static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib,
+ struct mlxsw_sp_lpm_tree *new_tree)
+{
+ struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
+ int err;
+
+ fib->lpm_tree = new_tree;
+ mlxsw_sp_lpm_tree_hold(new_tree);
+ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
+ if (err)
+ goto err_tree_bind;
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
+ return 0;
+
+err_tree_bind:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
+ fib->lpm_tree = old_tree;
+ return err;
+}
+
+static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib,
+ struct mlxsw_sp_lpm_tree *new_tree)
+{
+ enum mlxsw_sp_l3proto proto = fib->proto;
+ struct mlxsw_sp_lpm_tree *old_tree;
+ u8 old_id, new_id = new_tree->id;
+ struct mlxsw_sp_vr *vr;
+ int i, err;
+
+ old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
+ old_id = old_tree->id;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+ vr = &mlxsw_sp->router->vrs[i];
+ if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
+ continue;
+ err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
+ mlxsw_sp_vr_fib(vr, proto),
+ new_tree);
+ if (err)
+ goto err_tree_replace;
+ }
+
+ memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
+ sizeof(new_tree->prefix_ref_count));
+ mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
+
+ return 0;
+
+err_tree_replace:
+ for (i--; i >= 0; i--) {
+ if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
+ continue;
+ mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
+ mlxsw_sp_vr_fib(vr, proto),
+ old_tree);
+ }
+ return err;
+}
+
+static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_vr *vr;
+ u64 max_vrs;
+ int i;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
+ return -EIO;
+
+ max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
+ mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
+ GFP_KERNEL);
+ if (!mlxsw_sp->router->vrs)
+ return -ENOMEM;
+
+ for (i = 0; i < max_vrs; i++) {
+ vr = &mlxsw_sp->router->vrs[i];
+ vr->id = i;
+ }
+
+ return 0;
+}
+
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
+
+static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ /* At this stage we're guaranteed not to have new incoming
+ * FIB notifications and the work queue is free from FIBs
+ * sitting on top of mlxsw netdevs. However, we can still
+ * have other FIBs queued. Flush the queue before flushing
+ * the device's tables. No need for locks, as we're the only
+ * writer.
+ */
+ mlxsw_core_flush_owq();
+ mlxsw_sp_router_fib_flush(mlxsw_sp);
+ kfree(mlxsw_sp->router->vrs);
+}
+
+static struct net_device *
+__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+ struct net *net = dev_net(ol_dev);
+
+ return __dev_get_by_index(net, tun->parms.link);
+}
+
+u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
+{
+ struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
+
+ if (d)
+ return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
+ else
+ return RT_TABLE_MAIN;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_rif_params *params,
+ struct netlink_ext_ack *extack);
+
+static struct mlxsw_sp_rif_ipip_lb *
+mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct net_device *ol_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_rif_params_ipip_lb lb_params;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct mlxsw_sp_rif *rif;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+ lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
+ .common.dev = ol_dev,
+ .common.lag = false,
+ .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
+ };
+
+ rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
+ if (IS_ERR(rif))
+ return ERR_CAST(rif);
+ return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct net_device *ol_dev)
+{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ struct mlxsw_sp_ipip_entry *ret = NULL;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+ ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
+ if (!ipip_entry)
+ return ERR_PTR(-ENOMEM);
+
+ ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
+ ol_dev, NULL);
+ if (IS_ERR(ipip_entry->ol_lb)) {
+ ret = ERR_CAST(ipip_entry->ol_lb);
+ goto err_ol_ipip_lb_create;
+ }
+
+ ipip_entry->ipipt = ipipt;
+ ipip_entry->ol_dev = ol_dev;
+
+ switch (ipip_ops->ul_proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ WARN_ON(1);
+ break;
+ }
+
+ return ipip_entry;
+
+err_ol_ipip_lb_create:
+ kfree(ipip_entry);
+ return ret;
+}
+
+static void
+mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
+ kfree(ipip_entry);
+}
+
+static bool
+mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
+ const enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr saddr,
+ u32 ul_tb_id,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+ enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
+ union mlxsw_sp_l3addr tun_saddr;
+
+ if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
+ return false;
+
+ tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
+ return tun_ul_tb_id == ul_tb_id &&
+ mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
+}
+
+static int
+mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u32 tunnel_index;
+ int err;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+ 1, &tunnel_index);
+ if (err)
+ return err;
+
+ ipip_entry->decap_fib_entry = fib_entry;
+ fib_entry->decap.ipip_entry = ipip_entry;
+ fib_entry->decap.tunnel_index = tunnel_index;
+ return 0;
+}
+
+static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ /* Unlink this node from the IPIP entry that it's the decap entry of. */
+ fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
+ fib_entry->decap.ipip_entry = NULL;
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+ 1, fib_entry->decap.tunnel_index);
+}
+
+static struct mlxsw_sp_fib_node *
+mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+ size_t addr_len, unsigned char prefix_len);
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry);
+
+static void
+mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
+
+ mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+
+ mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+}
+
+static void
+mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct mlxsw_sp_fib_entry *decap_fib_entry)
+{
+ if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
+ ipip_entry))
+ return;
+ decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
+
+ if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
+ mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+}
+
+/* Given an IPIP entry, find the corresponding decap route. */
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ static struct mlxsw_sp_fib_node *fib_node;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ unsigned char saddr_prefix_len;
+ union mlxsw_sp_l3addr saddr;
+ struct mlxsw_sp_fib *ul_fib;
+ struct mlxsw_sp_vr *ul_vr;
+ const void *saddrp;
+ size_t saddr_len;
+ u32 ul_tb_id;
+ u32 saddr4;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+
+ ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+ ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
+ if (!ul_vr)
+ return NULL;
+
+ ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
+ saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
+ ipip_entry->ol_dev);
+
+ switch (ipip_ops->ul_proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ saddr4 = be32_to_cpu(saddr.addr4);
+ saddrp = &saddr4;
+ saddr_len = 4;
+ saddr_prefix_len = 32;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
+ saddr_prefix_len);
+ if (!fib_node || list_empty(&fib_node->entry_list))
+ return NULL;
+
+ fib_entry = list_first_entry(&fib_node->entry_list,
+ struct mlxsw_sp_fib_entry, list);
+ if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
+ return NULL;
+
+ return fib_entry;
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
+ if (IS_ERR(ipip_entry))
+ return ipip_entry;
+
+ list_add_tail(&ipip_entry->ipip_list_node,
+ &mlxsw_sp->router->ipip_list);
+
+ return ipip_entry;
+}
+
+static void
+mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ list_del(&ipip_entry->ipip_list_node);
+ mlxsw_sp_ipip_entry_dealloc(ipip_entry);
+}
+
+static bool
+mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ul_dev,
+ enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr ul_dip,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
+ enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
+
+ if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
+ return false;
+
+ return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
+ ul_tb_id, ipip_entry);
+}
+
+/* Given decap parameters, find the corresponding IPIP entry. */
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ul_dev,
+ enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr ul_dip)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
+ ipip_list_node)
+ if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
+ ul_proto, ul_dip,
+ ipip_entry))
+ return ipip_entry;
+
+ return NULL;
+}
+
+static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev,
+ enum mlxsw_sp_ipip_type *p_type)
+{
+ struct mlxsw_sp_router *router = mlxsw_sp->router;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ enum mlxsw_sp_ipip_type ipipt;
+
+ for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
+ ipip_ops = router->ipip_ops_arr[ipipt];
+ if (dev->type == ipip_ops->dev_type) {
+ if (p_type)
+ *p_type = ipipt;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev)
+{
+ return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
+ ipip_list_node)
+ if (ipip_entry->ol_dev == ol_dev)
+ return ipip_entry;
+
+ return NULL;
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ul_dev,
+ struct mlxsw_sp_ipip_entry *start)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
+ ipip_list_node);
+ list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
+ ipip_list_node) {
+ struct net_device *ipip_ul_dev =
+ __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
+
+ if (ipip_ul_dev == ul_dev)
+ return ipip_entry;
+ }
+
+ return NULL;
+}
+
+bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev)
+{
+ return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
+}
+
+static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev,
+ enum mlxsw_sp_ipip_type ipipt)
+{
+ const struct mlxsw_sp_ipip_ops *ops
+ = mlxsw_sp->router->ipip_ops_arr[ipipt];
+
+ /* For deciding whether decap should be offloaded, we don't care about
+ * overlay protocol, so ask whether either one is supported.
+ */
+ return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
+ ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
+}
+
+static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ enum mlxsw_sp_l3proto ul_proto;
+ enum mlxsw_sp_ipip_type ipipt;
+ union mlxsw_sp_l3addr saddr;
+ u32 ul_tb_id;
+
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
+ if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
+ ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
+ ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
+ saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
+ if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
+ saddr, ul_tb_id,
+ NULL)) {
+ ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
+ ol_dev);
+ if (IS_ERR(ipip_entry))
+ return PTR_ERR(ipip_entry);
+ }
+ }
+
+ return 0;
+}
+
+static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (ipip_entry)
+ mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
+}
+
+static void
+mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ struct mlxsw_sp_fib_entry *decap_fib_entry;
+
+ decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
+ if (decap_fib_entry)
+ mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
+ decap_fib_entry);
+}
+
+static int
+mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
+ struct mlxsw_sp_vr *ul_vr, bool enable)
+{
+ struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
+ struct mlxsw_sp_rif *rif = &lb_rif->common;
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+ u32 saddr4;
+
+ switch (lb_cf.ul_protocol) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
+ mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
+ rif->rif_index, rif->vr_id, rif->dev->mtu);
+ mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
+ MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
+ ul_vr->id, saddr4, lb_cf.okey);
+ break;
+
+ case MLXSW_SP_L3_PROTO_IPV6:
+ return -EAFNOSUPPORT;
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ struct mlxsw_sp_rif_ipip_lb *lb_rif;
+ struct mlxsw_sp_vr *ul_vr;
+ int err = 0;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (ipip_entry) {
+ lb_rif = ipip_entry->ol_lb;
+ ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
+ err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+ if (err)
+ goto out;
+ lb_rif->common.mtu = ol_dev->mtu;
+ }
+
+out:
+ return err;
+}
+
+static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (ipip_entry)
+ mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
+}
+
+static void
+mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ if (ipip_entry->decap_fib_entry)
+ mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+}
+
+static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (ipip_entry)
+ mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
+}
+
+static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *old_rif,
+ struct mlxsw_sp_rif *new_rif);
+static int
+mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ bool keep_encap,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
+ struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
+
+ new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
+ ipip_entry->ipipt,
+ ipip_entry->ol_dev,
+ extack);
+ if (IS_ERR(new_lb_rif))
+ return PTR_ERR(new_lb_rif);
+ ipip_entry->ol_lb = new_lb_rif;
+
+ if (keep_encap)
+ mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
+ &new_lb_rif->common);
+
+ mlxsw_sp_rif_destroy(&old_lb_rif->common);
+
+ return 0;
+}
+
+static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif);
+
+/**
+ * Update the offload related to an IPIP entry. This always updates decap, and
+ * in addition to that it also:
+ * @recreate_loopback: recreates the associated loopback RIF
+ * @keep_encap: updates next hops that use the tunnel netdevice. This is only
+ * relevant when recreate_loopback is true.
+ * @update_nexthops: updates next hops, keeping the current loopback RIF. This
+ * is only relevant when recreate_loopback is false.
+ */
+int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ bool recreate_loopback,
+ bool keep_encap,
+ bool update_nexthops,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ /* RIFs can't be edited, so to update loopback, we need to destroy and
+ * recreate it. That creates a window of opportunity where RALUE and
+ * RATR registers end up referencing a RIF that's already gone. RATRs
+ * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
+ * of RALUE, demote the decap route back.
+ */
+ if (ipip_entry->decap_fib_entry)
+ mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+
+ if (recreate_loopback) {
+ err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
+ keep_encap, extack);
+ if (err)
+ return err;
+ } else if (update_nexthops) {
+ mlxsw_sp_nexthop_rif_update(mlxsw_sp,
+ &ipip_entry->ol_lb->common);
+ }
+
+ if (ipip_entry->ol_dev->flags & IFF_UP)
+ mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
+
+ return 0;
+}
+
+static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry =
+ mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+
+ if (!ipip_entry)
+ return 0;
+
+ return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+ true, false, false, extack);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct net_device *ul_dev,
+ struct netlink_ext_ack *extack)
+{
+ return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+ true, true, false, extack);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct net_device *ul_dev)
+{
+ return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+ false, false, true, NULL);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct net_device *ul_dev)
+{
+ /* A down underlay device causes encapsulated packets to not be
+ * forwarded, but decap still works. So refresh next hops without
+ * touching anything else.
+ */
+ return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+ false, false, true, NULL);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev,
+ struct netlink_ext_ack *extack)
+{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ int err;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (!ipip_entry)
+ /* A change might make a tunnel eligible for offloading, but
+ * that is currently not implemented. What falls to slow path
+ * stays there.
+ */
+ return 0;
+
+ /* A change might make a tunnel not eligible for offloading. */
+ if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
+ ipip_entry->ipipt)) {
+ mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+ return 0;
+ }
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+ err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
+ return err;
+}
+
+void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ struct net_device *ol_dev = ipip_entry->ol_dev;
+
+ if (ol_dev->flags & IFF_UP)
+ mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
+ mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
+}
+
+/* The configuration where several tunnels have the same local address in the
+ * same underlay table needs special treatment in the HW. That is currently not
+ * implemented in the driver. This function finds and demotes the first tunnel
+ * with a given source address, except the one passed in in the argument
+ * `except'.
+ */
+bool
+mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr saddr,
+ u32 ul_tb_id,
+ const struct mlxsw_sp_ipip_entry *except)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
+
+ list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
+ ipip_list_node) {
+ if (ipip_entry != except &&
+ mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
+ ul_tb_id, ipip_entry)) {
+ mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ul_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
+
+ list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
+ ipip_list_node) {
+ struct net_device *ipip_ul_dev =
+ __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
+
+ if (ipip_ul_dev == ul_dev)
+ mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+ }
+}
+
+int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev,
+ unsigned long event,
+ struct netdev_notifier_info *info)
+{
+ struct netdev_notifier_changeupper_info *chup;
+ struct netlink_ext_ack *extack;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
+ case NETDEV_UNREGISTER:
+ mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
+ return 0;
+ case NETDEV_UP:
+ mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
+ return 0;
+ case NETDEV_DOWN:
+ mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
+ return 0;
+ case NETDEV_CHANGEUPPER:
+ chup = container_of(info, typeof(*chup), info);
+ extack = info->extack;
+ if (netif_is_l3_master(chup->upper_dev))
+ return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
+ ol_dev,
+ extack);
+ return 0;
+ case NETDEV_CHANGE:
+ extack = info->extack;
+ return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
+ ol_dev, extack);
+ case NETDEV_CHANGEMTU:
+ return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
+ }
+ return 0;
+}
+
+static int
+__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct net_device *ul_dev,
+ unsigned long event,
+ struct netdev_notifier_info *info)
+{
+ struct netdev_notifier_changeupper_info *chup;
+ struct netlink_ext_ack *extack;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ chup = container_of(info, typeof(*chup), info);
+ extack = info->extack;
+ if (netif_is_l3_master(chup->upper_dev))
+ return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
+ ipip_entry,
+ ul_dev,
+ extack);
+ break;
+
+ case NETDEV_UP:
+ return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
+ ul_dev);
+ case NETDEV_DOWN:
+ return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
+ ipip_entry,
+ ul_dev);
+ }
+ return 0;
+}
+
+int
+mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ul_dev,
+ unsigned long event,
+ struct netdev_notifier_info *info)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
+ int err;
+
+ while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
+ ul_dev,
+ ipip_entry))) {
+ err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
+ ul_dev, event, info);
+ if (err) {
+ mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
+ ul_dev);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+struct mlxsw_sp_neigh_key {
+ struct neighbour *n;
+};
+
+struct mlxsw_sp_neigh_entry {
+ struct list_head rif_list_node;
+ struct rhash_head ht_node;
+ struct mlxsw_sp_neigh_key key;
+ u16 rif;
+ bool connected;
+ unsigned char ha[ETH_ALEN];
+ struct list_head nexthop_list; /* list of nexthops using
+ * this neigh entry
+ */
+ struct list_head nexthop_neighs_list_node;
+ unsigned int counter_index;
+ bool counter_valid;
+};
+
+static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
+ .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
+ .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
+ .key_len = sizeof(struct mlxsw_sp_neigh_key),
+};
+
+struct mlxsw_sp_neigh_entry *
+mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ if (!neigh_entry) {
+ if (list_empty(&rif->neigh_list))
+ return NULL;
+ else
+ return list_first_entry(&rif->neigh_list,
+ typeof(*neigh_entry),
+ rif_list_node);
+ }
+ if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
+ return NULL;
+ return list_next_entry(neigh_entry, rif_list_node);
+}
+
+int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ return neigh_entry->key.n->tbl->family;
+}
+
+unsigned char *
+mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ return neigh_entry->ha;
+}
+
+u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n;
+
+ n = neigh_entry->key.n;
+ return ntohl(*((__be32 *) n->primary_key));
+}
+
+struct in6_addr *
+mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n;
+
+ n = neigh_entry->key.n;
+ return (struct in6_addr *) &n->primary_key;
+}
+
+int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ u64 *p_counter)
+{
+ if (!neigh_entry->counter_valid)
+ return -EINVAL;
+
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
+ p_counter, NULL);
+}
+
+static struct mlxsw_sp_neigh_entry *
+mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
+ u16 rif)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
+ if (!neigh_entry)
+ return NULL;
+
+ neigh_entry->key.n = n;
+ neigh_entry->rif = rif;
+ INIT_LIST_HEAD(&neigh_entry->nexthop_list);
+
+ return neigh_entry;
+}
+
+static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ kfree(neigh_entry);
+}
+
+static int
+mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
+ &neigh_entry->ht_node,
+ mlxsw_sp_neigh_ht_params);
+}
+
+static void
+mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
+ &neigh_entry->ht_node,
+ mlxsw_sp_neigh_ht_params);
+}
+
+static bool
+mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct devlink *devlink;
+ const char *table_name;
+
+ switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
+ case AF_INET:
+ table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
+ break;
+ case AF_INET6:
+ table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
+ break;
+ default:
+ WARN_ON(1);
+ return false;
+ }
+
+ devlink = priv_to_devlink(mlxsw_sp->core);
+ return devlink_dpipe_table_counter_enabled(devlink, table_name);
+}
+
+static void
+mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
+ return;
+
+ if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
+ return;
+
+ neigh_entry->counter_valid = true;
+}
+
+static void
+mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ if (!neigh_entry->counter_valid)
+ return;
+ mlxsw_sp_flow_counter_free(mlxsw_sp,
+ neigh_entry->counter_index);
+ neigh_entry->counter_valid = false;
+}
+
+static struct mlxsw_sp_neigh_entry *
+mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+ struct mlxsw_sp_rif *rif;
+ int err;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
+ if (!rif)
+ return ERR_PTR(-EINVAL);
+
+ neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
+ if (!neigh_entry)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+ if (err)
+ goto err_neigh_entry_insert;
+
+ mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
+ list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
+
+ return neigh_entry;
+
+err_neigh_entry_insert:
+ mlxsw_sp_neigh_entry_free(neigh_entry);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ list_del(&neigh_entry->rif_list_node);
+ mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
+ mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
+ mlxsw_sp_neigh_entry_free(neigh_entry);
+}
+
+static struct mlxsw_sp_neigh_entry *
+mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
+{
+ struct mlxsw_sp_neigh_key key;
+
+ key.n = n;
+ return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
+ &key, mlxsw_sp_neigh_ht_params);
+}
+
+static void
+mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned long interval;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ interval = min_t(unsigned long,
+ NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
+ NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
+#else
+ interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+#endif
+ mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
+}
+
+static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int ent_index)
+{
+ struct net_device *dev;
+ struct neighbour *n;
+ __be32 dipn;
+ u32 dip;
+ u16 rif;
+
+ mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
+
+ if (!mlxsw_sp->router->rifs[rif]) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
+ return;
+ }
+
+ dipn = htonl(dip);
+ dev = mlxsw_sp->router->rifs[rif]->dev;
+ n = neigh_lookup(&arp_tbl, &dipn, dev);
+ if (!n)
+ return;
+
+ netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int rec_index)
+{
+ struct net_device *dev;
+ struct neighbour *n;
+ struct in6_addr dip;
+ u16 rif;
+
+ mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
+ (char *) &dip);
+
+ if (!mlxsw_sp->router->rifs[rif]) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
+ return;
+ }
+
+ dev = mlxsw_sp->router->rifs[rif]->dev;
+ n = neigh_lookup(&nd_tbl, &dip, dev);
+ if (!n)
+ return;
+
+ netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+}
+#else
+static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int rec_index)
+{
+}
+#endif
+
+static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int rec_index)
+{
+ u8 num_entries;
+ int i;
+
+ num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
+ rec_index);
+ /* Hardware starts counting at 0, so add 1. */
+ num_entries++;
+
+ /* Each record consists of several neighbour entries. */
+ for (i = 0; i < num_entries; i++) {
+ int ent_index;
+
+ ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
+ mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
+ ent_index);
+ }
+
+}
+
+static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int rec_index)
+{
+ /* One record contains one entry. */
+ mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
+ rec_index);
+}
+
+static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl, int rec_index)
+{
+ switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
+ case MLXSW_REG_RAUHTD_TYPE_IPV4:
+ mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
+ rec_index);
+ break;
+ case MLXSW_REG_RAUHTD_TYPE_IPV6:
+ mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
+ rec_index);
+ break;
+ }
+}
+
+static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
+{
+ u8 num_rec, last_rec_index, num_entries;
+
+ num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
+ last_rec_index = num_rec - 1;
+
+ if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
+ return false;
+ if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
+ MLXSW_REG_RAUHTD_TYPE_IPV6)
+ return true;
+
+ num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
+ last_rec_index);
+ if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
+ return true;
+ return false;
+}
+
+static int
+__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ enum mlxsw_reg_rauhtd_type type)
+{
+ int i, num_rec;
+ int err;
+
+ /* Make sure the neighbour's netdev isn't removed in the
+ * process.
+ */
+ rtnl_lock();
+ do {
+ mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
+ rauhtd_pl);
+ if (err) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
+ break;
+ }
+ num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
+ for (i = 0; i < num_rec; i++)
+ mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
+ i);
+ } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
+ rtnl_unlock();
+
+ return err;
+}
+
+static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+{
+ enum mlxsw_reg_rauhtd_type type;
+ char *rauhtd_pl;
+ int err;
+
+ rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
+ if (!rauhtd_pl)
+ return -ENOMEM;
+
+ type = MLXSW_REG_RAUHTD_TYPE_IPV4;
+ err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+ if (err)
+ goto out;
+
+ type = MLXSW_REG_RAUHTD_TYPE_IPV6;
+ err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+out:
+ kfree(rauhtd_pl);
+ return err;
+}
+
+static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ /* Take RTNL mutex here to prevent lists from changes */
+ rtnl_lock();
+ list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
+ nexthop_neighs_list_node)
+ /* If this neigh have nexthops, make the kernel think this neigh
+ * is active regardless of the traffic.
+ */
+ neigh_event_send(neigh_entry->key.n, NULL);
+ rtnl_unlock();
+}
+
+static void
+mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned long interval = mlxsw_sp->router->neighs_update.interval;
+
+ mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
+ msecs_to_jiffies(interval));
+}
+
+static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
+{
+ struct mlxsw_sp_router *router;
+ int err;
+
+ router = container_of(work, struct mlxsw_sp_router,
+ neighs_update.dw.work);
+ err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
+ if (err)
+ dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
+
+ mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
+
+ mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
+}
+
+static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+ struct mlxsw_sp_router *router;
+
+ router = container_of(work, struct mlxsw_sp_router,
+ nexthop_probe_dw.work);
+ /* Iterate over nexthop neighbours, find those who are unresolved and
+ * send arp on them. This solves the chicken-egg problem when
+ * the nexthop wouldn't get offloaded until the neighbor is resolved
+ * but it wouldn't get resolved ever in case traffic is flowing in HW
+ * using different nexthop.
+ *
+ * Take RTNL mutex here to prevent lists from changes.
+ */
+ rtnl_lock();
+ list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
+ nexthop_neighs_list_node)
+ if (!neigh_entry->connected)
+ neigh_event_send(neigh_entry->key.n, NULL);
+ rtnl_unlock();
+
+ mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
+ MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
+}
+
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool removing, bool dead);
+
+static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
+{
+ return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
+ MLXSW_REG_RAUHT_OP_WRITE_DELETE;
+}
+
+static void
+mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ enum mlxsw_reg_rauht_op op)
+{
+ struct neighbour *n = neigh_entry->key.n;
+ u32 dip = ntohl(*((__be32 *) n->primary_key));
+ char rauht_pl[MLXSW_REG_RAUHT_LEN];
+
+ mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
+ dip);
+ if (neigh_entry->counter_valid)
+ mlxsw_reg_rauht_pack_counter(rauht_pl,
+ neigh_entry->counter_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+}
+
+static void
+mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ enum mlxsw_reg_rauht_op op)
+{
+ struct neighbour *n = neigh_entry->key.n;
+ char rauht_pl[MLXSW_REG_RAUHT_LEN];
+ const char *dip = n->primary_key;
+
+ mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
+ dip);
+ if (neigh_entry->counter_valid)
+ mlxsw_reg_rauht_pack_counter(rauht_pl,
+ neigh_entry->counter_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+}
+
+bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n = neigh_entry->key.n;
+
+ /* Packets with a link-local destination address are trapped
+ * after LPM lookup and never reach the neighbour table, so
+ * there is no need to program such neighbours to the device.
+ */
+ if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
+ IPV6_ADDR_LINKLOCAL)
+ return true;
+ return false;
+}
+
+static void
+mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool adding)
+{
+ if (!adding && !neigh_entry->connected)
+ return;
+ neigh_entry->connected = adding;
+ if (neigh_entry->key.n->tbl->family == AF_INET) {
+ mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
+ mlxsw_sp_rauht_op(adding));
+ } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
+ if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ return;
+ mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
+ mlxsw_sp_rauht_op(adding));
+ } else {
+ WARN_ON_ONCE(1);
+ }
+}
+
+void
+mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool adding)
+{
+ if (adding)
+ mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
+ else
+ mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
+ mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
+}
+
+struct mlxsw_sp_netevent_work {
+ struct work_struct work;
+ struct mlxsw_sp *mlxsw_sp;
+ struct neighbour *n;
+};
+
+static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_netevent_work *net_work =
+ container_of(work, struct mlxsw_sp_netevent_work, work);
+ struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+ struct neighbour *n = net_work->n;
+ unsigned char ha[ETH_ALEN];
+ bool entry_connected;
+ u8 nud_state, dead;
+
+ /* If these parameters are changed after we release the lock,
+ * then we are guaranteed to receive another event letting us
+ * know about it.
+ */
+ read_lock_bh(&n->lock);
+ memcpy(ha, n->ha, ETH_ALEN);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+
+ rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
+ entry_connected = nud_state & NUD_VALID && !dead;
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+ if (!entry_connected && !neigh_entry)
+ goto out;
+ if (!neigh_entry) {
+ neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
+ if (IS_ERR(neigh_entry))
+ goto out;
+ }
+
+ memcpy(neigh_entry->ha, ha, ETH_ALEN);
+ mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
+ mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
+ dead);
+
+ if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
+ mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+
+out:
+ rtnl_unlock();
+ neigh_release(n);
+ kfree(net_work);
+}
+
+static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
+
+static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_netevent_work *net_work =
+ container_of(work, struct mlxsw_sp_netevent_work, work);
+ struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
+
+ mlxsw_sp_mp_hash_init(mlxsw_sp);
+ kfree(net_work);
+}
+
+static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
+
+static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
+{
+ struct mlxsw_sp_netevent_work *net_work =
+ container_of(work, struct mlxsw_sp_netevent_work, work);
+ struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
+
+ __mlxsw_sp_router_init(mlxsw_sp);
+ kfree(net_work);
+}
+
+static int mlxsw_sp_router_schedule_work(struct net *net,
+ struct notifier_block *nb,
+ void (*cb)(struct work_struct *))
+{
+ struct mlxsw_sp_netevent_work *net_work;
+ struct mlxsw_sp_router *router;
+
+ if (!net_eq(net, &init_net))
+ return NOTIFY_DONE;
+
+ net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
+ if (!net_work)
+ return NOTIFY_BAD;
+
+ router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
+ INIT_WORK(&net_work->work, cb);
+ net_work->mlxsw_sp = router->mlxsw_sp;
+ mlxsw_core_schedule_work(&net_work->work);
+ return NOTIFY_DONE;
+}
+
+static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlxsw_sp_netevent_work *net_work;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ struct mlxsw_sp *mlxsw_sp;
+ unsigned long interval;
+ struct neigh_parms *p;
+ struct neighbour *n;
+
+ switch (event) {
+ case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+ p = ptr;
+
+ /* We don't care about changes in the default table. */
+ if (!p->dev || (p->tbl->family != AF_INET &&
+ p->tbl->family != AF_INET6))
+ return NOTIFY_DONE;
+
+ /* We are in atomic context and can't take RTNL mutex,
+ * so use RCU variant to walk the device chain.
+ */
+ mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
+ if (!mlxsw_sp_port)
+ return NOTIFY_DONE;
+
+ mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
+ mlxsw_sp->router->neighs_update.interval = interval;
+
+ mlxsw_sp_port_dev_put(mlxsw_sp_port);
+ break;
+ case NETEVENT_NEIGH_UPDATE:
+ n = ptr;
+
+ if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
+ return NOTIFY_DONE;
+
+ mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
+ if (!mlxsw_sp_port)
+ return NOTIFY_DONE;
+
+ net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
+ if (!net_work) {
+ mlxsw_sp_port_dev_put(mlxsw_sp_port);
+ return NOTIFY_BAD;
+ }
+
+ INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
+ net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ net_work->n = n;
+
+ /* Take a reference to ensure the neighbour won't be
+ * destructed until we drop the reference in delayed
+ * work.
+ */
+ neigh_clone(n);
+ mlxsw_core_schedule_work(&net_work->work);
+ mlxsw_sp_port_dev_put(mlxsw_sp_port);
+ break;
+ case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+ case NETEVENT_IPV6_MPATH_HASH_UPDATE:
+ return mlxsw_sp_router_schedule_work(ptr, nb,
+ mlxsw_sp_router_mp_hash_event_work);
+
+ case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
+ return mlxsw_sp_router_schedule_work(ptr, nb,
+ mlxsw_sp_router_update_priority_work);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
+ &mlxsw_sp_neigh_ht_params);
+ if (err)
+ return err;
+
+ /* Initialize the polling interval according to the default
+ * table.
+ */
+ mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
+
+ /* Create the delayed works for the activity_update */
+ INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
+ mlxsw_sp_router_neighs_update_work);
+ INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
+ mlxsw_sp_router_probe_unresolved_nexthops);
+ mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
+ mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
+ return 0;
+}
+
+static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
+ cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
+ rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
+}
+
+static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
+
+ list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
+ rif_list_node) {
+ mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
+ mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+ }
+}
+
+enum mlxsw_sp_nexthop_type {
+ MLXSW_SP_NEXTHOP_TYPE_ETH,
+ MLXSW_SP_NEXTHOP_TYPE_IPIP,
+};
+
+struct mlxsw_sp_nexthop_key {
+ struct fib_nh *fib_nh;
+};
+
+struct mlxsw_sp_nexthop {
+ struct list_head neigh_list_node; /* member of neigh entry list */
+ struct list_head rif_list_node;
+ struct list_head router_list_node;
+ struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
+ * this belongs to
+ */
+ struct rhash_head ht_node;
+ struct mlxsw_sp_nexthop_key key;
+ unsigned char gw_addr[sizeof(struct in6_addr)];
+ int ifindex;
+ int nh_weight;
+ int norm_nh_weight;
+ int num_adj_entries;
+ struct mlxsw_sp_rif *rif;
+ u8 should_offload:1, /* set indicates this neigh is connected and
+ * should be put to KVD linear area of this group.
+ */
+ offloaded:1, /* set in case the neigh is actually put into
+ * KVD linear area of this group.
+ */
+ update:1; /* set indicates that MAC of this neigh should be
+ * updated in HW
+ */
+ enum mlxsw_sp_nexthop_type type;
+ union {
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ };
+ unsigned int counter_index;
+ bool counter_valid;
+};
+
+struct mlxsw_sp_nexthop_group {
+ void *priv;
+ struct rhash_head ht_node;
+ struct list_head fib_list; /* list of fib entries that use this group */
+ struct neigh_table *neigh_tbl;
+ u8 adj_index_valid:1,
+ gateway:1; /* routes using the group use a gateway */
+ u32 adj_index;
+ u16 ecmp_size;
+ u16 count;
+ int sum_norm_weight;
+ struct mlxsw_sp_nexthop nexthops[0];
+#define nh_rif nexthops[0].rif
+};
+
+void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct devlink *devlink;
+
+ devlink = priv_to_devlink(mlxsw_sp->core);
+ if (!devlink_dpipe_table_counter_enabled(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
+ return;
+
+ if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
+ return;
+
+ nh->counter_valid = true;
+}
+
+void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ if (!nh->counter_valid)
+ return;
+ mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
+ nh->counter_valid = false;
+}
+
+int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh, u64 *p_counter)
+{
+ if (!nh->counter_valid)
+ return -EINVAL;
+
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
+ p_counter, NULL);
+}
+
+struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
+ struct mlxsw_sp_nexthop *nh)
+{
+ if (!nh) {
+ if (list_empty(&router->nexthop_list))
+ return NULL;
+ else
+ return list_first_entry(&router->nexthop_list,
+ typeof(*nh), router_list_node);
+ }
+ if (list_is_last(&nh->router_list_node, &router->nexthop_list))
+ return NULL;
+ return list_next_entry(nh, router_list_node);
+}
+
+bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
+{
+ return nh->offloaded;
+}
+
+unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
+{
+ if (!nh->offloaded)
+ return NULL;
+ return nh->neigh_entry->ha;
+}
+
+int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
+ u32 *p_adj_size, u32 *p_adj_hash_index)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+ u32 adj_hash_index = 0;
+ int i;
+
+ if (!nh->offloaded || !nh_grp->adj_index_valid)
+ return -EINVAL;
+
+ *p_adj_index = nh_grp->adj_index;
+ *p_adj_size = nh_grp->ecmp_size;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+
+ if (nh_iter == nh)
+ break;
+ if (nh_iter->offloaded)
+ adj_hash_index += nh_iter->num_adj_entries;
+ }
+
+ *p_adj_hash_index = adj_hash_index;
+ return 0;
+}
+
+struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
+{
+ return nh->rif;
+}
+
+bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+
+ if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
+ return true;
+ }
+ return false;
+}
+
+static struct fib_info *
+mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ return nh_grp->priv;
+}
+
+struct mlxsw_sp_nexthop_group_cmp_arg {
+ enum mlxsw_sp_l3proto proto;
+ union {
+ struct fib_info *fi;
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ };
+};
+
+static bool
+mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct in6_addr *gw, int ifindex,
+ int weight)
+{
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ const struct mlxsw_sp_nexthop *nh;
+
+ nh = &nh_grp->nexthops[i];
+ if (nh->ifindex == ifindex && nh->nh_weight == weight &&
+ ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ if (nh_grp->count != fib6_entry->nrt6)
+ return false;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct in6_addr *gw;
+ int ifindex, weight;
+
+ ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
+ weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
+ gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
+ if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
+ weight))
+ return false;
+ }
+
+ return true;
+}
+
+static int
+mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
+ const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
+
+ switch (cmp_arg->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
+ cmp_arg->fib6_entry);
+ default:
+ WARN_ON(1);
+ return 1;
+ }
+}
+
+static int
+mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ return nh_grp->neigh_tbl->family;
+}
+
+static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct mlxsw_sp_nexthop_group *nh_grp = data;
+ const struct mlxsw_sp_nexthop *nh;
+ struct fib_info *fi;
+ unsigned int val;
+ int i;
+
+ switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
+ case AF_INET:
+ fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
+ return jhash(&fi, sizeof(fi), seed);
+ case AF_INET6:
+ val = nh_grp->count;
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ val ^= nh->ifindex;
+ }
+ return jhash(&val, sizeof(val), seed);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+static u32
+mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
+{
+ unsigned int val = fib6_entry->nrt6;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ struct net_device *dev;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
+ val ^= dev->ifindex;
+ }
+
+ return jhash(&val, sizeof(val), seed);
+}
+
+static u32
+mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
+{
+ const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
+
+ switch (cmp_arg->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
+ .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
+ .hashfn = mlxsw_sp_nexthop_group_hash,
+ .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
+ .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
+};
+
+static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
+ !nh_grp->gateway)
+ return 0;
+
+ return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
+ &nh_grp->ht_node,
+ mlxsw_sp_nexthop_group_ht_params);
+}
+
+static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
+ !nh_grp->gateway)
+ return;
+
+ rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
+ &nh_grp->ht_node,
+ mlxsw_sp_nexthop_group_ht_params);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct fib_info *fi)
+{
+ struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
+
+ cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
+ cmp_arg.fi = fi;
+ return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
+ &cmp_arg,
+ mlxsw_sp_nexthop_group_ht_params);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
+
+ cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
+ cmp_arg.fib6_entry = fib6_entry;
+ return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
+ &cmp_arg,
+ mlxsw_sp_nexthop_group_ht_params);
+}
+
+static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
+ .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
+ .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
+ .key_len = sizeof(struct mlxsw_sp_nexthop_key),
+};
+
+static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
+ &nh->ht_node, mlxsw_sp_nexthop_ht_params);
+}
+
+static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
+ mlxsw_sp_nexthop_ht_params);
+}
+
+static struct mlxsw_sp_nexthop *
+mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_key key)
+{
+ return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
+ mlxsw_sp_nexthop_ht_params);
+}
+
+static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fib *fib,
+ u32 adj_index, u16 ecmp_size,
+ u32 new_adj_index,
+ u16 new_ecmp_size)
+{
+ char raleu_pl[MLXSW_REG_RALEU_LEN];
+
+ mlxsw_reg_raleu_pack(raleu_pl,
+ (enum mlxsw_reg_ralxx_protocol) fib->proto,
+ fib->vr->id, adj_index, ecmp_size, new_adj_index,
+ new_ecmp_size);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
+}
+
+static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ u32 old_adj_index, u16 old_ecmp_size)
+{
+ struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_fib *fib = NULL;
+ int err;
+
+ list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+ if (fib == fib_entry->fib_node->fib)
+ continue;
+ fib = fib_entry->fib_node->fib;
+ err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
+ old_adj_index,
+ old_ecmp_size,
+ nh_grp->adj_index,
+ nh_grp->ecmp_size);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+ char ratr_pl[MLXSW_REG_RATR_LEN];
+
+ mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+ true, MLXSW_REG_RATR_TYPE_ETHERNET,
+ adj_index, neigh_entry->rif);
+ mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+ if (nh->counter_valid)
+ mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
+ else
+ mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_nexthop *nh)
+{
+ int i;
+
+ for (i = 0; i < nh->num_adj_entries; i++) {
+ int err;
+
+ err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
+ u32 adj_index,
+ struct mlxsw_sp_nexthop *nh)
+{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
+ return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
+}
+
+static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
+ u32 adj_index,
+ struct mlxsw_sp_nexthop *nh)
+{
+ int i;
+
+ for (i = 0; i < nh->num_adj_entries; i++) {
+ int err;
+
+ err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
+ nh);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ bool reallocate)
+{
+ u32 adj_index = nh_grp->adj_index; /* base */
+ struct mlxsw_sp_nexthop *nh;
+ int i;
+ int err;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+
+ if (!nh->should_offload) {
+ nh->offloaded = 0;
+ continue;
+ }
+
+ if (nh->update || reallocate) {
+ switch (nh->type) {
+ case MLXSW_SP_NEXTHOP_TYPE_ETH:
+ err = mlxsw_sp_nexthop_update
+ (mlxsw_sp, adj_index, nh);
+ break;
+ case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+ err = mlxsw_sp_nexthop_ipip_update
+ (mlxsw_sp, adj_index, nh);
+ break;
+ }
+ if (err)
+ return err;
+ nh->update = 0;
+ nh->offloaded = 1;
+ }
+ adj_index += nh->num_adj_entries;
+ }
+ return 0;
+}
+
+static bool
+mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
+ const struct mlxsw_sp_fib_entry *fib_entry);
+
+static int
+mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_fib_entry *fib_entry;
+ int err;
+
+ list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+ if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
+ fib_entry))
+ continue;
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static void
+mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op, int err);
+
+static void
+mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
+ struct mlxsw_sp_fib_entry *fib_entry;
+
+ list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+ if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
+ fib_entry))
+ continue;
+ mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
+ }
+}
+
+static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
+{
+ /* Valid sizes for an adjacency group are:
+ * 1-64, 512, 1024, 2048 and 4096.
+ */
+ if (*p_adj_grp_size <= 64)
+ return;
+ else if (*p_adj_grp_size <= 512)
+ *p_adj_grp_size = 512;
+ else if (*p_adj_grp_size <= 1024)
+ *p_adj_grp_size = 1024;
+ else if (*p_adj_grp_size <= 2048)
+ *p_adj_grp_size = 2048;
+ else
+ *p_adj_grp_size = 4096;
+}
+
+static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
+ unsigned int alloc_size)
+{
+ if (alloc_size >= 4096)
+ *p_adj_grp_size = 4096;
+ else if (alloc_size >= 2048)
+ *p_adj_grp_size = 2048;
+ else if (alloc_size >= 1024)
+ *p_adj_grp_size = 1024;
+ else if (alloc_size >= 512)
+ *p_adj_grp_size = 512;
+}
+
+static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
+ u16 *p_adj_grp_size)
+{
+ unsigned int alloc_size;
+ int err;
+
+ /* Round up the requested group size to the next size supported
+ * by the device and make sure the request can be satisfied.
+ */
+ mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
+ err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
+ MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+ *p_adj_grp_size, &alloc_size);
+ if (err)
+ return err;
+ /* It is possible the allocation results in more allocated
+ * entries than requested. Try to use as much of them as
+ * possible.
+ */
+ mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
+
+ return 0;
+}
+
+static void
+mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ int i, g = 0, sum_norm_weight = 0;
+ struct mlxsw_sp_nexthop *nh;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+
+ if (!nh->should_offload)
+ continue;
+ if (g > 0)
+ g = gcd(nh->nh_weight, g);
+ else
+ g = nh->nh_weight;
+ }
+
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+
+ if (!nh->should_offload)
+ continue;
+ nh->norm_nh_weight = nh->nh_weight / g;
+ sum_norm_weight += nh->norm_nh_weight;
+ }
+
+ nh_grp->sum_norm_weight = sum_norm_weight;
+}
+
+static void
+mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ int total = nh_grp->sum_norm_weight;
+ u16 ecmp_size = nh_grp->ecmp_size;
+ int i, weight = 0, lower_bound = 0;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+ int upper_bound;
+
+ if (!nh->should_offload)
+ continue;
+ weight += nh->norm_nh_weight;
+ upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
+ nh->num_adj_entries = upper_bound - lower_bound;
+ lower_bound = upper_bound;
+ }
+}
+
+static void
+mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ u16 ecmp_size, old_ecmp_size;
+ struct mlxsw_sp_nexthop *nh;
+ bool offload_change = false;
+ u32 adj_index;
+ bool old_adj_index_valid;
+ u32 old_adj_index;
+ int i;
+ int err;
+
+ if (!nh_grp->gateway) {
+ mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+ return;
+ }
+
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+
+ if (nh->should_offload != nh->offloaded) {
+ offload_change = true;
+ if (nh->should_offload)
+ nh->update = 1;
+ }
+ }
+ if (!offload_change) {
+ /* Nothing was added or removed, so no need to reallocate. Just
+ * update MAC on existing adjacency indexes.
+ */
+ err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+ goto set_trap;
+ }
+ return;
+ }
+ mlxsw_sp_nexthop_group_normalize(nh_grp);
+ if (!nh_grp->sum_norm_weight)
+ /* No neigh of this group is connected so we just set
+ * the trap and let everthing flow through kernel.
+ */
+ goto set_trap;
+
+ ecmp_size = nh_grp->sum_norm_weight;
+ err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
+ if (err)
+ /* No valid allocation size available. */
+ goto set_trap;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+ ecmp_size, &adj_index);
+ if (err) {
+ /* We ran out of KVD linear space, just set the
+ * trap and let everything flow through kernel.
+ */
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
+ goto set_trap;
+ }
+ old_adj_index_valid = nh_grp->adj_index_valid;
+ old_adj_index = nh_grp->adj_index;
+ old_ecmp_size = nh_grp->ecmp_size;
+ nh_grp->adj_index_valid = 1;
+ nh_grp->adj_index = adj_index;
+ nh_grp->ecmp_size = ecmp_size;
+ mlxsw_sp_nexthop_group_rebalance(nh_grp);
+ err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
+ goto set_trap;
+ }
+
+ if (!old_adj_index_valid) {
+ /* The trap was set for fib entries, so we have to call
+ * fib entry update to unset it and use adjacency index.
+ */
+ err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
+ goto set_trap;
+ }
+ return;
+ }
+
+ err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
+ old_adj_index, old_ecmp_size);
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+ old_ecmp_size, old_adj_index);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
+ goto set_trap;
+ }
+
+ /* Offload state within the group changed, so update the flags. */
+ mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
+
+ return;
+
+set_trap:
+ old_adj_index_valid = nh_grp->adj_index_valid;
+ nh_grp->adj_index_valid = 0;
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ nh->offloaded = 0;
+ }
+ err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
+ if (err)
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
+ if (old_adj_index_valid)
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+ nh_grp->ecmp_size, nh_grp->adj_index);
+}
+
+static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
+ bool removing)
+{
+ if (!removing)
+ nh->should_offload = 1;
+ else
+ nh->should_offload = 0;
+ nh->update = 1;
+}
+
+static int
+mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n, *old_n = neigh_entry->key.n;
+ struct mlxsw_sp_nexthop *nh;
+ bool entry_connected;
+ u8 nud_state, dead;
+ int err;
+
+ nh = list_first_entry(&neigh_entry->nexthop_list,
+ struct mlxsw_sp_nexthop, neigh_list_node);
+
+ n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
+ if (!n) {
+ n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
+ nh->rif->dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ neigh_event_send(n, NULL);
+ }
+
+ mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
+ neigh_entry->key.n = n;
+ err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+ if (err)
+ goto err_neigh_entry_insert;
+
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+ entry_connected = nud_state & NUD_VALID && !dead;
+
+ list_for_each_entry(nh, &neigh_entry->nexthop_list,
+ neigh_list_node) {
+ neigh_release(old_n);
+ neigh_clone(n);
+ __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+ }
+
+ neigh_release(n);
+
+ return 0;
+
+err_neigh_entry_insert:
+ neigh_entry->key.n = old_n;
+ mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
+ neigh_release(n);
+ return err;
+}
+
+static void
+mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool removing, bool dead)
+{
+ struct mlxsw_sp_nexthop *nh;
+
+ if (list_empty(&neigh_entry->nexthop_list))
+ return;
+
+ if (dead) {
+ int err;
+
+ err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
+ neigh_entry);
+ if (err)
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
+ return;
+ }
+
+ list_for_each_entry(nh, &neigh_entry->nexthop_list,
+ neigh_list_node) {
+ __mlxsw_sp_nexthop_neigh_update(nh, removing);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+ }
+}
+
+static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
+ struct mlxsw_sp_rif *rif)
+{
+ if (nh->rif)
+ return;
+
+ nh->rif = rif;
+ list_add(&nh->rif_list_node, &rif->nexthop_list);
+}
+
+static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
+{
+ if (!nh->rif)
+ return;
+
+ list_del(&nh->rif_list_node);
+ nh->rif = NULL;
+}
+
+static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+ struct neighbour *n;
+ u8 nud_state, dead;
+ int err;
+
+ if (!nh->nh_grp->gateway || nh->neigh_entry)
+ return 0;
+
+ /* Take a reference of neigh here ensuring that neigh would
+ * not be destructed before the nexthop entry is finished.
+ * The reference is taken either in neigh_lookup() or
+ * in neigh_create() in case n is not found.
+ */
+ n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
+ if (!n) {
+ n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
+ nh->rif->dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ neigh_event_send(n, NULL);
+ }
+ neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+ if (!neigh_entry) {
+ neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
+ if (IS_ERR(neigh_entry)) {
+ err = -EINVAL;
+ goto err_neigh_entry_create;
+ }
+ }
+
+ /* If that is the first nexthop connected to that neigh, add to
+ * nexthop_neighs_list
+ */
+ if (list_empty(&neigh_entry->nexthop_list))
+ list_add_tail(&neigh_entry->nexthop_neighs_list_node,
+ &mlxsw_sp->router->nexthop_neighs_list);
+
+ nh->neigh_entry = neigh_entry;
+ list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ dead = n->dead;
+ read_unlock_bh(&n->lock);
+ __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
+
+ return 0;
+
+err_neigh_entry_create:
+ neigh_release(n);
+ return err;
+}
+
+static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
+ struct neighbour *n;
+
+ if (!neigh_entry)
+ return;
+ n = neigh_entry->key.n;
+
+ __mlxsw_sp_nexthop_neigh_update(nh, true);
+ list_del(&nh->neigh_list_node);
+ nh->neigh_entry = NULL;
+
+ /* If that is the last nexthop connected to that neigh, remove from
+ * nexthop_neighs_list
+ */
+ if (list_empty(&neigh_entry->nexthop_list))
+ list_del(&neigh_entry->nexthop_neighs_list_node);
+
+ if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
+ mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+
+ neigh_release(n);
+}
+
+static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
+{
+ struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
+
+ return ul_dev ? (ul_dev->flags & IFF_UP) : true;
+}
+
+static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ bool removing;
+
+ if (!nh->nh_grp->gateway || nh->ipip_entry)
+ return;
+
+ nh->ipip_entry = ipip_entry;
+ removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
+ __mlxsw_sp_nexthop_neigh_update(nh, removing);
+ mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
+}
+
+static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
+
+ if (!ipip_entry)
+ return;
+
+ __mlxsw_sp_nexthop_neigh_update(nh, true);
+ nh->ipip_entry = NULL;
+}
+
+static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct fib_nh *fib_nh,
+ enum mlxsw_sp_ipip_type *p_ipipt)
+{
+ struct net_device *dev = fib_nh->nh_dev;
+
+ return dev &&
+ fib_nh->nh_parent->fib_type == RTN_UNICAST &&
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
+}
+
+static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ switch (nh->type) {
+ case MLXSW_SP_NEXTHOP_TYPE_ETH:
+ mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_rif_fini(nh);
+ break;
+ case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+ mlxsw_sp_nexthop_rif_fini(nh);
+ mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
+ break;
+ }
+}
+
+static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh,
+ struct fib_nh *fib_nh)
+{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct net_device *dev = fib_nh->nh_dev;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ struct mlxsw_sp_rif *rif;
+ int err;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
+ if (ipip_entry) {
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+ if (ipip_ops->can_offload(mlxsw_sp, dev,
+ MLXSW_SP_L3_PROTO_IPV4)) {
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
+ mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
+ return 0;
+ }
+ }
+
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return 0;
+
+ mlxsw_sp_nexthop_rif_init(nh, rif);
+ err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ if (err)
+ goto err_neigh_init;
+
+ return 0;
+
+err_neigh_init:
+ mlxsw_sp_nexthop_rif_fini(nh);
+ return err;
+}
+
+static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
+}
+
+static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ struct fib_nh *fib_nh)
+{
+ struct net_device *dev = fib_nh->nh_dev;
+ struct in_device *in_dev;
+ int err;
+
+ nh->nh_grp = nh_grp;
+ nh->key.fib_nh = fib_nh;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ nh->nh_weight = fib_nh->nh_weight;
+#else
+ nh->nh_weight = 1;
+#endif
+ memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
+ err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
+ if (err)
+ return err;
+
+ mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+ list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
+
+ if (!dev)
+ return 0;
+
+ in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ fib_nh->nh_flags & RTNH_F_LINKDOWN)
+ return 0;
+
+ err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
+ if (err)
+ goto err_nexthop_neigh_init;
+
+ return 0;
+
+err_nexthop_neigh_init:
+ mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
+ return err;
+}
+
+static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
+ list_del(&nh->router_list_node);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
+}
+
+static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
+ unsigned long event, struct fib_nh *fib_nh)
+{
+ struct mlxsw_sp_nexthop_key key;
+ struct mlxsw_sp_nexthop *nh;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ key.fib_nh = fib_nh;
+ nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
+ if (WARN_ON_ONCE(!nh))
+ return;
+
+ switch (event) {
+ case FIB_EVENT_NH_ADD:
+ mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
+ break;
+ case FIB_EVENT_NH_DEL:
+ mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
+ break;
+ }
+
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+}
+
+static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_nexthop *nh;
+ bool removing;
+
+ list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
+ switch (nh->type) {
+ case MLXSW_SP_NEXTHOP_TYPE_ETH:
+ removing = false;
+ break;
+ case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+ removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
+ break;
+ default:
+ WARN_ON(1);
+ continue;
+ }
+
+ __mlxsw_sp_nexthop_neigh_update(nh, removing);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+ }
+}
+
+static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *old_rif,
+ struct mlxsw_sp_rif *new_rif)
+{
+ struct mlxsw_sp_nexthop *nh;
+
+ list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
+ list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
+ nh->rif = new_rif;
+ mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
+}
+
+static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_nexthop *nh, *tmp;
+
+ list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
+ mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+ }
+}
+
+static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+ const struct fib_info *fi)
+{
+ return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
+ mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ struct mlxsw_sp_nexthop *nh;
+ struct fib_nh *fib_nh;
+ size_t alloc_size;
+ int i;
+ int err;
+
+ alloc_size = sizeof(*nh_grp) +
+ fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
+ nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+ if (!nh_grp)
+ return ERR_PTR(-ENOMEM);
+ nh_grp->priv = fi;
+ INIT_LIST_HEAD(&nh_grp->fib_list);
+ nh_grp->neigh_tbl = &arp_tbl;
+
+ nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
+ nh_grp->count = fi->fib_nhs;
+ fib_info_hold(fi);
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ fib_nh = &fi->fib_nh[i];
+ err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
+ if (err)
+ goto err_nexthop4_init;
+ }
+ err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
+ if (err)
+ goto err_nexthop_group_insert;
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ return nh_grp;
+
+err_nexthop_group_insert:
+err_nexthop4_init:
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
+ }
+ fib_info_put(fi);
+ kfree(nh_grp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int i;
+
+ mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
+ }
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ WARN_ON_ONCE(nh_grp->adj_index_valid);
+ fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
+ kfree(nh_grp);
+}
+
+static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ struct fib_info *fi)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
+ if (!nh_grp) {
+ nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
+ if (IS_ERR(nh_grp))
+ return PTR_ERR(nh_grp);
+ }
+ list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
+ fib_entry->nh_group = nh_grp;
+ return 0;
+}
+
+static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+ list_del(&fib_entry->nexthop_group_node);
+ if (!list_empty(&nh_grp->fib_list))
+ return;
+ mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static bool
+mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+
+ fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
+ common);
+ return !fib4_entry->tos;
+}
+
+static bool
+mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
+
+ switch (fib_entry->fib_node->fib->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
+ return false;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ break;
+ }
+
+ switch (fib_entry->type) {
+ case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
+ return !!nh_group->adj_index_valid;
+ case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
+ return !!nh_group->nh_rif;
+ case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static struct mlxsw_sp_nexthop *
+mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+ struct fib6_info *rt = mlxsw_sp_rt6->rt;
+
+ if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
+ ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
+ &rt->fib6_nh.nh_gw))
+ return nh;
+ continue;
+ }
+
+ return NULL;
+}
+
+static void
+mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ int i;
+
+ if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
+ fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
+ nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+ return;
+ }
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+ if (nh->offloaded)
+ nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+ else
+ nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
+
+static void
+mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ int i;
+
+ if (!list_is_singular(&nh_grp->fib_list))
+ return;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+ nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+
+ if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
+ list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
+ return;
+ }
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ struct mlxsw_sp_nexthop *nh;
+
+ nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
+ if (nh && nh->offloaded)
+ mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
+ else
+ mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct fib6_info *rt = mlxsw_sp_rt6->rt;
+
+ rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
+
+static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ switch (fib_entry->fib_node->fib->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_sp_fib4_entry_offload_set(fib_entry);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_sp_fib6_entry_offload_set(fib_entry);
+ break;
+ }
+}
+
+static void
+mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ switch (fib_entry->fib_node->fib->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_sp_fib4_entry_offload_unset(fib_entry);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_sp_fib6_entry_offload_unset(fib_entry);
+ break;
+ }
+}
+
+static void
+mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op, int err)
+{
+ switch (op) {
+ case MLXSW_REG_RALUE_OP_WRITE_DELETE:
+ return mlxsw_sp_fib_entry_offload_unset(fib_entry);
+ case MLXSW_REG_RALUE_OP_WRITE_WRITE:
+ if (err)
+ return;
+ if (mlxsw_sp_fib_entry_should_offload(fib_entry))
+ mlxsw_sp_fib_entry_offload_set(fib_entry);
+ else
+ mlxsw_sp_fib_entry_offload_unset(fib_entry);
+ return;
+ default:
+ return;
+ }
+}
+
+static void
+mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
+ const struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
+ enum mlxsw_reg_ralxx_protocol proto;
+ u32 *p_dip;
+
+ proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
+
+ switch (fib->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ p_dip = (u32 *) fib_entry->fib_node->key.addr;
+ mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
+ fib_entry->fib_node->key.prefix_len,
+ *p_dip);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
+ fib_entry->fib_node->key.prefix_len,
+ fib_entry->fib_node->key.addr);
+ break;
+ }
+}
+
+static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+ enum mlxsw_reg_ralue_trap_action trap_action;
+ u16 trap_id = 0;
+ u32 adjacency_index = 0;
+ u16 ecmp_size = 0;
+
+ /* In case the nexthop group adjacency index is valid, use it
+ * with provided ECMP size. Otherwise, setup trap and pass
+ * traffic to kernel.
+ */
+ if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
+ trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
+ adjacency_index = fib_entry->nh_group->adj_index;
+ ecmp_size = fib_entry->nh_group->ecmp_size;
+ } else {
+ trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
+ trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
+ }
+
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
+ adjacency_index, ecmp_size);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
+ enum mlxsw_reg_ralue_trap_action trap_action;
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+ u16 trap_id = 0;
+ u16 rif_index = 0;
+
+ if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
+ trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
+ rif_index = rif->rif_index;
+ } else {
+ trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
+ trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
+ }
+
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
+ rif_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int
+mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+
+ if (WARN_ON(!ipip_entry))
+ return -EINVAL;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+ return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
+ fib_entry->decap.tunnel_index);
+}
+
+static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ switch (fib_entry->type) {
+ case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
+ return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
+ case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
+ return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
+ case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
+ return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
+ case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+ return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
+ fib_entry, op);
+ }
+ return -EINVAL;
+}
+
+static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
+
+ mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
+
+ return err;
+}
+
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
+ MLXSW_REG_RALUE_OP_WRITE_WRITE);
+}
+
+static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
+ MLXSW_REG_RALUE_OP_WRITE_DELETE);
+}
+
+static int
+mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
+ const struct fib_entry_notifier_info *fen_info,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
+ struct net_device *dev = fen_info->fi->fib_dev;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ struct fib_info *fi = fen_info->fi;
+
+ switch (fen_info->type) {
+ case RTN_LOCAL:
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
+ MLXSW_SP_L3_PROTO_IPV4, dip);
+ if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
+ return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
+ fib_entry,
+ ipip_entry);
+ }
+ /* fall through */
+ case RTN_BROADCAST:
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ return 0;
+ case RTN_UNREACHABLE: /* fall through */
+ case RTN_BLACKHOLE: /* fall through */
+ case RTN_PROHIBIT:
+ /* Packets hitting these routes need to be trapped, but
+ * can do so with a lower priority than packets directed
+ * at the host, so use action type local instead of trap.
+ */
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+ return 0;
+ case RTN_UNICAST:
+ if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+ else
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static struct mlxsw_sp_fib4_entry *
+mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node,
+ const struct fib_entry_notifier_info *fen_info)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ int err;
+
+ fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
+ if (!fib4_entry)
+ return ERR_PTR(-ENOMEM);
+ fib_entry = &fib4_entry->common;
+
+ err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
+ if (err)
+ goto err_fib4_entry_type_set;
+
+ err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
+ if (err)
+ goto err_nexthop4_group_get;
+
+ fib4_entry->prio = fen_info->fi->fib_priority;
+ fib4_entry->tb_id = fen_info->tb_id;
+ fib4_entry->type = fen_info->type;
+ fib4_entry->tos = fen_info->tos;
+
+ fib_entry->fib_node = fib_node;
+
+ return fib4_entry;
+
+err_nexthop4_group_get:
+err_fib4_entry_type_set:
+ kfree(fib4_entry);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib4_entry *fib4_entry)
+{
+ mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
+ kfree(fib4_entry);
+}
+
+static struct mlxsw_sp_fib4_entry *
+mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
+ const struct fib_entry_notifier_info *fen_info)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
+ if (!vr)
+ return NULL;
+ fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
+ sizeof(fen_info->dst),
+ fen_info->dst_len);
+ if (!fib_node)
+ return NULL;
+
+ list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+ if (fib4_entry->tb_id == fen_info->tb_id &&
+ fib4_entry->tos == fen_info->tos &&
+ fib4_entry->type == fen_info->type &&
+ mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
+ fen_info->fi) {
+ return fib4_entry;
+ }
+ }
+
+ return NULL;
+}
+
+static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
+ .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
+ .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
+ .key_len = sizeof(struct mlxsw_sp_fib_key),
+ .automatic_shrinking = true,
+};
+
+static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
+ mlxsw_sp_fib_ht_params);
+}
+
+static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
+ mlxsw_sp_fib_ht_params);
+}
+
+static struct mlxsw_sp_fib_node *
+mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+ size_t addr_len, unsigned char prefix_len)
+{
+ struct mlxsw_sp_fib_key key;
+
+ memset(&key, 0, sizeof(key));
+ memcpy(key.addr, addr, addr_len);
+ key.prefix_len = prefix_len;
+ return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
+}
+
+static struct mlxsw_sp_fib_node *
+mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
+ size_t addr_len, unsigned char prefix_len)
+{
+ struct mlxsw_sp_fib_node *fib_node;
+
+ fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
+ if (!fib_node)
+ return NULL;
+
+ INIT_LIST_HEAD(&fib_node->entry_list);
+ list_add(&fib_node->list, &fib->node_list);
+ memcpy(fib_node->key.addr, addr, addr_len);
+ fib_node->key.prefix_len = prefix_len;
+
+ return fib_node;
+}
+
+static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
+{
+ list_del(&fib_node->list);
+ WARN_ON(!list_empty(&fib_node->entry_list));
+ kfree(fib_node);
+}
+
+static bool
+mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
+ const struct mlxsw_sp_fib_entry *fib_entry)
+{
+ return list_first_entry(&fib_node->entry_list,
+ struct mlxsw_sp_fib_entry, list) == fib_entry;
+}
+
+static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_prefix_usage req_prefix_usage;
+ struct mlxsw_sp_fib *fib = fib_node->fib;
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+ int err;
+
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
+ if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
+ goto out;
+
+ mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
+ mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ fib->proto);
+ if (IS_ERR(lpm_tree))
+ return PTR_ERR(lpm_tree);
+
+ err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
+ if (err)
+ goto err_lpm_tree_replace;
+
+out:
+ lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
+ return 0;
+
+err_lpm_tree_replace:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+ return err;
+}
+
+static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
+ struct mlxsw_sp_prefix_usage req_prefix_usage;
+ struct mlxsw_sp_fib *fib = fib_node->fib;
+ int err;
+
+ if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
+ return;
+ /* Try to construct a new LPM tree from the current prefix usage
+ * minus the unused one. If we fail, continue using the old one.
+ */
+ mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
+ mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
+ fib_node->key.prefix_len);
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ fib->proto);
+ if (IS_ERR(lpm_tree))
+ return;
+
+ err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
+ if (err)
+ goto err_lpm_tree_replace;
+
+ return;
+
+err_lpm_tree_replace:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+}
+
+static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node,
+ struct mlxsw_sp_fib *fib)
+{
+ int err;
+
+ err = mlxsw_sp_fib_node_insert(fib, fib_node);
+ if (err)
+ return err;
+ fib_node->fib = fib;
+
+ err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
+ if (err)
+ goto err_fib_lpm_tree_link;
+
+ return 0;
+
+err_fib_lpm_tree_link:
+ fib_node->fib = NULL;
+ mlxsw_sp_fib_node_remove(fib, fib_node);
+ return err;
+}
+
+static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_fib *fib = fib_node->fib;
+
+ mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
+ fib_node->fib = NULL;
+ mlxsw_sp_fib_node_remove(fib, fib_node);
+}
+
+static struct mlxsw_sp_fib_node *
+mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
+ size_t addr_len, unsigned char prefix_len,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_fib_node *fib_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+ int err;
+
+ vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
+ if (IS_ERR(vr))
+ return ERR_CAST(vr);
+ fib = mlxsw_sp_vr_fib(vr, proto);
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
+ if (fib_node)
+ return fib_node;
+
+ fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
+ if (!fib_node) {
+ err = -ENOMEM;
+ goto err_fib_node_create;
+ }
+
+ err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
+ if (err)
+ goto err_fib_node_init;
+
+ return fib_node;
+
+err_fib_node_init:
+ mlxsw_sp_fib_node_destroy(fib_node);
+err_fib_node_create:
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_vr *vr = fib_node->fib->vr;
+
+ if (!list_empty(&fib_node->entry_list))
+ return;
+ mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
+ mlxsw_sp_fib_node_destroy(fib_node);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
+static struct mlxsw_sp_fib4_entry *
+mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct mlxsw_sp_fib4_entry *new4_entry)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+
+ list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+ if (fib4_entry->tb_id > new4_entry->tb_id)
+ continue;
+ if (fib4_entry->tb_id != new4_entry->tb_id)
+ break;
+ if (fib4_entry->tos > new4_entry->tos)
+ continue;
+ if (fib4_entry->prio >= new4_entry->prio ||
+ fib4_entry->tos < new4_entry->tos)
+ return fib4_entry;
+ }
+
+ return NULL;
+}
+
+static int
+mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
+ struct mlxsw_sp_fib4_entry *new4_entry)
+{
+ struct mlxsw_sp_fib_node *fib_node;
+
+ if (WARN_ON(!fib4_entry))
+ return -EINVAL;
+
+ fib_node = fib4_entry->common.fib_node;
+ list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
+ common.list) {
+ if (fib4_entry->tb_id != new4_entry->tb_id ||
+ fib4_entry->tos != new4_entry->tos ||
+ fib4_entry->prio != new4_entry->prio)
+ break;
+ }
+
+ list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
+ return 0;
+}
+
+static int
+mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
+ bool replace, bool append)
+{
+ struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+
+ fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
+
+ if (append)
+ return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
+ if (replace && WARN_ON(!fib4_entry))
+ return -EINVAL;
+
+ /* Insert new entry before replaced one, so that we can later
+ * remove the second.
+ */
+ if (fib4_entry) {
+ list_add_tail(&new4_entry->common.list,
+ &fib4_entry->common.list);
+ } else {
+ struct mlxsw_sp_fib4_entry *last;
+
+ list_for_each_entry(last, &fib_node->entry_list, common.list) {
+ if (new4_entry->tb_id > last->tb_id)
+ break;
+ fib4_entry = last;
+ }
+
+ if (fib4_entry)
+ list_add(&new4_entry->common.list,
+ &fib4_entry->common.list);
+ else
+ list_add(&new4_entry->common.list,
+ &fib_node->entry_list);
+ }
+
+ return 0;
+}
+
+static void
+mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
+{
+ list_del(&fib4_entry->common.list);
+}
+
+static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
+ if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
+ return 0;
+
+ /* To prevent packet loss, overwrite the previously offloaded
+ * entry.
+ */
+ if (!list_is_singular(&fib_node->entry_list)) {
+ enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
+ struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
+
+ mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
+ }
+
+ return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+}
+
+static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
+ if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
+ return;
+
+ /* Promote the next entry by overwriting the deleted entry */
+ if (!list_is_singular(&fib_node->entry_list)) {
+ struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
+ enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
+
+ mlxsw_sp_fib_entry_update(mlxsw_sp, n);
+ mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
+ return;
+ }
+
+ mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
+}
+
+static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib4_entry *fib4_entry,
+ bool replace, bool append)
+{
+ int err;
+
+ err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_fib4_node_list_remove(fib4_entry);
+ return err;
+}
+
+static void
+mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib4_entry *fib4_entry)
+{
+ mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
+ mlxsw_sp_fib4_node_list_remove(fib4_entry);
+
+ if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
+ mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
+}
+
+static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib4_entry *fib4_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
+ struct mlxsw_sp_fib4_entry *replaced;
+
+ if (!replace)
+ return;
+
+ /* We inserted the new entry before replaced one */
+ replaced = list_next_entry(fib4_entry, common.list);
+
+ mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static int
+mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
+ const struct fib_entry_notifier_info *fen_info,
+ bool replace, bool append)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ int err;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
+ &fen_info->dst, sizeof(fen_info->dst),
+ fen_info->dst_len,
+ MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(fib_node)) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
+ return PTR_ERR(fib_node);
+ }
+
+ fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
+ if (IS_ERR(fib4_entry)) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
+ err = PTR_ERR(fib4_entry);
+ goto err_fib4_entry_create;
+ }
+
+ err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
+ append);
+ if (err) {
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
+ goto err_fib4_node_entry_link;
+ }
+
+ mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
+
+ return 0;
+
+err_fib4_node_entry_link:
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+err_fib4_entry_create:
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ return err;
+}
+
+static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
+ struct fib_entry_notifier_info *fen_info)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
+ if (WARN_ON(!fib4_entry))
+ return;
+ fib_node = fib4_entry->common.fib_node;
+
+ mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
+{
+ /* Packets with link-local destination IP arriving to the router
+ * are trapped to the CPU, so no need to program specific routes
+ * for them.
+ */
+ if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
+ return true;
+
+ /* Multicast routes aren't supported, so ignore them. Neighbour
+ * Discovery packets are specifically trapped.
+ */
+ if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
+ return true;
+
+ /* Cloned routes are irrelevant in the forwarding path. */
+ if (rt->fib6_flags & RTF_CACHE)
+ return true;
+
+ return false;
+}
+
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
+ if (!mlxsw_sp_rt6)
+ return ERR_PTR(-ENOMEM);
+
+ /* In case of route replace, replaced route is deleted with
+ * no notification. Take reference to prevent accessing freed
+ * memory.
+ */
+ mlxsw_sp_rt6->rt = rt;
+ fib6_info_hold(rt);
+
+ return mlxsw_sp_rt6;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
+{
+ fib6_info_release(rt);
+}
+#else
+static void mlxsw_sp_rt6_release(struct fib6_info *rt)
+{
+}
+#endif
+
+static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
+ kfree(mlxsw_sp_rt6);
+}
+
+static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
+{
+ /* RTF_CACHE routes are ignored */
+ return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+}
+
+static struct fib6_info *
+mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt;
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct fib6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
+ * virtual router.
+ */
+ if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
+ continue;
+ if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
+ break;
+ if (rt->fib6_metric < nrt->fib6_metric)
+ continue;
+ if (rt->fib6_metric == nrt->fib6_metric &&
+ mlxsw_sp_fib6_rt_can_mp(rt))
+ return fib6_entry;
+ if (rt->fib6_metric > nrt->fib6_metric)
+ break;
+ }
+
+ return NULL;
+}
+
+static struct mlxsw_sp_rt6 *
+mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
+ const struct fib6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ if (mlxsw_sp_rt6->rt == rt)
+ return mlxsw_sp_rt6;
+ }
+
+ return NULL;
+}
+
+static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct fib6_info *rt,
+ enum mlxsw_sp_ipip_type *ret)
+{
+ return rt->fib6_nh.nh_dev &&
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
+}
+
+static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ const struct fib6_info *rt)
+{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ struct net_device *dev = rt->fib6_nh.nh_dev;
+ struct mlxsw_sp_rif *rif;
+ int err;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
+ if (ipip_entry) {
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+ if (ipip_ops->can_offload(mlxsw_sp, dev,
+ MLXSW_SP_L3_PROTO_IPV6)) {
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
+ mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
+ return 0;
+ }
+ }
+
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return 0;
+ mlxsw_sp_nexthop_rif_init(nh, rif);
+
+ err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ if (err)
+ goto err_nexthop_neigh_init;
+
+ return 0;
+
+err_nexthop_neigh_init:
+ mlxsw_sp_nexthop_rif_fini(nh);
+ return err;
+}
+
+static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
+}
+
+static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ const struct fib6_info *rt)
+{
+ struct net_device *dev = rt->fib6_nh.nh_dev;
+
+ nh->nh_grp = nh_grp;
+ nh->nh_weight = rt->fib6_nh.nh_weight;
+ memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
+ mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+
+ list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
+
+ if (!dev)
+ return 0;
+ nh->ifindex = dev->ifindex;
+
+ return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
+}
+
+static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
+ list_del(&nh->router_list_node);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+}
+
+static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+ const struct fib6_info *rt)
+{
+ return rt->fib6_flags & RTF_GATEWAY ||
+ mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ struct mlxsw_sp_nexthop *nh;
+ size_t alloc_size;
+ int i = 0;
+ int err;
+
+ alloc_size = sizeof(*nh_grp) +
+ fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
+ nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+ if (!nh_grp)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&nh_grp->fib_list);
+#if IS_ENABLED(CONFIG_IPV6)
+ nh_grp->neigh_tbl = &nd_tbl;
+#endif
+ mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+ nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
+ nh_grp->count = fib6_entry->nrt6;
+ for (i = 0; i < nh_grp->count; i++) {
+ struct fib6_info *rt = mlxsw_sp_rt6->rt;
+
+ nh = &nh_grp->nexthops[i];
+ err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
+ if (err)
+ goto err_nexthop6_init;
+ mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
+ }
+
+ err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
+ if (err)
+ goto err_nexthop_group_insert;
+
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ return nh_grp;
+
+err_nexthop_group_insert:
+err_nexthop6_init:
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ kfree(nh_grp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int i = nh_grp->count;
+
+ mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ WARN_ON(nh_grp->adj_index_valid);
+ kfree(nh_grp);
+}
+
+static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
+ if (!nh_grp) {
+ nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
+ if (IS_ERR(nh_grp))
+ return PTR_ERR(nh_grp);
+ }
+
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &nh_grp->fib_list);
+ fib6_entry->common.nh_group = nh_grp;
+
+ return 0;
+}
+
+static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+ list_del(&fib_entry->nexthop_group_node);
+ if (!list_empty(&nh_grp->fib_list))
+ return;
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static int
+mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
+ int err;
+
+ fib6_entry->common.nh_group = NULL;
+ list_del(&fib6_entry->common.nexthop_group_node);
+
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ /* In case this entry is offloaded, then the adjacency index
+ * currently associated with it in the device's table is that
+ * of the old group. Start using the new one instead.
+ */
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ if (list_empty(&old_nh_grp->fib_list))
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+err_nexthop6_group_get:
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &old_nh_grp->fib_list);
+ fib6_entry->common.nh_group = old_nh_grp;
+ return err;
+}
+
+static int
+mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct fib6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6))
+ return PTR_ERR(mlxsw_sp_rt6);
+
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6++;
+
+ err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_update;
+
+ return 0;
+
+err_nexthop6_group_update:
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct fib6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
+ if (WARN_ON(!mlxsw_sp_rt6))
+ return;
+
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+}
+
+static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ const struct fib6_info *rt)
+{
+ /* Packets hitting RTF_REJECT routes need to be discarded by the
+ * stack. We can rely on their destination device not having a
+ * RIF (it's the loopback device) and can thus use action type
+ * local, which will cause them to be trapped with a lower
+ * priority than packets that need to be locally received.
+ */
+ if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ else if (rt->fib6_flags & RTF_REJECT)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+ else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+ else
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+}
+
+static void
+mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
+
+ list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
+ list) {
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ }
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node,
+ struct fib6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
+ if (!fib6_entry)
+ return ERR_PTR(-ENOMEM);
+ fib_entry = &fib6_entry->common;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+ goto err_rt6_create;
+ }
+
+ mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
+
+ INIT_LIST_HEAD(&fib6_entry->rt6_list);
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6 = 1;
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ fib_entry->fib_node = fib_node;
+
+ return fib6_entry;
+
+err_nexthop6_group_get:
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+err_rt6_create:
+ kfree(fib6_entry);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
+ WARN_ON(fib6_entry->nrt6);
+ kfree(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct fib6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
+ continue;
+ if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
+ break;
+ if (replace && rt->fib6_metric == nrt->fib6_metric) {
+ if (mlxsw_sp_fib6_rt_can_mp(rt) ==
+ mlxsw_sp_fib6_rt_can_mp(nrt))
+ return fib6_entry;
+ if (mlxsw_sp_fib6_rt_can_mp(nrt))
+ fallback = fallback ?: fib6_entry;
+ }
+ if (rt->fib6_metric > nrt->fib6_metric)
+ return fallback ?: fib6_entry;
+ }
+
+ return fallback;
+}
+
+static int
+mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
+ struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+
+ if (replace && WARN_ON(!fib6_entry))
+ return -EINVAL;
+
+ if (fib6_entry) {
+ list_add_tail(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ } else {
+ struct mlxsw_sp_fib6_entry *last;
+
+ list_for_each_entry(last, &fib_node->entry_list, common.list) {
+ struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+
+ if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
+ break;
+ fib6_entry = last;
+ }
+
+ if (fib6_entry)
+ list_add(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ else
+ list_add(&new6_entry->common.list,
+ &fib_node->entry_list);
+ }
+
+ return 0;
+}
+
+static void
+mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ list_del(&fib6_entry->common.list);
+}
+
+static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ int err;
+
+ err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
+ const struct fib6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
+ if (!vr)
+ return NULL;
+ fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
+ sizeof(rt->fib6_dst.addr),
+ rt->fib6_dst.plen);
+ if (!fib_node)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
+ rt->fib6_metric == iter_rt->fib6_metric &&
+ mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
+ return fib6_entry;
+ }
+
+ return NULL;
+}
+
+static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
+ struct mlxsw_sp_fib6_entry *replaced;
+
+ if (!replace)
+ return;
+
+ replaced = list_next_entry(fib6_entry, common.list);
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
+ struct fib6_info *rt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ int err;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ if (rt->fib6_src.plen)
+ return -EINVAL;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return 0;
+
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
+ &rt->fib6_dst.addr,
+ sizeof(rt->fib6_dst.addr),
+ rt->fib6_dst.plen,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib_node))
+ return PTR_ERR(fib_node);
+
+ /* Before creating a new entry, try to append route to an existing
+ * multipath entry.
+ */
+ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
+ if (fib6_entry) {
+ err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
+ if (err)
+ goto err_fib6_entry_nexthop_add;
+ return 0;
+ }
+
+ fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
+ if (IS_ERR(fib6_entry)) {
+ err = PTR_ERR(fib6_entry);
+ goto err_fib6_entry_create;
+ }
+
+ err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
+ if (err)
+ goto err_fib6_node_entry_link;
+
+ mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
+
+ return 0;
+
+err_fib6_node_entry_link:
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+err_fib6_entry_create:
+err_fib6_entry_nexthop_add:
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ return err;
+}
+
+static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
+ struct fib6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return;
+
+ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
+ if (WARN_ON(!fib6_entry))
+ return;
+
+ /* If route is part of a multipath entry, but not the last one
+ * removed, then only reduce its nexthop group.
+ */
+ if (!list_is_singular(&fib6_entry->rt6_list)) {
+ mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
+ return;
+ }
+
+ fib_node = fib6_entry->common.fib_node;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_reg_ralxx_protocol proto,
+ u8 tree_id)
+{
+ char ralta_pl[MLXSW_REG_RALTA_LEN];
+ char ralst_pl[MLXSW_REG_RALST_LEN];
+ int i, err;
+
+ mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
+ if (err)
+ return err;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+ struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
+ char raltb_pl[MLXSW_REG_RALTB_LEN];
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+ mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
+ raltb_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_ralue_pack(ralue_pl, proto,
+ MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
+ mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
+ ralue_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static struct mlxsw_sp_mr_table *
+mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
+{
+ if (family == RTNL_FAMILY_IPMR)
+ return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
+ else
+ return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
+}
+
+static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
+ struct mfc_entry_notifier_info *men_info,
+ bool replace)
+{
+ struct mlxsw_sp_mr_table *mrt;
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
+ if (IS_ERR(vr))
+ return PTR_ERR(vr);
+
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
+ return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
+}
+
+static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
+ struct mfc_entry_notifier_info *men_info)
+{
+ struct mlxsw_sp_mr_table *mrt;
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
+ if (WARN_ON(!vr))
+ return;
+
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
+ mlxsw_sp_mr_route_del(mrt, men_info->mfc);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
+static int
+mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
+ struct vif_entry_notifier_info *ven_info)
+{
+ struct mlxsw_sp_mr_table *mrt;
+ struct mlxsw_sp_rif *rif;
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
+ if (IS_ERR(vr))
+ return PTR_ERR(vr);
+
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
+ return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
+ ven_info->vif_index,
+ ven_info->vif_flags, rif);
+}
+
+static void
+mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
+ struct vif_entry_notifier_info *ven_info)
+{
+ struct mlxsw_sp_mr_table *mrt;
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
+ if (WARN_ON(!vr))
+ return;
+
+ mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
+ mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
+static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+{
+ enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
+ int err;
+
+ err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+ MLXSW_SP_LPM_TREE_MIN);
+ if (err)
+ return err;
+
+ /* The multicast router code does not need an abort trap as by default,
+ * packets that don't match any routes are trapped to the CPU.
+ */
+
+ proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
+ return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+ MLXSW_SP_LPM_TREE_MIN + 1);
+}
+
+static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
+
+ list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
+ common.list) {
+ bool do_break = &tmp->common.list == &fib_node->entry_list;
+
+ mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ /* Break when entry list is empty and node was freed.
+ * Otherwise, we'll access freed memory in the next
+ * iteration.
+ */
+ if (do_break)
+ break;
+ }
+}
+
+static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
+
+ list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
+ common.list) {
+ bool do_break = &tmp->common.list == &fib_node->entry_list;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ if (do_break)
+ break;
+ }
+}
+
+static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ switch (fib_node->fib->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
+ break;
+ }
+}
+
+static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_vr *vr,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
+ struct mlxsw_sp_fib_node *fib_node, *tmp;
+
+ list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
+ bool do_break = &tmp->list == &fib->node_list;
+
+ mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
+ if (do_break)
+ break;
+ }
+}
+
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
+{
+ int i, j;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+ struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
+
+ if (!mlxsw_sp_vr_is_used(vr))
+ continue;
+
+ for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
+ mlxsw_sp_mr_table_flush(vr->mr_table[j]);
+ mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+
+ /* If virtual router was only used for IPv4, then it's no
+ * longer used.
+ */
+ if (!mlxsw_sp_vr_is_used(vr))
+ continue;
+ mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
+ }
+}
+
+static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+ dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
+ mlxsw_sp_router_fib_flush(mlxsw_sp);
+ mlxsw_sp->router->aborted = true;
+ err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
+ if (err)
+ dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
+}
+
+struct mlxsw_sp_fib_event_work {
+ struct work_struct work;
+ union {
+ struct fib6_entry_notifier_info fen6_info;
+ struct fib_entry_notifier_info fen_info;
+ struct fib_rule_notifier_info fr_info;
+ struct fib_nh_notifier_info fnh_info;
+ struct mfc_entry_notifier_info men_info;
+ struct vif_entry_notifier_info ven_info;
+ };
+ struct mlxsw_sp *mlxsw_sp;
+ unsigned long event;
+};
+
+static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+ bool replace, append;
+ int err;
+
+ /* Protect internal structures from changes */
+ rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD:
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+ append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
+ err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
+ replace, append);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ fib_info_put(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
+ fib_info_put(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_RULE_ADD:
+ /* if we get here, a rule was added that we do not support.
+ * just do the fib_abort
+ */
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ break;
+ case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_DEL:
+ mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
+ fib_work->fnh_info.fib_nh);
+ fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ }
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+ bool replace;
+ int err;
+
+ rtnl_lock();
+ mlxsw_sp_span_respin(mlxsw_sp);
+
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD:
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+ err = mlxsw_sp_router_fib6_add(mlxsw_sp,
+ fib_work->fen6_info.rt, replace);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_RULE_ADD:
+ /* if we get here, a rule was added that we do not support.
+ * just do the fib_abort
+ */
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ break;
+ }
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+ bool replace;
+ int err;
+
+ rtnl_lock();
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD:
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+
+ err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
+ replace);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mr_cache_put(fib_work->men_info.mfc);
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
+ mr_cache_put(fib_work->men_info.mfc);
+ break;
+ case FIB_EVENT_VIF_ADD:
+ err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
+ &fib_work->ven_info);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ dev_put(fib_work->ven_info.dev);
+ break;
+ case FIB_EVENT_VIF_DEL:
+ mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
+ &fib_work->ven_info);
+ dev_put(fib_work->ven_info.dev);
+ break;
+ case FIB_EVENT_RULE_ADD:
+ /* if we get here, a rule was added that we do not support.
+ * just do the fib_abort
+ */
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ break;
+ }
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ struct fib_entry_notifier_info *fen_info;
+ struct fib_nh_notifier_info *fnh_info;
+
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ fen_info = container_of(info, struct fib_entry_notifier_info,
+ info);
+ fib_work->fen_info = *fen_info;
+ /* Take reference on fib_info to prevent it from being
+ * freed while work is queued. Release it afterwards.
+ */
+ fib_info_hold(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_DEL:
+ fnh_info = container_of(info, struct fib_nh_notifier_info,
+ info);
+ fib_work->fnh_info = *fnh_info;
+ fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ }
+}
+
+static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ struct fib6_entry_notifier_info *fen6_info;
+
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ fen6_info = container_of(info, struct fib6_entry_notifier_info,
+ info);
+ fib_work->fen6_info = *fen6_info;
+ fib6_info_hold(fib_work->fen6_info.rt);
+ break;
+ }
+}
+
+static void
+mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
+ mr_cache_hold(fib_work->men_info.mfc);
+ break;
+ case FIB_EVENT_VIF_ADD: /* fall through */
+ case FIB_EVENT_VIF_DEL:
+ memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
+ dev_hold(fib_work->ven_info.dev);
+ break;
+ }
+}
+
+static int mlxsw_sp_router_fib_rule_event(unsigned long event,
+ struct fib_notifier_info *info,
+ struct mlxsw_sp *mlxsw_sp)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct fib_rule_notifier_info *fr_info;
+ struct fib_rule *rule;
+ int err = 0;
+
+ /* nothing to do at the moment */
+ if (event == FIB_EVENT_RULE_DEL)
+ return 0;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ fr_info = container_of(info, struct fib_rule_notifier_info, info);
+ rule = fr_info->rule;
+
+ switch (info->family) {
+ case AF_INET:
+ if (!fib4_rule_default(rule) && !rule->l3mdev)
+ err = -EOPNOTSUPP;
+ break;
+ case AF_INET6:
+ if (!fib6_rule_default(rule) && !rule->l3mdev)
+ err = -EOPNOTSUPP;
+ break;
+ case RTNL_FAMILY_IPMR:
+ if (!ipmr_rule_default(rule) && !rule->l3mdev)
+ err = -EOPNOTSUPP;
+ break;
+ case RTNL_FAMILY_IP6MR:
+ if (!ip6mr_rule_default(rule) && !rule->l3mdev)
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ if (err < 0)
+ NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
+
+ return err;
+}
+
+/* Called with rcu_read_lock() */
+static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct mlxsw_sp_fib_event_work *fib_work;
+ struct fib_notifier_info *info = ptr;
+ struct mlxsw_sp_router *router;
+ int err;
+
+ if (!net_eq(info->net, &init_net) ||
+ (info->family != AF_INET && info->family != AF_INET6 &&
+ info->family != RTNL_FAMILY_IPMR &&
+ info->family != RTNL_FAMILY_IP6MR))
+ return NOTIFY_DONE;
+
+ router = container_of(nb, struct mlxsw_sp_router, fib_nb);
+
+ switch (event) {
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ err = mlxsw_sp_router_fib_rule_event(event, info,
+ router->mlxsw_sp);
+ if (!err || info->extack)
+ return notifier_from_errno(err);
+ break;
+ case FIB_EVENT_ENTRY_ADD:
+ if (router->aborted) {
+ NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
+ return notifier_from_errno(-EINVAL);
+ }
+ break;
+ }
+
+ fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+ if (!fib_work)
+ return NOTIFY_BAD;
+
+ fib_work->mlxsw_sp = router->mlxsw_sp;
+ fib_work->event = event;
+
+ switch (info->family) {
+ case AF_INET:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
+ mlxsw_sp_router_fib4_event(fib_work, info);
+ break;
+ case AF_INET6:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
+ mlxsw_sp_router_fib6_event(fib_work, info);
+ break;
+ case RTNL_FAMILY_IP6MR:
+ case RTNL_FAMILY_IPMR:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
+ mlxsw_sp_router_fibmr_event(fib_work, info);
+ break;
+ }
+
+ mlxsw_core_schedule_work(&fib_work->work);
+
+ return NOTIFY_DONE;
+}
+
+struct mlxsw_sp_rif *
+mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
+ if (mlxsw_sp->router->rifs[i] &&
+ mlxsw_sp->router->rifs[i]->dev == dev)
+ return mlxsw_sp->router->rifs[i];
+
+ return NULL;
+}
+
+static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
+{
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+ int err;
+
+ mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+ if (WARN_ON_ONCE(err))
+ return err;
+
+ mlxsw_reg_ritr_enable_set(ritr_pl, false);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif)
+{
+ mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
+ mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
+ mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
+}
+
+static bool
+mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
+ unsigned long event)
+{
+ struct inet6_dev *inet6_dev;
+ bool addr_list_empty = true;
+ struct in_device *idev;
+
+ switch (event) {
+ case NETDEV_UP:
+ return rif == NULL;
+ case NETDEV_DOWN:
+ idev = __in_dev_get_rtnl(dev);
+ if (idev && idev->ifa_list)
+ addr_list_empty = false;
+
+ inet6_dev = __in6_dev_get(dev);
+ if (addr_list_empty && inet6_dev &&
+ !list_empty(&inet6_dev->addr_list))
+ addr_list_empty = false;
+
+ /* macvlans do not have a RIF, but rather piggy back on the
+ * RIF of their lower device.
+ */
+ if (netif_is_macvlan(dev) && addr_list_empty)
+ return true;
+
+ if (rif && addr_list_empty &&
+ !netif_is_l3_slave(rif->dev))
+ return true;
+ /* It is possible we already removed the RIF ourselves
+ * if it was assigned to a netdev that is now a bridge
+ * or LAG slave.
+ */
+ return false;
+ }
+
+ return false;
+}
+
+static enum mlxsw_sp_rif_type
+mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev)
+{
+ enum mlxsw_sp_fid_type type;
+
+ if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
+ return MLXSW_SP_RIF_TYPE_IPIP_LB;
+
+ /* Otherwise RIF type is derived from the type of the underlying FID. */
+ if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
+ type = MLXSW_SP_FID_TYPE_8021Q;
+ else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
+ type = MLXSW_SP_FID_TYPE_8021Q;
+ else if (netif_is_bridge_master(dev))
+ type = MLXSW_SP_FID_TYPE_8021D;
+ else
+ type = MLXSW_SP_FID_TYPE_RFID;
+
+ return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
+}
+
+static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ if (!mlxsw_sp->router->rifs[i]) {
+ *p_rif_index = i;
+ return 0;
+ }
+ }
+
+ return -ENOBUFS;
+}
+
+static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
+ u16 vr_id,
+ struct net_device *l3_dev)
+{
+ struct mlxsw_sp_rif *rif;
+
+ rif = kzalloc(rif_size, GFP_KERNEL);
+ if (!rif)
+ return NULL;
+
+ INIT_LIST_HEAD(&rif->nexthop_list);
+ INIT_LIST_HEAD(&rif->neigh_list);
+ ether_addr_copy(rif->addr, l3_dev->dev_addr);
+ rif->mtu = l3_dev->mtu;
+ rif->vr_id = vr_id;
+ rif->dev = l3_dev;
+ rif->rif_index = rif_index;
+
+ return rif;
+}
+
+struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
+ u16 rif_index)
+{
+ return mlxsw_sp->router->rifs[rif_index];
+}
+
+u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
+{
+ return rif->rif_index;
+}
+
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+ return lb_rif->common.rif_index;
+}
+
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+ return lb_rif->ul_vr_id;
+}
+
+int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
+{
+ return rif->dev->ifindex;
+}
+
+const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
+{
+ return rif->dev;
+}
+
+struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
+{
+ return rif->fid;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_rif_params *params,
+ struct netlink_ext_ack *extack)
+{
+ u32 tb_id = l3mdev_fib_table(params->dev);
+ const struct mlxsw_sp_rif_ops *ops;
+ struct mlxsw_sp_fid *fid = NULL;
+ enum mlxsw_sp_rif_type type;
+ struct mlxsw_sp_rif *rif;
+ struct mlxsw_sp_vr *vr;
+ u16 rif_index;
+ int i, err;
+
+ type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
+ ops = mlxsw_sp->router->rif_ops_arr[type];
+
+ vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
+ if (IS_ERR(vr))
+ return ERR_CAST(vr);
+ vr->rif_count++;
+
+ err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
+ goto err_rif_index_alloc;
+ }
+
+ rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
+ if (!rif) {
+ err = -ENOMEM;
+ goto err_rif_alloc;
+ }
+ rif->mlxsw_sp = mlxsw_sp;
+ rif->ops = ops;
+
+ if (ops->fid_get) {
+ fid = ops->fid_get(rif, extack);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ goto err_fid_get;
+ }
+ rif->fid = fid;
+ }
+
+ if (ops->setup)
+ ops->setup(rif, params);
+
+ err = ops->configure(rif);
+ if (err)
+ goto err_configure;
+
+ for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
+ err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
+ if (err)
+ goto err_mr_rif_add;
+ }
+
+ mlxsw_sp_rif_counters_alloc(rif);
+ mlxsw_sp->router->rifs[rif_index] = rif;
+
+ return rif;
+
+err_mr_rif_add:
+ for (i--; i >= 0; i--)
+ mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
+ ops->deconfigure(rif);
+err_configure:
+ if (fid)
+ mlxsw_sp_fid_put(fid);
+err_fid_get:
+ kfree(rif);
+err_rif_alloc:
+err_rif_index_alloc:
+ vr->rif_count--;
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
+{
+ const struct mlxsw_sp_rif_ops *ops = rif->ops;
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_fid *fid = rif->fid;
+ struct mlxsw_sp_vr *vr;
+ int i;
+
+ mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
+ vr = &mlxsw_sp->router->vrs[rif->vr_id];
+
+ mlxsw_sp->router->rifs[rif->rif_index] = NULL;
+ mlxsw_sp_rif_counters_free(rif);
+ for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
+ mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
+ ops->deconfigure(rif);
+ if (fid)
+ /* Loopback RIFs are not associated with a FID. */
+ mlxsw_sp_fid_put(fid);
+ kfree(rif);
+ vr->rif_count--;
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
+void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev)
+{
+ struct mlxsw_sp_rif *rif;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return;
+ mlxsw_sp_rif_destroy(rif);
+}
+
+static void
+mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+
+ params->vid = mlxsw_sp_port_vlan->vid;
+ params->lag = mlxsw_sp_port->lagged;
+ if (params->lag)
+ params->lag_id = mlxsw_sp_port->lag_id;
+ else
+ params->system_port = mlxsw_sp_port->local_port;
+}
+
+static int
+mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
+ struct net_device *l3_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+ struct mlxsw_sp_rif *rif;
+ struct mlxsw_sp_fid *fid;
+ int err;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
+ if (!rif) {
+ struct mlxsw_sp_rif_params params = {
+ .dev = l3_dev,
+ };
+
+ mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
+ rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
+ if (IS_ERR(rif))
+ return PTR_ERR(rif);
+ }
+
+ /* FID was already created, just take a reference */
+ fid = rif->ops->fid_get(rif, extack);
+ err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
+ if (err)
+ goto err_fid_port_vid_map;
+
+ err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
+ if (err)
+ goto err_port_vid_learning_set;
+
+ err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
+ BR_STATE_FORWARDING);
+ if (err)
+ goto err_port_vid_stp_set;
+
+ mlxsw_sp_port_vlan->fid = fid;
+
+ return 0;
+
+err_port_vid_stp_set:
+ mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
+err_port_vid_learning_set:
+ mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
+err_fid_port_vid_map:
+ mlxsw_sp_fid_put(fid);
+ return err;
+}
+
+void
+mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+
+ if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
+ return;
+
+ mlxsw_sp_port_vlan->fid = NULL;
+ mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
+ mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
+ mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
+ /* If router port holds the last reference on the rFID, then the
+ * associated Sub-port RIF will be destroyed.
+ */
+ mlxsw_sp_fid_put(fid);
+}
+
+static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
+ struct net_device *port_dev,
+ unsigned long event, u16 vid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+ if (WARN_ON(!mlxsw_sp_port_vlan))
+ return -EINVAL;
+
+ switch (event) {
+ case NETDEV_UP:
+ return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
+ l3_dev, extack);
+ case NETDEV_DOWN:
+ mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
+ break;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
+ unsigned long event,
+ struct netlink_ext_ack *extack)
+{
+ if (netif_is_bridge_port(port_dev) ||
+ netif_is_lag_port(port_dev) ||
+ netif_is_ovs_port(port_dev))
+ return 0;
+
+ return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
+ extack);
+}
+
+static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
+ struct net_device *lag_dev,
+ unsigned long event, u16 vid,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *port_dev;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
+ if (mlxsw_sp_port_dev_check(port_dev)) {
+ err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
+ port_dev,
+ event, vid,
+ extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
+ unsigned long event,
+ struct netlink_ext_ack *extack)
+{
+ if (netif_is_bridge_port(lag_dev))
+ return 0;
+
+ return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
+ extack);
+}
+
+static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
+ unsigned long event,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
+ struct mlxsw_sp_rif_params params = {
+ .dev = l3_dev,
+ };
+ struct mlxsw_sp_rif *rif;
+
+ switch (event) {
+ case NETDEV_UP:
+ rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
+ if (IS_ERR(rif))
+ return PTR_ERR(rif);
+ break;
+ case NETDEV_DOWN:
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
+ mlxsw_sp_rif_destroy(rif);
+ break;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
+ unsigned long event,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
+ u16 vid = vlan_dev_vlan_id(vlan_dev);
+
+ if (netif_is_bridge_port(vlan_dev))
+ return 0;
+
+ if (mlxsw_sp_port_dev_check(real_dev))
+ return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
+ event, vid, extack);
+ else if (netif_is_lag_master(real_dev))
+ return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
+ vid, extack);
+ else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
+ return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
+
+ return 0;
+}
+
+static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
+{
+ u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
+ u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+ return ether_addr_equal_masked(mac, vrrp4, mask);
+}
+
+static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
+{
+ u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
+ u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+ return ether_addr_equal_masked(mac, vrrp6, mask);
+}
+
+static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+ const u8 *mac, bool adding)
+{
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+ u8 vrrp_id = adding ? mac[5] : 0;
+ int err;
+
+ if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
+ !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
+ return 0;
+
+ mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+ if (err)
+ return err;
+
+ if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
+ mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
+ else
+ mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *macvlan_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
+ struct mlxsw_sp_rif *rif;
+ int err;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
+ if (!rif) {
+ NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
+ macvlan_dev->dev_addr, true);
+ if (err)
+ goto err_rif_vrrp_add;
+
+ /* Make sure the bridge driver does not have this MAC pointing at
+ * some other port.
+ */
+ if (rif->ops->fdb_del)
+ rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
+
+ return 0;
+
+err_rif_vrrp_add:
+ mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), false);
+ return err;
+}
+
+void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *macvlan_dev)
+{
+ struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
+ struct mlxsw_sp_rif *rif;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
+ /* If we do not have a RIF, then we already took care of
+ * removing the macvlan's MAC during RIF deletion.
+ */
+ if (!rif)
+ return;
+ mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
+ false);
+ mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), false);
+}
+
+static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
+ unsigned long event,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp;
+
+ mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
+ if (!mlxsw_sp)
+ return 0;
+
+ switch (event) {
+ case NETDEV_UP:
+ return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
+ case NETDEV_DOWN:
+ mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
+ break;
+ }
+
+ return 0;
+}
+
+static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
+ unsigned long event,
+ struct netlink_ext_ack *extack)
+{
+ if (mlxsw_sp_port_dev_check(dev))
+ return mlxsw_sp_inetaddr_port_event(dev, event, extack);
+ else if (netif_is_lag_master(dev))
+ return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
+ else if (netif_is_bridge_master(dev))
+ return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
+ else if (is_vlan_dev(dev))
+ return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
+ else if (netif_is_macvlan(dev))
+ return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);
+ else
+ return 0;
+}
+
+int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+ int err = 0;
+
+ /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
+ if (event == NETDEV_UP)
+ goto out;
+
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
+ goto out;
+
+ err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
+out:
+ return notifier_from_errno(err);
+}
+
+int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct in_validator_info *ivi = (struct in_validator_info *) ptr;
+ struct net_device *dev = ivi->ivi_dev->dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+ int err = 0;
+
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
+ goto out;
+
+ err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
+out:
+ return notifier_from_errno(err);
+}
+
+struct mlxsw_sp_inet6addr_event_work {
+ struct work_struct work;
+ struct net_device *dev;
+ unsigned long event;
+};
+
+static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
+ container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
+ struct net_device *dev = inet6addr_work->dev;
+ unsigned long event = inet6addr_work->event;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+
+ rtnl_lock();
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
+ goto out;
+
+ __mlxsw_sp_inetaddr_event(dev, event, NULL);
+out:
+ rtnl_unlock();
+ dev_put(dev);
+ kfree(inet6addr_work);
+}
+
+/* Called with rcu_read_lock() */
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
+ struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
+ struct net_device *dev = if6->idev->dev;
+
+ /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
+ if (event == NETDEV_UP)
+ return NOTIFY_DONE;
+
+ if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
+ return NOTIFY_DONE;
+
+ inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
+ if (!inet6addr_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
+ inet6addr_work->dev = dev;
+ inet6addr_work->event = event;
+ dev_hold(dev);
+ mlxsw_core_schedule_work(&inet6addr_work->work);
+
+ return NOTIFY_DONE;
+}
+
+int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
+ struct net_device *dev = i6vi->i6vi_dev->dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+ int err = 0;
+
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
+ goto out;
+
+ err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
+out:
+ return notifier_from_errno(err);
+}
+
+static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
+ const char *mac, int mtu)
+{
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+ int err;
+
+ mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
+ mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
+ mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
+{
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+ u16 fid_index;
+ int err;
+
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ return 0;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return 0;
+ fid_index = mlxsw_sp_fid_index(rif->fid);
+
+ err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
+ dev->mtu);
+ if (err)
+ goto err_rif_edit;
+
+ err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
+ if (err)
+ goto err_rif_fdb_op;
+
+ if (rif->mtu != dev->mtu) {
+ struct mlxsw_sp_vr *vr;
+ int i;
+
+ /* The RIF is relevant only to its mr_table instance, as unlike
+ * unicast routing, in multicast routing a RIF cannot be shared
+ * between several multicast routing tables.
+ */
+ vr = &mlxsw_sp->router->vrs[rif->vr_id];
+ for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
+ mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
+ rif, dev->mtu);
+ }
+
+ ether_addr_copy(rif->addr, dev->dev_addr);
+ rif->mtu = dev->mtu;
+
+ netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
+
+ return 0;
+
+err_rif_fdb_op:
+ mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
+err_rif_edit:
+ mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
+ return err;
+}
+
+static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_rif *rif;
+
+ /* If netdev is already associated with a RIF, then we need to
+ * destroy it and create a new one with the new virtual router ID.
+ */
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
+ if (rif)
+ __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
+
+ return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
+}
+
+static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev)
+{
+ struct mlxsw_sp_rif *rif;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
+ if (!rif)
+ return;
+ __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
+}
+
+int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
+ int err = 0;
+
+ /* We do not create a RIF for a macvlan, but only use it to
+ * direct more MAC addresses to the router.
+ */
+ if (!mlxsw_sp || netif_is_macvlan(l3_dev))
+ return 0;
+
+ switch (event) {
+ case NETDEV_PRECHANGEUPPER:
+ return 0;
+ case NETDEV_CHANGEUPPER:
+ if (info->linking) {
+ struct netlink_ext_ack *extack;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+ err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
+ } else {
+ mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
+ }
+ break;
+ }
+
+ return err;
+}
+
+static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
+{
+ struct mlxsw_sp_rif *rif = data;
+
+ if (!netif_is_macvlan(dev))
+ return 0;
+
+ return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), false);
+}
+
+static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
+{
+ if (!netif_is_macvlan_port(rif->dev))
+ return 0;
+
+ netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
+ return netdev_walk_all_upper_dev_rcu(rif->dev,
+ __mlxsw_sp_rif_macvlan_flush, rif);
+}
+
+static struct mlxsw_sp_rif_subport *
+mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
+{
+ return container_of(rif, struct mlxsw_sp_rif_subport, common);
+}
+
+static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params)
+{
+ struct mlxsw_sp_rif_subport *rif_subport;
+
+ rif_subport = mlxsw_sp_rif_subport_rif(rif);
+ rif_subport->vid = params->vid;
+ rif_subport->lag = params->lag;
+ if (params->lag)
+ rif_subport->lag_id = params->lag_id;
+ else
+ rif_subport->system_port = params->system_port;
+}
+
+static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_rif_subport *rif_subport;
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+
+ rif_subport = mlxsw_sp_rif_subport_rif(rif);
+ mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
+ rif->rif_index, rif->vr_id, rif->dev->mtu);
+ mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
+ mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
+ rif_subport->lag ? rif_subport->lag_id :
+ rif_subport->system_port,
+ rif_subport->vid);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
+{
+ int err;
+
+ err = mlxsw_sp_rif_subport_op(rif, true);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+ if (err)
+ goto err_rif_fdb_op;
+
+ mlxsw_sp_fid_rif_set(rif->fid, rif);
+ return 0;
+
+err_rif_fdb_op:
+ mlxsw_sp_rif_subport_op(rif, false);
+ return err;
+}
+
+static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_fid *fid = rif->fid;
+
+ mlxsw_sp_fid_rif_set(fid, NULL);
+ mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(fid), false);
+ mlxsw_sp_rif_macvlan_flush(rif);
+ mlxsw_sp_rif_subport_op(rif, false);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
+ struct netlink_ext_ack *extack)
+{
+ return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
+}
+
+static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
+ .type = MLXSW_SP_RIF_TYPE_SUBPORT,
+ .rif_size = sizeof(struct mlxsw_sp_rif_subport),
+ .setup = mlxsw_sp_rif_subport_setup,
+ .configure = mlxsw_sp_rif_subport_configure,
+ .deconfigure = mlxsw_sp_rif_subport_deconfigure,
+ .fid_get = mlxsw_sp_rif_subport_fid_get,
+};
+
+static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
+ enum mlxsw_reg_ritr_if_type type,
+ u16 vid_fid, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+
+ mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
+ rif->dev->mtu);
+ mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
+ mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
+{
+ return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
+}
+
+static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
+ int err;
+
+ err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), true);
+ if (err)
+ goto err_fid_mc_flood_set;
+
+ err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), true);
+ if (err)
+ goto err_fid_bc_flood_set;
+
+ err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+ if (err)
+ goto err_rif_fdb_op;
+
+ mlxsw_sp_fid_rif_set(rif->fid, rif);
+ return 0;
+
+err_rif_fdb_op:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_bc_flood_set:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
+ mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
+ return err;
+}
+
+static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
+{
+ u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_fid *fid = rif->fid;
+
+ mlxsw_sp_fid_rif_set(fid, NULL);
+ mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(fid), false);
+ mlxsw_sp_rif_macvlan_flush(rif);
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+ mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
+ struct netlink_ext_ack *extack)
+{
+ u16 vid;
+ int err;
+
+ if (is_vlan_dev(rif->dev)) {
+ vid = vlan_dev_vlan_id(rif->dev);
+ } else {
+ err = br_vlan_get_pvid(rif->dev, &vid);
+ if (err < 0 || !vid) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
+}
+
+static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
+{
+ u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
+ struct switchdev_notifier_fdb_info info;
+ struct net_device *br_dev;
+ struct net_device *dev;
+
+ br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
+ dev = br_fdb_find_port(br_dev, mac, vid);
+ if (!dev)
+ return;
+
+ info.addr = mac;
+ info.vid = vid;
+ call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+}
+
+static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
+ .type = MLXSW_SP_RIF_TYPE_VLAN,
+ .rif_size = sizeof(struct mlxsw_sp_rif),
+ .configure = mlxsw_sp_rif_vlan_configure,
+ .deconfigure = mlxsw_sp_rif_vlan_deconfigure,
+ .fid_get = mlxsw_sp_rif_vlan_fid_get,
+ .fdb_del = mlxsw_sp_rif_vlan_fdb_del,
+};
+
+static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ u16 fid_index = mlxsw_sp_fid_index(rif->fid);
+ int err;
+
+ err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
+ true);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), true);
+ if (err)
+ goto err_fid_mc_flood_set;
+
+ err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), true);
+ if (err)
+ goto err_fid_bc_flood_set;
+
+ err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+ if (err)
+ goto err_rif_fdb_op;
+
+ mlxsw_sp_fid_rif_set(rif->fid, rif);
+ return 0;
+
+err_rif_fdb_op:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_bc_flood_set:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
+ mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
+ return err;
+}
+
+static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
+{
+ u16 fid_index = mlxsw_sp_fid_index(rif->fid);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_fid *fid = rif->fid;
+
+ mlxsw_sp_fid_rif_set(fid, NULL);
+ mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(fid), false);
+ mlxsw_sp_rif_macvlan_flush(rif);
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+ mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
+ struct netlink_ext_ack *extack)
+{
+ return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
+}
+
+static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
+{
+ struct switchdev_notifier_fdb_info info;
+ struct net_device *dev;
+
+ dev = br_fdb_find_port(rif->dev, mac, 0);
+ if (!dev)
+ return;
+
+ info.addr = mac;
+ info.vid = 0;
+ call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+}
+
+static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
+ .type = MLXSW_SP_RIF_TYPE_FID,
+ .rif_size = sizeof(struct mlxsw_sp_rif),
+ .configure = mlxsw_sp_rif_fid_configure,
+ .deconfigure = mlxsw_sp_rif_fid_deconfigure,
+ .fid_get = mlxsw_sp_rif_fid_fid_get,
+ .fdb_del = mlxsw_sp_rif_fid_fdb_del,
+};
+
+static struct mlxsw_sp_rif_ipip_lb *
+mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
+{
+ return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
+}
+
+static void
+mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params)
+{
+ struct mlxsw_sp_rif_params_ipip_lb *params_lb;
+ struct mlxsw_sp_rif_ipip_lb *rif_lb;
+
+ params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
+ common);
+ rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
+ rif_lb->lb_config = params_lb->lb_config;
+}
+
+static int
+mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+ u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_vr *ul_vr;
+ int err;
+
+ ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
+ if (IS_ERR(ul_vr))
+ return PTR_ERR(ul_vr);
+
+ err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+ if (err)
+ goto err_loopback_op;
+
+ lb_rif->ul_vr_id = ul_vr->id;
+ ++ul_vr->rif_count;
+ return 0;
+
+err_loopback_op:
+ mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
+ return err;
+}
+
+static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_vr *ul_vr;
+
+ ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
+ mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
+
+ --ul_vr->rif_count;
+ mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
+}
+
+static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
+ .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
+ .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
+ .setup = mlxsw_sp_rif_ipip_lb_setup,
+ .configure = mlxsw_sp_rif_ipip_lb_configure,
+ .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure,
+};
+
+static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
+ [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
+ [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops,
+ [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
+ [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops,
+};
+
+static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
+{
+ u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+
+ mlxsw_sp->router->rifs = kcalloc(max_rifs,
+ sizeof(struct mlxsw_sp_rif *),
+ GFP_KERNEL);
+ if (!mlxsw_sp->router->rifs)
+ return -ENOMEM;
+
+ mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
+
+ return 0;
+}
+
+static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
+ WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
+
+ kfree(mlxsw_sp->router->rifs);
+}
+
+static int
+mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
+{
+ char tigcr_pl[MLXSW_REG_TIGCR_LEN];
+
+ mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
+}
+
+static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
+ INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
+ return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
+}
+
+static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
+}
+
+static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
+{
+ struct mlxsw_sp_router *router;
+
+ /* Flush pending FIB notifications and then flush the device's
+ * table before requesting another dump. The FIB notification
+ * block is unregistered, so no need to take RTNL.
+ */
+ mlxsw_core_flush_owq();
+ router = container_of(nb, struct mlxsw_sp_router, fib_nb);
+ mlxsw_sp_router_fib_flush(router->mlxsw_sp);
+}
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
+{
+ mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
+}
+
+static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
+{
+ mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
+}
+
+static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
+{
+ bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
+
+ mlxsw_sp_mp_hash_header_set(recr2_pl,
+ MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
+ mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
+ mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
+ mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
+ if (only_l3)
+ return;
+ mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
+ mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
+ mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
+ mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
+}
+
+static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
+{
+ bool only_l3 = !ip6_multipath_hash_policy(&init_net);
+
+ mlxsw_sp_mp_hash_header_set(recr2_pl,
+ MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
+ mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
+ mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
+ mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
+ mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+ if (only_l3) {
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+ } else {
+ mlxsw_sp_mp_hash_header_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_SPORT);
+ mlxsw_sp_mp_hash_field_set(recr2_pl,
+ MLXSW_REG_RECR2_TCP_UDP_DPORT);
+ }
+}
+
+static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
+{
+ char recr2_pl[MLXSW_REG_RECR2_LEN];
+ u32 seed;
+
+ get_random_bytes(&seed, sizeof(seed));
+ mlxsw_reg_recr2_pack(recr2_pl, seed);
+ mlxsw_sp_mp4_hash_init(recr2_pl);
+ mlxsw_sp_mp6_hash_init(recr2_pl);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
+}
+#else
+static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return 0;
+}
+#endif
+
+static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
+{
+ char rdpm_pl[MLXSW_REG_RDPM_LEN];
+ unsigned int i;
+
+ MLXSW_REG_ZERO(rdpm, rdpm_pl);
+
+ /* HW is determining switch priority based on DSCP-bits, but the
+ * kernel is still doing that based on the ToS. Since there's a
+ * mismatch in bits we need to make sure to translate the right
+ * value ToS would observe, skipping the 2 least-significant ECN bits.
+ */
+ for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
+ mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
+}
+
+static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+ bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
+ char rgcr_pl[MLXSW_REG_RGCR_LEN];
+ u64 max_rifs;
+ int err;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
+ return -EIO;
+ max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+
+ mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
+ mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
+ mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+ if (err)
+ return err;
+ return 0;
+}
+
+static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ char rgcr_pl[MLXSW_REG_RGCR_LEN];
+
+ mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+}
+
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_router *router;
+ int err;
+
+ router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
+ if (!router)
+ return -ENOMEM;
+ mlxsw_sp->router = router;
+ router->mlxsw_sp = mlxsw_sp;
+
+ INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
+ err = __mlxsw_sp_router_init(mlxsw_sp);
+ if (err)
+ goto err_router_init;
+
+ err = mlxsw_sp_rifs_init(mlxsw_sp);
+ if (err)
+ goto err_rifs_init;
+
+ err = mlxsw_sp_ipips_init(mlxsw_sp);
+ if (err)
+ goto err_ipips_init;
+
+ err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
+ &mlxsw_sp_nexthop_ht_params);
+ if (err)
+ goto err_nexthop_ht_init;
+
+ err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
+ &mlxsw_sp_nexthop_group_ht_params);
+ if (err)
+ goto err_nexthop_group_ht_init;
+
+ INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
+ err = mlxsw_sp_lpm_init(mlxsw_sp);
+ if (err)
+ goto err_lpm_init;
+
+ err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
+ if (err)
+ goto err_mr_init;
+
+ err = mlxsw_sp_vrs_init(mlxsw_sp);
+ if (err)
+ goto err_vrs_init;
+
+ err = mlxsw_sp_neigh_init(mlxsw_sp);
+ if (err)
+ goto err_neigh_init;
+
+ mlxsw_sp->router->netevent_nb.notifier_call =
+ mlxsw_sp_router_netevent_event;
+ err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+ if (err)
+ goto err_register_netevent_notifier;
+
+ err = mlxsw_sp_mp_hash_init(mlxsw_sp);
+ if (err)
+ goto err_mp_hash_init;
+
+ err = mlxsw_sp_dscp_init(mlxsw_sp);
+ if (err)
+ goto err_dscp_init;
+
+ mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
+ err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
+ mlxsw_sp_router_fib_dump_flush);
+ if (err)
+ goto err_register_fib_notifier;
+
+ return 0;
+
+err_register_fib_notifier:
+err_dscp_init:
+err_mp_hash_init:
+ unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+err_register_netevent_notifier:
+ mlxsw_sp_neigh_fini(mlxsw_sp);
+err_neigh_init:
+ mlxsw_sp_vrs_fini(mlxsw_sp);
+err_vrs_init:
+ mlxsw_sp_mr_fini(mlxsw_sp);
+err_mr_init:
+ mlxsw_sp_lpm_fini(mlxsw_sp);
+err_lpm_init:
+ rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
+err_nexthop_group_ht_init:
+ rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
+err_nexthop_ht_init:
+ mlxsw_sp_ipips_fini(mlxsw_sp);
+err_ipips_init:
+ mlxsw_sp_rifs_fini(mlxsw_sp);
+err_rifs_init:
+ __mlxsw_sp_router_fini(mlxsw_sp);
+err_router_init:
+ kfree(mlxsw_sp->router);
+ return err;
+}
+
+void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
+ unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+ mlxsw_sp_neigh_fini(mlxsw_sp);
+ mlxsw_sp_vrs_fini(mlxsw_sp);
+ mlxsw_sp_mr_fini(mlxsw_sp);
+ mlxsw_sp_lpm_fini(mlxsw_sp);
+ rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
+ rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
+ mlxsw_sp_ipips_fini(mlxsw_sp);
+ mlxsw_sp_rifs_fini(mlxsw_sp);
+ __mlxsw_sp_router_fini(mlxsw_sp);
+ kfree(mlxsw_sp->router);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
new file mode 100644
index 000000000..1a60391da
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_ROUTER_H_
+#define _MLXSW_ROUTER_H_
+
+#include "spectrum.h"
+#include "reg.h"
+
+enum mlxsw_sp_l3proto {
+ MLXSW_SP_L3_PROTO_IPV4,
+ MLXSW_SP_L3_PROTO_IPV6,
+#define MLXSW_SP_L3_PROTO_MAX (MLXSW_SP_L3_PROTO_IPV6 + 1)
+};
+
+union mlxsw_sp_l3addr {
+ __be32 addr4;
+ struct in6_addr addr6;
+};
+
+struct mlxsw_sp_rif_ipip_lb;
+struct mlxsw_sp_rif_ipip_lb_config {
+ enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
+ u32 okey;
+ enum mlxsw_sp_l3proto ul_protocol; /* Underlay. */
+ union mlxsw_sp_l3addr saddr;
+};
+
+enum mlxsw_sp_rif_counter_dir {
+ MLXSW_SP_RIF_COUNTER_INGRESS,
+ MLXSW_SP_RIF_COUNTER_EGRESS,
+};
+
+struct mlxsw_sp_neigh_entry;
+struct mlxsw_sp_nexthop;
+struct mlxsw_sp_ipip_entry;
+
+struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev);
+struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
+ u16 rif_index);
+u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
+u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
+int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
+const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
+struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir,
+ u64 *cnt);
+void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir);
+int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_rif *rif,
+ enum mlxsw_sp_rif_counter_dir dir);
+struct mlxsw_sp_neigh_entry *
+mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
+ struct mlxsw_sp_neigh_entry *neigh_entry);
+int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry);
+unsigned char *
+mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry);
+u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry);
+struct in6_addr *
+mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry);
+
+#define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \
+ for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \
+ neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry))
+int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ u64 *p_counter);
+void
+mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool adding);
+bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry);
+int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ bool recreate_loopback,
+ bool keep_encap,
+ bool update_nexthops,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry);
+bool
+mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr saddr,
+ u32 ul_tb_id,
+ const struct mlxsw_sp_ipip_entry *except);
+struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
+ struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);
+unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh);
+int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
+ u32 *p_adj_size, u32 *p_adj_hash_index);
+struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
+#define mlxsw_sp_nexthop_for_each(nh, router) \
+ for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \
+ nh = mlxsw_sp_nexthop_next(router, nh))
+int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh, u64 *p_counter);
+int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh);
+
+static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
+ const union mlxsw_sp_l3addr *addr2)
+{
+ return !memcmp(addr1, addr2, sizeof(*addr1));
+}
+
+#endif /* _MLXSW_ROUTER_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
new file mode 100644
index 000000000..d965fd275
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -0,0 +1,977 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/if_bridge.h>
+#include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
+#include <net/lag.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
+
+#include "spectrum.h"
+#include "spectrum_ipip.h"
+#include "spectrum_span.h"
+#include "spectrum_switchdev.h"
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
+ return -EIO;
+
+ mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_SPAN);
+ mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
+ sizeof(struct mlxsw_sp_span_entry),
+ GFP_KERNEL);
+ if (!mlxsw_sp->span.entries)
+ return -ENOMEM;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ INIT_LIST_HEAD(&curr->bound_ports_list);
+ curr->id = i;
+ }
+
+ return 0;
+}
+
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
+ }
+ kfree(mlxsw_sp->span.entries);
+}
+
+static int
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = netdev_priv(to_dev);
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_reg_mpat_span_type span_type)
+{
+ struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+ .can_handle = mlxsw_sp_port_dev_check,
+ .parms = mlxsw_sp_span_entry_phys_parms,
+ .configure = mlxsw_sp_span_entry_phys_configure,
+ .deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *dev,
+ unsigned char dmac[ETH_ALEN])
+{
+ struct neighbour *neigh = neigh_lookup(tbl, pkey, dev);
+ int err = 0;
+
+ if (!neigh) {
+ neigh = neigh_create(tbl, pkey, dev);
+ if (IS_ERR(neigh))
+ return PTR_ERR(neigh);
+ }
+
+ neigh_event_send(neigh, NULL);
+
+ read_lock_bh(&neigh->lock);
+ if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+ memcpy(dmac, neigh->ha, ETH_ALEN);
+ else
+ err = -ENOENT;
+ read_unlock_bh(&neigh->lock);
+
+ neigh_release(neigh);
+ return err;
+}
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+ sparmsp->dest_port = NULL;
+ return 0;
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_bridge_8021q(const struct net_device *br_dev,
+ unsigned char *dmac,
+ u16 *p_vid)
+{
+ struct bridge_vlan_info vinfo;
+ struct net_device *edev;
+ u16 vid = *p_vid;
+
+ if (!vid && WARN_ON(br_vlan_get_pvid(br_dev, &vid)))
+ return NULL;
+ if (!vid ||
+ br_vlan_get_info(br_dev, vid, &vinfo) ||
+ !(vinfo.flags & BRIDGE_VLAN_INFO_BRENTRY))
+ return NULL;
+
+ edev = br_fdb_find_port(br_dev, dmac, vid);
+ if (!edev)
+ return NULL;
+
+ if (br_vlan_get_info(edev, vid, &vinfo))
+ return NULL;
+ if (vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED)
+ *p_vid = 0;
+ else
+ *p_vid = vid;
+ return edev;
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_bridge_8021d(const struct net_device *br_dev,
+ unsigned char *dmac)
+{
+ return br_fdb_find_port(br_dev, dmac, 0);
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_bridge(const struct net_device *br_dev,
+ unsigned char dmac[ETH_ALEN],
+ u16 *p_vid)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+ enum mlxsw_reg_spms_state spms_state;
+ struct net_device *dev = NULL;
+ struct mlxsw_sp_port *port;
+ u8 stp_state;
+
+ if (br_vlan_enabled(br_dev))
+ dev = mlxsw_sp_span_entry_bridge_8021q(br_dev, dmac, p_vid);
+ else if (!*p_vid)
+ dev = mlxsw_sp_span_entry_bridge_8021d(br_dev, dmac);
+ if (!dev)
+ return NULL;
+
+ port = mlxsw_sp_port_dev_lower_find(dev);
+ if (!port)
+ return NULL;
+
+ bridge_port = mlxsw_sp_bridge_port_find(port->mlxsw_sp->bridge, dev);
+ if (!bridge_port)
+ return NULL;
+
+ stp_state = mlxsw_sp_bridge_port_stp_state(bridge_port);
+ spms_state = mlxsw_sp_stp_spms_state(stp_state);
+ if (spms_state != MLXSW_REG_SPMS_STATE_FORWARDING)
+ return NULL;
+
+ return dev;
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_vlan(const struct net_device *vlan_dev,
+ u16 *p_vid)
+{
+ *p_vid = vlan_dev_vlan_id(vlan_dev);
+ return vlan_dev_real_dev(vlan_dev);
+}
+
+static struct net_device *
+mlxsw_sp_span_entry_lag(struct net_device *lag_dev)
+{
+ struct net_device *dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(lag_dev, dev, iter)
+ if (netif_carrier_ok(dev) &&
+ net_lag_port_dev_txable(dev) &&
+ mlxsw_sp_port_dev_check(dev))
+ return dev;
+
+ return NULL;
+}
+
+static __maybe_unused int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *edev,
+ union mlxsw_sp_l3addr saddr,
+ union mlxsw_sp_l3addr daddr,
+ union mlxsw_sp_l3addr gw,
+ __u8 ttl,
+ struct neigh_table *tbl,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ unsigned char dmac[ETH_ALEN];
+ u16 vid = 0;
+
+ if (mlxsw_sp_l3addr_is_zero(gw))
+ gw = daddr;
+
+ if (!edev || mlxsw_sp_span_dmac(tbl, &gw, edev, dmac))
+ goto unoffloadable;
+
+ if (is_vlan_dev(edev))
+ edev = mlxsw_sp_span_entry_vlan(edev, &vid);
+
+ if (netif_is_bridge_master(edev)) {
+ edev = mlxsw_sp_span_entry_bridge(edev, dmac, &vid);
+ if (!edev)
+ goto unoffloadable;
+ }
+
+ if (is_vlan_dev(edev)) {
+ if (vid || !(edev->flags & IFF_UP))
+ goto unoffloadable;
+ edev = mlxsw_sp_span_entry_vlan(edev, &vid);
+ }
+
+ if (netif_is_lag_master(edev)) {
+ if (!(edev->flags & IFF_UP))
+ goto unoffloadable;
+ edev = mlxsw_sp_span_entry_lag(edev);
+ if (!edev)
+ goto unoffloadable;
+ }
+
+ if (!mlxsw_sp_port_dev_check(edev))
+ goto unoffloadable;
+
+ sparmsp->dest_port = netdev_priv(edev);
+ sparmsp->ttl = ttl;
+ memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+ memcpy(sparmsp->smac, edev->dev_addr, ETH_ALEN);
+ sparmsp->saddr = saddr;
+ sparmsp->daddr = daddr;
+ sparmsp->vid = vid;
+ return 0;
+
+unoffloadable:
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+ __be32 *saddrp, __be32 *daddrp)
+{
+ struct ip_tunnel *tun = netdev_priv(to_dev);
+ struct net_device *dev = NULL;
+ struct ip_tunnel_parm parms;
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
+
+ /* We assume "dev" stays valid after rt is put. */
+ ASSERT_RTNL();
+
+ parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+ 0, 0, parms.link, tun->fwmark);
+
+ rt = ip_route_output_key(tun->net, &fl4);
+ if (IS_ERR(rt))
+ return NULL;
+
+ if (rt->rt_type != RTN_UNICAST)
+ goto out;
+
+ dev = rt->dst.dev;
+ *saddrp = fl4.saddr;
+ *daddrp = rt->rt_gateway;
+
+out:
+ ip_rt_put(rt);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+ union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+ union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+ bool inherit_tos = tparm.iph.tos & 0x1;
+ bool inherit_ttl = !tparm.iph.ttl;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.iph.ttl,
+ &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, !!sparms.vid);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+ sparms.ttl, sparms.smac,
+ be32_to_cpu(sparms.saddr.addr4),
+ be32_to_cpu(sparms.daddr.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+ .can_handle = is_gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap4_parms,
+ .configure = mlxsw_sp_span_entry_gretap4_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+ struct in6_addr *saddrp,
+ struct in6_addr *daddrp)
+{
+ struct ip6_tnl *t = netdev_priv(to_dev);
+ struct flowi6 fl6 = t->fl.u.ip6;
+ struct net_device *dev = NULL;
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+
+ /* We assume "dev" stays valid after dst is released. */
+ ASSERT_RTNL();
+
+ fl6.flowi6_mark = t->parms.fwmark;
+ if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+ return NULL;
+
+ dst = ip6_route_output(t->net, NULL, &fl6);
+ if (!dst || dst->error)
+ goto out;
+
+ rt6 = container_of(dst, struct rt6_info, dst);
+
+ dev = dst->dev;
+ *saddrp = fl6.saddr;
+ *daddrp = rt6->rt6i_gateway;
+
+out:
+ dst_release(dst);
+ return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+ bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+ union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+ union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+ bool inherit_ttl = !tparm.hop_limit;
+ union mlxsw_sp_l3addr gw = daddr;
+ struct net_device *l3edev;
+
+ if (!(to_dev->flags & IFF_UP) ||
+ /* Reject tunnels with GRE keys, checksums, etc. */
+ tparm.i_flags || tparm.o_flags ||
+ /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+ inherit_ttl || !inherit_tos ||
+ /* A destination address may not be "any". */
+ mlxsw_sp_l3addr_is_zero(daddr))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+ return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+ tparm.hop_limit,
+ &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ /* Create a new port analayzer entry for local_port. */
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+ mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
+ mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+ MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+ sparms.dmac, !!sparms.vid);
+ mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+ sparms.saddr.addr6,
+ sparms.daddr.addr6);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+ .can_handle = is_ip6gretap_dev,
+ .parms = mlxsw_sp_span_entry_gretap6_parms,
+ .configure = mlxsw_sp_span_entry_gretap6_configure,
+ .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+#endif
+
+static bool
+mlxsw_sp_span_vlan_can_handle(const struct net_device *dev)
+{
+ return is_vlan_dev(dev) &&
+ mlxsw_sp_port_dev_check(vlan_dev_real_dev(dev));
+}
+
+static int
+mlxsw_sp_span_entry_vlan_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ struct net_device *real_dev;
+ u16 vid;
+
+ if (!(to_dev->flags & IFF_UP))
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+ real_dev = mlxsw_sp_span_entry_vlan(to_dev, &vid);
+ sparmsp->dest_port = netdev_priv(real_dev);
+ sparmsp->vid = vid;
+ return 0;
+}
+
+static int
+mlxsw_sp_span_entry_vlan_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_port *dest_port = sparms.dest_port;
+ struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+ u8 local_port = dest_port->local_port;
+ char mpat_pl[MLXSW_REG_MPAT_LEN];
+ int pa_id = span_entry->id;
+
+ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH);
+ mlxsw_reg_mpat_eth_rspan_pack(mpat_pl, sparms.vid);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_vlan_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure_common(span_entry,
+ MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_vlan = {
+ .can_handle = mlxsw_sp_span_vlan_can_handle,
+ .parms = mlxsw_sp_span_entry_vlan_parms,
+ .configure = mlxsw_sp_span_entry_vlan_configure,
+ .deconfigure = mlxsw_sp_span_entry_vlan_deconfigure,
+};
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+ &mlxsw_sp_span_entry_ops_phys,
+#if IS_ENABLED(CONFIG_NET_IPGRE)
+ &mlxsw_sp_span_entry_ops_gretap4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6_GRE)
+ &mlxsw_sp_span_entry_ops_gretap6,
+#endif
+ &mlxsw_sp_span_entry_ops_vlan,
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp)
+{
+ return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+ .parms = mlxsw_sp_span_entry_nop_parms,
+ .configure = mlxsw_sp_span_entry_nop_configure,
+ .deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms)
+{
+ if (sparms.dest_port) {
+ if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+ netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ } else if (span_entry->ops->configure(span_entry, sparms)) {
+ netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+ sparms.dest_port->dev->name);
+ sparms.dest_port = NULL;
+ }
+ }
+
+ span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+ if (span_entry->parms.dest_port)
+ span_entry->ops->deconfigure(span_entry);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry = NULL;
+ int i;
+
+ /* find a free entry to use */
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ if (!mlxsw_sp->span.entries[i].ref_count) {
+ span_entry = &mlxsw_sp->span.entries[i];
+ break;
+ }
+ }
+ if (!span_entry)
+ return NULL;
+
+ span_entry->ops = ops;
+ span_entry->ref_count = 1;
+ span_entry->to_dev = to_dev;
+ mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
+ return span_entry;
+}
+
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+}
+
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->to_dev == to_dev)
+ return curr;
+ }
+ return NULL;
+}
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ mlxsw_sp_span_entry_deconfigure(span_entry);
+ span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ if (curr->ref_count && curr->id == span_id)
+ return curr;
+ }
+ return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev,
+ const struct mlxsw_sp_span_entry_ops *ops,
+ struct mlxsw_sp_span_parms sparms)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
+ if (span_entry) {
+ /* Already exists, just take a reference */
+ span_entry->ref_count++;
+ return span_entry;
+ }
+
+ return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
+}
+
+static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry)
+{
+ WARN_ON(!span_entry->ref_count);
+ if (--span_entry->ref_count == 0)
+ mlxsw_sp_span_entry_destroy(span_entry);
+ return 0;
+}
+
+static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ struct mlxsw_sp_span_inspected_port *p;
+ int i;
+
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+ list_for_each_entry(p, &curr->bound_ports_list, list)
+ if (p->local_port == port->local_port &&
+ p->type == MLXSW_SP_SPAN_EGRESS)
+ return true;
+ }
+
+ return false;
+}
+
+static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
+ int mtu)
+{
+ return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
+}
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int err;
+
+ /* If port is egress mirrored, the shared buffer size should be
+ * updated according to the mtu value
+ */
+ if (mlxsw_sp_span_is_egress_mirror(port)) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static struct mlxsw_sp_span_inspected_port *
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ struct mlxsw_sp_port *port,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *p;
+
+ list_for_each_entry(p, &span_entry->bound_ports_list, list)
+ if (type == p->type &&
+ port->local_port == p->local_port &&
+ bind == p->bound)
+ return p;
+ return NULL;
+}
+
+static int
+mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char mpar_pl[MLXSW_REG_MPAR_LEN];
+ int pa_id = span_entry->id;
+
+ /* bind the port to the SPAN entry */
+ mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+ (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
+}
+
+static int
+mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+ int i;
+ int err;
+
+ /* A given (source port, direction) can only be bound to one analyzer,
+ * so if a binding is requested, check for conflicts.
+ */
+ if (bind)
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr =
+ &mlxsw_sp->span.entries[i];
+
+ if (mlxsw_sp_span_entry_bound_port_find(curr, type,
+ port, bind))
+ return -EEXIST;
+ }
+
+ /* if it is an egress SPAN, bind a shared buffer to it */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
+ port->dev->mtu);
+
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ if (err) {
+ netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
+ return err;
+ }
+ }
+
+ if (bind) {
+ err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ true);
+ if (err)
+ goto err_port_bind;
+ }
+
+ inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
+ if (!inspected_port) {
+ err = -ENOMEM;
+ goto err_inspected_port_alloc;
+ }
+ inspected_port->local_port = port->local_port;
+ inspected_port->type = type;
+ inspected_port->bound = bind;
+ list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
+
+ return 0;
+
+err_inspected_port_alloc:
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+err_port_bind:
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+ return err;
+}
+
+static void
+mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
+ struct mlxsw_sp_span_entry *span_entry,
+ enum mlxsw_sp_span_type type,
+ bool bind)
+{
+ struct mlxsw_sp_span_inspected_port *inspected_port;
+ struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+ char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+ inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type,
+ port, bind);
+ if (!inspected_port)
+ return;
+
+ if (bind)
+ mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+ false);
+ /* remove the SBIB buffer if it was egress SPAN */
+ if (type == MLXSW_SP_SPAN_EGRESS) {
+ mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+ }
+
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+
+ list_del(&inspected_port->list);
+ kfree(inspected_port);
+}
+
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+ if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+ return mlxsw_sp_span_entry_types[i];
+
+ return NULL;
+}
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type, bool bind,
+ int *p_span_id)
+{
+ struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms sparms = {NULL};
+ struct mlxsw_sp_span_entry *span_entry;
+ int err;
+
+ ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+ if (!ops) {
+ netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ err = ops->parms(to_dev, &sparms);
+ if (err)
+ return err;
+
+ span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
+ if (!span_entry)
+ return -ENOBUFS;
+
+ netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
+ span_entry->id);
+
+ err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
+ if (err)
+ goto err_port_bind;
+
+ *p_span_id = span_entry->id;
+ return 0;
+
+err_port_bind:
+ mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+ return err;
+}
+
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind)
+{
+ struct mlxsw_sp_span_entry *span_entry;
+
+ span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
+ if (!span_entry) {
+ netdev_err(from->dev, "no span entry found\n");
+ return;
+ }
+
+ netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
+ span_entry->id);
+ mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
+}
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+ int err;
+
+ ASSERT_RTNL();
+ for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+ struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+ struct mlxsw_sp_span_parms sparms = {NULL};
+
+ if (!curr->ref_count)
+ continue;
+
+ err = curr->ops->parms(curr->to_dev, &sparms);
+ if (err)
+ continue;
+
+ if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+ mlxsw_sp_span_entry_deconfigure(curr);
+ mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+ }
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
new file mode 100644
index 000000000..5e04252f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_SPAN_H
+#define _MLXSW_SPECTRUM_SPAN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+
+enum mlxsw_sp_span_type {
+ MLXSW_SP_SPAN_EGRESS,
+ MLXSW_SP_SPAN_INGRESS
+};
+
+struct mlxsw_sp_span_inspected_port {
+ struct list_head list;
+ enum mlxsw_sp_span_type type;
+ u8 local_port;
+
+ /* Whether this is a directly bound mirror (port-to-port) or an ACL. */
+ bool bound;
+};
+
+struct mlxsw_sp_span_parms {
+ struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+ unsigned int ttl;
+ unsigned char dmac[ETH_ALEN];
+ unsigned char smac[ETH_ALEN];
+ union mlxsw_sp_l3addr daddr;
+ union mlxsw_sp_l3addr saddr;
+ u16 vid;
+};
+
+struct mlxsw_sp_span_entry_ops;
+
+struct mlxsw_sp_span_entry {
+ const struct net_device *to_dev;
+ const struct mlxsw_sp_span_entry_ops *ops;
+ struct mlxsw_sp_span_parms parms;
+ struct list_head bound_ports_list;
+ int ref_count;
+ int id;
+};
+
+struct mlxsw_sp_span_entry_ops {
+ bool (*can_handle)(const struct net_device *to_dev);
+ int (*parms)(const struct net_device *to_dev,
+ struct mlxsw_sp_span_parms *sparmsp);
+ int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+ struct mlxsw_sp_span_parms sparms);
+ void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+ const struct net_device *to_dev,
+ enum mlxsw_sp_span_type type,
+ bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
+ enum mlxsw_sp_span_type type, bool bind);
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_span_entry *span_entry);
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
new file mode 100644
index 000000000..8d556eb37
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -0,0 +1,2450 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <linux/if_bridge.h>
+#include <linux/workqueue.h>
+#include <linux/jiffies.h>
+#include <linux/rtnetlink.h>
+#include <linux/netlink.h>
+#include <net/switchdev.h>
+
+#include "spectrum_span.h"
+#include "spectrum_router.h"
+#include "spectrum_switchdev.h"
+#include "spectrum.h"
+#include "core.h"
+#include "reg.h"
+
+struct mlxsw_sp_bridge_ops;
+
+struct mlxsw_sp_bridge {
+ struct mlxsw_sp *mlxsw_sp;
+ struct {
+ struct delayed_work dw;
+#define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100
+ unsigned int interval; /* ms */
+ } fdb_notify;
+#define MLXSW_SP_MIN_AGEING_TIME 10
+#define MLXSW_SP_MAX_AGEING_TIME 1000000
+#define MLXSW_SP_DEFAULT_AGEING_TIME 300
+ u32 ageing_time;
+ bool vlan_enabled_exists;
+ struct list_head bridges_list;
+ DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX);
+ const struct mlxsw_sp_bridge_ops *bridge_8021q_ops;
+ const struct mlxsw_sp_bridge_ops *bridge_8021d_ops;
+};
+
+struct mlxsw_sp_bridge_device {
+ struct net_device *dev;
+ struct list_head list;
+ struct list_head ports_list;
+ struct list_head mids_list;
+ u8 vlan_enabled:1,
+ multicast_enabled:1,
+ mrouter:1;
+ const struct mlxsw_sp_bridge_ops *ops;
+};
+
+struct mlxsw_sp_bridge_port {
+ struct net_device *dev;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct list_head list;
+ struct list_head vlans_list;
+ unsigned int ref_count;
+ u8 stp_state;
+ unsigned long flags;
+ bool mrouter;
+ bool lagged;
+ union {
+ u16 lag_id;
+ u16 system_port;
+ };
+};
+
+struct mlxsw_sp_bridge_vlan {
+ struct list_head list;
+ struct list_head port_vlan_list;
+ u16 vid;
+};
+
+struct mlxsw_sp_bridge_ops {
+ int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netlink_ext_ack *extack);
+ void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_port *mlxsw_sp_port);
+ struct mlxsw_sp_fid *
+ (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid);
+};
+
+static int
+mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ u16 fid_index);
+
+static void
+mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port);
+
+static void
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_device
+ *bridge_device);
+
+static void
+mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ bool add);
+
+static struct mlxsw_sp_bridge_device *
+mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
+ const struct net_device *br_dev)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ list_for_each_entry(bridge_device, &bridge->bridges_list, list)
+ if (bridge_device->dev == br_dev)
+ return bridge_device;
+
+ return NULL;
+}
+
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev)
+{
+ return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+}
+
+static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev,
+ void *data)
+{
+ struct mlxsw_sp *mlxsw_sp = data;
+
+ mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev);
+ return 0;
+}
+
+static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev)
+{
+ mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev);
+ netdev_walk_all_upper_dev_rcu(dev,
+ mlxsw_sp_bridge_device_upper_rif_destroy,
+ mlxsw_sp);
+}
+
+static struct mlxsw_sp_bridge_device *
+mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
+ struct net_device *br_dev)
+{
+ struct device *dev = bridge->mlxsw_sp->bus_info->dev;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ bool vlan_enabled = br_vlan_enabled(br_dev);
+
+ if (vlan_enabled && bridge->vlan_enabled_exists) {
+ dev_err(dev, "Only one VLAN-aware bridge is supported\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ bridge_device = kzalloc(sizeof(*bridge_device), GFP_KERNEL);
+ if (!bridge_device)
+ return ERR_PTR(-ENOMEM);
+
+ bridge_device->dev = br_dev;
+ bridge_device->vlan_enabled = vlan_enabled;
+ bridge_device->multicast_enabled = br_multicast_enabled(br_dev);
+ bridge_device->mrouter = br_multicast_router(br_dev);
+ INIT_LIST_HEAD(&bridge_device->ports_list);
+ if (vlan_enabled) {
+ bridge->vlan_enabled_exists = true;
+ bridge_device->ops = bridge->bridge_8021q_ops;
+ } else {
+ bridge_device->ops = bridge->bridge_8021d_ops;
+ }
+ INIT_LIST_HEAD(&bridge_device->mids_list);
+ list_add(&bridge_device->list, &bridge->bridges_list);
+
+ return bridge_device;
+}
+
+static void
+mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge,
+ struct mlxsw_sp_bridge_device *bridge_device)
+{
+ mlxsw_sp_bridge_device_rifs_destroy(bridge->mlxsw_sp,
+ bridge_device->dev);
+ list_del(&bridge_device->list);
+ if (bridge_device->vlan_enabled)
+ bridge->vlan_enabled_exists = false;
+ WARN_ON(!list_empty(&bridge_device->ports_list));
+ WARN_ON(!list_empty(&bridge_device->mids_list));
+ kfree(bridge_device);
+}
+
+static struct mlxsw_sp_bridge_device *
+mlxsw_sp_bridge_device_get(struct mlxsw_sp_bridge *bridge,
+ struct net_device *br_dev)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ bridge_device = mlxsw_sp_bridge_device_find(bridge, br_dev);
+ if (bridge_device)
+ return bridge_device;
+
+ return mlxsw_sp_bridge_device_create(bridge, br_dev);
+}
+
+static void
+mlxsw_sp_bridge_device_put(struct mlxsw_sp_bridge *bridge,
+ struct mlxsw_sp_bridge_device *bridge_device)
+{
+ if (list_empty(&bridge_device->ports_list))
+ mlxsw_sp_bridge_device_destroy(bridge, bridge_device);
+}
+
+static struct mlxsw_sp_bridge_port *
+__mlxsw_sp_bridge_port_find(const struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *brport_dev)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+
+ list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
+ if (bridge_port->dev == brport_dev)
+ return bridge_port;
+ }
+
+ return NULL;
+}
+
+struct mlxsw_sp_bridge_port *
+mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
+ struct net_device *brport_dev)
+{
+ struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev);
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ if (!br_dev)
+ return NULL;
+
+ bridge_device = mlxsw_sp_bridge_device_find(bridge, br_dev);
+ if (!bridge_device)
+ return NULL;
+
+ return __mlxsw_sp_bridge_port_find(bridge_device, brport_dev);
+}
+
+static struct mlxsw_sp_bridge_port *
+mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
+ struct net_device *brport_dev)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+
+ bridge_port = kzalloc(sizeof(*bridge_port), GFP_KERNEL);
+ if (!bridge_port)
+ return NULL;
+
+ mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(brport_dev);
+ bridge_port->lagged = mlxsw_sp_port->lagged;
+ if (bridge_port->lagged)
+ bridge_port->lag_id = mlxsw_sp_port->lag_id;
+ else
+ bridge_port->system_port = mlxsw_sp_port->local_port;
+ bridge_port->dev = brport_dev;
+ bridge_port->bridge_device = bridge_device;
+ bridge_port->stp_state = BR_STATE_DISABLED;
+ bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC |
+ BR_MCAST_FLOOD;
+ INIT_LIST_HEAD(&bridge_port->vlans_list);
+ list_add(&bridge_port->list, &bridge_device->ports_list);
+ bridge_port->ref_count = 1;
+
+ return bridge_port;
+}
+
+static void
+mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port)
+{
+ list_del(&bridge_port->list);
+ WARN_ON(!list_empty(&bridge_port->vlans_list));
+ kfree(bridge_port);
+}
+
+static struct mlxsw_sp_bridge_port *
+mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge,
+ struct net_device *brport_dev)
+{
+ struct net_device *br_dev = netdev_master_upper_dev_get(brport_dev);
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ int err;
+
+ bridge_port = mlxsw_sp_bridge_port_find(bridge, brport_dev);
+ if (bridge_port) {
+ bridge_port->ref_count++;
+ return bridge_port;
+ }
+
+ bridge_device = mlxsw_sp_bridge_device_get(bridge, br_dev);
+ if (IS_ERR(bridge_device))
+ return ERR_CAST(bridge_device);
+
+ bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev);
+ if (!bridge_port) {
+ err = -ENOMEM;
+ goto err_bridge_port_create;
+ }
+
+ return bridge_port;
+
+err_bridge_port_create:
+ mlxsw_sp_bridge_device_put(bridge, bridge_device);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_bridge_port_put(struct mlxsw_sp_bridge *bridge,
+ struct mlxsw_sp_bridge_port *bridge_port)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ if (--bridge_port->ref_count != 0)
+ return;
+ bridge_device = bridge_port->bridge_device;
+ mlxsw_sp_bridge_port_destroy(bridge_port);
+ mlxsw_sp_bridge_device_put(bridge, bridge_device);
+}
+
+static struct mlxsw_sp_port_vlan *
+mlxsw_sp_port_vlan_find_by_bridge(struct mlxsw_sp_port *mlxsw_sp_port,
+ const struct mlxsw_sp_bridge_device *
+ bridge_device,
+ u16 vid)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list,
+ list) {
+ if (!mlxsw_sp_port_vlan->bridge_port)
+ continue;
+ if (mlxsw_sp_port_vlan->bridge_port->bridge_device !=
+ bridge_device)
+ continue;
+ if (bridge_device->vlan_enabled &&
+ mlxsw_sp_port_vlan->vid != vid)
+ continue;
+ return mlxsw_sp_port_vlan;
+ }
+
+ return NULL;
+}
+
+static struct mlxsw_sp_port_vlan*
+mlxsw_sp_port_vlan_find_by_fid(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 fid_index)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list,
+ list) {
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+
+ if (fid && mlxsw_sp_fid_index(fid) == fid_index)
+ return mlxsw_sp_port_vlan;
+ }
+
+ return NULL;
+}
+
+static struct mlxsw_sp_bridge_vlan *
+mlxsw_sp_bridge_vlan_find(const struct mlxsw_sp_bridge_port *bridge_port,
+ u16 vid)
+{
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+
+ list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) {
+ if (bridge_vlan->vid == vid)
+ return bridge_vlan;
+ }
+
+ return NULL;
+}
+
+static struct mlxsw_sp_bridge_vlan *
+mlxsw_sp_bridge_vlan_create(struct mlxsw_sp_bridge_port *bridge_port, u16 vid)
+{
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+
+ bridge_vlan = kzalloc(sizeof(*bridge_vlan), GFP_KERNEL);
+ if (!bridge_vlan)
+ return NULL;
+
+ INIT_LIST_HEAD(&bridge_vlan->port_vlan_list);
+ bridge_vlan->vid = vid;
+ list_add(&bridge_vlan->list, &bridge_port->vlans_list);
+
+ return bridge_vlan;
+}
+
+static void
+mlxsw_sp_bridge_vlan_destroy(struct mlxsw_sp_bridge_vlan *bridge_vlan)
+{
+ list_del(&bridge_vlan->list);
+ WARN_ON(!list_empty(&bridge_vlan->port_vlan_list));
+ kfree(bridge_vlan);
+}
+
+static struct mlxsw_sp_bridge_vlan *
+mlxsw_sp_bridge_vlan_get(struct mlxsw_sp_bridge_port *bridge_port, u16 vid)
+{
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+
+ bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid);
+ if (bridge_vlan)
+ return bridge_vlan;
+
+ return mlxsw_sp_bridge_vlan_create(bridge_port, vid);
+}
+
+static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan)
+{
+ if (list_empty(&bridge_vlan->port_vlan_list))
+ mlxsw_sp_bridge_vlan_destroy(bridge_vlan);
+}
+
+static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge,
+ struct net_device *dev,
+ unsigned long *brport_flags)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+
+ bridge_port = mlxsw_sp_bridge_port_find(bridge, dev);
+ if (WARN_ON(!bridge_port))
+ return;
+
+ memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags));
+}
+
+static int mlxsw_sp_port_attr_get(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+ attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac);
+ memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac,
+ attr->u.ppid.id_len);
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev,
+ &attr->u.brport_flags);
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
+ attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD |
+ BR_MCAST_FLOOD;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_vlan *bridge_vlan,
+ u8 state)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list,
+ bridge_vlan_node) {
+ if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port)
+ continue;
+ return mlxsw_sp_port_vid_stp_set(mlxsw_sp_port,
+ bridge_vlan->vid, state);
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ u8 state)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+ int err;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ /* It's possible we failed to enslave the port, yet this
+ * operation is executed due to it being deferred.
+ */
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge,
+ orig_dev);
+ if (!bridge_port)
+ return 0;
+
+ list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) {
+ err = mlxsw_sp_port_bridge_vlan_stp_set(mlxsw_sp_port,
+ bridge_vlan, state);
+ if (err)
+ goto err_port_bridge_vlan_stp_set;
+ }
+
+ bridge_port->stp_state = state;
+
+ return 0;
+
+err_port_bridge_vlan_stp_set:
+ list_for_each_entry_continue_reverse(bridge_vlan,
+ &bridge_port->vlans_list, list)
+ mlxsw_sp_port_bridge_vlan_stp_set(mlxsw_sp_port, bridge_vlan,
+ bridge_port->stp_state);
+ return err;
+}
+
+static int
+mlxsw_sp_port_bridge_vlan_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_vlan *bridge_vlan,
+ enum mlxsw_sp_flood_type packet_type,
+ bool member)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list,
+ bridge_vlan_node) {
+ if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port)
+ continue;
+ return mlxsw_sp_fid_flood_set(mlxsw_sp_port_vlan->fid,
+ packet_type,
+ mlxsw_sp_port->local_port,
+ member);
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_sp_bridge_port_flood_table_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ enum mlxsw_sp_flood_type packet_type,
+ bool member)
+{
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+ int err;
+
+ list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) {
+ err = mlxsw_sp_port_bridge_vlan_flood_set(mlxsw_sp_port,
+ bridge_vlan,
+ packet_type,
+ member);
+ if (err)
+ goto err_port_bridge_vlan_flood_set;
+ }
+
+ return 0;
+
+err_port_bridge_vlan_flood_set:
+ list_for_each_entry_continue_reverse(bridge_vlan,
+ &bridge_port->vlans_list, list)
+ mlxsw_sp_port_bridge_vlan_flood_set(mlxsw_sp_port, bridge_vlan,
+ packet_type, !member);
+ return err;
+}
+
+static int
+mlxsw_sp_port_bridge_vlan_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_vlan *bridge_vlan,
+ bool set)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ u16 vid = bridge_vlan->vid;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &bridge_vlan->port_vlan_list,
+ bridge_vlan_node) {
+ if (mlxsw_sp_port_vlan->mlxsw_sp_port != mlxsw_sp_port)
+ continue;
+ return mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, set);
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_sp_bridge_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ bool set)
+{
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+ int err;
+
+ list_for_each_entry(bridge_vlan, &bridge_port->vlans_list, list) {
+ err = mlxsw_sp_port_bridge_vlan_learning_set(mlxsw_sp_port,
+ bridge_vlan, set);
+ if (err)
+ goto err_port_bridge_vlan_learning_set;
+ }
+
+ return 0;
+
+err_port_bridge_vlan_learning_set:
+ list_for_each_entry_continue_reverse(bridge_vlan,
+ &bridge_port->vlans_list, list)
+ mlxsw_sp_port_bridge_vlan_learning_set(mlxsw_sp_port,
+ bridge_vlan, !set);
+ return err;
+}
+
+static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ unsigned long brport_flags)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+ int err;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge,
+ orig_dev);
+ if (!bridge_port)
+ return 0;
+
+ err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
+ MLXSW_SP_FLOOD_TYPE_UC,
+ brport_flags & BR_FLOOD);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_bridge_port_learning_set(mlxsw_sp_port, bridge_port,
+ brport_flags & BR_LEARNING);
+ if (err)
+ return err;
+
+ if (bridge_port->bridge_device->multicast_enabled)
+ goto out;
+
+ err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
+ MLXSW_SP_FLOOD_TYPE_MC,
+ brport_flags &
+ BR_MCAST_FLOOD);
+ if (err)
+ return err;
+
+out:
+ memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags));
+ return 0;
+}
+
+static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time)
+{
+ char sfdat_pl[MLXSW_REG_SFDAT_LEN];
+ int err;
+
+ mlxsw_reg_sfdat_pack(sfdat_pl, ageing_time);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdat), sfdat_pl);
+ if (err)
+ return err;
+ mlxsw_sp->bridge->ageing_time = ageing_time;
+ return 0;
+}
+
+static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ unsigned long ageing_clock_t)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t);
+ u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000;
+
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (ageing_time < MLXSW_SP_MIN_AGEING_TIME ||
+ ageing_time > MLXSW_SP_MAX_AGEING_TIME)
+ return -ERANGE;
+ else
+ return 0;
+ }
+
+ return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time);
+}
+
+static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ bool vlan_enabled)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ if (!switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev);
+ if (WARN_ON(!bridge_device))
+ return -EINVAL;
+
+ if (bridge_device->vlan_enabled == vlan_enabled)
+ return 0;
+
+ netdev_err(bridge_device->dev, "VLAN filtering can't be changed for existing bridge\n");
+ return -EINVAL;
+}
+
+static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ bool is_port_mrouter)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+ int err;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge,
+ orig_dev);
+ if (!bridge_port)
+ return 0;
+
+ if (!bridge_port->bridge_device->multicast_enabled)
+ goto out;
+
+ err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
+ MLXSW_SP_FLOOD_TYPE_MC,
+ is_port_mrouter);
+ if (err)
+ return err;
+
+ mlxsw_sp_port_mrouter_update_mdb(mlxsw_sp_port, bridge_port,
+ is_port_mrouter);
+out:
+ bridge_port->mrouter = is_port_mrouter;
+ return 0;
+}
+
+static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port)
+{
+ const struct mlxsw_sp_bridge_device *bridge_device;
+
+ bridge_device = bridge_port->bridge_device;
+ return bridge_device->multicast_enabled ? bridge_port->mrouter :
+ bridge_port->flags & BR_MCAST_FLOOD;
+}
+
+static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ bool mc_disabled)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ int err;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ /* It's possible we failed to enslave the port, yet this
+ * operation is executed due to it being deferred.
+ */
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev);
+ if (!bridge_device)
+ return 0;
+
+ if (bridge_device->multicast_enabled != !mc_disabled) {
+ bridge_device->multicast_enabled = !mc_disabled;
+ mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port,
+ bridge_device);
+ }
+
+ list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
+ enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC;
+ bool member = mlxsw_sp_mc_flood(bridge_port);
+
+ err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port,
+ bridge_port,
+ packet_type, member);
+ if (err)
+ return err;
+ }
+
+ bridge_device->multicast_enabled = !mc_disabled;
+
+ return 0;
+}
+
+static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp,
+ u16 mid_idx, bool add)
+{
+ char *smid_pl;
+ int err;
+
+ smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+ if (!smid_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_smid_pack(smid_pl, mid_idx,
+ mlxsw_sp_router_port(mlxsw_sp), add);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+ kfree(smid_pl);
+ return err;
+}
+
+static void
+mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_device *bridge_device,
+ bool add)
+{
+ struct mlxsw_sp_mid *mid;
+
+ list_for_each_entry(mid, &bridge_device->mids_list, list)
+ mlxsw_sp_smid_router_port_set(mlxsw_sp, mid->mid, add);
+}
+
+static int
+mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ bool is_mrouter)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ /* It's possible we failed to enslave the port, yet this
+ * operation is executed due to it being deferred.
+ */
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev);
+ if (!bridge_device)
+ return 0;
+
+ if (bridge_device->mrouter != is_mrouter)
+ mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device,
+ is_mrouter);
+ bridge_device->mrouter = is_mrouter;
+ return 0;
+}
+
+static int mlxsw_sp_port_attr_set(struct net_device *dev,
+ const struct switchdev_attr *attr,
+ struct switchdev_trans *trans)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+ err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.stp_state);
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.brport_flags);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans,
+ attr->u.ageing_time);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+ err = mlxsw_sp_port_attr_br_vlan_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.vlan_filtering);
+ break;
+ case SWITCHDEV_ATTR_ID_PORT_MROUTER:
+ err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.mrouter);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
+ err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.mc_disabled);
+ break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+ err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.mrouter);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ if (switchdev_trans_ph_commit(trans))
+ mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
+ return err;
+}
+
+static int
+mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
+ struct mlxsw_sp_bridge_port *bridge_port)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+ struct mlxsw_sp_fid *fid;
+ int err;
+
+ bridge_device = bridge_port->bridge_device;
+ fid = bridge_device->ops->fid_get(bridge_device, vid);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port,
+ bridge_port->flags & BR_FLOOD);
+ if (err)
+ goto err_fid_uc_flood_set;
+
+ err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port,
+ mlxsw_sp_mc_flood(bridge_port));
+ if (err)
+ goto err_fid_mc_flood_set;
+
+ err = mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port,
+ true);
+ if (err)
+ goto err_fid_bc_flood_set;
+
+ err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
+ if (err)
+ goto err_fid_port_vid_map;
+
+ mlxsw_sp_port_vlan->fid = fid;
+
+ return 0;
+
+err_fid_port_vid_map:
+ mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, false);
+err_fid_bc_flood_set:
+ mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, false);
+err_fid_mc_flood_set:
+ mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, false);
+err_fid_uc_flood_set:
+ mlxsw_sp_fid_put(fid);
+ return err;
+}
+
+static void
+mlxsw_sp_port_vlan_fid_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+ u8 local_port = mlxsw_sp_port->local_port;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+
+ mlxsw_sp_port_vlan->fid = NULL;
+ mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
+ mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_BC, local_port, false);
+ mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_MC, local_port, false);
+ mlxsw_sp_fid_flood_set(fid, MLXSW_SP_FLOOD_TYPE_UC, local_port, false);
+ mlxsw_sp_fid_put(fid);
+}
+
+static u16
+mlxsw_sp_port_pvid_determine(const struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 vid, bool is_pvid)
+{
+ if (is_pvid)
+ return vid;
+ else if (mlxsw_sp_port->pvid == vid)
+ return 0; /* Dis-allow untagged packets */
+ else
+ return mlxsw_sp_port->pvid;
+}
+
+static int
+mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
+ struct mlxsw_sp_bridge_port *bridge_port)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+ int err;
+
+ /* No need to continue if only VLAN flags were changed */
+ if (mlxsw_sp_port_vlan->bridge_port) {
+ mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+ return 0;
+ }
+
+ err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid,
+ bridge_port->flags & BR_LEARNING);
+ if (err)
+ goto err_port_vid_learning_set;
+
+ err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
+ bridge_port->stp_state);
+ if (err)
+ goto err_port_vid_stp_set;
+
+ bridge_vlan = mlxsw_sp_bridge_vlan_get(bridge_port, vid);
+ if (!bridge_vlan) {
+ err = -ENOMEM;
+ goto err_bridge_vlan_get;
+ }
+
+ list_add(&mlxsw_sp_port_vlan->bridge_vlan_node,
+ &bridge_vlan->port_vlan_list);
+
+ mlxsw_sp_bridge_port_get(mlxsw_sp_port->mlxsw_sp->bridge,
+ bridge_port->dev);
+ mlxsw_sp_port_vlan->bridge_port = bridge_port;
+
+ return 0;
+
+err_bridge_vlan_get:
+ mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED);
+err_port_vid_stp_set:
+ mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
+err_port_vid_learning_set:
+ mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan);
+ return err;
+}
+
+void
+mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
+ struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
+ struct mlxsw_sp_bridge_vlan *bridge_vlan;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ u16 vid = mlxsw_sp_port_vlan->vid;
+ bool last_port, last_vlan;
+
+ if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q &&
+ mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D))
+ return;
+
+ bridge_port = mlxsw_sp_port_vlan->bridge_port;
+ last_vlan = list_is_singular(&bridge_port->vlans_list);
+ bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid);
+ last_port = list_is_singular(&bridge_vlan->port_vlan_list);
+
+ list_del(&mlxsw_sp_port_vlan->bridge_vlan_node);
+ mlxsw_sp_bridge_vlan_put(bridge_vlan);
+ mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED);
+ mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
+ if (last_port)
+ mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp,
+ bridge_port,
+ mlxsw_sp_fid_index(fid));
+ if (last_vlan)
+ mlxsw_sp_bridge_port_mdb_flush(mlxsw_sp_port, bridge_port);
+
+ mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan);
+
+ mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port);
+ mlxsw_sp_port_vlan->bridge_port = NULL;
+}
+
+static int
+mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ u16 vid, bool is_untagged, bool is_pvid)
+{
+ u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid);
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ u16 old_pvid = mlxsw_sp_port->pvid;
+ int err;
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid);
+ if (IS_ERR(mlxsw_sp_port_vlan))
+ return PTR_ERR(mlxsw_sp_port_vlan);
+
+ err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true,
+ is_untagged);
+ if (err)
+ goto err_port_vlan_set;
+
+ err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid);
+ if (err)
+ goto err_port_pvid_set;
+
+ err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port);
+ if (err)
+ goto err_port_vlan_bridge_join;
+
+ return 0;
+
+err_port_vlan_bridge_join:
+ mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid);
+err_port_pvid_set:
+ mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
+err_port_vlan_set:
+ mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+ return err;
+}
+
+static int
+mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct mlxsw_sp_rif *rif;
+ struct mlxsw_sp_fid *fid;
+ u16 pvid;
+ u16 vid;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, br_dev);
+ if (!rif)
+ return 0;
+ fid = mlxsw_sp_rif_fid(rif);
+ pvid = mlxsw_sp_fid_8021q_vid(fid);
+
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+ if (vlan->flags & BRIDGE_VLAN_INFO_PVID) {
+ if (vid != pvid) {
+ netdev_err(br_dev, "Can't change PVID, it's used by router interface\n");
+ return -EBUSY;
+ }
+ } else {
+ if (vid == pvid) {
+ netdev_err(br_dev, "Can't remove PVID, it's used by router interface\n");
+ return -EBUSY;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
+{
+ bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+ bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct net_device *orig_dev = vlan->obj.orig_dev;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ u16 vid;
+
+ if (netif_is_bridge_master(orig_dev)) {
+ int err = 0;
+
+ if ((vlan->flags & BRIDGE_VLAN_INFO_BRENTRY) &&
+ br_vlan_enabled(orig_dev) &&
+ switchdev_trans_ph_prepare(trans))
+ err = mlxsw_sp_br_ban_rif_pvid_change(mlxsw_sp,
+ orig_dev, vlan);
+ if (!err)
+ err = -EOPNOTSUPP;
+ return err;
+ }
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
+ if (WARN_ON(!bridge_port))
+ return -EINVAL;
+
+ if (!bridge_port->bridge_device->vlan_enabled)
+ return 0;
+
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+ int err;
+
+ err = mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port,
+ vid, flag_untagged,
+ flag_pvid);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static enum mlxsw_reg_sfdf_flush_type mlxsw_sp_fdb_flush_type(bool lagged)
+{
+ return lagged ? MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID :
+ MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID;
+}
+
+static int
+mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ u16 fid_index)
+{
+ bool lagged = bridge_port->lagged;
+ char sfdf_pl[MLXSW_REG_SFDF_LEN];
+ u16 system_port;
+
+ system_port = lagged ? bridge_port->lag_id : bridge_port->system_port;
+ mlxsw_reg_sfdf_pack(sfdf_pl, mlxsw_sp_fdb_flush_type(lagged));
+ mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index);
+ mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl, system_port);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic)
+{
+ return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS :
+ MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG;
+}
+
+static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
+{
+ return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT :
+ MLXSW_REG_SFD_OP_WRITE_REMOVE;
+}
+
+static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ const char *mac, u16 fid, bool adding,
+ enum mlxsw_reg_sfd_rec_action action,
+ enum mlxsw_reg_sfd_rec_policy policy)
+{
+ char *sfd_pl;
+ u8 num_rec;
+ int err;
+
+ sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+ if (!sfd_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+ mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, action, local_port);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+
+out:
+ kfree(sfd_pl);
+ return err;
+}
+
+static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+ const char *mac, u16 fid, bool adding,
+ bool dynamic)
+{
+ return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding,
+ MLXSW_REG_SFD_REC_ACTION_NOP,
+ mlxsw_sp_sfd_rec_policy(dynamic));
+}
+
+int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
+ bool adding)
+{
+ return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding,
+ MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER,
+ MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY);
+}
+
+static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
+ const char *mac, u16 fid, u16 lag_vid,
+ bool adding, bool dynamic)
+{
+ char *sfd_pl;
+ u8 num_rec;
+ int err;
+
+ sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+ if (!sfd_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+ mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
+ mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP,
+ lag_vid, lag_id);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+
+out:
+ kfree(sfd_pl);
+ return err;
+}
+
+static int
+mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_notifier_fdb_info *fdb_info, bool adding)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct net_device *orig_dev = fdb_info->info.dev;
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ u16 fid_index, vid;
+
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
+ if (!bridge_port)
+ return -EINVAL;
+
+ bridge_device = bridge_port->bridge_device;
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port,
+ bridge_device,
+ fdb_info->vid);
+ if (!mlxsw_sp_port_vlan)
+ return 0;
+
+ fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
+ vid = mlxsw_sp_port_vlan->vid;
+
+ if (!bridge_port->lagged)
+ return mlxsw_sp_port_fdb_uc_op(mlxsw_sp,
+ bridge_port->system_port,
+ fdb_info->addr, fid_index,
+ adding, false);
+ else
+ return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp,
+ bridge_port->lag_id,
+ fdb_info->addr, fid_index,
+ vid, adding, false);
+}
+
+static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
+ u16 fid, u16 mid_idx, bool adding)
+{
+ char *sfd_pl;
+ u8 num_rec;
+ int err;
+
+ sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+ if (!sfd_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+ mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid,
+ MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+
+out:
+ kfree(sfd_pl);
+ return err;
+}
+
+static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx,
+ long *ports_bitmap,
+ bool set_router_port)
+{
+ char *smid_pl;
+ int err, i;
+
+ smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+ if (!smid_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_smid_pack(smid_pl, mid_idx, 0, false);
+ for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
+ if (mlxsw_sp->ports[i])
+ mlxsw_reg_smid_port_mask_set(smid_pl, i, 1);
+ }
+
+ mlxsw_reg_smid_port_mask_set(smid_pl,
+ mlxsw_sp_router_port(mlxsw_sp), 1);
+
+ for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core))
+ mlxsw_reg_smid_port_set(smid_pl, i, 1);
+
+ mlxsw_reg_smid_port_set(smid_pl, mlxsw_sp_router_port(mlxsw_sp),
+ set_router_port);
+
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+ kfree(smid_pl);
+ return err;
+}
+
+static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 mid_idx, bool add)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char *smid_pl;
+ int err;
+
+ smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+ if (!smid_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_smid_pack(smid_pl, mid_idx, mlxsw_sp_port->local_port, add);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+ kfree(smid_pl);
+ return err;
+}
+
+static struct
+mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device,
+ const unsigned char *addr,
+ u16 fid)
+{
+ struct mlxsw_sp_mid *mid;
+
+ list_for_each_entry(mid, &bridge_device->mids_list, list) {
+ if (ether_addr_equal(mid->addr, addr) && mid->fid == fid)
+ return mid;
+ }
+ return NULL;
+}
+
+static void
+mlxsw_sp_bridge_port_get_ports_bitmap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ unsigned long *ports_bitmap)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u64 max_lag_members, i;
+ int lag_id;
+
+ if (!bridge_port->lagged) {
+ set_bit(bridge_port->system_port, ports_bitmap);
+ } else {
+ max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_LAG_MEMBERS);
+ lag_id = bridge_port->lag_id;
+ for (i = 0; i < max_lag_members; i++) {
+ mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp,
+ lag_id, i);
+ if (mlxsw_sp_port)
+ set_bit(mlxsw_sp_port->local_port,
+ ports_bitmap);
+ }
+ }
+}
+
+static void
+mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap,
+ struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_bridge_port *bridge_port;
+
+ list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
+ if (bridge_port->mrouter) {
+ mlxsw_sp_bridge_port_get_ports_bitmap(mlxsw_sp,
+ bridge_port,
+ flood_bitmap);
+ }
+ }
+}
+
+static bool
+mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mid *mid,
+ struct mlxsw_sp_bridge_device *bridge_device)
+{
+ long *flood_bitmap;
+ int num_of_ports;
+ int alloc_size;
+ u16 mid_idx;
+ int err;
+
+ mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap,
+ MLXSW_SP_MID_MAX);
+ if (mid_idx == MLXSW_SP_MID_MAX)
+ return false;
+
+ num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+ alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports);
+ flood_bitmap = kzalloc(alloc_size, GFP_KERNEL);
+ if (!flood_bitmap)
+ return false;
+
+ bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports);
+ mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp);
+
+ mid->mid = mid_idx;
+ err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap,
+ bridge_device->mrouter);
+ kfree(flood_bitmap);
+ if (err)
+ return false;
+
+ err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid_idx,
+ true);
+ if (err)
+ return false;
+
+ set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap);
+ mid->in_hw = true;
+ return true;
+}
+
+static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mid *mid)
+{
+ if (!mid->in_hw)
+ return 0;
+
+ clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
+ mid->in_hw = false;
+ return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid,
+ false);
+}
+
+static struct
+mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_device *bridge_device,
+ const unsigned char *addr,
+ u16 fid)
+{
+ struct mlxsw_sp_mid *mid;
+ size_t alloc_size;
+
+ mid = kzalloc(sizeof(*mid), GFP_KERNEL);
+ if (!mid)
+ return NULL;
+
+ alloc_size = sizeof(unsigned long) *
+ BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core));
+
+ mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL);
+ if (!mid->ports_in_mid)
+ goto err_ports_in_mid_alloc;
+
+ ether_addr_copy(mid->addr, addr);
+ mid->fid = fid;
+ mid->in_hw = false;
+
+ if (!bridge_device->multicast_enabled)
+ goto out;
+
+ if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device))
+ goto err_write_mdb_entry;
+
+out:
+ list_add_tail(&mid->list, &bridge_device->mids_list);
+ return mid;
+
+err_write_mdb_entry:
+ kfree(mid->ports_in_mid);
+err_ports_in_mid_alloc:
+ kfree(mid);
+ return NULL;
+}
+
+static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_mid *mid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ int err = 0;
+
+ clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid);
+ if (bitmap_empty(mid->ports_in_mid,
+ mlxsw_core_max_ports(mlxsw_sp->core))) {
+ err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
+ list_del(&mid->list);
+ kfree(mid->ports_in_mid);
+ kfree(mid);
+ }
+ return err;
+}
+
+static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct net_device *orig_dev = mdb->obj.orig_dev;
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct net_device *dev = mlxsw_sp_port->dev;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ struct mlxsw_sp_mid *mid;
+ u16 fid_index;
+ int err = 0;
+
+ if (switchdev_trans_ph_commit(trans))
+ return 0;
+
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
+ if (!bridge_port)
+ return 0;
+
+ bridge_device = bridge_port->bridge_device;
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port,
+ bridge_device,
+ mdb->vid);
+ if (!mlxsw_sp_port_vlan)
+ return 0;
+
+ fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
+
+ mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index);
+ if (!mid) {
+ mid = __mlxsw_sp_mc_alloc(mlxsw_sp, bridge_device, mdb->addr,
+ fid_index);
+ if (!mid) {
+ netdev_err(dev, "Unable to allocate MC group\n");
+ return -ENOMEM;
+ }
+ }
+ set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid);
+
+ if (!bridge_device->multicast_enabled)
+ return 0;
+
+ if (bridge_port->mrouter)
+ return 0;
+
+ err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true);
+ if (err) {
+ netdev_err(dev, "Unable to set SMID\n");
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
+ return err;
+}
+
+static void
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_device
+ *bridge_device)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_mid *mid;
+ bool mc_enabled;
+
+ mc_enabled = bridge_device->multicast_enabled;
+
+ list_for_each_entry(mid, &bridge_device->mids_list, list) {
+ if (mc_enabled)
+ mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid,
+ bridge_device);
+ else
+ mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
+ }
+}
+
+static void
+mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ bool add)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_mid *mid;
+
+ bridge_device = bridge_port->bridge_device;
+
+ list_for_each_entry(mid, &bridge_device->mids_list, list) {
+ if (!test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid))
+ mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, add);
+ }
+}
+
+struct mlxsw_sp_span_respin_work {
+ struct work_struct work;
+ struct mlxsw_sp *mlxsw_sp;
+};
+
+static void mlxsw_sp_span_respin_work(struct work_struct *work)
+{
+ struct mlxsw_sp_span_respin_work *respin_work =
+ container_of(work, struct mlxsw_sp_span_respin_work, work);
+
+ rtnl_lock();
+ mlxsw_sp_span_respin(respin_work->mlxsw_sp);
+ rtnl_unlock();
+ kfree(respin_work);
+}
+
+static void mlxsw_sp_span_respin_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_span_respin_work *respin_work;
+
+ respin_work = kzalloc(sizeof(*respin_work), GFP_ATOMIC);
+ if (!respin_work)
+ return;
+
+ INIT_WORK(&respin_work->work, mlxsw_sp_span_respin_work);
+ respin_work->mlxsw_sp = mlxsw_sp;
+
+ mlxsw_core_schedule_work(&respin_work->work);
+}
+
+static int mlxsw_sp_port_obj_add(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ const struct switchdev_obj_port_vlan *vlan;
+ int err = 0;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+ err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans);
+
+ if (switchdev_trans_ph_prepare(trans)) {
+ /* The event is emitted before the changes are actually
+ * applied to the bridge. Therefore schedule the respin
+ * call for later, so that the respin logic sees the
+ * updated bridge state.
+ */
+ mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp);
+ }
+ break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ err = mlxsw_sp_port_mdb_add(mlxsw_sp_port,
+ SWITCHDEV_OBJ_PORT_MDB(obj),
+ trans);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static void
+mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port, u16 vid)
+{
+ u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : mlxsw_sp_port->pvid;
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+ if (WARN_ON(!mlxsw_sp_port_vlan))
+ return;
+
+ mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
+ mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid);
+ mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false);
+ mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+}
+
+static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct net_device *orig_dev = vlan->obj.orig_dev;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ u16 vid;
+
+ if (netif_is_bridge_master(orig_dev))
+ return -EOPNOTSUPP;
+
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
+ if (WARN_ON(!bridge_port))
+ return -EINVAL;
+
+ if (!bridge_port->bridge_device->vlan_enabled)
+ return 0;
+
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++)
+ mlxsw_sp_bridge_port_vlan_del(mlxsw_sp_port, bridge_port, vid);
+
+ return 0;
+}
+
+static int
+__mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_mid *mid)
+{
+ struct net_device *dev = mlxsw_sp_port->dev;
+ int err;
+
+ if (bridge_port->bridge_device->multicast_enabled &&
+ !bridge_port->mrouter) {
+ err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false);
+ if (err)
+ netdev_err(dev, "Unable to remove port from SMID\n");
+ }
+
+ err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
+ if (err)
+ netdev_err(dev, "Unable to remove MC SFD\n");
+
+ return err;
+}
+
+static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct net_device *orig_dev = mdb->obj.orig_dev;
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct net_device *dev = mlxsw_sp_port->dev;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ struct mlxsw_sp_mid *mid;
+ u16 fid_index;
+
+ bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
+ if (!bridge_port)
+ return 0;
+
+ bridge_device = bridge_port->bridge_device;
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port,
+ bridge_device,
+ mdb->vid);
+ if (!mlxsw_sp_port_vlan)
+ return 0;
+
+ fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
+
+ mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index);
+ if (!mid) {
+ netdev_err(dev, "Unable to remove port from MC DB\n");
+ return -EINVAL;
+ }
+
+ return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid);
+}
+
+static void
+mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_mid *mid, *tmp;
+
+ bridge_device = bridge_port->bridge_device;
+
+ list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) {
+ if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) {
+ __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port,
+ mid);
+ } else if (bridge_device->multicast_enabled &&
+ bridge_port->mrouter) {
+ mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false);
+ }
+ }
+}
+
+static int mlxsw_sp_port_obj_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err = 0;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ err = mlxsw_sp_port_vlans_del(mlxsw_sp_port,
+ SWITCHDEV_OBJ_PORT_VLAN(obj));
+ break;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ err = mlxsw_sp_port_mdb_del(mlxsw_sp_port,
+ SWITCHDEV_OBJ_PORT_MDB(obj));
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ mlxsw_sp_span_respin_schedule(mlxsw_sp_port->mlxsw_sp);
+
+ return err;
+}
+
+static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
+ u16 lag_id)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u64 max_lag_members;
+ int i;
+
+ max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_LAG_MEMBERS);
+ for (i = 0; i < max_lag_members; i++) {
+ mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
+ if (mlxsw_sp_port)
+ return mlxsw_sp_port;
+ }
+ return NULL;
+}
+
+static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
+ .switchdev_port_attr_get = mlxsw_sp_port_attr_get,
+ .switchdev_port_attr_set = mlxsw_sp_port_attr_set,
+ .switchdev_port_obj_add = mlxsw_sp_port_obj_add,
+ .switchdev_port_obj_del = mlxsw_sp_port_obj_del,
+};
+
+static int
+mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ if (is_vlan_dev(bridge_port->dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
+ return -EINVAL;
+ }
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1);
+ if (WARN_ON(!mlxsw_sp_port_vlan))
+ return -EINVAL;
+
+ /* Let VLAN-aware bridge take care of its own VLANs */
+ mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
+
+ return 0;
+}
+
+static void
+mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
+ /* Make sure untagged frames are allowed to ingress */
+ mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+
+ return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid);
+}
+
+static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = {
+ .port_join = mlxsw_sp_bridge_8021q_port_join,
+ .port_leave = mlxsw_sp_bridge_8021q_port_leave,
+ .fid_get = mlxsw_sp_bridge_8021q_fid_get,
+};
+
+static bool
+mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port,
+ const struct net_device *br_dev)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+
+ list_for_each_entry(mlxsw_sp_port_vlan, &mlxsw_sp_port->vlans_list,
+ list) {
+ if (mlxsw_sp_port_vlan->bridge_port &&
+ mlxsw_sp_port_vlan->bridge_port->bridge_device->dev ==
+ br_dev)
+ return true;
+ }
+
+ return false;
+}
+
+static int
+mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct net_device *dev = bridge_port->dev;
+ u16 vid;
+
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+ if (WARN_ON(!mlxsw_sp_port_vlan))
+ return -EINVAL;
+
+ if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port");
+ return -EINVAL;
+ }
+
+ /* Port is no longer usable as a router interface */
+ if (mlxsw_sp_port_vlan->fid)
+ mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
+
+ return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port);
+}
+
+static void
+mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct net_device *dev = bridge_port->dev;
+ u16 vid;
+
+ vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
+ if (!mlxsw_sp_port_vlan)
+ return;
+
+ mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+
+ return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+}
+
+static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = {
+ .port_join = mlxsw_sp_bridge_8021d_port_join,
+ .port_leave = mlxsw_sp_bridge_8021d_port_leave,
+ .fid_get = mlxsw_sp_bridge_8021d_fid_get,
+};
+
+int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *brport_dev,
+ struct net_device *br_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ int err;
+
+ bridge_port = mlxsw_sp_bridge_port_get(mlxsw_sp->bridge, brport_dev);
+ if (IS_ERR(bridge_port))
+ return PTR_ERR(bridge_port);
+ bridge_device = bridge_port->bridge_device;
+
+ err = bridge_device->ops->port_join(bridge_device, bridge_port,
+ mlxsw_sp_port, extack);
+ if (err)
+ goto err_port_join;
+
+ return 0;
+
+err_port_join:
+ mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
+ return err;
+}
+
+void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct net_device *brport_dev,
+ struct net_device *br_dev)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (!bridge_device)
+ return;
+ bridge_port = __mlxsw_sp_bridge_port_find(bridge_device, brport_dev);
+ if (!bridge_port)
+ return;
+
+ bridge_device->ops->port_leave(bridge_device, bridge_port,
+ mlxsw_sp_port);
+ mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
+}
+
+static void
+mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
+ const char *mac, u16 vid,
+ struct net_device *dev)
+{
+ struct switchdev_notifier_fdb_info info;
+
+ info.addr = mac;
+ info.vid = vid;
+ call_switchdev_notifiers(type, dev, &info.info);
+}
+
+static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
+ char *sfn_pl, int rec_index,
+ bool adding)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ enum switchdev_notifier_type type;
+ char mac[ETH_ALEN];
+ u8 local_port;
+ u16 vid, fid;
+ bool do_notification = true;
+ int err;
+
+ mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port);
+ mlxsw_sp_port = mlxsw_sp->ports[local_port];
+ if (!mlxsw_sp_port) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n");
+ goto just_remove;
+ }
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
+ if (!mlxsw_sp_port_vlan) {
+ netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
+ goto just_remove;
+ }
+
+ bridge_port = mlxsw_sp_port_vlan->bridge_port;
+ if (!bridge_port) {
+ netdev_err(mlxsw_sp_port->dev, "{Port, VID} not associated with a bridge\n");
+ goto just_remove;
+ }
+
+ bridge_device = bridge_port->bridge_device;
+ vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0;
+
+do_fdb_op:
+ err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid,
+ adding, true);
+ if (err) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n");
+ return;
+ }
+
+ if (!do_notification)
+ return;
+ type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
+ mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+
+ return;
+
+just_remove:
+ adding = false;
+ do_notification = false;
+ goto do_fdb_op;
+}
+
+static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
+ char *sfn_pl, int rec_index,
+ bool adding)
+{
+ struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_bridge_port *bridge_port;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ enum switchdev_notifier_type type;
+ char mac[ETH_ALEN];
+ u16 lag_vid = 0;
+ u16 lag_id;
+ u16 vid, fid;
+ bool do_notification = true;
+ int err;
+
+ mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &fid, &lag_id);
+ mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id);
+ if (!mlxsw_sp_port) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n");
+ goto just_remove;
+ }
+
+ mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_fid(mlxsw_sp_port, fid);
+ if (!mlxsw_sp_port_vlan) {
+ netdev_err(mlxsw_sp_port->dev, "Failed to find a matching {Port, VID} following FDB notification\n");
+ goto just_remove;
+ }
+
+ bridge_port = mlxsw_sp_port_vlan->bridge_port;
+ if (!bridge_port) {
+ netdev_err(mlxsw_sp_port->dev, "{Port, VID} not associated with a bridge\n");
+ goto just_remove;
+ }
+
+ bridge_device = bridge_port->bridge_device;
+ vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0;
+ lag_vid = mlxsw_sp_port_vlan->vid;
+
+do_fdb_op:
+ err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
+ adding, true);
+ if (err) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to set FDB entry\n");
+ return;
+ }
+
+ if (!do_notification)
+ return;
+ type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
+ mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+
+ return;
+
+just_remove:
+ adding = false;
+ do_notification = false;
+ goto do_fdb_op;
+}
+
+static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
+ char *sfn_pl, int rec_index)
+{
+ switch (mlxsw_reg_sfn_rec_type_get(sfn_pl, rec_index)) {
+ case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC:
+ mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl,
+ rec_index, true);
+ break;
+ case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC:
+ mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl,
+ rec_index, false);
+ break;
+ case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG:
+ mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
+ rec_index, true);
+ break;
+ case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG:
+ mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
+ rec_index, false);
+ break;
+ }
+}
+
+static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge;
+
+ mlxsw_core_schedule_dw(&bridge->fdb_notify.dw,
+ msecs_to_jiffies(bridge->fdb_notify.interval));
+}
+
+static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
+{
+ struct mlxsw_sp_bridge *bridge;
+ struct mlxsw_sp *mlxsw_sp;
+ char *sfn_pl;
+ u8 num_rec;
+ int i;
+ int err;
+
+ sfn_pl = kmalloc(MLXSW_REG_SFN_LEN, GFP_KERNEL);
+ if (!sfn_pl)
+ return;
+
+ bridge = container_of(work, struct mlxsw_sp_bridge, fdb_notify.dw.work);
+ mlxsw_sp = bridge->mlxsw_sp;
+
+ rtnl_lock();
+ mlxsw_reg_sfn_pack(sfn_pl);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
+ if (err) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n");
+ goto out;
+ }
+ num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
+ for (i = 0; i < num_rec; i++)
+ mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
+
+out:
+ rtnl_unlock();
+ kfree(sfn_pl);
+ mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
+}
+
+struct mlxsw_sp_switchdev_event_work {
+ struct work_struct work;
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct net_device *dev;
+ unsigned long event;
+};
+
+static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_switchdev_event_work *switchdev_work =
+ container_of(work, struct mlxsw_sp_switchdev_event_work, work);
+ struct net_device *dev = switchdev_work->dev;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int err;
+
+ rtnl_lock();
+ mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
+ if (!mlxsw_sp_port)
+ goto out;
+
+ switch (switchdev_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ fdb_info = &switchdev_work->fdb_info;
+ if (!fdb_info->added_by_user)
+ break;
+ err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true);
+ if (err)
+ break;
+ mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ fdb_info->addr,
+ fdb_info->vid, dev);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = &switchdev_work->fdb_info;
+ mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false);
+ break;
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ /* These events are only used to potentially update an existing
+ * SPAN mirror.
+ */
+ break;
+ }
+
+ mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp);
+
+out:
+ rtnl_unlock();
+ kfree(switchdev_work->fdb_info.addr);
+ kfree(switchdev_work);
+ dev_put(dev);
+}
+
+/* Called under rcu_read_lock() */
+static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp_switchdev_event_work *switchdev_work;
+ struct switchdev_notifier_fdb_info *fdb_info = ptr;
+ struct net_device *br_dev;
+
+ /* Tunnel devices are not our uppers, so check their master instead */
+ br_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (!br_dev)
+ return NOTIFY_DONE;
+ if (!netif_is_bridge_master(br_dev))
+ return NOTIFY_DONE;
+ if (!mlxsw_sp_port_dev_lower_find_rcu(br_dev))
+ return NOTIFY_DONE;
+
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+ if (!switchdev_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&switchdev_work->work, mlxsw_sp_switchdev_event_work);
+ switchdev_work->dev = dev;
+ switchdev_work->event = event;
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
+ case SWITCHDEV_FDB_DEL_TO_DEVICE: /* fall through */
+ case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
+ case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ memcpy(&switchdev_work->fdb_info, ptr,
+ sizeof(switchdev_work->fdb_info));
+ switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+ if (!switchdev_work->fdb_info.addr)
+ goto err_addr_alloc;
+ ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+ fdb_info->addr);
+ /* Take a reference on the device. This can be either
+ * upper device containig mlxsw_sp_port or just a
+ * mlxsw_sp_port
+ */
+ dev_hold(dev);
+ break;
+ default:
+ kfree(switchdev_work);
+ return NOTIFY_DONE;
+ }
+
+ mlxsw_core_schedule_work(&switchdev_work->work);
+
+ return NOTIFY_DONE;
+
+err_addr_alloc:
+ kfree(switchdev_work);
+ return NOTIFY_BAD;
+}
+
+static struct notifier_block mlxsw_sp_switchdev_notifier = {
+ .notifier_call = mlxsw_sp_switchdev_event,
+};
+
+u8
+mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port)
+{
+ return bridge_port->stp_state;
+}
+
+static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge;
+ int err;
+
+ err = mlxsw_sp_ageing_set(mlxsw_sp, MLXSW_SP_DEFAULT_AGEING_TIME);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to set default ageing time\n");
+ return err;
+ }
+
+ err = register_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to register switchdev notifier\n");
+ return err;
+ }
+
+ INIT_DELAYED_WORK(&bridge->fdb_notify.dw, mlxsw_sp_fdb_notify_work);
+ bridge->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL;
+ mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
+ return 0;
+}
+
+static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ cancel_delayed_work_sync(&mlxsw_sp->bridge->fdb_notify.dw);
+ unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
+
+}
+
+int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_bridge *bridge;
+
+ bridge = kzalloc(sizeof(*mlxsw_sp->bridge), GFP_KERNEL);
+ if (!bridge)
+ return -ENOMEM;
+ mlxsw_sp->bridge = bridge;
+ bridge->mlxsw_sp = mlxsw_sp;
+
+ INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list);
+
+ bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops;
+ bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops;
+
+ return mlxsw_sp_fdb_init(mlxsw_sp);
+}
+
+void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp_fdb_fini(mlxsw_sp);
+ WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list));
+ kfree(mlxsw_sp->bridge);
+}
+
+void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
+}
+
+void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h
new file mode 100644
index 000000000..c218e10bd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/netdevice.h>
+
+struct mlxsw_sp_bridge;
+struct mlxsw_sp_bridge_port;
+
+struct mlxsw_sp_bridge_port *
+mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
+ struct net_device *brport_dev);
+
+u8 mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
new file mode 100644
index 000000000..bcf2e79a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
@@ -0,0 +1,573 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <net/switchdev.h>
+
+#include "pci.h"
+#include "core.h"
+#include "reg.h"
+#include "port.h"
+#include "trap.h"
+#include "txheader.h"
+#include "ib.h"
+
+static const char mlxsw_sib_driver_name[] = "mlxsw_switchib";
+static const char mlxsw_sib2_driver_name[] = "mlxsw_switchib2";
+
+struct mlxsw_sib_port;
+
+struct mlxsw_sib {
+ struct mlxsw_sib_port **ports;
+ struct mlxsw_core *core;
+ const struct mlxsw_bus_info *bus_info;
+};
+
+struct mlxsw_sib_port {
+ struct mlxsw_sib *mlxsw_sib;
+ u8 local_port;
+ struct {
+ u8 module;
+ } mapping;
+};
+
+/* tx_v1_hdr_version
+ * Tx header version.
+ * Must be set to 1.
+ */
+MLXSW_ITEM32(tx_v1, hdr, version, 0x00, 28, 4);
+
+/* tx_v1_hdr_ctl
+ * Packet control type.
+ * 0 - Ethernet control (e.g. EMADs, LACP)
+ * 1 - Ethernet data
+ */
+MLXSW_ITEM32(tx_v1, hdr, ctl, 0x00, 26, 2);
+
+/* tx_v1_hdr_proto
+ * Packet protocol type. Must be set to 1 (Ethernet).
+ */
+MLXSW_ITEM32(tx_v1, hdr, proto, 0x00, 21, 3);
+
+/* tx_v1_hdr_swid
+ * Switch partition ID. Must be set to 0.
+ */
+MLXSW_ITEM32(tx_v1, hdr, swid, 0x00, 12, 3);
+
+/* tx_v1_hdr_control_tclass
+ * Indicates if the packet should use the control TClass and not one
+ * of the data TClasses.
+ */
+MLXSW_ITEM32(tx_v1, hdr, control_tclass, 0x00, 6, 1);
+
+/* tx_v1_hdr_port_mid
+ * Destination local port for unicast packets.
+ * Destination multicast ID for multicast packets.
+ *
+ * Control packets are directed to a specific egress port, while data
+ * packets are transmitted through the CPU port (0) into the switch partition,
+ * where forwarding rules are applied.
+ */
+MLXSW_ITEM32(tx_v1, hdr, port_mid, 0x04, 16, 16);
+
+/* tx_v1_hdr_type
+ * 0 - Data packets
+ * 6 - Control packets
+ */
+MLXSW_ITEM32(tx_v1, hdr, type, 0x0C, 0, 4);
+
+static void
+mlxsw_sib_tx_v1_hdr_construct(struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info)
+{
+ char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
+
+ memset(txhdr, 0, MLXSW_TXHDR_LEN);
+
+ mlxsw_tx_v1_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
+ mlxsw_tx_v1_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
+ mlxsw_tx_v1_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
+ mlxsw_tx_v1_hdr_swid_set(txhdr, 0);
+ mlxsw_tx_v1_hdr_control_tclass_set(txhdr, 1);
+ mlxsw_tx_v1_hdr_port_mid_set(txhdr, tx_info->local_port);
+ mlxsw_tx_v1_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
+}
+
+static int
+mlxsw_sib_port_admin_status_set(struct mlxsw_sib_port *mlxsw_sib_port,
+ bool is_up)
+{
+ struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+ char paos_pl[MLXSW_REG_PAOS_LEN];
+
+ mlxsw_reg_paos_pack(paos_pl, mlxsw_sib_port->local_port,
+ is_up ? MLXSW_PORT_ADMIN_STATUS_UP :
+ MLXSW_PORT_ADMIN_STATUS_DOWN);
+ return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(paos), paos_pl);
+}
+
+static int mlxsw_sib_port_mtu_set(struct mlxsw_sib_port *mlxsw_sib_port,
+ u16 mtu)
+{
+ struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+ char pmtu_pl[MLXSW_REG_PMTU_LEN];
+ int max_mtu;
+ int err;
+
+ mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, 0);
+ err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
+ if (err)
+ return err;
+ max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
+
+ if (mtu > max_mtu)
+ return -EINVAL;
+
+ mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, mtu);
+ return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
+}
+
+static int mlxsw_sib_port_set(struct mlxsw_sib_port *mlxsw_sib_port, u8 port)
+{
+ struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+ char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
+ int err;
+
+ mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sib_port->local_port);
+ mlxsw_reg_plib_ib_port_set(plib_pl, port);
+ err = mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(plib), plib_pl);
+ return err;
+}
+
+static int mlxsw_sib_port_swid_set(struct mlxsw_sib_port *mlxsw_sib_port,
+ u8 swid)
+{
+ struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+ char pspa_pl[MLXSW_REG_PSPA_LEN];
+
+ mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sib_port->local_port);
+ return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pspa), pspa_pl);
+}
+
+static int mlxsw_sib_port_module_info_get(struct mlxsw_sib *mlxsw_sib,
+ u8 local_port, u8 *p_module,
+ u8 *p_width)
+{
+ char pmlp_pl[MLXSW_REG_PMLP_LEN];
+ int err;
+
+ mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+ err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmlp), pmlp_pl);
+ if (err)
+ return err;
+ *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+ *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+ return 0;
+}
+
+static int mlxsw_sib_port_speed_set(struct mlxsw_sib_port *mlxsw_sib_port,
+ u16 speed, u16 width)
+{
+ struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+
+ mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sib_port->local_port, speed,
+ width);
+ return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+static bool mlxsw_sib_port_created(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+ return mlxsw_sib->ports[local_port] != NULL;
+}
+
+static int __mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
+ u8 module, u8 width)
+{
+ struct mlxsw_sib_port *mlxsw_sib_port;
+ int err;
+
+ mlxsw_sib_port = kzalloc(sizeof(*mlxsw_sib_port), GFP_KERNEL);
+ if (!mlxsw_sib_port)
+ return -ENOMEM;
+ mlxsw_sib_port->mlxsw_sib = mlxsw_sib;
+ mlxsw_sib_port->local_port = local_port;
+ mlxsw_sib_port->mapping.module = module;
+
+ err = mlxsw_sib_port_swid_set(mlxsw_sib_port, 0);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set SWID\n",
+ mlxsw_sib_port->local_port);
+ goto err_port_swid_set;
+ }
+
+ /* Expose the IB port number as it's front panel name */
+ err = mlxsw_sib_port_set(mlxsw_sib_port, module + 1);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set IB port\n",
+ mlxsw_sib_port->local_port);
+ goto err_port_ib_set;
+ }
+
+ /* Supports all speeds from SDR to FDR (bitmask) and support bus width
+ * of 1x, 2x and 4x (3 bits bitmask)
+ */
+ err = mlxsw_sib_port_speed_set(mlxsw_sib_port,
+ MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
+ BIT(3) - 1);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set speed\n",
+ mlxsw_sib_port->local_port);
+ goto err_port_speed_set;
+ }
+
+ /* Change to the maximum MTU the device supports, the SMA will take
+ * care of the active MTU
+ */
+ err = mlxsw_sib_port_mtu_set(mlxsw_sib_port, MLXSW_IB_DEFAULT_MTU);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set MTU\n",
+ mlxsw_sib_port->local_port);
+ goto err_port_mtu_set;
+ }
+
+ err = mlxsw_sib_port_admin_status_set(mlxsw_sib_port, true);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
+ mlxsw_sib_port->local_port);
+ goto err_port_admin_set;
+ }
+
+ mlxsw_core_port_ib_set(mlxsw_sib->core, mlxsw_sib_port->local_port,
+ mlxsw_sib_port);
+ mlxsw_sib->ports[local_port] = mlxsw_sib_port;
+ return 0;
+
+err_port_admin_set:
+err_port_mtu_set:
+err_port_speed_set:
+err_port_ib_set:
+ mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+ kfree(mlxsw_sib_port);
+ return err;
+}
+
+static int mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
+ u8 module, u8 width)
+{
+ int err;
+
+ err = mlxsw_core_port_init(mlxsw_sib->core, local_port);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to init core port\n",
+ local_port);
+ return err;
+ }
+ err = __mlxsw_sib_port_create(mlxsw_sib, local_port, module, width);
+ if (err)
+ goto err_port_create;
+
+ return 0;
+
+err_port_create:
+ mlxsw_core_port_fini(mlxsw_sib->core, local_port);
+ return err;
+}
+
+static void __mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+ struct mlxsw_sib_port *mlxsw_sib_port = mlxsw_sib->ports[local_port];
+
+ mlxsw_core_port_clear(mlxsw_sib->core, local_port, mlxsw_sib);
+ mlxsw_sib->ports[local_port] = NULL;
+ mlxsw_sib_port_admin_status_set(mlxsw_sib_port, false);
+ mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
+ kfree(mlxsw_sib_port);
+}
+
+static void mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+ __mlxsw_sib_port_remove(mlxsw_sib, local_port);
+ mlxsw_core_port_fini(mlxsw_sib->core, local_port);
+}
+
+static void mlxsw_sib_ports_remove(struct mlxsw_sib *mlxsw_sib)
+{
+ int i;
+
+ for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++)
+ if (mlxsw_sib_port_created(mlxsw_sib, i))
+ mlxsw_sib_port_remove(mlxsw_sib, i);
+ kfree(mlxsw_sib->ports);
+}
+
+static int mlxsw_sib_ports_create(struct mlxsw_sib *mlxsw_sib)
+{
+ size_t alloc_size;
+ u8 module, width;
+ int i;
+ int err;
+
+ alloc_size = sizeof(struct mlxsw_sib_port *) * MLXSW_PORT_MAX_IB_PORTS;
+ mlxsw_sib->ports = kzalloc(alloc_size, GFP_KERNEL);
+ if (!mlxsw_sib->ports)
+ return -ENOMEM;
+
+ for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++) {
+ err = mlxsw_sib_port_module_info_get(mlxsw_sib, i, &module,
+ &width);
+ if (err)
+ goto err_port_module_info_get;
+ if (!width)
+ continue;
+ err = mlxsw_sib_port_create(mlxsw_sib, i, module, width);
+ if (err)
+ goto err_port_create;
+ }
+ return 0;
+
+err_port_create:
+err_port_module_info_get:
+ for (i--; i >= 1; i--)
+ if (mlxsw_sib_port_created(mlxsw_sib, i))
+ mlxsw_sib_port_remove(mlxsw_sib, i);
+ kfree(mlxsw_sib->ports);
+ return err;
+}
+
+static void
+mlxsw_sib_pude_ib_event_func(struct mlxsw_sib_port *mlxsw_sib_port,
+ enum mlxsw_reg_pude_oper_status status)
+{
+ if (status == MLXSW_PORT_OPER_STATUS_UP)
+ pr_info("ib link for port %d - up\n",
+ mlxsw_sib_port->mapping.module + 1);
+ else
+ pr_info("ib link for port %d - down\n",
+ mlxsw_sib_port->mapping.module + 1);
+}
+
+static void mlxsw_sib_pude_event_func(const struct mlxsw_reg_info *reg,
+ char *pude_pl, void *priv)
+{
+ struct mlxsw_sib *mlxsw_sib = priv;
+ struct mlxsw_sib_port *mlxsw_sib_port;
+ enum mlxsw_reg_pude_oper_status status;
+ u8 local_port;
+
+ local_port = mlxsw_reg_pude_local_port_get(pude_pl);
+ mlxsw_sib_port = mlxsw_sib->ports[local_port];
+ if (!mlxsw_sib_port) {
+ dev_warn(mlxsw_sib->bus_info->dev, "Port %d: Link event received for non-existent port\n",
+ local_port);
+ return;
+ }
+
+ status = mlxsw_reg_pude_oper_status_get(pude_pl);
+ mlxsw_sib_pude_ib_event_func(mlxsw_sib_port, status);
+}
+
+static const struct mlxsw_listener mlxsw_sib_listener[] = {
+ MLXSW_EVENTL(mlxsw_sib_pude_event_func, PUDE, EMAD),
+};
+
+static int mlxsw_sib_taps_init(struct mlxsw_sib *mlxsw_sib)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
+ err = mlxsw_core_trap_register(mlxsw_sib->core,
+ &mlxsw_sib_listener[i],
+ mlxsw_sib);
+ if (err)
+ goto err_rx_listener_register;
+ }
+
+ return 0;
+
+err_rx_listener_register:
+ for (i--; i >= 0; i--) {
+ mlxsw_core_trap_unregister(mlxsw_sib->core,
+ &mlxsw_sib_listener[i],
+ mlxsw_sib);
+ }
+
+ return err;
+}
+
+static void mlxsw_sib_traps_fini(struct mlxsw_sib *mlxsw_sib)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
+ mlxsw_core_trap_unregister(mlxsw_sib->core,
+ &mlxsw_sib_listener[i], mlxsw_sib);
+ }
+}
+
+static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+ char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
+ mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
+ mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
+static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+ struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
+ int err;
+
+ mlxsw_sib->core = mlxsw_core;
+ mlxsw_sib->bus_info = mlxsw_bus_info;
+
+ err = mlxsw_sib_ports_create(mlxsw_sib);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Failed to create ports\n");
+ return err;
+ }
+
+ err = mlxsw_sib_taps_init(mlxsw_sib);
+ if (err) {
+ dev_err(mlxsw_sib->bus_info->dev, "Failed to set traps\n");
+ goto err_traps_init_err;
+ }
+
+ return 0;
+
+err_traps_init_err:
+ mlxsw_sib_ports_remove(mlxsw_sib);
+ return err;
+}
+
+static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
+
+ mlxsw_sib_traps_fini(mlxsw_sib);
+ mlxsw_sib_ports_remove(mlxsw_sib);
+}
+
+static const struct mlxsw_config_profile mlxsw_sib_config_profile = {
+ .used_max_system_port = 1,
+ .max_system_port = 48000,
+ .used_max_ib_mc = 1,
+ .max_ib_mc = 27,
+ .used_max_pkey = 1,
+ .max_pkey = 32,
+ .swid_config = {
+ {
+ .used_type = 1,
+ .type = MLXSW_PORT_SWID_TYPE_IB,
+ }
+ },
+};
+
+static struct mlxsw_driver mlxsw_sib_driver = {
+ .kind = mlxsw_sib_driver_name,
+ .priv_size = sizeof(struct mlxsw_sib),
+ .init = mlxsw_sib_init,
+ .fini = mlxsw_sib_fini,
+ .basic_trap_groups_set = mlxsw_sib_basic_trap_groups_set,
+ .txhdr_construct = mlxsw_sib_tx_v1_hdr_construct,
+ .txhdr_len = MLXSW_TXHDR_LEN,
+ .profile = &mlxsw_sib_config_profile,
+};
+
+static struct mlxsw_driver mlxsw_sib2_driver = {
+ .kind = mlxsw_sib2_driver_name,
+ .priv_size = sizeof(struct mlxsw_sib),
+ .init = mlxsw_sib_init,
+ .fini = mlxsw_sib_fini,
+ .basic_trap_groups_set = mlxsw_sib_basic_trap_groups_set,
+ .txhdr_construct = mlxsw_sib_tx_v1_hdr_construct,
+ .txhdr_len = MLXSW_TXHDR_LEN,
+ .profile = &mlxsw_sib_config_profile,
+};
+
+static const struct pci_device_id mlxsw_sib_pci_id_table[] = {
+ {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB), 0},
+ {0, },
+};
+
+static struct pci_driver mlxsw_sib_pci_driver = {
+ .name = mlxsw_sib_driver_name,
+ .id_table = mlxsw_sib_pci_id_table,
+};
+
+static const struct pci_device_id mlxsw_sib2_pci_id_table[] = {
+ {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB2), 0},
+ {0, },
+};
+
+static struct pci_driver mlxsw_sib2_pci_driver = {
+ .name = mlxsw_sib2_driver_name,
+ .id_table = mlxsw_sib2_pci_id_table,
+};
+
+static int __init mlxsw_sib_module_init(void)
+{
+ int err;
+
+ err = mlxsw_core_driver_register(&mlxsw_sib_driver);
+ if (err)
+ return err;
+
+ err = mlxsw_core_driver_register(&mlxsw_sib2_driver);
+ if (err)
+ goto err_sib2_driver_register;
+
+ err = mlxsw_pci_driver_register(&mlxsw_sib_pci_driver);
+ if (err)
+ goto err_sib_pci_driver_register;
+
+ err = mlxsw_pci_driver_register(&mlxsw_sib2_pci_driver);
+ if (err)
+ goto err_sib2_pci_driver_register;
+
+ return 0;
+
+err_sib2_pci_driver_register:
+ mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
+err_sib_pci_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
+err_sib2_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_sib_driver);
+ return err;
+}
+
+static void __exit mlxsw_sib_module_exit(void)
+{
+ mlxsw_pci_driver_unregister(&mlxsw_sib2_pci_driver);
+ mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
+ mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
+ mlxsw_core_driver_unregister(&mlxsw_sib_driver);
+}
+
+module_init(mlxsw_sib_module_init);
+module_exit(mlxsw_sib_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Elad Raz <eladr@@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox SwitchIB and SwitchIB-2 driver");
+MODULE_ALIAS("mlxsw_switchib2");
+MODULE_DEVICE_TABLE(pci, mlxsw_sib_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sib2_pci_id_table);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
new file mode 100644
index 000000000..b22c190e0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -0,0 +1,1736 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <net/switchdev.h>
+
+#include "pci.h"
+#include "core.h"
+#include "reg.h"
+#include "port.h"
+#include "trap.h"
+#include "txheader.h"
+#include "ib.h"
+
+static const char mlxsw_sx_driver_name[] = "mlxsw_switchx2";
+static const char mlxsw_sx_driver_version[] = "1.0";
+
+struct mlxsw_sx_port;
+
+struct mlxsw_sx {
+ struct mlxsw_sx_port **ports;
+ struct mlxsw_core *core;
+ const struct mlxsw_bus_info *bus_info;
+ u8 hw_id[ETH_ALEN];
+};
+
+struct mlxsw_sx_port_pcpu_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+ u32 tx_dropped;
+};
+
+struct mlxsw_sx_port {
+ struct net_device *dev;
+ struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats;
+ struct mlxsw_sx *mlxsw_sx;
+ u8 local_port;
+ struct {
+ u8 module;
+ } mapping;
+};
+
+/* tx_hdr_version
+ * Tx header version.
+ * Must be set to 0.
+ */
+MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4);
+
+/* tx_hdr_ctl
+ * Packet control type.
+ * 0 - Ethernet control (e.g. EMADs, LACP)
+ * 1 - Ethernet data
+ */
+MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2);
+
+/* tx_hdr_proto
+ * Packet protocol type. Must be set to 1 (Ethernet).
+ */
+MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3);
+
+/* tx_hdr_etclass
+ * Egress TClass to be used on the egress device on the egress port.
+ * The MSB is specified in the 'ctclass3' field.
+ * Range is 0-15, where 15 is the highest priority.
+ */
+MLXSW_ITEM32(tx, hdr, etclass, 0x00, 18, 3);
+
+/* tx_hdr_swid
+ * Switch partition ID.
+ */
+MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3);
+
+/* tx_hdr_port_mid
+ * Destination local port for unicast packets.
+ * Destination multicast ID for multicast packets.
+ *
+ * Control packets are directed to a specific egress port, while data
+ * packets are transmitted through the CPU port (0) into the switch partition,
+ * where forwarding rules are applied.
+ */
+MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16);
+
+/* tx_hdr_ctclass3
+ * See field 'etclass'.
+ */
+MLXSW_ITEM32(tx, hdr, ctclass3, 0x04, 14, 1);
+
+/* tx_hdr_rdq
+ * RDQ for control packets sent to remote CPU.
+ * Must be set to 0x1F for EMADs, otherwise 0.
+ */
+MLXSW_ITEM32(tx, hdr, rdq, 0x04, 9, 5);
+
+/* tx_hdr_cpu_sig
+ * Signature control for packets going to CPU. Must be set to 0.
+ */
+MLXSW_ITEM32(tx, hdr, cpu_sig, 0x04, 0, 9);
+
+/* tx_hdr_sig
+ * Stacking protocl signature. Must be set to 0xE0E0.
+ */
+MLXSW_ITEM32(tx, hdr, sig, 0x0C, 16, 16);
+
+/* tx_hdr_stclass
+ * Stacking TClass.
+ */
+MLXSW_ITEM32(tx, hdr, stclass, 0x0C, 13, 3);
+
+/* tx_hdr_emad
+ * EMAD bit. Must be set for EMADs.
+ */
+MLXSW_ITEM32(tx, hdr, emad, 0x0C, 5, 1);
+
+/* tx_hdr_type
+ * 0 - Data packets
+ * 6 - Control packets
+ */
+MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
+
+static void mlxsw_sx_txhdr_construct(struct sk_buff *skb,
+ const struct mlxsw_tx_info *tx_info)
+{
+ char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
+ bool is_emad = tx_info->is_emad;
+
+ memset(txhdr, 0, MLXSW_TXHDR_LEN);
+
+ /* We currently set default values for the egress tclass (QoS). */
+ mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_0);
+ mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
+ mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
+ mlxsw_tx_hdr_etclass_set(txhdr, is_emad ? MLXSW_TXHDR_ETCLASS_6 :
+ MLXSW_TXHDR_ETCLASS_5);
+ mlxsw_tx_hdr_swid_set(txhdr, 0);
+ mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port);
+ mlxsw_tx_hdr_ctclass3_set(txhdr, MLXSW_TXHDR_CTCLASS3);
+ mlxsw_tx_hdr_rdq_set(txhdr, is_emad ? MLXSW_TXHDR_RDQ_EMAD :
+ MLXSW_TXHDR_RDQ_OTHER);
+ mlxsw_tx_hdr_cpu_sig_set(txhdr, MLXSW_TXHDR_CPU_SIG);
+ mlxsw_tx_hdr_sig_set(txhdr, MLXSW_TXHDR_SIG);
+ mlxsw_tx_hdr_stclass_set(txhdr, MLXSW_TXHDR_STCLASS_NONE);
+ mlxsw_tx_hdr_emad_set(txhdr, is_emad ? MLXSW_TXHDR_EMAD :
+ MLXSW_TXHDR_NOT_EMAD);
+ mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
+}
+
+static int mlxsw_sx_port_admin_status_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ bool is_up)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char paos_pl[MLXSW_REG_PAOS_LEN];
+
+ mlxsw_reg_paos_pack(paos_pl, mlxsw_sx_port->local_port,
+ is_up ? MLXSW_PORT_ADMIN_STATUS_UP :
+ MLXSW_PORT_ADMIN_STATUS_DOWN);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(paos), paos_pl);
+}
+
+static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port,
+ bool *p_is_up)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char paos_pl[MLXSW_REG_PAOS_LEN];
+ u8 oper_status;
+ int err;
+
+ mlxsw_reg_paos_pack(paos_pl, mlxsw_sx_port->local_port, 0);
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(paos), paos_pl);
+ if (err)
+ return err;
+ oper_status = mlxsw_reg_paos_oper_status_get(paos_pl);
+ *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false;
+ return 0;
+}
+
+static int __mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ u16 mtu)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char pmtu_pl[MLXSW_REG_PMTU_LEN];
+ int max_mtu;
+ int err;
+
+ mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, 0);
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
+ if (err)
+ return err;
+ max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
+
+ if (mtu > max_mtu)
+ return -EINVAL;
+
+ mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, mtu);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
+}
+
+static int mlxsw_sx_port_mtu_eth_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ u16 mtu)
+{
+ mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
+ return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+}
+
+static int mlxsw_sx_port_mtu_ib_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ u16 mtu)
+{
+ return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+}
+
+static int mlxsw_sx_port_ib_port_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ u8 ib_port)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
+ int err;
+
+ mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sx_port->local_port);
+ mlxsw_reg_plib_ib_port_set(plib_pl, ib_port);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(plib), plib_pl);
+ return err;
+}
+
+static int mlxsw_sx_port_swid_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 swid)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char pspa_pl[MLXSW_REG_PSPA_LEN];
+
+ mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sx_port->local_port);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pspa), pspa_pl);
+}
+
+static int
+mlxsw_sx_port_system_port_mapping_set(struct mlxsw_sx_port *mlxsw_sx_port)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char sspr_pl[MLXSW_REG_SSPR_LEN];
+
+ mlxsw_reg_sspr_pack(sspr_pl, mlxsw_sx_port->local_port);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sspr), sspr_pl);
+}
+
+static int mlxsw_sx_port_module_info_get(struct mlxsw_sx *mlxsw_sx,
+ u8 local_port, u8 *p_module,
+ u8 *p_width)
+{
+ char pmlp_pl[MLXSW_REG_PMLP_LEN];
+ int err;
+
+ mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmlp), pmlp_pl);
+ if (err)
+ return err;
+ *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+ *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+ return 0;
+}
+
+static int mlxsw_sx_port_open(struct net_device *dev)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ int err;
+
+ err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
+ if (err)
+ return err;
+ netif_start_queue(dev);
+ return 0;
+}
+
+static int mlxsw_sx_port_stop(struct net_device *dev)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+
+ netif_stop_queue(dev);
+ return mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
+}
+
+static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ struct mlxsw_sx_port_pcpu_stats *pcpu_stats;
+ const struct mlxsw_tx_info tx_info = {
+ .local_port = mlxsw_sx_port->local_port,
+ .is_emad = false,
+ };
+ u64 len;
+ int err;
+
+ if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info))
+ return NETDEV_TX_BUSY;
+
+ if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) {
+ struct sk_buff *skb_orig = skb;
+
+ skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN);
+ if (!skb) {
+ this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
+ dev_kfree_skb_any(skb_orig);
+ return NETDEV_TX_OK;
+ }
+ dev_consume_skb_any(skb_orig);
+ }
+ mlxsw_sx_txhdr_construct(skb, &tx_info);
+ /* TX header is consumed by HW on the way so we shouldn't count its
+ * bytes as being sent.
+ */
+ len = skb->len - MLXSW_TXHDR_LEN;
+ /* Due to a race we might fail here because of a full queue. In that
+ * unlikely case we simply drop the packet.
+ */
+ err = mlxsw_core_skb_transmit(mlxsw_sx->core, skb, &tx_info);
+
+ if (!err) {
+ pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats);
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->tx_packets++;
+ pcpu_stats->tx_bytes += len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+ } else {
+ this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped);
+ dev_kfree_skb_any(skb);
+ }
+ return NETDEV_TX_OK;
+}
+
+static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ int err;
+
+ err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, mtu);
+ if (err)
+ return err;
+ dev->mtu = mtu;
+ return 0;
+}
+
+static void
+mlxsw_sx_port_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ struct mlxsw_sx_port_pcpu_stats *p;
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ u32 tx_dropped = 0;
+ unsigned int start;
+ int i;
+
+ for_each_possible_cpu(i) {
+ p = per_cpu_ptr(mlxsw_sx_port->pcpu_stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&p->syncp);
+ rx_packets = p->rx_packets;
+ rx_bytes = p->rx_bytes;
+ tx_packets = p->tx_packets;
+ tx_bytes = p->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ stats->tx_packets += tx_packets;
+ stats->tx_bytes += tx_bytes;
+ /* tx_dropped is u32, updated without syncp protection. */
+ tx_dropped += p->tx_dropped;
+ }
+ stats->tx_dropped = tx_dropped;
+}
+
+static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name,
+ size_t len)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+
+ return mlxsw_core_port_get_phys_port_name(mlxsw_sx_port->mlxsw_sx->core,
+ mlxsw_sx_port->local_port,
+ name, len);
+}
+
+static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
+ .ndo_open = mlxsw_sx_port_open,
+ .ndo_stop = mlxsw_sx_port_stop,
+ .ndo_start_xmit = mlxsw_sx_port_xmit,
+ .ndo_change_mtu = mlxsw_sx_port_change_mtu,
+ .ndo_get_stats64 = mlxsw_sx_port_get_stats64,
+ .ndo_get_phys_port_name = mlxsw_sx_port_get_phys_port_name,
+};
+
+static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+
+ strlcpy(drvinfo->driver, mlxsw_sx_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, mlxsw_sx_driver_version,
+ sizeof(drvinfo->version));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d",
+ mlxsw_sx->bus_info->fw_rev.major,
+ mlxsw_sx->bus_info->fw_rev.minor,
+ mlxsw_sx->bus_info->fw_rev.subminor);
+ strlcpy(drvinfo->bus_info, mlxsw_sx->bus_info->device_name,
+ sizeof(drvinfo->bus_info));
+}
+
+struct mlxsw_sx_port_hw_stats {
+ char str[ETH_GSTRING_LEN];
+ u64 (*getter)(const char *payload);
+};
+
+static const struct mlxsw_sx_port_hw_stats mlxsw_sx_port_hw_stats[] = {
+ {
+ .str = "a_frames_transmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get,
+ },
+ {
+ .str = "a_frames_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_frames_received_ok_get,
+ },
+ {
+ .str = "a_frame_check_sequence_errors",
+ .getter = mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get,
+ },
+ {
+ .str = "a_alignment_errors",
+ .getter = mlxsw_reg_ppcnt_a_alignment_errors_get,
+ },
+ {
+ .str = "a_octets_transmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_octets_transmitted_ok_get,
+ },
+ {
+ .str = "a_octets_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_octets_received_ok_get,
+ },
+ {
+ .str = "a_multicast_frames_xmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get,
+ },
+ {
+ .str = "a_broadcast_frames_xmitted_ok",
+ .getter = mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get,
+ },
+ {
+ .str = "a_multicast_frames_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get,
+ },
+ {
+ .str = "a_broadcast_frames_received_ok",
+ .getter = mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get,
+ },
+ {
+ .str = "a_in_range_length_errors",
+ .getter = mlxsw_reg_ppcnt_a_in_range_length_errors_get,
+ },
+ {
+ .str = "a_out_of_range_length_field",
+ .getter = mlxsw_reg_ppcnt_a_out_of_range_length_field_get,
+ },
+ {
+ .str = "a_frame_too_long_errors",
+ .getter = mlxsw_reg_ppcnt_a_frame_too_long_errors_get,
+ },
+ {
+ .str = "a_symbol_error_during_carrier",
+ .getter = mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get,
+ },
+ {
+ .str = "a_mac_control_frames_transmitted",
+ .getter = mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get,
+ },
+ {
+ .str = "a_mac_control_frames_received",
+ .getter = mlxsw_reg_ppcnt_a_mac_control_frames_received_get,
+ },
+ {
+ .str = "a_unsupported_opcodes_received",
+ .getter = mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get,
+ },
+ {
+ .str = "a_pause_mac_ctrl_frames_received",
+ .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_received_get,
+ },
+ {
+ .str = "a_pause_mac_ctrl_frames_xmitted",
+ .getter = mlxsw_reg_ppcnt_a_pause_mac_ctrl_frames_transmitted_get,
+ },
+};
+
+#define MLXSW_SX_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sx_port_hw_stats)
+
+static void mlxsw_sx_port_get_strings(struct net_device *dev,
+ u32 stringset, u8 *data)
+{
+ u8 *p = data;
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++) {
+ memcpy(p, mlxsw_sx_port_hw_stats[i].str,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ break;
+ }
+}
+
+static void mlxsw_sx_port_get_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+ int i;
+ int err;
+
+ mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port,
+ MLXSW_REG_PPCNT_IEEE_8023_CNT, 0);
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppcnt), ppcnt_pl);
+ for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++)
+ data[i] = !err ? mlxsw_sx_port_hw_stats[i].getter(ppcnt_pl) : 0;
+}
+
+static int mlxsw_sx_port_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return MLXSW_SX_PORT_HW_STATS_LEN;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+struct mlxsw_sx_port_link_mode {
+ u32 mask;
+ u32 supported;
+ u32 advertised;
+ u32 speed;
+};
+
+static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
+ .supported = SUPPORTED_100baseT_Full,
+ .advertised = ADVERTISED_100baseT_Full,
+ .speed = 100,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX,
+ .speed = 100,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
+ MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
+ .supported = SUPPORTED_1000baseKX_Full,
+ .advertised = ADVERTISED_1000baseKX_Full,
+ .speed = 1000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T,
+ .supported = SUPPORTED_10000baseT_Full,
+ .advertised = ADVERTISED_10000baseT_Full,
+ .speed = 10000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
+ .supported = SUPPORTED_10000baseKX4_Full,
+ .advertised = ADVERTISED_10000baseKX4_Full,
+ .speed = 10000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR,
+ .supported = SUPPORTED_10000baseKR_Full,
+ .advertised = ADVERTISED_10000baseKR_Full,
+ .speed = 10000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2,
+ .supported = SUPPORTED_20000baseKR2_Full,
+ .advertised = ADVERTISED_20000baseKR2_Full,
+ .speed = 20000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4,
+ .supported = SUPPORTED_40000baseCR4_Full,
+ .advertised = ADVERTISED_40000baseCR4_Full,
+ .speed = 40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4,
+ .supported = SUPPORTED_40000baseKR4_Full,
+ .advertised = ADVERTISED_40000baseKR4_Full,
+ .speed = 40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4,
+ .supported = SUPPORTED_40000baseSR4_Full,
+ .advertised = ADVERTISED_40000baseSR4_Full,
+ .speed = 40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4,
+ .supported = SUPPORTED_40000baseLR4_Full,
+ .advertised = ADVERTISED_40000baseLR4_Full,
+ .speed = 40000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR |
+ MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR |
+ MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
+ .speed = 25000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 |
+ MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2,
+ .speed = 50000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+ .supported = SUPPORTED_56000baseKR4_Full,
+ .advertised = ADVERTISED_56000baseKR4_Full,
+ .speed = 56000,
+ },
+ {
+ .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
+ .speed = 100000,
+ },
+};
+
+#define MLXSW_SX_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sx_port_link_mode)
+#define MLXSW_SX_PORT_BASE_SPEED 10000 /* Mb/s */
+
+static u32 mlxsw_sx_from_ptys_supported_port(u32 ptys_eth_proto)
+{
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_SGMII))
+ return SUPPORTED_FIBRE;
+
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX))
+ return SUPPORTED_Backplane;
+ return 0;
+}
+
+static u32 mlxsw_sx_from_ptys_supported_link(u32 ptys_eth_proto)
+{
+ u32 modes = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask)
+ modes |= mlxsw_sx_port_link_mode[i].supported;
+ }
+ return modes;
+}
+
+static u32 mlxsw_sx_from_ptys_advert_link(u32 ptys_eth_proto)
+{
+ u32 modes = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask)
+ modes |= mlxsw_sx_port_link_mode[i].advertised;
+ }
+ return modes;
+}
+
+static void mlxsw_sx_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto,
+ struct ethtool_link_ksettings *cmd)
+{
+ u32 speed = SPEED_UNKNOWN;
+ u8 duplex = DUPLEX_UNKNOWN;
+ int i;
+
+ if (!carrier_ok)
+ goto out;
+
+ for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
+ if (ptys_eth_proto & mlxsw_sx_port_link_mode[i].mask) {
+ speed = mlxsw_sx_port_link_mode[i].speed;
+ duplex = DUPLEX_FULL;
+ break;
+ }
+ }
+out:
+ cmd->base.speed = speed;
+ cmd->base.duplex = duplex;
+}
+
+static u8 mlxsw_sx_port_connector_port(u32 ptys_eth_proto)
+{
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_SGMII))
+ return PORT_FIBRE;
+
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4))
+ return PORT_DA;
+
+ if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
+ MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
+ MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
+ MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4))
+ return PORT_NONE;
+
+ return PORT_OTHER;
+}
+
+static int
+mlxsw_sx_port_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 eth_proto_cap;
+ u32 eth_proto_admin;
+ u32 eth_proto_oper;
+ u32 supported, advertising, lp_advertising;
+ int err;
+
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
+ if (err) {
+ netdev_err(dev, "Failed to get proto");
+ return err;
+ }
+ mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap,
+ &eth_proto_admin, &eth_proto_oper);
+
+ supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) |
+ mlxsw_sx_from_ptys_supported_link(eth_proto_cap) |
+ SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+ advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_admin);
+ mlxsw_sx_from_ptys_speed_duplex(netif_carrier_ok(dev),
+ eth_proto_oper, cmd);
+
+ eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
+ cmd->base.port = mlxsw_sx_port_connector_port(eth_proto_oper);
+ lp_advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_oper);
+
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+ advertising);
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+ lp_advertising);
+
+ return 0;
+}
+
+static u32 mlxsw_sx_to_ptys_advert_link(u32 advertising)
+{
+ u32 ptys_proto = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
+ if (advertising & mlxsw_sx_port_link_mode[i].advertised)
+ ptys_proto |= mlxsw_sx_port_link_mode[i].mask;
+ }
+ return ptys_proto;
+}
+
+static u32 mlxsw_sx_to_ptys_speed(u32 speed)
+{
+ u32 ptys_proto = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
+ if (speed == mlxsw_sx_port_link_mode[i].speed)
+ ptys_proto |= mlxsw_sx_port_link_mode[i].mask;
+ }
+ return ptys_proto;
+}
+
+static u32 mlxsw_sx_to_ptys_upper_speed(u32 upper_speed)
+{
+ u32 ptys_proto = 0;
+ int i;
+
+ for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
+ if (mlxsw_sx_port_link_mode[i].speed <= upper_speed)
+ ptys_proto |= mlxsw_sx_port_link_mode[i].mask;
+ }
+ return ptys_proto;
+}
+
+static int
+mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 speed;
+ u32 eth_proto_new;
+ u32 eth_proto_cap;
+ u32 eth_proto_admin;
+ u32 advertising;
+ bool is_up;
+ int err;
+
+ speed = cmd->base.speed;
+
+ ethtool_convert_link_mode_to_legacy_u32(&advertising,
+ cmd->link_modes.advertising);
+
+ eth_proto_new = cmd->base.autoneg == AUTONEG_ENABLE ?
+ mlxsw_sx_to_ptys_advert_link(advertising) :
+ mlxsw_sx_to_ptys_speed(speed);
+
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
+ if (err) {
+ netdev_err(dev, "Failed to get proto");
+ return err;
+ }
+ mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+ NULL);
+
+ eth_proto_new = eth_proto_new & eth_proto_cap;
+ if (!eth_proto_new) {
+ netdev_err(dev, "Not supported proto admin requested");
+ return -EINVAL;
+ }
+ if (eth_proto_new == eth_proto_admin)
+ return 0;
+
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
+ eth_proto_new, true);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
+ if (err) {
+ netdev_err(dev, "Failed to set proto admin");
+ return err;
+ }
+
+ err = mlxsw_sx_port_oper_status_get(mlxsw_sx_port, &is_up);
+ if (err) {
+ netdev_err(dev, "Failed to get oper status");
+ return err;
+ }
+ if (!is_up)
+ return 0;
+
+ err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
+ if (err) {
+ netdev_err(dev, "Failed to set admin status");
+ return err;
+ }
+
+ err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
+ if (err) {
+ netdev_err(dev, "Failed to set admin status");
+ return err;
+ }
+
+ return 0;
+}
+
+static const struct ethtool_ops mlxsw_sx_port_ethtool_ops = {
+ .get_drvinfo = mlxsw_sx_port_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = mlxsw_sx_port_get_strings,
+ .get_ethtool_stats = mlxsw_sx_port_get_stats,
+ .get_sset_count = mlxsw_sx_port_get_sset_count,
+ .get_link_ksettings = mlxsw_sx_port_get_link_ksettings,
+ .set_link_ksettings = mlxsw_sx_port_set_link_ksettings,
+};
+
+static int mlxsw_sx_port_attr_get(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+ attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id);
+ memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = {
+ .switchdev_port_attr_get = mlxsw_sx_port_attr_get,
+};
+
+static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx)
+{
+ char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
+ int err;
+
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(spad), spad_pl);
+ if (err)
+ return err;
+ mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_sx->hw_id);
+ return 0;
+}
+
+static int mlxsw_sx_port_dev_addr_get(struct mlxsw_sx_port *mlxsw_sx_port)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ struct net_device *dev = mlxsw_sx_port->dev;
+ char ppad_pl[MLXSW_REG_PPAD_LEN];
+ int err;
+
+ mlxsw_reg_ppad_pack(ppad_pl, false, 0);
+ err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppad), ppad_pl);
+ if (err)
+ return err;
+ mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, dev->dev_addr);
+ /* The last byte value in base mac address is guaranteed
+ * to be such it does not overflow when adding local_port
+ * value.
+ */
+ dev->dev_addr[ETH_ALEN - 1] += mlxsw_sx_port->local_port;
+ return 0;
+}
+
+static int mlxsw_sx_port_stp_state_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ u16 vid, enum mlxsw_reg_spms_state state)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char *spms_pl;
+ int err;
+
+ spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL);
+ if (!spms_pl)
+ return -ENOMEM;
+ mlxsw_reg_spms_pack(spms_pl, mlxsw_sx_port->local_port);
+ mlxsw_reg_spms_vid_pack(spms_pl, vid, state);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spms), spms_pl);
+ kfree(spms_pl);
+ return err;
+}
+
+static int mlxsw_sx_port_ib_speed_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ u16 speed, u16 width)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+
+ mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sx_port->local_port, speed,
+ width);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+static int
+mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ u32 upper_speed = MLXSW_SX_PORT_BASE_SPEED * width;
+ char ptys_pl[MLXSW_REG_PTYS_LEN];
+ u32 eth_proto_admin;
+
+ eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
+ mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
+ eth_proto_admin, true);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+static int
+mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port,
+ enum mlxsw_reg_spmlr_learn_mode mode)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+ char spmlr_pl[MLXSW_REG_SPMLR_LEN];
+
+ mlxsw_reg_spmlr_pack(spmlr_pl, mlxsw_sx_port->local_port, mode);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spmlr), spmlr_pl);
+}
+
+static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+ u8 module, u8 width)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port;
+ struct net_device *dev;
+ int err;
+
+ dev = alloc_etherdev(sizeof(struct mlxsw_sx_port));
+ if (!dev)
+ return -ENOMEM;
+ SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev);
+ mlxsw_sx_port = netdev_priv(dev);
+ mlxsw_sx_port->dev = dev;
+ mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
+ mlxsw_sx_port->local_port = local_port;
+ mlxsw_sx_port->mapping.module = module;
+
+ mlxsw_sx_port->pcpu_stats =
+ netdev_alloc_pcpu_stats(struct mlxsw_sx_port_pcpu_stats);
+ if (!mlxsw_sx_port->pcpu_stats) {
+ err = -ENOMEM;
+ goto err_alloc_stats;
+ }
+
+ dev->netdev_ops = &mlxsw_sx_port_netdev_ops;
+ dev->ethtool_ops = &mlxsw_sx_port_ethtool_ops;
+ dev->switchdev_ops = &mlxsw_sx_port_switchdev_ops;
+
+ err = mlxsw_sx_port_dev_addr_get(mlxsw_sx_port);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Unable to get port mac address\n",
+ mlxsw_sx_port->local_port);
+ goto err_dev_addr_get;
+ }
+
+ netif_carrier_off(dev);
+
+ dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG |
+ NETIF_F_VLAN_CHALLENGED;
+
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
+
+ /* Each packet needs to have a Tx header (metadata) on top all other
+ * headers.
+ */
+ dev->needed_headroom = MLXSW_TXHDR_LEN;
+
+ err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_system_port_mapping_set;
+ }
+
+ err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 0);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_swid_set;
+ }
+
+ err = mlxsw_sx_port_speed_by_width_set(mlxsw_sx_port, width);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_speed_set;
+ }
+
+ err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, ETH_DATA_LEN);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_mtu_set;
+ }
+
+ err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
+ if (err)
+ goto err_port_admin_status_set;
+
+ err = mlxsw_sx_port_stp_state_set(mlxsw_sx_port,
+ MLXSW_PORT_DEFAULT_VID,
+ MLXSW_REG_SPMS_STATE_FORWARDING);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set STP state\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_stp_state_set;
+ }
+
+ err = mlxsw_sx_port_mac_learning_mode_set(mlxsw_sx_port,
+ MLXSW_REG_SPMLR_LEARN_MODE_DISABLE);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MAC learning mode\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_mac_learning_mode_set;
+ }
+
+ err = register_netdev(dev);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to register netdev\n",
+ mlxsw_sx_port->local_port);
+ goto err_register_netdev;
+ }
+
+ mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
+ mlxsw_sx_port, dev, module + 1, false, 0);
+ mlxsw_sx->ports[local_port] = mlxsw_sx_port;
+ return 0;
+
+err_register_netdev:
+err_port_mac_learning_mode_set:
+err_port_stp_state_set:
+err_port_admin_status_set:
+err_port_mtu_set:
+err_port_speed_set:
+ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+err_port_system_port_mapping_set:
+err_dev_addr_get:
+ free_percpu(mlxsw_sx_port->pcpu_stats);
+err_alloc_stats:
+ free_netdev(dev);
+ return err;
+}
+
+static int mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+ u8 module, u8 width)
+{
+ int err;
+
+ err = mlxsw_core_port_init(mlxsw_sx->core, local_port);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n",
+ local_port);
+ return err;
+ }
+ err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module, width);
+ if (err)
+ goto err_port_create;
+
+ return 0;
+
+err_port_create:
+ mlxsw_core_port_fini(mlxsw_sx->core, local_port);
+ return err;
+}
+
+static void __mlxsw_sx_port_eth_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
+
+ mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
+ unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */
+ mlxsw_sx->ports[local_port] = NULL;
+ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+ free_percpu(mlxsw_sx_port->pcpu_stats);
+ free_netdev(mlxsw_sx_port->dev);
+}
+
+static bool mlxsw_sx_port_created(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+ return mlxsw_sx->ports[local_port] != NULL;
+}
+
+static int __mlxsw_sx_port_ib_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+ u8 module, u8 width)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port;
+ int err;
+
+ mlxsw_sx_port = kzalloc(sizeof(*mlxsw_sx_port), GFP_KERNEL);
+ if (!mlxsw_sx_port)
+ return -ENOMEM;
+ mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
+ mlxsw_sx_port->local_port = local_port;
+ mlxsw_sx_port->mapping.module = module;
+
+ err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_system_port_mapping_set;
+ }
+
+ /* Adding port to Infiniband swid (1) */
+ err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 1);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_swid_set;
+ }
+
+ /* Expose the IB port number as it's front panel name */
+ err = mlxsw_sx_port_ib_port_set(mlxsw_sx_port, module + 1);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set IB port\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_ib_set;
+ }
+
+ /* Supports all speeds from SDR to FDR (bitmask) and support bus width
+ * of 1x, 2x and 4x (3 bits bitmask)
+ */
+ err = mlxsw_sx_port_ib_speed_set(mlxsw_sx_port,
+ MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
+ BIT(3) - 1);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_speed_set;
+ }
+
+ /* Change to the maximum MTU the device supports, the SMA will take
+ * care of the active MTU
+ */
+ err = mlxsw_sx_port_mtu_ib_set(mlxsw_sx_port, MLXSW_IB_DEFAULT_MTU);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_mtu_set;
+ }
+
+ err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
+ mlxsw_sx_port->local_port);
+ goto err_port_admin_set;
+ }
+
+ mlxsw_core_port_ib_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
+ mlxsw_sx_port);
+ mlxsw_sx->ports[local_port] = mlxsw_sx_port;
+ return 0;
+
+err_port_admin_set:
+err_port_mtu_set:
+err_port_speed_set:
+err_port_ib_set:
+ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+err_port_system_port_mapping_set:
+ kfree(mlxsw_sx_port);
+ return err;
+}
+
+static void __mlxsw_sx_port_ib_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+ struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
+
+ mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
+ mlxsw_sx->ports[local_port] = NULL;
+ mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
+ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+ kfree(mlxsw_sx_port);
+}
+
+static void __mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+ enum devlink_port_type port_type =
+ mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
+
+ if (port_type == DEVLINK_PORT_TYPE_ETH)
+ __mlxsw_sx_port_eth_remove(mlxsw_sx, local_port);
+ else if (port_type == DEVLINK_PORT_TYPE_IB)
+ __mlxsw_sx_port_ib_remove(mlxsw_sx, local_port);
+}
+
+static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+ __mlxsw_sx_port_remove(mlxsw_sx, local_port);
+ mlxsw_core_port_fini(mlxsw_sx->core, local_port);
+}
+
+static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx)
+{
+ int i;
+
+ for (i = 1; i < mlxsw_core_max_ports(mlxsw_sx->core); i++)
+ if (mlxsw_sx_port_created(mlxsw_sx, i))
+ mlxsw_sx_port_remove(mlxsw_sx, i);
+ kfree(mlxsw_sx->ports);
+ mlxsw_sx->ports = NULL;
+}
+
+static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx)
+{
+ unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sx->core);
+ size_t alloc_size;
+ u8 module, width;
+ int i;
+ int err;
+
+ alloc_size = sizeof(struct mlxsw_sx_port *) * max_ports;
+ mlxsw_sx->ports = kzalloc(alloc_size, GFP_KERNEL);
+ if (!mlxsw_sx->ports)
+ return -ENOMEM;
+
+ for (i = 1; i < max_ports; i++) {
+ err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module,
+ &width);
+ if (err)
+ goto err_port_module_info_get;
+ if (!width)
+ continue;
+ err = mlxsw_sx_port_eth_create(mlxsw_sx, i, module, width);
+ if (err)
+ goto err_port_create;
+ }
+ return 0;
+
+err_port_create:
+err_port_module_info_get:
+ for (i--; i >= 1; i--)
+ if (mlxsw_sx_port_created(mlxsw_sx, i))
+ mlxsw_sx_port_remove(mlxsw_sx, i);
+ kfree(mlxsw_sx->ports);
+ mlxsw_sx->ports = NULL;
+ return err;
+}
+
+static void mlxsw_sx_pude_eth_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
+ enum mlxsw_reg_pude_oper_status status)
+{
+ if (status == MLXSW_PORT_OPER_STATUS_UP) {
+ netdev_info(mlxsw_sx_port->dev, "link up\n");
+ netif_carrier_on(mlxsw_sx_port->dev);
+ } else {
+ netdev_info(mlxsw_sx_port->dev, "link down\n");
+ netif_carrier_off(mlxsw_sx_port->dev);
+ }
+}
+
+static void mlxsw_sx_pude_ib_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
+ enum mlxsw_reg_pude_oper_status status)
+{
+ if (status == MLXSW_PORT_OPER_STATUS_UP)
+ pr_info("ib link for port %d - up\n",
+ mlxsw_sx_port->mapping.module + 1);
+ else
+ pr_info("ib link for port %d - down\n",
+ mlxsw_sx_port->mapping.module + 1);
+}
+
+static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg,
+ char *pude_pl, void *priv)
+{
+ struct mlxsw_sx *mlxsw_sx = priv;
+ struct mlxsw_sx_port *mlxsw_sx_port;
+ enum mlxsw_reg_pude_oper_status status;
+ enum devlink_port_type port_type;
+ u8 local_port;
+
+ local_port = mlxsw_reg_pude_local_port_get(pude_pl);
+ mlxsw_sx_port = mlxsw_sx->ports[local_port];
+ if (!mlxsw_sx_port) {
+ dev_warn(mlxsw_sx->bus_info->dev, "Port %d: Link event received for non-existent port\n",
+ local_port);
+ return;
+ }
+
+ status = mlxsw_reg_pude_oper_status_get(pude_pl);
+ port_type = mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
+ if (port_type == DEVLINK_PORT_TYPE_ETH)
+ mlxsw_sx_pude_eth_event_func(mlxsw_sx_port, status);
+ else if (port_type == DEVLINK_PORT_TYPE_IB)
+ mlxsw_sx_pude_ib_event_func(mlxsw_sx_port, status);
+}
+
+static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port,
+ void *priv)
+{
+ struct mlxsw_sx *mlxsw_sx = priv;
+ struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
+ struct mlxsw_sx_port_pcpu_stats *pcpu_stats;
+
+ if (unlikely(!mlxsw_sx_port)) {
+ dev_warn_ratelimited(mlxsw_sx->bus_info->dev, "Port %d: skb received for non-existent port\n",
+ local_port);
+ return;
+ }
+
+ skb->dev = mlxsw_sx_port->dev;
+
+ pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats);
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->rx_packets++;
+ pcpu_stats->rx_bytes += skb->len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ netif_receive_skb(skb);
+}
+
+static int mlxsw_sx_port_type_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+ enum devlink_port_type new_type)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
+ u8 module, width;
+ int err;
+
+ if (!mlxsw_sx->ports || !mlxsw_sx->ports[local_port]) {
+ dev_err(mlxsw_sx->bus_info->dev, "Port number \"%d\" does not exist\n",
+ local_port);
+ return -EINVAL;
+ }
+
+ if (new_type == DEVLINK_PORT_TYPE_AUTO)
+ return -EOPNOTSUPP;
+
+ __mlxsw_sx_port_remove(mlxsw_sx, local_port);
+ err = mlxsw_sx_port_module_info_get(mlxsw_sx, local_port, &module,
+ &width);
+ if (err)
+ goto err_port_module_info_get;
+
+ if (new_type == DEVLINK_PORT_TYPE_ETH)
+ err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module,
+ width);
+ else if (new_type == DEVLINK_PORT_TYPE_IB)
+ err = __mlxsw_sx_port_ib_create(mlxsw_sx, local_port, module,
+ width);
+
+err_port_module_info_get:
+ return err;
+}
+
+#define MLXSW_SX_RXL(_trap_id) \
+ MLXSW_RXL(mlxsw_sx_rx_listener_func, _trap_id, TRAP_TO_CPU, \
+ false, SX2_RX, FORWARD)
+
+static const struct mlxsw_listener mlxsw_sx_listener[] = {
+ MLXSW_EVENTL(mlxsw_sx_pude_event_func, PUDE, EMAD),
+ MLXSW_SX_RXL(FDB_MC),
+ MLXSW_SX_RXL(STP),
+ MLXSW_SX_RXL(LACP),
+ MLXSW_SX_RXL(EAPOL),
+ MLXSW_SX_RXL(LLDP),
+ MLXSW_SX_RXL(MMRP),
+ MLXSW_SX_RXL(MVRP),
+ MLXSW_SX_RXL(RPVST),
+ MLXSW_SX_RXL(DHCP),
+ MLXSW_SX_RXL(IGMP_QUERY),
+ MLXSW_SX_RXL(IGMP_V1_REPORT),
+ MLXSW_SX_RXL(IGMP_V2_REPORT),
+ MLXSW_SX_RXL(IGMP_V2_LEAVE),
+ MLXSW_SX_RXL(IGMP_V3_REPORT),
+};
+
+static int mlxsw_sx_traps_init(struct mlxsw_sx *mlxsw_sx)
+{
+ char htgt_pl[MLXSW_REG_HTGT_LEN];
+ int i;
+ int err;
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
+ mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX);
+
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
+ mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL);
+
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
+ err = mlxsw_core_trap_register(mlxsw_sx->core,
+ &mlxsw_sx_listener[i],
+ mlxsw_sx);
+ if (err)
+ goto err_listener_register;
+
+ }
+ return 0;
+
+err_listener_register:
+ for (i--; i >= 0; i--) {
+ mlxsw_core_trap_unregister(mlxsw_sx->core,
+ &mlxsw_sx_listener[i],
+ mlxsw_sx);
+ }
+ return err;
+}
+
+static void mlxsw_sx_traps_fini(struct mlxsw_sx *mlxsw_sx)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
+ mlxsw_core_trap_unregister(mlxsw_sx->core,
+ &mlxsw_sx_listener[i],
+ mlxsw_sx);
+ }
+}
+
+static int mlxsw_sx_flood_init(struct mlxsw_sx *mlxsw_sx)
+{
+ char sfgc_pl[MLXSW_REG_SFGC_LEN];
+ char sgcr_pl[MLXSW_REG_SGCR_LEN];
+ char *sftr_pl;
+ int err;
+
+ /* Configure a flooding table, which includes only CPU port. */
+ sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
+ if (!sftr_pl)
+ return -ENOMEM;
+ mlxsw_reg_sftr_pack(sftr_pl, 0, 0, MLXSW_REG_SFGC_TABLE_TYPE_SINGLE, 0,
+ MLXSW_PORT_CPU_PORT, true);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sftr), sftr_pl);
+ kfree(sftr_pl);
+ if (err)
+ return err;
+
+ /* Flood different packet types using the flooding table. */
+ mlxsw_reg_sfgc_pack(sfgc_pl,
+ MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST,
+ MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
+ 0);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_sfgc_pack(sfgc_pl,
+ MLXSW_REG_SFGC_TYPE_BROADCAST,
+ MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
+ 0);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_sfgc_pack(sfgc_pl,
+ MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP,
+ MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
+ 0);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_sfgc_pack(sfgc_pl,
+ MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6,
+ MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
+ 0);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_sfgc_pack(sfgc_pl,
+ MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4,
+ MLXSW_REG_SFGC_BRIDGE_TYPE_1Q_FID,
+ MLXSW_REG_SFGC_TABLE_TYPE_SINGLE,
+ 0);
+ err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sfgc), sfgc_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_sgcr_pack(sgcr_pl, true);
+ return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sgcr), sgcr_pl);
+}
+
+static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+ char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
+ mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
+ mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+ MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
+static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
+ const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
+ int err;
+
+ mlxsw_sx->core = mlxsw_core;
+ mlxsw_sx->bus_info = mlxsw_bus_info;
+
+ err = mlxsw_sx_hw_id_get(mlxsw_sx);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Failed to get switch HW ID\n");
+ return err;
+ }
+
+ err = mlxsw_sx_ports_create(mlxsw_sx);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Failed to create ports\n");
+ return err;
+ }
+
+ err = mlxsw_sx_traps_init(mlxsw_sx);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Failed to set traps\n");
+ goto err_listener_register;
+ }
+
+ err = mlxsw_sx_flood_init(mlxsw_sx);
+ if (err) {
+ dev_err(mlxsw_sx->bus_info->dev, "Failed to initialize flood tables\n");
+ goto err_flood_init;
+ }
+
+ return 0;
+
+err_flood_init:
+ mlxsw_sx_traps_fini(mlxsw_sx);
+err_listener_register:
+ mlxsw_sx_ports_remove(mlxsw_sx);
+ return err;
+}
+
+static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
+
+ mlxsw_sx_traps_fini(mlxsw_sx);
+ mlxsw_sx_ports_remove(mlxsw_sx);
+}
+
+static const struct mlxsw_config_profile mlxsw_sx_config_profile = {
+ .used_max_vepa_channels = 1,
+ .max_vepa_channels = 0,
+ .used_max_mid = 1,
+ .max_mid = 7000,
+ .used_max_pgt = 1,
+ .max_pgt = 0,
+ .used_max_system_port = 1,
+ .max_system_port = 48000,
+ .used_max_vlan_groups = 1,
+ .max_vlan_groups = 127,
+ .used_max_regions = 1,
+ .max_regions = 400,
+ .used_flood_tables = 1,
+ .max_flood_tables = 2,
+ .max_vid_flood_tables = 1,
+ .used_flood_mode = 1,
+ .flood_mode = 3,
+ .used_max_ib_mc = 1,
+ .max_ib_mc = 6,
+ .used_max_pkey = 1,
+ .max_pkey = 0,
+ .swid_config = {
+ {
+ .used_type = 1,
+ .type = MLXSW_PORT_SWID_TYPE_ETH,
+ },
+ {
+ .used_type = 1,
+ .type = MLXSW_PORT_SWID_TYPE_IB,
+ }
+ },
+};
+
+static struct mlxsw_driver mlxsw_sx_driver = {
+ .kind = mlxsw_sx_driver_name,
+ .priv_size = sizeof(struct mlxsw_sx),
+ .init = mlxsw_sx_init,
+ .fini = mlxsw_sx_fini,
+ .basic_trap_groups_set = mlxsw_sx_basic_trap_groups_set,
+ .txhdr_construct = mlxsw_sx_txhdr_construct,
+ .txhdr_len = MLXSW_TXHDR_LEN,
+ .profile = &mlxsw_sx_config_profile,
+ .port_type_set = mlxsw_sx_port_type_set,
+};
+
+static const struct pci_device_id mlxsw_sx_pci_id_table[] = {
+ {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0},
+ {0, },
+};
+
+static struct pci_driver mlxsw_sx_pci_driver = {
+ .name = mlxsw_sx_driver_name,
+ .id_table = mlxsw_sx_pci_id_table,
+};
+
+static int __init mlxsw_sx_module_init(void)
+{
+ int err;
+
+ err = mlxsw_core_driver_register(&mlxsw_sx_driver);
+ if (err)
+ return err;
+
+ err = mlxsw_pci_driver_register(&mlxsw_sx_pci_driver);
+ if (err)
+ goto err_pci_driver_register;
+
+ return 0;
+
+err_pci_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_sx_driver);
+ return err;
+}
+
+static void __exit mlxsw_sx_module_exit(void)
+{
+ mlxsw_pci_driver_unregister(&mlxsw_sx_pci_driver);
+ mlxsw_core_driver_unregister(&mlxsw_sx_driver);
+}
+
+module_init(mlxsw_sx_module_init);
+module_exit(mlxsw_sx_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox SwitchX-2 driver");
+MODULE_DEVICE_TABLE(pci, mlxsw_sx_pci_id_table);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
new file mode 100644
index 000000000..53020724c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_TRAP_H
+#define _MLXSW_TRAP_H
+
+enum {
+ /* Ethernet EMAD and FDB miss */
+ MLXSW_TRAP_ID_FDB_MC = 0x01,
+ MLXSW_TRAP_ID_ETHEMAD = 0x05,
+ /* L2 traps for specific packet types */
+ MLXSW_TRAP_ID_STP = 0x10,
+ MLXSW_TRAP_ID_LACP = 0x11,
+ MLXSW_TRAP_ID_EAPOL = 0x12,
+ MLXSW_TRAP_ID_LLDP = 0x13,
+ MLXSW_TRAP_ID_MMRP = 0x14,
+ MLXSW_TRAP_ID_MVRP = 0x15,
+ MLXSW_TRAP_ID_RPVST = 0x16,
+ MLXSW_TRAP_ID_DHCP = 0x19,
+ MLXSW_TRAP_ID_IGMP_QUERY = 0x30,
+ MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31,
+ MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32,
+ MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33,
+ MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
+ MLXSW_TRAP_ID_PKT_SAMPLE = 0x38,
+ MLXSW_TRAP_ID_FID_MISS = 0x3D,
+ MLXSW_TRAP_ID_ARPBC = 0x50,
+ MLXSW_TRAP_ID_ARPUC = 0x51,
+ MLXSW_TRAP_ID_MTUERROR = 0x52,
+ MLXSW_TRAP_ID_TTLERROR = 0x53,
+ MLXSW_TRAP_ID_LBERROR = 0x54,
+ MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
+ MLXSW_TRAP_ID_IPV4_PIM = 0x58,
+ MLXSW_TRAP_ID_IPV4_VRRP = 0x59,
+ MLXSW_TRAP_ID_RPF = 0x5C,
+ MLXSW_TRAP_ID_IP2ME = 0x5F,
+ MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
+ MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61,
+ MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62,
+ MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63,
+ MLXSW_TRAP_ID_IPV6_OSPF = 0x64,
+ MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65,
+ MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66,
+ MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67,
+ MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68,
+ MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
+ MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
+ MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+ MLXSW_TRAP_ID_IPV6_PIM = 0x79,
+ MLXSW_TRAP_ID_IPV6_VRRP = 0x7A,
+ MLXSW_TRAP_ID_IPV4_BGP = 0x88,
+ MLXSW_TRAP_ID_IPV6_BGP = 0x89,
+ MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
+ MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B,
+ MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C,
+ MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D,
+ MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E,
+ MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
+ MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
+ MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
+ MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
+ MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
+ MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
+ MLXSW_TRAP_ID_ACL0 = 0x1C0,
+ /* Multicast trap used for routes with trap action */
+ MLXSW_TRAP_ID_ACL1 = 0x1C1,
+ /* Multicast trap used for routes with trap-and-forward action */
+ MLXSW_TRAP_ID_ACL2 = 0x1C2,
+
+ MLXSW_TRAP_ID_MAX = 0x1FF
+};
+
+enum mlxsw_event_trap_id {
+ /* Port Up/Down event generated by hardware */
+ MLXSW_TRAP_ID_PUDE = 0x8,
+};
+
+#endif /* _MLXSW_TRAP_H */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/txheader.h b/drivers/net/ethernet/mellanox/mlxsw/txheader.h
new file mode 100644
index 000000000..da51dd9d5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/txheader.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_TXHEADER_H
+#define _MLXSW_TXHEADER_H
+
+#define MLXSW_TXHDR_LEN 0x10
+#define MLXSW_TXHDR_VERSION_0 0
+#define MLXSW_TXHDR_VERSION_1 1
+
+enum {
+ MLXSW_TXHDR_ETH_CTL,
+ MLXSW_TXHDR_ETH_DATA,
+};
+
+#define MLXSW_TXHDR_PROTO_ETH 1
+
+enum {
+ MLXSW_TXHDR_ETCLASS_0,
+ MLXSW_TXHDR_ETCLASS_1,
+ MLXSW_TXHDR_ETCLASS_2,
+ MLXSW_TXHDR_ETCLASS_3,
+ MLXSW_TXHDR_ETCLASS_4,
+ MLXSW_TXHDR_ETCLASS_5,
+ MLXSW_TXHDR_ETCLASS_6,
+ MLXSW_TXHDR_ETCLASS_7,
+};
+
+enum {
+ MLXSW_TXHDR_RDQ_OTHER,
+ MLXSW_TXHDR_RDQ_EMAD = 0x1f,
+};
+
+#define MLXSW_TXHDR_CTCLASS3 0
+#define MLXSW_TXHDR_CPU_SIG 0
+#define MLXSW_TXHDR_SIG 0xE0E0
+#define MLXSW_TXHDR_STCLASS_NONE 0
+
+enum {
+ MLXSW_TXHDR_NOT_EMAD,
+ MLXSW_TXHDR_EMAD,
+};
+
+enum {
+ MLXSW_TXHDR_TYPE_DATA,
+ MLXSW_TXHDR_TYPE_CONTROL = 6,
+};
+
+#endif