Commit 50b17cfb authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) We have to be careful to not try and place a checksum after the end
    of a rawv6 packet, fix from Dave Jones with help from Hannes
    Frederic Sowa.

 2) Missing memory barriers in tcp_tasklet_func() lead to crashes, from
    Eric Dumazet.

 3) Several bug fixes for the new XDP support in virtio_net, from Jason
    Wang.

 4) Increase headroom in RX skbs in be2net driver to accommodate
    encapsulations such as geneve. From Kalesh A P.

 5) Fix SKB frag unmapping on TX in mvpp2, from Thomas Petazzoni.

 6) Pre-pulling UDP headers created a regression in RECVORIGDSTADDR
    socket option support, from Willem de Bruijn.

 7) UID based routing added a potential OOPS in ip_do_redirect() when we
    see an SKB without a socket attached. We just need it for the
    network namespace which we can get from skb->dev instead. Fix from
    Lorenzo Colitti.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (30 commits)
  sctp: fix recovering from 0 win with small data chunks
  sctp: do not loose window information if in rwnd_over
  virtio-net: XDP support for small buffers
  virtio-net: remove big packet XDP codes
  virtio-net: forbid XDP when VIRTIO_NET_F_GUEST_UFO is support
  virtio-net: make rx buf size estimation works for XDP
  virtio-net: unbreak csumed packets for XDP_PASS
  virtio-net: correctly handle XDP_PASS for linearized packets
  virtio-net: fix page miscount during XDP linearizing
  virtio-net: correctly xmit linearized page on XDP_TX
  virtio-net: remove the warning before XDP linearizing
  mlxsw: spectrum_router: Correctly remove nexthop groups
  mlxsw: spectrum_router: Don't reflect dead neighs
  neigh: Send netevent after marking neigh as dead
  ipv6: handle -EFAULT from skb_copy_bits
  inet: fix IP(V6)_RECVORIGDSTADDR for udp sockets
  net/sched: cls_flower: Mandate mask when matching on flags
  net/sched: act_tunnel_key: Fix setting UDP dst port in metadata under IPv6
  stmmac: CSR clock configuration fix
  net: ipv4: Don't crash if passing a null sk to ip_do_redirect.
  ...
parents a307d0a0 1636098c
......@@ -65,7 +65,7 @@
/* Number of bytes of an RX frame that are copied to skb->data */
#define BE_HDR_LEN ((u16) 64)
/* allocate extra space to allow tunneling decapsulation without head reallocation */
#define BE_RX_SKB_ALLOC_SIZE (BE_HDR_LEN + 64)
#define BE_RX_SKB_ALLOC_SIZE 256
#define BE_MAX_JUMBO_FRAME_SIZE 9018
#define BE_MIN_MTU 256
......
......@@ -770,6 +770,17 @@ struct mvpp2_rx_desc {
u32 reserved8;
};
/* Per-descriptor record of a transmitted buffer. One entry is filled in
 * each time a Tx descriptor is queued and is read back later to unmap the
 * buffer and free the associated skb.
 */
struct mvpp2_txq_pcpu_buf {
/* Transmitted SKB; may be NULL, in which case there is no skb to free */
struct sk_buff *skb;
/* Physical (DMA) address of transmitted buffer, copied from the
 * descriptor's buf_phys_addr and later passed to dma_unmap_single()
 */
dma_addr_t phys;
/* Size transmitted, copied from the descriptor's data_size and used as
 * the length when the buffer is unmapped
 */
size_t size;
};
/* Per-CPU Tx queue control */
struct mvpp2_txq_pcpu {
int cpu;
......@@ -785,11 +796,8 @@ struct mvpp2_txq_pcpu {
/* Number of Tx DMA descriptors reserved for each CPU */
int reserved_num;
/* Array of transmitted skb */
struct sk_buff **tx_skb;
/* Array of transmitted buffers' physical addresses */
dma_addr_t *tx_buffs;
/* Infos about transmitted buffers */
struct mvpp2_txq_pcpu_buf *buffs;
/* Index of last TX DMA descriptor that was inserted */
int txq_put_index;
......@@ -979,10 +987,11 @@ static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu,
struct sk_buff *skb,
struct mvpp2_tx_desc *tx_desc)
{
txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb;
if (skb)
txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] =
tx_desc->buf_phys_addr;
struct mvpp2_txq_pcpu_buf *tx_buf =
txq_pcpu->buffs + txq_pcpu->txq_put_index;
tx_buf->skb = skb;
tx_buf->size = tx_desc->data_size;
tx_buf->phys = tx_desc->buf_phys_addr;
txq_pcpu->txq_put_index++;
if (txq_pcpu->txq_put_index == txq_pcpu->size)
txq_pcpu->txq_put_index = 0;
......@@ -4401,17 +4410,16 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
int i;
for (i = 0; i < num; i++) {
dma_addr_t buf_phys_addr =
txq_pcpu->tx_buffs[txq_pcpu->txq_get_index];
struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index];
struct mvpp2_txq_pcpu_buf *tx_buf =
txq_pcpu->buffs + txq_pcpu->txq_get_index;
mvpp2_txq_inc_get(txq_pcpu);
dma_unmap_single(port->dev->dev.parent, buf_phys_addr,
skb_headlen(skb), DMA_TO_DEVICE);
if (!skb)
dma_unmap_single(port->dev->dev.parent, tx_buf->phys,
tx_buf->size, DMA_TO_DEVICE);
if (!tx_buf->skb)
continue;
dev_kfree_skb_any(skb);
dev_kfree_skb_any(tx_buf->skb);
}
}
......@@ -4651,15 +4659,10 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
txq_pcpu->size = txq->size;
txq_pcpu->tx_skb = kmalloc(txq_pcpu->size *
sizeof(*txq_pcpu->tx_skb),
GFP_KERNEL);
if (!txq_pcpu->tx_skb)
goto error;
txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size *
sizeof(dma_addr_t), GFP_KERNEL);
if (!txq_pcpu->tx_buffs)
txq_pcpu->buffs = kmalloc(txq_pcpu->size *
sizeof(struct mvpp2_txq_pcpu_buf),
GFP_KERNEL);
if (!txq_pcpu->buffs)
goto error;
txq_pcpu->count = 0;
......@@ -4673,8 +4676,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
error:
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->tx_skb);
kfree(txq_pcpu->tx_buffs);
kfree(txq_pcpu->buffs);
}
dma_free_coherent(port->dev->dev.parent,
......@@ -4693,8 +4695,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->tx_skb);
kfree(txq_pcpu->tx_buffs);
kfree(txq_pcpu->buffs);
}
if (txq->descs)
......
......@@ -942,7 +942,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
char rauht_pl[MLXSW_REG_RAUHT_LEN];
struct net_device *dev;
bool entry_connected;
u8 nud_state;
u8 nud_state, dead;
bool updating;
bool removing;
bool adding;
......@@ -953,10 +953,11 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
dip = ntohl(*((__be32 *) n->primary_key));
memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha));
nud_state = n->nud_state;
dead = n->dead;
dev = n->dev;
read_unlock_bh(&n->lock);
entry_connected = nud_state & NUD_VALID;
entry_connected = nud_state & NUD_VALID && !dead;
adding = (!neigh_entry->offloaded) && entry_connected;
updating = neigh_entry->offloaded && entry_connected;
removing = neigh_entry->offloaded && !entry_connected;
......@@ -1351,7 +1352,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry;
struct net_device *dev = fib_nh->nh_dev;
struct neighbour *n;
u8 nud_state;
u8 nud_state, dead;
/* Take a reference of neigh here ensuring that neigh would
* not be destructed before the nexthop entry is finished.
......@@ -1383,8 +1384,9 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
read_lock_bh(&n->lock);
nud_state = n->nud_state;
dead = n->dead;
read_unlock_bh(&n->lock);
__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID));
__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
return 0;
}
......@@ -1394,6 +1396,7 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
__mlxsw_sp_nexthop_neigh_update(nh, true);
list_del(&nh->neigh_list_node);
/* If that is the last nexthop connected to that neigh, remove from
......@@ -1452,6 +1455,8 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
nh = &nh_grp->nexthops[i];
mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
}
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
WARN_ON_ONCE(nh_grp->adj_index_valid);
kfree(nh_grp);
}
......
......@@ -864,6 +864,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
int ret;
struct device *dev = &bsp_priv->pdev->dev;
ret = gmac_clk_enable(bsp_priv, true);
if (ret)
return ret;
/*rmii or rgmii*/
if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
dev_info(dev, "init for RGMII\n");
......@@ -880,10 +884,6 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
if (ret)
return ret;
ret = gmac_clk_enable(bsp_priv, true);
if (ret)
return ret;
pm_runtime_enable(dev);
pm_runtime_get_sync(dev);
......
......@@ -539,7 +539,7 @@ struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
mac->mii.reg_shift = 6;
mac->mii.reg_mask = 0x000007C0;
mac->mii.clk_csr_shift = 2;
mac->mii.clk_csr_mask = 0xF;
mac->mii.clk_csr_mask = GENMASK(5, 2);
/* Get and dump the chip ID */
*synopsys_id = stmmac_get_synopsys_id(hwid);
......
......@@ -197,7 +197,7 @@ struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id)
mac->mii.reg_shift = 6;
mac->mii.reg_mask = 0x000007C0;
mac->mii.clk_csr_shift = 2;
mac->mii.clk_csr_mask = 0xF;
mac->mii.clk_csr_mask = GENMASK(5, 2);
/* Synopsys Id is not available on old chips */
*synopsys_id = 0;
......
......@@ -81,8 +81,8 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
value |= (phyaddr << priv->hw->mii.addr_shift)
& priv->hw->mii.addr_mask;
value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
value |= (priv->clk_csr & priv->hw->mii.clk_csr_mask)
<< priv->hw->mii.clk_csr_shift;
value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
& priv->hw->mii.clk_csr_mask;
if (priv->plat->has_gmac4)
value |= MII_GMAC4_READ;
......@@ -122,8 +122,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
& priv->hw->mii.addr_mask;
value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
value |= ((priv->clk_csr & priv->hw->mii.clk_csr_mask)
<< priv->hw->mii.clk_csr_shift);
value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
& priv->hw->mii.clk_csr_mask;
if (priv->plat->has_gmac4)
value |= MII_GMAC4_WRITE;
......
......@@ -1483,7 +1483,7 @@ void mac_drv_clear_rx_queue(struct s_smc *smc)
r = queue->rx_curr_get ;
while (queue->rx_used) {
DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORCPU) ;
DB_RX("switch OWN bit of RxD 0x%x ",r,0,5) ;
DB_RX("switch OWN bit of RxD 0x%p ",r,0,5) ;
r->rxd_rbctrl &= ~cpu_to_le32(BMU_OWN) ;
frag_count = 1 ;
DRV_BUF_FLUSH(r,DDI_DMA_SYNC_FORDEV) ;
......@@ -1645,7 +1645,7 @@ void hwm_tx_frag(struct s_smc *smc, char far *virt, u_long phys, int len,
DB_TX("hwm_tx_frag: len = %d, frame_status = %x ",len,frame_status,2) ;
if (frame_status & LAN_TX) {
/* '*t' is already defined */
DB_TX("LAN_TX: TxD = %x, virt = %x ",t,virt,3) ;
DB_TX("LAN_TX: TxD = %p, virt = %p ",t,virt,3) ;
t->txd_virt = virt ;
t->txd_txdscr = cpu_to_le32(smc->os.hwm.tx_descr) ;
t->txd_tbadr = cpu_to_le32(phys) ;
......@@ -1819,7 +1819,7 @@ void smt_send_mbuf(struct s_smc *smc, SMbuf *mb, int fc)
__le32 tbctrl;
NDD_TRACE("THSB",mb,fc,0) ;
DB_TX("smt_send_mbuf: mb = 0x%x, fc = 0x%x",mb,fc,4) ;
DB_TX("smt_send_mbuf: mb = 0x%p, fc = 0x%x",mb,fc,4) ;
mb->sm_off-- ; /* set to fc */
mb->sm_len++ ; /* + fc */
......@@ -1960,7 +1960,7 @@ static void mac_drv_clear_txd(struct s_smc *smc)
do {
DRV_BUF_FLUSH(t1,DDI_DMA_SYNC_FORCPU) ;
DB_TX("check OWN/EOF bit of TxD 0x%x",t1,0,5) ;
DB_TX("check OWN/EOF bit of TxD 0x%p",t1,0,5) ;
tbctrl = le32_to_cpu(CR_READ(t1->txd_tbctrl));
if (tbctrl & BMU_OWN || !queue->tx_used){
......@@ -1988,7 +1988,7 @@ static void mac_drv_clear_txd(struct s_smc *smc)
}
else {
#ifndef PASS_1ST_TXD_2_TX_COMP
DB_TX("mac_drv_tx_comp for TxD 0x%x",t2,0,4) ;
DB_TX("mac_drv_tx_comp for TxD 0x%p",t2,0,4) ;
mac_drv_tx_complete(smc,t2) ;
#else
DB_TX("mac_drv_tx_comp for TxD 0x%x",
......@@ -2052,7 +2052,7 @@ void mac_drv_clear_tx_queue(struct s_smc *smc)
tx_used = queue->tx_used ;
while (tx_used) {
DRV_BUF_FLUSH(t,DDI_DMA_SYNC_FORCPU) ;
DB_TX("switch OWN bit of TxD 0x%x ",t,0,5) ;
DB_TX("switch OWN bit of TxD 0x%p ",t,0,5) ;
t->txd_tbctrl &= ~cpu_to_le32(BMU_OWN) ;
DRV_BUF_FLUSH(t,DDI_DMA_SYNC_FORDEV) ;
t = t->txd_next ;
......
......@@ -284,7 +284,7 @@ void smt_pmf_received_pack(struct s_smc *smc, SMbuf *mb, int local)
SMbuf *reply ;
sm = smtod(mb,struct smt_header *) ;
DB_SMT("SMT: processing PMF frame at %x len %d\n",sm,mb->sm_len) ;
DB_SMT("SMT: processing PMF frame at %p len %d\n",sm,mb->sm_len) ;
#ifdef DEBUG
dump_smt(smc,sm,"PMF Received") ;
#endif
......
......@@ -504,7 +504,7 @@ void smt_received_pack(struct s_smc *smc, SMbuf *mb, int fs)
#endif
smt_swap_para(sm,(int) mb->sm_len,1) ;
DB_SMT("SMT : received packet [%s] at 0x%x\n",
DB_SMT("SMT : received packet [%s] at 0x%p\n",
smt_type_name[m_fc(mb) & 0xf],sm) ;
DB_SMT("SMT : version %d, class %s\n",sm->smt_version,
smt_class_name[(sm->smt_class>LAST_CLASS)?0 : sm->smt_class]) ;
......
......@@ -333,9 +333,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
static void virtnet_xdp_xmit(struct virtnet_info *vi,
struct receive_queue *rq,
struct send_queue *sq,
struct xdp_buff *xdp)
struct xdp_buff *xdp,
void *data)
{
struct page *page = virt_to_head_page(xdp->data);
struct virtio_net_hdr_mrg_rxbuf *hdr;
unsigned int num_sg, len;
void *xdp_sent;
......@@ -343,32 +343,46 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
/* Free up any pending old buffers before queueing new ones. */
while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
struct page *sent_page = virt_to_head_page(xdp_sent);
if (vi->mergeable_rx_bufs) {
struct page *sent_page = virt_to_head_page(xdp_sent);
if (vi->mergeable_rx_bufs)
put_page(sent_page);
else
give_pages(rq, sent_page);
} else { /* small buffer */
struct sk_buff *skb = xdp_sent;
kfree_skb(skb);
}
}
/* Zero header and leave csum up to XDP layers */
hdr = xdp->data;
memset(hdr, 0, vi->hdr_len);
if (vi->mergeable_rx_bufs) {
/* Zero header and leave csum up to XDP layers */
hdr = xdp->data;
memset(hdr, 0, vi->hdr_len);
num_sg = 1;
sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
} else { /* small buffer */
struct sk_buff *skb = data;
/* Zero header and leave csum up to XDP layers */
hdr = skb_vnet_hdr(skb);
memset(hdr, 0, vi->hdr_len);
num_sg = 1;
sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
num_sg = 2;
sg_init_table(sq->sg, 2);
sg_set_buf(sq->sg, hdr, vi->hdr_len);
skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
}
err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
xdp->data, GFP_ATOMIC);
data, GFP_ATOMIC);
if (unlikely(err)) {
if (vi->mergeable_rx_bufs)
if (vi->mergeable_rx_bufs) {
struct page *page = virt_to_head_page(xdp->data);
put_page(page);
else
give_pages(rq, page);
} else /* small buffer */
kfree_skb(data);
return; // On error abort to avoid unnecessary kick
} else if (!vi->mergeable_rx_bufs) {
/* If not mergeable bufs must be big packets so cleanup pages */
give_pages(rq, (struct page *)page->private);
page->private = 0;
}
virtqueue_kick(sq->vq);
......@@ -377,23 +391,26 @@ static void virtnet_xdp_xmit(struct virtnet_info *vi,
static u32 do_xdp_prog(struct virtnet_info *vi,
struct receive_queue *rq,
struct bpf_prog *xdp_prog,
struct page *page, int offset, int len)
void *data, int len)
{
int hdr_padded_len;
struct xdp_buff xdp;
void *buf;
unsigned int qp;
u32 act;
u8 *buf;
buf = page_address(page) + offset;
if (vi->mergeable_rx_bufs)
if (vi->mergeable_rx_bufs) {
hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
xdp.data = data + hdr_padded_len;
xdp.data_end = xdp.data + (len - vi->hdr_len);
buf = data;
} else { /* small buffers */
struct sk_buff *skb = data;
xdp.data = buf + hdr_padded_len;
xdp.data_end = xdp.data + (len - vi->hdr_len);
xdp.data = skb->data;
xdp.data_end = xdp.data + len;
buf = skb->data;
}
act = bpf_prog_run_xdp(xdp_prog, &xdp);
switch (act) {
......@@ -403,8 +420,8 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
qp = vi->curr_queue_pairs -
vi->xdp_queue_pairs +
smp_processor_id();
xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
xdp.data = buf;
virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp, data);
return XDP_TX;
default:
bpf_warn_invalid_xdp_action(act);
......@@ -414,26 +431,17 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
}
}
static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
static struct sk_buff *receive_small(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf, unsigned int len)
{
struct sk_buff * skb = buf;
struct bpf_prog *xdp_prog;
len -= vi->hdr_len;
skb_trim(skb, len);
return skb;
}
static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf,
unsigned int len)
{
struct bpf_prog *xdp_prog;
struct page *page = buf;
struct sk_buff *skb;
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
......@@ -442,7 +450,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
goto err_xdp;
act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
act = do_xdp_prog(vi, rq, xdp_prog, skb, len);
switch (act) {
case XDP_PASS:
break;
......@@ -456,18 +464,33 @@ static struct sk_buff *receive_big(struct net_device *dev,
}
rcu_read_unlock();
skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
return skb;
err_xdp:
rcu_read_unlock();
dev->stats.rx_dropped++;
kfree_skb(skb);
xdp_xmit:
return NULL;
}
static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf,
unsigned int len)
{
struct page *page = buf;
struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
if (unlikely(!skb))
goto err;
return skb;
err_xdp:
rcu_read_unlock();
err:
dev->stats.rx_dropped++;
give_pages(rq, page);
xdp_xmit:
return NULL;
}
......@@ -483,7 +506,7 @@ xdp_xmit:
* anymore.
*/
static struct page *xdp_linearize_page(struct receive_queue *rq,
u16 num_buf,
u16 *num_buf,
struct page *p,
int offset,
unsigned int *len)
......@@ -497,7 +520,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
page_off += *len;
while (--num_buf) {
while (--*num_buf) {
unsigned int buflen;
unsigned long ctx;
void *buf;
......@@ -507,19 +530,22 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
if (unlikely(!ctx))
goto err_buf;
buf = mergeable_ctx_to_buf_address(ctx);
p = virt_to_head_page(buf);
off = buf - page_address(p);
/* guard against a misconfigured or uncooperative backend that
* is sending packet larger than the MTU.
*/
if ((page_off + buflen) > PAGE_SIZE)
if ((page_off + buflen) > PAGE_SIZE) {
put_page(p);
goto err_buf;
buf = mergeable_ctx_to_buf_address(ctx);
p = virt_to_head_page(buf);
off = buf - page_address(p);
}
memcpy(page_address(page) + page_off,
page_address(p) + off, buflen);
page_off += buflen;
put_page(p);
}
*len = page_off;
......@@ -552,16 +578,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct page *xdp_page;
u32 act;
/* No known backend devices should send packets with
* more than a single buffer when XDP conditions are
* met. However it is not strictly illegal so the case
* is handled as an exception and a warning is thrown.
*/
/* This happens when rx buffer size is underestimated */
if (unlikely(num_buf > 1)) {
bpf_warn_invalid_xdp_buffer();
/* linearize data for XDP */
xdp_page = xdp_linearize_page(rq, num_buf,
xdp_page = xdp_linearize_page(rq, &num_buf,
page, offset, &len);
if (!xdp_page)
goto err_xdp;
......@@ -575,16 +595,25 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
* the receive path after XDP is loaded. In practice I
* was not able to create this condition.
*/
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
act = do_xdp_prog(vi, rq, xdp_prog,
page_address(xdp_page) + offset, len);
switch (act) {
case XDP_PASS:
if (unlikely(xdp_page != page))
__free_pages(xdp_page, 0);
/* We can only create skb based on xdp_page. */
if (unlikely(xdp_page != page)) {
rcu_read_unlock();
put_page(page);
head_skb = page_to_skb(vi, rq, xdp_page,
0, len, PAGE_SIZE);
ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
return head_skb;
}
break;
case XDP_TX:
ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
if (unlikely(xdp_page != page))
goto err_xdp;
rcu_read_unlock();
......@@ -593,6 +622,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
default:
if (unlikely(xdp_page != page))