Commit c3673464 authored by Karen Xie's avatar Karen Xie Committed by James Bottomley
Browse files

[SCSI] cxgb3i: Add cxgb3i iSCSI driver.

This patch implements the cxgb3i iscsi connection acceleration for the
open-iscsi initiator.

The cxgb3i driver offers the iscsi PDU based offload:
- digest insertion and verification
- payload direct-placement into host memory buffer.
Signed-off-by: default avatarKaren Xie <>
Signed-off-by: default avatarJames Bottomley <>
parent b632ade2
Chelsio S3 iSCSI Driver for Linux
The Chelsio T3 ASIC based Adapters (S310, S320, S302, S304, Mezz cards, etc.
series of products) supports iSCSI acceleration and iSCSI Direct Data Placement
(DDP) where the hardware handles the expensive byte touching operations, such
as CRC computation and verification, and direct DMA to the final host memory
- iSCSI PDU digest generation and verification
On transmitting, Chelsio S3 h/w computes and inserts the Header and
Data digest into the PDUs.
On receiving, Chelsio S3 h/w computes and verifies the Header and
Data digest of the PDUs.
- Direct Data Placement (DDP)
S3 h/w can directly place the iSCSI Data-In or Data-Out PDU's
payload into pre-posted final destination host-memory buffers based
on the Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
in Data-Out PDUs.
- PDU Transmit and Recovery
On transmitting, S3 h/w accepts the complete PDU (header + data)
from the host driver, computes and inserts the digests, decomposes
the PDU into multiple TCP segments if necessary, and transmit all
the TCP segments onto the wire. It handles TCP retransmission if
On receving, S3 h/w recovers the iSCSI PDU by reassembling TCP
segments, separating the header and data, calculating and verifying
the digests, then forwards the header to the host. The payload data,
if possible, will be directly placed into the pre-posted host DDP
buffer. Otherwise, the payload data will be sent to the host too.
The cxgb3i driver interfaces with open-iscsi initiator and provides the iSCSI
acceleration through Chelsio hardware wherever applicable.
Using the cxgb3i Driver
The following steps need to be taken to accelerates the open-iscsi initiator:
1. Load the cxgb3i driver: "modprobe cxgb3i"
The cxgb3i module registers a new transport class "cxgb3i" with open-iscsi.
* in the case of recompiling the kernel, the cxgb3i selection is located at
Device Drivers
SCSI device support --->
[*] SCSI low-level drivers --->
<M> Chelsio S3xx iSCSI support
2. Create an interface file located under /etc/iscsi/ifaces/ for the new
transport class "cxgb3i".
The content of the file should be in the following format:
iface.transport_name = cxgb3i
iface.net_ifacename = <ethX>
iface.ipaddress = <iscsi ip address>
* if iface.ipaddress is specified, <iscsi ip address> needs to be either the
same as the ethX's ip address or an address on the same subnet. Make
sure the ip address is unique in the network.
3. edit /etc/iscsi/iscsid.conf
The default setting for MaxRecvDataSegmentLength (131072) is too big,
replace "node.conn[0].iscsi.MaxRecvDataSegmentLength" to be a value no
bigger than 15360 (for example 8192):
node.conn[0].iscsi.MaxRecvDataSegmentLength = 8192
* The login would fail for a normal session if MaxRecvDataSegmentLength is
too big. A error message in the format of
"cxgb3i: ERR! MaxRecvSegmentLength <X> too big. Need to be <= <Y>."
would be logged to dmesg.
4. To direct open-iscsi traffic to go through cxgb3i's accelerated path,
"-I <iface file name>" option needs to be specified with most of the
iscsiadm command. <iface file name> is the transport interface file created
in step 2.
......@@ -352,6 +352,8 @@ config ISCSI_TCP
source "drivers/scsi/cxgb3i/Kconfig"
config SGIWD93_SCSI
tristate "SGI WD93C93 SCSI Driver"
depends on SGI_HAS_WD93 && SCSI
......@@ -126,6 +126,7 @@ obj-$(CONFIG_SCSI_HPTIOP) += hptiop.o
obj-$(CONFIG_SCSI_STEX) += stex.o
obj-$(CONFIG_SCSI_MVSAS) += mvsas.o
obj-$(CONFIG_PS3_ROM) += ps3rom.o
obj-$(CONFIG_SCSI_CXGB3_ISCSI) += libiscsi.o libiscsi_tcp.o cxgb3i/
obj-$(CONFIG_ARM) += arm/
EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3
cxgb3i-y := cxgb3i_init.o cxgb3i_iscsi.o cxgb3i_pdu.o cxgb3i_offload.o
obj-$(CONFIG_SCSI_CXGB3_ISCSI) += cxgb3i_ddp.o cxgb3i.o
tristate "Chelsio S3xx iSCSI support"
select CHELSIO_T3
This driver supports iSCSI offload for the Chelsio S3 series devices.
* cxgb3i.h: Chelsio S3xx iSCSI driver.
* Copyright (c) 2008 Chelsio Communications, Inc.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation.
* Written by: Karen Xie (
#ifndef __CXGB3I_H__
#define __CXGB3I_H__
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/scatterlist.h>
#include <scsi/libiscsi_tcp.h>
/* from cxgb3 LLD */
#include "common.h"
#include "t3_cpl.h"
#include "t3cdev.h"
#include "cxgb3_ctl_defs.h"
#include "cxgb3_offload.h"
#include "firmware_exports.h"
#include "cxgb3i_offload.h"
#include "cxgb3i_ddp.h"
#define CXGB3I_MAX_LUN 512
(sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + 2*ISCSI_DIGEST_SIZE)
struct cxgb3i_adapter;
struct cxgb3i_hba;
struct cxgb3i_endpoint;
* struct cxgb3i_hba - cxgb3i iscsi structure (per port)
* @snic: cxgb3i adapter containing this port
* @ndev: pointer to netdev structure
* @shost: pointer to scsi host structure
struct cxgb3i_hba {
struct cxgb3i_adapter *snic;
struct net_device *ndev;
struct Scsi_Host *shost;
* struct cxgb3i_adapter - cxgb3i adapter structure (per pci)
* @listhead: list head to link elements
* @lock: lock for this structure
* @tdev: pointer to t3cdev used by cxgb3 driver
* @pdev: pointer to pci dev
* @hba_cnt: # of hbas (the same as # of ports)
* @hba: all the hbas on this adapter
* @tx_max_size: max. tx packet size supported
* @rx_max_size: max. rx packet size supported
* @tag_format: ddp tag format settings
struct cxgb3i_adapter {
struct list_head list_head;
spinlock_t lock;
struct t3cdev *tdev;
struct pci_dev *pdev;
unsigned char hba_cnt;
struct cxgb3i_hba *hba[MAX_NPORTS];
unsigned int tx_max_size;
unsigned int rx_max_size;
struct cxgb3i_tag_format tag_format;
* struct cxgb3i_conn - cxgb3i iscsi connection
* @listhead: list head to link elements
* @cep: pointer to iscsi_endpoint structure
* @conn: pointer to iscsi_conn structure
* @hba: pointer to the hba this conn. is going through
* @task_idx_bits: # of bits needed for session->cmds_max
struct cxgb3i_conn {
struct list_head list_head;
struct cxgb3i_endpoint *cep;
struct iscsi_conn *conn;
struct cxgb3i_hba *hba;
unsigned int task_idx_bits;
* struct cxgb3i_endpoint - iscsi tcp endpoint
* @c3cn: the h/w tcp connection representation
* @hba: pointer to the hba this conn. is going through
* @cconn: pointer to the associated cxgb3i iscsi connection
struct cxgb3i_endpoint {
struct s3_conn *c3cn;
struct cxgb3i_hba *hba;
struct cxgb3i_conn *cconn;
int cxgb3i_iscsi_init(void);
void cxgb3i_iscsi_cleanup(void);
struct cxgb3i_adapter *cxgb3i_adapter_add(struct t3cdev *);
void cxgb3i_adapter_remove(struct t3cdev *);
int cxgb3i_adapter_ulp_init(struct cxgb3i_adapter *);
void cxgb3i_adapter_ulp_cleanup(struct cxgb3i_adapter *);
struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *);
struct cxgb3i_hba *cxgb3i_hba_host_add(struct cxgb3i_adapter *,
struct net_device *);
void cxgb3i_hba_host_remove(struct cxgb3i_hba *);
int cxgb3i_pdu_init(void);
void cxgb3i_pdu_cleanup(void);
void cxgb3i_conn_cleanup_task(struct iscsi_task *);
int cxgb3i_conn_alloc_pdu(struct iscsi_task *, u8);
int cxgb3i_conn_init_pdu(struct iscsi_task *, unsigned int, unsigned int);
int cxgb3i_conn_xmit_pdu(struct iscsi_task *);
void cxgb3i_release_itt(struct iscsi_task *task, itt_t hdr_itt);
int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt);
This diff is collapsed.
* cxgb3i_ddp.h: Chelsio S3xx iSCSI DDP Manager.
* Copyright (c) 2008 Chelsio Communications, Inc.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation.
* Written by: Karen Xie (
#ifndef __CXGB3I_ULP2_DDP_H__
#define __CXGB3I_ULP2_DDP_H__
* struct cxgb3i_tag_format - cxgb3i ulp tag format for an iscsi entity
* @sw_bits: # of bits used by iscsi software layer
* @rsvd_bits: # of bits used by h/w
* @rsvd_shift: h/w bits shift left
* @rsvd_mask: reserved bit mask
struct cxgb3i_tag_format {
unsigned char sw_bits;
unsigned char rsvd_bits;
unsigned char rsvd_shift;
unsigned char filler[1];
u32 rsvd_mask;
* struct cxgb3i_gather_list - cxgb3i direct data placement memory
* @tag: ddp tag
* @length: total data buffer length
* @offset: initial offset to the 1st page
* @nelem: # of pages
* @pages: page pointers
* @phys_addr: physical address
struct cxgb3i_gather_list {
u32 tag;
unsigned int length;
unsigned int offset;
unsigned int nelem;
struct page **pages;
dma_addr_t phys_addr[0];
* struct cxgb3i_ddp_info - cxgb3i direct data placement for pdu payload
* @list: list head to link elements
* @tdev: pointer to t3cdev used by cxgb3 driver
* @max_txsz: max tx packet size for ddp
* @max_rxsz: max rx packet size for ddp
* @llimit: lower bound of the page pod memory
* @ulimit: upper bound of the page pod memory
* @nppods: # of page pod entries
* @idx_last: page pod entry last used
* @idx_bits: # of bits the pagepod index would take
* @idx_mask: pagepod index mask
* @rsvd_tag_mask: tag mask
* @map_lock: lock to synchonize access to the page pod map
* @gl_map: ddp memory gather list
* @gl_skb: skb used to program the pagepod
struct cxgb3i_ddp_info {
struct list_head list;
struct t3cdev *tdev;
struct pci_dev *pdev;
unsigned int max_txsz;
unsigned int max_rxsz;
unsigned int llimit;
unsigned int ulimit;
unsigned int nppods;
unsigned int idx_last;
unsigned char idx_bits;
unsigned char filler[3];
u32 idx_mask;
u32 rsvd_tag_mask;
spinlock_t map_lock;
struct cxgb3i_gather_list **gl_map;
struct sk_buff **gl_skb;
#define ULP2_MAX_PKT_SIZE 16224
#define PPOD_PAGES_MAX 4
#define PPOD_PAGES_SHIFT 2 /* 4 pages per pod */
* struct pagepod_hdr, pagepod - pagepod format
struct pagepod_hdr {
u32 vld_tid;
u32 pgsz_tag_clr;
u32 maxoffset;
u32 pgoffset;
u64 rsvd;
struct pagepod {
struct pagepod_hdr hdr;
u64 addr[PPOD_PAGES_MAX + 1];
#define PPOD_SIZE sizeof(struct pagepod) /* 64 */
#define PPOD_COLOR_MASK ((1 << PPOD_COLOR_SIZE) - 1)
#define PPOD_IDX_MAX_SIZE 24
#define S_PPOD_TID 0
#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
#define S_PPOD_VALID 24
#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
#define S_PPOD_COLOR 0
#define M_PPOD_COLOR 0x3F
#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
#define S_PPOD_TAG 6
#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
#define S_PPOD_PGSZ 30
#define M_PPOD_PGSZ 0x3
#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
* large memory chunk allocation/release
* use vmalloc() if kmalloc() fails
static inline void *cxgb3i_alloc_big_mem(unsigned int size,
gfp_t gfp)
void *p = kmalloc(size, gfp);
if (!p)
p = vmalloc(size);
if (p)
memset(p, 0, size);
return p;
static inline void cxgb3i_free_big_mem(void *addr)
if (is_vmalloc_addr(addr))
* cxgb3i ddp tag are 32 bits, it consists of reserved bits used by h/w and
* non-reserved bits that can be used by the iscsi s/w.
* The reserved bits are identified by the rsvd_bits and rsvd_shift fields
* in struct cxgb3i_tag_format.
* The upper most reserved bit can be used to check if a tag is ddp tag or not:
* if the bit is 0, the tag is a valid ddp tag
* cxgb3i_is_ddp_tag - check if a given tag is a hw/ddp tag
* @tformat: tag format information
* @tag: tag to be checked
* return true if the tag is a ddp tag, false otherwise.
static inline int cxgb3i_is_ddp_tag(struct cxgb3i_tag_format *tformat, u32 tag)
return !(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1)));
* cxgb3i_sw_tag_usable - check if a given s/w tag has enough bits left for
* the reserved/hw bits
* @tformat: tag format information
* @sw_tag: s/w tag to be checked
* return true if the tag is a ddp tag, false otherwise.
static inline int cxgb3i_sw_tag_usable(struct cxgb3i_tag_format *tformat,
u32 sw_tag)
sw_tag >>= (32 - tformat->rsvd_bits);
return !sw_tag;
* cxgb3i_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag
* @tformat: tag format information
* @sw_tag: s/w tag to be checked
* insert 1 at the upper most reserved bit to mark it as an invalid ddp tag.
static inline u32 cxgb3i_set_non_ddp_tag(struct cxgb3i_tag_format *tformat,
u32 sw_tag)
unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
u32 mask = (1 << shift) - 1;
if (sw_tag && (sw_tag & ~mask)) {
u32 v1 = sw_tag & ((1 << shift) - 1);
u32 v2 = (sw_tag >> (shift - 1)) << shift;
return v2 | v1 | 1 << shift;
return sw_tag | 1 << shift;
* cxgb3i_ddp_tag_base - shift the s/w tag bits so that reserved bits are not
* used.
* @tformat: tag format information
* @sw_tag: s/w tag to be checked
static inline u32 cxgb3i_ddp_tag_base(struct cxgb3i_tag_format *tformat,
u32 sw_tag)
u32 mask = (1 << tformat->rsvd_shift) - 1;
if (sw_tag && (sw_tag & ~mask)) {
u32 v1 = sw_tag & mask;
u32 v2 = sw_tag >> tformat->rsvd_shift;
v2 <<= tformat->rsvd_shift + tformat->rsvd_bits;
return v2 | v1;
return sw_tag;
* cxgb3i_tag_rsvd_bits - get the reserved bits used by the h/w
* @tformat: tag format information
* @tag: tag to be checked
* return the reserved bits in the tag
static inline u32 cxgb3i_tag_rsvd_bits(struct cxgb3i_tag_format *tformat,
u32 tag)
if (cxgb3i_is_ddp_tag(tformat, tag))
return (tag >> tformat->rsvd_shift) & tformat->rsvd_mask;
return 0;
* cxgb3i_tag_nonrsvd_bits - get the non-reserved bits used by the s/w
* @tformat: tag format information
* @tag: tag to be checked
* return the non-reserved bits in the tag.
static inline u32 cxgb3i_tag_nonrsvd_bits(struct cxgb3i_tag_format *tformat,
u32 tag)
unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
u32 v1, v2;
if (cxgb3i_is_ddp_tag(tformat, tag)) {
v1 = tag & ((1 << tformat->rsvd_shift) - 1);
v2 = (tag >> (shift + 1)) << tformat->rsvd_shift;
} else {
u32 mask = (1 << shift) - 1;
tag &= ~(1 << shift);
v1 = tag & mask;
v2 = (tag >> 1) & ~mask;
return v1 | v2;
int cxgb3i_ddp_tag_reserve(struct t3cdev *, unsigned int tid,
struct cxgb3i_tag_format *, u32 *tag,
struct cxgb3i_gather_list *, gfp_t gfp);
void cxgb3i_ddp_tag_release(struct t3cdev *, u32 tag);
struct cxgb3i_gather_list *cxgb3i_ddp_make_gl(unsigned int xferlen,
struct scatterlist *sgl,
unsigned int sgcnt,
struct pci_dev *pdev,
gfp_t gfp);
void cxgb3i_ddp_release_gl(struct cxgb3i_gather_list *gl,
struct pci_dev *pdev);
int cxgb3i_setup_conn_host_pagesize(struct t3cdev *, unsigned int tid,
int reply);
int cxgb3i_setup_conn_pagesize(struct t3cdev *, unsigned int tid, int reply,
unsigned long pgsz);
int cxgb3i_setup_conn_digest(struct t3cdev *, unsigned int tid,
int hcrc, int dcrc, int reply);
int cxgb3i_ddp_find_page_index(unsigned long pgsz);
int cxgb3i_adapter_ddp_init(struct t3cdev *, struct cxgb3i_tag_format *,
unsigned int *txsz, unsigned int *rxsz);
void cxgb3i_adapter_ddp_cleanup(struct t3cdev *);
/* cxgb3i_init.c: Chelsio S3xx iSCSI driver.
* Copyright (c) 2008 Chelsio Communications, Inc.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation.
* Written by: Karen Xie (
#include "cxgb3i.h"
#define DRV_MODULE_NAME "cxgb3i"
#define DRV_MODULE_VERSION "1.0.0"
#define DRV_MODULE_RELDATE "Jun. 1, 2008"
static char version[] =
"Chelsio S3xx iSCSI Driver " DRV_MODULE_NAME
MODULE_AUTHOR("Karen Xie <>");
MODULE_DESCRIPTION("Chelsio S3xx iSCSI Driver");
static void open_s3_dev(struct t3cdev *);
static void close_s3_dev(struct t3cdev *);
static cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS];
static struct cxgb3_client t3c_client = {
.name = "iscsi_cxgb3",
.handlers = cxgb3i_cpl_handlers,
.add = open_s3_dev,
.remove = close_s3_dev,
* open_s3_dev - register with cxgb3 LLD
* @t3dev: cxgb3 adapter instance
static void open_s3_dev(struct t3cdev *t3dev)
static int vers_printed;
if (!vers_printed) {
printk(KERN_INFO "%s", version);
vers_printed = 1;
cxgb3i_sdev_add(t3dev, &t3c_client);
* close_s3_dev - de-register with cxgb3 LLD
* @t3dev: cxgb3 adapter instance
static void close_s3_dev(struct t3cdev *t3dev)
* cxgb3i_init_module - module init entry point