/* * Copyright (C) 2003-2017 Chelsio Communications. All rights reserved. * * Written by Dimitris Michailidis (dm@chelsio.com) * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this * release for licensing terms and conditions. */ #ifndef _CHELSIO_TOM_T4_H #define _CHELSIO_TOM_T4_H #include #include #include #include #include #include #include #include #include #include "t4_hw.h" #include "t4_tls.h" struct sock; struct cxgb4_lld_info; #define S_TP_VERSION_MAJOR 16 #define M_TP_VERSION_MAJOR 0xFF #define V_TP_VERSION_MAJOR(x) ((x) << S_TP_VERSION_MAJOR) #define G_TP_VERSION_MAJOR(x) \ (((x) >> S_TP_VERSION_MAJOR) & M_TP_VERSION_MAJOR) #define S_TP_VERSION_MINOR 8 #define M_TP_VERSION_MINOR 0xFF #define V_TP_VERSION_MINOR(x) ((x) << S_TP_VERSION_MINOR) #define G_TP_VERSION_MINOR(x) \ (((x) >> S_TP_VERSION_MINOR) & M_TP_VERSION_MINOR) #define S_TP_VERSION_MICRO 0 #define M_TP_VERSION_MICRO 0xFF #define V_TP_VERSION_MICRO(x) ((x) << S_TP_VERSION_MICRO) #define G_TP_VERSION_MICRO(x) \ (((x) >> S_TP_VERSION_MICRO) & M_TP_VERSION_MICRO) enum { TP_VERSION_MAJOR = 1, TP_VERSION_MINOR = 1, TP_VERSION_MICRO = 0 }; struct listen_info { struct listen_info *next; /* Link to next entry */ struct sock *sk; /* The listening socket */ unsigned int stid; /* The server TID */ }; /* * TOM tunable parameters. They can be manipulated through sysctl(2) or /proc. */ struct tom_tunables { int max_host_sndbuf; // max host RAM consumed by a sndbuf int tx_hold_thres; // push/pull threshold for non-full TX sk_buffs int max_wr_credits; // max # of outstanding WR credits per connection int rx_credit_thres; // min # of RX credits needed for RX_DATA_ACK int mss; // max TX_DATA WR payload size int delack; // delayed ACK control int max_conn; // maximum number of offloaded connections int soft_backlog_limit; // whether the listen backlog limit is soft int kseg_ddp; int ddp; // whether to put new connections in DDP mode int ddp_thres; // min recvmsg size before activating DDP (default) int ddp_xlthres; // min recvmsg size before activating DDP (40Gbps) int ddp_maxpages; // max pages for DDP buffer to limit pods/buffer int ddp_maxfail; // max failures for DDP buffer allocation/post int ddp_copy_limit; // capacity of kernel DDP buffer int ddp_push_wait; // whether blocking DDP waits for PSH flag #if defined(CONFIG_T4_ZCOPY_SENDMSG) || defined(CONFIG_T4_ZCOPY_SENDMSG_MODULE) int zcopy_sendmsg_partial_thres; // < is never zcopied int zcopy_sendmsg_partial_xlthres; // < is never zcopied for 40G int zcopy_sendmsg_partial_copy; // bytes copied in partial zcopy int zcopy_sendmsg_ret_pending_dma;// pot. return while pending DMA #endif int activated; // TOE engine activation state int cop_managed_offloading;// offloading decisions managed by a COP #if defined(CONFIG_CHELSIO_IO_SPIN) int recvmsg_spin_us; // time to spin in recvmsg() for input data #endif int recvmsg_ddp_wait_us; // time to wait for ddp invalidate in recvmsg() int lro; /* LRO enabled/disabled */ /* * This code demonstrates how one would selectively Offload * (TOE) certain incoming connections by using the extended * "Filter Information" capabilities of Server Control Blocks * (SCB). (See "Classification and Filtering" in the T4 Data * Book for a description of Ingress Packet pattern matching * capabilities. See also documentation on the * TP_VLAN_PRI_MAP register.) Because this selective * Offloading is happening in the chip, this allows * non-Offloading and Offloading drivers to coexist. For * example, an Offloading Driver might be running in a * Hypervisor while non-Offloading vNIC Drivers might be * running in Virtual Machines. * * This particular example code demonstrates how one would * selectively Offload incoming connections based on VLANs. * We allow one VLAN to be designated as the "Offloading * VLAN". Ingress SYNs on this Offload VLAN will match the * filter which we put into the Listen SCB and will result in * Offloaded Connections on that VLAN. Incoming SYNs on other * VLANs will not match and will go through normal NIC * processing. * * This is not production code since one would want a lot more * infrastructure to allow a variety of filter specifications * on a per-server basis. But this demonstrates the * fundamental mechanisms one would use to build such an * infrastructure. */ int offload_vlan; #define MAX_TLS_PORTS 12 int tls_ports[MAX_TLS_PORTS]; }; #define FAILOVER_MAX_ATTEMPTS 5 struct tom_sysctl_table; struct pci_dev; struct tom_data; struct key_map; #define LISTEN_INFO_HASH_SIZE 32 #define TOM_RSPQ_HASH_BITS 5 typedef int (*t4tom_cpl_handler_func)(struct tom_data *td, struct sk_buff *skb); struct tom_data { struct list_head list_node; struct pci_dev *pdev; struct toedev tdev; struct cxgb4_lld_info *lldi; struct tom_tunables conf; struct tom_sysctl_table *sysctl; /* * The next three locks listen_lock, deferq.lock, and tid_release_lock * are used rarely so we let them potentially share a cacheline. */ struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE]; spinlock_t listen_lock; struct sk_buff_head deferq; struct work_struct deferq_task; struct sock **tid_release_list; spinlock_t tid_release_lock; struct work_struct tid_release_task; #ifdef T4_TRACE_TOM #define T4_TRACE_TOM_BUFFERS 8 struct dentry *debugfs_root; struct trace_buf *tb[T4_TRACE_TOM_BUFFERS]; #endif unsigned int pfvf; unsigned int ddp_llimit; unsigned long *ppod_bmap; unsigned int nppods; unsigned int start_tag; spinlock_t ppod_map_lock; struct dma_pool *dma_pool; struct adap_ports *ports; const unsigned short *mtus; struct tid_info *tids; unsigned int rss_qid[2]; // per Rx C-channel unsigned int tx_max_chunk; unsigned int max_wr_credits; unsigned int send_page_order; unsigned int offload_vlan; struct net_device *egr_dev[NCHAN*2]; // Ports + Loopback /* * Synchronizes access to the various SYN queues. We assume that SYN * queue accesses do not cause much contention so that one lock for all * the queues suffices. This is because the primary user of this lock * is the TOE softirq, which runs on one CPU and so most accesses * should be naturally contention-free. The only contention can come * from listening sockets processing backlogged messages, and that * should not be high volume. */ spinlock_t synq_lock ____cacheline_aligned_in_smp; int round_robin_cnt; #ifdef DEBUG atomic_t rspq_alloc_count; atomic_t rspq_reuse_count; #endif struct sk_buff *rspq_skb_cache[1 << TOM_RSPQ_HASH_BITS]; struct list_head rcu_node; struct list_head na_node; spinlock_t aidr_lock ____cacheline_aligned_in_smp; struct idr aidr; /* ATID id space */ spinlock_t idr_lock ____cacheline_aligned_in_smp; struct idr hwtid_idr; /* TID id space */ struct key_map kmap; }; enum { T4_LISTEN_START_PENDING, T4_LISTEN_STARTED }; struct listen_ctx { struct sock *lsk; struct tom_data *tom_data; u32 state; }; #include "cpl_io_state.h" /* * toedev -> tom_data accessor */ #define TOM_DATA(dev) container_of(dev, struct tom_data, tdev) #ifdef T4_TRACE_TOM static inline struct trace_buf *TIDTB(struct sock *sk) { struct cpl_io_state *cplios = CPL_IO_STATE(sk); struct toedev *tdev = cplios->toedev; if (tdev == NULL) return NULL; return TOM_DATA(tdev)->tb[cplios->tid % T4_TRACE_TOM_BUFFERS]; } #endif #define RX_PULL_LEN 128 /* * Access a configurable parameter of a TOE device's TOM. */ #define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param) static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word) { u64 tlo = be64_to_cpu(tcb[((31 - word) /2)]); u64 thi = be64_to_cpu(tcb[((31 - word) /2) - 1]); u64 t; u32 shift = 32; t = (thi << shift) | (tlo >> shift); return t; } static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift) { u32 v; u64 t = be64_to_cpu(tcb[(31 - word) /2]); if (word & 0x1) shift += 32; v = (t >> shift) & mask; return v; } /* * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc. * The allocated memory is cleared. */ static inline void *t4tom_alloc_mem(unsigned long size) { void *p = kmalloc(size, GFP_KERNEL); if (!p) p = vmalloc(size); if (p) memset(p, 0, size); return p; } /* * Free memory allocated through t4tom_alloc_mem(). */ static inline void t4tom_free_mem(void *addr) { unsigned long p = (unsigned long) addr; if (p >= VMALLOC_START && p < VMALLOC_END) vfree(addr); else kfree(addr); } void t4_init_tunables(struct tom_data *t); void t4_sysctl_unregister(struct tom_sysctl_table *t); struct tom_sysctl_table *t4_sysctl_register(struct toedev *dev, const struct tom_tunables *p); int t4_get_tcb(struct sock *sk, unsigned short cookie); int tls_init_kmap(struct tom_data *td, struct cxgb4_lld_info *lldi); #endif