The major pieces of work that ONTAP plans to offload in the write path are as follows:
1. Compress (8K/32K data) + cksum
2. Cksum + 4K compress
3. Encryption

Compression + Checksum
======================
This is the scenario where ONTAP has 8K or 32K of data that needs to be compressed and deduplicated. Deduplication is performed based on the outcome of compression:
- If compression succeeds, deduplication is done on the compressed data.
- If compression fails, deduplication is done on the uncompressed data.

In addition, a special 8-byte header is placed in the compressed output:

struct compression_header {
    uint32_t cksum;       /* [IN] Adler-32 checksum of the uncompressed data */
    uint16_t data_len;    /* [OUT] Length of the compressed data */
    uint16_t data_format; /* [IN] Data format that tracks the algorithm used for compression */
};

The plan is to use the compress and checksum services as a pipeline on the hardware card.

NOTE: We might use the bulk interface, but that should not change the requirements of the compress or batch interface.

Requirements from the APIs:
- The API needs to provide a way to supply the header such that:
  (1) cksum and data_format, as described in compression_header, are provided by ONTAP.
  (2) data_len is filled in by the hardware card in the compressed output.
- Provide checksum calculation based on the compression status: on success compute the checksum on the compressed data, otherwise on the input data.
- Generate an error (PNSO_ERR_CPDC_DATA_TOO_LONG) when the compressed length crosses the ONTAP-specified threshold_len.

Code sample:

NOTE: The hdr information has been added to the compression descriptor (see cp_desc.hdr below).

#define HWA_COMPR_CKSUM_SVC_SGL_SIZE    8
#define HWA_COMPR_CKSUM_SVC_COUNT       2

typedef struct hwa_compr_cksum_svc_cb_args {
    TAILQ_ENTRY(hwa_compr_cksum_svc_cb_args) cb_next;

    /* WAFL specific information */
    wafl_ise_witem_t *witem;

    /* HW assist service specific information */
    bool svc_done;      /* Used for sync wait */
    struct pnso_service_request *svc_req;
    struct pnso_service_result *svc_res;

    /* The following is the information used in the service */

    /* Input sgl list */
    char svc_input_sgl[sizeof(struct pnso_buffer_list) +
        sizeof(struct pnso_flat_buffer) * HWA_COMPR_CKSUM_SVC_SGL_SIZE];

    /* Request data */
    char svc_req_data[sizeof(struct pnso_service_request) +
        sizeof(struct pnso_service) * HWA_COMPR_CKSUM_SVC_COUNT];

    /* Request result */
    char svc_res_data[sizeof(struct pnso_service_result) +
        sizeof(struct pnso_service_status) * HWA_COMPR_CKSUM_SVC_COUNT];

    /* Result destination buffers: compressed output sgl and checksum tags */
    char svc_dst_sgl_svc_1[sizeof(struct pnso_buffer_list) +
        sizeof(struct pnso_flat_buffer) * HWA_COMPR_CKSUM_SVC_SGL_SIZE];
    char svc_dst_chksum_svc_2[sizeof(struct pnso_chksum_tag) * HWA_COMPR_CKSUM_SVC_SGL_SIZE];
} hwa_compr_cksum_svc_cb_args_t;

hwa_compr_cksum_svc_cb_args_t *
hwa_pnso_compr_cksum_callback_create()
{
    hwa_compr_cksum_svc_cb_args_t *cb = NULL;
    int rc = 0;

    /* Allocate the callback memory for this service */
    cb = sk_kmem_zalloc(sizeof(hwa_compr_cksum_svc_cb_args_t), M_DENSE, SKMA_NOWAIT);
    if (cb == NULL) {
        rc = 1;
        goto out;
    }

    /* Carve the request, result and destination buffers out of the embedded arrays */
    cb->svc_req = (struct pnso_service_request *)&cb->svc_req_data;
    cb->svc_req->sgl = (struct pnso_buffer_list *)&cb->svc_input_sgl;
    cb->svc_res = (struct pnso_service_result *)&cb->svc_res_data;
    cb->svc_res->svc[0].u.dst.sgl = (struct pnso_buffer_list *)&cb->svc_dst_sgl_svc_1;
    cb->svc_res->svc[1].u.chksum.tags = (struct pnso_chksum_tag *)&cb->svc_dst_chksum_svc_2;

out:
    if (rc != 0) {
        cb = NULL;
    }
    return cb;
}
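The submit-failure and completion paths below need to release this callback context, but the sample does not show a free routine. The following is a minimal sketch only; hwa_pnso_compr_cksum_callback_free() is a hypothetical name, and sk_kmem_free() is assumed to be the counterpart of sk_kmem_zalloc() with a (ptr, size, type) signature:

/*
 * Sketch (assumption): release the callback context allocated by
 * hwa_pnso_compr_cksum_callback_create(). sk_kmem_free() is an assumed
 * counterpart of sk_kmem_zalloc(); adjust to the actual allocator API.
 */
void
hwa_pnso_compr_cksum_callback_free(hwa_compr_cksum_svc_cb_args_t *cb)
{
    if (cb == NULL) {
        return;
    }
    /* The request/result/sgl storage is embedded in cb, so one free suffices */
    sk_kmem_free(cb, sizeof(hwa_compr_cksum_svc_cb_args_t), M_DENSE);
}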
void
hwa_pnso_compr_cksum_service_init(enum pnso_compressor_type comp_type, uint32_t hdr_cksum,
    uint16_t hdr_data_format, uint16_t num_tags, enum pnso_chksum_type cksum_type,
    hwa_compr_cksum_svc_cb_args_t *cb)
{
    struct pnso_service_request *svc_req;
    struct pnso_service_result *svc_res;
    uint16_t i;
    char *datap;
    wafl_ise_witem_t *cwitem = cb->witem;
    wafl_ise_witem_t *witem;
    uint16_t cg_size = cwitem->ewi_cmpr_cg_size;
    uint32_t svc_cnt;
    struct pnso_service *cmpr_svc_req, *cksum_svc_req;
    struct pnso_service_status *cmpr_svc_res, *cksum_svc_res;

    svc_req = cb->svc_req;
    svc_res = cb->svc_res;

    /* Prepare the service request input sgl from the compression group buffers */
    svc_req->sgl->count = cg_size;
    for (i = 0, witem = cwitem; i < cg_size; i++, witem = wafl_ise_witem_get_cnext(witem)) {
        datap = EE_WAFL_DATA(WAFL_EXTENT_PL4K(witem->ewi_owner_bdp));
        svc_req->sgl->buffers[i].buf = (uint64_t)datap;
        svc_req->sgl->buffers[i].len = BLOCK_SIZE;
    }

    /* Initialize the compress request */
    svc_cnt = 0;
    cmpr_svc_req = &svc_req->svc[svc_cnt];
    cmpr_svc_req->svc_type = PNSO_SVC_TYPE_COMPRESS;
    cmpr_svc_req->u.cp_desc.algo_type = comp_type;
    cmpr_svc_req->u.cp_desc.threshold_len = (cg_size * 3 / 4) * BLOCK_SIZE;
    cmpr_svc_req->u.cp_desc.flags = PNSO_DFLAG_INSERT_HEADER | PNSO_DFLAG_ZERO_PAD |
        PNSO_DFLAG_BYPASS_ONFAIL;
    cmpr_svc_req->u.cp_desc.hdr.chksum = hdr_cksum;
    cmpr_svc_req->u.cp_desc.hdr.version = hdr_data_format;
    cmpr_svc_req->u.cp_desc.hdr.data_len = 0;   /* Filled in by the hardware card */

    /* Initialize the compressed output buffers */
    cmpr_svc_res = &svc_res->svc[svc_cnt];
    cmpr_svc_res->u.dst.sgl->count = cg_size;
    for (i = 0; i < cg_size; i++) {
        datap = (char *)free_cache_pop_extent(NULL, NULL, 1, true);
        cmpr_svc_res->u.dst.sgl->buffers[i].buf = (uint64_t)datap;
        cmpr_svc_res->u.dst.sgl->buffers[i].len = BLOCK_SIZE;
    }
    svc_cnt = ++svc_req->num_services;

    /* Initialize the cksum request */
    cksum_svc_req = &svc_req->svc[svc_cnt];
    cksum_svc_req->svc_type = PNSO_SVC_TYPE_CHKSUM;
    cksum_svc_req->u.chksum_desc.algo_type = cksum_type;
    cksum_svc_req->u.chksum_desc.flags = PNSO_DFLAG_CHKSUM_PER_BLOCK;

    /* Initialize the checksum output result */
    cksum_svc_res = &svc_res->svc[svc_cnt];
    cksum_svc_res->u.chksum.num_tags = cg_size;
    svc_cnt = ++svc_req->num_services;
}

int
ise_compress_work(wafl_ise_witem_t *witem)
{
    hwa_compr_cksum_svc_cb_args_t *cb = NULL;
    uint16_t cg_size;
    int err = 0;

    cg_size = witem->ewi_cmpr_cg_size;

    cb = hwa_pnso_compr_cksum_callback_create();
    if (cb == NULL) {
        return 1;
    }
    cb->witem = witem;

    hwa_pnso_compr_cksum_service_init(PNSO_COMPRESSOR_TYPE_LZRW1A, 0, 0, cg_size,
        PNSO_CHKSUM_TYPE_MCRC64, cb);

    err = pnso_submit_request(PNSO_BATCH_REQ_NONE, cb->svc_req, cb->svc_res,
        hwa_pnso_compress_cksum_service_done_handler, cb, NULL, NULL);
    if (err != PNSO_OK) {
        /* TODO: memory clean up of cb */
        return 1;
    }
    return err;
}

void
hwa_pnso_compress_cksum_service_done_handler(hwa_compr_cksum_svc_cb_args_t *cb,
    struct pnso_service_result *svc_res)
{
    struct pnso_service_status *cmpr_status, *cksum_status;
    struct pnso_chksum_tag *cksum_tags;
    struct pnso_buffer_list *input_buf = cb->svc_req->sgl;
    uint32_t i;

    if (svc_res->err != PNSO_OK) {
        /* The complete service has failed; free the resources */
        return;
    }

    cmpr_status = &svc_res->svc[0];
    cksum_status = &svc_res->svc[1];
    cksum_tags = cksum_status->u.chksum.tags;

    if (cmpr_status->err == PNSO_OK) {
        /* Compression successful: complete compression on the output buffers */
        do_compression_complete_on(cmpr_status->u.dst.sgl);

        /* Do the deduplication on the compressed data using the cksum tags */
        if (cksum_status->err == PNSO_OK) {
            for (i = 0; i < cmpr_status->u.dst.sgl->count; i++) {
                do_deduplication(cmpr_status->u.dst.sgl->buffers[i].buf, cksum_tags[i]);
            }
        } else {
            cksum_failure_stats++;
        }
    } else {
        /* Compression failure */
        switch (cmpr_status->err) {
        case PNSO_ERR_CPDC_COMPRESSION_FAILED:
            compress_fail_stats++;
            break;
        case PNSO_ERR_CPDC_DATA_TOO_LONG:
            compress_threshold_cross_stats++;
            break;
        default:
            compress_fail_others++;
            break;
        }

        /* Continue deduplication on the uncompressed input data */
        if (cksum_status->err == PNSO_OK) {
            for (i = 0; i < input_buf->count; i++) {
                do_deduplication(input_buf->buffers[i].buf, cksum_tags[i]);
            }
        } else {
            cksum_failure_stats++;
        }
    }

    /* Now move to the next stage of work on the witem */
    move_to_next_stage_work(cb->witem);
}
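On successful compression, the 8-byte compression_header described above is inserted into the compressed output (PNSO_DFLAG_INSERT_HEADER), with data_len filled in by the hardware. The following is a minimal sketch only, assuming the header lands at offset 0 of the first destination buffer and that buf holds a directly usable address:

/*
 * Sketch (assumptions): the compression_header is placed at the start of the
 * first compressed output buffer, and buf is a usable virtual address.
 */
static uint16_t
hwa_compr_output_data_len(const struct pnso_service_status *cmpr_status)
{
    const struct compression_header *hdr;

    hdr = (const struct compression_header *)(uintptr_t)
        cmpr_status->u.dst.sgl->buffers[0].buf;

    /* data_len is the [OUT] field filled by the hardware card */
    return hdr->data_len;
}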
Bulk Encryption Code Sample
===========================

Encryption workflow environment:
- We have a tetris (a batch of 4K blocks) for encryption. The number of 4K blocks will be around 16 * 28 = 448 blocks.
- We will use the batch workflow to submit the request.
- Once the completion callback has run, the tetris is moved to the next stage.

Requirements from the APIs:
- The bulk interface will be used, which provides a single completion callback.
- Allow the bulk request to complete irrespective of 'per_core_qdepth'.
- An intermediate submit-request failure (before flush) or a flush failure will be treated as a failure of the whole service.
- A possible special error is CRYPTO_ERR_MISSING_KEY. For any other error we plan to fall back to software for the specific service.

Error handling (a minimal sketch of this policy follows the list):
- In debug builds, CRYPTO_ERR_MISSING_KEY is the error we are interested in, to debug the missing key.
- In non-debug builds, any error will fall back to on-CPU execution.
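A minimal sketch of that error-handling policy, to be called from the completion handler for a failed service. The helper names hwa_crypto_handle_svc_error(), hwa_crypto_key_missing_debug_hook() and wafl_sw_encrypt_block() are hypothetical placeholders, and DEBUG is assumed to select the debug behaviour:

/*
 * Sketch only: per-service error handling policy for bulk encryption.
 * The hook and the software-fallback helper are hypothetical names.
 */
static void
hwa_crypto_handle_svc_error(wafl_Buf *bp, pnso_error_t err)
{
#ifdef DEBUG
    if (err == CRYPTO_ERR_MISSING_KEY) {
        /* Debug builds: flag the missing key for investigation */
        hwa_crypto_key_missing_debug_hook(bp);
        return;
    }
#endif
    /* Non-debug builds (and any other error): fall back to on-CPU encryption */
    wafl_sw_encrypt_block(bp);
}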
Code sample:

#define HWA_CRYPTO_SVC_SGL_SIZE     1
#define HWA_CRYPTO_SVC_COUNT        1
#define HWA_CRYPTO_TWEAK_SIZE       16

typedef struct hwa_crypto_svc_cb_args {
    TAILQ_ENTRY(hwa_crypto_svc_cb_args) cb_next;

    /* HW assist service specific information */
    bool svc_done;      /* Used for sync wait */
    raidio_t *raidio;
    wafl_Buf *bp;       /* WAFL buffer being encrypted */
    struct pnso_service_request *svc_req;
    struct pnso_service_result *svc_res;

    /* The following is the information used in the service */

    /* Input sgl list */
    char svc_input_sgl[sizeof(struct pnso_buffer_list) +
        sizeof(struct pnso_flat_buffer) * HWA_CRYPTO_SVC_SGL_SIZE];

    /* Request data */
    char svc_req_data[sizeof(struct pnso_service_request) +
        sizeof(struct pnso_service) * HWA_CRYPTO_SVC_COUNT];

    /* Request result */
    char svc_res_data[sizeof(struct pnso_service_result) +
        sizeof(struct pnso_service_status) * HWA_CRYPTO_SVC_COUNT];

    /* Output sgl for the encrypted data */
    char svc_output_sgl[sizeof(struct pnso_buffer_list) +
        sizeof(struct pnso_flat_buffer) * HWA_CRYPTO_SVC_SGL_SIZE];

    /* Tweak used for the crypto service */
    char crypto_tweak[HWA_CRYPTO_TWEAK_SIZE];
} hwa_crypto_svc_cb_args_t;

hwa_crypto_svc_cb_args_t *
hwa_pnso_crypto_callback_create()
{
    hwa_crypto_svc_cb_args_t *cb = NULL;
    int rc = 0;

    /* Allocate the callback memory for this service */
    cb = sk_kmem_zalloc(sizeof(hwa_crypto_svc_cb_args_t), M_DENSE, SKMA_NOWAIT);
    if (cb == NULL) {
        rc = 1;
        goto out;
    }

    cb->svc_req = (struct pnso_service_request *)&cb->svc_req_data;
    cb->svc_req->sgl = (struct pnso_buffer_list *)&cb->svc_input_sgl;
    cb->svc_res = (struct pnso_service_result *)&cb->svc_res_data;
    cb->svc_res->svc[0].u.dst.sgl = (struct pnso_buffer_list *)&cb->svc_output_sgl;

out:
    if (rc != 0) {
        hwa_pnso_crypto_callback_free(cb);
        cb = NULL;
    }
    return cb;
}

void
hwa_pnso_crypto_service_init(char *input, char *output, uint32_t len,
    enum pnso_service_type crypto_service, uint32_t key_idx, uint64_t iv_addr,
    hwa_crypto_svc_cb_args_t *cb)
{
    struct pnso_service_request *svc_req;
    struct pnso_service_result *svc_res;
    struct pnso_service *crypto_svc;
    struct pnso_service_status *crypto_svc_stat;

    svc_req = cb->svc_req;
    svc_res = cb->svc_res;

    /* Initialize the input data */
    svc_req->sgl->count = 1;
    svc_req->sgl->buffers[0].buf = (uint64_t)input;
    svc_req->sgl->buffers[0].len = len;

    /* Initialize the crypto service parameters */
    svc_req->num_services = 1;
    crypto_svc = &svc_req->svc[0];
    crypto_svc->svc_type = crypto_service;
    crypto_svc->u.crypto_desc.key_desc_idx = key_idx;
    crypto_svc->u.crypto_desc.iv_addr = iv_addr;

    /* Initialize the crypto status result */
    crypto_svc_stat = &svc_res->svc[0];
    crypto_svc_stat->u.dst.sgl->count = 1;
    crypto_svc_stat->u.dst.sgl->buffers[0].buf = (uint64_t)output;
    crypto_svc_stat->u.dst.sgl->buffers[0].len = len;
}
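hwa_pnso_crypto_callback_free() is used above and in the cleanup paths below but is not defined in the sample. A minimal sketch, assuming sk_kmem_zalloc() has a matching sk_kmem_free(ptr, size, type) counterpart (name and signature assumed here):

/*
 * Sketch (assumption): release the crypto callback context. sk_kmem_free()
 * is an assumed counterpart of sk_kmem_zalloc().
 */
void
hwa_pnso_crypto_callback_free(hwa_crypto_svc_cb_args_t *cb)
{
    if (cb == NULL) {
        return;
    }
    sk_kmem_free(cb, sizeof(hwa_crypto_svc_cb_args_t), M_DENSE);
}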
pnso_error_t
hwa_wafl_bulk_encrypt(raidio_t *raidio)
{
    wafl_Buf *bp;
    hwa_crypto_svc_cb_args_t *cb;
    pnso_error_t err;
    char *tweak;
    TAILQ_HEAD(, hwa_crypto_svc_cb_args) cb_list;

    TAILQ_INIT(&cb_list);

    /* Get each buffer in the tetris */
    while ((bp = get_next_bp_in_tetris(raidio)) != NULL) {
        /* Create the callback for the crypto service */
        cb = hwa_pnso_crypto_callback_create();     /* Assume non-blocking call */
        ASSERT(cb != NULL);
        cb->raidio = raidio;
        cb->bp = bp;

        /* Fill the tweak information */
        tweak = cb->crypto_tweak;
        wafl_crypto_fill_tweak(bp, tweak);

        /* Initialize the crypto service */
        hwa_pnso_crypto_service_init(bp->wb_io_data, bp->wb_io_encrypted_data,
            BLOCK_SIZE, PNSO_SVC_TYPE_ENCRYPT, bp->wb_key_index,
            (uint64_t)tweak, cb);

        TAILQ_INSERT_TAIL(&cb_list, cb, cb_next);

        /* Enqueue the service request */
        err = pnso_batch_request(cb->svc_req, cb->svc_res);
        if (err != PNSO_OK) {
            goto out;
        }
    }

    /* Flush the batch to start processing */
    err = pnso_batch_flush(hwa_pnso_crypto_done_handler, TAILQ_FIRST(&cb_list), NULL, NULL);
    if (err != PNSO_OK) {
        goto out;
    }

    /* Successful service submit */
    return PNSO_OK;

out:
    while (!TAILQ_EMPTY(&cb_list)) {
        cb = TAILQ_FIRST(&cb_list);
        TAILQ_REMOVE(&cb_list, cb, cb_next);
        hwa_pnso_crypto_callback_free(cb);
    }
    return err;
}

void
hwa_pnso_crypto_done_handler(hwa_crypto_svc_cb_args_t *cb)
{
    hwa_crypto_svc_cb_args_t *cur_cb, *next_cb;
    struct pnso_service_result *svc_res;
    raidio_t *raidio = cb->raidio;

    for (cur_cb = cb; cur_cb != NULL; cur_cb = next_cb) {
        /* Grab the next entry before the current callback is freed */
        next_cb = TAILQ_NEXT(cur_cb, cb_next);

        svc_res = cur_cb->svc_res;
        if (svc_res->err == PNSO_OK && svc_res->svc[0].err == PNSO_OK) {
            /* Mark the wafl buf that encryption is done */
            WAFL_BUF_R_GET(cur_cb->bp, RAID_BUF_ENCRYPTION_DONE);
        }

        /* Free the callback for the individual crypto service */
        hwa_pnso_crypto_callback_free(cur_cb);
    }
    move_to_next_stage(raidio);
}

Cksum and 4k compression service
================================

Workflow environment:
- We have 4K compression, used for compaction, in addition to the 8K/32K compression.
- The 4K compression is tried after deduplication is completed.
- Hence the service sent to the card is cksum + 4K compression at the same time.

#define HWA_CKSUM_COMPR_SVC_SGL_SIZE    1
#define HWA_CKSUM_COMPR_SVC_COUNT       2

typedef struct hwa_chksum_compr_svc_cb_args {
    TAILQ_ENTRY(hwa_chksum_compr_svc_cb_args) cb_next;

    /* WAFL specific information */
    wafl_ise_witem_t *witem;

    /* HW assist service specific information */
    bool svc_done;      /* Used for sync wait */
    struct pnso_service_request *svc_req;
    struct pnso_service_result *svc_res;

    /* The following is the information used in the service */

    /* Input sgl list */
    char svc_input_sgl[sizeof(struct pnso_buffer_list) +
        sizeof(struct pnso_flat_buffer) * HWA_CKSUM_COMPR_SVC_SGL_SIZE];

    /* Request data */
    char svc_req_data[sizeof(struct pnso_service_request) +
        sizeof(struct pnso_service) * HWA_CKSUM_COMPR_SVC_COUNT];

    /* Request result */
    char svc_res_data[sizeof(struct pnso_service_result) +
        sizeof(struct pnso_service_status) * HWA_CKSUM_COMPR_SVC_COUNT];

    /* Result destination buffers: compressed output sgl and checksum tag */
    char svc_dst_sgl[sizeof(struct pnso_buffer_list) +
        sizeof(struct pnso_flat_buffer) * HWA_CKSUM_COMPR_SVC_SGL_SIZE];
    char svc_dst_chksum[sizeof(struct pnso_chksum_tag)];
} hwa_chksum_compr_svc_cb_args_t;

hwa_chksum_compr_svc_cb_args_t *
hwa_pnso_chksum_compr_callback_create()
{
    hwa_chksum_compr_svc_cb_args_t *cb = NULL;
    int rc = 0;

    /* Allocate the callback memory for this service */
    cb = sk_kmem_zalloc(sizeof(hwa_chksum_compr_svc_cb_args_t), M_DENSE, SKMA_NOWAIT);
    if (cb == NULL) {
        rc = 1;
        goto out;
    }

    cb->svc_req = (struct pnso_service_request *)&cb->svc_req_data;
    cb->svc_req->sgl = (struct pnso_buffer_list *)&cb->svc_input_sgl;
    cb->svc_res = (struct pnso_service_result *)&cb->svc_res_data;
    /* Compression is the second service here; point its output sgl at the embedded buffer */
    cb->svc_res->svc[1].u.dst.sgl = (struct pnso_buffer_list *)&cb->svc_dst_sgl;

out:
    if (rc != 0) {
        cb = NULL;
    }
    return cb;
}
void
hwa_pnso_cksum_compress_service_init(char *input, char *output, uint16_t num_tags,
    struct pnso_chksum_tag *cksum_tags, enum pnso_chksum_type cksum_type,
    enum pnso_compressor_type comp_type, hwa_chksum_compr_svc_cb_args_t *cb)
{
    struct pnso_service_request *svc_req;
    struct pnso_service_result *svc_res;
    uint32_t svc_cnt;
    struct pnso_service *cmpr_svc_req, *cksum_svc_req;
    struct pnso_service_status *cmpr_svc_res, *cksum_svc_res;

    /* Prepare the service request and service status for the HW offload */
    svc_cnt = 0;
    svc_req = cb->svc_req;
    svc_res = cb->svc_res;

    /* Initialize the input data */
    svc_req->sgl->count = 1;
    svc_req->sgl->buffers[0].buf = (uint64_t)input;
    svc_req->sgl->buffers[0].len = BLOCK_SIZE;

    /* Initialize the cksum request */
    cksum_svc_req = &svc_req->svc[svc_cnt];
    cksum_svc_req->svc_type = PNSO_SVC_TYPE_CHKSUM;
    cksum_svc_req->u.chksum_desc.algo_type = cksum_type;
    cksum_svc_req->u.chksum_desc.flags = PNSO_DFLAG_CHKSUM_PER_BLOCK;

    /* Initialize the checksum output result */
    cksum_svc_res = &svc_res->svc[svc_cnt];
    cksum_svc_res->u.chksum.num_tags = num_tags;
    cksum_svc_res->u.chksum.tags = cksum_tags;
    svc_cnt = ++svc_req->num_services;

    /* Initialize the compress request */
    cmpr_svc_req = &svc_req->svc[svc_cnt];
    cmpr_svc_req->svc_type = PNSO_SVC_TYPE_COMPRESS;
    cmpr_svc_req->u.cp_desc.algo_type = comp_type;
    cmpr_svc_req->u.cp_desc.threshold_len = BLOCK_SIZE;
    cmpr_svc_req->u.cp_desc.flags = PNSO_DFLAG_ZERO_PAD;

    /* Initialize the compressed output buffer */
    cmpr_svc_res = &svc_res->svc[svc_cnt];
    cmpr_svc_res->u.dst.sgl->count = 1;
    cmpr_svc_res->u.dst.sgl->buffers[0].buf = (uint64_t)output;
    cmpr_svc_res->u.dst.sgl->buffers[0].len = BLOCK_SIZE;
    svc_cnt = ++svc_req->num_services;
}

pnso_error_t
hwa_wafl_bulk_cksum_compress(wafl_ise_witem_t *witem_list)
{
    wafl_ise_witem_t *witem;
    hwa_chksum_compr_svc_cb_args_t *cb;
    pnso_error_t err;
    TAILQ_HEAD(, hwa_chksum_compr_svc_cb_args) cb_list;

    TAILQ_INIT(&cb_list);

    /* Get each witem in the list */
    while ((witem = get_next_witem(witem_list)) != NULL) {
        /* Create the callback for the cksum/compress service */
        cb = hwa_pnso_chksum_compr_callback_create();   /* Assume non-blocking call */
        ASSERT(cb != NULL);
        cb->witem = witem;

        /* Initialize the checksum/compress service */
        hwa_pnso_cksum_compress_service_init(witem->input, witem->cmpr_data, 1,
            (struct pnso_chksum_tag *)cb->svc_dst_chksum, PNSO_CHKSUM_TYPE_MCRC64,
            PNSO_COMPRESSOR_TYPE_LZRW1A, cb);

        TAILQ_INSERT_TAIL(&cb_list, cb, cb_next);

        /* Enqueue the service request */
        err = pnso_batch_request(cb->svc_req, cb->svc_res);
        if (err != PNSO_OK) {
            goto out;
        }
    }

    /* Flush the batch to start processing */
    err = pnso_batch_flush(hwa_pnso_cksum_compress_done_handler, TAILQ_FIRST(&cb_list), NULL, NULL);
    if (err != PNSO_OK) {
        goto out;
    }

    /* Successful service submit */
    return PNSO_OK;

out:
    while (!TAILQ_EMPTY(&cb_list)) {
        cb = TAILQ_FIRST(&cb_list);
        TAILQ_REMOVE(&cb_list, cb, cb_next);
        hwa_pnso_chksum_compr_callback_free(cb);
    }
    return err;
}

void
hwa_pnso_cksum_compress_done_handler(hwa_chksum_compr_svc_cb_args_t *cb,
    struct pnso_service_result *svc_res)
{
    struct pnso_service_status *cksum_status, *cmpr_status;
    struct pnso_chksum_tag *cksum_tags;
    int dedup_res;

    if (svc_res->err != PNSO_OK) {
        /* The complete service has failed; free the resources */
        return;
    }

    cksum_status = &svc_res->svc[0];
    cmpr_status = &svc_res->svc[1];
    cksum_tags = cksum_status->u.chksum.tags;

    if (cksum_status->err == PNSO_OK) {
        /* Do the deduplication using the checksum tag */
        dedup_res = do_deduplication(cb->witem, cb->svc_req->sgl, cksum_tags[0]);

        /* Use the compressed output only when deduplication fails */
        if ((dedup_res != 0) && cmpr_status->err == PNSO_OK) {
            do_compression(cb->witem, cmpr_status->u.dst.sgl);
        }
    }

    /* Move to the next stage */
    move_to_next_stage(cb->witem);
}