/* * posixaio engine * * IO engine that uses the posix defined aio interface. * */ #include #include #include #include #include #include "../fio.h" struct posixaio_data { struct io_u **aio_events; unsigned int queued; }; static int fill_timespec(struct timespec *ts) { #ifdef CONFIG_CLOCK_GETTIME #ifdef CONFIG_CLOCK_MONOTONIC clockid_t clk = CLOCK_MONOTONIC; #else clockid_t clk = CLOCK_REALTIME; #endif if (!clock_gettime(clk, ts)) return 0; perror("clock_gettime"); return 1; #else struct timeval tv; gettimeofday(&tv, NULL); ts->tv_sec = tv.tv_sec; ts->tv_nsec = tv.tv_usec * 1000; return 0; #endif } static unsigned long long ts_utime_since_now(struct timespec *t) { long long sec, nsec; struct timespec now; if (fill_timespec(&now)) return 0; sec = now.tv_sec - t->tv_sec; nsec = now.tv_nsec - t->tv_nsec; if (sec > 0 && nsec < 0) { sec--; nsec += 1000000000; } sec *= 1000000; nsec /= 1000; return sec + nsec; } static int fio_posixaio_cancel(struct thread_data fio_unused *td, struct io_u *io_u) { struct fio_file *f = io_u->file; int r = aio_cancel(f->fd, &io_u->aiocb); if (r == AIO_ALLDONE || r == AIO_CANCELED) return 0; return 1; } static int fio_posixaio_prep(struct thread_data fio_unused *td, struct io_u *io_u) { os_aiocb_t *aiocb = &io_u->aiocb; struct fio_file *f = io_u->file; aiocb->aio_fildes = f->fd; aiocb->aio_buf = io_u->xfer_buf; aiocb->aio_nbytes = io_u->xfer_buflen; aiocb->aio_offset = io_u->offset; aiocb->aio_sigevent.sigev_notify = SIGEV_NONE; io_u->seen = 0; return 0; } #define SUSPEND_ENTRIES 8 static int fio_posixaio_getevents(struct thread_data *td, unsigned int min, unsigned int max, struct timespec *t) { struct posixaio_data *pd = td->io_ops->data; os_aiocb_t *suspend_list[SUSPEND_ENTRIES]; struct timespec start; int have_timeout = 0; int suspend_entries; struct io_u *io_u; unsigned int r; int i; if (t && !fill_timespec(&start)) have_timeout = 1; else memset(&start, 0, sizeof(start)); r = 0; restart: memset(suspend_list, 0, sizeof(*suspend_list)); suspend_entries = 0; io_u_qiter(&td->io_u_all, io_u, i) { int err; if (io_u->seen || !(io_u->flags & IO_U_F_FLIGHT)) continue; err = aio_error(&io_u->aiocb); if (err == EINPROGRESS) { if (suspend_entries < SUSPEND_ENTRIES) { suspend_list[suspend_entries] = &io_u->aiocb; suspend_entries++; } continue; } io_u->seen = 1; pd->queued--; pd->aio_events[r++] = io_u; if (err == ECANCELED) io_u->resid = io_u->xfer_buflen; else if (!err) { ssize_t retval = aio_return(&io_u->aiocb); io_u->resid = io_u->xfer_buflen - retval; } else io_u->error = err; } if (r >= min) return r; if (have_timeout) { unsigned long long usec; usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000); if (ts_utime_since_now(&start) > usec) return r; } /* * must have some in-flight, wait for at least one */ aio_suspend((const os_aiocb_t * const *)suspend_list, suspend_entries, t); goto restart; } static struct io_u *fio_posixaio_event(struct thread_data *td, int event) { struct posixaio_data *pd = td->io_ops->data; return pd->aio_events[event]; } static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u) { struct posixaio_data *pd = td->io_ops->data; os_aiocb_t *aiocb = &io_u->aiocb; int ret; fio_ro_check(td, io_u); if (io_u->ddir == DDIR_READ) ret = aio_read(aiocb); else if (io_u->ddir == DDIR_WRITE) ret = aio_write(aiocb); else if (io_u->ddir == DDIR_TRIM) { if (pd->queued) return FIO_Q_BUSY; do_io_u_trim(td, io_u); return FIO_Q_COMPLETED; } else { #ifdef CONFIG_POSIXAIO_FSYNC ret = aio_fsync(O_SYNC, aiocb); #else if (pd->queued) return FIO_Q_BUSY; do_io_u_sync(td, io_u); return FIO_Q_COMPLETED; #endif } if (ret) { int aio_err = aio_error(aiocb); /* * At least OSX has a very low limit on the number of pending * IOs, so if it returns EAGAIN, we are out of resources * to queue more. Just return FIO_Q_BUSY to naturally * drop off at this depth. */ if (aio_err == EAGAIN) return FIO_Q_BUSY; io_u->error = aio_err; td_verror(td, io_u->error, "xfer"); return FIO_Q_COMPLETED; } pd->queued++; return FIO_Q_QUEUED; } static void fio_posixaio_cleanup(struct thread_data *td) { struct posixaio_data *pd = td->io_ops->data; if (pd) { free(pd->aio_events); free(pd); } } static int fio_posixaio_init(struct thread_data *td) { struct posixaio_data *pd = malloc(sizeof(*pd)); memset(pd, 0, sizeof(*pd)); pd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *)); memset(pd->aio_events, 0, td->o.iodepth * sizeof(struct io_u *)); td->io_ops->data = pd; return 0; } static struct ioengine_ops ioengine = { .name = "posixaio", .version = FIO_IOOPS_VERSION, .init = fio_posixaio_init, .prep = fio_posixaio_prep, .queue = fio_posixaio_queue, .cancel = fio_posixaio_cancel, .getevents = fio_posixaio_getevents, .event = fio_posixaio_event, .cleanup = fio_posixaio_cleanup, .open_file = generic_open_file, .close_file = generic_close_file, .get_file_size = generic_get_file_size, }; static void fio_init fio_posixaio_register(void) { register_ioengine(&ioengine); } static void fio_exit fio_posixaio_unregister(void) { unregister_ioengine(&ioengine); }