test/read-mshot-empty: add test case for fully emptying all data

Kernel side explanation: === io_read_mshot() always relies on poll triggering retries, and this works fine as long as we do a retry per size of the buffer being read. The buffer size is given by the size of the buffer(s) in the given buffer group ID. But if we're reading less than what is available, then we don't always get to read everything that is available. For example, if the buffers available are 32 bytes and we have 64 bytes to read, then we'll correctly read the first 32 bytes and then wait for another poll trigger before we attempt the next read. This next poll trigger may never happen, in which case we just sit forever and never make progress, or it may trigger at some point in the future, and now we're just delivering the available data much later than we should have. io_read_mshot() could do retries itself, but that is wasteful as we'll be going through all of __io_read() again, and most likely in vain. Rather than do that, bump our poll reference count and have io_poll_check_events() do one more loop and check with vfs_poll() if we have more data to read. If we do, io_read_mshot() will get invoked again directly and we'll read the next chunk. === Test this by having twice the amount of data available as we have available in a single buffer. If the kernel supports read multishot and isn't behaving properly, this test case will stall waiting on CQEs and eventually get timed out by the test framework. Signed-off-by: Jens Axboe <[email protected]>
axboe · Jan 27, 2024 · a827766 · a827766
1 parent c44319a
commit a827766
Show file tree

Hide file tree

Showing 2 changed files with 151 additions and 0 deletions.
diff --git a/test/Makefile b/test/Makefile
@@ -145,6 +145,7 @@ test_srcs := \
 	probe.c \
 	read-before-exit.c \
 	read-mshot.c \
+	read-mshot-empty.c \
 	read-write.c \
 	recv-msgall.c \
 	recv-msgall-stream.c \

diff --git a/test/read-mshot-empty.c b/test/read-mshot-empty.c
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test that multishot read correctly keeps reading until all
+ *		data has been emptied. the original implementation failed
+ *		to do so, if the available buffer size was less than what
+ *		was available, hence requiring multiple reads to empty the
+ *		file buffer.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/time.h>
+
+#include "liburing.h"
+#include "helpers.h"
+
+#define BGID		17
+#define NR_BUFS		4
+#define BR_MASK		(NR_BUFS - 1)
+#define BUF_SIZE	32
+
+static int do_write(int fd, void *buf, int buf_size)
+{
+	int ret;
+
+	ret = write(fd, buf, buf_size);
+	if (ret < 0) {
+		perror("write");
+		return 0;
+	} else if (ret != buf_size) {
+		fprintf(stderr, "bad write size %d\n", ret);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void *thread_fn(void *data)
+{
+	char w1[BUF_SIZE], w2[BUF_SIZE];
+	int *fds = data;
+
+	memset(w1, 0x11, BUF_SIZE);
+	memset(w2, 0x22, BUF_SIZE);
+
+	if (!do_write(fds[1], w1, BUF_SIZE))
+		return NULL;
+	if (!do_write(fds[1], w2, BUF_SIZE))
+		return NULL;
+
+	usleep(100000);
+
+	if (!do_write(fds[1], w1, BUF_SIZE))
+		return NULL;
+	if (!do_write(fds[1], w2, BUF_SIZE))
+		return NULL;
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_buf_ring *br;
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	pthread_t thread;
+	int i, ret, fds[2];
+	void *buf, *tret;
+
+	if (pipe(fds) < 0) {
+		perror("pipe");
+		return T_EXIT_FAIL;
+	}
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "queue_init: %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret);
+	if (!br) {
+		if (ret == -EINVAL)
+			return T_EXIT_SKIP;
+		fprintf(stderr, "failed buffer ring %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	buf = malloc(NR_BUFS * BUF_SIZE);
+	for (i = 0; i < NR_BUFS; i++) {
+		void *this_buf = buf + i * BUF_SIZE;
+
+		io_uring_buf_ring_add(br, this_buf, BUF_SIZE, i, BR_MASK, i);
+	}
+	io_uring_buf_ring_advance(br, NR_BUFS);
+
+	sqe = io_uring_get_sqe(&ring);
+	io_uring_prep_read_multishot(sqe, fds[0], 0, 0, BGID);
+
+	ret = io_uring_submit(&ring);
+	if (ret != 1) {
+		fprintf(stderr, "bad submit %d\n", ret);
+		return T_EXIT_FAIL;
+	}
+
+	/*
+	 * read multishot not available would be ready as a cqe when
+	 * submission returns, check and skip if not.
+	 */
+	ret = io_uring_peek_cqe(&ring, &cqe);
+	if (!ret) {
+		if (cqe->res == -EINVAL || cqe->res == -EBADF)
+			return T_EXIT_SKIP;
+	}
+
+	pthread_create(&thread, NULL, thread_fn, fds);
+
+	for (i = 0; i < 4; i++) {
+		int buf_index;
+
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait %d\n", ret);
+			break;
+		}
+
+		if (cqe->res != BUF_SIZE) {
+			fprintf(stderr, "size %d\n", cqe->res);
+			return T_EXIT_FAIL;
+		}
+		if (!(cqe->flags & IORING_CQE_F_BUFFER)) {
+			fprintf(stderr, "buffer not set\n");
+			return T_EXIT_FAIL;
+		}
+		if (!(cqe->flags & IORING_CQE_F_MORE)) {
+			fprintf(stderr, "more not set\n");
+			return T_EXIT_FAIL;
+		}
+		buf_index = cqe->flags >> 16;
+		assert(buf_index >= 0 && buf_index <= NR_BUFS);
+		io_uring_cqe_seen(&ring, cqe);
+	}
+
+	pthread_join(thread, &tret);
+	return T_EXIT_PASS;
+}