Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fs/fuse/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o

fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o fuse_dlm_cache.o compound.o
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o fuse_dlm_cache.o compound.o gds.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
Expand Down
56 changes: 56 additions & 0 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1832,6 +1832,56 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
return err;
}

static int fuse_notify_register_gds_netdev(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
char netdev_name[256];
int err;

err = -EINVAL;
if (size >= sizeof(netdev_name))
goto copy_finish;

err = fuse_copy_one(cs, netdev_name, size);
if (err)
goto copy_finish;

netdev_name[size] = '\0';
fuse_copy_finish(cs);

err = fuse_dmabuf_register_netdev(fc, netdev_name);
return err;

copy_finish:
fuse_copy_finish(cs);
return err;
}

static int fuse_notify_unregister_gds_netdev(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
char netdev_name[256];
int err;

err = -EINVAL;
if (size >= sizeof(netdev_name))
goto copy_finish;

err = fuse_copy_one(cs, netdev_name, size);
if (err)
goto copy_finish;

netdev_name[size] = '\0';
fuse_copy_finish(cs);

err = fuse_dmabuf_unregister_netdev(fc, netdev_name);
return err;

copy_finish:
fuse_copy_finish(cs);
return err;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
Expand All @@ -1857,6 +1907,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_DELETE:
return fuse_notify_delete(fc, size, cs);

case FUSE_NOTIFY_REGISTER_GDS_NETDEV:
return fuse_notify_register_gds_netdev(fc, size, cs);

case FUSE_NOTIFY_UNREGISTER_GDS_NETDEV:
return fuse_notify_unregister_gds_netdev(fc, size, cs);

default:
fuse_copy_finish(cs);
return -EINVAL;
Expand Down
62 changes: 56 additions & 6 deletions fs/fuse/dev_uring.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
"Enable userspace communication through io-uring");

#define FUSE_URING_IOV_SEGS 2 /* header and payload */
#define FUSE_URING_IOV_SEGS 3 /* header, payload and mr */
#define FUSE_RING_HEADER_PG 0
#define FUSE_RING_PAYLOAD_PG 1
#define FUSE_RING_PAYLOAD_MR_PG 2
#define FUSE_URING_IOV_SEGS_COMPAT 2 /* header and payload */

/* Threshold that determines if a better queue should be searched for */
#define FUSE_URING_Q_THRESHOLD 2
Expand Down Expand Up @@ -231,6 +233,7 @@ void fuse_uring_destruct(struct fuse_conn *fc)
io_pages_free(&ent->header_pages, ent->nr_header_pages);
io_pages_free(&ent->payload_pages,
ent->nr_payload_pages);
fuse_dmabuf_clear_sgt(&ent->dmabuf_ent);
kfree(ent);
}

Expand Down Expand Up @@ -837,13 +840,32 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
return err ? -EFAULT : 0;
}

/*
 * Fill in the memory-region descriptor of @req before the request is copied
 * to the ring entry @ent.
 *
 * Requests that are not flagged is_gds get their descriptor type set to
 * FUSE_MR_NONE. For is_gds requests the descriptor must already be of type
 * FUSE_MR_DMABUF; the sgt value it carries is handed to
 * fuse_dmabuf_set_sgt() for the entry's dmabuf_ent, and the entry's
 * dmabuf fd is written back into the descriptor for userspace.
 *
 * NOTE(review): a type mismatch on a GDS request hits BUG_ON() and takes the
 * machine down; this function returns void, so it has no way to report the
 * error instead - worth revisiting together with the caller.
 */
static void fuse_uring_prepare_mr(struct fuse_ring_ent *ent,
				  struct fuse_req *req)
{
	struct fuse_mr_in *mr_in = &req->args->mr.mr_in;
	struct fuse_refcnt_sgt *sgt;

	/* Plain (non-GDS) request: nothing to attach, just tag the descriptor. */
	if (!req->args->is_gds) {
		mr_in->type = FUSE_MR_NONE;
		return;
	}

	BUG_ON(mr_in->type != FUSE_MR_DMABUF);

	/* Hand the request's scatter-gather table to this entry's DMA-buf. */
	sgt = (struct fuse_refcnt_sgt *)mr_in->rdma_dmabuf.sgt;
	fuse_dmabuf_set_sgt(&ent->dmabuf_ent, sgt);

	/* Publish the entry's DMA-buf file descriptor to userspace. */
	mr_in->rdma_dmabuf.dmabuf_fd = ent->dmabuf_ent.fd;
}

static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
struct fuse_req *req)
{
struct fuse_ring_queue *queue = ent->queue;
struct fuse_ring *ring = queue->ring;
int err;
struct fuse_uring_req_header *headers = NULL;
struct fuse_mr *mr;

err = -EIO;
if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
Expand All @@ -856,13 +878,19 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
if (WARN_ON(req->in.h.unique == 0))
return err;

fuse_uring_prepare_mr(ent, req);

/* copy fuse_in_header */
if (ent->header_pages) {
headers = kmap_local_page(
ent->header_pages[FUSE_RING_HEADER_PG]);

memcpy(&headers->in_out, &req->in.h, sizeof(req->in.h));

/* copy MR info located after header in same page */
mr = (struct fuse_mr *)(headers + 1);
memcpy(&mr->mr_in, &req->args->mr.mr_in, sizeof(struct fuse_mr_in));

err = fuse_uring_args_to_ring_pages(ring, req, ent, headers);
kunmap_local(headers);
} else {
Expand All @@ -874,6 +902,10 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
}
err = copy_to_user(&ent->headers->in_out, &req->in.h,
sizeof(req->in.h));
if (!err) {
err = copy_to_user(ent->headers + 1, &req->args->mr.mr_in,
sizeof(struct fuse_mr_in));
}
if (err)
err = -EFAULT;
}
Expand Down Expand Up @@ -1187,15 +1219,15 @@ static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
struct iov_iter iter;
ssize_t ret;

if (sqe->len != FUSE_URING_IOV_SEGS)
if (sqe->len < FUSE_URING_IOV_SEGS_COMPAT || sqe->len > FUSE_URING_IOV_SEGS)
return -EINVAL;

/*
* Direction for buffer access will actually be READ and WRITE,
* using write for the import should include READ access as well.
*/
ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
FUSE_URING_IOV_SEGS, &iov, &iter);
ret = import_iovec(WRITE, uiov, sqe->len,
sqe->len, &iov, &iter);
if (ret < 0)
return ret;

Expand Down Expand Up @@ -1296,14 +1328,32 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
ent->headers = iov[0].iov_base;
ent->payload = iov[1].iov_base;

/* Payload MR is optional - iov[2] indicates existence, actual location follows header */
if (cmd->sqe->len > FUSE_RING_PAYLOAD_MR_PG)
ent->payload_mr = iov[FUSE_RING_PAYLOAD_MR_PG].iov_base;
else
ent->payload_mr = NULL;

err = fuse_uring_pin_pages(ent);
if (err) {
kfree(ent);
return ERR_PTR(err);
goto out;
}

err = fuse_create_dmabuf(&ent->dmabuf_ent, payload_size);
if (err) {
goto dmabuf_out;
}

atomic_inc(&ring->queue_refs);
return ent;

dmabuf_out:
io_pages_free(&ent->header_pages, ent->nr_header_pages);
io_pages_free(&ent->payload_pages, ent->nr_payload_pages);

out:
kfree(ent);
return ERR_PTR(err);
}

/*
Expand Down
5 changes: 5 additions & 0 deletions fs/fuse/dev_uring_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#ifndef _FS_FUSE_DEV_URING_I_H
#define _FS_FUSE_DEV_URING_I_H

#include "gds.h"
#include "fuse_i.h"

#ifdef CONFIG_FUSE_IO_URING
Expand Down Expand Up @@ -45,6 +46,10 @@ struct fuse_ring_ent {
void __user *payload;
struct page **payload_pages;
int nr_payload_pages;
void __user *payload_mr;

/* DMA-buf object info for GPU Direct Storage */
struct fuse_dmabuf_entry dmabuf_ent;

/* the ring queue that owns the request */
struct fuse_ring_queue *queue;
Expand Down
53 changes: 49 additions & 4 deletions fs/fuse/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "fuse_i.h"
#include "fuse_dlm_cache.h"
#include "gds.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
Expand Down Expand Up @@ -682,9 +683,19 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->in_numargs = 1;
args->in_args[0].size = sizeof(ia->read.in);
args->in_args[0].value = &ia->read.in;

if (args->is_gds) {
/* file data is passed through RDMA, the read size is returned in out.args[0] */
args->user_pages = false;
args->out_pages = false;
args->out_args[0].size = sizeof(ia->read.out);
args->out_args[0].value = &ia->read.out;
}
else {
args->out_args[0].size = count;
}
args->out_argvar = true;
args->out_numargs = 1;
args->out_args[0].size = count;
}

static void fuse_release_user_pages(struct fuse_args_pages *ap,
Expand Down Expand Up @@ -846,6 +857,8 @@ static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
struct file *file = ia->io->iocb->ki_filp;
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
struct fuse_args *args = &ia->ap.args;
int err;

fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
if (owner != NULL) {
Expand All @@ -856,7 +869,14 @@ static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count,
if (ia->io->async)
return fuse_async_req_send(fm, ia, count);

return fuse_simple_request(fm, &ia->ap.args);
err = fuse_simple_request(fm, &ia->ap.args);

/* Handle different return values of fuse_simple_request: regular read returns
* bytes read, GDS read returns sizeof(ia->read.out) - normalize to bytes read */
if (args->is_gds && err == sizeof(ia->read.out)) {
err = ia->read.out.size;
}
return err;
}

static void fuse_read_update_size(struct inode *inode, loff_t size,
Expand Down Expand Up @@ -1099,13 +1119,23 @@ static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff,
ia->write.in.size = count;
args->opcode = FUSE_WRITE;
args->nodeid = ff->nodeid;
args->in_numargs = 2;
args->in_numargs = 1;
if (ff->fm->fc->minor < 9)
args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
else
args->in_args[0].size = sizeof(ia->write.in);
args->in_args[0].value = &ia->write.in;
args->in_args[1].size = count;

if (args->is_gds) {
/* skip data copy */
args->user_pages = false;
args->in_pages = false;
}
else {
args->in_numargs++;
args->in_args[1].size = count;
}

args->out_numargs = 1;
args->out_args[0].size = sizeof(ia->write.out);
args->out_args[0].value = &ia->write.out;
Expand Down Expand Up @@ -1596,8 +1626,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
struct fuse_io_args *ia;
unsigned int max_pages;
bool fopen_direct_io = ff->open_flags & FOPEN_DIRECT_IO;
bool is_gds = false;

max_pages = iov_iter_npages(iter, fc->max_pages);

ia = fuse_io_alloc(io, max_pages);
if (!ia)
return -ENOMEM;
Expand Down Expand Up @@ -1636,6 +1668,15 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (err && !nbytes)
break;

if (fuse_is_gds_buffer(&ia->ap)) {
is_gds = true;
err = fuse_gds_map_sg(fc, write, ia);
if (err) {
fuse_release_user_pages(&ia->ap, io->should_dirty);
break;
}
}

if (write) {
if (!capable(CAP_FSETID))
ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;
Expand All @@ -1645,6 +1686,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
nres = fuse_send_read(ia, pos, nbytes, owner);
}

if (is_gds) {
fuse_gds_unmap_sg(fc, write, ia);
}

if (!io->async || nres < 0) {
fuse_release_user_pages(&ia->ap, io->should_dirty);
fuse_io_free(ia);
Expand Down
Loading