Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions include/posix/sys/ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@
#ifndef ZEPHYR_INCLUDE_POSIX_SYS_IOCTL_H_
#define ZEPHYR_INCLUDE_POSIX_SYS_IOCTL_H_

#include <stdarg.h>

__syscall int sys_ioctl(int fd, unsigned long request, long n_args, uintptr_t *args);
int ioctl(int fd, unsigned long request, ...);

#define FIONBIO 0x5421

#ifndef CONFIG_ARCH_POSIX
#include <syscalls/ioctl.h>
#endif /* CONFIG_ARCH_POSIX */

#endif /* ZEPHYR_INCLUDE_POSIX_SYS_IOCTL_H_ */
14 changes: 13 additions & 1 deletion include/posix/unistd.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@ extern "C" {
#endif

#ifdef CONFIG_POSIX_API
/* File related operations */
/* File related operations. Convention: "sys_name" is a syscall (needs
* prototype in this file for usage). "name" is a normal userspace
* function (implemented as a wrapper for syscall), usable even
* without prototype, per classical C handling. This distinction
* is however implemented on demand, based on the actual usecases seen.
*/
__syscall int sys_close(int file);
__syscall ssize_t sys_write(int file, const void *buffer, size_t count);
__syscall ssize_t sys_read(int file, void *buffer, size_t count);
extern int close(int file);
extern ssize_t write(int file, const void *buffer, size_t count);
extern ssize_t read(int file, void *buffer, size_t count);
Expand Down Expand Up @@ -50,4 +58,8 @@ int usleep(useconds_t useconds);
}
#endif

#ifndef CONFIG_ARCH_POSIX
#include <syscalls/unistd.h>
#endif /* CONFIG_ARCH_POSIX */

#endif /* ZEPHYR_INCLUDE_POSIX_UNISTD_H_ */
30 changes: 21 additions & 9 deletions include/sys/fdtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ extern "C" {
struct fd_op_vtable {
ssize_t (*read)(void *obj, void *buf, size_t sz);
ssize_t (*write)(void *obj, const void *buf, size_t sz);
int (*ioctl)(void *obj, unsigned int request, va_list args);
int (*ioctl)(void *obj, unsigned long request,
long n_args, uintptr_t *args);
};

/**
Expand Down Expand Up @@ -113,14 +114,20 @@ void *z_get_fd_obj_and_vtable(int fd, const struct fd_op_vtable **vtable);
* @param ... Variadic arguments to ioctl
*/
static inline int z_fdtable_call_ioctl(const struct fd_op_vtable *vtable, void *obj,
unsigned long request, ...)
unsigned long request, int n_args, ...)
{
va_list args;
int res;
va_list varargs;
int i, res;
uintptr_t args[3];

va_start(args, request);
res = vtable->ioctl(obj, request, args);
va_end(args);
__ASSERT_NO_MSG(n_args <= ARRAY_SIZE(args));

va_start(varargs, n_args);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's important here that the VA argument to ioctl can only ever be a single argument. Older specs took advantage of looser typing rules in C, and this is only really a way of allowing various pointer types to be passed to this function. It is quite reasonable to make this a single pointer argument.

It might also be helpful to look at the Linux implementation (or BSD), which uses various bits of the request to determine exactly how much data is expected in this argument, and even which direction data flows.

We definitely shouldn't be passing n_args/args through the systemcall. A single pointer is fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might also be helpful to look at the Linux implementation (or BSD)

I don't try to "lift" implementation from other systems, together with associated copyright and licensing issues. I'm definitely aware of Linux maintainers' dislike for ioctl(), and understand why - just as ioctl() was a nice, clean, clever idea in the original Unix, just as it is unnice and unclear with all the "modern systems".

We definitely shouldn't be passing n_args/args through the systemcall. A single pointer is fine.

Someone will need to elaborate in detail on those points.

It's otherwise pretty clear that syscall validation rules will need to be extended for the ioctl() case. That's why I'm questioning @andrewboie for details on both (intended) design and (actual) implementation of them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point here is that the "..." in the ioctl call is a modern artifact, and that ioctl calls should always have exactly one pointer argument. The "..." is needed because it isn't really possible to get it right as some kind of generic pointer. But there should never be an instance of 2 or more arguments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@d3zd3z:

My point here is that that "..." in the ioctl call is a modern artifact

Can you elaborate on this a bit more? ("modern artifact")

But, there should never be an instance of 2, or more arguments.

Regardless of the possible answer to the above question, the reason why ioctl() was brought in here is exactly to allow passing multiple arguments. ioctl() is used both for userspace->kernel and kernel->kernel communication. The userspace->kernel subset is really lean so far - as you may imagine, nobody wants to add the complete Linux mess in there. The usecase behind supporting userspace ioctl() is, well, application portability from POSIX-ish systems to Zephyr. I.e. as people port more apps, they will find which ioctl() request types are needed, and can implement them, then maybe even contribute them upstream (or maybe keep them out of tree in forks). The idea is, again, to provide a general interface which would work for any request type (and its params).

Now, kernel->kernel ioctl is used to generically implement some processing between implementations of the same interface (file descriptor) in the kernel. E.g., there's a generic implementation of poll(), which uses kernel-ioctl to implement processing sub-steps polymorphically across different file descriptor objects. And ioctl is used there to save on code size. It is all the more ironic that protected userspace now adds much more overhead to it (i.e. without the split: code size savings compared to the "middle line"; with the kernel/userspace split: code size bloat).

You're looking exactly at the support APIs for kernel->kernel ioctls.

All this may be not ideal, but nothing is ideal here, I look at support infra/docs for syscalls, and find it not ideal. This patch has a specific goal - set framework to allow all this work with kernel/userspace split. It doesn't do any further revolutions, where varargs calls were used before for kernel-kernel ioctl, there they're used now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point here is that that "..." in the ioctl call is a modern artifact, and that ioctl calls should always have exactly one pointer argument. The "..." is needed because it isn't really possible to get it right as some kind of generic pointer. But, there should never be an instance of 2, or more arguments.

@d3zd3z indeed, this is obvious from the documentation:

DESCRIPTION         top

       The ioctl() system call manipulates the underlying device parameters
       of special files.  In particular, many operating characteristics of
       character special files (e.g., terminals) may be controlled with
       ioctl() requests.  The argument fd must be an open file descriptor.

       The second argument is a device-dependent request code.  The third
       argument is an untyped pointer to memory.  It's traditionally char
       *argp (from the days before void * was valid C), and will be so named
       for this discussion.

       An ioctl() request has encoded in it whether the argument is an in
       parameter or out parameter, and the size of the argument argp in
       bytes.  Macros and defines used in specifying an ioctl() request are
       located in the file <sys/ioctl.h>.

ioctl() has never been something you can send lots of arguments to, and this is not ambiguous:

The third argument is an untyped pointer to memory. It's traditionally char *argp (from the days before void * was valid C), and will be so named for this discussion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The third argument is an untyped pointer to memory.

Ok, as explained above, ->ioctl() is used to implement both userspace->kernel (let's call it just "ioctl") and kernel->kernel (let's call it "kioctl") functions. While userspace ioctl() accepts only one param, it's a matter of fact that kioctl() accepts multiple. I explained why this was done - to save on the size of vtable pointers and function prologues/epilogues. Good or bad, that's how it is. If someone wants to change that, go ahead and submit a proposal and prototype refactor.

This patch is concerned with different matter - allowing ioctl() to work across userspace/kernel, while preserving how it works currently for kernel->kernel. (I agree that if userspace # of arg is fixed, the patch can be simplified. If there's something to say, then it's that I shouldn't have started on that while being assigned to other big and deep tasks, and multitasking among them, forgetting and overlooking such points.)

for (i = 0; i < n_args; i++) {
args[i] = va_arg(varargs, uintptr_t);
}
res = vtable->ioctl(obj, request, n_args, args);
va_end(varargs);

return res;
}
Expand All @@ -138,10 +145,15 @@ enum {
ZFD_IOCTL_CLOSE = 0x100,
ZFD_IOCTL_FSYNC,
ZFD_IOCTL_LSEEK,
ZFD_IOCTL_POLL_PREPARE,
ZFD_IOCTL_GETSOCKNAME,

/* Codes at or above ZFD_IOCTL_PRIVATE (0xff00) are private kernel-only
 * requests, not available from userspace.
 */
ZFD_IOCTL_PRIVATE = 0xff00,
ZFD_IOCTL_POLL_PREPARE = ZFD_IOCTL_PRIVATE,
ZFD_IOCTL_POLL_UPDATE,
ZFD_IOCTL_POLL_OFFLOAD,
ZFD_IOCTL_GETSOCKNAME,
};

#ifdef __cplusplus
Expand Down
155 changes: 137 additions & 18 deletions lib/os/fdtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,21 @@
#include <kernel.h>
#include <sys/fdtable.h>
#include <sys/speculation.h>
#ifdef CONFIG_POSIX_API
#include <posix/unistd.h>
#include <posix/sys/ioctl.h>
#endif
#include <syscall_handler.h>

/* Number of arguments which can be passed to public ioctl calls
* (i.e. from userspace to kernel space).
* Arbitrary value is supported (at the expense of stack usage). Can
* be increased when ioctl's with more arguments are added.
* Note that kernelspace-kernelspace ioctl calls are handled
* differently (in z_fdtable_call_ioctl()).
*/
#define MAX_USERSPACE_IOCTL_ARGS 1

struct fd_entry {
void *obj;
const struct fd_op_vtable *vtable;
Expand Down Expand Up @@ -167,39 +180,91 @@ int z_alloc_fd(void *obj, const struct fd_op_vtable *vtable)

#ifdef CONFIG_POSIX_API

ssize_t read(int fd, void *buf, size_t sz)
/* Kernel-side body of the sys_read() syscall.
 *
 * Rejects the call with -1 when _check_fd() reports a problem with fd;
 * otherwise forwards buf/sz to the read method of the object registered
 * in the fd table and returns that method's result unchanged.
 */
ssize_t z_impl_sys_read(int fd, void *buf, size_t sz)
{
	void *target;

	if (_check_fd(fd) >= 0) {
		target = fdtable[fd].obj;

		return fdtable[fd].vtable->read(target, buf, sz);
	}

	return -1;
}

#ifdef CONFIG_USERSPACE
/* Verification handler for the sys_read() syscall.
 *
 * The caller-supplied buffer must pass the Z_SYSCALL_MEMORY_WRITE check
 * for sz bytes. If it does, the request is handed to z_impl_sys_read();
 * if it does not, the call fails with -1 and errno set to EFAULT.
 */
ssize_t z_vrfy_sys_read(int fd, void *buf, size_t sz)
{
	if (!Z_SYSCALL_MEMORY_WRITE(buf, sz)) {
		return z_impl_sys_read(fd, buf, sz);
	}

	errno = EFAULT;

	return -1;
}
#include <syscalls/sys_read_mrsh.c>
#endif /* CONFIG_USERSPACE */

/* Ordinary C entry point for read(). It exists only to give callers a
 * function with classic C linkage; all work is delegated to the
 * sys_read() syscall and its result is returned as-is.
 */
ssize_t read(int fd, void *buf, size_t sz)
{
	ssize_t nread = sys_read(fd, buf, sz);

	return nread;
}
FUNC_ALIAS(read, _read, ssize_t);

ssize_t write(int fd, const void *buf, size_t sz)
/* Kernel-side body of the sys_write() syscall.
 *
 * Rejects the call with -1 when _check_fd() reports a problem with fd;
 * otherwise forwards buf/sz to the write method of the object registered
 * in the fd table and returns that method's result unchanged.
 */
ssize_t z_impl_sys_write(int fd, const void *buf, size_t sz)
{
	void *target;

	if (_check_fd(fd) >= 0) {
		target = fdtable[fd].obj;

		return fdtable[fd].vtable->write(target, buf, sz);
	}

	return -1;
}

#ifdef CONFIG_USERSPACE
/* Verification handler for the sys_write() syscall.
 *
 * The result of the Z_SYSCALL_MEMORY_READ check on the caller-supplied
 * buffer (sz bytes) is passed to Z_OOPS() before the request is handed
 * to z_impl_sys_write(), whose result is returned unchanged.
 */
ssize_t z_vrfy_sys_write(int fd, const void *buf, size_t sz)
{
	ssize_t written;

	Z_OOPS(Z_SYSCALL_MEMORY_READ(buf, sz));
	written = z_impl_sys_write(fd, buf, sz);

	return written;
}
#include <syscalls/sys_write_mrsh.c>
#endif /* CONFIG_USERSPACE */

/* Ordinary C entry point for write(). It exists only to give callers a
 * function with classic C linkage; all work is delegated to the
 * sys_write() syscall and its result is returned as-is.
 */
ssize_t write(int fd, const void *buf, size_t sz)
{
	ssize_t written = sys_write(fd, buf, sz);

	return written;
}
FUNC_ALIAS(write, _write, ssize_t);

int close(int fd)
int z_impl_sys_close(int fd)
{
int res;

if (_check_fd(fd) < 0) {
return -1;
}

res = z_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj, ZFD_IOCTL_CLOSE);
res = z_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj, ZFD_IOCTL_CLOSE, 0);
z_free_fd(fd);

return res;
}

#ifdef CONFIG_USERSPACE
/* Verification handler for the sys_close() syscall.
 *
 * There is no user memory to validate: the only argument is the file
 * descriptor, which z_impl_sys_close() checks itself via _check_fd().
 *
 * The return type is int so that it matches both the syscall prototype
 * (__syscall int sys_close(int file)) and z_impl_sys_close().
 */
int z_vrfy_sys_close(int fd)
{
	return z_impl_sys_close(fd);
}
#include <syscalls/sys_close_mrsh.c>
#endif /* CONFIG_USERSPACE */

/* Ordinary C entry point for close(): a thin shim that delegates to the
 * sys_close() syscall and returns its result as-is.
 */
int close(int fd)
{
	int status = sys_close(fd);

	return status;
}
FUNC_ALIAS(close, _close, int);

int fsync(int fd)
Expand All @@ -208,7 +273,8 @@ int fsync(int fd)
return -1;
}

return z_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj, ZFD_IOCTL_FSYNC);
return z_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj,
ZFD_IOCTL_FSYNC, 0);
}

off_t lseek(int fd, off_t offset, int whence)
Expand All @@ -217,22 +283,78 @@ off_t lseek(int fd, off_t offset, int whence)
return -1;
}

return z_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj, ZFD_IOCTL_LSEEK,
offset, whence);
return z_fdtable_call_ioctl(fdtable[fd].vtable, fdtable[fd].obj,
ZFD_IOCTL_LSEEK,
2, offset, whence);
}
FUNC_ALIAS(lseek, _lseek, off_t);

int ioctl(int fd, unsigned long request, ...)
int z_impl_sys_ioctl(int fd, unsigned long request, long n_args, uintptr_t *args)
{
va_list args;
int res;

if (_check_fd(fd) < 0) {
return -1;
}

return fdtable[fd].vtable->ioctl(fdtable[fd].obj, request, n_args, args);
}

#ifdef CONFIG_USERSPACE
ssize_t z_vrfy_sys_ioctl(int fd, unsigned long request, long n_args, uintptr_t *args)
{
Z_OOPS(Z_SYSCALL_MEMORY_READ(args, sizeof(*args) * n_args));

if (request >= ZFD_IOCTL_PRIVATE) {
errno = EINVAL;
return -1;
}

return z_impl_sys_ioctl(fd, request, n_args, args);
}
#include <syscalls/sys_ioctl_mrsh.c>
#endif /* CONFIG_USERSPACE */

/* Common back end for the variadic userspace entry points.
 *
 * Works out how many variadic arguments the given request carries,
 * packs them into an array of uintptr_t and issues the sys_ioctl()
 * syscall with that array.
 *
 * Returns the result of sys_ioctl(), or -1 with errno set to EINVAL for
 * a request this function does not know, or to EDOM when the request
 * needs more arguments than MAX_USERSPACE_IOCTL_ARGS allows.
 */
static int _vioctl(int fd, unsigned long request, va_list args)
{
	/* Each argument is fetched as a uintptr_t, on the assumption that
	 * variadic arguments are passed using the platform's natural word
	 * size (e.g. on an LP64 platform a 32-bit int is still passed as a
	 * 64-bit value).
	 */
	uintptr_t packed[MAX_USERSPACE_IOCTL_ARGS];
	int argc;
	int idx = 0;

	/* Per-request argument count. */
	if (request == F_GETFL) {
		argc = 0;
	} else if (request == F_SETFL) {
		argc = 1;
	} else {
		errno = EINVAL;
		return -1;
	}

	if (argc > ARRAY_SIZE(packed)) {
		/* Deliberately distinct from the EINVAL used above. */
		errno = EDOM;
		return -1;
	}

	while (idx < argc) {
		packed[idx] = va_arg(args, uintptr_t);
		idx++;
	}

	return sys_ioctl(fd, request, argc, packed);
}

int ioctl(int fd, unsigned long request, ...)
{
va_list args;
int res;

va_start(args, request);
res = fdtable[fd].vtable->ioctl(fdtable[fd].obj, request, args);
res = _vioctl(fd, request, args);
va_end(args);

return res;
Expand All @@ -249,10 +371,6 @@ int fcntl(int fd, int cmd, ...)
va_list args;
int res;

if (_check_fd(fd) < 0) {
return -1;
}

/* Handle fdtable commands. */
switch (cmd) {
case F_DUPFD:
Expand All @@ -263,7 +381,7 @@ int fcntl(int fd, int cmd, ...)

/* The rest of commands are per-fd, handled by ioctl vmethod. */
va_start(args, cmd);
res = fdtable[fd].vtable->ioctl(fdtable[fd].obj, cmd, args);
res = _vioctl(fd, cmd, args);
va_end(args);

return res;
Expand Down Expand Up @@ -292,7 +410,8 @@ static ssize_t stdinout_write_vmeth(void *obj, const void *buffer, size_t count)
#endif
}

static int stdinout_ioctl_vmeth(void *obj, unsigned int request, va_list args)
static int stdinout_ioctl_vmeth(void *obj, unsigned long request,
long n_args, uintptr_t *args)
{
errno = EINVAL;
return -1;
Expand Down
15 changes: 8 additions & 7 deletions lib/posix/eventfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ static ssize_t eventfd_write_op(void *obj, const void *buf, size_t sz)
return sizeof(eventfd_t);
}

static int eventfd_ioctl_op(void *obj, unsigned int request, va_list args)
static int eventfd_ioctl_op(void *obj, unsigned long request,
long n_args, uintptr_t *args)
{
struct eventfd *efd = (struct eventfd *)obj;

Expand All @@ -144,7 +145,7 @@ static int eventfd_ioctl_op(void *obj, unsigned int request, va_list args)
case F_SETFL: {
int flags;

flags = va_arg(args, int);
flags = (int)args[0];

if (flags & ~EFD_FLAGS_SET) {
errno = EINVAL;
Expand All @@ -165,9 +166,9 @@ static int eventfd_ioctl_op(void *obj, unsigned int request, va_list args)
struct k_poll_event **pev;
struct k_poll_event *pev_end;

pfd = va_arg(args, struct zsock_pollfd *);
pev = va_arg(args, struct k_poll_event **);
pev_end = va_arg(args, struct k_poll_event *);
pfd = (struct zsock_pollfd *)args[0];
pev = (struct k_poll_event **)args[1];
pev_end = (struct k_poll_event *)args[2];

return eventfd_poll_prepare(obj, pfd, pev, pev_end);
}
Expand All @@ -176,8 +177,8 @@ static int eventfd_ioctl_op(void *obj, unsigned int request, va_list args)
struct zsock_pollfd *pfd;
struct k_poll_event **pev;

pfd = va_arg(args, struct zsock_pollfd *);
pev = va_arg(args, struct k_poll_event **);
pfd = (struct zsock_pollfd *)args[0];
pev = (struct k_poll_event **)args[1];

return eventfd_poll_update(obj, pfd, pev);
}
Expand Down
Loading