From f6116db5934b8dccbdb8f119f8abd0f1000717fd Mon Sep 17 00:00:00 2001 From: mohanson Date: Fri, 19 Apr 2024 13:52:48 +0800 Subject: [PATCH] Update spawn syscalls --- rfcs/0050-vm-syscalls-3/0050-vm-syscalls-3.md | 424 +++++++++++++----- 1 file changed, 311 insertions(+), 113 deletions(-) diff --git a/rfcs/0050-vm-syscalls-3/0050-vm-syscalls-3.md b/rfcs/0050-vm-syscalls-3/0050-vm-syscalls-3.md index ba8a0d56..3b1f3431 100644 --- a/rfcs/0050-vm-syscalls-3/0050-vm-syscalls-3.md +++ b/rfcs/0050-vm-syscalls-3/0050-vm-syscalls-3.md @@ -2,7 +2,7 @@ Number: "0050" Category: Standards Track Status: Draft -Author: Xu Jiandong , Dingwei Zhang +Author: Xuejie Xiao , Xu Jiandong , Wanbiao Ye , Dingwei Zhang Created: 2023-04-17 --- @@ -12,174 +12,139 @@ Created: 2023-04-17 This document describes the addition of the syscalls during the CKB2023. This update significantly enhances the flexibility of CKB Script. -The following four syscalls are added: +## Introduction + +The design of the syscall `spawn` function draws inspiration from Unix and Linux, hence they share the same terminologies: process, pipe, and file descriptor. The `spawn` is a mechanism used in `ckb-vm` to create new processes, which can then execute a different program or command independently of the parent process. + +In the context of `ckb-vm`, a process represents the active execution of a RISC-V binary. This binary can be located within a cell. Additionally, a RISC-V binary can also be found within the witness during a syscall `spawn` . A pipe is established by associating two file descriptors, each linked to one of its ends. These file descriptors cannot be duplicated and are exclusively owned by the process. Furthermore, the file descriptors can only be either read from or written to; they cannot be both read from and written to simultaneously. + +We added 8 spawn-related syscalls and one block-related syscall, respectively: - [Spawn] -- [Get Memory Limit] -- [Current Memory] -- [Set Content] +- [Pipe] +- [Inherited FD] +- [Read] +- [Write] +- [Close] +- [Wait] +- [Process ID] - [Load Block Extension] ### Spawn [Spawn]: #spawn -The syscall Spawn is the core part of this update. The *Spawn* and the latter two syscalls: *Get Memory Limit* and *Set Content* together, implement a way to call another CKB Script in a CKB Script. Unlike the *Exec*[1](../0034-vm-syscalls-2/0034-vm-syscalls-2.md) syscall, *Spawn* saves the execution context of the current script, like [posix_spawn](https://man7.org/linux/man-pages/man3/posix_spawn.3.html), the parent script continues when the child script ends. - +The syscall Spawn is the core part of this update. The parent process calls the Spawn system call, which creates a new process (a child process) that is an independent ckb-vm instance. It's important to note that the parent process will not be blocked by the child process as a result of this syscall. ```c typedef struct spawn_args_t { - uint64_t memory_limit; - int8_t* exit_code; - uint8_t* content; - uint64_t* content_length; + size_t argc; + const char** argv; + /* Spawned VM process ID */ + uint64_t* process_id; + /* A list of file descriptor, 0 indicates end of array */ + const uint64_t* inherited_fds; } spawn_args_t; -int ckb_spawn(size_t index, size_t source, size_t bounds, - int argc, char* argv[], spawn_args_t* spgs); +int ckb_spawn(size_t index, size_t source, size_t place, size_t bounds, + spawn_args_t* spawn_args); ``` The arguments used here are: -- `index`: an index value denoting the index of entries to read. -- `source`: a flag denoting the source of cells to locate, possible values include: +- index: an index value denoting the index of entries to read. +- source: a flag denoting the source of cells or witnesses to locate, possible values include: - 1: input cells. - `0x0100000000000001`: input cells with the same running script as current script - 2: output cells. - `0x0100000000000002`: output cells with the same running script as current script - 3: dep cells. -- `bounds`: high 32 bits means `offset`, low 32 bits means `length`. If `length` equals to zero, it read to end instead of reading 0 bytes. -- `argc`: argc contains the number of arguments passed to the program -- `argv`: argv is a one-dimensional array of strings -- `memory_limit`: an integer value denoting the memory size to use(Not including descendant children scripts), possible values include: - - 1 (0.5 M) - - 2 (1 M) - - 3 (1.5 M) - - 4 (2 M) - - 5 (2.5 M) - - 6 (3 M) - - 7 (3.5 M) - - 8 (4 M) -- `exit_code`: an int8 pointer denoting where we save the exit code of a child script. -- `content`: a pointer to a buffer in VM memory space denoting where we would load the sub-script data. The child script will write data in this buffer via `set_content`. -- `content_length`: a pointer to a 64-bit unsigned integer in VM memory space. When calling the syscall, this memory location should store the length of the buffer specified by `content`. When returning from the syscall, CKB VM would fill in `content_length` with the actual length of the buffer. `content_length` up to 256K. - -The arguments used here `index`, `source`, `bounds`, `argc` and `argv` follow the usage described in [EXEC]. +- place: A value of 0 or 1: + - 0: read from cell data + - 1: read from witness +- bounds: high 32 bits means offset, low 32 bits means length. if length equals to zero, it read to end instead of reading 0 bytes. +- spawn_args: pass data during process creation or save return data. + - argc: argc contains the number of arguments passed to the program + - argv: argv is a one-dimensional array of strings + - process_id: a pointer used to save the process_id of the child process + - inherited_fds: an array representing the file descriptors passed to the child process. It must end with zero. -If the sub-script does not write data, then `NULL` can be passed in for `content` and `content_length`. +The arguments used here index, source, bounds, place, argc and argv follow the usage described in [EXEC]. -This syscall might return the following results: +### Pipe +[Pipe]: #pipe -- 0: Success. -- 1-3: Reserved. These values are already assigned to other syscalls. -- 4: Elf format error -- 5: Exceeded max content length. -- 6: Wrong memory limit -- 7: Exceeded max peak memory +This syscall create a pipe with read-write pair of file descriptions. The file descriptor with read permission is located at `fds[0]`, and the corresponding file descriptor with write permission is located at `fds[1]`. -Note that now we have a new limit called *Peak Memory Usage*. The maximum memory usage of the parent script and its descendant children cannot exceed this value. Currently, this limit is set at 32M. - -Unlike cycles which always increase, the current memory can decrease or increase. When a child script is returned, the occupied memory is freed. This makes current memory usage lower. +```c +int ckb_pipe(uint64_t fds[2]); +``` +File descriptors can be passed to a child process via the `inherited_fds` parameter of the Spawn syscall. -### Get Memory Limit -[Get Memory Limit]: #get-memory-limit +### Inherited FD +[Inherited FD]: #inherited-fd -Get the maximum available memory for the current script. +This sycall retrieves the file descriptors available to the current process, which are passed in from the parent process. These results are copied from the `inherited_fds` parameter of the Spawn syscall. ```c -int ckb_get_memory_limit(); +int ckb_inherited_file_descriptors(uint64_t* fd, size_t* count); ``` -For the script(prime script) directly invoked by CKB, it will always return 8(4M). For the child script invoked by prime script or other child script, it depends on the parameters set by *Spawn*. +When returning from the syscall, the syscall fills `fd` with the file descriptors in unit of `uint64_t` and fills in `count` with the count of corresponding file descriptors. The actual count of file descriptor written to `fd` is the minimum value between the count of `inherited_fds` in the Spawn syscall and the input value pointed to by `count` before syscall. + +By providing `NULL` as `fd` and a `size_t` pointer with a value of 0 as `count`, this syscall can be used to fetch the count of `inherited_fds` in the Spawn syscall. However, by providing `NULL` as `fd` and a non-zero value as `count`, this syscall fails. -### Current Memory -[Current Memory]: #current-memory +### Read +[Read]: #read -Get the Current Memory Usage. The result is the sum of the memory usage of the parent script and the child script. +This syscall reads data from a pipe via a file descriptor. The syscall Read attempts to read up to value pointed by length bytes from file descriptor fd into the buffer, and the actual length of data read is written back to the length parameter. The syscall may pause the execution of current process. ```c -int ckb_current_memory(); +int ckb_read(uint64_t fd, void* buffer, size_t* length); ``` -The system call returns an integer. Multiply it by 0.5 to get the actual memory usage (unit: megabytes) +### Write +[Write]: #write -### Set Content -[Set Content]: #set-content - -The child script can return bytes data to the parent script through `Set Content`. +This syscall writes data to a pipe via a file descriptor. The syscall Write writes up to value pointed by length bytes from the buffer, and the actual length of data written is written back to the length parameter. The syscall may pause the execution of current process. ```c -int ckb_set_content(uint8_t* content, uint64_t* length); +int ckb_write(uint64_t fd, const void* buffer, size_t* length); ``` -- Length up to 256K. -- If the written length is greater than the limit given by *Spawn*, the final written length is the minimum of the two. -- Returns 0 for success, otherwise for failure. -- This function is optional. Not every child script needs to call this, and nothing will happen if you call it from the prime script. +### Close +[Close]: #close -### Spawn example +This syscall manually closes a file descriptor. After calling this, any attempt to read/write the file descriptor pointed to the other end would fail. -Consider the creation of a dependency library with a straightforward function that receives parameters, concatenates them, and subsequently returns the resulting string to the caller. +```c +int ckb_close(uint64_t fd); +``` -**lib_strcat.c** +It's not always necessary to manually close file descriptors. When a process is terminated, all file descriptors owned by the process are automatically closed. -```c -#include -#include +### Wait +[Wait]: #wait -#include "ckb_syscalls.h" +The syscall pauses until the execution of a process specified by `pid` has ended. -int main(int argc, char *argv[]) { - char content[80]; - for (int i = 0; i < argc; i++) { - strcat(content, argv[i]); - } - uint64_t content_size = (uint64_t)strlen(content); - ckb_set_content(&content[0], &content_size); - if (content_size != (uint64_t)strlen(content)) { - return 1; - } - return 0; -} +```c +int ckb_wait(uint64_t pid, int8_t* exit_code); ``` -We can call this dependent library in the prime script. The prime script passes in two parameters "hello", "world" and checks if the return value is equal to "helloworld”: - -**prime.c** +Retrieve the exit code of the process through the `exit_code` parameter. -```c -#include -#include +### Process ID +[Process ID]: #process-id -#include "ckb_syscalls.h" +This syscall is used to get the current process id. -int main() { - const char *argv[] = {"hello", "world"}; - - int8_t spgs_exit_code = 255; - uint8_t spgs_content[80] = {}; - uint64_t spgs_content_length = 80; - spawn_args_t spgs = { - .memory_limit = 8, - .exit_code = &spgs_exit_code, - .content = &spgs_content[0], - .content_length = &spgs_content_length, - }; - - ckb_spawn(1, 3, 0, 2, argv, &spgs); - if (strlen(spgs_content) != 10) { - return 1; - } - if (strcmp(spgs_content, "helloworld") != 0) { - return 1; - } - if (spgs_exit_code != 0) { - return 1; - } - return 0; -} +```c +uint64_t ckb_process_id(); ``` +Root process ID is 0. + ### Load Block Extension [Load Block Extension]: #load-block-extension @@ -213,5 +178,238 @@ This syscall might return the following errors: In case of errors, `addr` and `index` will not contain meaningful data to use. +## Errors Code + +Five new error types added: + +- Error code 5: The file descriptor is invalid during syscall `Wait`. +- Error code 6: The file descriptor is not owned by this process. +- Error code 7: The other end of the pipe is closed. +- Error code 8: The maximum count of spawned processes has been reached. +- Error code 9: The maximum count of created pipes has been reached. + +## Cycles + +Two new constants for cycles consumption are introduced: + +```rust +pub const SPAWN_EXTRA_CYCLES_BASE: u64 = 100_000; +pub const SPAWN_YIELD_CYCLES_BASE: u64 = 800; +``` + +The Cycles consumption of each Syscall is as follows. Among them, the constant 500 and BYTES_TRANSFERD_CYCLES can be referred to [RFC-0014](https://github.com/nervosnetwork/rfcs/blob/master/rfcs/0014-vm-cycle-limits/0014-vm-cycle-limits.md). + +| Syscall Name | Cycles Charge | +| -------------------- | -------------------------------------------------------------------------------- | +| spawn | 500 + SPAWN_YIELD_CYCLES_BASE + BYTES_TRANSFERD_CYCLES + SPAWN_EXTRA_CYCLES_BASE | +| pipe | 500 + SPAWN_YIELD_CYCLES_BASE | +| inherited_fd | 500 + SPAWN_YIELD_CYCLES_BASE | +| read | 500 + SPAWN_YIELD_CYCLES_BASE + BYTES_TRANSFERD_CYCLES | +| write | 500 + SPAWN_YIELD_CYCLES_BASE + BYTES_TRANSFERD_CYCLES | +| close | 500 + SPAWN_YIELD_CYCLES_BASE | +| wait | 500 + SPAWN_YIELD_CYCLES_BASE | +| process_id | 500 | +| load block extension | 500 + BYTES_TRANSFERD_CYCLES | + +## Spawn Example + +Consider the creation of a dependency library with a straightforward function that receives strings, concatenates them, and subsequently returns the resulting string to the caller(a.k.a echo). + +**Caller** + +```c +#include +#include + +#include "ckb_syscalls.h" + +#define CKB_STDIN (0) +#define CKB_STDOUT (1) + +// Function read_all reads from fd until an error or EOF and returns the data it read. +int ckb_read_all(uint64_t fd, void* buffer, size_t* length) { + int err = 0; + size_t read_length = 0; + size_t full_length = *length; + uint8_t* b = buffer; + while (true) { + size_t n = full_length - read_length; + err = ckb_read(fd, b, &n); + if (err == CKB_OTHER_END_CLOSED) { + err = 0; + *length = read_length; + break; + } else { + if (err != 0) { + goto exit; + } + } + if (full_length - read_length == 0) { + err = CKB_LENGTH_NOT_ENOUGH; + if (err != 0) { + goto exit; + } + } + b += n; + read_length += n; + *length = read_length; + } + +exit: + return err; +} + +// Mimic stdio fds on linux +int create_std_fds(uint64_t* fds, uint64_t* inherited_fds) { + int err = 0; + + uint64_t to_child[2] = {0}; + uint64_t to_parent[2] = {0}; + err = ckb_pipe(to_child); + if (err != 0) { + goto exit; + } + err = ckb_pipe(to_parent); + if (err != 0) { + goto exit; + } + + inherited_fds[0] = to_child[0]; + inherited_fds[1] = to_parent[1]; + inherited_fds[2] = 0; + + fds[CKB_STDIN] = to_parent[0]; + fds[CKB_STDOUT] = to_child[1]; + +exit: + return err; +} + +int main() { + int err = 0; + + const char* argv[] = {}; + uint64_t pid = 0; + uint64_t fds[2] = {0}; + // it must be end with zero + uint64_t inherited_fds[3] = {0}; + err = create_std_fds(fds, inherited_fds); + if (err != 0) { + goto exit; + } + + spawn_args_t spgs = { + .argc = 0, + .argv = argv, + .process_id = &pid, + .inherited_fds = inherited_fds, + }; + err = ckb_spawn(0, 3, 0, 0, &spgs); + if (err != 0) { + goto exit; + } + + size_t length = 0; + length = 12; + err = ckb_write(fds[CKB_STDOUT], "Hello World!", &length); + if (err != 0) { + goto exit; + } + err = ckb_close(fds[CKB_STDOUT]); + if (err != 0) { + goto exit; + } + + uint8_t buffer[1024] = {0}; + length = 1024; + err = ckb_read_all(fds[CKB_STDIN], buffer, &length); + if (err != 0) { + goto exit; + } + err = memcmp("Hello World!", buffer, length); + if (err != 0) { + goto exit; + } + +exit: + return err; +} +``` + +**Callee** + +```c +#include +#include + +#include "ckb_syscalls.h" + +#define CKB_STDIN (0) +#define CKB_STDOUT (1) + +// Function read_all reads from fd until an error or EOF and returns the data it read. +int ckb_read_all(uint64_t fd, void* buffer, size_t* length) { + int err = 0; + size_t read_length = 0; + size_t full_length = *length; + uint8_t* b = buffer; + while (true) { + size_t n = full_length - read_length; + err = ckb_read(fd, b, &n); + if (err == CKB_OTHER_END_CLOSED) { + err = 0; + *length = read_length; + break; + } else { + if (err != 0) { + goto exit; + } + } + if (full_length - read_length == 0) { + err = CKB_LENGTH_NOT_ENOUGH; + if (err != 0) { + goto exit; + } + } + b += n; + read_length += n; + *length = read_length; + } + +exit: + return err; +} + +int main() { + int err = 0; + + uint64_t fds[2] = {0}; + uint64_t fds_len = 2; + err = ckb_inherited_file_descriptors(fds, &fds_len); + if (err != 0) { + goto exit; + } + + uint8_t buffer[1024] = {0}; + size_t length; + length = 1024; + err = ckb_read_all(fds[CKB_STDIN], buffer, &length); + if (err != 0) { + goto exit; + } + err = ckb_write(fds[CKB_STDOUT], buffer, &length); + if (err != 0) { + goto exit; + } + err = ckb_close(fds[CKB_STDOUT]); + if (err != 0) { + goto exit; + } + +exit: + return err; +} +``` + [EXEC]: ../0034-vm-syscalls-2/0034-vm-syscalls-2.md#exec [Partial Loading]: ../0009-vm-syscalls/0009-vm-syscalls.md#partial-loading