From b8bb2f7ab564a6a6a75d0c939a14b09f4869fb76 Mon Sep 17 00:00:00 2001 From: xxchan Date: Wed, 10 Jul 2024 13:31:59 +0800 Subject: [PATCH] doc(dev-guide): merge design docs into dev guide (#17640) Signed-off-by: xxchan --- .pre-commit-config.yaml | 8 +-- docs/README.md | 24 +-------- docs/dev/README.md | 4 ++ docs/dev/src/SUMMARY.md | 25 ++++++++- docs/{ => dev/src/design}/aggregation.md | 6 +-- .../src/design}/architecture-design.md | 8 +-- docs/{ => dev/src/design}/backfill.md | 16 +++--- .../src/design}/batch-local-execution-mode.md | 6 +-- docs/{ => dev/src/design}/checkpoint.md | 4 +- docs/{ => dev/src/design}/consistent-hash.md | 14 ++--- .../src/design}/data-model-and-encoding.md | 7 +-- docs/{ => dev/src/design}/data-source.md | 2 +- docs/{ => dev/src/design}/keys.md | 2 +- docs/{ => dev/src/design}/meta-service.md | 5 +- .../src/design}/multi-object-store.md | 2 +- docs/{ => dev/src/design}/mv-on-mv.md | 4 +- .../src/design/relational-table.md} | 50 +++++++++++++----- docs/{ => dev/src/design}/shared-buffer.md | 2 +- .../src/design}/state-store-overview.md | 10 ++-- .../src/design}/streaming-overview.md | 5 +- .../images/aggregation/agg-components.png | Bin .../images/aggregation/init-agg-group.png | Bin .../architecture-design/architecture.svg | 0 .../architecture-design/batch-query.svg | 0 .../architecture-design/plan-fragments.svg | 0 .../architecture-design/stream-pipeline.png | Bin .../src}/images/backfill/backfill-sides.png | Bin .../src}/images/backfill/handle-poll.png | Bin .../{ => dev/src}/images/backfill/polling.png | Bin .../images/backfill/replication-example.png | Bin .../backfill/replication-replicated.png | Bin .../images/backfill/replication-simple.png | Bin docs/{ => dev/src}/images/backfill/schema.png | Bin .../batch-local-execution-mode/example1.svg | 0 .../batch-local-execution-mode/example2.svg | 0 .../frontend-flow.svg | 0 .../src}/images/checkpoint/checkpoint.svg | 0 .../src}/images/checkpoint/shared-buffer.svg | 0 
.../images/consistent-hash/actor-data.svg | 0 .../consistent-hash/actor-state-table.svg | 0 .../consistent-hash/data-distribution.svg | 0 .../consistent-hash/data-redistribution-1.svg | 0 .../consistent-hash/data-redistribution-2.svg | 0 .../consistent-hash/storage-data-layout.svg | 0 .../images/data-model-and-encoding/chunk.svg | 0 .../data-model-and-encoding/row-format.svg | 0 .../images/data-source/data-source-arch.svg | 0 docs/{ => dev/src}/images/logo-title.svg | 0 docs/{ => dev/src}/images/logo.svg | 0 .../meta-service/cluster-deployment.svg | 0 .../src}/images/meta-service/notification.svg | 0 .../src}/images/mv-on-mv/mv-on-mv-01.svg | 0 .../src}/images/mv-on-mv/mv-on-mv-02.svg | 0 .../relational-table}/relational-table-01.svg | 0 .../relational-table}/relational-table-02.svg | 0 .../relational-table}/relational-table-03.svg | 0 .../state-store-overview-01.svg | 0 .../state-store-overview-02.svg | 0 .../state-store-overview-03.svg | 0 .../state-store-overview-04.svg | 0 .../state-store-overview-05.svg | 0 .../streaming-architecture.svg | 0 .../streaming-executor-and-compute-node.svg | 0 docs/{ => dev/src}/metrics.md | 2 +- .../relational-table-schema.md | 35 ------------ docs/rustdoc/README.md | 2 +- docs/rustdoc/index.md | 4 +- docs/rustdoc/rust.css | 44 +++++++++++++-- 68 files changed, 157 insertions(+), 134 deletions(-) rename docs/{ => dev/src/design}/aggregation.md (91%) rename docs/{ => dev/src/design}/architecture-design.md (95%) rename docs/{ => dev/src/design}/backfill.md (97%) rename docs/{ => dev/src/design}/batch-local-execution-mode.md (94%) rename docs/{ => dev/src/design}/checkpoint.md (97%) rename docs/{ => dev/src/design}/consistent-hash.md (92%) rename docs/{ => dev/src/design}/data-model-and-encoding.md (95%) rename docs/{ => dev/src/design}/data-source.md (98%) rename docs/{ => dev/src/design}/keys.md (99%) rename docs/{ => dev/src/design}/meta-service.md (95%) rename docs/{ => dev/src/design}/multi-object-store.md (99%) rename docs/{ => 
dev/src/design}/mv-on-mv.md (96%) rename docs/{relational_table/storing-state-using-relational-table.md => dev/src/design/relational-table.md} (64%) rename docs/{ => dev/src/design}/shared-buffer.md (99%) rename docs/{ => dev/src/design}/state-store-overview.md (96%) rename docs/{ => dev/src/design}/streaming-overview.md (97%) rename docs/{ => dev/src}/images/aggregation/agg-components.png (100%) rename docs/{ => dev/src}/images/aggregation/init-agg-group.png (100%) rename docs/{ => dev/src}/images/architecture-design/architecture.svg (100%) rename docs/{ => dev/src}/images/architecture-design/batch-query.svg (100%) rename docs/{ => dev/src}/images/architecture-design/plan-fragments.svg (100%) rename docs/{ => dev/src}/images/architecture-design/stream-pipeline.png (100%) rename docs/{ => dev/src}/images/backfill/backfill-sides.png (100%) rename docs/{ => dev/src}/images/backfill/handle-poll.png (100%) rename docs/{ => dev/src}/images/backfill/polling.png (100%) rename docs/{ => dev/src}/images/backfill/replication-example.png (100%) rename docs/{ => dev/src}/images/backfill/replication-replicated.png (100%) rename docs/{ => dev/src}/images/backfill/replication-simple.png (100%) rename docs/{ => dev/src}/images/backfill/schema.png (100%) rename docs/{ => dev/src}/images/batch-local-execution-mode/example1.svg (100%) rename docs/{ => dev/src}/images/batch-local-execution-mode/example2.svg (100%) rename docs/{ => dev/src}/images/batch-local-execution-mode/frontend-flow.svg (100%) rename docs/{ => dev/src}/images/checkpoint/checkpoint.svg (100%) rename docs/{ => dev/src}/images/checkpoint/shared-buffer.svg (100%) rename docs/{ => dev/src}/images/consistent-hash/actor-data.svg (100%) rename docs/{ => dev/src}/images/consistent-hash/actor-state-table.svg (100%) rename docs/{ => dev/src}/images/consistent-hash/data-distribution.svg (100%) rename docs/{ => dev/src}/images/consistent-hash/data-redistribution-1.svg (100%) rename docs/{ => 
dev/src}/images/consistent-hash/data-redistribution-2.svg (100%) rename docs/{ => dev/src}/images/consistent-hash/storage-data-layout.svg (100%) rename docs/{ => dev/src}/images/data-model-and-encoding/chunk.svg (100%) rename docs/{ => dev/src}/images/data-model-and-encoding/row-format.svg (100%) rename docs/{ => dev/src}/images/data-source/data-source-arch.svg (100%) rename docs/{ => dev/src}/images/logo-title.svg (100%) rename docs/{ => dev/src}/images/logo.svg (100%) rename docs/{ => dev/src}/images/meta-service/cluster-deployment.svg (100%) rename docs/{ => dev/src}/images/meta-service/notification.svg (100%) rename docs/{ => dev/src}/images/mv-on-mv/mv-on-mv-01.svg (100%) rename docs/{ => dev/src}/images/mv-on-mv/mv-on-mv-02.svg (100%) rename docs/{images/relational-table-layer => dev/src/images/relational-table}/relational-table-01.svg (100%) rename docs/{images/relational-table-layer => dev/src/images/relational-table}/relational-table-02.svg (100%) rename docs/{images/relational-table-layer => dev/src/images/relational-table}/relational-table-03.svg (100%) rename docs/{ => dev/src}/images/state-store-overview/state-store-overview-01.svg (100%) rename docs/{ => dev/src}/images/state-store-overview/state-store-overview-02.svg (100%) rename docs/{ => dev/src}/images/state-store-overview/state-store-overview-03.svg (100%) rename docs/{ => dev/src}/images/state-store-overview/state-store-overview-04.svg (100%) rename docs/{ => dev/src}/images/state-store-overview/state-store-overview-05.svg (100%) rename docs/{ => dev/src}/images/streaming-overview/streaming-architecture.svg (100%) rename docs/{ => dev/src}/images/streaming-overview/streaming-executor-and-compute-node.svg (100%) rename docs/{ => dev/src}/metrics.md (98%) delete mode 100644 docs/relational_table/relational-table-schema.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab8ba3d9d7eb9..cb54c1606356e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,6 
+7,10 @@ repos: hooks: - id: end-of-file-fixer - id: trailing-whitespace +- repo: https://github.com/crate-ci/typos + rev: v1.23.1 + hooks: + - id: typos - repo: local hooks: - id: rustfmt @@ -14,10 +18,6 @@ repos: entry: rustfmt --edition 2021 language: system types: [rust] - - id: typos - name: typos - entry: typos -w - language: system - id: cargo sort name: cargo sort entry: cargo sort -g -w diff --git a/docs/README.md b/docs/README.md index e905cea7849ea..f371d9bda8f47 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,25 +2,5 @@ This directory contains RisingWave design documents that are intended to be used by contributors to understand our development process, and how we design and implement RisingWave. To learn about how to use RisingWave, check out the [RisingWave user documentation](https://www.risingwave.dev). -## Developer guide - -After you learn about the basics of RisingWave, take a look at our [developer guide](https://risingwavelabs.github.io/risingwave/) to get up to speed with the development process. - -## Table of Contents - -* [Architecture Design](./architecture-design.md) -* [An Overview of RisingWave Streaming Engine](./streaming-overview.md) -* [An Overview of RisingWave State Store](./state-store-overview.md) -* [Meta Service](./meta-service.md) -* [Create MView on Top of MView](./mv-on-mv.md) -* [Checkpoint](./checkpoint.md) -* [Design of Data Source](./data-source.md) -* [Data Model and Encoding](./data-model-and-encoding.md) -* [Design of Batch Local Execution Mode](./batch-local-execution-mode.md) -* [Consistent Hash](./consistent-hash.md) -* [Build RisingWave with Multiple Object Storage Backends](./multi-object-store.md) -* [Backfill](./backfill.md) - -## Images - -We recommend that you use [draw.io](https://app.diagrams.net/) to draw illustrations and export as SVG images, with "include a copy of my diagram" selected for further editing. 
+- `/dev` contains the source code for the [RisingWave Developer Guide](https://risingwavelabs.github.io/risingwave/) +- `/rustdoc` contains source code for the [crate level documentation](https://risingwavelabs.github.io/risingwave/rustdoc) diff --git a/docs/dev/README.md b/docs/dev/README.md index e19f10c08e3c3..7e47920d49400 100644 --- a/docs/dev/README.md +++ b/docs/dev/README.md @@ -28,3 +28,7 @@ including the `` marker at the place where you want the TOC. We use `mdbook-linkcheck` to validate URLs included in our documentation. `linkcheck` will be run automatically when you build with the instructions in the section above. + +## Images + +We recommend that you use [draw.io](https://app.diagrams.net/) to draw illustrations and export as SVG images, with "include a copy of my diagram" selected for further editing. diff --git a/docs/dev/src/SUMMARY.md b/docs/dev/src/SUMMARY.md index 76cf57c007c23..382cc0f80fec8 100644 --- a/docs/dev/src/SUMMARY.md +++ b/docs/dev/src/SUMMARY.md @@ -11,10 +11,11 @@ - [Testing](./tests/intro.md) - [Debugging](./debugging.md) - [Observability](./observability.md) + - [Metrics](./metrics.md) --- -# Benchmarking and Profiling +# Benchmarking and profiling - [CPU Profiling](./benchmark-and-profile/cpu-profiling.md) - [Memory (Heap) Profiling](./benchmark-and-profile/memory-profiling.md) @@ -27,6 +28,28 @@ - [Develop Connectors](./connector/intro.md) - [Continuous Integration](./ci.md) +--- + +# Design docs + + + +- [Architecture Design](./design/architecture-design.md) +- [An Overview of RisingWave Streaming Engine](./design/streaming-overview.md) +- [An Overview of RisingWave State Store](./design/state-store-overview.md) +- [Meta Service](./design/meta-service.md) +- [Create MView on Top of MView](./design/mv-on-mv.md) +- [Checkpoint](./design/checkpoint.md) +- [Design of Data Source](./design/data-source.md) +- [Data Model and Encoding](./design/data-model-and-encoding.md) +- [Design of Batch Local Execution 
Mode](./design/batch-local-execution-mode.md) +- [Consistent Hash](./design/consistent-hash.md) +- [Build RisingWave with Multiple Object Storage Backends](./design/multi-object-store.md) +- [Backfill](./design/backfill.md) +- [Aggregation](./design/aggregation.md) +- [Shared Buffer](./design/shared-buffer.md) +- [Relational Table](./design/relational-table.md) +- [Keys](./design/keys.md) + ## Row-based Encoding @@ -23,8 +15,6 @@ We implement a relational table layer as the bridge between executors and KV sta | join | table_id \| join_key \| pk | materialized value | | agg | table_id \| group_key | agg_value | -For the detailed schema, please check [doc](relational-table-schema.md) - ## Relational Table Layer [source code](https://github.com/risingwavelabs/risingwave/blob/4e66ca3d41435c64af26b5e0003258c4f7116822/src/storage/src/table/state_table.rs) @@ -36,13 +26,13 @@ Relational table layer consists of State Table, Mem Table and Storage Table. The State Table provides the table operations by these APIs: `get_row`, `scan`, `insert_row`, `delete_row` and `update_row`, which are the read and write interfaces for streaming executors. The Mem Table is an in-memory buffer for caching table operations during one epoch. The Storage Table is read only, and will output the partial columns upper level needs. -![Overview of Relational Table](../images/relational-table-layer/relational-table-01.svg) +![Overview of Relational Table](../images/relational-table/relational-table-01.svg) ### Write Path To write into KV state store, executors first perform operations on State Table, and these operations will be cached in Mem Table. Once a barrier flows through one executor, executor will flush the cached operations into state store. At this moment, State Table will covert these operations into kv pairs and write to state store with specific epoch. 
For example, an executor performs `insert(a, b, c)` and `delete(d, e, f)` through the State Table APIs, Mem Table first caches these two operations in memory. After receiving new barrier, State Table converts these two operations into KV operations by row-based format, and writes these KV operations into state store (Hummock). -![write example](../images/relational-table-layer/relational-table-03.svg) +![write example](../images/relational-table/relational-table-03.svg) ### Read Path In streaming mode, executors should be able to read the latest written data, which means uncommitted data is visible. The data in Mem Table (memory) is fresher than that in shared storage (state store). State Table provides both point-get and scan to read from state store by merging data from Mem Table and Storage Table. #### Get @@ -68,4 +58,36 @@ Get(pk = 3): [3, 3333, 3333] #### Scan Scan on relational table is implemented by `StateTableIter`, which is a merge iterator of `MemTableIter` and `StorageIter`. If a pk exists in both KV state store (shared storage) and memory (MemTable), result of `MemTableIter` is returned. For example, in the following figure, `StateTableIter` will generate `1->4->5->6` in order. -![Scan example](../images/relational-table-layer/relational-table-02.svg) +![Scan example](../images/relational-table/relational-table-02.svg) + + +## Example: HashAgg + +In this doc, we will take HashAgg with extreme state (`max`, `min`) or value state (`sum`, `count`) for example, and introduce a more detailed design for the internal table schema. + +[Code](https://github.com/risingwavelabs/risingwave/blob/7f9ad2240712aa0cfe3edffb4535d43b42f32cc5/src/frontend/src/optimizer/plan_node/logical_agg.rs#L144) + +### Table id +`table_id` is a globally unique id allocated in meta for each relational table object. Meta is responsible for traversing the Plan Tree and calculating the total number of Relational Tables needed. 
For example, the Hash Join Operator needs 2, one for the left table and one for the right table. The number of tables needed for Agg depends on the number of agg calls. + +### Value State (Sum, Count) +Query example: +```sql +select sum(v2), count(v3) from t group by v1 +``` + +This query will need to initiate 2 Relational Tables. The schema is `table_id/group_key`. + +### Extreme State (Max, Min) +Query example: +```sql +select max(v2), min(v3) from t group by v1 +``` + +This query will need to initiate 2 Relational Tables. If the upstream is not append-only, the schema becomes `table_id/group_key/sort_key/upstream_pk`. + +The order of `sort_key` depends on the agg call kind. For example, if it's `max()`, `sort_key` will order with `Ascending`. If it's `min()`, `sort_key` will order with `Descending`. +The `upstream_pk` is also appended to ensure the uniqueness of the key. +This design allows the streaming executor not to read all the data from the storage when the cache misses, but only a part of it. The streaming executor will try to write all streaming data to storage. Because there may be `update` or `delete` operations in the stream, it's impossible to always guarantee correct results without storing all data. + +If `t` is created with append-only flag, the schema becomes `table_id/group_key`, which is the same as for Value State. This is because in the append-only mode, there is no `update` or `delete` operation, so the cache will never miss. Therefore, we only need to write one value to the storage. 
diff --git a/docs/shared-buffer.md b/docs/dev/src/design/shared-buffer.md similarity index 99% rename from docs/shared-buffer.md rename to docs/dev/src/design/shared-buffer.md index 2b63a040b4c9a..7c7dac8f06e2d 100644 --- a/docs/shared-buffer.md +++ b/docs/dev/src/design/shared-buffer.md @@ -137,4 +137,4 @@ For all data a, b of the same type, we must ensure that: ``` in-memory representation of a < in-memory representation of b, iff memcomparable(a) < memcomparable(b) -``` \ No newline at end of file +``` diff --git a/docs/state-store-overview.md b/docs/dev/src/design/state-store-overview.md similarity index 96% rename from docs/state-store-overview.md rename to docs/dev/src/design/state-store-overview.md index 0fc64516ac52f..be8f3491550fc 100644 --- a/docs/state-store-overview.md +++ b/docs/dev/src/design/state-store-overview.md @@ -22,7 +22,7 @@ In RisingWave, all streaming executors store their data into a state store. This Reading this document requires prior knowledge of LSM-Tree-based KV storage engines, like RocksDB, LevelDB, etc. -![Overview of Architecture](images/state-store-overview/state-store-overview-01.svg) +![Overview of Architecture](../images/state-store-overview/state-store-overview-01.svg) Hummock consists of a manager service on the meta node, clients on worker nodes (including compute nodes, frontend nodes, and compactor nodes), and a shared storage to store files (SSTs). Every time a new write batch is produced, the Hummock client will upload those files to shared storage, and notify the Hummock manager of the new data. With compaction going on, new files will be added and unused files will be vacuumed. The Hummock manager will take care of the lifecycle of a file — is a file being used? can we delete a file? etc. @@ -104,7 +104,7 @@ The Hummock client will batch writes and generate SSTs to sync to the underlying After the SST is uploaded to an S3-compatible service, the Hummock client will let the Hummock manager know there's a new table. 
The list of all SSTs along with some metadata forms a ***version***. When the Hummock client adds new SSTs to the Hummock manager, a new version will be generated with the new set of SST files. -![Write Path](images/state-store-overview/state-store-overview-02.svg) +![Write Path](../images/state-store-overview/state-store-overview-02.svg) ### Read Path @@ -114,7 +114,7 @@ For every read operation (`scan`, `get`), we will first select SSTs that might c For `scan`, we simply select by overlapping key range. For point get, we will filter SSTs further by Bloom filter. After that, we will compose a single `MergeIterator` over all SSTs. The `MergeIterator` will return all keys in range along with their epochs. Then, we will create `UserIterator` over `MergeIterator`, and for all user keys, the user iterator will pick the first full key whose epoch <= read epoch. Therefore, users can perform a snapshot read from Hummock based on the given epoch. The snapshot should be acquired beforehand and released afterwards. -![Read Path](images/state-store-overview/state-store-overview-03.svg) +![Read Path](../images/state-store-overview/state-store-overview-03.svg) Hummock implements the following iterators: - `BlockIterator`: iterates a block of an SSTable. @@ -148,7 +148,7 @@ As mentioned in [Read Path](#read-path), reads are performed on a ***version*** The SQL frontend will get the latest epoch from the meta service. Then, it will embed the epoch number into SQL plans, so that all compute nodes will read from that epoch. In theory, both SQL frontend and compute nodes will ***pin the snapshot***, to handle the case that frontend goes down and the compute nodes are still reading from Hummock (#622). However, to simplify the process, currently we ***only pin on the frontend side***. 
-![Hummock Service](images/state-store-overview/state-store-overview-04.svg) +![Hummock Service](../images/state-store-overview/state-store-overview-04.svg) Hummock only guarantees that writes on one node can be immediately read from the same node. However, the worker nodes running batch queries might have a slightly outdated version when a batch query plan is received (due to the local version caching). Therefore, we have a `wait_epoch` interface to wait until the local cached version contains full data of one epoch. @@ -164,7 +164,7 @@ From the perspective of the streaming executors, when they receive a barrier, th Here we have two cases: Agg executors always persist and produce new write batches when receiving a barrier; Join executors (in the future when async flush gets implemented) will produce write batches within an epoch. -![Checkpoint in Streaming](images/state-store-overview/state-store-overview-05.svg) +![Checkpoint in Streaming](../images/state-store-overview/state-store-overview-05.svg) Streaming executors cannot control when data will be persisted — they can only write to Hummock's `shared buffer`. When a barrier flows across the system and is collected by the meta service, we can ensure that all executors have written their states of ***the previous epoch*** to the shared buffer, so we can initiate checkpoint process on all worker nodes, and upload SSTs to persistent remote storage. diff --git a/docs/streaming-overview.md b/docs/dev/src/design/streaming-overview.md similarity index 97% rename from docs/streaming-overview.md rename to docs/dev/src/design/streaming-overview.md index 2379fe2db13d3..b24eeaba51cb9 100644 --- a/docs/streaming-overview.md +++ b/docs/dev/src/design/streaming-overview.md @@ -26,7 +26,7 @@ In this document we give an overview of the RisingWave streaming engine. 
## Architecture -![streaming-architecture](./images/streaming-overview/streaming-architecture.svg) +![streaming-architecture](../images/streaming-overview/streaming-architecture.svg) The overall architecture of RisingWave is depicted in the figure above. In brief, RisingWave streaming engine consists of three sets of nodes: frontend, compute nodes, and meta service. The frontend node consists of the serving layer, handling users' SQL requests concurrently. Underlying is the processing layer. Each compute node hosts a collection of long-running actors for stream processing. All actors access a shared persistence layer of storage (currently AWS S3) as its state storage. The meta service maintains all meta-information and coordinates the whole cluster. @@ -38,7 +38,7 @@ When receiving a create materialized view statement at the frontend, a materiali 4. Initializing the job at the backend. The meta service notifies all compute nodes to start serving streaming pipelines. ## Actors, executors, and states -![streaming-executor](./images/streaming-overview/streaming-executor-and-compute-node.svg) +![streaming-executor](../images/streaming-overview/streaming-executor-and-compute-node.svg) ### Actors @@ -75,4 +75,3 @@ See more detailed descriptions on [Checkpoint](./checkpoint.md). ### Fault tolerance When the streaming engine crashes down, the system must globally rollback to a previous consistent snapshot. To achieve this, whenever the meta detects the failover of some certain compute node or any undergoing checkpoint procedure, it triggers a recovery process. After rebuilding the streaming pipeline, each executor will reset its local state from a consistent snapshot on the storage and recover its computation. 
- diff --git a/docs/images/aggregation/agg-components.png b/docs/dev/src/images/aggregation/agg-components.png similarity index 100% rename from docs/images/aggregation/agg-components.png rename to docs/dev/src/images/aggregation/agg-components.png diff --git a/docs/images/aggregation/init-agg-group.png b/docs/dev/src/images/aggregation/init-agg-group.png similarity index 100% rename from docs/images/aggregation/init-agg-group.png rename to docs/dev/src/images/aggregation/init-agg-group.png diff --git a/docs/images/architecture-design/architecture.svg b/docs/dev/src/images/architecture-design/architecture.svg similarity index 100% rename from docs/images/architecture-design/architecture.svg rename to docs/dev/src/images/architecture-design/architecture.svg diff --git a/docs/images/architecture-design/batch-query.svg b/docs/dev/src/images/architecture-design/batch-query.svg similarity index 100% rename from docs/images/architecture-design/batch-query.svg rename to docs/dev/src/images/architecture-design/batch-query.svg diff --git a/docs/images/architecture-design/plan-fragments.svg b/docs/dev/src/images/architecture-design/plan-fragments.svg similarity index 100% rename from docs/images/architecture-design/plan-fragments.svg rename to docs/dev/src/images/architecture-design/plan-fragments.svg diff --git a/docs/images/architecture-design/stream-pipeline.png b/docs/dev/src/images/architecture-design/stream-pipeline.png similarity index 100% rename from docs/images/architecture-design/stream-pipeline.png rename to docs/dev/src/images/architecture-design/stream-pipeline.png diff --git a/docs/images/backfill/backfill-sides.png b/docs/dev/src/images/backfill/backfill-sides.png similarity index 100% rename from docs/images/backfill/backfill-sides.png rename to docs/dev/src/images/backfill/backfill-sides.png diff --git a/docs/images/backfill/handle-poll.png b/docs/dev/src/images/backfill/handle-poll.png similarity index 100% rename from docs/images/backfill/handle-poll.png 
rename to docs/dev/src/images/backfill/handle-poll.png diff --git a/docs/images/backfill/polling.png b/docs/dev/src/images/backfill/polling.png similarity index 100% rename from docs/images/backfill/polling.png rename to docs/dev/src/images/backfill/polling.png diff --git a/docs/images/backfill/replication-example.png b/docs/dev/src/images/backfill/replication-example.png similarity index 100% rename from docs/images/backfill/replication-example.png rename to docs/dev/src/images/backfill/replication-example.png diff --git a/docs/images/backfill/replication-replicated.png b/docs/dev/src/images/backfill/replication-replicated.png similarity index 100% rename from docs/images/backfill/replication-replicated.png rename to docs/dev/src/images/backfill/replication-replicated.png diff --git a/docs/images/backfill/replication-simple.png b/docs/dev/src/images/backfill/replication-simple.png similarity index 100% rename from docs/images/backfill/replication-simple.png rename to docs/dev/src/images/backfill/replication-simple.png diff --git a/docs/images/backfill/schema.png b/docs/dev/src/images/backfill/schema.png similarity index 100% rename from docs/images/backfill/schema.png rename to docs/dev/src/images/backfill/schema.png diff --git a/docs/images/batch-local-execution-mode/example1.svg b/docs/dev/src/images/batch-local-execution-mode/example1.svg similarity index 100% rename from docs/images/batch-local-execution-mode/example1.svg rename to docs/dev/src/images/batch-local-execution-mode/example1.svg diff --git a/docs/images/batch-local-execution-mode/example2.svg b/docs/dev/src/images/batch-local-execution-mode/example2.svg similarity index 100% rename from docs/images/batch-local-execution-mode/example2.svg rename to docs/dev/src/images/batch-local-execution-mode/example2.svg diff --git a/docs/images/batch-local-execution-mode/frontend-flow.svg b/docs/dev/src/images/batch-local-execution-mode/frontend-flow.svg similarity index 100% rename from 
docs/images/batch-local-execution-mode/frontend-flow.svg rename to docs/dev/src/images/batch-local-execution-mode/frontend-flow.svg diff --git a/docs/images/checkpoint/checkpoint.svg b/docs/dev/src/images/checkpoint/checkpoint.svg similarity index 100% rename from docs/images/checkpoint/checkpoint.svg rename to docs/dev/src/images/checkpoint/checkpoint.svg diff --git a/docs/images/checkpoint/shared-buffer.svg b/docs/dev/src/images/checkpoint/shared-buffer.svg similarity index 100% rename from docs/images/checkpoint/shared-buffer.svg rename to docs/dev/src/images/checkpoint/shared-buffer.svg diff --git a/docs/images/consistent-hash/actor-data.svg b/docs/dev/src/images/consistent-hash/actor-data.svg similarity index 100% rename from docs/images/consistent-hash/actor-data.svg rename to docs/dev/src/images/consistent-hash/actor-data.svg diff --git a/docs/images/consistent-hash/actor-state-table.svg b/docs/dev/src/images/consistent-hash/actor-state-table.svg similarity index 100% rename from docs/images/consistent-hash/actor-state-table.svg rename to docs/dev/src/images/consistent-hash/actor-state-table.svg diff --git a/docs/images/consistent-hash/data-distribution.svg b/docs/dev/src/images/consistent-hash/data-distribution.svg similarity index 100% rename from docs/images/consistent-hash/data-distribution.svg rename to docs/dev/src/images/consistent-hash/data-distribution.svg diff --git a/docs/images/consistent-hash/data-redistribution-1.svg b/docs/dev/src/images/consistent-hash/data-redistribution-1.svg similarity index 100% rename from docs/images/consistent-hash/data-redistribution-1.svg rename to docs/dev/src/images/consistent-hash/data-redistribution-1.svg diff --git a/docs/images/consistent-hash/data-redistribution-2.svg b/docs/dev/src/images/consistent-hash/data-redistribution-2.svg similarity index 100% rename from docs/images/consistent-hash/data-redistribution-2.svg rename to docs/dev/src/images/consistent-hash/data-redistribution-2.svg diff --git 
a/docs/images/consistent-hash/storage-data-layout.svg b/docs/dev/src/images/consistent-hash/storage-data-layout.svg similarity index 100% rename from docs/images/consistent-hash/storage-data-layout.svg rename to docs/dev/src/images/consistent-hash/storage-data-layout.svg diff --git a/docs/images/data-model-and-encoding/chunk.svg b/docs/dev/src/images/data-model-and-encoding/chunk.svg similarity index 100% rename from docs/images/data-model-and-encoding/chunk.svg rename to docs/dev/src/images/data-model-and-encoding/chunk.svg diff --git a/docs/images/data-model-and-encoding/row-format.svg b/docs/dev/src/images/data-model-and-encoding/row-format.svg similarity index 100% rename from docs/images/data-model-and-encoding/row-format.svg rename to docs/dev/src/images/data-model-and-encoding/row-format.svg diff --git a/docs/images/data-source/data-source-arch.svg b/docs/dev/src/images/data-source/data-source-arch.svg similarity index 100% rename from docs/images/data-source/data-source-arch.svg rename to docs/dev/src/images/data-source/data-source-arch.svg diff --git a/docs/images/logo-title.svg b/docs/dev/src/images/logo-title.svg similarity index 100% rename from docs/images/logo-title.svg rename to docs/dev/src/images/logo-title.svg diff --git a/docs/images/logo.svg b/docs/dev/src/images/logo.svg similarity index 100% rename from docs/images/logo.svg rename to docs/dev/src/images/logo.svg diff --git a/docs/images/meta-service/cluster-deployment.svg b/docs/dev/src/images/meta-service/cluster-deployment.svg similarity index 100% rename from docs/images/meta-service/cluster-deployment.svg rename to docs/dev/src/images/meta-service/cluster-deployment.svg diff --git a/docs/images/meta-service/notification.svg b/docs/dev/src/images/meta-service/notification.svg similarity index 100% rename from docs/images/meta-service/notification.svg rename to docs/dev/src/images/meta-service/notification.svg diff --git a/docs/images/mv-on-mv/mv-on-mv-01.svg 
b/docs/dev/src/images/mv-on-mv/mv-on-mv-01.svg similarity index 100% rename from docs/images/mv-on-mv/mv-on-mv-01.svg rename to docs/dev/src/images/mv-on-mv/mv-on-mv-01.svg diff --git a/docs/images/mv-on-mv/mv-on-mv-02.svg b/docs/dev/src/images/mv-on-mv/mv-on-mv-02.svg similarity index 100% rename from docs/images/mv-on-mv/mv-on-mv-02.svg rename to docs/dev/src/images/mv-on-mv/mv-on-mv-02.svg diff --git a/docs/images/relational-table-layer/relational-table-01.svg b/docs/dev/src/images/relational-table/relational-table-01.svg similarity index 100% rename from docs/images/relational-table-layer/relational-table-01.svg rename to docs/dev/src/images/relational-table/relational-table-01.svg diff --git a/docs/images/relational-table-layer/relational-table-02.svg b/docs/dev/src/images/relational-table/relational-table-02.svg similarity index 100% rename from docs/images/relational-table-layer/relational-table-02.svg rename to docs/dev/src/images/relational-table/relational-table-02.svg diff --git a/docs/images/relational-table-layer/relational-table-03.svg b/docs/dev/src/images/relational-table/relational-table-03.svg similarity index 100% rename from docs/images/relational-table-layer/relational-table-03.svg rename to docs/dev/src/images/relational-table/relational-table-03.svg diff --git a/docs/images/state-store-overview/state-store-overview-01.svg b/docs/dev/src/images/state-store-overview/state-store-overview-01.svg similarity index 100% rename from docs/images/state-store-overview/state-store-overview-01.svg rename to docs/dev/src/images/state-store-overview/state-store-overview-01.svg diff --git a/docs/images/state-store-overview/state-store-overview-02.svg b/docs/dev/src/images/state-store-overview/state-store-overview-02.svg similarity index 100% rename from docs/images/state-store-overview/state-store-overview-02.svg rename to docs/dev/src/images/state-store-overview/state-store-overview-02.svg diff --git 
a/docs/images/state-store-overview/state-store-overview-03.svg b/docs/dev/src/images/state-store-overview/state-store-overview-03.svg similarity index 100% rename from docs/images/state-store-overview/state-store-overview-03.svg rename to docs/dev/src/images/state-store-overview/state-store-overview-03.svg diff --git a/docs/images/state-store-overview/state-store-overview-04.svg b/docs/dev/src/images/state-store-overview/state-store-overview-04.svg similarity index 100% rename from docs/images/state-store-overview/state-store-overview-04.svg rename to docs/dev/src/images/state-store-overview/state-store-overview-04.svg diff --git a/docs/images/state-store-overview/state-store-overview-05.svg b/docs/dev/src/images/state-store-overview/state-store-overview-05.svg similarity index 100% rename from docs/images/state-store-overview/state-store-overview-05.svg rename to docs/dev/src/images/state-store-overview/state-store-overview-05.svg diff --git a/docs/images/streaming-overview/streaming-architecture.svg b/docs/dev/src/images/streaming-overview/streaming-architecture.svg similarity index 100% rename from docs/images/streaming-overview/streaming-architecture.svg rename to docs/dev/src/images/streaming-overview/streaming-architecture.svg diff --git a/docs/images/streaming-overview/streaming-executor-and-compute-node.svg b/docs/dev/src/images/streaming-overview/streaming-executor-and-compute-node.svg similarity index 100% rename from docs/images/streaming-overview/streaming-executor-and-compute-node.svg rename to docs/dev/src/images/streaming-overview/streaming-executor-and-compute-node.svg diff --git a/docs/metrics.md b/docs/dev/src/metrics.md similarity index 98% rename from docs/metrics.md rename to docs/dev/src/metrics.md index b0216c07fc83e..14d98c7a365ea 100644 --- a/docs/metrics.md +++ b/docs/dev/src/metrics.md @@ -5,7 +5,7 @@ It covers what each metric measures, and what information we may derive from it. 
## Barrier Latency -Prerequisite: [Checkpoint](./checkpoint.md) +Prerequisite: [Checkpoint](./design/checkpoint.md) This metric measures the duration from which a barrier is injected into **all** sources in the stream graph, to the barrier flown through all executors in the graph. diff --git a/docs/relational_table/relational-table-schema.md b/docs/relational_table/relational-table-schema.md deleted file mode 100644 index 64cd615feda25..0000000000000 --- a/docs/relational_table/relational-table-schema.md +++ /dev/null @@ -1,35 +0,0 @@ -# Relational Table Schema - -We introduce the rough row-based encoding format in [relational states](storing-state-using-relational-table.md#row-based-encoding) - -In this doc, we will take HashAgg with extreme state (`max`, `min`) or value state (`sum`, `count`) for example, and introduce a more detailed design for the internal table schema. - -[Code](https://github.com/risingwavelabs/risingwave/blob/7f9ad2240712aa0cfe3edffb4535d43b42f32cc5/src/frontend/src/optimizer/plan_node/logical_agg.rs#L144) - -## Table id -`table_id` is a globally unique id allocated in meta for each relational table object. Meta is responsible for traversing the Plan Tree and calculating the total number of Relational Tables needed. For example, the Hash Join Operator needs 2, one for the left table and one for the right table. The number of tables needed for Agg depends on the number of agg calls. - -## Value State (Sum, Count) -Query example: -```sql -select sum(v2), count(v3) from t group by v1 -``` - -This query will need to initiate 2 Relational Tables. The schema is `table_id/group_key`. - -## Extreme State (Max, Min) -Query example: -```sql -select max(v2), min(v3) from t group by v1 -``` - -This query will need to initiate 2 Relational Tables. If the upstream is not append-only, the schema becomes `table_id/group_key/sort_key/upstream_pk`. - -The order of `sort_key` depends on the agg call kind. 
For example, if it's `max()`, `sort_key` will order with `Ascending`. if it's `min()`, `sort_key` will order with `Descending`. -The `upstream_pk` is also appended to ensure the uniqueness of the key. -This design allows the streaming executor not to read all the data from the storage when the cache fails, but only a part of it. The streaming executor will try to write all streaming data to storage, because there may be `update` or `delete` operations in the stream, it's impossible to always guarantee correct results without storing all data. - -If `t` is created with append-only flag, the schema becomes `table_id/group_key`, which is the same for Value State. This is because in the append-only mode, there is no `update` or `delete` operation, so the cache will never miss. Therefore, we only need to write one value to the storage. - - - diff --git a/docs/rustdoc/README.md b/docs/rustdoc/README.md index 1b3e70e1113c2..0adf956748290 100644 --- a/docs/rustdoc/README.md +++ b/docs/rustdoc/README.md @@ -1,6 +1,6 @@ This folder contains files for generating a nice rustdoc index page. -Online version (for latest main): +Online version (for latest main): To build and open locally, run the following command in the project root: diff --git a/docs/rustdoc/index.md b/docs/rustdoc/index.md index cfb74b8055b8a..a76edb23cb2b4 100644 --- a/docs/rustdoc/index.md +++ b/docs/rustdoc/index.md @@ -4,9 +4,7 @@ Welcome to an overview of the developer documentations of RisingWave! ## Developer Docs -To learn how to develop RisingWave, see the [RisingWave Developer Guide](https://risingwavelabs.github.io/risingwave/). - -The [design docs](https://github.com/risingwavelabs/risingwave/blob/main/docs/README.md) covers some high-level ideas of how we built RisingWave. +To learn how to develop RisingWave, and access high-level design docs, see the [RisingWave Developer Guide](https://risingwavelabs.github.io/risingwave/). 
## Crate Docs diff --git a/docs/rustdoc/rust.css b/docs/rustdoc/rust.css index 71cf5e3df0004..9c76bb08c3898 100644 --- a/docs/rustdoc/rust.css +++ b/docs/rustdoc/rust.css @@ -1,18 +1,21 @@ /* This file is copied from the Rust Project, which is dual-licensed under -Apache 2.0 and MIT terms. */ +Apache 2.0 and MIT terms. https: //github.com/rust-lang/rust/blob/7d640b670e521a0491ea1e49082d1cb5632e2562/src/doc/rust.css +*/ /* General structure */ body { + font-family: serif; margin: 0 auto; padding: 0 15px; font-size: 18px; - color: #333; + color: #000; line-height: 1.428571429; -webkit-box-sizing: unset; -moz-box-sizing: unset; box-sizing: unset; + background: #fff; } @media (min-width: 768px) { body { @@ -20,6 +23,14 @@ body { } } +h1, +h2, +h3, +h4, +h5, +h6 { + font-family: sans-serif; +} h2, h3, h4, h5, h6 { font-weight: 400; line-height: 1.1; @@ -37,8 +48,8 @@ h4, h5, h6 { margin-bottom: 10px; padding: 5px 10px; } -h5, h6 { - color: black; +h5, +h6 { text-decoration: underline; } @@ -135,6 +146,31 @@ h1 a:link, h1 a:visited, h2 a:link, h2 a:visited, h3 a:link, h3 a:visited, h4 a:link, h4 a:visited, h5 a:link, h5 a:visited {color: black;} +h1, +h2, +h3, +h4, +h5 { + /* This is needed to be able to position the doc-anchor. Ideally there + would be a
<div> around the whole document, but we don't have that. */ + position: relative; +} + +a.doc-anchor { + color: black; + display: none; + position: absolute; + left: -20px; + /* We add this padding so that when the cursor moves from the heading's text to the anchor, + the anchor doesn't disappear. */ + padding-right: 5px; + /* And this padding is used to make the anchor larger and easier to click on. */ + padding-left: 3px; +} + +*:hover>.doc-anchor { + display: block; +} /* Code */ pre, code {