Skip to content

Commit

Permalink
Merge branch 'main' into yiming/refactor-create-mv-tracker
Browse files Browse the repository at this point in the history
  • Loading branch information
wenym1 authored Jul 19, 2024
2 parents 889fffd + 2a52dd3 commit a72f3d9
Show file tree
Hide file tree
Showing 121 changed files with 2,151 additions and 803 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/typo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ jobs:
uses: actions/checkout@v3

- name: Check spelling of the entire repository
uses: crate-ci/typos@v1.20.4
uses: crate-ci/typos@v1.23.2
4 changes: 4 additions & 0 deletions .typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,8 @@ extend-exclude = [
"**/Cargo.toml",
"**/go.mod",
"**/go.sum",
# https://github.com/risingwavelabs/risingwave/blob/0ce6228df6a4da183ae91146f2cdfff1ca9cc6a7/src/common/src/cast/mod.rs#L30
# We don't want to fix "fals" here, but may want in other places.
# Ideally, we should just ignore that line: https://github.com/crate-ci/typos/issues/316
"src/common/src/cast/mod.rs",
]
4 changes: 2 additions & 2 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ set -euo pipefail
RC_ENV_FILE="${PREFIX_CONFIG}/risedev-env"
if [ ! -f "${RC_ENV_FILE}" ]; then
echo "risedev-env file not found. Did you start cluster using $(tput setaf 4)\`./risedev d\`$(tput sgr0) or $(tput setaf 4)\`./risedev p\`$(tput sgr0)?"
echo "risedev-env file not found at ${RC_ENV_FILE}. Did you start cluster using $(tput setaf 4)\`./risedev d\`$(tput sgr0) or $(tput setaf 4)\`./risedev p\`$(tput sgr0)?"
exit 1
fi
'''
Expand Down Expand Up @@ -1138,7 +1138,7 @@ fi
private = true
category = "RiseDev - Check"
description = "Run cargo typos-cli check"
install_crate = { min_version = "1.20.4", crate_name = "typos-cli", binary = "typos", test_arg = [
install_crate = { min_version = "1.23.2", crate_name = "typos-cli", binary = "typos", test_arg = [
"--help",
], install_command = "binstall" }
script = """
Expand Down
262 changes: 262 additions & 0 deletions ci/scripts/find-regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
#!/usr/bin/env python3

import subprocess
import unittest
import os
import sys

'''
@kwannoel
This script is used to find the commit that introduced a regression in the codebase.
It uses binary search to find the regressed commit.
It works as follows:
1. Use the start (inclusive) and end (exclusive) bounds, find the middle commit.
e.g. given commit 0->1(start)->2->3->4(bad), start will be 1, end will be 4. Then the middle commit is (1+4)//2 = 2
given commit 0->1(start)->2->3(bad)->4, start will be 1, end will be 3. Then the middle commit is (1+3)//2 = 2
given commit 0->1(start)->2(bad), start will be 1, end will be 2. Then the middle commit is (1+2)//2 = 1.
given commit 0->1(start,bad), start will be 1, end will be 1. We just return the bad commit (1) immediately.
2. Run the pipeline on the middle commit.
3. If the pipeline fails, the regression is in the first half of the commits. Recurse (start, mid)
4. If the pipeline passes, the regression is in the second half of the commits. Recurse (mid+1, end)
5. If start>=end, return start as the regressed commit.
We won't run the entire pipeline, only steps specified by the CI_STEPS environment variable.
For step (2), we need to check its outcome and only run the next step, if the outcome is successful.
'''


def format_step(env):
commit = get_bisect_commit(env["GOOD_COMMIT"], env["BAD_COMMIT"])
step = f'''
cat <<- YAML | buildkite-agent pipeline upload
steps:
- label: "run-{commit}"
key: "run-{commit}"
trigger: "main-cron"
soft_fail: true
build:
branch: {env["BISECT_BRANCH"]}
commit: {commit}
env:
CI_STEPS: {env['CI_STEPS']}
- wait
- label: 'check'
command: |
GOOD_COMMIT={env['GOOD_COMMIT']} BAD_COMMIT={env['BAD_COMMIT']} BISECT_BRANCH={env['BISECT_BRANCH']} CI_STEPS=\'{env['CI_STEPS']}\' ci/scripts/find-regression.py check
YAML'''
return step


def report_step(commit):
step = f'''
cat <<- YAML | buildkite-agent pipeline upload
steps:
- label: "Regressed Commit: {commit}"
command: "echo 'Regressed Commit: {commit}'"
YAML'''
print(f"--- reporting regression commit: {commit}")
result = subprocess.run(step, shell=True)
if result.returncode != 0:
print(f"stderr: {result.stderr}")
print(f"stdout: {result.stdout}")
sys.exit(1)


# Triggers a buildkite job to run the pipeline on the given commit, with the specified tests.
def run_pipeline(env):
step = format_step(env)
print(f"--- running upload pipeline for step\n{step}")
result = subprocess.run(step, shell=True)
if result.returncode != 0:
print(f"stderr: {result.stderr}")
print(f"stdout: {result.stdout}")
sys.exit(1)


# Number of commits for [start, end)
def get_number_of_commits(start, end):
cmd = f"git rev-list --count {start}..{end}"
result = subprocess.run([cmd], shell=True, capture_output=True, text=True)
if result.returncode != 0:
print(f"stderr: {result.stderr}")
print(f"stdout: {result.stdout}")
sys.exit(1)
return int(result.stdout)


def get_bisect_commit(start, end):
number_of_commits = get_number_of_commits(start, end)
commit_offset = number_of_commits // 2
if commit_offset == 0:
return start

cmd = f"git rev-list --reverse {start}..{end} | head -n {commit_offset} | tail -n 1"
result = subprocess.run([cmd], shell=True, capture_output=True, text=True)
if result.returncode != 0:
print(f"stderr: {result.stderr}")
print(f"stdout: {result.stdout}")
sys.exit(1)
return result.stdout.strip()


def get_commit_after(branch, commit):
cmd = f"git log --reverse --ancestry-path {commit}..origin/{branch} --format=%H | head -n 1"
result = subprocess.run([cmd], shell=True, capture_output=True, text=True)

if result.returncode != 0:
print(f"stderr: {result.stderr}")
print(f"stdout: {result.stdout}")
sys.exit(1)

return result.stdout.strip()


def get_env():
env = {
"GOOD_COMMIT": os.environ['GOOD_COMMIT'],
"BAD_COMMIT": os.environ['BAD_COMMIT'],
"BISECT_BRANCH": os.environ['BISECT_BRANCH'],
"CI_STEPS": os.environ['CI_STEPS'],
}

print(f'''
GOOD_COMMIT={env["GOOD_COMMIT"]}
BAD_COMMIT={env["BAD_COMMIT"]}
BISECT_BRANCH={env["BISECT_BRANCH"]}
CI_STEPS={env["CI_STEPS"]}
''')

return env


def fetch_branch_commits(branch):
cmd = f"git fetch -q origin {branch}"
result = subprocess.run([cmd], shell=True)
if result.returncode != 0:
print(f"stderr: {result.stderr}")
print(f"stdout: {result.stdout}")
sys.exit(1)


def main():
cmd = sys.argv[1]

if cmd == "start":
print("--- start bisecting")
env = get_env()
fetch_branch_commits(env["BISECT_BRANCH"])
run_pipeline(env)
elif cmd == "check":
print("--- check pipeline outcome")
env = get_env()
fetch_branch_commits(env["BISECT_BRANCH"])
commit = get_bisect_commit(env["GOOD_COMMIT"], env["BAD_COMMIT"])
step = f"run-{commit}"
cmd = f"buildkite-agent step get outcome --step {step}"
outcome = subprocess.run(cmd, shell=True, capture_output=True, text=True)

if outcome.returncode != 0:
print(f"stderr: {outcome.stderr}")
print(f"stdout: {outcome.stdout}")
sys.exit(1)

outcome = outcome.stdout.strip()
if outcome == "soft_failed":
print(f"commit failed: {commit}")
env["BAD_COMMIT"] = commit
elif outcome == "passed":
print(f"commit passed: {commit}")
env["GOOD_COMMIT"] = get_commit_after(env["BISECT_BRANCH"], commit)
else:
print(f"invalid outcome: {outcome}")
sys.exit(1)

if env["GOOD_COMMIT"] == env["BAD_COMMIT"]:
report_step(env["GOOD_COMMIT"])
return
else:
print(f"run next iteration, start: {env['GOOD_COMMIT']}, end: {env['BAD_COMMIT']}")
run_pipeline(env)
else:
print(f"invalid cmd: {cmd}")
sys.exit(1)


# For the tests, we use RisingWave's sequence of commits, from earliest to latest:
# 617d23ddcac88ced87b96a2454c9217da0fe7915
# 72f70960226680e841a8fbdd09c79d74609f27a2
# 5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0
# 9ca415a9998a5e04e021c899fb66d93a17931d4f
class Test(unittest.TestCase):
def test_get_commit_after(self):
fetch_branch_commits("kwannoel/find-regress")
commit = get_commit_after("kwannoel/find-regress", "72f70960226680e841a8fbdd09c79d74609f27a2")
self.assertEqual(commit, "5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0")
commit2 = get_commit_after("kwannoel/find-regress", "617d23ddcac88ced87b96a2454c9217da0fe7915")
self.assertEqual(commit2, "72f70960226680e841a8fbdd09c79d74609f27a2")
commit3 = get_commit_after("kwannoel/find-regress", "5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0")
self.assertEqual(commit3, "9ca415a9998a5e04e021c899fb66d93a17931d4f")

def test_get_number_of_commits(self):
fetch_branch_commits("kwannoel/find-regress")
n = get_number_of_commits("72f70960226680e841a8fbdd09c79d74609f27a2",
"9ca415a9998a5e04e021c899fb66d93a17931d4f")
self.assertEqual(n, 2)
n2 = get_number_of_commits("617d23ddcac88ced87b96a2454c9217da0fe7915",
"9ca415a9998a5e04e021c899fb66d93a17931d4f")
self.assertEqual(n2, 3)
n3 = get_number_of_commits("72f70960226680e841a8fbdd09c79d74609f27a2",
"5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0")
self.assertEqual(n3, 1)

def test_get_bisect_commit(self):
fetch_branch_commits("kwannoel/find-regress")
commit = get_bisect_commit("72f70960226680e841a8fbdd09c79d74609f27a2",
"9ca415a9998a5e04e021c899fb66d93a17931d4f")
self.assertEqual(commit, "5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0")
commit2 = get_bisect_commit("617d23ddcac88ced87b96a2454c9217da0fe7915",
"9ca415a9998a5e04e021c899fb66d93a17931d4f")
self.assertEqual(commit2, "72f70960226680e841a8fbdd09c79d74609f27a2")
commit3 = get_bisect_commit("72f70960226680e841a8fbdd09c79d74609f27a2",
"5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0")
self.assertEqual(commit3, "72f70960226680e841a8fbdd09c79d74609f27a2")

def test_format_step(self):
fetch_branch_commits("kwannoel/find-regress")
self.maxDiff = None
env = {
"GOOD_COMMIT": "72f70960226680e841a8fbdd09c79d74609f27a2",
"BAD_COMMIT": "9ca415a9998a5e04e021c899fb66d93a17931d4f",
"BISECT_BRANCH": "kwannoel/find-regress",
"CI_STEPS": "test"
}
step = format_step(env)
self.assertEqual(
step,
'''
cat <<- YAML | buildkite-agent pipeline upload
steps:
- label: "run-5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0"
key: "run-5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0"
trigger: "main-cron"
soft_fail: true
build:
branch: kwannoel/find-regress
commit: 5c7b556ea60d136c5bccf1b1f7e313d2f9c79ef0
env:
CI_STEPS: test
- wait
- label: 'check'
command: |
GOOD_COMMIT=72f70960226680e841a8fbdd09c79d74609f27a2 BAD_COMMIT=9ca415a9998a5e04e021c899fb66d93a17931d4f BISECT_BRANCH=kwannoel/find-regress CI_STEPS='test' ci/scripts/find-regression.py check
YAML'''
)


if __name__ == "__main__":
# You can run tests by just doing ./ci/scripts/find-regression.py
if len(sys.argv) == 1:
unittest.main()
else:
main()
2 changes: 1 addition & 1 deletion ci/scripts/run-e2e-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ cluster_stop
echo "--- e2e, $mode, batch"
RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \
cluster_start
sqllogictest -p 4566 -d dev './e2e_test/ddl/**/*.slt' --junit "batch-ddl-${profile}"
sqllogictest -p 4566 -d dev './e2e_test/ddl/**/*.slt' --junit "batch-ddl-${profile}" --label "can-use-recover"
if [[ "$mode" != "single-node" ]]; then
sqllogictest -p 4566 -d dev './e2e_test/background_ddl/basic.slt' --junit "batch-ddl-${profile}"
fi
Expand Down
6 changes: 6 additions & 0 deletions ci/scripts/single-node-utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ export PREFIX_LOG=$RW_PREFIX/log
start_single_node() {
mkdir -p "$HOME/.risingwave/state_store"
mkdir -p "$HOME/.risingwave/meta_store"
mkdir -p .risingwave/config
cat <<EOF > .risingwave/config/risedev-env
RW_META_ADDR="http://127.0.0.1:5690"
RISEDEV_RW_FRONTEND_LISTEN_ADDRESS="127.0.0.1"
RISEDEV_RW_FRONTEND_PORT="4566"
EOF
RUST_BACKTRACE=1 "$PREFIX_BIN"/risingwave >"$1" 2>&1
}

Expand Down
12 changes: 12 additions & 0 deletions ci/workflows/main-cron-bisect.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
auto-retry: &auto-retry
automatic:
# Agent terminated because the AWS EC2 spot instance killed by AWS.
- signal_reason: agent_stop
limit: 3

steps:
- label: "find regressed step"
key: "find-regressed-step"
command: "GOOD_COMMIT=$GOOD_COMMIT BAD_COMMIT=$BAD_COMMIT BISECT_BRANCH=$BISECT_BRANCH CI_STEPS=$CI_STEPS ci/scripts/find-regression.py start"
if: build.env("CI_STEPS") != null
retry: *auto-retry
Loading

0 comments on commit a72f3d9

Please sign in to comment.