diff --git a/presentation_london/docker-compose-debezium.yml b/presentation_london/docker-compose-debezium.yml index aa0e7bee4aeca..ee93702b7eacb 100644 --- a/presentation_london/docker-compose-debezium.yml +++ b/presentation_london/docker-compose-debezium.yml @@ -1,55 +1,37 @@ -version: "2.1" - +version: '2' services: - pd: - image: pingcap/pd:latest + zookeeper: + image: quay.io/debezium/zookeeper:latest ports: - - "2379:2379" - volumes: - - ./config/pd.toml:/pd.toml - - ./logs:/logs - command: - - --client-urls=http://0.0.0.0:2379 - - --peer-urls=http://0.0.0.0:2380 - - --advertise-client-urls=http://pd:2379 - - --advertise-peer-urls=http://pd:2380 - - --initial-cluster=pd=http://pd:2380 - - --data-dir=/data/pd - - --config=/pd.toml - - --log-file=/logs/pd.log - restart: on-failure - - tikv: - image: pingcap/tikv:latest + - 2182:2181 + - 2888:2888 + - 3888:3888 + kafka: + image: quay.io/debezium/kafka:latest ports: - - "20160:20160" - volumes: - - ./config/tikv.toml:/tikv.toml - - ./logs:/logs - command: - - --addr=0.0.0.0:20160 - - --advertise-addr=tikv:20160 - - --data-dir=/data/tikv - - --pd=pd:2379 - - --config=/tikv.toml - - --log-file=/logs/tikv.log - depends_on: - - "pd" - restart: on-failure - - tidb: - image: pingcap/tidb:latest + - 9093:9092 + links: + - zookeeper + environment: + - ZOOKEEPER_CONNECT=zookeeper:2181 + mysql: + image: quay.io/debezium/example-mysql:latest ports: - - "4000:4000" - volumes: - - ./config/tidb.toml:/tidb.toml - - ./logs:/logs - command: - - --store=tikv - - --path=pd:2379 - - --config=/tidb.toml - - --log-file=/logs/tidb.log - - --advertise-address=tidb - depends_on: - - "tikv" - restart: on-failure \ No newline at end of file + - 3306:3306 + environment: + - MYSQL_ROOT_PASSWORD=debezium + - MYSQL_USER=mysqluser + - MYSQL_PASSWORD=mysqlpw + connect: + image: quay.io/debezium/connect:latest + ports: + - 8083:8083 + links: + - kafka + - mysql + environment: + - BOOTSTRAP_SERVERS=kafka:9092 + - GROUP_ID=1 + - 
CONFIG_STORAGE_TOPIC=my_connect_configs + - OFFSET_STORAGE_TOPIC=my_connect_offsets + - STATUS_STORAGE_TOPIC=my_connect_statuses diff --git a/presentation_london/presentation.md b/presentation_london/presentation.md index fa3a8d6686a7d..388ca47f76c0b 100644 --- a/presentation_london/presentation.md +++ b/presentation_london/presentation.md @@ -7,4 +7,272 @@ ## Demo - This is the big streaming demo -- Showing how to get data from TiDB to RW \ No newline at end of file +- Showing how to get data from TiDB to RW +- TODO: Add yaml file and commands + +## Intro to Change Data Capture (CDC) + +- Purpose of CDC + - Quickly get data from one DB to a different DB + - Derived databases: You have your OLTP DB and then a derived cache, search index, OLAP or stream processor + +![https://twitter.com/gunnarmorling/status/1123191912800845825?lang=en](./presentation_resources/cdc_purpose.png) + +## Where does CDC come from? + +- Logging in DBs + - When you commit, the DB needs to persist the change before confirming the commit + - Persisting to many different DB files may be slow + - If the DB crashes, it can then recover from these files + - It is faster to use an append-only file, i.e. a log + - Originally these files were meant for internal use only + +- TODO: Connection to Transactions? + +- CDC comes along and consumes these logs + - CDC then emits events and pushes them to e.g. 
Kafka + - Example CDC implementation: Debezium + +## CDC Demo + +- New, simpler demo, because changes are easier to observe here + +```sh +# start the setup +docker compose -f presentation_london/docker-compose-debezium.yml up + +# We have a MySQL source, a Debezium connector, and Kafka to transport the change events +docker ps +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +838f78ce665c quay.io/debezium/connect:latest "/docker-entrypoint.…" 11 seconds ago Up 10 seconds 8778/tcp, 0.0.0.0:8083->8083/tcp, 9092/tcp presentation_london-connect-1 +a0732e1a8894 quay.io/debezium/kafka:latest "/docker-entrypoint.…" 11 seconds ago Up 10 seconds 0.0.0.0:9093->9092/tcp presentation_london-kafka-1 +8e2a49cb75b0 quay.io/debezium/zookeeper:latest "/docker-entrypoint.…" 11 seconds ago Up 10 seconds 0.0.0.0:2888->2888/tcp, 0.0.0.0:3888->3888/tcp, 0.0.0.0:2182->2181/tcp presentation_london-zookeeper-1 +af3a840a46b4 quay.io/debezium/example-mysql:latest "docker-entrypoint.s…" 11 seconds ago Up 10 seconds 0.0.0.0:3306->3306/tcp, 33060/tcp presentation_london-mysql-1 +``` + +- Let's have a look at the actual logs we mentioned earlier + +```sh +# password is debezium +mysql -h localhost --protocol=TCP -u root -P 3306 -p + +# The current data in the system +use inventory; +show tables; +select * from customers; + +show binary logs; ++------------------+-----------+-----------+ +| Log_name | File_size | Encrypted | ++------------------+-----------+-----------+ +| mysql-bin.000001 | 180 | No | +| mysql-bin.000002 | 3050181 | No | +| mysql-bin.000003 | 157 | No | ++------------------+-----------+-----------+ + +insert into customers (first_name, last_name, email) values ("evil", "eve", "ee@gmail.com"); + +show binary logs; ++------------------+-----------+-----------+ +| Log_name | File_size | Encrypted | ++------------------+-----------+-----------+ +| mysql-bin.000001 | 180 | No | +| mysql-bin.000002 | 3050181 | No | +| mysql-bin.000003 | 488 | No | 
++------------------+-----------+-----------+ +``` + +- We can see multiple binlogs + - TODO: When does the DB create a new log? +- We can observe that we are writing into binlog, because that is the one where the size increased + - File size will also increase on deletes + +```sh +delete from customers where first_name = "evil"; +show binary logs; ++------------------+-----------+-----------+ +| Log_name | File_size | Encrypted | ++------------------+-----------+-----------+ +| mysql-bin.000001 | 180 | No | +| mysql-bin.000002 | 3050181 | No | +| mysql-bin.000003 | 819 | No | ++------------------+-----------+-----------+ + +# I will also do this. We will get to that in a minute +insert into customers (first_name, last_name, email) values ("very evil", "eve", "ee@gmail.com"); +update customers set first_name= "good" where first_name = "very evil"; + +# We can also observe the binlogs as actual files +docker exec presentation_london-mysql-1 ls -alh /var/lib/mysql | grep mysql-bin +-rw-r----- 1 mysql mysql 180 May 9 14:50 mysql-bin.000001 +-rw-r----- 1 mysql mysql 3.0M May 9 14:50 mysql-bin.000002 +-rw-r----- 1 mysql mysql 1.5K May 9 15:07 mysql-bin.000003 +-rw-r----- 1 mysql mysql 57 May 9 14:50 mysql-bin.index + +# Index tells the DB which binlogs we currently have available +docker exec presentation_london-mysql-1 cat /var/lib/mysql/mysql-bin.index +./mysql-bin.000001 +./mysql-bin.000002 +./mysql-bin.000003 +``` + + +- Let's have a look at the binlog + +```sh +docker cp presentation_london-mysql-1:/var/lib/mysql/mysql-bin.000003 ~/Downloads/binlog + +# Indeed binary gibberish... +head ~/Downloads/binlog +bbin��