From 08f80acbee878729fb9859a9276f61572c413aad Mon Sep 17 00:00:00 2001
From: robalb <11277482+robalb@users.noreply.github.com>
Date: Wed, 24 Jul 2024 12:52:23 +0200
Subject: [PATCH] created cyberchallenge demo version
---
.../src/components/SharedLayout.astro | 5 +-
.../src/layouts/BlogPostNoHeader.astro | 18 ++
.../pages/cyberchallenge/x64-introduction.mdx | 217 ++++++++++++++++
.../pages/cyberchallenge/x64-moving-data.mdx | 238 ++++++++++++++++++
4 files changed, 476 insertions(+), 2 deletions(-)
create mode 100644 astro-website/src/layouts/BlogPostNoHeader.astro
create mode 100644 astro-website/src/pages/cyberchallenge/x64-introduction.mdx
create mode 100644 astro-website/src/pages/cyberchallenge/x64-moving-data.mdx
diff --git a/astro-website/src/components/SharedLayout.astro b/astro-website/src/components/SharedLayout.astro
index e6af403..3f96d9f 100644
--- a/astro-website/src/components/SharedLayout.astro
+++ b/astro-website/src/components/SharedLayout.astro
@@ -13,8 +13,9 @@ export interface Props {
lang?: string;
isBlog?: boolean;
isLarge?: boolean;
+ showHeader?: boolean;
}
-const { title, description, permalink, activePage, lang="en", isBlog=false, isLarge=false } = Astro.props;
+const { title, description, permalink, activePage, lang="en", isBlog=false, isLarge=false, showHeader=true} = Astro.props;
//theme logic is defined in BlogHeader, with the switch js handler
---
@@ -34,7 +35,7 @@ const { title, description, permalink, activePage, lang="en", isBlog=false, isLa
else
document.body.classList.remove("light")
-
+ {showHeader && }
diff --git a/astro-website/src/layouts/BlogPostNoHeader.astro b/astro-website/src/layouts/BlogPostNoHeader.astro
new file mode 100644
index 0000000..ed657aa
--- /dev/null
+++ b/astro-website/src/layouts/BlogPostNoHeader.astro
@@ -0,0 +1,18 @@
+---
+import SharedLayout from '../components/SharedLayout.astro';
+import BlogPost from '../components/BlogPost.astro';
+
+const { content } = Astro.props;
+const {
+ title, description, permalink,
+ subtitle, lang="en", publishDate, author, tags, heroImage, alt
+ } = content;
+const isBlog = true
+const showHeader = false
+---
+
+
+
+
+
+
diff --git a/astro-website/src/pages/cyberchallenge/x64-introduction.mdx b/astro-website/src/pages/cyberchallenge/x64-introduction.mdx
new file mode 100644
index 0000000..632cd75
--- /dev/null
+++ b/astro-website/src/pages/cyberchallenge/x64-introduction.mdx
@@ -0,0 +1,217 @@
+---
+layout: '../../layouts/BlogPostNoHeader.astro'
+title: An interactive guide to x86-64 assembly - introduction
+publishDate: 2024-02-18
+description:
+tags: ['x86-64', 'pwn']
+permalink: https://halb.it/cyberchallenge/x64-introduction/
+---
+import Spoiler from '../../components/Spoiler.astro'
+import SliderHexdump from '../../components/SliderHexdump.svelte'
+import RegistersTable from '../../components/RegistersTable.svelte'
+import PtrSyntaxEmbed from '../../components/PtrSyntaxEmbed.svelte'
+import StackEmbed from '../../components/StackEmbed.svelte'
+import EndiannessEmbed from '../../components/EndiannessEmbed.svelte'
+
+It's often said that assembly language is complex. Most people are scared of it, everyone avoids it.
+ After all, there's a reason why high-level languages and compilers were invented, right?
+But while it's true that you would have a hard time writing a large project in assembly,
+the language itself is surprisingly simple.
+That's because Assembly is the native language of the processor,
+and at its essence, all the processor does is move data.
+
+This guide is not about writing assembly; it's about understanding the way data moves
+behind the scenes when you execute a program. We'll use concrete examples for the
+x86-64 architecture, but this information applies everywhere and is fundamental knowledge
+for reverse engineering, binary exploitation, or just writing better code.
+
+This is the first part of a series of interactive articles:
+
+- [introduction](/cyberchallenge/x64-introduction/) (you are here)
+- [moving data](/cyberchallenge/x64-moving-data/)
+- stack frames
+
+### what is data?
+
+Data is just bits, representing information.
+A sequence of bits can encode any kind of information, however this article will only focus
+on text and integers.
+
+But before we talk about any kind of encoding, we have to introduce a new notation:
+The issue is that while circuits understand sequences of bits very well, humans don't.
+For example, can you tell the difference between
+`1101010101111110` and `1101010101111110` ?
+
+
+Ok, the two sequences are identical, but I bet you couldn't immediately see that.
+
+
+In order to visualize binary data in a more human friendly way, we use
+hexadecimal numbers, which associate a number or a letter
+between A and F to a group of 4 bits.
+A long sequence of bits can be represented in this way:
+
+```
+0010 0101 0111 1101 1111
+
+ 2 5 7 d f
+```
+
+Note that in order to avoid confusion with decimal numbers, it's common to prefix
+hexadecimal numbers with `0x`.
+For example, `0x1234` is not the same
+thing as the decimal number `1234`.
+I'm not going to explain how conversions between decimal, binary, and hexadecimal numbers work;
+the only assumption I'm making in this article is that you already know that.
+If you have a python terminal, you can perform these conversions very easily:
+
+```python
+al@thinkpad:~/$ python
+>>>
+>>> 0b0010 #print the binary number 0010 in decimal
+2
+>>> 0x1234 #print the hex number 1234 in decimal
+4660
+>>> hex(0b00100101011111011111) #print a binary number in hex
+'0x257df'
+>>> hex(4660) #print a decimal number in hex
+'0x1234'
+```
+
+One more thing: we call a group of 8 bits a `byte`, but that's not the only
+group of bits with a name. The following table
+contains all the names that you will encounter while working with the x86-64 architecture:
+
+| N. of bits | example hex value | name |
+| ------------ | ----------------- | ---------------------- |
+| 4 | f | nibble |
+| 8 | ff | byte |
+| 16 | ffff | word |
+| 32 | ffffffff | dword (double word) |
+| 64 | ffffffffffffffff | qword (quadruple word) |
+
+
+
+### text
+
+There are a lot of different ways to encode text, and I recommend that you
+read the [bare minimum fundamentals](https://www.joelonsoftware.com/2003/10/08/the-absolute-minimum-every-software-developer-absolutely-positively-must-know-about-unicode-and-character-sets-no-excuses/)
+, it's a very interesting topic in itself.
+In this article however we'll only focus on ASCII encoding, which is extremely simple:
+
+All you need to know is that text is stored as a sequence of bytes. Every byte represents a character, and since ASCII only uses 7 of the 8 bits,
+there are `128` possible characters between numbers, English letters, punctuation and control codes.
+You can find a table of all the ascii characters in the
+[linux man pages](https://man.archlinux.org/man/core/man-pages/ascii.7.en).
+
+For example, the letter 'c' is stored as the byte `0x63`,
+The letter 'o' is `0x6f`,
+The text `ciao` is stored as the sequence of bytes `63 69 61 6f`.
+
+
+### where is data?
+
+Now that we know how to represent text and numbers, we need some place
+to store them.
+Like all kinds of data, they can be stored in only two places:
+
+- in memory, which means in your RAM
+- in registers, which are special containers inside your CPU
+
+### memory
+
+Memory is just a very long list of
+contiguous cells, each containing 8 bits of information, and reachable by a numeric address.
+
+Since printing a long list of bytes would take a lot of space,
+when visualizing memory we usually group bytes in rows of 8 or 16.
+It's also common to include a column to the side that shows the ascii letter associated to each byte.
+
+The memory dump below was taken from a program that was running on my computer.
+Use the slider to adjust the number of bytes you want to show in a row.
+
+
+
+### registers
+
+Registers are containers for data, located inside your CPU.
+The x86-64 architecture has
+[a lot of registers](https://en.wikipedia.org/wiki/X86#/media/File:Table_of_x86_Registers_svg.svg),
+each with an associated name.
+Some of them have a specific purpose, others are generic containers we can use in our program.
+We mostly interact with these:
+
+
+
+
+
+In order to understand these tables, we'll look at the register `rax`, displayed in the first row.
+`rax` is a generic register that contains 8 bytes of data: from byte 0 to byte 7
+as indicated by the byte numbers at the top of the table.
+
+The register `eax` gives you access to the
+lower 4 bytes of `rax`; reading or writing into `eax` is the same as reading or writing
+the bytes from 0 to 3 of `rax`.
+Similarly, `ax` gives you access to the lower 2 bytes, and `al` to the lowest byte.
+
+### Finally, some code
+
+We are assuming that you are familiar with some programming language, it doesn't matter which one.
+Assembly code syntax is similar to the programming language concepts you know:
+a sequence of instructions, usually one on every line, that will be executed in order.
+
+The x86-64 assembly syntax has two different dialects: AT&T and Intel.
+All the code snippets in this series of articles are using the Intel syntax.
+The following snippet is an example of what the syntax looks like; don't worry about what it does for now.
+
+```yaml
+# this is a comment
+push rbp
+mov rbp, rsp
+mov DWORD PTR [rbp-4], edi
+mov eax, DWORD PTR [rbp-4]
+add eax, 0x42
+pop rbp
+ret
+```
+
+A good way to familiarize yourself with the syntax is to look at the assembly
+generated from small snippets of code.
+The
+[compiler explorer website](https://godbolt.org/z/7qGb91oo8)
+is designed exactly for this use case: You can type snippets of code
+in any compiled language you know, and observe the generated assembly.
+If you hover the mouse over an assembly instruction you can even see
+a description of what it does.
+
+In the [next article](/cyberchallenge/x64-moving-data/)
+we are going to see in detail how each of the
+instructions in the previous example works.
+
+### Further Reading
+
+This article is still under development, and it's improving over time.
+If you reached this point, you might be interested in the next articles:
+
+- [introduction](/cyberchallenge/x64-introduction/) (you are here)
+- [moving data](/cyberchallenge/x64-moving-data/)
+- stack frames
+
+Additional resources:
+
+- pwn.college's assembly module and lectures https://pwn.college/fundamentals/assembly-crash-course
+- the compiler explorer website https://godbolt.org/z/c6brc1df9
+- [the official x86_64 reference](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html)
+- unofficial x86_64 instructions reference https://www.felixcloutier.com/x86/
+- The best linux syscall table reference https://syscalls.mebeim.net/?table=x86/64/x64/latest
diff --git a/astro-website/src/pages/cyberchallenge/x64-moving-data.mdx b/astro-website/src/pages/cyberchallenge/x64-moving-data.mdx
new file mode 100644
index 0000000..c401fb5
--- /dev/null
+++ b/astro-website/src/pages/cyberchallenge/x64-moving-data.mdx
@@ -0,0 +1,238 @@
+---
+layout: '../../layouts/BlogPostNoHeader.astro'
+title: An interactive guide to x86-64 assembly - moving data
+publishDate: 2024-07-22
+description:
+tags: ['x86-64', 'pwn']
+permalink: https://halb.it/cyberchallenge/x64-moving-data/
+---
+import Spoiler from '../../components/Spoiler.astro'
+import SliderHexdump from '../../components/SliderHexdump.svelte'
+import SliderAlign from '../../components/SliderAlign.svelte'
+import RegistersTable from '../../components/RegistersTable.svelte'
+import PtrSyntaxEmbed from '../../components/PtrSyntaxEmbed.svelte'
+import StackEmbed from '../../components/StackEmbed.svelte'
+import EndiannessEmbed from '../../components/EndiannessEmbed.svelte'
+import Picture from '../../components/Picture.astro'
+
+This is the second part of a series of interactive articles on the x86-64 architecture.
+This part will focus on the first assembly instructions, visualizing the way data moves in memory
+when they are executed.
+
+- [introduction](/cyberchallenge/x64-introduction/)
+- [moving data](/cyberchallenge/x64-moving-data/) (you are here)
+- stack frames
+
+### Visualizing memory
+
+In the [previous post](/cyberchallenge/x64-introduction)
+we introduced some basics on data, encodings, and the places
+where data is stored: registers and memory.
+We also introduced a common way to visualize memory, that will be used extensively
+in this article: hex dumps.
+
+The example below shows a hexdump of some example data taken from the stack frame of a process.
+Use the slider to adjust the number of bytes you want to see in a single row.
+
+
+
+ The reason I want you to familiarize yourself with this visualization is also the rationale behind
+ this series of articles:
+ Most resources online explain low-level topics (such as
+ stack frames, data alignment, or buffer overflows) using
+ [abstract diagrams](https://stackoverflow.com/a/40106523/9169799).
+ But when you approach these topics in practice, you will use
+ tools like gdb, that visualize data in a completely different way compared to the diagrams.
+ For example, this is a screenshot of my setup when
+ [running gdb with GEF and the python pwntools library](/posts/pwntools-gdb):
+
+
+
+ All the visualizations in this article emulate the way data is visualized in real-world
+ scenarios, with tools like gdb or [PWNDBG](https://github.com/pwndbg/pwndbg/),
+ popular in CTF competitions.
+ My hope is that this will lower the steep learning curve of those tools.
+
+
+### Moving data
+
+The first instruction we are going to see is `mov`, which moves data around. It can move data from one register to another,
+from a register to memory, or vice versa, from memory to a register.
+
+ These first examples are self-explanatory:
+
+```python
+mov rbx, 0x10 #copies the integer 0x10 into rbx
+mov rax, rbx #copies the content of rbx into rax
+```
+
+Moving data to memory requires some extra syntax:
+The following snippet writes the byte `0xff` in the memory cell at address `0x10`.
+
+```c
+mov rax, 0x10
+mov byte ptr [rax], 0xff
+```
+
+Let's break it down:
+- First, we put in a register `0x10`, the address of the cell we want to write to.
+- Then we perform a mov instruction with square brackets around the register name, to indicate that we want to move `0xff` into
+the memory address pointed to by the register, and not into the register itself.
+
+Notice how in that example we moved a single byte, and we used the syntax `byte ptr`.
+You can change that to `word`, `dword` or `qword` if you want to move a different number of bytes.
+
+The interactive example below allows you to experiment with all possible variations of the pointer syntax.
+You can click "run" to see how the memory is affected.
+
+
+
+
+### A sidenote on endianness
+
+We managed to reach this point by ignoring an important fact: x86-64 is a little endian architecture,
+which means that numbers are not stored in the way you would expect.
+In the previous example, you saw what the number `0x4242424242424242` looks like in memory,
+but we chose that number carefully to hide the issue. In the next example, you can enter the number you want.
+Can you spot what's happening?
+
+
+
+In case you missed it, numbers are being saved with their bytes in an inverted order:
+For example, the number `0xcafe` is composed of the byte `ca` followed by `fe`,
+but it will be saved as the byte `fe` followed by the byte `ca`.
+
+What's going on here is that
+both humans and computers use a positional number system to represent integers,
+but with a different order.
+When we (humans using Hindu-Arabic numerals) represent numbers,
+we write the most significant value first, and continue in descending order.
+This is the same as Big endian architectures.
+
+```python
+ human-readable decimal number
+ 1337
+ | |
+ | Least significant digit
+ Most significant digit
+
+ human-readable hex number
+ 0xcafebabe
+ | |
+ | Least significant byte
+ Most significant byte
+```
+Little endian architectures write the least significant value first instead, and continue in ascending order.
+
+This topic is explained in depth on [wikipedia](https://en.wikipedia.org/wiki/Endianness), with some
+useful diagrams that will solve any doubts you might have.
+Endianness is only related to the way the processor handles integers.
+Other kinds of data, such as text, are usually encoded in the same order as you would expect.
+Floating point numbers are stored in a completely different
+format instead, you can read more about them
+[in this great article](https://fabiensanglard.net/floating_point_visually_explained/)
+, or in
+[this visual guide by Ciechanowski](https://ciechanow.ski/exposing-floating-point/)
+
+### The stack
+
+x64, like most architectures, has the concept of a stack: an area in memory pointed to
+by the special register `rsp`.
+You can add or remove elements from the top of the stack by using the
+`push` and `pop` instructions. This is the most common interaction, but it's also valid to directly adjust the value of `rsp`.
+In this interactive example
+the stack area is highlighted in blue, together with the value of the `rsp` and `rax` registers.
+
+
+
+There are two key elements you should notice by playing with the example above:
+- `rsp` points to the top of the stack. It is decreased by 8 when we push a value, and increased by 8 when we pop a value.
+- Every time we pop a value from the stack that value is not deleted, the area of memory that contains it
+simply stops being part of the stack. The only thing that changes is the memory address pointed by `rsp`.
+
+Basically, `push rax` does the same as the following code:
+```c
+sub rsp, 8
+mov qword ptr [rsp], rax
+```
+
+And `pop rax` does the same as the following code
+```c
+mov rax, qword ptr [rsp]
+add rsp, 8
+```
+
+There is a confusing element here: when we put something onto the stack we are
+growing the stack, and yet we are moving towards lower addresses of memory.
+
+With the way we visualize memory this actually looks correct, the stack is growing
+towards the top.
+But if we only look at the numeric addresses of elements on the stack, newer elements have smaller addresses,
+which looks backwards.
+Even when you are aware of this, it's common to get confused
+and end up thinking:
+"I put a new value on the stack, but it has a smaller address than the previous value, what is going on?"
+
+### Memory alignment
+
+I don't think memory alignment can be explained in a better way than
+what [this article does](https://web.archive.org/web/20080607055623/http://www.ibm.com/developerworks/library/pa-dalign/), so check it out. Here we'll only focus on how memory alignment impacts
+the way we visualize the stack:
+Every time you push or pop something from the stack, you move the stack pointer
+8 bytes up or down. If you observe carefully the
+previous example, you'll also notice that the addresses in the stack pointer are
+always multiples of 8: they always end with either `0` or `8`.
+
+This kind of alignment is done on purpose for performance reasons, and you will encounter it everywhere.
+As a consequence, when we visualize memory in a hexdump it's common to start from addresses multiples
+of 8 or 16, so that data will fit properly in a row.
+
+This is a hexdump taken from the stack memory of a function. Two different variables are highlighted:
+one is the 32-bit integer `0xcafebabe`, the other is a stack canary, which we'll see in another article.
+You can adjust the slider to change the start address in the hexdump.
+
+
+
+What I'm trying to show here is that everything is relative.
+What you see is always an abstract representation of the actual data,
+and it's up to you to visualize it in a way that matches
+your mental model.
+
+
+### Further Reading
+
+This article is still under development, and it's improving over time.
+If you reached this point, you might be interested in the next articles:
+
+- [introduction](/cyberchallenge/x64-introduction/)
+- [moving data](/cyberchallenge/x64-moving-data/) (you are here)
+- stack frames
+
+Additional resources:
+
+- pwn.college's assembly module and lectures https://pwn.college/fundamentals/assembly-crash-course
+- the compiler explorer website https://godbolt.org/z/c6brc1df9
+- [the official x86_64 reference](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html)
+- unofficial x86_64 instructions reference https://www.felixcloutier.com/x86/
+- the best linux syscall table reference https://syscalls.mebeim.net/?table=x86/64/x64/latest
+
+
+
+
+
+
+