From 08f80acbee878729fb9859a9276f61572c413aad Mon Sep 17 00:00:00 2001 From: robalb <11277482+robalb@users.noreply.github.com> Date: Wed, 24 Jul 2024 12:52:23 +0200 Subject: [PATCH] created cyberchallenge demo version --- .../src/components/SharedLayout.astro | 5 +- .../src/layouts/BlogPostNoHeader.astro | 18 ++ .../pages/cyberchallenge/x64-introduction.mdx | 217 ++++++++++++++++ .../pages/cyberchallenge/x64-moving-data.mdx | 238 ++++++++++++++++++ 4 files changed, 476 insertions(+), 2 deletions(-) create mode 100644 astro-website/src/layouts/BlogPostNoHeader.astro create mode 100644 astro-website/src/pages/cyberchallenge/x64-introduction.mdx create mode 100644 astro-website/src/pages/cyberchallenge/x64-moving-data.mdx diff --git a/astro-website/src/components/SharedLayout.astro b/astro-website/src/components/SharedLayout.astro index e6af403..3f96d9f 100644 --- a/astro-website/src/components/SharedLayout.astro +++ b/astro-website/src/components/SharedLayout.astro @@ -13,8 +13,9 @@ export interface Props { lang?: string; isBlog?: boolean; isLarge?: boolean; + showHeader?: boolean; } -const { title, description, permalink, activePage, lang="en", isBlog=false, isLarge=false } = Astro.props; +const { title, description, permalink, activePage, lang="en", isBlog=false, isLarge=false, showHeader=true} = Astro.props; //theme logic is defined in BlogHeader, with the switch js handler --- @@ -34,7 +35,7 @@ const { title, description, permalink, activePage, lang="en", isBlog=false, isLa else document.body.classList.remove("light") - + {showHeader && }
diff --git a/astro-website/src/layouts/BlogPostNoHeader.astro b/astro-website/src/layouts/BlogPostNoHeader.astro new file mode 100644 index 0000000..ed657aa --- /dev/null +++ b/astro-website/src/layouts/BlogPostNoHeader.astro @@ -0,0 +1,18 @@ +--- +import SharedLayout from '../components/SharedLayout.astro'; +import BlogPost from '../components/BlogPost.astro'; + +const { content } = Astro.props; +const { + title, description, permalink, + subtitle, lang="en", publishDate, author, tags, heroImage, alt + } = content; +const isBlog = true +const showHeader = false +--- + + + + + + diff --git a/astro-website/src/pages/cyberchallenge/x64-introduction.mdx b/astro-website/src/pages/cyberchallenge/x64-introduction.mdx new file mode 100644 index 0000000..632cd75 --- /dev/null +++ b/astro-website/src/pages/cyberchallenge/x64-introduction.mdx @@ -0,0 +1,217 @@ +--- +layout: '../../layouts/BlogPostNoHeader.astro' +title: An interactive guide to x86-64 assembly - introduction +publishDate: 2024-02-18 +description: +tags: ['x86-64', 'pwn'] +permalink: https://halb.it/cyberchallenge/x64-introduction/ +--- +import Spoiler from '../../components/Spoiler.astro' +import SliderHexdump from '../../components/SliderHexdump.svelte' +import RegistersTable from '../../components/RegistersTable.svelte' +import PtrSyntaxEmbed from '../../components/PtrSyntaxEmbed.svelte' +import StackEmbed from '../../components/StackEmbed.svelte' +import EndiannessEmbed from '../../components/EndiannessEmbed.svelte' + +It's often said that assembly language is complex. Most people are scared of it, everyone avoids it. + After all, there's a reason why high-level languages and compilers were invented, right?
+But while it's true that you would have a hard time writing a large project in assembly, +the language itself is surprisingly simple. +That's because Assembly is the native language of the processor, +and at its essence, all the processor does is move data. + +This guide is not about writing assembly; it's about understanding the way data moves +behind the scenes when you execute a program. We'll use concrete examples for the +x86-64 architecture, but this information applies everywhere and is fundamental knowledge +for reverse engineering, binary exploitation, or just writing better code. + +This is the first part of a series of interactive articles: + +- [introduction](/cyberchallenge/x64-introduction/) (you are here) +- [moving data](/cyberchallenge/x64-moving-data/) +- stack frames + +### what is data? + +Data is just bits, representing information. +A sequence of bits can encode any kind of information, however this article will only focus +on text and integers. + +But before we talk about any kind of encoding, we have to introduce a new notation: +The issue is that while circuits understand sequences of bits very well, humans don't. +For example, can you tell the difference between +`1101010101111110` and `1101010101111110` ? + + +Ok, the two sequences are identical, but I bet you couldn't immediately see that. + + +In order to visualize binary data in a more human friendly way, we use +hexadecimal numbers, which associate a number or a letter +between A and F to a group of 4 bits.
+A long sequence of bits can be represented in this way: + +``` +0010 0101 0111 1101 1111 + + 2 5 7 d f +``` + +Note that in order to avoid confusion with decimal numbers, it's common to prefix +hexadecimal numbers with `0x`. +For example, `0x1234` is not the same +thing as the decimal number `1234`.
+I'm not going to explain how conversions between decimal, binary, and hexadecimal numbers work; +the only assumption I'm making in this article is that you already know how to do them.
+If you have a python terminal, you can perform these conversions very easily: + +```python +al@thinkpad:~/$ python +>>> +>>> 0b0010 #print the binary number 0010 in decimal +2 +>>> 0x1234 #print the hex number 1234 in decimal +4660 +>>> hex(0b00100101011111011111) #print a binary number in hex +'0x257df' +>>> hex(4660) #print a decimal number in hex +'0x1234' +``` + +One more thing: we call a group of 8 bits a `byte`, but that's not the only +group of bits with a name. The following table +contains all the names that you will encounter while working with the x86-64 architecture: + +| N. of bits | example hex value | name | +| ------------ | ----------------- | ---------------------- | +| 4 | f | nibble | +| 8 | ff | byte | +| 16 | ffff | word | +| 32 | ffffffff | dword (double word) | +| 64 | ffffffffffffffff | qword (quadruple word) | + + + +### text + +There are a lot of different ways to encode text, and I recommend that you +read the [bare minimum fundamentals](https://www.joelonsoftware.com/2003/10/08/the-absolute-minimum-every-software-developer-absolutely-positively-must-know-about-unicode-and-character-sets-no-excuses/) +, it's a very interesting topic in itself. +In this article however we'll only focus on ASCII encoding, which is extremely simple: + +All you need to know is that text is stored as a sequence of bytes. Every byte represents a character, and ASCII only uses 7 of its 8 bits, +so there are `128` possible characters between control codes, numbers, english letters and punctuation. +You can find a table of all the ascii characters in the +[linux man pages](https://man.archlinux.org/man/core/man-pages/ascii.7.en). + +For example, the letter 'c' is stored as the byte `0x63`, +the letter 'o' is `0x6f`, and +the text `ciao` is stored as the sequence of bytes `63 69 61 6f`. + + +### where is data? + +Now that we know how to represent text and numbers, we need some place +to store them.
+Like all kinds of data, we can store it in only two places: + +- in memory, which means in your RAM +- in registers, which are special containers inside your CPU + +### memory + +Memory is just a very long list of +contiguous cells, each containing 8 bits of information, and reachable by a numeric address. + +Since printing a long list of bytes would take a lot of space, +when visualizing memory we usually group bytes in rows of 8 or 16. +It's also common to include a column to the side that shows the ascii letter associated to each byte. + +The memory dump below was taken from a program that was running on my computer. +Use the slider to adjust the number of bytes you want to show in a row. + +
+ +### registers + +Registers are containers for data, located inside your CPU. +The x86-64 architecture has +[a lot of registers](https://en.wikipedia.org/wiki/X86#/media/File:Table_of_x86_Registers_svg.svg), +each with an associated name. +Some of them have a specific purpose, other are generic containers we can use in our program. +We mostly interact with these: + + + +
+ +In order to understand these tables, we'll look at the register `rax`, displayed in the first row. +`rax` is a generic register that contains 8 bytes of data: from byte 0 to byte 7 +as indicated by the byte numbers at the top of the table. + +The register `eax` gives you access to the +lower 4 bytes of `rax`; reading `eax` is the same as reading the bytes from 0 to 3 of `rax`, and writing into `eax` overwrites +those same bytes (with one quirk: a write to `eax` also sets the upper 4 bytes of `rax` to zero).
+Similarly, `ax` gives you access to the lower 2 bytes, and `al` to the lowest byte. + +### Finally, some code + +We are assuming that you are familiar with some programming language, it doesn't matter which one. +Assembly code syntax is similar to the programming language concepts you know: +a sequence of instructions, usually one on every line, that will be executed in order. + +The x86-64 assembly syntax has two different dialects: AT&T and Intel. +All the code snippets in this series of articles are using the Intel syntax. +The following snippet is an example of what the syntax looks like; don't worry about what it does for now. + +```yaml +# this is a comment +push rbp +mov rbp, rsp +mov DWORD PTR [rbp-4], edi +mov eax, DWORD PTR [rbp-4] +add eax, 0x42 +pop rbp +ret +``` + +A good way to familiarize yourself with the syntax is to look at the assembly +generated from small snippets of code. +The +[compiler explorer website](https://godbolt.org/z/7qGb91oo8) +is designed exactly for this use case: You can type snippets of code +in any compiled language you know, and observe the generated assembly. +If you hover the mouse over an assembly instruction you can even see +a description of what it does. + +In the [next article](/cyberchallenge/x64-moving-data/) +we are going to see in detail how each of the +instructions in the previous example works. + +### Further Reading + +This article is still under development, and it's improving over time.
+If you reached this point, you might be interested in the next articles: + +- [introduction](/cyberchallenge/x64-introduction/) (you are here) +- [moving data](/cyberchallenge/x64-moving-data/) +- stack frames + +Additional resources: + +- pwn.college's assembly module and lectures https://pwn.college/fundamentals/assembly-crash-course +- the compiler explorer website https://godbolt.org/z/c6brc1df9 +- [the official x86_64 reference](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html) +- unofficial x86_64 instructions reference https://www.felixcloutier.com/x86/ +- The best linux syscall table reference https://syscalls.mebeim.net/?table=x86/64/x64/latest diff --git a/astro-website/src/pages/cyberchallenge/x64-moving-data.mdx b/astro-website/src/pages/cyberchallenge/x64-moving-data.mdx new file mode 100644 index 0000000..c401fb5 --- /dev/null +++ b/astro-website/src/pages/cyberchallenge/x64-moving-data.mdx @@ -0,0 +1,238 @@ +--- +layout: '../../layouts/BlogPostNoHeader.astro' +title: An interactive guide to x86-64 assembly - moving data +publishDate: 2024-07-22 +description: +tags: ['x86-64', 'pwn'] +permalink: https://halb.it/cyberchallenge/x64-moving-data/ +--- +import Spoiler from '../../components/Spoiler.astro' +import SliderHexdump from '../../components/SliderHexdump.svelte' +import SliderAlign from '../../components/SliderAlign.svelte' +import RegistersTable from '../../components/RegistersTable.svelte' +import PtrSyntaxEmbed from '../../components/PtrSyntaxEmbed.svelte' +import StackEmbed from '../../components/StackEmbed.svelte' +import EndiannessEmbed from '../../components/EndiannessEmbed.svelte' +import Picture from '../../components/Picture.astro' + +This is the second part of a series of interactive articles on the x86-64 architecture. +This part will focus on the first assembly instructions, visualizing the way data moves in memory +when they are executed. 
+ +- [introduction](/cyberchallenge/x64-introduction/) +- [moving data](/cyberchallenge/x64-moving-data/) (you are here) +- stack frames + +### Visualizing memory + +In the [previous post](/cyberchallenge/x64-introduction) +we introduced some basics on data, encodings, and the places +where data is stored: registers and memory. +We also introduced a common way to visualize memory, that will be used extensively +in this article: hex dumps. + +The example below shows a hexdump of some example data taken from the stack frame of a process. +Use the slider to adjust the number of bytes you want to see in a single row. + +
+ + The reason I want you to familiarize yourself with this visualization is also the rationale behind + this series of articles: + Most resources online explain low-level topics (such as + stack frames, data alignment, or buffer overflows) using + [abstract diagrams](https://stackoverflow.com/a/40106523/9169799). + But when you approach these topics in practice, you will use + tools like gdb, which visualize data in a completely different way compared to the diagrams.
+ For example, this is a screenshot of my setup when + [running gdb with GEF and the python pwntools library](/posts/pwntools-gdb): + +
+ + All the visualizations in this article emulate the way data is visualized in real-world + scenarios, with tools like gdb or [PWNDBG](https://github.com/pwndbg/pwndbg/), + popular in CTF competitions. + My hope is that this will lower the steep learning curve of those tools. + + +### Moving data + +The first instruction we are going to see is `mov`, which moves data around. It can move data from a register to another, +from a register to memory, or vice-versa from memory to a register + + These first examples are self-explanatory: + +```python +mov rbx, 0x10 #copies the integer 0x10 into rbx +mov rax, rbx #copies the content of rbx into rax +``` + +Moving data to memory requires some extra syntax:
+The following snippet writes the byte `0xff` in the memory cell at address `0x10`. + +```c +mov rax, 0x10 +mov byte ptr [rax], 0xff +``` + +Let's break it down: +- First, we put in a register `0x10`, the address of the cell we want to write to. +- Then we perform a mov instruction with square brackets around the register name, to indicate that we want to move `0xff` in +the memory address pointed to by the register, and not into the register itself. + +Notice how in that example we moved a single byte, and we used the syntax `byte ptr`. +You can change that to `word`, `dword` or `qword` if you want to move a different number of bytes. + +The interactive example below allows you to experiment with all possible variations of the pointer syntax. +You can click "run" to see how the memory is affected. + + + + +### A sidenote on endianness + +We managed to reach this point by ignoring an important fact: x86-64 is a little endian architecture, +which means that numbers are not stored in the way you would expect.
+In the previous example, you saw what the number `0x4242424242424242` looks like in memory, +but we chose that number carefully to hide the issue. In the next example, you can enter the number you want.
+Can you spot what's happening? + +
+ +In case you missed it, numbers are being saved with their bytes in an inverted order: +For example, the number `0xcafe` is composed of the byte `ca` followed by `fe`, +but it will be saved as the byte `fe` followed by the byte `ca`. + +What's going on here is that +both humans and computers use a positional number system to represent integers, +but with a different order.
+When we (humans using Hindu-Arabic numerals) represent numbers, +we write the most significant value first, and continue in descending order. +This is the same as Big endian architectures. + +```python + human-readable decimal number + 1337 + | | + | Least significant digit + Most significant digit + + human-readable hex number + 0xcafebabe + | | + | Least significant byte + Most significant byte +``` +Little endian architectures write the least significant value first instead, and continue in ascending order. + +This topic is explained in depth on [wikipedia](https://en.wikipedia.org/wiki/Endianness), with some +useful diagrams that will solve any doubts you might have.
+Endianness is only related to the way the processor handles integers. +Other kinds of data, such as text, are usually encoded in the same order as you would expect. +Floating point numbers are stored in a completely different +format instead; you can read more about them +[in this great article](https://fabiensanglard.net/floating_point_visually_explained/) +, or in +[this visual guide by Ciechanowski](https://ciechanow.ski/exposing-floating-point/) + +### The stack + +x64, like most architectures, has the concept of a stack: an area in memory pointed to +by the special register `rsp`.
+You can add or remove elements from the top of the stack by using the +`push` and `pop` instructions. This is the most common interaction, but it's also valid to directly adjust the value of `rsp`. +In this interactive example +the stack area is highlighted in blue, together with the value of the `rsp` and `rax` registers. + +
+ +There are two key elements you should notice by playing with the example above: +- `rsp` points to the top of the stack. It is decreased by 8 when we push a value, and increased by 8 when we pop a value. +- Every time we pop a value from the stack that value is not deleted, the area of memory that contains it +simply stops being part of the stack. The only thing that changes is the memory address pointed to by `rsp`. + +Basically, `push rax` does the same as the following code: +```c +sub rsp, 8 +mov qword ptr [rsp], rax +``` + +And `pop rax` does the same as the following code: +```c +mov rax, qword ptr [rsp] +add rsp, 8 +``` + +There is a confusing element here: when we put something onto the stack we are +growing the stack, and yet we are moving towards lower addresses of memory. + +With the way we visualize memory this actually looks correct, the stack is growing +towards the top.
+But if we only look at the numeric addresses of elements on the stack, newer elements have smaller addresses, +which looks backwards.
+Even when you are aware of this, it's common to get confused +and end up thinking: +"I put a new value on the stack, but it has a smaller address than the previous value, what is going on?" + +### Memory alignment + +I don't think memory alignment can be explained in a better way than +what [this article does](https://web.archive.org/web/20080607055623/http://www.ibm.com/developerworks/library/pa-dalign/), so check it out. Here we'll only focus on how memory alignment impacts +the way we visualize the stack:
+Every time you push or pop something from the stack, you move the stack pointer +8 bytes up or down. If you observe carefully the +previous example, you'll also notice that the addresses in the stack pointer are +always multiples of 8: they always end with either `0` or `8`. + +This kind of alignment is done on purpose for performance reasons, and you will encounter it everywhere. +As a consequence, when we visualize memory in a hexdump it's common to start from addresses multiples +of 8 or 16, so that data will fit properly in a row. + +This is a hexdump taken from the stack memory of a function. Two different variables are highlighted: +one is the 32-bit integer `0xcafebabe`, the other is a stack canary, which we'll see in another article. +You can adjust the slider to change the start address in the hexdump. + +
+ +What I'm trying to show here is that everything is relative. +What you see is always an abstract representation of the actual data, +and it's up to you to visualize it in a way that matches +your mental model. + + +### Further Reading + +This article is still under development, and it's improving over time.
+If you reached this point, you might be interested in the next articles: + +- [introduction](/cyberchallenge/x64-introduction/) +- [moving data](/cyberchallenge/x64-moving-data/) (you are here) +- stack frames + +Additional resources: + +- pwn.college's assembly module and lectures https://pwn.college/fundamentals/assembly-crash-course +- the compiler explorer website https://godbolt.org/z/c6brc1df9 +- [the official x86_64 reference](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html) +- unofficial x86_64 instructions reference https://www.felixcloutier.com/x86/ +- the best linux syscall table reference https://syscalls.mebeim.net/?table=x86/64/x64/latest + + + + + + +