% 6mac-crhf.tex

\input{main.tex}

\title{Message Authentication Codes and Collision-Resistant Hash Functions}

\begin{document}
\maketitle
\begin{frame}
\frametitle{Outline}
\tableofcontents
\end{frame}
\section{Message Authentication Codes (MAC) -- Definitions}
\begin{frame}\frametitle{Integrity and Authentication}
\begin{figure}
\begin{center}
\input{tikz/integrity}
\input{tikz/authentication}
\end{center}
\end{figure}
\end{frame}
\begin{frame}\frametitle{The Syntax of MAC}
\begin{figure}
\begin{center}
\input{tikz/mac}
\end{center}
\end{figure}
\begin{itemize}
\item key $k$, tag $t$, a bit $b$ means $\mathsf{valid}$ if $b=1$; $\mathsf{invalid}$ if $b=0$.
\item \textbf{Key-generation} algorithm~$k \gets \mathsf{Gen}(1^n), \abs{k} \ge n$.
\item \textbf{Tag-generation} algorithm~$t \gets \mathsf{Mac}_k(m)$.
\item \textbf{Verification} algorithm~$b:= \mathsf{Vrfy}_k(m,t)$.
\item \textbf{Message authentication code}: $\Pi = (\mathsf{Gen}, \mathsf{Mac}, \mathsf{Vrfy})$.
\item \textbf{Basic correctness requirement}: $\mathsf{Vrfy}_k(m,\mathsf{Mac}_k(m)) = 1$.
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Security of MAC}
\begin{itemize}
\item \textbf{Intuition}: No adversary should be able to generate a \textbf{valid} tag on any ``\textbf{new}'' message\footnote{A stronger requirement concerns any \emph{new message/tag pair}.} that was not previously sent.
\item \textbf{Replay attack}: Copy a message and tag previously sent. (\textbf{excluded by only considering ``new'' message})
\begin{itemize}
\item Sequence numbers: receiver must store the previous ones.
\item Time-Stamps: sender/receiver maintain synchronized clocks.
\end{itemize}
\item \textbf{Existential unforgeability}: \textbf{Not} be able to forge a valid tag on \textbf{any} message.
\begin{itemize}
\item \textbf{Existential forgery}: \emph{at least one} message.
\item \textbf{Selective forgery}: message chosen \emph{prior} to the attack.
\item \textbf{Universal forgery}: \emph{any} given message.
\end{itemize}
\item \textbf{Adaptive chosen-message attack (CMA)}: be able to obtain tags on \emph{any} message chosen adaptively \emph{during} its attack.
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Definition of MAC Security}
The message authentication experiment $\mathsf{Macforge}_{\mathcal{A},\Pi }(n)$:
\begin{enumerate}
\item $k \gets \mathsf{Gen}(1^n)$.
\item $\mathcal{A}$ is given input $1^n$ and oracle access to $\mathsf{Mac}_k(\cdot)$, and outputs $(m,t)$. $\mathcal{Q}$ is the set of queries to its oracle.
\item $\mathsf{Macforge}_{\mathcal{A},\Pi }(n)=1 \iff$ $\mathsf{Vrfy}_k(m,t)=1$ $\land$ $m \notin \mathcal{Q}$. 
\end{enumerate}
\begin{figure}
\begin{center}
\input{tikz/macforge-exp.tex}
\end{center}
\end{figure}
\begin{definition}
A MAC $\Pi$ is \textbf{existentially unforgeable under an adaptive CMA} if $\forall$ \textsc{ppt} $\mathcal{A}$, $\exists$ $\mathsf{negl}$ such that:
\[ \Pr [\mathsf{Macforge}_{\mathcal{A},\Pi }(n)=1] \le \mathsf{negl}(n).
\]
\end{definition}
\end{frame}
\begin{frame}\frametitle{Questions}
\begin{exampleblock}{Suppose $\left<S, V\right>$ are CMA-secure,  are $\left<S', V'\right>$ secure?}
\begin{itemize}
%\item Suppose an attacker can find $m_{0} \neq m_{1}$ s.t. $t_{0} = t_{1}$ for $\frac{1}{8}$.
%\item Suppose tag is always $32$ bits long.
\item $S'_{k_{1},k_{2}}(m) = (S_{k_{1}}(m),S_{k_{2}}(m))$\\
$V'_{k_{1},k_{2}}(m,(t_{1},t_{2})) = V_{k_{1}}(m,t_{1}) \land V_{k_{2}}(m,t_{2})$
\item $S'_{k}(m) = (S_{k}(m),S_{k}(m))$\\
$ V'_{k}(m,(t_{1},t_{2})) = \left\{ 
  \begin{array}{l l}
    V_{k}(m,t_{1}) & \quad \text{if $t_{1}=t_{2}$}\\
    0 & \quad \text{otherwise}\\
  \end{array} \right. $
\item $S'_{k}(m) = (S_{k}(m),S_{k}(0^{n}))$\\
$ V'_{k}(m,(t_{1},t_{2})) = V_{k}(m,t_{1}) \land  V_{k}(0^{n},t_{2})$
\item $S'_{k}(m) = S_{k}(m)$, 
$ V'_{k}(m,t) = \left\{ 
  \begin{array}{l l}
    V_{k}(m,t) & \quad \text{if $m \neq 0^{n}$}\\
    1 & \quad \text{otherwise}\\
  \end{array} \right. $
\item $S'_{k}(m) = S_{k}(m)\ \text{without the LSB}$ \\
$V'_{k}(m,t) = V_{k}(m,t\| 0)\ \lor \ V_{k}(m,t\| 1)$
\item $S'_{k}(m) = (S_{k}(m),m)$, 
$V'_{k}(m,(t_{1},t_{2})) = V_{k}(m,t_{1}) \land t_{2} = m$
\end{itemize}
\end{exampleblock}
\end{frame}
\section{Constructing Secure Message Authentication Codes}
\begin{frame}\frametitle{Constructing Secure MACs}
\begin{columns}[c]
\column{.4\textwidth}
\begin{figure}
\begin{center}
\input{tikz/macwithprf}
\end{center}
\end{figure}
\column{.6\textwidth}
\begin{construction}
\begin{itemize}
\item $F$ is PRF. $\abs{m} = n$.
\item $\mathsf{Gen}(1^n)$: $k \gets \{0,1\}^n$ \emph{u.a.r}.
\item $\mathsf{Mac}_k(m)$: $t := F_k(m)$.
\item $\mathsf{Vrfy}_k(m,t)$: $1 \iff t \overset{?}{=} F_k(m)$.
\end{itemize}
\end{construction}
\begin{theorem}\label{thm:mac}
If $F$ is a PRF, Construction is a secure fixed-length MAC.
\end{theorem}
\end{columns}
\begin{lemma}
\textbf{Truncating MACs based on PRFs}:
If $F$ is a PRF, so is $F^t_k(m) = F_k(m)[1,\dots,t]$.
\end{lemma}
\end{frame}
\begin{frame}\frametitle{Proof of Secure MAC from PRF}
\textbf{Idea}: Show $\Pi$ is secure unless $F_k$ is not PRF by reduction.  
\begin{proof}
$D$ distinguishes $F_k$; $\mathcal{A}$ attacks $\Pi$. 
\begin{figure}
\begin{center}
\input{tikz/pgfMAC}
\end{center}
\end{figure}
\end{proof}
\end{frame}
\begin{frame}\frametitle{Proof of Secure MAC from PRF (Cont.)}
\begin{proof}
(1) If true random $f$ is used, $t=f(m)$ is uniformly distributed.
\[ \Pr[D^{f(\cdot)}(1^n)=1] = \Pr[\mathsf{Macforge}_{\mathcal{A},\tilde{\Pi}}(n) = 1] \le 2^{-n}.\]
(2) If $F_k$ is used, conduct the experiment $\mathsf{Macforge}_{\mathcal{A},\Pi}(n)$. 
\[ \Pr[D^{F_k(\cdot)}(1^n)=1] = \Pr[\mathsf{Macforge}_{\mathcal{A},\Pi}(n) = 1] = \varepsilon(n).\]
According to the definition of PRF,
\[ \left| \Pr[D^{F_k(\cdot)}(1^n)=1] - \Pr[D^{f(\cdot)}(1^n)=1] \right| \ge \varepsilon(n) - 2^{-n}. \]
\end{proof}
\end{frame}
\begin{frame}\frametitle{Extension to Variable-Length Messages}
\begin{exampleblock}{For variable-length messages, would the following suggestions be secure?}
\begin{itemize}
\item \textbf{Suggestion 1}: XOR all the blocks together and authenticate the result. $t := \mathsf{Mac}_k'(\oplus_i m_i)$.
\item \textbf{Suggestion 2}: Authenticate each block separately. $t_i := \mathsf{Mac}_k'(m_i)$.
\item \textbf{Suggestion 3}: Authenticate each block along with a sequence number. $t_i := \mathsf{Mac}_k'(i\| m_i)$.
%\item \textbf{Weakness}: forgeable, changing the order, dropping blocks.
\end{itemize}
\end{exampleblock}
%\item \textbf{Countermeasure}: add information. 
%\begin{itemize}
%\item random ``\textbf{message identifier}'' provides randomness; prevents combination.
%\item \textbf{sequence number} prevents reordering.
%\item the \textbf{length} of message prevents dropping/appending.
%\end{itemize}
%\end{itemize}
\end{frame}
\begin{comment}
\begin{frame}\frametitle{Constructing Secure Variable-Length MACs}
\begin{construction}
\begin{itemize}
\item $\Pi' = (\mathsf{Gen}', \mathsf{Mac}', \mathsf{Vrfy}')$ be a fixed-length MAC.
\item $\mathsf{Gen}$: is identical to $\mathsf{Gen}'$.
\item $\mathsf{Mac}$: $m$ of length $\ell < 2^{n/4}$ and of $d$ blocks $m_1,\dotsc,m_d $ of length $n/4$ (padded with 0s); $r \gets \{0,1\}^{n/4}$.\\
For $i=1,\dotsc,d$, $t_i \gets \mathsf{Mac}_k'(r\| \ell\| i\| m_i)$, $i$ and $\ell$ are uniquely encoded as strings of length $n/4$.\\
Output $t:=\left<r,t_1,\dotsc,t_d\right>$.
\item $\mathsf{Vrfy}$: Input $m$ of $d'$ blocks and check $d'=d$.\\
Output $1 \iff \mathsf{Vrfy}_k'(r\| \ell\| i\| m_i, t_i)=1$ for $1\le i \le d$.
\end{itemize}
\end{construction}
\begin{theorem}
If $\Pi'$ is a secure fixed-length MAC, Construction is a secure MAC.
\end{theorem}
\end{frame}
\begin{frame}\frametitle{Proof of Secure Variable-Length MACs}
\textbf{Intuition}: The extra information prevents all possible attacks.
\begin{proof}
\begin{description}
\item[$\mathsf{Repeat}$]: the same identifier $r$ is used twice by oracle $\mathcal{O}$. 
\item[$\mathsf{Forge}$]: at least one new block $r\| \ell\| i\| m_i$ is forged. 
\item[$\mathsf{Break}$]: $\mathsf{Macforge}_{\mathcal{A},\Pi }(n)=1, \Pr[\mathsf{Break}]=\varepsilon(n)$. 
\end{description}
\[
\begin{split}
	\Pr[\mathsf{Break}] =& \Pr[\mathsf{Break} \land \mathsf{Repeat}] + \Pr[\mathsf{Break} \land \overline{\mathsf{Repeat}} \land \overline{\mathsf{Forge}}] \\
	&+ \Pr[\mathsf{Break} \land \overline{\mathsf{Repeat}} \land \mathsf{Forge}].
\end{split}
\]
To prove the below statements:
\begin{enumerate}
\item $\Pr[\mathsf{Break} \land \mathsf{Repeat}] \le \Pr[\mathsf{Repeat}] \le \mathsf{negl}(n)$.
\item $\Pr[\mathsf{Break} \land \overline{\mathsf{Repeat}} \land \overline{\mathsf{Forge}}] = 0$.
\item For $\Pi'$, $\Pr[\mathsf{Break}'] = \Pr[\mathsf{Break} \land \mathsf{Forge}] \ge \Pr[\mathsf{Break} \land \overline{\mathsf{Repeat}} \land \mathsf{Forge}] \ge \varepsilon(n) - \mathsf{negl}(n)$.
\end{enumerate}
\end{proof}
\end{frame}
\begin{frame}\frametitle{Proof of Secure Variable-Length MACs (Cont.)}
\begin{proof}
\begin{enumerate}
\item $r \gets \{0,1\}^{\frac{n}{4}}$. By the ``birthday bound'', $\Pr[\mathsf{Repeat}] \le q(n)^2/2^{\frac{n}{4}}$.
\item If $\mathsf{Repeat}$ does not occur, $\mathsf{Break}$ implies $\mathsf{Forge}$. \\
$\mathcal{A}$ finally outputs $(m,t), t:=\left<r,t_1,\dotsc,t_d\right>$.
\begin{itemize}
\item $r$ is new, then $r\| \ell\| i\| m_i$ is new.
\item $r$ is used exactly once, then the queried message $m' \neq m$. 
\begin{itemize}
\item $\ell' \neq \ell$, then $r\| \ell\| i\| m_i$ is new.
\item $\ell' = \ell$, then $\exists\; m_i' \neq m_i$, so $r\| \ell\| i\| m_i'$ is new.
\end{itemize}
\end{itemize}
So the block is new, $\mathsf{Forge}$ occurs.
\item Reduce $\mathcal{A}'$ to $\mathcal{A}$: $\mathcal{A}'$ attacks $\Pi'$ with $\mathcal{A}$ as a sub-routine and answer the queries of $\mathcal{A}$ with $\mathcal{A}'$'s own oracle. $\mathcal{A}$ output $(m,t)$; $\mathcal{A}'$ parses it and output a new block $(r\| \ell\| i\| m_i, t_i)$ if possible.
\end{enumerate}
\end{proof}
\end{frame}
\end{comment}
\section{CBC-MAC}
\begin{frame}\frametitle{Constructing Fixed-Length CBC-MAC}
\begin{columns}[c]
\column{.5\textwidth}
\begin{figure}
\begin{center}
\input{tikz/CBC-small}
\end{center}
\end{figure}
\column{.5\textwidth}
\begin{figure}
\begin{center}
\input{tikz/CBC-MAC}
\end{center}
\end{figure}
\end{columns}
Modify CBC encryption into CBC-MAC:
\begin{itemize}
\item Change random $IV$ to encrypted fixed $0^{n}$, \emph{otherwise}:\\
\alert{Q: query $m_1$ and get $(IV, t_1)$; output $m_1' = IV' \oplus  IV \oplus m_{1}$ and $t' =$ \underline{$\qquad $}.} %(IV',t_1)$.
\item Tag only includes the output of the final block, \emph{otherwise}:\\
\alert{Q: query $m_i$ and get $t_i$; output $m_i' = t_{i-1}' \oplus t_{i-1} \oplus m_{i}$ and $t_{i}' = $ \underline{$\qquad$}.}%$t_i$.
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Constructing Fixed-Length CBC-MAC (Cont.)}
\begin{construction}
\begin{itemize}
\item a PRF $F$ and a length function $\ell$. $\abs{m} = \ell(n)\cdot n$.
$\ell=\ell(n)$. $m = m_1,\dotsc,m_{\ell}$.
\item $\mathsf{Gen}(1^n)$: $k \gets \{0,1\}^n$ \emph{u.a.r}.
\item $\mathsf{Mac}_k(m)$: $t_i := F_k(t_{i-1}\oplus m_i), t_0=0^n$. Output $t = t_\ell$.
\item $\mathsf{Vrfy}_k(m,t)$: $1 \iff t \overset{?}{=} \mathsf{Mac}_k(m)$.
\end{itemize}
\end{construction}
\begin{theorem}
If $F$ is a PRF, Construction is a secure \textbf{fixed-length} MAC.
\end{theorem}
\textbf{Not} for \textbf{variable-length} message:\\
\alert{Q: For one-block message $m$ with tag $t$, adversary can append a block \underline{$\qquad$} and output tag $t$.} %$t\oplus m$
\end{frame}
\begin{frame}\frametitle{Secure Variable-Length MAC}
\begin{itemize}
\item \textbf{Input-length key separation}: $k_{\ell} := F_k(\ell)$, use $k_{\ell}$ for CBC-MAC.
\item \textbf{Length-prepending}: Prepend $m$ with $|m|$, then use CBC-MAC.
\begin{figure}
\begin{center}
\input{tikz/VCBC-MAC}
\end{center}
\end{figure}
\item \textbf{Encrypt last block (ECBC-MAC)}: Use two keys $k_1, k_2$. Get $t$ with $k_1$ by CBC-MAC, then output $\hat{t} := F_{k_2}(t)$.
\end{itemize}
\alert{Q: To authenticate a voice stream, which approach do you prefer?}
\end{frame}
\begin{comment}
\begin{frame}{Brute-force Attack against CBC-MAC}
Query $2^{\abs{t}/2}$ message to find $m \neq m'$ and $t = t'$.
\newline

\textbf{Extension property} of ECBC-MAC:
\[ \forall x,y,z: F_k(x)=F_k(y) \Rightarrow F_k(x\|z)=F_k(y\|z).   \]

So the tag of $m\|w$ is the same with that of $m'\|w$.
\newline

Lesson: the tag space should be enough large.\\
Improvement: Add a random string $r$, and output $(r, \mathsf{Mac}_{k'}(t\|r))$ instead of $t$.
\end{frame}
\end{comment}
\begin{frame}\frametitle{MAC Padding}
Padding must be invertible!\[ m_0\neq m_1 \Rightarrow \mathsf{pad}(m_0) \neq \mathsf{pad}(m_1). \]
\textbf{ISO}: pad with ``100\dots00''. Add dummy block if needed.\\
\alert{Q: What if no dummy block?} \\
\textbf{CMAC (Cipher-based MAC from NIST)}: key$=(k,k_1,k_2)$.
\begin{figure}
\begin{center}
\input{tikz/CMAC}
\end{center}
\end{figure}
\begin{itemize}
\item No final encryption: extension attack thwarted by keyed XOR.
\item No dummy block: ambiguity resolved by use of $k_1$ or $k_2$.
\end{itemize}
\end{frame}
\section{Collision-Resistant Hash Functions}
\begin{frame}\frametitle{Defining Hash Function}
\begin{figure}
\begin{center}
\input{tikz/hash}
\end{center}
\end{figure}
\begin{definition}
A \textbf{hash function (compression function)} is a pair of \textsc{ppt} algorithms $(\mathsf{Gen}, H)$ satisfying:
\begin{itemize}
\item a key $s \gets \mathsf{Gen}(1^n)$, $s$ is \textbf{not kept secret}.
\item $H^s(x) \in \{0,1\}^{\ell(n)}$, where $x \in \{0,1\}^*$ and $\ell$ is polynomial.
\end{itemize}
If $H^s$ is defined only for $x \in \{0,1\}^{\ell'(n)}$ and $\ell'(n) > \ell(n)$, then $(\mathsf{Gen}, H)$ is a \textbf{fixed-length} hash function.
\end{definition}
\end{frame}
\begin{frame}\frametitle{Defining Collision Resistance}
\begin{itemize}
\item \textbf{Collision} in $H$: $x \neq x'$ and $H(x) = H(x')$.
\item \textbf{Collision Resistance}: infeasible for any \textsc{ppt} alg. to find.
\end{itemize}
The collision-finding experiment $\mathsf{Hashcoll}_{\mathcal{A},\Pi}(n)$:
\begin{enumerate}
\item $s \gets \mathsf{Gen}(1^n)$.
\item $\mathcal{A}$ is given $s$ and outputs $x, x'$.
\item $\mathsf{Hashcoll}_{\mathcal{A},\Pi}(n) =1 \iff x\ne x' \land H^s(x) = H^s(x')$.
\end{enumerate}
\begin{definition}
$\Pi$ ($\mathsf{Gen}$, $H^s$) is \textbf{collision resistant} if $\forall$ \textsc{ppt} $\mathcal{A}$, $\exists\;\mathsf{negl}$ such that
\[ \Pr[\mathsf{Hashcoll}_{\mathcal{A},\Pi}(n)=1] \le \mathsf{negl}(n).
\]
\end{definition}
\end{frame}
%\begin{comment}
\begin{frame}\frametitle{Weaker Notions of Security for Hash Functions}
\begin{figure}
\begin{center}
\input{tikz/collision}
\end{center}
\end{figure}
\begin{itemize}
\item \textbf{Collision resistance}: It is hard to find $(x, x'), x' \ne x$ such that $H(x) = H(x')$.
\item \textbf{Second pre-image resistance}: Given $s$ and $x$, it is hard to find $x' \ne x$ such that $H^s(x') = H^s(x)$.
\item \textbf{Pre-image resistance}: Given $s$ and $y = H^s(x)$, it is hard to find $x'$ such that $H^s(x')=y$.
\end{itemize}
\end{frame}
\begin{frame}{Questions}
\begin{exampleblock}{$H$ is CRHF. Is $H'$ CRHF?}
\begin{itemize}
\item $H'(m) = H(m)\oplus H(m\oplus 1^{\abs{m}})$ % H(000) = H(111)
\item $H'(m) = H(m)\| H(0)$
\item $H'(m) = H(m)\| H(m)$
\item $H'(m) = H(m) \oplus H(m)$
\item $H'(m) = H(m[0,\dots,\abs{m}-2])$
\item $H'(m) = H(m\| 0)$
\end{itemize}
\end{exampleblock}
\end{frame}
\begin{frame}{Applications of Hash Functions}
\begin{itemize}
\item \textbf{Fingerprinting and Deduplication}: $H(alargefile)$ for virus fingerprinting, deduplication, P2P file sharing
\item \textbf{Merkle Trees}: $H(H(H(file1), H(file2)), H(H(file3), H(file4)))$ fingerprinting multiple files / parts of a file
\item \textbf{Password Hashing}: $(salt, H(salt, pw))$ mitigating the risk of leaking passwords stored in the clear 
\item \textbf{Key Derivation}: $H(secret)$ deriving a key from a high-entropy (but not necessarily uniform) shared secret
\item \textbf{Commitment Schemes}: $H(info)$ hiding the committed info; binding the commitment to the info
\end{itemize}
\end{frame}
%\end{comment}
%\begin{frame}\frametitle{Applications of Hash Functions}
%\begin{itemize}
%\item \textbf{digital signatures}:CRHF
%\item \textbf{information authentication/integrity check}
%\item \textbf{protection of passwords}: pre-image resistant.
%\item \textbf{confirmation of knowledge/commitment}: CRHF
%\item \textbf{pseudo-random string generation/key derivation}
%\item \textbf{micropayments (e.g. micromint)}
%\item \textbf{construction of MACs, stream/block ciphers}
%\end{itemize}
%\end{frame}
\begin{frame}\frametitle{The ``Birthday'' Problem}
\begin{exampleblock}{The ``Birthday'' Problem}
\textbf{Q}: ``\emph{What size group of people do we need to take such that with probability $1/2$ some pair of people in the group share a birthday?}''

\textbf{A}: 23.
\end{exampleblock}
\begin{lemma}
Choose $q$ elements $y_1,\dotsc , y_q$ \emph{u.a.r} from a set of size $N$, and let $\mathsf{coll}(q,N)$ denote the probability that $\exists \; i \ne j$ with $y_i = y_j$. Then
\[ \mathsf{coll}(q,N) \le \frac{q^2}{2N}.
\]
\[ \mathsf{coll}(q,N) \ge  \frac{q(q-1)}{4N}\quad \text{if}\; q \le \sqrt{2N}.
\]
\[ \mathsf{coll}(q,N) = \Theta(q^2/N)\quad \text{if}\; q < \sqrt{N}.
\]
\end{lemma}
The length of hash value should be long enough.
\end{frame}
%\begin{frame}\frametitle{A Generic ``Birthday'' Attack}
%\begin{itemize}
%\item \textbf{Birthday Attack}: $H : \{0,1\}^* \to \{0,1\}^\ell$. Choose $q$ distinct inputs $x_1,\dotsc,x_q \in \{0,1\}^{2\ell}$, check whether any of two $y_i := H(x_i)$ are equal.
%\item \textbf{Birthday problem}: Choose $y_1,\dotsc,y_q \gets \{0,1\}^{\ell}$ \emph{u.a.r}, $\mathsf{coll}(q,2^{\ell}) = ?$
%\item Collision occurs with a high probability when $\mathcal{O}(q) = \mathcal{O}(2^{\ell/2})$.
%\item To let time $T > 2^{\ell/2}$, then $\ell = 2\log T$ at least.
%\item Work only for collision resistance, no generic attacks for 2nd pre-image or pre-image resistance better than $2^\ell$.
%\item Require too much space $\mathcal{O}(2^{\ell/2})$.
%\end{itemize}
%\end{frame}
\begin{comment}
\begin{frame}\frametitle{Improved Birthday Attack}
\begin{algorithm}[H]
\SetKwInOut{Input}{input}
\SetKwInOut{Output}{output}
\SetKw{KwB}{break}
\SetKw{KwH}{halt}
\DontPrintSemicolon
\caption{Improved birthday attack}
\Input{A hash function $H : \{0, 1\}^*\to \{0, 1\}^\ell$}
\Output{Distinct $x, x'$ with $H(x) = H(x')$}
\BlankLine
$x_0 \gets \{0,1\}^{\ell+1}$, $x' := x := x_0$\;
\For{$i = 1$ \KwTo $2^{\ell/2} +1$}{
  $x := H(x)$, $x' := H(H(x'))$
  \tcp{$x = H^i(x_0)$, $x' = H^{2i}(x_0)$}
  \lIf{$x=x'$}{\KwB}\;
}
\lIf{$x\ne x'$}{\Return fail}\;
$x' := x$, $x := x_0$\;
\For{$j=1$ \KwTo $i$}{
  \lIf{$H(x)=H(x')$}{\Return $x, x'$ and \KwH}\;
  \lElse{$x := H(x), x' := H(x')$}
  \tcp{$x = H^j(x_0)$, $x' = H^{j+i}(x_0)$}
}
\end{algorithm}
\end{frame}
\begin{frame}\frametitle{Proof of Improved Birthday Attack}
\begin{lemma}
Let $x_1,\dotsc,x_q$ be a sequence of values with $x_m = H(x_{m-1})$. If $x_I =x_J$ with $I < J$, then $\exists\; i<J$ such that $x_i =x_{2i}$.
\end{lemma}
\begin{figure}
\begin{center}
\input{tikz/birthdayattack}
\end{center}
\end{figure}
\begin{proof}
If $x_I =x_J$, then $x_I, x_{I+1}, \dotsc$ repeats with period $J-I$.\\
Let $i$ to be the smallest multiple of $J-I$ with $i \ge I$, \[i \overset{\text{def}}{=} (J-I)\cdot \lceil I/(J-I)\rceil.\] \\
$i < J$ since $I,\dotsc,J-1$ contains a multiple of $J-I$.\\
Since $2i-i=i$ is a multiple of the period and $i \ge I$, $x_i = x_{2i}$. 
\end{proof}
\end{frame}
\end{comment}
\begin{frame}\frametitle{Constructing ``Meaningful'' Collisions}
\begin{exampleblock}{How many different meaningful sentences are in the below paragraph?}
It is \textbf{hard/difficult/challenging/impossible} to \textbf{imagine/believe} that we will \textbf{find/locate/hire} another \textbf{employee/person} having similar \textbf{abilities/skills/character} as Alice. She has done a \textbf{great/super} job.
\end{exampleblock}
\end{frame}
\begin{frame}\frametitle{The Merkle-Damg\r{a}rd Transform}
\begin{figure}
\begin{center}
\input{tikz/MDtransform}
\end{center}
\end{figure}
\begin{construction}
Construct \textbf{variable-length} CRHF $(\mathsf{Gen}, H)$ from fixed-length $(\mathsf{Gen}, h)$ ($2\ell$ bits $\to \ell$ bits, $\ell = \ell(n)$):
\begin{itemize}
\item $\mathsf{Gen}$: remains unchanged
\item $H$: key $s$ and string $x \in \{0,1\}^*$, $L=|x|< 2^{\ell}$:
\begin{itemize}
\item $B := \lceil \frac{L}{\ell} \rceil$ (\# blocks). \textbf{Pad $x$ with 0s}.  $\ell$-bit blocks $x_1,\dotsc,x_B$. $x_{B+1} := L$, $L$ is encoded using $\ell$ bits
\item $z_0 := IV = 0^\ell$. For $i=1,\dotsc,B+1$, compute $z_i := h^s(z_{i-1}\| x_i)$
\end{itemize}
\end{itemize}
\end{construction}
\end{frame}
\begin{frame}\frametitle{Security of the Merkle-Damg\r{a}rd Transform}
\begin{theorem}
If $(\mathsf{Gen},h)$ is a fixed-length CRHF, then $(\mathsf{Gen},H)$ is a CRHF.
\end{theorem}
\begin{proof}
\textbf{Idea}: a collision in $H^s$ yields a collision in $h^s$. \\
Two messages $x \ne x'$ of respective lengths $L$ and $L'$ such that $H^s(x) = H^s(x')$. \# blocks are $B$ and $B'$. \\
$x_{B+1} := L$ is necessary since \textbf{Padding with 0s} will lead to the same input with different messages.
\begin{enumerate}
\item $L \ne L'$: $z_B\| L \ne z_{B'}\| L'$
\item $L = L'$: $z_{i^*-1}\| x_{i^*} \ne z_{i^*-1}'\| x_{i^*}'$
\end{enumerate}
So there must be $x \neq x'$ such that $h^s(x) = h^s(x')$.
\end{proof}
\end{frame}
\begin{frame}\frametitle{CRHF from Block Cipher}
\begin{columns}
\column{.5\textwidth}
Davies-Meyer (SHA-1/2, MD5)
\begin{figure}
\begin{center}
\input{tikz/Davies-Meyer.tex}
\end{center}
\end{figure}
$h_{i} = F_{m_{i}}(h_{i-1}) \oplus h_{i-1}$
\column{.5\textwidth}
Miyaguchi-Preneel (Whirlpool)
\begin{figure}
\begin{center}
\input{tikz/Miyaguchi-Preneel.tex}
\end{center}
\end{figure}
$h_{i} = F_{h_{i-1}}(m_{i}) \oplus h_{i-1} \oplus m_{i}$
\end{columns}
$\quad$\\
\begin{theorem}
If $F$ is modeled as an ideal cipher, then Davies-Meyer construction yields a CRHF.
\end{theorem}
\alert{Q: what if $h_{i} = F_{m_{i}}(h_{i-1})$ without XOR with $h_{i-1}$? }\\
\alert{Q: what if $F$ is not ideal such that $\exists x, F_k(x)=x$?}
\end{frame}
\begin{frame}\frametitle{Cryptographic Hash Functions: SHA-1 and MD5}
\begin{columns}[c]
\column{.5\textwidth}
SHA-1:
\begin{figure}
\begin{center}
\includegraphics[width=40mm]{pic/SHA1}
\end{center}
\end{figure}
\column{.5\textwidth}
MD5:
\begin{figure}
\begin{center}
\includegraphics[width=40mm]{pic/MD5}
\end{center}
\end{figure}
\end{columns}
$A, B, C, D$ and $E$ are 32-bit words of the state;
$F$ is a nonlinear function that varies;
$\lll n$ denotes a left bit rotation by $n$ places;
$W_t$/$M_t$ is the expanded message word of round $t$;
$K_t$ is the round constant of round $t$;
$\boxplus$ denotes addition modulo $2^{32}$.
\begin{itemize}
\item Finding a collision in 128-bit MD5 requires time $2^{20.96}$
\item Finding a collision in 160-bit SHA-1 requires time $2^{51}$
\end{itemize}
\end{frame}
\section{HMAC}
%\begin{frame}\frametitle{Nested MAC (NMAC)}
%\begin{figure}
%\begin{center}
%\input{tikz/NMAC}
%\end{center}
%\end{figure}
%\begin{construction}
%$(\widetilde{\mathsf{Gen}}, h)$ is a fixed-length CRHF. $(\widetilde{\mathsf{Gen}}, H)$ is Merkle-Damg\r{a}rd transform. NMAC:
%\begin{itemize}
%\item $\mathsf{Gen}(1^n)$: Output $(s, k_1, k_2)$. $s \gets \widetilde{\mathsf{Gen}}, k_1,k_2 \gets \{0,1\}^n$ \emph{u.a.r}.
%\item $\mathsf{Mac}_{s,k_1,k_2}(m)$: $t_i := h_{k_1}^s(H_{k_2}^s(m))$. $h_{k}^s \overset{\mathsf{def}}{=} h^s(k\|x)$.\\
%$H^s_{k_2}$ is \emph{inner} function; $h^s_{k_1}$ is \emph{outer} function.
%\item $\mathsf{Vrfy}_{s,k_1,k_2}(m,t)$: $1 \iff t \overset{?}{=} \mathsf{Mac}_{s,k_1,k_2}(m)$.
%\end{itemize}
%\end{construction}
%\end{frame}
%\begin{frame}\frametitle{Security of NMAC}
%\begin{theorem}
%If $(\widetilde{\mathsf{Gen}}, h)$ is CRHF and yields a secure MAC, then NMAC is secure. (existentially unforgeable under an adaptive CMA for arbitrary-length messages)
%\end{theorem}
%\begin{itemize}
%\item $k_2$ is not needed once $(\widetilde{\mathsf{Gen}}, h)$ is CRHF.
%\begin{itemize}
%\item \textbf{Weak collision resistance}: It is hard to find $(x, x'), x' \ne x$ such that $H^s_{k_2}(x) = H^s_{k_2}(x')$.
%\item $H_s^{k_2}(x)$ is hidden by $h_s^{k_1}(H_s^{k_2}(x))$.
%\item \textbf{Disadvantage}: $IV$ of $H$ must be modified.
%\end{itemize}
%\end{itemize}
%\end{frame}
\begin{frame}\frametitle{Hash-based MAC (HMAC)}
\begin{figure}
\begin{center}
\input{tikz/HMAC}
\end{center}
\end{figure}
\begin{construction}
$(\widetilde{\mathsf{Gen}}, h)$ is a fixed-length CRHF. $(\widetilde{\mathsf{Gen}}, H)$ is the Merkle-Damg\r{a}rd transform.
$IV$, $\mathsf{opad}$ (0x5C), $\mathsf{ipad}$ (0x36) are fixed constants of length $n$.
HMAC:
\begin{itemize}
\item $\mathsf{Gen}(1^n)$: Output $(s, k)$. $s \gets \widetilde{\mathsf{Gen}}, k \gets \{0,1\}^n$ \emph{u.a.r}
\item $\mathsf{Mac}_{s,k}(m)$: $t := H_{IV}^s\Big((k \oplus \mathsf{opad}) \| H_{IV}^s\big((k \oplus \mathsf{ipad}) \| m\big)\Big)$
\item $\mathsf{Vrfy}_{s,k}(m,t)$: $1 \iff t \overset{?}{=} \mathsf{Mac}_{s,k}(m)$
\end{itemize}
\end{construction}
\end{frame}
\begin{frame}\frametitle{Security of HMAC}
\begin{theorem}
\[ G(k) \overset{\text{def}}{=} h^s(IV\| (k\oplus \mathsf{opad})) \| 
h^s(IV\| (k\oplus \mathsf{ipad})) = k_1\| k_2
\]
$(\widetilde{\mathsf{Gen}}, h)$ is CRHF. If $G$ is a PRG, then HMAC is secure.
\end{theorem}
\begin{itemize}
\item HMAC is an industry standard (RFC2104)
\item HMAC is faster than CBC-MAC
\item Before HMAC, a common mistake was to use $H^s(k\| x)$
\item \alert{Verification timing attacks: (Keyczar crypto library (Python))} \\
def Verify(key, msg, sig\underline{\ }bytes): \\
$\qquad$ return HMAC(key, msg) == sig\underline{\ }bytes \\
The problem:  implemented as a byte-by-byte comparison
\item \alert{\emph{Don't implement it yourself}}
\end{itemize}
\end{frame}
\section{Information-Theoretic MACs}

\begin{frame}\frametitle{Definition of Information-Theoretic MAC Security}
It is impossible to achieve a ``perfect'' MAC, as the adversary can output a valid tag with probability at least $1/2^{\abs{t}}$.
\newline

The one-time MAC experiment $\mathsf{Macforge}^{\mathsf{1-time}}_{\mathcal{A},\Pi }$:
\begin{enumerate}
\item $k \gets \mathsf{Gen}$.
\item $\mathcal{A}$ outputs a message $m'$, and is given a tag $t' \gets \mathsf{Mac}_k(m')$, and outputs $(m,t)$.
\item $\mathsf{Macforge}^{\mathsf{1-time}}_{\mathcal{A},\Pi }=1 \iff$ $\mathsf{Vrfy}_k(m,t)=1$ $\land$ $m \neq m'$. 
\end{enumerate}
\begin{definition}
A MAC $\Pi$ is \textbf{one-time $\varepsilon$-secure} if for all (even unbounded) adversaries $\mathcal{A}$:
\[ \Pr [\mathsf{Macforge}^{\mathsf{1-time}}_{\mathcal{A},\Pi}=1] \le \varepsilon.
\]
\end{definition}
\end{frame}

\begin{frame}\frametitle{Construction of Information-Theoretic MACs}
\begin{definition}
A function $h$: $\mathcal{K} \times \mathcal{M} \to \mathcal{T}$ is a \textbf{Strongly Universal Function (SUF)} if for all distinct $m, m' \in \mathcal{M}$ and all $t, t' \in \mathcal{T}$, it holds that:
\[ \Pr [h_k(m) = t  \land h_k(m') = t'] = 1 / \abs{\mathcal{T}}^2.
\]
where the probability is taken over uniform choice of $k \in \mathcal{K}$.
\end{definition}
\begin{construction}
\begin{itemize}
\item Let $h$: $\mathcal{K} \times \mathcal{M} \to \mathcal{T}$ be an SUF.
\item $\mathsf{Gen}$: $k \gets \mathcal{K}$ \emph{u.a.r}.
\item $\mathsf{Mac}_k(m)$: $t := h_k(m)$.
\item $\mathsf{Vrfy}_k(m,t)$: $1 \iff t \overset{?}{=} h_k(m)$. (If $m \notin \mathcal{M}$, then output 0.)
\end{itemize}
\end{construction}
\begin{theorem}
If $h$ is an SUF, Construction is a one-time $1/\abs{\mathcal{T}}$-secure MAC.
\end{theorem}
\end{frame}
\begin{frame}\frametitle{Construction of An SUF}
\begin{theorem}
For any prime $p$, the function $h$ is an SUF:
\[ h_{a,b}(m) \overset{\mathsf{def}}{=} [ a \cdot m + b \mod p]
\]
\end{theorem}
\begin{proof}
$h_{a,b}(m) = t$ and $h_{a,b}(m') = t'$, only if 
$a \cdot m + b  = t \mod p$  and  $a \cdot m' + b = t' \mod p$. We have $a = [(t-t') \cdot (m - m')^{-1} \mod p]$ and $b = [t - a \cdot m \mod p]$, which means there is a unique key $(a, b)$. Since there are $\abs{\mathcal{T}}^2$ keys, 
\[ \Pr [h_k(m) = t  \land h_k(m') = t'] = \frac{1}{\abs{\mathcal{T}}^2}.
\]
\end{proof}
\end{frame}
\begin{frame}\frametitle{Limitations on Information-Theoretic MACs}
Any $\ell$-time $2^{-n}$-secure MAC requires keys of length at least $(\ell +1) \cdot n$. 
\begin{theorem}
Let $\Pi$ be a 1-time $2^{-n}$-secure MAC where all keys are the same length. Then the keys must have length at least $2n$.
\end{theorem}
\begin{proof}
Let $\mathcal{K}(t) \overset{\mathsf{def}}{=} \{ k | \mathsf{Vrfy}_k(m, t) = 1\}$. For any $t$, $\abs{\mathcal{K}(t)} \leq 2^{-n} \cdot \abs{\mathcal{K}}$. Otherwise, $(m, t)$ would be a valid forgery with probability at least $\abs{\mathcal{K}(t)}/\abs{\mathcal{K}}> 2^{-n}$. The probability that $\mathcal{A}$ outputs a valid forgery is at least
\[ \sum_{t} \Pr [\mathsf{Mac}_k(m) = t] \cdot \frac{1}{\abs{\mathcal{K}(t)}} \geq \sum_{t} \Pr [\mathsf{Mac}_k(m) = t] \cdot \frac{2^n}{\abs{\mathcal{K}}} = \frac{2^n}{\abs{\mathcal{K}}}.
\]
As the probability is at most $2^{-n}$, $\abs{\mathcal{K}} \geq 2^{2n}$. Since all keys have the same length, each key must have length at least $2n$.
\end{proof} 
\end{frame}
\begin{frame}\frametitle{Summary}
\begin{itemize}
\item adaptive CMA, replay attack, birthday attack 
\item existential unforgeability, collision resistance
\item CBC-MAC, CRHF, Merkle-Damg\r{a}rd transform, NMAC, HMAC
\item information-theoretic MAC
\end{itemize}
\end{frame}
\end{document}