From 95eecef11c5ce23dfc61da18966b175056edd75c Mon Sep 17 00:00:00 2001 From: Michael Nightingale <9887246+mnightingale@users.noreply.github.com> Date: Thu, 26 Oct 2023 23:54:25 +0100 Subject: [PATCH] Static builds of rapidyenc --- .github/workflows/build.yml | 167 ++ .gitignore | 1 + .gitmodules | 3 - Dockerfile | 5 + decoder.go | 4 + encoder.go | 4 + lib/.gitignore | 0 librapidyenc_darwin.a | Bin 0 -> 253488 bytes librapidyenc_linux_amd64.a | Bin 0 -> 273098 bytes librapidyenc_linux_arm64.a | Bin 0 -> 96526 bytes librapidyenc_windows_amd64.a | Bin 0 -> 295504 bytes rapidyenc | 1 - rapidyenc/CMakeLists.txt | 212 ++ rapidyenc/README.md | 65 + rapidyenc/crcutil-1.0/LICENSE | 202 ++ rapidyenc/crcutil-1.0/code/base_types.h | 73 + rapidyenc/crcutil-1.0/code/crc32c_sse4.cc | 369 +++ rapidyenc/crcutil-1.0/code/crc32c_sse4.h | 252 ++ .../crcutil-1.0/code/crc32c_sse4_intrin.h | 102 + rapidyenc/crcutil-1.0/code/crc_casts.h | 68 + rapidyenc/crcutil-1.0/code/generic_crc.h | 687 ++++++ rapidyenc/crcutil-1.0/code/gf_util.h | 304 +++ .../code/multiword_128_64_gcc_amd64_sse2.cc | 291 +++ .../code/multiword_64_64_cl_i386_mmx.cc | 304 +++ .../code/multiword_64_64_gcc_amd64_asm.cc | 298 +++ .../code/multiword_64_64_gcc_i386_mmx.cc | 284 +++ .../multiword_64_64_intrinsic_i386_mmx.cc | 243 ++ rapidyenc/crcutil-1.0/code/platform.h | 245 ++ rapidyenc/crcutil-1.0/code/protected_crc.h | 61 + rapidyenc/crcutil-1.0/code/rolling_crc.h | 106 + rapidyenc/crcutil-1.0/code/std_headers.h | 51 + rapidyenc/crcutil-1.0/code/uint128_sse2.h | 312 +++ rapidyenc/crcutil-1.0/examples/interface.cc | 307 +++ rapidyenc/crcutil-1.0/examples/interface.h | 204 ++ rapidyenc/crcutil-1.0/tests/aligned_alloc.h | 66 + rapidyenc/rapidyenc.c | 104 + rapidyenc/rapidyenc.h | 188 ++ rapidyenc/src/common.h | 333 +++ rapidyenc/src/crc.cc | 175 ++ rapidyenc/src/crc.h | 27 + rapidyenc/src/crc_arm.cc | 209 ++ rapidyenc/src/crc_common.h | 4 + rapidyenc/src/crc_folding.cc | 375 +++ rapidyenc/src/crc_folding_256.cc | 231 ++ 
rapidyenc/src/decoder.cc | 68 + rapidyenc/src/decoder.h | 57 + rapidyenc/src/decoder_avx.cc | 19 + rapidyenc/src/decoder_avx2.cc | 19 + rapidyenc/src/decoder_avx2_base.h | 632 +++++ rapidyenc/src/decoder_common.h | 512 +++++ rapidyenc/src/decoder_neon.cc | 475 ++++ rapidyenc/src/decoder_neon64.cc | 455 ++++ rapidyenc/src/decoder_sse2.cc | 17 + rapidyenc/src/decoder_sse_base.h | 725 ++++++ rapidyenc/src/decoder_ssse3.cc | 19 + rapidyenc/src/decoder_vbmi2.cc | 32 + rapidyenc/src/encoder.cc | 181 ++ rapidyenc/src/encoder.h | 25 + rapidyenc/src/encoder_avx.cc | 17 + rapidyenc/src/encoder_avx2.cc | 17 + rapidyenc/src/encoder_avx_base.h | 564 +++++ rapidyenc/src/encoder_common.h | 109 + rapidyenc/src/encoder_neon.cc | 547 +++++ rapidyenc/src/encoder_rvv.cc | 220 ++ rapidyenc/src/encoder_sse2.cc | 14 + rapidyenc/src/encoder_sse_base.h | 723 ++++++ rapidyenc/src/encoder_ssse3.cc | 19 + rapidyenc/src/encoder_vbmi2.cc | 25 + rapidyenc/src/hedley.h | 2042 +++++++++++++++++ rapidyenc/src/platform.cc | 197 ++ rapidyenc/tool/bench.cc | 73 + rapidyenc/tool/cli.c | 131 ++ rapidyenc_darwin_arm64.go | 6 - windows.bat | 7 +- 74 files changed, 14870 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/build.yml create mode 100644 .gitignore delete mode 100644 .gitmodules create mode 100644 Dockerfile delete mode 100644 lib/.gitignore create mode 100644 librapidyenc_darwin.a create mode 100644 librapidyenc_linux_amd64.a create mode 100644 librapidyenc_linux_arm64.a create mode 100644 librapidyenc_windows_amd64.a delete mode 160000 rapidyenc create mode 100644 rapidyenc/CMakeLists.txt create mode 100644 rapidyenc/README.md create mode 100644 rapidyenc/crcutil-1.0/LICENSE create mode 100644 rapidyenc/crcutil-1.0/code/base_types.h create mode 100644 rapidyenc/crcutil-1.0/code/crc32c_sse4.cc create mode 100644 rapidyenc/crcutil-1.0/code/crc32c_sse4.h create mode 100644 rapidyenc/crcutil-1.0/code/crc32c_sse4_intrin.h create mode 100644 rapidyenc/crcutil-1.0/code/crc_casts.h 
create mode 100644 rapidyenc/crcutil-1.0/code/generic_crc.h create mode 100644 rapidyenc/crcutil-1.0/code/gf_util.h create mode 100644 rapidyenc/crcutil-1.0/code/multiword_128_64_gcc_amd64_sse2.cc create mode 100644 rapidyenc/crcutil-1.0/code/multiword_64_64_cl_i386_mmx.cc create mode 100644 rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_amd64_asm.cc create mode 100644 rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_i386_mmx.cc create mode 100644 rapidyenc/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc create mode 100644 rapidyenc/crcutil-1.0/code/platform.h create mode 100644 rapidyenc/crcutil-1.0/code/protected_crc.h create mode 100644 rapidyenc/crcutil-1.0/code/rolling_crc.h create mode 100644 rapidyenc/crcutil-1.0/code/std_headers.h create mode 100644 rapidyenc/crcutil-1.0/code/uint128_sse2.h create mode 100644 rapidyenc/crcutil-1.0/examples/interface.cc create mode 100644 rapidyenc/crcutil-1.0/examples/interface.h create mode 100644 rapidyenc/crcutil-1.0/tests/aligned_alloc.h create mode 100644 rapidyenc/rapidyenc.c create mode 100644 rapidyenc/rapidyenc.h create mode 100644 rapidyenc/src/common.h create mode 100644 rapidyenc/src/crc.cc create mode 100644 rapidyenc/src/crc.h create mode 100644 rapidyenc/src/crc_arm.cc create mode 100644 rapidyenc/src/crc_common.h create mode 100644 rapidyenc/src/crc_folding.cc create mode 100644 rapidyenc/src/crc_folding_256.cc create mode 100644 rapidyenc/src/decoder.cc create mode 100644 rapidyenc/src/decoder.h create mode 100644 rapidyenc/src/decoder_avx.cc create mode 100644 rapidyenc/src/decoder_avx2.cc create mode 100644 rapidyenc/src/decoder_avx2_base.h create mode 100644 rapidyenc/src/decoder_common.h create mode 100644 rapidyenc/src/decoder_neon.cc create mode 100644 rapidyenc/src/decoder_neon64.cc create mode 100644 rapidyenc/src/decoder_sse2.cc create mode 100644 rapidyenc/src/decoder_sse_base.h create mode 100644 rapidyenc/src/decoder_ssse3.cc create mode 100644 rapidyenc/src/decoder_vbmi2.cc create mode 
100644 rapidyenc/src/encoder.cc create mode 100644 rapidyenc/src/encoder.h create mode 100644 rapidyenc/src/encoder_avx.cc create mode 100644 rapidyenc/src/encoder_avx2.cc create mode 100644 rapidyenc/src/encoder_avx_base.h create mode 100644 rapidyenc/src/encoder_common.h create mode 100644 rapidyenc/src/encoder_neon.cc create mode 100644 rapidyenc/src/encoder_rvv.cc create mode 100644 rapidyenc/src/encoder_sse2.cc create mode 100644 rapidyenc/src/encoder_sse_base.h create mode 100644 rapidyenc/src/encoder_ssse3.cc create mode 100644 rapidyenc/src/encoder_vbmi2.cc create mode 100644 rapidyenc/src/hedley.h create mode 100644 rapidyenc/src/platform.cc create mode 100644 rapidyenc/tool/bench.cc create mode 100644 rapidyenc/tool/cli.c delete mode 100644 rapidyenc_darwin_arm64.go diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..ce85763 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,167 @@ +name: Build + +on: + push: + paths: + - 'rapidyenc/**' + - '.github/workflows/build.yml' + +jobs: + build_macos: + name: Build macOS + runs-on: macos-13 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Build + run: | + export ZERO_AR_DATE=1 + cmake -S rapidyenc -B rapidyenc/build \ + -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=12.0 \ + -DCMAKE_C_COMPILER=/Applications/Xcode_15.0.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/cc \ + -DCMAKE_CXX_COMPILER=/Applications/Xcode_15.0.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/c++ \ + -DCMAKE_AR=/Applications/Xcode_15.0.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/ar + cmake --build rapidyenc/build --target rapidyenc_static -j$(sysctl -n hw.ncpu) + cp rapidyenc/build/rapidyenc_static/librapidyenc.a librapidyenc_darwin.a + + - name: Upload static library + uses: actions/upload-artifact@v3 + with: + name: static + path: librapidyenc_darwin.a + + 
build_windows: + name: Build Windows x64 + runs-on: windows-latest + + defaults: + run: + shell: msys2 {0} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - uses: msys2/setup-msys2@v2 + with: + msystem: MINGW64 + update: true + pacboy: >- + gcc:p + cmake:p + ninja:p + + - name: Build + run: | + cmake -S rapidyenc -B rapidyenc/build -G Ninja + cmake --build rapidyenc/build --config Release --target rapidyenc_static + cp rapidyenc/build/rapidyenc_static/librapidyenc.a librapidyenc_windows_amd64.a + + - name: Upload shared library + uses: actions/upload-artifact@v3 + with: + name: static + path: librapidyenc_windows_amd64.a + + build_linux: + name: Build ${{ matrix.platform }} + runs-on: ubuntu-latest + + strategy: + matrix: + include: + - platform: linux/amd64 + library: librapidyenc_linux_amd64.a + - platform: linux/arm64 + library: librapidyenc_linux_arm64.a + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up QEMU + if: matrix.platform == 'linux/arm64' + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build cross-compilation environment + uses: docker/build-push-action@v4 + with: + context: . 
+ tags: builder/${{ matrix.platform }}:latest + platforms: ${{ matrix.platform }} + load: true + cache-from: type=gha + cache-to: type=gha,mode=max + push: false + + - name: Build with Docker + uses: addnab/docker-run-action@v3 + with: + image: builder/${{ matrix.platform }}:latest + options: | + --platform ${{ matrix.platform }} + --volume ${{ github.workspace }}:/workspace + --workdir /workspace + run: | + cmake -S rapidyenc -B rapidyenc/build \ + -D CMAKE_BUILD_TYPE=Release + cmake --build rapidyenc/build --target rapidyenc_static -j$(sysctl -n hw.ncpu) + cp rapidyenc/build/rapidyenc_static/librapidyenc.a ${{ matrix.library }} + + - name: Upload static library + uses: actions/upload-artifact@v3 + with: + name: static + path: ${{ matrix.library }} + + test: + name: Test ${{ matrix.os }} go/${{ matrix.go }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + go: [1.21] + runs-on: ${{ matrix.os }} + needs: [build_linux, build_macos, build_windows] + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Install Go ${{ matrix.go }} + uses: actions/setup-go@v4 + with: + go-version: ${{ matrix.go }} + - uses: actions/download-artifact@v3 + with: + name: static + path: lib + - name: Run tests + run: go test -v + + publish: + name: Publish rapidyenc build + runs-on: ubuntu-latest + + needs: [test] + + permissions: + # Give the default GITHUB_TOKEN write permission to commit and push the + # added or changed files to the repository. 
+ contents: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + - uses: actions/download-artifact@v3 + with: + name: static + - uses: stefanzweifel/git-auto-commit-action@v5 + with: + file_pattern: '*.a' + commit_message: Built rapidyenc \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a9b0835 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +rapidyenc/build \ No newline at end of file diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 40a6e3b..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "rapidyenc"] - path = rapidyenc - url = https://github.com/animetosho/rapidyenc.git diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..08f7a7a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,5 @@ +FROM gcc:13 AS cross + +ARG TARGETPLATFORM + +RUN apt-get update && apt-get install -y cmake diff --git a/decoder.go b/decoder.go index 87de410..c49b3ec 100644 --- a/decoder.go +++ b/decoder.go @@ -2,6 +2,10 @@ package rapidyenc /* #cgo CFLAGS: -I${SRCDIR}/rapidyenc +#cgo darwin LDFLAGS: ${SRCDIR}/librapidyenc_darwin.a -lstdc++ +#cgo windows,amd64 LDFLAGS: ${SRCDIR}/librapidyenc_windows_amd64.a -lstdc++ +#cgo linux,amd64 LDFLAGS: ${SRCDIR}/librapidyenc_linux_amd64.a -lstdc++ +#cgo linux,arm64 LDFLAGS: ${SRCDIR}/librapidyenc_linux_arm64.a -lstdc++ #include "rapidyenc.h" */ import "C" diff --git a/encoder.go b/encoder.go index 94a91a9..fa5f55f 100644 --- a/encoder.go +++ b/encoder.go @@ -2,6 +2,10 @@ package rapidyenc /* #cgo CFLAGS: -I${SRCDIR}/rapidyenc +#cgo darwin LDFLAGS: ${SRCDIR}/librapidyenc_darwin.a -lstdc++ +#cgo windows,amd64 LDFLAGS: ${SRCDIR}/librapidyenc_windows_amd64.a -lstdc++ +#cgo linux,amd64 LDFLAGS: ${SRCDIR}/librapidyenc_linux_amd64.a -lstdc++ +#cgo linux,arm64 LDFLAGS: ${SRCDIR}/librapidyenc_linux_arm64.a -lstdc++ #include "rapidyenc.h" */ import "C" diff --git a/lib/.gitignore b/lib/.gitignore deleted file 
mode 100644 index e69de29..0000000 diff --git a/librapidyenc_darwin.a b/librapidyenc_darwin.a new file mode 100644 index 0000000000000000000000000000000000000000..aaa07fc8a46af8074cc172220912519168e58b96 GIT binary patch literal 253488 zcmeEv3t&{$o%fw2gd`AdBA`Ku3^pi1f|(E?u|(&>49wJtM!^@-1`LT%LxN<6heb)8 zq~>lM%WkRbZr!b0?XKIkyS8f)@s$aA0YzR)fT#ozWf&e2lmrZ!@Ap6F+<7GsP*A_` z<0Nzc=lMU+d!GOEyBoXLy(>vlgiezBgGS<)E=5KpkREmeL7|HXb(=|)bTySQNf zZE-&`+>(*552#@MgmeQOxU(TWePU*&UT-i=oHUVnpA(m#KW@f@xhC^8Fx1rZQIb>$ zLt7fRA$lhgK-3^;lAyBiccY*>VShxJ_Xv8cpm~D21^uI-&j`9y(7E9AA_6TF^mWiA zqa^w)yc3i?W3xiKl(}1q2-cv8_JI8|G=JTy|CXb>}LzJ6b*M|z7aK@_nsuZ0yELv z7*2-?bD}U$68;{AdCqZ3dI~fKt>U>jP9GQa7C~Q(<@Sk!Zil@wLnj?VE7~<(Ck+(z zFzm_PFP_t14B+%%gu4R!L#TuYP|;hT)k&)bodtU`uNcH>ys-BQ`@g_^=q;VJ5j5*v zowN^h87iwAw7E_vC81TfqcSIeZr!DmMkJzsQ7JoNCi=Lb|14;(pq+x=EByZgw0uT{ z^tzx^uR{GTh>*U5nW+9p;QxyV=>P|4V{hX2 zj|lq25N>V~=A(iZjOO+Yg2oDe|1g-_Unl7AkY0-C??t$S!k=X*xBo9eTLm3(HMhT6 z&^rWuRL~khHw$`3(0i`o;eI3NMnR7Vx+ICacMJOTFm6t~mebz~dc$?x{J(-u9nQ@k z3VL5MH}4Si!RxvCBSC*Qf}0Nt`jDQRw+Lz)$<6BoO}~MgUlQ~X$}@lQ+(irKEiWu7 z$S+u25N6CTC|&sQf|9~;>jfnX?BRBg7A`I+EDpDQyl`=8S+M22!h+IyAq=@qe&Mp7 z-trfe6f7=WSXg488^T!x2~UQw?wJzgE-X@R6Benk3}-8>&&4)>;oN2U#f2r0*yji5 zU`gTPvIV6jQhxp{!^DC`j{GvmqD7^P?Pd83%H}R1>x}deYZSlV$uMbNX}*Z}k;0O~ z#S01+aw|#&@V>0Q3X-V@F|L=tYB{O+{ISg!|7)FnGKl_ zBB+Vg$6|BIygXtL{&|LpK@KzQbM2_EbFeaeBUTydfm9Wi%**$u%y7}E%E$W*M5tSPiC^rY|QJnMeek={KV`lExU(Tqy@flQk+@~-Gw_Wybu^MJIgXp*? z{|n}~5pN6%d=jtO#rb7rh2V~Wtl1bc{fW&lTd;7Rzea5f#%Gvq2sl2-Ls?S9_Z<{<8LL=dIpe z+uLPK&^K#umm)!zw!K}VVU#zkK;IZx8k7{4mIRHX{^51;k|hCabfrNvM;0WAS2it6 z3W^sxirK_6;F6Kycfmp-%+WB39Oo`x7-oMb**okDiVf*gX;oA(V}?1?Fwt5@G(%Z3 zcR}&ohl>l%Y}VSV=S(YBI_8J5l$sTApL9=Yaq)tZM*=*#w|nlmb4Gq}ROW^E+_5ku z?8NCs_aZ&kg=Q_t!iAjs9lfP>B9FqHcgK>2oRQyKfKx_#uA|t#U{Ue%drL}67cZP! 
zyx{S|d1get5X&VNeJ`OVOVqrJ3PaP+~V^WU)1dd4?hXJc)s8Xl_H2 za~^IPnZ2cO23ERA<@_0$`S;wR@n%hpniN{Jy>M~S+=4V!ktE$1aOC&{BfS z@lLP^&(3&}9W!%G540;QA~l=M6HSH1g?6ohez$R((#3o69Y<^U*3oJ$wwF>J&q{Ub z;zDdBu{Y%58Clp(KwD%j=bLA>&p0npP4*SfC8)JNqc~3ujh}kY)N^*Qy_7EbM<>Us zWBKxh57X~_wL$);W}Lf)ko)b6Fack!f7V!NgHF#v2n6eqc`Z;c zV_q*U;xhK82E)|Sk^;Q>CG_?$U%otVIo^IgWPIl-SzK^e(JUND6qcF4$8?OJI=7^> z1oMUa<`z2&&EH>vWKx^Q{{CY7_nwj&g~dfv=NA?{`hBNl0+q@An5LC3owD4HhQbG_ zJHF$#I&td!xh0Pj22~DTo$vAG$>5#oTsx-Vd8Nh6OG+0mm|Oh)wcQNE414L~!l|YH zS>WR3m`|GXmQXQ$AKeRknam3pes8Zc=BT7tYHKCcMxaK z!TURiw3mVUyNI>tp!^-gdg%w?KHJf`2iwb^-*XSF-$C4$br9{V*nu+AF|bA0M1AfYF2p7L_i&(;T*8 zt1Yp6S(gXx^CGT1{CbaTp&27*&=}@l`};R{wMZ74&zYNx+;N_H-<{v>TEerZ7-3!VI2FV4e7p%FnkKF0*qzH_tRbFf(A*bhipV z#E-(JgAv4B$xu#2Q~CK1mz8M-(ENOpGE)gyXz75cvtJ3;%p9Jje3B7eG`GN>?442gr6Z?;ZP&E{>A$uuCR2gKq7r_$Yr985SBEIy4n6v~DV~K}^<|5Qu zih6y~IC(@R3ss)ugOcpJ^^N^Vr!&-g$U$?rxCY){(OnK3N20}jyr0GSdAG$Cm2Gj> zPFZg$=(ecMme@{<`;gv#4fKZXmW~dK+GNrB{JzMx<5Z|!k;XsIs&2KYUs%*LO{Nso zg(O*OSUp%=_LQjiC}QPJs$^fngX`rHQl8&i)hUY$K`gSVT?8R7LR*78$|F$oedTY4 z#d$<;QBQ&`q~Z~~x*{dXqS{ju{OKSIBqqV)vZW04!QKRY>Mo13UjNW~5q?K*RV^iF zpj9^426)+t*UXfJDxV|P#`ERvFEkdHpRu~pqPmrE)-3%&`Q4!9xA|M-SIbq6;zUYgG$Q0(A*)(uuXQ^;-4nzMYb1YB(LKWF33pu6=+*r- z;jSllsV9Gk*fXVj{2ocSEJ2qruz$=dgnmQGA?@|S}0u!wZ9awd;q{wO{h{FR?4y~RQ9y`}dVxGxgn!_!Or zUk!3+FCV2tD02TB+~)yIF>=y`ISLecNb5q1pC)km>jr>4eBiz%4`=4M(?s{SV5hx2U{G%+G zS$m(UT*zH}f1eWW?VzN(@gsLi2dRF7pVr@yHM!T4Bh;h#vG|xtk`4-Un=n5NzvP}S z%-e;#FiJlIf8wdByTi{#|I;w@=V9UN!rbo) zGY4Pw;4-aYmr1zD4YLf~F6eQaz<B8Q&$GW<5dmv6$8i0P~U?nQ75;pqsrK_oQSWfzmQn@K^+9BB{*gLe~eW@J$nm zW`xHQfpyz7rf!9qqG$)r0u5d-z{nK2i)09!3ny!-JS|V3lz)$hf?2BCtf(6m_u*?4 z=YAczmTRu+7UQWV{kxPqMK+x|B$ z10S?NphD4G?V@VVMp0u5@f= zmgU18_Zv3Ix~+;@OLnJfqaD-MQhPFNusyv={yiQ@amUvG91T{EOy}o%`z`NAq8{M2 z&G8a4hasX?sW?r<@oVQkqbtf4iH5x%v7?j_d&T<|T(hba)6OxM`A2bRze^RDmhNxL zR|`++A$>@D>)2+f?Qlm#pnj%xxKZ-M9k;V8QQaNen9o!0XvfWl4Xj$Uc4$1+sFznG zAicQqYIlrVUbA5>y#T4r>shGvYz_wt)XX{72{B?``3UMZSke4!GJX#rt)z;A9ig8O z|1r8KHaDR8hoCfeIqfZ2Y0)@?i;;AY1Fgm 
zSih_7ZE%~==U960pqfR#Mhmmn?`rF;3+;EQf7JMqwjB$LShDER?-rDnlm#}T!uMq~ zK3Zp+g#-!K#I1+IKsnbwH((vU54DIg1SPU3Wg%VE(`BK<4XS45haPMnY9a313Kh;z zE2rQzX-$KIg!AlcUu&62pHakT6zMY}eS&W=`#$%z*c%02iv+)N!A}@zoSCobiT1Ux zm4^Vd#!H|FG6$o7UyBk(KPn4qFIt;{_JY;D%KgfH%Du|W88zdoj+#_kN|$|_*>K9L zZplq;Tvn{8M{H`Rb45y*WKWjYN=gBvGOz5!&pShzb}Wx z=Bi&#lh@cYPMhU5W3M+?9+fK_8J}f3Q}t+ay5UT~>2Gi{SKngwH8Gd311>v4UAh7; zdqQ2#23+=qx^xFz4u!h-0xrjcT{3S7IGpl3csikF`&!fGH!>eX0jAqr%Ob+^Qk990 zm3uI&ldR4r?jew+s-ZB%ih5M(_)yWE&Bf+ojN&{SE4!k*eLkzRJ|ZI=C@OCW z8@Caxslq+C>Zq(9!j6|Y`=ck)rps$G$5?$03N<%lYC6XqWJ5K#OA8U zP|*Q3WHY7E{-UA0cUk(6+}k+RzTH9Qo;gTFlTs2xi=k?3nyL_ffGJ#%R8<~Pv9 z%x|FXNi%TIV{RUsHtv>mL&o^b2@@w}W#9Va$iNf3@oG|KZoGfAicwKL1bu{M#4)%Tq{?|2ctbKFcTW`Pf?t8WFy7l!9jZMuPHg5Xc=J&U3-L`$l z2OsWi*|mF5>qj4dvbSyD{sW&L{Ot2Xp2OZFM~@wE|Kh~SFTXm~(RupJ*Ij43eUdIB zGOAy6|Cm^5z(8q`oRE0c;2}e=z9wndwbu<#zJ7#$vgQR!lWV`R1$zF(gS z|1{Sg-HC1#it#5N*r%Q{C~3+d{ggqYrwmG024y+>KNIch_iWU!B7Ygd!pwv>>J3O4 zDGVvG&t8o5U_O~v1d3iK?LkmXoziTenA@e@4vM)+S_UZkhO|HM(8>P_itZ$Nt}qip zW(Vm(VGpmIQny0;u#0E|18O&Z%nW{{eurQ1x}cWK=xVh2B@N>v;8&X;{=M+8mE+?u zQ@5rm(g&LjVdjU!%(M;(_HPX{V`2!D7O_&u54{!dzgAiX1dw1{20t=L*!5uG-@BuCJ(B%%`_R&m*LLtzE$t!ma(!BVt}(O<|x82{RG;iDriUT-dq%pIA$U zECBdXlU#(CfU)eczB>PU=rzHI+7qcPyi~LvdIkK5hO55TLt4JXYr=wyEHiz05%skm zY8L66BNCD)(pMWuUoiVV*F&R4xz`GQuL*v_c){zTR0N=Xyj@5K`Wv?Hp)zcP85?BM zR&+i|NLldu2-Zv?O12gjSrfJwMl3ba7FxILgLhkvY^mB{&v0hz9BG)QCo1K2&)Hl? 
zEGrV4m;{Nkj>_Ar+h$GOZgrmNmY;RP?7hlt*lty81NKjr=Bhi0S2Pc2b$;!WtDb>h zu(e_;dpc+7QtLV?1vY5%x$;XJ%N`;B#o(lrpUzR_-_%Wa4ZKS6xlL-U_g??}OetT# z(_Eco#mvz)OGjzQRrfrx8negYiHh^CW2jwUw2GFCGc4*3tBNI}&r)-{rDh_V5?Xc9R^13g z?L)NqBO!uHmi8Gm0Y~}mM%~zIuE|B3Z79-?J(QaQQ_V1BJz9R+4bNEb9fq$v3x@yJ zs%uqz2T&GQd1X9@prp?%mn-Q|(d?S_9M7PIl6@v#uA-P&B-seAluu5KbJ*c1SH1%W z9^82*$9}EiJTuNQi@8jMPrr);E7&^6KQ`&8fp}7hiStanBb{sqH0h|T@C^2|9)@ac zqH30`KDSMkZE7^zBztJR#d+rJlMbu8J)~yQ3aV(JGMyPap^bt=o7{Z48fjwiAFj*=^Nm=HBpRgw5d|rZ%Cy>?RR%D|W z2CLWGQulah0g6x&o@8iPNmH*FC~2fadT2eBrO)KL2Q7h$0xtzi!|FO~#~!qKpW_kc z1I3fGbZX!|vdS-QFB?y_GaVIdkB75kQj?B&dM7mLP{oor4F#+)u6`Cv-f{gzyb|6) zP1vM_H}T@~`hRG>)fJV9)tW`^kLJMNX|`k9!v$d_(Ogy)Vy{$>-@W*4#cw5kuiNab~Zp!-urkX^H+K%lQ`=e+K5%vr`b&g_r&Fv=FUC&uu z)6k&};?hIb)NRIcA6jTK3W=9cZqelkx$Ju04 zGvYu;c2<5i5n;X87`9WOaWX0i{UdYeL}Az<#DheMLPL#pJEo%K^XOHv-%c&|I(iuw zlBaZ~e3>uKk;>z=q~aJtUbD>Sy8k)031!&M(z?2ix)*8%nZyXq39Z0rc~9$2aoEvP zmQMVJP%Fp?Y6ZlSerN-HTT2VSq!vw}I)HdCi_ zJ^q}rIt!^W_|Wi;@^6}~y3^j@QsX*@N}xs#brXJZ1EZ*^hbW_PzmD<~)pNa3POTow z$eumjhcJ80ka&408I0IP*Uf`Bnw4HVJoU243vej~_Hf4x&12*^gDfk0v1 zXviS(Y@x)WkePbL>7JM^yw8b)Y=wW#*qa>p18Dz}o?_v05a(MjlFS>aWrDH=2F(4% z?_$@BpAa)B;zzISGCZh{fT5g-rutgnY3V@UJ;MPB<_FS5Bm{FReus5ydPE%@P|lvbk5(3e~|&6G(!&xzC~_zB|$uNNs&wmw86 zXid0^DiFMb*M}4p{isW%dKT%0?!r|bW;qtMTis?+2c}qRawsIN6V-hdwGQhk#dmjB zO;ifWV3(5{{BKh`EoxMXdP;b=)bzKiZHDb;!v?8{Wr)eCLKv}(7nxaz3{GQ&q_UJl2bcRz*7#s;gEUeBfDr?eQ@2pR9cIxbL&dvx& zoQHz4$9mF*s#Ip|IsT^$Z#SO9eU5Oy{v7V*!u@BA_MD&eHNRMfH3li3J}?pcmNf29 z#;;nq^Yn$f&xY}N;m*?+>P};s=FZax3lrPIWc!q8J82)lDi+iYiaJPp8x&19`LCcf zkNCMT-!3TC7rBFw8Z-wFw8dznpn5?;nEmUZ!E>jRFw^)RBi^omAu|}Zfs#6gAGvF9 zpOegppd6IcNBEIht2dW0&jBR`C-!3oJhH;f*M*s9gqhKC%rbm4NV4*LiS7nX z>=g%x1OWN&Cx##{h^L`4`z=Bw6|uLOk=2`1zSuCOYwakCBSA!TGwqz^*% z#BUh{CPdYQ)M8)bre4(JGQsbV;3te1J#JPb9q7-bBnYIpg9eYAR4??SJkuZ&Tu(4w zkh+()5nXHuvedA;vZy+Xi;bV}QrWC4&lZxaRkij3mZpBRup`!n+9J8M-#~-`jUU44 z(BYgWr+`}4>Cnwn>I|p63-~%*UiX>K*&-?G7Z9MtPS`#jY$BD!(`twNc$D*VGWIhi zZoyH1PiE>Y8GE3PU`iRif)QWuFOJZ*=m@u 
zixt}*?zVn+xduk49dA;K!QCFKH0s*f82lC`Y{vmb-88i^YM2D6y9oQhsg0&#x-YE0 z^(J+<@)p&+LSEiiVq4*4(COeU${uHR-Km3lP{fIMsL*#DR5vNH-YJcy0TB`K5o3=b zedI}>`yFbiLGF=}a8KawQD4HH$~ee9Dhlp;?%wY!xa&jR`}Kpnk-JBqg1a%)Jvtih z<&LCx%9%YL;{$-5j4L>H_l~*Mml- zdU%}0=e9UcCpd6b4He5ohhBJ$HZh76xpX8Nz>y;PlS4(a9klddkzCfLE0SCH;di)5 z_QJBuTQo&pS*%MixqeBql8Uoazw}|7{8C-nY*xi>F_4~+Fgz<2%{fwhoKn%2F5BZ=_sFlL?b+y3bPOGsz}* zZb5S{wv({+>a}b8godNJ*_({=?;1>MYpORldzWnb#8xu_0dXABIJNqP@kRrLIz3DN73bmPofM}W6sMpJt^H7F}Nq%Adsn$?$5(2_cg8bAX z;pboB$Fmd$aT%_Oy_7g0`04sp0l72W0K`U|Z@|i{1F~ZtN;k^%2@Fdlm$p2ni49DK zqmT`A`VhyPFk2z#EI;)>(2?BQr*!)eb8%bBR<;qD+G%+nk}~fZ!74YkiS82~vbh#x zkJgm3%c5(f8;i}d8jTl)t|o4+I_kIyyK4h)gD9AV%dJ6d?I%Szy{O88r*iigz5(cK zqz-5ktf&Lp!RgRePLmICI?4;`%|HYE62BuS<(299%}-#xhO<$Ri;J?lg!&G?F37F& z$?DhW4^K{&OEYhyXYp+b5ZTcql5RBc&LWds+K7kUjfb~MSf3&5_sMz_S#K#a%id-@ zTDHN%SuI=xdGLoLYZh*EFX1+jr1`#tDXt-jzKy2XZB^UkDmT(erxW8)m*N(tNq%Po zZQZ)}N7JkLc7hSrk9bQf6Y@xhRG}}G_r6bdTS@mJ>2||&+1EOHo?*}3GTd4l;O2GH zqYkfbH=Mm+5977~%drVqH1>r=CGuhm`xt6 z7|v2iYLczZsLj2d=%0O5>)zAk(BxPLi%()igi6I38Y9Z?V9|Mg4Mz0#(3|rn4OVZ_ zLs^H{3dw3VQwG;^KlFBuevdgbpKp>M4<*Is`ZZ<4tR7_P-RL|q!o5G@y>hsxwpiSs zN3rwT9s8+iM%}6ySQ~9d0$S;n+S`b6PLo$gC0G!Wr@PWZeIj}*6iDe;5dSN()ubUw zyv(h-&FXR19D>VRerJm+lK*Zwyr%B7xIgnJA8E5skPI!X=qUN<9`<&ibh}w`w^QXh z-iG<(7!eg+ub|S7rkK1lb6qYfD3jVG`lCMMn{43GHBod8hz`wH)GlS~xZ@p*>S3jW z_t+kfvT|#t74ZpZe$S*)5#i|ZnSP|P>NZeS<2O0Rrf#7o+iB5tD!Sb^-TTYqEH&t@ zJv-0%eCl3{Yxr%JjvY4LCb{}Wl!aNXr@J+(SL>V9eTw|XY75O1OzO5B`)N`ozwr#S zu*7b}8H~Eyy2FEG8=M90S^TKQhpXY>k5r-i+vq+VBd^&G!|DV|nG&%&hA28l%pED| zU%)DztSoL%lIAS0*=SAOLOnM6cF(JjoFw{dqPnkP-R}7*svU!i_t)s5Z0dSTM=R<+ z%e$Bc9C~M6V<-7GldfGIr>r$fG;VcTYA~Rg@c7hHLnD|7=a_7%YF$1_VY5jS&XTSb z;?h&(H-^tBs%n+1mg5#;fYtjF7nX(&J zY|lRX#9buI`Zx){j#A3IqU<9y;a7d5l%tf_|J+nP2j|#ZbYlGIwSfvmNrE9VImEFLw5_hP0$U3x&{3= zPLoNd7eUk75jj}qG=UPI7X^J*&_@I%dg(6VxK8QBb;n1%EC~71+Lg z5;rH)ej}2egE&bg>+yg;SaC`?CTu?*=_5t*sDq%__6hfp8EI-08<(3jGuy^sJlOs_ zteK)+QSkSxzl53J4Kv>uW@h{C=MWFJO)4%eebliCAMjjMTDD+W{=!1~(#0eEqn3S$ 
z1w-7IKBgoDhxvyJ#kUDHJNhX(nO@~~(6z8cx9Go@Nm0^|It!|Eq8F>UoNe%axHK?^BRN2#R&0i18)_cv z70a#oVn&Z|hdy}`w;Q#L)3kCyr}jVv#o{Sz4a9BDXb!C+sE5U58|k;JIK#-`=ApLld zVLgH*1%gC}2IhTy;oao&ror}p?a^m0X;(v3=Y+M<;91E3e?^p`PX&t(gg`m4wW?D+cVUzGHftx z58hv5SFv||X7JIW$jDj~E|8K~sVTNjciQK6o{m}ZarNp*Nf-3$Io`5VtfzMn*Tr%5 z)H^wtAHf#xwds88DDKoch|<4X4&5RAuy5a#p!gcC6+W7C$(27r)1a+90Dv^=WW3-^ z7a>>DqzQI7d}-mj7`gHQ*Coi6$GHxN5?^zjQLgNdj437vyISf^U(fxQzV1wKT?T=1rjQX*} z&LNPT2)Swkys2*VTDsi|GNfbKs!9#Djeujf&Y1DO)Z8U7l<*K8hw*9r@soEFn5Tg}U)rX-Dz=3I`S%X})A=Dv`9H&Q4i&{wJ(_wVq#s z^m3}qCSAeD%;6RsVzP-1+x!Ac;Cu?=Mhpg6@Aaw16@F~Bv$Y!^#rav%nM#zp2OE!dE&ZnV1h z>n*7pZ8d{(=|&ATe`gkAKL!nAPr@CZ=h(Sm^{^%;OTae=t-5Aw>L#4b;WpA_ihV2j z55JJKq_+!6*^~_JLX!Nn9>&0B3c8deI|srJS&%E^$!~9$lRPI8->Tm4CAqYlCw<;a z^87DPwWpEMRw@sF#-4lxdzw7Y7Iu^Bx8zA)W5i9W&K&var(m?c#bnJTcAIK_*LS z*v(GD9jf(og{ryt+f$yycxx1AEw1iFfM+hRMcp==I>-_UVL{mnI9MQS$WQHvr1o{S zsU`_Gadbf80g1+$ZOkURIj{!xxUWsF{1to&2l9q1|Lc|XR?PkS)ng&*piTXDZw#Ij zUWPeW*PN?vvUZS%`2U3k=+L(~O5S<2`U6poN zo$K{-*bTfvo%zSzuI&azAj)(UI zximT&Jy+8!!Ux`tZOX{aXqZQFR`lx4G@6>!VjNn#kH#qOqy3eNuaFYSH1a66wz<1m zJ)*cj4R$q;{K_BNqRvj~G^t132NO)O2a$8`vExJT>2(%`S^W|$l9bpZrpS3IkZL*B zf2wPEilTl@I2uULNZd-HzM05IIDiF0}b$ary0jsp+#KvFX+;_JP5p&QZaRTSWDARrH}~32%$2w z+J)$2P*$I!kC-=#Y9m+ofmuBR3^i=wqnvb7SuEPlS6&NnHLMj4RA(mjL(U)d#g0aH z9H81Vd`$7#Kb?*WLjeI+S%0p&ix~7>ecLeB29j1 z?5G)pe%p}>?qbAHaN#OZ^x0uINS}N{+#YQXx;?r8oi+E4o(UI6e+kCmCN7RX7TQOb z&F!sQj`7Y6nXmNTKmYTE_0K?*LvINo5nLHB_Q`5lC3`uE7{Y2OkFiowhbqvGxG^4z_(VHN@(cf?? 
zCvF_A6IVCtPFZUJ&ew@fw-yAQE)dWxuBqKP6to~txw46M;+@f`WOU1&)a}+5IlFbr zTjQ`Bs;nbUX4$l(2xmt(qJgA6ja^(culmE1Io%D5e*X>&yPi6R_!?-v*OITtpcC%D zgx1kU?#&h*L{?~B1TXPV!X9)exq~L(#_1^H2lE8soPp@=woOu@ku`fspo zLkEry`foUdLJ!_dJveNKOroc0QUbZLgYd)9b!~*LotEjPyGXa29?X?GQr4NvfcENxXSJs<<1mC3xP?%*Ok-3Wssr3U`-YV~5{W-)*knC+{b%#=MfOqJx zunrv?G<34gI`qx{@eT3-n^=F&yK}|hr3ldR5b@vv0Ww6khq&(Dh=-T*Qo0TO`F&uE zk}b7QiXc)v=_<1bP{^*BP8(h%W2RwTC^1z=FsF@!^9c|F&m1jJFbe?Y*C z_UdSDK!~xry`f#c7w#`4kvrru6!$^6dNEv_q~XFma+BZH2H~azlln0m5RRhoxW|qi z+|x6H#U{ml00V;86ge9ZN^VbongW^C25mt2FjuWZKa9C1*qt!T39Fm=fPhFb2{Fr8 za|>-ifR7iL51YCT0|MN+5hYQw4~6j9m(eJh<85Y@HLl=Vs+vV(!YV3UD(0^BtmOvA zgN@pFumf49@t_r%ot=#FV3Sq1!=!F!!@(Ig9K;cqX78PW@j(61FVC=B*wcC_*F zn`Dl(3a5tdAHY%JBthk+h~BFyz@^|>(EgnnKuanLv}AUj{|s=PNuhxy+*B=`@mNMoT11iHN@XGbKY7pWV=n!gD{7!y(j z5Gn9V2d3awza@3ORlNoh1NLkD{d7=|Yt>zk4M}Hx(mDJ4iX+dX+^uSh6*D%`phO zg~;SLEV_!*Im>A>_!i6+r|G<1u5uv&9m;zP;M?;@xPZ!U#Bjg%Uugae++T$E$C^8h z2#$H+8=>({h9AxUbnZXOJAkDk&T$X6HLbWmj;>kU68HG9fy`W&J2K(w*$1;fY6m3V zY9lRQ=lHXOyA z?tOfp0wdh}*s9jt$vcg3=F07kvD9YcErY<%EZ1+4OzDkiwf!D}g$Yy8Bn)@(kUSk` zFB@y($C4BGrD(n!H1jWo>}97_J&nugU=g1QN6?{@IZgf&8@(`(TEWd}!hM1bW^ZO- zJ`PS@hp4N#mL5O%4|45%ko&~ybYqZhjun?<)dP(B#nn#Gq3k0z zzEL@N_firLV0`;`@I5(2F2()xlPEP?42^H4BwJ=D`M6eEwP9feBMpg6Qm(Xv*s`)Z z!-@nZO!QHD@byL2DY=Sp?upxqm`KiaFgbP8gG5Bp;~~Ra{9F*YrGvt47SohXo(7B) zLd2d%O%m2>$M5wF8WmN{*|CVi(x&1Rjfo?_BBwFT=_?GEeh1UYPyB&w5DC_EA#l{) zAsS!cuVW|tpof2fF*d26xIarW#WqG}rqGycat)b?MMlSmBR6B2maBdUZkufo2-4Fq z`SL~@Cc%VWDU5>LjC%w!yfu@LO6J3+diXHOjVOSUlgRsHr`Ryr4v0foh17P{RWwWn z>KE>%RBc_f7#44WXbhAhwuUmP$FL$qQchtxX_|l`6N`wgCiO5IGS9Lhvp+HV*n5YC zubM7ZB#j zF{GtaqE;XIT#=FoQGYZr8cNR~ZLMgEuwBFhN0Of*DO2Qx6){nA>WUO2Z;peR%@DHD zQ}aV0-UqM-znwzaQ|5q_+Ea`mnBpQv$X1q80KRR3nBYDng`BCFT4%Blh>mPn(?+hK zr_6zr<}8G`1Zif1r&)HiQ01S9ftK|vNCf8x+wIJjS)h@@-Z3@We5wTD^DIhIYpj{!je-br?6ZsB9|@n=)l99nZkY;C?c@j1@usw5%Ue0GwHE?aAyh>+U{Ym=2T+GNFhBjA>904r^b_RIoE21f2EHV2?d0GqM)m>~hDX*)|S0*;IG zq#zWxi9oZ!aPPSaHkbpaKy2_H{2mu{7U&=GYsT*iF3kT&xG-o7S8%wl;BZ~R;R+yh 
z(da;OI{*_qFAf*^BZV_eNspfqzjFL2DCWs@8cGMBP_^tCtO>UZ@gWSF-E(2`*Wt#T zFclxB;=)vIm;wsHdDXX#e_WVK2~%OoLAKAEoVZJ!o}A^Of6K#ESaMT(rdMLB9Bb;{ z@UzICR4&n8ph%lP+kb+ zF!^HG9B+eeJ<7wg{F4KhXAZ_Io<3-+t_K6kTNi%mnA4C7$Mquj;q;=O12=3h4tnqo zr!~VP3VrBsLHAzA&5hS0EaZPD2|5hNq+}kG#A(7c++U2K^l>}l(}{cYMB4>DBv2pL~a)6VXvoyyFDDQPArJ zy;@NEx+3{I1^FDJUP1Q>x>L{wLDNAg{t70DCncZYfJDjLN$?Trd0f!PuwzF276>|5 z&<6#z2uj}pA%BbTzTiKn2oE&pF^f^JF5VSzhL_udFFjx(>32G8_vY_dLjuuo83YCmz z$?)y?SS2Nw!wivJ{n;pH#zUw_Kndq=4&o*CAK=fk&dt9KGgGqfTez`RN!vqkA--7WO zW}6W0*F2DDzvg}p5&yh9=HF9H68`h=v760?%m;BTz{FtQ7Mn}v<;@t6FQ-G`KhH2R z$Yq9ouDy_oj9o6!Vq36qo`?=WbLUMC+`p^jaLl`NCr7wnj^y}jidtzXa^A(>f0$tC z@jWZFV!+XdU4Zzu_(Ixyui*{_^L4XwTqb_|FUU=3uT zo4)Q-B!~V- z2dDG8&4y2zC}O?U`Q=DL>T#adDeI{J%pJBpH+Ew#u;AVMqDgf3^#Q946c@pF+$tZD zVV5PnJfNIe;#8IdBn*BGqvI7a0f9HloomIER#)T|xscpHbpR?joYM!ZK_&YHG)(FDivDM5= z>6FzIU`K~T^KNHnM8&t`Krm}K@Rg3>hzQchNO?EM(y@2pT!>_6vWAm*4a7UMBplh< zk(ZWZID6ffFwHekr}#FQV%;9PnP66Hapb#|5OGwmyvA$6Sug~JaKe{uVZpewIcra5r3|(h44|Vs@L-vws073 zK0{rK((wvUxt*%L_;!JZ#p+ztH!Afa>RM#X^DfSt0m722HsR}dcs=zMjh%sjv-Y)6J%oL%9WQgawk0c(jiP z;`@XVo)NGRa%39Vl*WM|r|4TN9Q}hec~27#Oc^>1I-6;(u{SBc&89GHm^Q+O`O=>g z$89LN<=On~9`ee=Zw*g0Q&j}86bG_%;BK^b5_84sZ* zfd&~Rx>gu$rm0h~`KB;4+q3k4pE2x%j_2L~?d$uC-xO$Nf+53fUUIpBRxWm*(U6gm z*%N1_?|nwv=b4X&DCzNoqGPsQ!4<)51CK>Fay{XqkeV;d+CC%kBXyzsjMX=A|HKc? 
zOKKl2Nps2SV)q#ZA9|@sC5__zxCw1l(+JwvzMz&byxxALbunZSUPOKE3$9AxY+mF< zNSiD3Km_)u{0z3_A4tgJ!D!Ejdo1Pyi~FBoU#SMZOUgk2yS z&Po^ItXOK~Q*OYl9LDUy3Ctk_cPG?*1SoMbR7$DzSi(lBhD^#-$I z)474JOh_T}vuWV#*#`d*%MbnT^U~g6R>r_Pvc<3O%TJT2Pv{kX*?E@dK(6fOTzh^4 z+Vd$~@O|c9zyJIUw&O1B`_G=2cm$rmGDge;Cfn-@y|l9Ozs0!D>Im3O=;E zsq7J4ZQ!4(_Cq3gXZMP$@+$W0h7SwFRe{~?VSVAMkl2^g4e)`uFjy5^B)-Ipf1r;M ztjcK2aq-=0Q%wSW>y}^OKNAhQ-pr6yQ2ebNT4gVqCj1cQitt*er0>DhuirMDTQg{t zLkwCa?awgNy$j3=8=(aq`aY-0J2@To38?p$U|cA|ml#ctMcB-))g#a$6ca(LM7k$qO2l0z+^v)Md_l(qbb~%idY_IOaH7HO=z>y~v=ay?$Dy;+4Z0S( zzHZ-2x;?mBAL|tXy4vj(>?#em{XZwq>**|@9UHP%pkm+U8asKaYwV*{ALB#{)vCd$ zu$zel-(SW))zH<9gC^Q>zg2Dmt_l#}ZTzEmNWob}ZKcciE9uDOKorh1vZS|1&=eiP zyV@z&S;9B@bbANFcROylGM}$ApJw%>qSE!d&(LI3ZX%Ep-0Oq+D9n>@$<=*+mn$U; z-(XmondSB-`0r%5;=EyalGS}I3I3wet-4*gx;DzVM=RT^m$75(p%+O@1Ko=M2&_NF zNzbb{vAco*W3kdb!lV5;6pJ#F;(hfNe)GN;C>Fo#IZ#;O{<;YL74C@Q(*03WBw`^H zmg%l}Y54vzFj;7~zg_A00O?Uienc=V?dWP+PfV2)-rg%QD3+uBk+`Na6 zk55d;l|tqN_(UjC$WRC?EK@z zZUaz=7*+}Y-aYju7mZc2ycevJ!Z56or@$CF5m+S)Lp!>%pCWBN`nDKvUI4eD*Z%Ea zQOp<6zg;?8Xn z=oUBc=7Mobq;YUrhX<-4fK!q)9r-|0sXwFPkQBj!g}@x_1R~P1 zo=$DdvCYtzQlPG8xpE0MWy$Qn-VOLE80mtoy_Et1r-bm=&`S;Hz348C2sjayf-pL$ICdJ1f^q}5lk9utn6s9X|-Z+LVc z9$2xT^_S%D8Rie4tH2R0UZ~yl3S3JV9)bRH$e3K+o;zK`6X|$(U?T1zCkPZg1gOR7 zV3f{^d&n4R2gYKQx9O7l%r(dy62jn4aOst!ogAf-IY^Ha2-1pkb$;cR zMuyVQVdbSW?v@Ye^DEm!Y2Q zAk-6Kag^?Je@I?lU^X_Y90q&BjL3Qj?1`ct@(e=WsTk*RK#*bpf}r$1qX%K!3Pg~z zRGO?1sjagzhQ&d$W!H$QX!&*9BLD@N=9>8%$sAjUcem^#pn~Auup;m3GOJrXb8z^G zJFfkGQvDCA2k23II;^b)p>A2$hjv@2uZ$cI3bfS0w<(DPE!0hA@5>;)e4}f zn(-EAH-ia@zZUM6f+2sl=r;QHTWbcDXZvA7?$%YD34#eZ!(l?2!2^95g9#ZVU_u7T zZzOWRfC&-)O1M7;6T;m!n2@_8G`#EyAE+d_!kAN^n+mr@5Pr9_AQ#uIqka zgw#?GV|DHaM#yw{=g#n@V@M*EKF7_}hLfMg_BJ&2dn6?tQ`FLae}V;Gu`K&2a*ro> z?+E6#3|ir-T6l7_l1uYX@aZv&x;mvA^-7XT94LhR(+KlpPLqXs)LYz~Cd?ClOJ=a? 
z_Y^d^{e5d_dxKNgR_a${=#fXF1#Hl1#w^@|Z}-qQ&2(qv$`ouS6PwIzdd|#d$RX6H z?P2&mG{w%62&)H}Sr~Ze0sITHWXK|fX@U0|Q*eUJV1NQG$2msGhjQgoG2F1X{{S&L zjPQU5!pmJxfsR@TEMY%zLoyST?5_abin35#L&{85hvll@p+Cj^4?!4O5YS5S{`bHQ zfh-rSyL19>8NR!=B7&}cfQ6{|j1M`VMZp^KUVyqlcH#ITm5N>jVfYaPD?VdG4*6Tn z{NY1OaKY@ekw%aRn)n_il!>Dd`4|J?Ih!jgf!GX5z)*w%2U8^^9{nZU6D*ja(*)h3 z+X`V9$~vHD{Ka0S(ung4<}YASx+0qaY|YMQ09%wxhR#4=6=NleE)n4gkAY!Iw(f;) zAu4N$q-5SJZE@y^a>;3TIg#3%3! z#%7YNGs*Kf27!o|fg^zRPU2w~GnG9a25HUV#T8m&>=J`*Wdt9_q5xHLEXIW11_Lrc zyxM&zAJfQ24a-D)meK@4LcJfiWaP0>=LAzEoVg!{d(yF!yu6AU0vgIOKV}J5GA38l zM4K9wVxt&4wo-yd9E4cu~;Vcn_H)D&-UI^dg*)yxG~2COH)Wm_CZ> zjZ8n7>GPO=2=p(ctRilN-+|S&9rA&QehxEZnE^A!!=OG&ClIb5C5+-iYUc1LUlXYj zoHTdQ{Kyj+u!OR3!wCfh?UHQZkO!hQD`rm?EJgr2BrL zu<-Kyw4#xqfV`g|F2D*Q7D7ou?GP{rfKy1%-cT>;wRfeC3J?x^$)?4ExdacXXFlBf zud)HM3tyo82U9FV;roQJL`n%?ZPzD?`k}3cw=U0NV16J=le7KxsHb{FBjNQ5ND)dA znjqR?BDKRsU}T?gKSn}mSmKfHZXP>#`>}8X3<|PAs5~erz&{b%NDE5_fiEvY%vRlL zZ#1w)Y-$VOMKFiVQr044koo0fmtBue&z49iBZ-)TU0r@U&XHL34#OCU8|N759f{#7 zx%@Po04ov~=a3o92&B=-NHA?j0cPbq3ycxB^NUF62x6oeOLsO6lTF*y!5YR$)VVN5 zQh+g{2UaZ~-#y6;VZ;rD5lkP@WBCzALg!ZNfe(j-7Oj)Uc@E=dEg1nW0w$hT7=b5o z{FKj^46PYIqNDJ0;P-3%g6DTva7M1+jMPM1I`&|f#9m;k8HOG5X!&V3y(g|@5@o-$ zVEAvXx>m(^0OQeBUK!6J*x^63T&_HVaoOye^_;mn3D~muh5)SN<*G^wOmAT}f-B{d z6XP6qILejpz<~#Mp2@Lai~ZYij#$&K zrk8peTnNG{`+PVfF6z5_!5Pu+ea@v`tQVY-a*i7z{S~_#Qqm3R6YSA*}=FYPhy5f zX087_4sXm|^mi{aGjK;l|2AKk=O7+Zqwous6MqH#*`$xTO418qW_lfh-ANy8o*HI; zAj}NGWIwvYd2vU^bKr-?`M?7~fbIagp}`+613+Vg^OuQzm|z63NtzNMAu|Ntr#! zJk$KZOutc@gK>)~T4X|vhERMkKW-a%_I0m7D<|xB`ITVJ%)xybS@d9CG&wh(hf?~ zhCKX8Q906&@{GC0dCL{fy{d16H_~vrwBHLz9BBN@oDO}J)8yAdzX!aL(I_`@Qg2lu z64lo4F4w>awc}07JiZ2Nk5w9V?d($#;xkK%x@l@-)G*12&mGzaPHi*|(|v)DqS3{2 z;3i{!zvF^P`y!dFSAG@1rPM8HRkJ%;p= zC;#8a8>x3MFCE#i`;$WryX^-Xw&C2aVOy)GVR!5PhHc(Y8+LacsQFX!XZhT6! 
z4(Br!-Fs2oa^)JPKgINagdPXIp5KEOt#Wn`w*QC8HEj%b&1%6C93g=b%FD6VdlyBp3(Y)+@S58+-+!i%?&)2%Q}DHOn|1TFz;5HPPM2KlQwa`eMiC zf$t5V8SFNxn{m>MLybN71{=X+yqzTJkq7sIe)Sd0OD8805C)v_l_6ML$XZ#BPXrQv zM!oWwci`yyDR)cT>L0#a+FM`on0IVK{lix2V11ocYOmLg?3$gFP_LwR%^aOj@3wZ? zVbTrjnwOA(uL-P$9KygDeAFYM;qjG8_zlUb|F_3i+NFg0k&mxTiQ&i>%ZnIngby8g zeZ}LYV{fCU{Wb<0QNMrr@UwVGz6Q7bY{T-FEo9TJ*>uCEg+3kyiCJ^(CdjI;-DGnu z&Rl9z=Oj+3A-S8t@HOgk)qU2EZMo`JeC%!6O%118&hGdenQm!TVo#gY zEkMZFaU7@`ZbH>KDX-n)q0eOI>Nb-fw|W{N7?hF#21c%W$lFBU@1m3!-=^akNMwm` za349tl1msT7nKRXzsOJDii9_~AM5Df;BNi8VfQ>3_IBelAzxd38!WM>E$SAF`}1h3 zvC1sMgsDGZmA{hlv~`r%*WE3>nnj>6_3pd7UdF|yO1oYTddh#c9S0nhVsi%U_@!N)(kM1YU;VvUcq3h zTfxpd6sRKJdWPgtXYJU7SHYg)ExAC%3=x75GqwXUgN`0CSh*Hn!XQ>OmOxm+$A>6s0B7L{E9-HPdtMk1Z#goEIb)dm zw)h=k@EkF;KM<_K;62K(;CJxSxq{yj+)93!`W$0;x-K1l$8k)R1`Ld%UMEDg!=98E zbC}-FMId+1)Vc6Gj3>}bK@oqZ&Vk<%1GxuMEFSXM$GOGdWBrEFBRLZkiB6u31vk+N zpol7Y3pOdpYy~BMtw=x7dQ973KlE)uUl;UMLH~}^WEm7e(%QfbEOOoerSPi+eNxc- z1sxBjtlp^{Sg z9uPP6eCFpDFR&Lbo?C2~$Q<+YL-*-k6#T@winI*;So#F3p8I|emz4!Ylb>%=W-0*- zt(@@g`4wIpztZ`2T+`P+UEYtlzZXTg(Kqsay3sfL^Bq!)eeKh=3w~K5A1EkpdUJ#^j)KqsqOTN@6m9&q^;rfqEE4g`f1Xqx`vNtw$-m+eodPWJ7@B`Hl5hE*HsVEHSAvE!BTI+SKxB=O#Ok}q@%iq5AOBWA9!LI z`0ZY@53ZxWq&SX|?o;wY5#77L;e-2NdSci?UBj;X4%Q!79(Cjx>BPmKhR(q+yCsPC zVj*M6q2iOPvM`<*Y7HAae}pk?-EYZhhow>eO{4MDC%EQ&J?#xh!+gf-*CXhg%0g89 z3ckk`d=F|ESMWXXwn#fZGkgntkK2Ry0n{doZm&(}TL*avDYXtFitm;~*U-825D4}U zn-UaXqqPG2t})oqosKSYEjcKs8g+6d?N~tTdbzTb z>x^2>+$d&Xl9Wo#yxsqLcgaIR*T*+`nVZc!Jf7tsP_^66=@7<6< zf`k*4AXp+BO_W$d6GAXiqr0$?vwEUvvBj@~2|=_#5He5(BpC-urTX$v$VE`FhTr znP=uaGxHy*+$@4YmrLb}aG1z3LpaRjm@OQoa8$%W4|#tGj?NA;d28gYe33GH4yLPA z7j&2Lpk-^MWCS--su+iuI&K8G(Ncvem^%R6IH_WIFgFrhlT;BM%#8xqELC*DxS&gS zAh@McMO!d;kXAMo9|wCxYvoi?AM6oB9#m!(JA*w2lLwVwMOCoJ734u>S@CAD#}M+M za;G>w*%W5>Ci;{6we_i-F!-;2TfI0p4}ad;oJ zu%{>92c}1&{sl|#iU;DZ%@>W|%%|W6JOOTFe}r4m_@8i^Fo)A*;ZA)4Zr^vvuI>4A z*J+Qd>zv#5%*v&h*LY@S8sUCWs(RcHT5j%6#AL}ZO!E~j9~S4yK2+z^k zZuYt@+4fpY6CUt-QtZ3w<{(4&u&?)I+jqilf8dD6Vt=1*PWFQ7@dnzm+m*QZjFC+G 
zr#2G4hkHtjR4cg)mMQP9^<;N#_57i$8woEsEf6w1wbZv^ZARQf0R$kgL{v|;=7z_t?iML>5-6?d6hTOzcu%+t}Jg`grgcYb8ohSkxvSS|) z6@=808*_{ybxd*2OU09&U_c$Pg&*s{K5?Lud^(8UEgd$DgxI!)2DkB9fySW_ga_eq zxQ$r?kOQH+S%6Py_)QXGPVI2OKheit0jn!`1>Th?iHij5IDqH8WU!8Ty?}Mh2mP0ZC3?H;y!!$ULYEy!8iC;iC3dm+9LdlW zQQyZRMzm}9cEaBv#?xTf$9R?%#Xo{O-zeP0PR5<^rtrl_z^qF@jo`8528|!XX~NB% zCg00x>Jm;fg*&GMi)0Wxirl_QLH)PeGOBB@C$XymbFedLGM4S2W?2c;j&`a1@6cvo zCEbjlM{dTX?x;gv_vEJxEuO?fC+X%iSoRiDRhS7)w0}x>0Ip!T9MET@>?b^l_RopS z0__S#-=GBre)NsAaJXH@#pjLgf)r&Rnw=M#Ek@wB#O=1r!j<5SXQ*#dj{!!0iMRVBpZ&g2-1Z$rqA;I=#)_ZoxAkg>}Xuo-U4$Pt`@y3q9j^FOZXNF=o{5CD6Z+1sQF$X=#5 z6GedjPr2F1$}^4K4-;i;vk$r1la3Ni6P`8%jANWQ5oGCD@=cqLqFxDpPAuW>6@1aW)~27^&B{LdGSuaST>E(Tv?3v7V|c;UMhh!_k6 zqbI(`?+`}A*VxCye<8lc^9ZBI*LVuV555= z!80*{jhO)0XhBztRU%D|_S;ZE1N0B%{(;(hRG2EfLs0Kmo}ES#WDg2GcL@ncRC zUg0$PElyJ@KltZ7NpAEvBo9`4-Adg%ce?5c(`G}HQzG}3^SE7q2Aq=&r> zV1q3_T87>J@WYO6H2p(cscWe67!Mt#8>s9=dlfZ6XmF$5O?S;$eh{Hypm+{}Iv%t$(NN^bsLS169%HmFpe1|EgWJV~aT-h-(=G_Ml?-mI6f|W+aN$MF2G0O0C#@Vy4^mL|7OhRF2<_!TW(Y_zr z3JMgDydNEgJv+jqV)Ht69uuLbVj50*&S0}W=`=gES&Z*A71n+vfK+PjnY4L^p13Un zAto}&i#gPXq}!)w4-iSGBiXAwF%A1M-aqZJ%|=X~$Fh~flRPORNkY1qh~W!fT8H;n z5Ly%$!<8Tys#>g|Su@zc@S27Qpo<{@y6|E=-cI9jYvQf|l1014btIZrzQy^0F06W_ z3-Slj1^ihcyt!xu?AVV$7>RSRi(J)2VofF`f+#}*%%s)>Z)dS$xG`i6?J`5DIOfJ=uHOcK<~8z)0`d~il4fz=}46B zhmp_2+-G7o-Ua2h46&*xx(zP~&4gYy@#=_2m1O`b3&0-OeI&cb+ADw_T*We6OTm^R z*l_f3_M};OahaW%i}u9Wry~+&z7Y4QU`3g2oKLyZt6#_x*gy%K0j$F;NRat1g1B8; zgjMS09Q>UX*P1hx{)Kx%Rfal}2 z*hHmJOo43(Jj)-L1Z5REIQT42=U7of@^oqg^q6s;8r-8ySjn6z62)a}n2zRQBWYD1 z`JZ93p$yVIDR|;pkO`H28J?Ef(rNw*Q0BgqtU*!jf0>oXf&GM9!O8{7`M`dLSuvV0 zD}EWX&!aB>kc|=W9nIgpcOe!fu@Z`-r(1%a45f*8%o{AdeC9!YB74zPE$BVaOQ&i< z_qC8P4a1)Xk_(yJ;6{benkK~VpiS{nbY_(CEY{juXj0q{M^SuTdbbtBFxmGox*Vql{UwABljHy-w1=1Aq47$g80L#kMf8+-yt z<+-@AlrHNYELGfsTaJt2r9(ebtumJaZcOW^BCfk`~>|gKs_91 z2iu$Rn`57W-`L{4lS`KmHV!U+AKuf4MKGVo@H@VE3s5y+6C5Cwk3-s;^(e=FkHv8= zws?i?*n*iXBL>?H{zEFCgiy%H_hSq{rVlm-Kto_;CD0Iy^z&-Pc*y%Wq9F2sV#)hE 
z8ojV|F5^{vdAtf(ddLjI*0`YaVrw@{pl1A+;-9W6_S?se@brfX3EeMVL=d_J9C*R} z>K(7*g^k1^)hp;~glUqlAs8THu~rX%emK0r@MZBU+8X!^CHdLa2Yy9X z6A#Cpg97%m4)itzS&o0Rm&aqz*Lytc&{AU${0WLrR?oCgd3^T#y~m$AbVJ|x6()>% z=xU)xtGh(P{O&OdM-16X9BnPUO@EKCO?|)_Y!oObNe2)0fMUD7iKL*n;rn z*7V^k%w1*B^j|V_<40U`K*t!X3~|)Zas3<8=a0(WzYK8?16%J;_juSn3~%ZW z-y(D3BoRCa-(rFv-~#P}u74Egq7D5ET^i0svv5N_2d#a+Sey&dulR8;Hlp8Ss^JpX z&u`%Uk)f39Z^K;lTTx8X-hcXAKW`Gx#k*aDp&5#ZcQrgcBp-D3hjXEolhDsBt`L>o z>vbA2`ddGrCgd}-*f=*6@=s9SQ&hP*^ zgI2&&xRU{|lYq?dMB4{*kI%4<(;w@z0q}~`ygt`^eI5xfljeofsROZ2MZwFgd12#Q z!0Yrtc$I2i_@)SWbqwn8uool*!3Raavoji=c$HerGkVW6G4MpWYM$7d8;E&!Fg($U zHBVT%1D@xufG1Rj=7~xd@a!4_PgB4Xb20(X^Re(W2RvaE3V2=^3eQri6Y=2FtL~=4 zjl6;{mQ4f`i)Ak$K|KZr%+7)N(7%nrOHJv(sS+m1dPXVOX2WSMbZFR^a(G~dJbQ~p z`qyg9=?1wnff#qLhH6Gt$#Xu`otIsWnc!+mRntIQ_Sv^@g12NnWQ}f6c-ScjYeHB6 z)8hCuK+l?re=m1$#pI$H873C55)E#fVE~F1s|2DSev~c84agT*9{?M@eYm6hij^Y* zCUq}K1Wb@ze2P?#uV`5oy$yF%^MUj4=#Gvo*WxfNAX#YQkOzgXs?J?nrOz`pQ5lOFxHlGG>{9F)~PzV9<^0k^H= z0}}BNGWEtmZQ8IbU#&sP%U+OgeI%*A_&dmLIOV3xWE!^NBbVXG1=s-$IzYz!OTFxT z$Vd<#ou2bWQ3`xPV`p<3zns&Abwtk;B@+&iZ$#jIVdFX(V%2LQio0w@(QUWV@4Q=0 zHs`%&_tBEk9r!Vv_T<^Ok?Rz9y=y<^c9k4x_LLm>lvZNCOG}nKdCtd7fS#}rxCM43 z;m`%?FXw@L@fRShXTpicJPYiOfo@|-(V7udDQ!58!*zr))(s_w+qk0Wkz2bxfBC4} z-IT0!uO)`{)-mp~+^%;$-D)oN++y227>3(^|%5f&l)dF1N7{Ddw-Fk-wG4EVG&t$sW899RH z(5E_wm@E#LNipt_RyU$OTEMj7^r&2Qo9`;IhSYa&p!}5V&z4-@y+zJ`&puJkJ|sK# z0FUCJoKi2s+SFZM+RJvQGx81#upS)S8WrOiCA-=lWy`Mj?Iys9T~_H&@45@-E3Q&3 z|JvvI3l?@Z%~#rIdEV{9Sr}J1jx|E+D=eT?&h03&C@RBpiL@JDL=hI6;*LN@jx%r+ zc&@vec~Vg`Pj|t2!$(&4MlrD8cN+wxyrd& z?t%oRX@h&q0|u|V=>f&J*0W;eNGiqUX>qyg5fpc{J8=cjQiwM%J`RN!$qTQBl^^W~ zz+qCBl!M@okybB4ImiwV<`_#3Y@r`4MBdY}H!o(3d-5MUwtSW+ZEmv}cjZa1Zg;$g zVyw5Nd`PV>!%{iq`4HHS(~>$1$x&rpo&i`B)Y*(gz&}}7o%OtX;`4s%t+aU`tG0?W z-(-#1f~iI6IS*YHNl`tE#-3W-2YE+Va6NRMWwfUXN;;VfI(kT<20$&BiItM@*=ZRXFj4Ds7%~ zp-CP9wIQkVQRLpS=&tC@zk2VhaH96I5~s$x-#uBS0zb|oyK;?3yk)55&S^J6+Zk5_ zWvQCf+v+9g4ZYDc@!P#sQQKK!*m}WmFP!7Xbib>Pw=iXFOR147p?a+NIYHV8$3JrI 
zR(va9IhIYL2~>jtYw<^4r#M-MgepGzVW_eOsp4k%0p4ZxNNgU&@c^W$w-5%(`-pnU z7$`RNJYhm9PNr%UCk%H`b*XGQQJ}p-lG`%O<=ysN^Xe>QTRFblLs@nK>1UZU&$&Fw zh$)}Z*P+8`ux5Lf@3v;Ql-z59%nt#&#hz>_Jr`*oBOpV3rmcn?tWbUu05zDKs*0!_ z@9;$Ue$O#$=&1-ls5}GRIud>4Iv}99fq?P}5wsaFAx%HrAaG=*j?ANL~dVfGA<8B|NAbFHr+6(fWi4?jceDu-9@}YqJ%vTk_Z~zUTK+DmP zOu}?fWRdVG#-rqp1SS6uFis^(V`-v~3hIsGZo-;Fy!g4Gh!;Og&>4bG6ZCpc6aI>E z5v5N!EMTSn0Te}(qk^*Z1)T~9xl{3jufkL_9*NxPoH}m+g@~zSpC>+9Ng$dwU6)|u ze6X1R!23Z7uS9zv^zBaec+Gx9TWp3xFcUYo7-}>(OfYmID(vg*mi-7h{EXk@PUmfS zJH(CYvLOG&5VwKwOK#TTmrT$AOy(EPhw0ofb79fqCzj5H>#=!HJidU3hozH8nfZ9( zg2F{l6g)BQ2^f~A&7J=Q1yIr_20|=;V*YHKf?RYw*yAVIl=RF8a~Bj?3kz(EA5#jS zE}WO1HsR*42)%?!l|HYi=!eA%7hAqJgX~>&5uVAV*k#ivWTtW3Z4jPGzjj&FIYTt+ zn8Ad9T=ZVVoD*}0{-oW=Ids4{U1NotnzQy#{EM~AzGR}%NC)Ym`WdF(WH3x8#--Y2 zO%w@_L5bk1x&DjoeSW)a77rhaYf_cO`DRWG&3I1n$c!Vh5x(@iUyPH&0M`*(vrUd2 z%b!_`m^&+OXkUJ;KZ;L@=g~RiCH<4KqdLiy9_Xc9z2Kx^ zjdwF85o+jdAVV+vbbwWi@+G@#`e_)wbapZKQCr6uYzeRc1T)cPm2!7uiB^?0X1mq6 zwR@}MLiF;(tJk5xbwDM>Z|A0;&c&9444gmK2QC5=C=?XoE|do|wqpA}KoMLJ33mWR z*un3xl#J9ncmPFc;&%W=Xytb%sr($jGfU-z@H`9zp!_Pj!%lh-shl)FIgpW5{%zqv zlaR{klZkv#FQsytTqXxzoK$|VaG1#PuyB~k@wjl5!clP*I&JR)aM*{De_u~1E5vKpE<#E$SKuNf1ak#0!mwbjz(t4(<_cT{+IXhRP2eK51akwp2uFfF0=NkKf;|Gb z2(`f;0bGQ?1bYN<5&jVD5x_SaP3?b*Hp|dlgcD{%2W zmL?@-i&8lhpr%+~iG&Y%kR|6zyhd#`>0tZEw3-yF-U)wT7ub&sGs2FJs8znd z5fnBJ+K`*q#b^UcDDbE&3KX@0mxAn|$9MsO=qUh_1W<>SaM|8AB!Qq4wn?wUOEozS-J5Kxy@U%j#>$itl_FW`}bZY-&Tv2C~D9a-17%_mZHOfRiv7 zUxdYB!cK-~_Q1Sd0uE^W&pA!V!}1rnlYhhAsR`Vjd5XJpen+&VD9Qus&M$c| z)`~2AnE|_y+meiZ$O*I!bj3>8%T}zM0lRON8IkJJTWpSMb6)mAsr(8^?M^Hy8a;}B zXOA*@l4&dQd~8KdIMhxz*ovHB$4|+D3pUp$CZtOnlS>Yq%8R*R|Ai;Peu6CLhds%( zAGzef-)-t)o9kHOz0P@s2Ai=Zui{gwf}nC_mv4-_AokAl5$?oC?ej*BaZibrI#XW$Wlj8K=B7xuWm(a< zYZUb}q&%doi6JUXiO(8$jggGPhY5FKBy*63T1Zh`^jiH+SR%1Y%ptuTtWN5 zyf32+ENY7%b>SG=(>oZvVmB4$c>s~niXC7bWTVPekI_c1+{E)iL9lN-BuHFnkY-&F zi|fS5+{7*<@!EAfm)S{mU?P{#DHluP2b8?zWIAP(F7_!=S+MJ_jDltuD9UQzpSHfF-nf3ci>1%*7MLP=>I<)iLw(kI1E5Zi2hiICUFf$%;-#^D_J+BIAilu%y*dy&7Bm1Ezq- 
z<1-Z<^br|S%&9nSe-sFWy8sNhtk%+c6-OkGz+Ib-h8|VLE8*$0qLtlZTYb{~G~iK4XYp6H#J6 zg)eCQzj2!IS0qe=CRdDP9;q$xfP4>TmUBkVa)h9+<8z8pQj{0Ri1ck zE55MOh-od02|K*a$ikhQ{xRu&B^!OzDWUBr4kB$oZxFP3>>;;%axr!l#~(UNH{H}O z@KbW&0A}7O0QMNeRg)L92m6X+aWp`@-9swB&Zh1|8%B_l10UtZ9L~%3+OsJ97I)%& z`|Lpx=r|e>H{O1f7|NjQ+?LD+sPzP24aN7OnNb2Rz1KNzUS5S4qXgDf97pev zm$lpIPJF;_9Rx=d;qWw-DBITB)c4)4oTu7YSFx?s6WfIi&N*Ez*nqge?K$&Q+d$9x zR<~8%UUroC6h|Ym^DlCQqHdP~P$BRs3Uaa{M?j%x;>n*`iO%{@d-z`DJ??^%Yzy@h zNI>)x*rL3LM@K)gSA)1nq*U3U@HqMj4tsIENH(4&YcUakiK+G>gP7k!x>p97m}8n}d7OeRpcAJP zV8ilZ^t+-+!6wek1|HLs(|~Qe^b&xEg!ji@0*<7(?k?xNpBrp3Uh2ncjBZ!<;(Bb) zoL}Fly_NDOAsTf{8f4I6O#iGy6C{nkj)`Se_hHkp@aKJpC9y>p4BJzChKUOeGff|c z@PRCeJ|wapqZ`WWktED_`_P4b#=EfhC=5OGHbos69p8aGPEsDg->@d`MgPV8WZFf49R1hyBs$Ck{Z|wEFC6ubhLEBF3j}8| z%uY&6RM4ZCz(X&F1MLwCy%-7rEegFDFEE>P`29-EAtmOxJOHB8Pqci+^rSdB=D029 zefQ*-PS#R4hPmXD2dmuqSgu8D-)OLx^Ej;D#EH6-IC!DE@dL~&OHKne>E~c+18XYO zg^u*@!t5zvI0gZRV^Amz$Dkk>jzNAHjzJuT!-MW{?;OVkY**dlI5_~PoI5TUrQEZa z&J^2K4^SRsE$Z+jn{l9wj~o@s@e83i%G}hO1eVZLW2ukTV9oggDn0Ea{LH)9mF zLs9>(U?#iFO(6nlc#$G?K+M*R?;wKW7=o|4gSN_nuis%$cglBQjGp3B3cOk($7S`P zu+B&o&%hGSp*XD4s_()P0>x1TmK+Mj@domg{&b(Q>}<|5vP%37ZVfVH70elE+XH-$ z0~EvipYYK`ZSWJi5jR4MI|t#lxD5Bh;fnNqTZ=>K+wa!=ui|M(_^x1p8-t7Qm$AHw z&Z|c@9CO7A#&`+w@)1qIjZbd;iW_5$Of?P(5Qhh|ANQG`QRa-dyy{oUlD)(A(l z9CHf&XD)C#qJT!nywwhzD8u^C8YO18oPAy@ci#Agzg4jHe*L&wu$v7S`> zC!S3EN%XJ{_|l59AH^pZz=BeFm%5)+Iu3ZjDuftR1o*v=n8_9N+=&VHM~njy8G1jt z;UKvc7G*@Eo=1A3}`peM7nIS3=fvaCqS#CY;kB&>mg zOv%gw4;^xj!G#VPmGu<*=(G~{%O(!_h#z3YnM)E~9O@-x^&Qsl;ZsNZYTJQ+CEAjB4oiIAN#J3YCoK#KfF4gD;GHA_kM)NEWCnV? 
zGn4|1)s)L}K!0diBzEYsU!G+4dstKefdGRl@eDr0*07j3TuJBs8AJ=p*`l7)^@6TX z(aC*?ue9}xUC7RfOO%*h@&I5m(2rRjz(FVED>x2@67!)PbJBgZ`D6=qd%(KU zy1gS%>(uSVuf~(HZm*R%qWanoQMZTXS2_0WtV(XA9+u?x7QiS;UjbRvhHj-eFQ=j< z7%@hEB*{#Jr|Cu-SfSIU5N9waKuo}krveDW3?H5ld43=~r=!V}ZyfWbm*-Ag9ycA$ zYxywVv2%AM+(?5vu4fJm*C=EXbFdtrL!zGF+1In0ga%F!9+WASGG)sAiET!n?=i1B z;Wdw^{Vk}7dq0s(F(n3-Rd%>r?@m(>I;v^SS z`M8pw`YhV>8{c~NrpdV{+LP?tfxrqRDk+K`M1eCAI8GOsgeT|TKwuPB(Hizn=&gN- z5hK-UW2)Xury$`Gl9S*dg*e>^?UrDljS7Jd zh-UHKiLv&HMv@G=m5Lvwv!C&kQHe|tt^vlN}{z4kgh4|;8{jAi8v+%W1DaCk}+!NaH|k_{GUw6Gb) zXz;D%;hyB-A`IRk2nXp|f%#>vm=4o0dQ4gw7mX+Jkf{V%CRiH_EUxwebW2z!+hHjY zgTp=QRe8pNSgmYRBa^Ul%zl+N`Z}wPzI+ity$T)9Rb&3@msn0;ga0}~Be8})3ICJv zPxED$aY*ng&*m&K%YW01=-A|OU~w8v9}EY0gna_Mi*MdWJvi2$wpG(Vz$f?-ZPKP` zEdJ9~&b#)nj>gJ9^4?~|*Cz|i=T10}YKqNq=KP8z?~SMfe7vruWOLpJbNrd}STHI% zuYfl#^o}MVi(`aiF_vH~-kVao+z1$p_YgP&_t6pK6;u=-mfHL0>kti1(D}>@d>LbU zH$W+Hs=_v#@r>_Rs1M&oJ)t?5gOE_Eyc(w+UJ1Lr+;NDWx4>FC3UdMjrRQ8Ezcb-D zLfum^`d2n%gQdF(9ov-*fEExNrDvA`&K+eEfWQD|JV9XaQ8~*b8_^kXMzI~9Qu$wy zDiUD`9Xa-E7y`#b%x4lp`hALV`rVe|7q=N{0@+JDZV@uJIE}bhY%{{DW$+S^F;99F z=6<%(%c1QFiYmd#NC$Wr9IAz#KJ z83{NQKak0~Y9-~6_Xbo&pqE5=X)>3UG^@t`lBR-x7`sO{-G}{4_rJg)S)kS!7Row0 z`Vb}FUHmu;!CKmsz@!_dtM)F=ptm!XT3W2GrTH>>Bl4x9g;*HSi$95{|ANM5Sse{& zeM7g=<}QOzyk7qPAFj4JBe7_?N>&GKCUY7}lew3&qGUj^oiEj!5XbS1twCS%@n_Tm!@vDXu7S4HVZPafRY) zgu1o#1;rRW_}bsk^^cZMf4C%}n|?pq`!hyFM#o`WN2pt?kA3ltw6;fgjfZtufAw(d zJ?TCqyyiU(YP&a_9OBbeLJPKfSr=S~(JWn0QeQms+F;?pGQ$o;7wu9oj2z#?e-^&r z>H5+*IR|-#)Z{eufs-@xFb~I`gEH)A-SKS*vK;@jKIZY*^YtEY>d>~nadOrk=keL| z_Zh!$oSagzUb-)woM{-h(-n%7lYxZ{==f5%U_KZ${+EKT7WD6!d&A3!-zDf~LH|S0 zQ;hrSuctNiRCAs5-`y4*)Dcxh7 z7B&dF6O`n9N6>!-rFqcieDt?uwB;yB7IpU+e$BGKiWl6bQgEUzAV)sL^ zh3p7V=|L|WYS53|KnYypeh4j{4MHwDhnTBiO04JQ-@h=C`-Sd@*ns$SSvfJ#oF5gF zc8P2>C!Y6<_d~Ra^w90GE1LJ0{`kV%@9q!xMk{yp5&larlU`R+fd2MF7{q%ti_}I! 
zK1#2X_wU5jKR(hcoI7*QqQ~YJ@(7{jXBO!#$ifTnkr`P76ERko#IxuO)?&3?dyM!g$UGh`fUFr|DyUarEE@vj)EkY_$4{CR@ z-_`Eo^R&B!soGt#MY~JARlCc~)b4V=Lw83hy%gNhh6#W9<{v4M8*1}kDVC@1$!{+- zFRC-m?O%?(HLUxvrd z{8t{8ryj|Fb-p>d=|X<{eE3J^ADM4%*u(s{=fCoRKf|ct6!R#>S06Pe9}*D<21m?g z5h=$(!8zuz91G3KM@7Ww;E0+29AkoWObyL(aB#$Ae~woK=a>+h16Y#>_je8r9VXqIGcY(kzo_cP1t|}LoUtt)#N#sVm+af^u8a} zW+-IARAlT8pjZ+!BmVzDW6<1WCV2qXhTOj4y7oyh7V3YYM{&DW6pg!h+yA=MyGNOfAjqEq=zLzKgYF*&R31ph4E z7zL`a8uxSqxMvH%Y2HKd;yr{*^Bx^WhX40*bV(f6y2rW8&a7SIq`oikFpO70JbIkd zM$G9}LkFkiZ)H9|7oo-TFy3#p&^Uy=q-X~V{jP!+&fskhQqgW_WrQV5&4e&x9T{g) zeKdCsSRMS>jMe05V-CVgnK+Ve+W^wCQW+Izb&f_kRRbxdKF|1(w7Ly;eQU}YhCxI7 ziEL>3Q!Cdo(WX|Gb8#mBp`$7sf) zY^=fS7{IC+?Hi}cHSb!?ol_l4L>*%wG^hL^H2>^+W?WKYb>4^9UYm!y7gFE0Svqja z*t0*~9e72oESdaCSk;Y2ulHqWSF6*^pk}PAu`_BLDWBDc2y5nX z{E&K}l%aDcJICFj2brn&gUr02r*d^ud>iSPo8LzH8?Y92^eqDM1*&EELx7jT(JJZi z3qY|j>=Y%XS{-jwKSJC~2AJXKGR)42DN|%?@ZIRgUBi6rxY6eF7W)+Q>akE{8m>%7 zUd8EU*yOf^U}c8i0zG(?U}fsE&zIagS^Cqsf;kl}ger5Ep~{R!u}270nJqw-nWjUP z`9nZUB2B6Mb|?U(IsF#QCquQX7m)VZJMr|-;^7HZ<|t5QwsTY&baqt&RfbM1umD!Z z!f|D+st#D@ZukL4hC)YhU>Q13S_78RbaRAvCYEdT1uVmR2JaOJ(igVOTLN2#UL6hc zY1|L}!N$_i=j^+VzB6e2P3C4Ady}~hV=x1Kx6n{O6o)3%-4E={O`3haFM1hw28E34 zGVTm7hj9MQF^5X`kK@j0ZIjw1wMDWcnDB4Le<}Xi$1gSBJ_49=Q84YAq1}Yg|6oRh z2udmF+Q|6$<|ZRgFY{ISEN53Jj?HLr>3@U3vEk{1fq5c|K<1a*@Si5;V_5p+(a$`R z8;S^JlHh_;g0Km1fkMQD2ZXx}ii`ZG3wkexwJ4hSsh|iSe+MYS$4?UO@q(U3AWCnY zpt}(WUa9Lr@#s0fN1_z(H-i33(0>v1IYCPVT`cGmg3c850YUE;)FLS12~hfkdqebk zK}QK13rh0ejv~Ts!gn}zCV`?}q+)v&l@v-Wx>9>b}j zyVb(2*?5W4b+LBd)nYzc!@b#f1BQu|z$MywH;8mLpmESuWAfv*Fn8#W-!7ZQeR|j_ z6FHx3k95sNLlQ|dw`f1?o__r4rB7nK(DY`B^rndztC5~QjZ3xjYUNHkU%T}3>6#$( z?$6GH`v&QSfjY^37N{1B-A8R#|ck2M^By1bPbFEAkBq>9=e2ihEO1fqhe>asDEZ zWN`i>pk$6R%$vPn>o9MsX^-L``4f0OjD}#|JPynoQ2-qD<`)PR0KHiSR|x10Hg5!j z-dNP3KAqjSSKOQRUmEx3QS_z#o9nPrB~LvmRSd+NfnBuD8-*KiXy*tP^LGQU!D%%Z^VIzoHG(#bbq1qt zyoq|m(Ke{E72k*~jgbI%0K96wOtg7^lFXdpyfmWUjrNwP}huA06JF(danVq_xiKp;_44dA^wNhNG|4NWYwC~ 
zCIEo!V8d(~4;*Ozo}5FPGiR*57Lx&I2Ds~qFelABYy~n;$9v%I$iKhjw*}Z$g@>}G zQ~_1T#QZHC`zeK8%)5#|?%QT$h+XH#dhKy52&T>Z3)pfkz_Z~{H@S%?7>EpPn(J?* zp$_5CG^>YofCVQo9f19lCxU`B_6UMKl)*vjbwL0&fX$mNib}XWui?Xzc2b6d?zqX1 z3Bh50fHQE47U^5mZGg=I-cBoQpl{?*Y$W6X0zn;!W9NKCs2OWxx~E0lxrSHs@jX!F3G# zHG#;3u-LC@O=^~%kyDfyY*}tbU?t`g_vEURr}1iP{O{AGsNLRgqh?v07Hnz27M3bN zrz^k!FkYQr71)`t$8mF<$oe7>nF$bGw})SGDoI1oGu7HEZ#SRbc85KycMzG{JauO? z3J{v*h|M@C1WINn^qr;B_>UsVEKeF){C(PknmHZq4m}>`o;Z@s_b{&&{p{BK&(25R z5sRwV9hu+0QZ0(2aBL6!FR=iG?SW6ABaqt2@IO+nj3Ee_4DT6e6exwvN$}F8TKw-| zCRo7K1qU1>LesQTTo9N{6fHG{iijNsO8e{MH^PVkMuGuGCD7yFpzq=jbX~n z9)yjNQHKb7$K zVeVV`+rYDy4@W)B#SU)`CdG&G0*6T<EB)_#?C+u5(M3r@LgdnN_ z8~4%>RmzbD)XzmDsuUu;hM4jUkJ~4rN*wq9LPV7t5k`-w5(BsNr(qnxr5Kast_bfb ze8u&KsB##5+M7KMh8fF7qt?lBxRk9NJjG>+ZqIXmzbkz&My3aCj-yp}i|`S!D!JuV z#gkZ&`R&Cf>Uq$e(8#u3=BO;xJ#kPL`{Vg9Ul?0d&h|!A z?=#Pc-lxPwy-#r>^*-^eCyEX9KDVP!#8NDieJ9i6hUlt(3Qy{) zywE+*$`$*hiV7-z=kPlSc!lepsFberKsFg;Q$GV^cm7cfqPOPXS7Pcq&ATqf0FKE% z_0x|12*O9n-BgZp3WjLHGT zWlh{pA82vtAn;wnJBX1`vBH-X^^pz!$U3f8drfN|4>Ifaf6L->9EJTU7*BWc13(>f!Dh4Lv4 zjN|aZ`DP}Cu&JkP2tfc}X#n*Ry~+{bReq94l_chX9MeHjve2VoPz`tt4uWD&G9f(c zT1MiHeou+n&U%*vwwQD7qZ>}P1ifApE&56GUW4LC)iPmtFboyEKK~CniT2lwVt7EK z24bu?@}WUV5>>?bZ`=3ezZyGsz!@W}t#9%wM-}%CR@w3sal6h4YqcT245`-Sr{niS zeBRmX^If=Y;F)!k*b<$86b?`RtG^k$uqrw~ zqtKLmqBH;1VpH;#&iuXcDL0&QA31Mv=l@s9bvHD+kL+H~`y@0wH8xk_jsB-{lxq3s8x*A=FbCLZ!yWz(}076(GM2n@CHD}h4rSf#7py7W&wH9AZO72luCTPux z%rG^;wtG>-lq(7Q<4QmF#~fgOV2nQm7$8@U$Eln*686V6d5hd-zYIg=@IJEUw1d z!@NRi`|A6`)!5v~!?EX}4EtFRc^iT($N#iXcs%xey~nc-UFaCJW}q|bCy~z&pfd#N zqWJINf9(k#pFKZ$^vu^fbZt*uj?oC(h5tC5Fi%%sxEhUp;A*5`u@6il@g4XwCi=0U zyFl@(Qg_2{3Q^;4hmD`;Btgdsnk49GL5B-EM9@e}1qaG1014UI#eIE;#NN&QPAnF#Ba45+A zrl4;K`kJ6PES~wV1EqW`h1((MQc#ljVd1`0&~I~Em?-E-P?9ee6rX_e+t3auy&r*+ zd`n>uBsmreI#*5^_`z?T)nxj?!M=K@Pkd&Rkc87`HYCFNI&A3VpvS%R7cH3512vyUL*Hg}K5#ix#ipO2)r_ zo$P-iefk8~u2(|bs768l6jmR8N{D+#h#LzO{kQ@%7d^Hl$X&Q#w%|Q9Aw!RQ!Pnc) zDPB;p_-O;3G09Gx!~v2wiv_E_9X~l?LLjFFMU?j9g2(1Pwuk{s2G(aHM4)NwM=R1+E1@<5I)5c%n~O 
zhdzNX`1;>40>1c)1p4y87ykr-{I8xWC#x7UpIH2uL76hmHeqH4Dm1Q3Jr6W9V}gla zvj%Ic$QKkAEf`-=FusT<$$opozaUgfgNX+k1=7rKd;K*F?rPB}H8-h2`bTqS*Tt@j z*4j3u2Q?J9daR57V!Q>R7Y$h=g|$cz`qh6y=aOpjUlVXgDvIY1jT?fuPy-M0tH+NC z6!HD*ZgI;l@~8hVb5)B-nBr$wU;9St`Ea5;G#_&7^(rZq!1=lK=Ee2_kv`R@G*Lr{ z=1k<`CSrkoX8I%%I#8b$J^i%5I$f?uykIY-9#MV!A5o7cX&MA?TvVff zL?3BA?PYx=8Y8a<|LgS;21tC^!?4AeP7*^b-oNQ}kp8|meKg_bY@v{54K7$zFmq1P zyxC7Ic>D|X(G2R0OuWpgUn5=K*Yy&8FXPhA!q(hczlI?*yDqk0+budXN{?>iFv!Oz z7&%IbXjgyxY3f8e+MW;k?hq`N1Sa5vF53UHV6olL3*Db%5)aZQLcE0Zv}f~2(%tm8 zpC(R}n@Pl3jgFZ5-3@qNQQX>)fQ9I9KMg^D(4{qw*F}1q%k-z_#aR96-`61r{tQ|8 z2-^Uv*T3}K^iOh9w)*h`oELTVy(hqoK-BQd~1-Hz!wWtp%e93`IT71Gm zL^@4KHOt_}T?o(z2%4gfpV5u{ z(}<5$ine<{rkvT!ZLMYH<7mVx1M)`{h~_9nCBdC(Q9c(C`J#JzH?_uNj}ipGI+?Zo%}wB+qac5@gk`L2;MHbL;cf&Q7CW_D9i8xD&s+Vklj>P+v)9~ zTHj)g=WUJ^OVM`3ji-Golvh1@A7VvzaPz+!{(UF4-$rDM7fP?XwOSuaYON3PLTRDS zptu7uwjjnW)<#Mmt(($tQyNusq{VF}%FbV~IIlyxWoS(o5~$hClm#+JIHoJ~0jcVZ zsEX*oOK_X`#w4Vxdy(EGJYn!V@zTnBDs*<42R_w+M>dp(T*BM%{Imt$?`BVDr^5arlhngLcKN;{m&+;emNBPe$1ci>C-;5vl#2?^v{vzRi z2oymQh6w(@ArSr&A_bj;M957zMWFMkhJeO@0!r~-2Spd1S^!G<(et4%%|Um|@B)x5 zT}*?+e<0SDv3>U@;^O)l{%ICM8|PFCcN-F?%YlFLk7N44V3f80-!Asyq zKK|?a5I6bh{h76|VBs?M{hUoB%Yu3Hi|4Tst7Z=*iz0k7Cg5ABU{T@x!Ugun=1~M? 
z!ld53coNg@etQ0sEL8fWg^TAF&&i*;c0kq3K}QRoRB%mKUSucd;5NW9p!8=kuS$V9ZT*$u8pGnX-_g;np+z$ zi(wod)LytWFdT(@dhU$rJdXbNdGWp}J-S_rN`-?#$x%u~yZRe1uNCQt;xmxlUa(ja zmpWdq5|L*K0ao`rUKTj0!Vneuyjs1D6X`W0Jyd7^rI(eM{f)oV#CtG_6gowEbHC*H zdz!ADQIaF^9`y2I{5)4=#YlhS?}d;9Nf_EdUjo&Szv)@&pV}j3tIr3{i@LsG{Eeim z*FGSt$G$5&K5oSF`B{N6IL?pOjlr?>Z$T}VW@E_B#^8Q;E%pFCC5EP+_SPy-EqZ7S zf5u49KM(Q5$oo75_nd+6d5pYe^%D%caf`L1*eTgePodUhWGzO~SnB;yWHgppk50mm zl4NopEjHB9(bevw^9^cT@452LX>(eCNqTchmekyRbg}T%5G(1*GLUKRcC< z3UxOXDhH<_iB|mY@{%5v)poh#Adn6*kRN0Jp65c*wT>gmm#qh9`3`d*&FeoiM`zrk znUv4W9{KoYdhno-#wt8N|N?gea_O(vmOM_VV?D9oab2UwfO%WsC7K)drWk@C601o-vGdlbadi_ z#DT|UY5i79>vi0q0D^E>Db<5UO4XnK2bACuuH+W+5R+@o6d6(c{1KjwQdwuW8W)CWuKq3y#U7M;j%adr9`8DE&6jEr{7j z19@y%lgj@uo{?Nl$16c$By?O$7G+)6VboU2Q?OZoyax2 zor)rRixKNj6aGlW-(<5k#d!|4*0P~DHPc4ooiL(t0_gH7tB;O+x9&80_MkbQX?0Pf z-Vq1i?R4j+a8-1-6AczknYHP546}93oLXYjvIxqx)x}2Hlur%Cs)Yo8b-+{Sqiknk z=fjT@HYz#e&R(R#I8C?$l)`r*GSTINE*3OR&>IEq!jn=sub}UP zB8m8S1btP|N)N5Z`kLef=$e;Vhqju9kuocj!TNX~!V5aK33y}y2(J7I!RM%u(l ze9nP=CuK}1n7Mdy;Y5F3>h1ezGc_m^&yeO5vVs~I=lg#b^SgR2jBrC|uz7N!A1?a) z=w}N9>5+=4$2S#I6+OO>3X0}0e5}Aevv|RhC$J`EreftW`R_|2okqc2xQLItJCPpA zh6`FLaP?=eU-}detE~r^hUe6tjeIWkdm*iue=lqVrSjE(FKAL}WvCUc#=jV<+fSIv!hgluPsG)m$24#_O0&9AYNum`omGChM3?AY!KKm`tK(l4zTdC{9jq z3onUAhMhzvCJ~D1eO(pm;BRd!wv@(}{aw_doB3NKX1tEc-Wo9zbWHZvh?%TovbRRe zR2{SDTN}&jQ8-DI7IqS4g`Gs^u#-p*JBg-+okTOjPNG?1C(+!nlW1YsNwkzn#QXmB zqVHedTi?IFx4s|Do_#-1rXEU0>ES~fSCRk048ez zFd}4dBLZ3g%pTQp_I~L{`7rw{*{2r`^S%jT(PhISIbO%TRXv$maH8rd+%QJk^gfaDyjgX`z+xpfVs$gnSj}Jx0rE7eaT#u$GtIE;*ufp?_fP z5j=tiuhW9>$BmZw4H~Q2m$l&kS05bD6%ss>2VbNGU#brt*&{fBzDf2eT5ua~ z6cuj|^Tzyn?}^ft9iDa|k1-YjZP|My$0jU1WQ9-VJTN%yARe}BiU|8-eb_<4VX0!- z)cr7<<)yrfa$Gdr`wqq9%9hIYcjPQP-v)ak9k14@sP?2btiqhY*%H0Hw-cG8b0s+r z!*EA1Xw5cfeEMn4tqe#ta<*GA6pe-WnhOT)ijb4bk`>Ny<{veQ10>U7w7eOlSn^KmWOXvQ0f4 z)cAZ^$c;5Rf2XYaY#Hm5UWZ15dJmpz=xwO5uD78Am_3IIVD=m;fXT`VIMX^sMH3Bc z*hIr!G?~E^)taP@kV#J8BW=RpVTEQQ?t;F}C>P($+c7~PCLO>O-^;`llMal@#3H7c 
zbYM&-7BR)717kugStX`6>EQB`RJ@y}^o3NLA(fKu3Nxvy!cwZ5u#~DUETyUsOQ{<5 zQpuH2pLe01WW3R{4T9OT4KikE8)VGTHprMy_SOa|U9>#};B8TTj}+E4jJ?85Va(I_?S6xQ6ZlxksEO0_gBr78_esa6L|wN4BguzX^&rd<4khIKs^mXZNxsKR2* zP=&>qp$dyJNm*g=fP0~^VDLL{bGTvtBNO1Zs=VwEq*aaWK!wGsk+NzUd*RS}S@q?q zyKNa|8}X%yF=7=PxW35-uJ>fTDb{_EokENem5eug8@PhW9)Lm$yA2D;9)Os_Zo`=D z0bH_*)fSrRhVj`m9M4kJHVhOG@C6sYn6Zz5?!T@okE=f z*2s`didZAV%ALJz-FOt@f z*rPE))oCl+wiz!oFZ&~GL51}K=4e*b$A^|3?Sdhq5&+uR+97Q^0prlfjCIc&(hP=- zb-$;BTr<`^M{8i^^!H#G=4COo>8Z z%^9Lwqblw16}~ca23k7QZor%|oy-}>QO}P{o3?6G3;}b-o`6v3M1=Y!re7g|$rJ)H zdrUD9Gh~XvrtS+eXB^j?Gej?iPXnm1udkc3VYxy7wu~8&DGWaiAXONC8ib`(3&T>X zrC}*mX;@0NI#?=h%YgZ|w@-s!Y#9jI+ou7TZ2S^rPXv>VUqbAOED4MlLD{DPhWup9 z*nq)SI4CSd^#ukoYm#1vSenjif&H~LEbOlvA(f`H!bhsjVJTHrSV~nBmQvM)rBwBL zsbGI)Z4jo=o;r&eP}w&ZDINdDi(YgV3mK}j81qT`WYg;`&Gt%jA%6R7ujY&+%$%_s zU%WK@f;q#co>)uvjf}Ec_`(cJd&Wj=KmGdj?#0_FlkiJ#rw?5J}0e8=); zl~!v!caN}Yz>p%XdM{wmz;TS4L4$VF>(0+6`v%tMkbNTpDPFd3=s&Tz zh3m3?gTAHxAEPLgzN}LhS5lYl8`K;0y)H?c?ziGFz03BEi?eUge$lVZ41baK4alT@ zD}>KljaiUN`&I}asR%(J{G?hKmQpPZOQ}l3QYzXtud7#KWZ%GGD;yM7sC@%sX*w%> zED0NFWGa?xgpX94!&0iMu#}2W2gC0T)`g{1^?IrPhuAluW`pe;_}~mr`v&_y53lta z{}b#Rc%NTr-@wrux+NPhJkXMj;oQD4x=;HCi{-a(P%PLtC>HD+h!tetAd`k>-(c`C ztEvzI#t+4~UoP9akY4N$wn%b9>(wgFG-F1G%tbqb3~XqTSHpC>QJ+VJR2wj1I^0f^{P- z<)U5E{_^rKpJMHiuio-+Nckr%pK{w=C(sr@)^>wo#_>kJzMC&Efvs6lkJ~a{q|KmQ zZ|SfSdgv`|>+GSo7%xO`FVlSV1QZujLQFwCw-Mr?th62FempK=o7b(t zVXM>FOw2#D(u^0sUz-Lt^95GFJ-3+&g*8EtOONpMjS5v2jzZOhqfm9>C{%qo3f1_{ z3#F|#Gn7yZyyZxo;40Mo|v}4*?ra zu<4|lHlJYQ$pLBACv2t0E?e1F8ey@GCw0=8QrsKz)a~qh@n1IL0~4b{{?W*R3+^Et zo5|GZ=_Aj-*jNM4A*NW@t7L%5lp8Vq+bY0hiiDVcMFJ)tfFW7k7z~-Q4>JVCJ{pdd zk*9uu#UwKT9Kw{z3EhgcuT;YY?~Q@xgJQ!98|;k%riivTM(jcPCf96GS;^{q8nj4Y z!N!)&c$<0IC#6-Raf263Z&p!{AcAg3Ni_9kpW?wrPRR1*A#Blfr5-CpZ|VS5T8bqMed zY1Mb%p*0-!-WT5@pZx&$(0NXRr|$^aKXmxJjuTlputGg2mu)kc4TjB2Z#}(=R)M6+ zPJ2??TPEb&jDLe%>4-xuev9_Gq@TtU`9`GIN@L2&&j2tPdCzg#ap4O4veR3P_Ex#_ zozJ4p5ZP(zwx!o7&hLYq&C9Msvr(kUyYnh)?PyhXvNZWjI+i?jhO9SHx7!l8*dV=R 
zJ)}sv2lCQ)*!DC#J~5()JU13{DCv6?bz7bhCYyuBb>3%DyJhEn-E#W&^wYj!>D}sW zeHCqt#c{-VPe%C;F2F~5sw>ZUI!`@WyxaFYvc)lBqC9fbtE_7IAL*^?r1+tX z!vI~ML@UmzD2+}VmBv|3n4NIS(v&lZYE#d!;^;#jl*dO*9xp48HLN`H((4s+wN2aIAIq1V5|!m0kI1(nV>QGcgvGwiiB`nh?dHiVFFSHKwg~ej66jnnVOt@M%kmy<1#VAqn>m~@HMDt~` zL9iBr3E*~FjknzD_4;0EueVpNy`{HQv9&cJ2murlP(CXpfJj(00R#dndH?^JIlE^! z%ZK0Xd*AcRX3jkG%ri63JoC)VGjnD99um4xF!yxe-AGPkt-dWFtG~HM_ohK zxqRz6e#ZE<8RKVA&#Gy);WN`lIey)Q^*uF!-q`*i2GHU1xC0d!p>D}>tmSEci9u)7 zc+^qvcGMe~i6ryD$%eN6hHbms?@mpb;HzCLmL$6yT=P$I?5)@C#^edbccS<>X|j*o z2Rwbzb{gIMzb8|S!1ZF|`7onzC#N3vtUqF)s$tL?yaYq)Y_P4~g}=@1uFQ3u|0L=Q zc~g|(&rTiY@*Q;zU#HjpkBITU8#_ULM}zhp$4<-sbyxT6hVOvj?*Vk2;O_+g zt3mSD6&T-*b1B_{c_=o{*X+Wa89aT@vtXb15AeG5aUX%Afq9A=U%lrVa)Rr|diDv7 z$5AW$K^^09%xu+o{3C<&6HE_2RPz&SewZE$;`Y61e788&j>KJ=s}0{)rerMW_Ilt& z4tC@?P^4`x-)Yy7<1XI`q<0$ic@()6^|_bzxs&zjaUA1($%c7qH|8VBtg(^d`N;c5 zAO|9ZGZ}w^l6Dv<6OPT$$Gzj>t>sU%iZZ*9;QOdrUx&xHD;E@r6a56U8MJF zP|8oI%l8S=^Y}ih+Sq;yQnQU$>sU%cbWW_Bb|+^xK%en1A7Z4$PB;{O&;d#r$}oW>tpe zXi-b~arfk*WM5NYLU-0H5F8D8GW>6@p{NRn(S6| zA1+AwT6f@|;2ia2Zg+`F*}^IDDtIv`Kg>D#;c$J*`2>#&MgAb~yWEb=C}_=o%*Pj> zklDjo9vejVc9`>VM}y%XY(SM5jxA=QnW$B&s|L@4dhY~x<_4s;j``kTl~>HHT|>aY zm5}!qq_-AY52eBM+elx&QKq*Js>fr)xuMNf&A~X5c*0JG_+=F~w8a|>*6OuSD88%? 
zUg9`z@*Dhd`I<1JM+MnX_p=^2-eB{#yZzZ@&m{2WuqQADt-4XL=Li}~GuiWJc#-+l z4m9fqi+n}!2lcm2@`w6%5>_^3hIhflpco|eEfhWbibNyuL<(~bgG-{cwbZOizO&In zKWwi-w%w3i|6>$TgD1i67n)ECL!car)SKF6^c9plJ$|URnP_{j(_~YedLTGh=&u~d zakt|*6snQAPG7hT6!=XKRKiMwy7VRREXM%@(p!K8Q*kT>hukrZyx>|y+nvZSJ zm9yq%9bX0cn&dC;PILwCMT&2`0(Zj+8kz5SaK5ZQ?wDu6aqn_^^>GJ5((QXcB>!&T zQG@oDMzpM*Xz%ZM97m0o_ocphv4i|`Id-^w?~C@n3++9!zw!hgL>Ghdx&6~qJ3T|* z@Hn=*Ge7arfHUq06ZX)HT12tLDdIyM??Q%0dS+DgD`Q zme=C|!S_P);sHTxZ7RL$LfUJM%y#G#mX0!dJ-ujPkP)eC+-OEnT|>~MIUd-K@-_*5 zdQgrBmMHmbLd!JuDH>EgG;D-6waDY-71W}RP5Li=O$`SILd+?yVSN#nS18^T^pi9& z`d&_?{mJ)4G95qq;YB?9+3>%lIq785K-aJ*-sN8?Mg>rx>?U~Kt}u$VP`M4J(NS}C zQ-iu%4!Y2OQMg%N9C-}+yV1X7HT!dS;C8g(^`5{zsG_$#nR{KZjp^e~dC;@|1zxn_ z7ROrDLTJ47v1`aGm+y7(=Mea_$K&_}(oK1|G5zT+vcGHd_;z?~IMJs=$)B=QxnXz2 zxk19Xc@mV@Qq^B4jYAvRikk2}XQVx|McFgkY@YPm4U+s;=pi*6qdr4zFSL^{0R#H8 z!2`$RXmDqu7NHeoRZoN$3D&bHP)A|@OZjqP0HSAqM16$z0DaB+zA=6AYMCGwIhsWO zeG2j&2QdQsp|S%HAidL8ea&$J^|fBxj=GRKdm{!WP}+)rZr>VX$QpEbXulf`J?A8{ zD)qH1eYq%~%khcO*T`v8RkJJp=Nt~|wa;&Ctrg zdd^lg1P+7>*6xr#cC~z9K>UxdjDvOCd)S2)q?SWm6N8RCzQ%?g`2+m+dmQiN_|QuB z+FU1Kh~MEEa>U~}=o+%q&h{DQ1&$Qv%-o2vbS zEZQ%|PkW(Hm+zMS?_Q7ZXsG=jfls;f(yNdF%gl913NJgrEE*Dxdo{rdZ4IP9*7AJ-#(s|ufhX{MIauSa-VnT$V;Y#-RW zN4TJiAlrjCYqr_Rw&VeQ61f8wb_Q;x)n-1*ac+0FC_nrf?hPK7lrk>KJ}zn4xTLhK zq>P$Ai+cxpJ(}=X{K7bK*Ki)tmc1+E3BJURmz`7|5tUf{N?BWiU%`hmzTg`JkZyL8 zU9gE>=I>Sfx^QO^pHRR#^2aiLh+pnW8E+uo#e;aH0sDbA5al<+1z(BKlWulWZqNKP zUwMdM@Uf&X_|Ev_EPTsKN~`I;xK|+I(fG$yjSNOT;RebP`%1;t!Xv&7xxjtJBYq3{ zLe-9_2MLhvy;Ql?$}Lc?Q@M8K!V&nm7gM_o5luO2Q+`u!Yn0!Vqt}GLUNhx(j0%S% z9^OoSnXO(k<@Rnsg1x~)Tjeb(cBGB3n1-ueCr{29 z?HJ>#q&q5W*3_cnsrM8Y=FAev6JueIkr8upC(bG*|}!ii?X%e-zHZXjf^G7pRja=UelYQ-atLxeg~_Mky(|Cj!G7U*gE{ zP*K7pMx}YGioHeU#dB^cEiJ1knOa=*VBvJ;xMXsAzNX>c-1IT_@FVZUmHw;J9C#Te zK#sJuF{4Mr$A97}=vP9@50q6*&mS|If76Qdi_$a3ApKI1*Q6hrRI)a?AHX}m}G zN%3~}f82LxeWLGx&CmB1&h~~e^7Hd@rsReF(Qnc85MFJ?83hm4e3Xn~wJ`o0A{hJ% zae!<%7r#blW-9jfEPVd%E()+HSP1>Kir%wOS4Ti64P(;jcT0F`SLB! 
zLN7r^mH$8G{lkfhj#JTS#T~lL%X1foE*_5PJI;0gu%YNZqUhN{$1EE`xIcKN3KGtl z>XnJIzE`MxY=c`3KjLe-_*oy~Mr6_&c#ik*`~=1kj4}T~U&OoO-To(-oZYfEjmTk587o;tC*LgHhPk5X4s)d%2q%pv$xZfh=MD93hw3kTg+gJ{%|7-i3}$HZD!D}Kb$5r_%~X` z!}N#mk{Ps%U9;j=YbkL{#HJm{A*90GHM^t_MNY!ZU9*5e<|NGAH47MoPQuJxvw%VB zBrJTZwG=yENVQ})#m-ezKa9r8%qIQeIE-7`U&KF8U|_P_|5&I2tvBYMvOzNShaZOa z%l6gRwBpL6S$OwkuGAO)7xod!bP}^hf${5Mjo}t7K|kQMJqP6LfsEA+z2-qH@)XPE zP6|7Y*eNEqI|AEsx0Aw-C-y;2(Y`bS+j6y&!cHJI2cIG*ohVT#+fx#HLDW?0JsC7QxaW%BiZH8P4Qs8 z!S^AEVR_Fx`>#BKw(RzghI82MzXr|`w?7rmhY-YM9Mg9~-0Jlb z3_3x0mn-iv7}?={Re6tN*BrdhDsQKVmkf~sZN?F=hXuZPJtA<$YdD=0;)l}_-f%j? z8%{@fWjgk7I^tEPBVJ`X;x(Mku*h_TH=K^}hSL#VnNC_b9q}sD5w9{G@%o`Cdj@LC z&*F&U3m?_`F~I`iHK0U~3pNNZDhc(sXbNYuur;7=JpL=u3y5rc(qB9(ENlexJ(*bv z2c4QSCklA6)qmkK=sY4WV=I? z{fd%Ck`Dxsb|0o~cj+%__#MOfz_{RSHSz(QBxD6uO~h85{eZ=&_BS=5_LHGjDlTlN zIs|44l}ycV;qh(A9=;leX5+*r!?w<-*&pv6j**hN-jXva5sDx&OMj`s9J;`S3k0Kk z1MeR^6<<+owO>NQ&?!KbK=`pSw{|OaF;0Nb8s)S>X8?*V#OV73l;6{$56Ok1-xS8h zx$;PGloJXaxN^9JEGMu4OEJ7d5+UQy-Hiyb3yqnMiF~j%WN*VNm^*OEAT=;_`)wXa zbEseQihiw0^lSVw!jrjPU-&aalB8b!=IpW4l^(RonEMLMwJw z40O@6C7S4CBGBV{QWss@c2tNv6V&@epvU(_7k%3cCi*sZ4rbP~5_+PebMwvvs8VF8}T5#$4#eZbCQqvA9Q~0o_)DOj z`0)g{28n^lms^8KlHFJ%y0P`58?&Js>%_M(vKwpQ(%Nx~7ZF>bx1riX8oyDCcn>u zdqWX#m^Lko#(kq->Qp$EXY%*uw_n%F4n~Asaj8>GRr)HJwy6&9oPr zepr9FUKFyap0B<5Hm+0RAw7(j!q2C)7f(@ioQgskx?q?Lv~6%H8+eo>J?F9)KdI== zl;~QyqNlu{+FpD>}4SGMca$BZk;UlewHnyew~q}mnF~8JDQEaHT}4y^#Lco z!qFH0AEfN)o}rhmo1s^3x6N?rbDOhAC5+{@>#l~@gc;-Xj>WyTP7F%A*8r`>$UIr~ zR{L<&tTF#2ZDkcRd@VI?eY_<`V1muEpTo#JRXHx2a9k|9VqaT#J*nf0acn86!oBgQ zDu%Z=qH`dH_CKl@f-e-7JA&cUYo0-n5qMOqhEvmVoha1L(eD{%>TweRPj8Wco0GA_hk$2yNdSZwEqBpa z>>h>;NC}4UN+FIAqygABCx@3xh!!8FWFf28GHvI`q9V~ zeNci~m%H_s;*fot$UY{I>;ksiuxCx`=Ed77_rQDry+=pW41HeWomv0X7~U@^`)4EY z7j|7)`Y?1~?dZN*^o6SXT8nt0E-ZHUwTg*1BH?Y8?n`Xgmfcr^=zFkdAN^an`)aXt zUjsz_+JXN27Ku-Kj5}-=3_DP1Wq1Q;etvdVUY3l@|H~AeR+aYya=)^c z3_}k04o5%v`P0fu%BN2A=2w+IfNi?@`EH6^=&4it>8s-Kene&h%S_q|_{ol@Qt)taY&#Qe?fjWd 
z4zK6^v=h@}_u)rX)^%~c|JnUUpck`k`1&-8??&W4$2@-iY8M|+wxi`Dlm{oxl8;F{lE4M$(}EdFCOT)Kh(V5ur@z>pi4bL$J-sCZa$N*nkO1My-G z#s{Rn_VQ?(JSHk&q|9&Yh8gpZWFWpK#7CG0KjI9eCcA1@VSiFWGQY>eI&N>3TbC~( zm0!~(JN1{|mhsZeJ_Dn6YzV z=by6jxsc!O)R)xbo1tvqNn`UtmU>A;VgpdCPui;Rh1}Yz^#WK`q8QQa;bF%_Bc4g}?o{WbT5g+0#IIb`F0l?($TK=n7 z%uh&d-v^4u3jE_!ofWvv7Q77gGj`Ahd?9rL-%7y})o zjSUF28jKyfL383O9&ZCa6IK4;ABejSTq463{0jStcoc!Yh+l@s$PJC26~47b;G!&Y zaoT{N8n%}1HqM@G!Esn=6Wl*w;G?kxx00C2XVmfA2ELoZCsSa?XfCcXgd=jvOp z{G!keHlw9ol(E4EUeh?+lvUGh^ZvMdZSZ1&vJNN*1dsG+9ebmiUf&s`~kSBHXE zf@?UGp`X!LgbZEDHZpb)X07C_UfE*9_fpN$JN};XOf={qa3GJe01#ZURwfce| zqFn7O@#V;jx+au!2wcz57F(FSngk{>-K%%#`ov`0jZ+4uy~Gk zhiUj-S$E5p$M4K|asfV0p4Y{fUP7Bid>0B%EwZCE&(OE@7R-yYMTFy{_D2v5#avD?WLa>?U6AOew@gjB2zsG`N&OvNM^ggiJk+9_%d4Rhl| zdVDO810liDzyrxgvh-w)|C01UP5STwsiK7-3npFY-YCMuIDV{?^isqJeE6CV)${UF zlowwIve@o`V2T~-F&5kxex!#VQ9QYU*+pJm>jE4OQ zC}-`}b@*%wHkH~w=(vIhFc(}?!arT(j$7Z4ZnUY z@okS0q7QdNEd&N0W7Q+mP!DFl??QkdnMaOx2)Ws1gP!vCMpo7bbpv1Q^Nk@x*RnY# zq#AyAs<4J_G`cs6fQ`)NdT1mOt(np0FdAofV~1`Pwx_Ko@>U{?dEZ79YHqVTb4|rX zRl`Ku8)Z40Z2(qY1PXhxGtKC`uIe(pH?{W{-3pFWY=*ZLKYVA-U~d~fqlb*GGx|2^ zweO;gTt`}xng#~0KpfHIZVPMp;Mf^;B5Q|5*4ho&wzKq(K?aN++OB~* z(s>=f{d%pRq@X@mdQnpyZvc3(if`|WIC})KHflw^>_bUkd{_&$)}Xus5vl4OI5Guz zJNE)IcMrbJPT)(22>-z54kU~by7mAnqg(gg2t{Ssj^ivGA`Jb&)_yG@%(G76O%l=i zvsFNNMR(m8)gC|MsALL{BFHrYqTze|Kk)cRB4#OdE(RnVafp5qz83hvi#{7j(%REL$!*^``H-<#~EsWq9QEv1M&Ih->UfJ_7I;RQhZR^uk+tq zp~{9V4W`@;8Wg2rXVh8cDb0=J4)Dn~53xY^_=ypCS?C?8SsQh2A~a7(>*LTN{Whpx zsew!}GG&Ngcq{d@M@aGhkotKeq<-EI%s#vNc?0SPRp(8$e|9DGJ&}V5C3GQZ{uS-F z3dbdtb+-4liV@bb%2pxFyHBs}q~t*91R_MI*`jlz@Xpf4Dx|CiKrfk?$RX*mkLZf|7ou{%M!W4XNrKty2F`=g-|?>Yw)n zAsjtp>Yvt_`e(n=KaibO6rhblcjz4(qLfiuXbjU>H<^fHZUenS-C_bBJU+KL$E3i9_)2x?iBqH#eXMRId%{}zR%n#`k{l|8w)95Kh)BW4qAil z(v6iuy^Pu(=YcWU+lFJkK(VyN-4{nXyf3xMGZ7-uY7vR?mieg2VwcEbr!ro@stb8w zhM;Iz{7wz2AU!DSvJSm=u}C0e@OmK+Zv%K#$is0W4?;Dga0fVy*ULVU^rMFIU1a*$ zK}c5bI4lr%9%7_+S3_F@Uyh3KBescwOD-f!h8{)L?9_depg*tx?CM@ggrP@l?blGp 
z)Vx-WI?=pN;FfOZKlGl(*tpOEx1$Nn37yk|z`0G2BqWJZ5D%sBFJfFHv!P<&osEuq zaq5YMKOx%bB)gXXR)Tf|Xo~qb>D5bm+iX$vKn??^vPHQR7;_LAH9n5E2SnQo!t(=T z{-G`~o?mJCPV;PM46rx}%Md!v@NwcFsF5hysZ(5me@{z*yafK+o`_8>*x>=x;J3xW z6^&w@d;aTULSORk5Y^N7sR>Dw=`SA1l89JN6H;$Na!m+X zE^rD=NUMN=JLSlD?Z{4eS0v+KDDQRfglkET(s43+jBG>TqOzTALPFWLn~+ep(@aPx z+fEY_%66^^31z#$gp`{FDVLDNf_o;g+60PHh>8L;P2f@%O#zphz-1QTG80&D0oI$q zW(%;{1h!g$tpa$P#ZAS>)Z39+}D_@PE{R`x>tBc+0;Jqbvdy(T2gUK0{#uL%jW z*Mx-GYeK^8m5?a*n!pJ5n!u$jn#o=h7{Oi>SZ~3sH-QoCHG!=b%vKZFX#sYcKn<)m zQ>RH7j9NT0*#b;9fp!bfZUWORz%&!+v;dtZFxLXiHGu^dV1WrNw*bpcV6_EUZ362o zz&aDS)B;>;0+(5U%S>Rs1z2wan=Qa*6WD42whAD{6+kJjfUIzWyGmTMQ_Ig&R-t2! zHTB7~b@2)HuTY@z&8QZA_~LkGg8o+JXZrMaDt{;ZMooQM)w&hc);ttMW5J6D#>sBS78yo-A^vzz0wSI2-Yf|xfE)XE@yGZuW) zsL4*v&0zltlNI~VM?QW3nS1vBvzLYfi(XV87V180M-obaXc^HugQrr(*2~E%TD^wo z(a~aRayw$6H>(}Vs6rCb@WP};^F1#bP7#%Dj!~5w$TfjX!2}ivAW~x@BDHdcBDJL& z+*qkiPqllLu}qB*&})j=Q3=+WKx=|aO&}9AbFs_>IxWC@6PRlOHk-f#3$RrHQHXN! z!mgB|C`6ssvk*dFdz3(%&+qm_fldq1X##UCz+4kpU;)y)F6z150xUOy)fQm2 z39PdK>rCKM3vj6kTxJ0-GlBIMV7&=!wg8(=V5f)|tSi7T{77xXc1v zW&-Oizhr*rM&R1_62OLO z1D`N#r@HrG290A3E4ys%M~SU`aqYCXlO%Wn0!;s z%G2Ka=pVF^7OU%Mu%P*6?9e6}keEU@KL=q7FzSK8MiSs!uKrfToZqOZ7OXWkE-1=k zCIof!H8IGQnbrERBz?F)nLG*P_Ted-4IMbw`cLJA_-baufw{cK4!3r#cQ6xd5NWJ| z6>Y8e+n~`DOqJ7hv06MxvZEn5)WE$(3?@2q&!|mCxAKB&u*vloaDh)X(&#UV;K=lO zf65ifmcfuR&tJftn5(jKCAy!x?_oi;ffYPtK1=ZVCshWx&=pTcRBUiQ%DQP z!r};e6n=33b+UCX%+Arv+rIkaHv|2JYi)XcMZj-jn{Xm?% zU#zP^GPr$j?4aF{lmV0!qz)me*L{!cTq>R`y+4I)=qC|__K z$i=C4{Kqy8Vf=&v)4=J=fV7EN?+GIBnG0#}kqATPZl&b6%7wI{f*{u1XxM8N3u%<& z%|_oNRo}wP;owEMCjqNCc*p8a*?I(c->a%bQGf5G`1pS& zewg@f@>uufiRun%o*lOKae&OI8~RaqcN%^_#IG-AQ*_PWK={q|R^C%kwouCYygp** z&qZ;Vm#lq_I@A@~MM6t3+xz!a{{atdpz{)4JiUGs&R53fAe-B_Nnf}H5ykntJjz%t z1PXgLx1TK7>E$};_TZ&rdKYf59-J}H03ph-SK}M6yg|v6{CDwR-k=nU#BNtN4(;9P z{U#9i;KB#%{E!DY6D;EIljh~!lqgc1`yOJ75Funln!B6-_IWbjgO&Vy3`1KI&ORG} zmDKUrcCypmce7rrgQdZXalkFka}$X`*37V@+=xOGu7{ZI%C|dsy(nNQkfTmkNPm(m|QwgRqC=qkuA+zOg7HCoIy{?A6!XEyCkg6b*$Ti;Zr&{ct 
zFaqDHgu!!0wqAzKV^C3P!S5>jcNV{%;1bkYgm$0-mf`XX=} z4_d~Ki!93PHo^66QlD(4yfTHkyG^AiM3|~xD#dM5DUM<;1}UY8?Zenza2bfT&2VD_ z0*Yw#ZK)a#KvVlgXpCY#0Y_?Ma{*wj_%&D%(F3BB(`$p|A*hMJyJ5G5ZUw`T`snJG7Zn6U>8cCr97eBA7z>pe;~T z_1*zw$I9SEl7HMeLO)qQv_UBJHL{lGicWCQT1499?E{9j1e5Dj{fY&^q0O>>*O0*- zq#$HPPzu-i8n#TeeZ;{q4yULb@e0~`?pcEmho?>e%pwLog*z?uIH}N zYmZnfHnfl;0)ttR>;IjiSO?hOasNnCsD8!3XY9}es>6E|l%b!@`dugV6Bl)@`$yUY zF|5m?GgJFVI%TJ4^gSTT-YM%Bsp=i=veOg$M{sSb?DV91IvDQsI%KCO_m6}+y@P7~ z)>^+f6Q7DFD`0TfWCM3iK5zwgANq0sP&avm%}VY62c4qXpAiz*?iY}3+#`Uo)T2Lv zkJq78U6So<$if9FdTkj%!CSH4hWH8Xcfw1#Su5m*8wA_=Hgv7IQf+!gI7Gw@#~wpN z2-I3hcQ6QHqu~$C`H>KShz*C?P=(lWsMj1iCDU9dXrfa^55P3ng?1g9eu03!ijWBO z-fG+7e*!vpo!EA$HZy9$$El$Bp?MPunmxQNa8sB5;<|Xwn=Um1S9M@SfO*r;4cigU zn<`J*f}`lf1+Q4JcJ2?dPP~P|4DF_g`hwk%%N*M(@5SwxAQrI_g1zXlTWAqT9>0#_ z%@va)(bnYttaTwRD@T=UMaH3sZ%ib5?V_AtX?u^*4nv?8&xWklBbx32j1g~NLh)kIC zg*LSx>&39XJfHm#A0g56+Yf=9$>d~vLSy}6_CsXr9cv&M^I*STHQPGvrgDO821auc zeXiUz$H86XEEs}^&~@f4c%9bGA!G}LL$5uF9jy@e?^*TFTpS1|C(N55H)ZRDus3r{ zFVowI^afr>ieip26zMNGJ|~GlbJ}f55b0o2+%o4m5f`}O$@wN*@SkVYokn}(DuW$A zr*iqu-_jG^-4`R>A@~i$?*`@aoxe%=iBlfXX!@b+;2zyt+7^&&HI&BG7aA1h6=CZU zX8ch~3MRaW_VZ+bZ~_Fn*o|ayVwTj**lfYQBpj4)$8k57s`-vg6O=Lsh+Q^dVFO@Q z0mCtwhBLRGp#nX$y`ow91y;Qoeb6Z}o;`{CLIkywz=x$|iL0;d_D*WSM@)StwvqQ^ zp4PVigt?O$VSVC$vJM7poIx$N5kznS6*d`eRen0Geqcpq5@qicDR)|O%5tG+5ws|Q zV+WlOaoHeGu)G~x1iv;p^?zAIes$ts#}U5ms_I1tM@f{OP&-dhi-%?A5U~lNqC!Wq z^UdZgaK|&+QUf><-W4aZSyF`UqoB zx2Tp@!3W%H%?UrN2Uo&a5LsT#sG-|&6;L-2VAUYI>TNd$2FmJRi3l(wVLJ|vR1_qH zK+Nsp1|1QfY?EdwdNE#MdEbp-)G-!fgDimDri5KL*(kTZpo^0q@N!f82Yllbp8p`$ zdO;3trAe}Hp3v(jgebtUhK=BT!THl-e5|%fofg##F$JeSl3ShTwuuO>EQ~jiNv{fC zQ0L;-D88cvV}~{hK_B`NT4cv*@}iM($+Sjhwu9Te=6(cJ1c-f7X@cfQKRU-H<1OxY^UVE#06EnbY`I#;BO-OPp>*snL zHPKs)uUJVKAToTb=IYzU7A>T)QLpVMXk`rU^!5U{ql@V^3*O@!by>TlxQ1;54BjTF z?2c+dm_x8sNE{{KA{*PzmE?*%YX&cpSPMkulD>jXo59^py6<{uX`GaSNxp!#*xE+} z0y^{h>cisrZ6a)4^M>5nSQjxLoPzBs25wu#R!ZN?$#(XnPo%)nhuQSui!{8s121u( z5dr3Rj+jrm8uq~eiM>4G*_d929o=XmbGMncD4bd>nhdu^aWW^TsE}kJ;WE7@1`e&K 
zE*?#DR-;jKO{yjgP#c2Z6ypyWPj(|QjFZ^+M^gNU3$vPFACSXwk;GVz^D~<&F9m%} z)iVYkuNsP1SkSl^i_^*W!IW;j391vbVz9E02lcITL)xVv~|4 zycS_vR5qzLbI{r#c-SU~6G&9=*dq+&D0?@&bxneFk zh&#{a9?UlxaW*6J5SOqi-MaZ-9Pc6~WVnhlyq{l9$eKd?`EfQZrdlh-KFVY5{ZR3| zoK=h&**gbiu(_ZA2u8Y;Nn(S&6;GFx;^VP-a6|>t5V+wVVO6c=V@U#p^fj!;yv4zOpjf8>|kVV zd>VXg%CUg!3!a9rx3CHrYcLtoG-ykeAB5*3 z9N+rXG5~h!Mak7?$1nKjvxk=>JVZ~~cXP$IPV>_xV@*P;NX*yvN2V^gx?kG8@nxfH z{+a@j=e9rEQD(U{QiSpoOc-AK7;EB1{2KW^7rvuJg2*-|+P6;5RUa#%7Q~%>__W3+ zFE@)B9bXe!#s`eAK2Gv4n#q#-pJ0KEz+-^uJv?1p0lwijc$$2%Db?%{@4zK>n?1M#)#KRW$vn!6!>zt#`)lyx z!n#ckTw!Y7L&$4Ic<(JPD-`$q9&tH#t9uA1r9ZtjgSG28BJLr??Pa`om-k@e+S1fv zZa=P~yA`S7i?Ta#gCA~Zz=fGiVF#`2cf*UTR=4mTe8Svq48eC<8{B&KQQX_N z1Gi4XqKs<~-RVn2a)twoD4TG?2QE*m!r|!nYAwymbgZgCTzH+R3aQ#3gyvH8L*HX~ z{K+u?fSrpZJ6DRkStUDn!i0%?9XE-49r3XgZoUojURJ}meO4vcQ`#r^pWZFmDXyh< z2d1H%fAwVU=ZDO=Fqzjnf}Q(Oz#R@;YrV@%U)&34-s>o@o8!H3xE2lv?C+NB+=I(U z)wOWV;@&?Ot_gNKS`-_NOx%;W*<|A#@S*}Y@qS#?zu+If)kVc_GaQ`(f*BAx?aKXgn=7o?qL8jKZGg~~k)K4F` zU0me603-?0$Kje-!E4?Pxz6MVmp6vsln{|$+_Sn})^`>z)}55TWFr}3I8a`mYQzfP zhz;!qfSUIo4RFNKAa8;Vjej2*FbFp~Bx0(LnkAxTsZcf#}DZ%|Dm)ideaPq81q<`BlOVkdkx3Ff8dO4Sp% z9-k&=r>3|9|CGU|cmk`!q4LS40F-V%FvQlu`Hgg-Q-zP$1J%Bk0D^%5Z-Y(9FxDxI z;Y}|8qsh#8FYaw%YRP)-Gk9h5YYJWp9U|?|-S~nRHydM}tqbzrtS{s&`Z=3-Kd!kv zo>l+6kL8mV0U9(D2?~I zCk8JT9cb@c(0*_aJE`-Y$%*Mpc9Fs2Uh>UY48%?JNFAI$PJQ% zlEOXM7jSwWP7hREfm=|vgDzaCNga(c`3@j~2C=bK-uuq$t>2{VyIFa|Pr5embKz44q=_Z}CVVK3G&^D2^WFt7F7RyS zEx*V=?jiT>;W-o-(>WnkC&Ks0BK?n9Jo0xR`MY1He-iUhOkZ)`IPR?-7*Sp@jihi< z4aFFJBE)TiZ$K-qsm6(=5LW%++hOL*60d+85vhx~|~V%kvJFheh|&vxOH6Pymu zoilORnu9g)gvtcR@nDJJJB4i)r@6;CWl~_keq32Ao{+vcigm%G&G?^pv=yUROk_Kn z@lEwS_u*DLd51d%2LBJld@hVI;IItG=lQD>aXQlJs^$1v%T?3%u`sb+Zq&dM3$Y)D zm;;V5fg_ku$@p&D{-ZdA1JHSZg1vgJ7ZwIr&02gBmGGWb9wzz+ZZsV0Cin(6VfOFI ze5309c7wQqg^5ncu2h8oo`H7{4%36<*z~FO;l{J*!Low6|>B9{)8@(*`VW-fCM`G#2ElMBmXZtx$aT5A)%X#a=`3+=>Qj4{{ z>EYwT)YI))6RjG>y18}#QdmD4kVfdoUw>--_?OuFaf2inr5`s~^y9J5qaS~a;_b#6 z!IAp$R8RfbVGP+V_2X8w-;ZMH$1a!eZ2D0Y#G)T@>?1}vA^ixiRX^?yrw9Gm6+=Jb 
z2BGuPk8@DOjVNMI{dmfvA92sMrFz0aVGs}R^IT#7X4^sc3Jf!y#MR;V@Um- zK|Q(;+ogWQ~!{PCXyr&*_)-it= zukPZ#n=S1%__#ZL@d=qd%%3)h?Cq$M^D|yj?;mVnPHi~0n2Ba$*4&KL8aO$fAn&MO z$9!+F_J0^WyM};)D>1)rL3(RF4vfA(!0^UMU%pYMhi!+4#dxY2;iuQWyf|7^HhsyKrC>U0gDUE^j6j z=Igv_zBu(jaIhHv=g<&v+_kyg$Xur{UKH)k z?NHfDF|Ksi3wXAp2c`d=&E|S0<`=4^r65rJLG+<9GoeecZ?3HOw1~ z9*!R^FS~`j9LmB3a8mlS-7GI={2m;T!t{5bydDs=*5+{NqBa*=vKy^EvmN?`rK5~q zPcJ79exbcMEyCHTEv_MG(wuMNeDs_q4=z^Yc)b<;U83X@7oed7-U+J}%8M<8m?2qA zt`YeU*^Au%K}>UiO3anHuByM=cOleayF*CTGl?h*g>OAZdz>y}`<+vB?8KSTSuaC1 z!N`Ghp*xx(CBS;lhU3+E1Q8l2mj9G}3br5^o3Oqehi#xk!nd`kHvc$ex*eg*?8`joXGoB%Dff5*V z2qg9bUm{oSWWkDEfjhzCogPffJdO?S%%dJ3PUszX5~eJ@_9`0n0Mf^A2Z|2|Myvb^ zdlq(NzDcs+H^|bS#hguoK}+g`y^um>(AwC32=(js^$RgvSWH6l7H?{V;z@weT zKHA99ygP6^bmDq$^Yu91@?`E6rq6MwXwdr$ywHg)G+>$b(Fl7U%D+dHA2q}F_4H;A z=yAK7u#dj)@pY*7?8bKsE=QLe7BiP)9qvKJ8ZRiX<(vT*$2Fs4+9;+Xb;5oE34uW^ z8*}`e7R(o*&Cx&LUbFR*{8s37bG`s^#(ciUPQJi8f}TkkN!*!e73gJT)0qe_60B!Y zpx&d~tIM|r>3xLqY=eFY>-R>PAnb8Xlq%mT(SBeiJi#t7@Bq>~ZSDWMkOuodY*Z|L{}bgi9I&IH|3gljs+x`XpV4%q*Zvm> z(ju`sI24oKko?eahk0^K2lt6%cC0U)4@9v~-0;7;URLq-;CiPrUVXs9ARW=vgj)sFwx&wa_qKd96nP)`u zKsV7p(f*XE)+W0}zv%Y0%dXLJV4@G(z%aXVb+1xJtg4M}Ec^}MX7th?0pXf1d*w|u zFonzSU)(6J3HA+GZQwm|Hlq977^1?HtUOpZS=^YSJSkXxO2Fz9lvq_m`y#TnK1>hG zR)QYB`V`T#^a~PJSK^FxK&$58+pnS9N`i=c?b zcLPVncLS#}{@F6iD~?P=|DA`Csv7x(7DKqQq!vRk9)h}KH;N%FR^Uzj%=M5hu=1mQ zoJRYIS`U%^S!eWmh{55CW3>myHjMAwnIO;r;!PAeC%mF5qWfAc+dCEvHnY8hezUL- zK%cVzTCVypsEbY|aH{`mlB*sl`~QFbpXNZWD&4QJK-za;KMXk{AMuR>Ku;5#!XZiI zQ-IGJ_&mgaG2*wXaGswXm)J9YnmUjBdJ*2c2mNLh&Iw9Fa?kvA(pZFN5@W_l--!c} z`CKQ$2lSv{{S6sDiXjPcJ;I$B7w~zV_j#p!zejlTWg=KhLOJ6Sd(wBN%J6FtGa;SUU42Pe|4i}cxM9KlnZ;rw~v+KMU0-@lfQK;oMhq>6MDpN zR^dN9L%8!FCHAJa6C^>wNg!>qM}2TR*rl z%D>Dd!~ejNCGbceiRr3R{Meb@eKo*|R>3dl!DoRCsj7GW;R@mYya2X@u9QaMg?ekq_nN zRs5<|IQ55kvXYX=C8dl@vX4s|mX(y2os?VCdvUKv6CR6S$UIzw--r06JSWS=G{j?# zH!dkHD=8x@$r*8Vs6e{}|3{;+vsHYPf0BsgOWrxcy_RsLKFsKX=SvVi^DOk2pCkM& zOL#PW^6f)}Pf_$AGE>HV1Tpv)NO&~QqihEOW-$6EbC9XUmULNq8 
z6dv`ec;tNt*-2>Rxc%2IG28CuiSK{SqgxjB&^+f$Eo=`rsl7_Kzdo31OWVzk| zPV%oMzR<^v5W^Cx=(9<#F6;O^s@tFT>5HIbwGM>=ij0ZVlTW0uO z2+#eU40kfj@`&;yyj+FHlB1_B;i$5Rhw)!Uc!7!^-9E2D_+KsQi~fTYddm3?Nz3AT z2s-QW66loH*JS?Lzlg`I#{yZ8tfy??9atst*w2`FQl3yxW+fqj`SJq4Yqi7|{SG7a z%vW~OGFy*)vE0iMf5k=_U-U~VKKV}iZz24-w`92JmsEI2zGeH)PO6Jde;~>_WxI?Y zt6oN1!f{1G#KZjMS;C{+AH%%}Hx&I?@t2$UNX%?Gm;KHGzMWIbmk*~9JnO5oKgJk>3l?LUMk$`8Ef6y8Xao^qL<7vU>ZIQwn! z#7OUH;592e_TMHR>XGS%+Wjivb?uP#CHiq9hRQ43eMI9$F!S*-;%R@C@!r6zcw|0g zp#fldQv0Ah3h#1>75dNcY=o~+;j!v-x+UB!hDm_&Yc2FqbrH`s@I8z0Cl!4$&LBpt z_7X#$SB6%*^^lAw#xIQ5Gd*hgK8t$?!kswKSguzQzrIt(=lDiEGTo4T%$lreS83XaJKf^` zlo6#EuU!a#7F@{ih`TU8L>WdL#W)jr8u7ex?^Ui_xsPRv`&>qRoB?|R@LqvC3ht&c z(%lOp^xyIW>E?Y`)6(I;7B1n}50&ndm&*7DrC+;PxD>%LV?NdmlJ2DgrTf~2(oIzE(hH>jcI95G+*O!Ykj`|t0F7|KB|qvg&?4O3SGpHK zcIdwyZ3V0vae;DQfuPd=v~sy6;ad2A1s9}7)WOA+WyAw;Gc@fx*BtR(T! zJ!CP7pUOb@B-O*k6efwEiJ|SHOVKn`MbdL{hrwM67gdz>B-|_D*1=7K`v}|-aG9U) z!<`KmRhv`}_j_<>!cB!+02ewpX$suoaC70Z=oxVFmz0KoD00#;xR=4T!?nXrfs3k7 zN{0JQxQTF4^+}9`s!Zxaxu6@8I^hn1djKv(CaD!Jsy=BO+!VOYa3KOoO>i%QTMzd; za96;ENF^U>euT>r}2?xtelY5n04n?o#De zE4M(oPUYH_t0}h?`NV7QdgU%vZnbg?l>Na#8tgig4>1^hBw zy;ct);ggK^ML+NR<}(PwOTS(DFF<*OU*TT?m;P!9Kc7V^z7vgtPe8>_Q~sX`d>O|7 z3j{src?CL-M#Fo>oXUSt&=YMyKsp6ImO(yS6u!!yc0cq4^JCg6vX$SQ7keO-3^(lr z<%!a7&Rc)4@J&0$P~|u0&G!vZ^wk9LnDU!;nO{OlkbW}q#ph8d6Z+GXKShNXC_no@ zhUY5(4$!6FwBwwHvZ23K@f)lWe$!6#6BS+!zVPArlK50Ue9F+sM11AH0*n&jW_^Mn z{q@Kfp98AArXA`EC}V~ZY1bI6!c9BM1uERMn~a2Vyetm&eXETB ztnzm%e{U$$%ODfxy=#rAPu98lw1U3Xe>Lb^{e2PF>bFJt`$gd=p}wv7^5#9q)zd1b zRe6hw^Fw#tIYzrm%8Lt23QN6Hy+viEu1O|N)MbB;9=H9atK0r^CgJ)$N7~J0#l=OX zKf+yoF5JnNlan*bG1*&IQ8*qK$EgG=<{&gDch(#{pEpM(CGxJp7^$a!`KgN{N8NsS zE`DT4b|?;f+2!w<+VtYwSxj~MROwFo?BZ}P=8qX)Q8?9Gm~->Ql9GESXXKaU=1vr8 z$4WDO{LSOd2y^C?+{0f8f8uA)QS6>g+=N_L;&7B*C z6)_+%Yr!!JEzwg|>@9krtYSJYRvKQCLw1?R;j;p4SuSTs5VWO8PPdPWYjEfnccv<_ob0%l0d!A!xrt@s#pCubtbDUX)9*iRRh2$b6nj$+9<6AMp>{0N{&t33y zB^h1F&!6IFVYOq-_?c5ne^e+c{nn|)RfRbm(a!WnWx(FpxuCKo+5h%q)El_q8;<;8Pu 
zDJ?ClD4ALeT{oR`3=EcGq!dH?7;}s>#w#hAoGxRTsh@Rn(=$t)+_;NI!#WxsxWjHJ zW<9ZU)02jEl7@oh;uje;Vsg?mJ_|4M5OeOPsO8-n%T)Izj^A@whR>Dixis%tQsC@3 zhqm}3*G#RLHuH!5zU%mhQEB$@V|~9W&4B_|%+GkUb$epPy!LMiB%pFeW)?VjwM z>rLZ6+-BJ87sKtsIz+B=`TZ094=8`3awjQxgYti0xi2V}ODv4XwOzV4nmGM)l)DRg<)U;0{B*@`U%0m^_%D_FZLIk)yuWf! zD0oo0!vUX-%$6wkPUT*u+?$l^R_;&eW^3BNz|95o{s(TFp=s;kW=znucEa6A6Kf~A zljOd!0x)wl{4;OXw4310&eODe;Z|eajZoF~>Si2&8 zuyTLdPx?nHcLmmJ82)qRPFC*smD^jn`?1DDys~dd_gBh4OS#u8cQ5iuylUm9D0j6A ze_zGF18bPXYgF!45E%NORqlmY!=rzJa(}DbeadY@x$`TgmKRN*Q&>7Je_F+~C~y9> zvXXm>N(-aGMKJV4!|p4rC@q8)CW2o6gRt^cT7lCGroCk5>>!xvI=}AviT)bXXh6emj1{)(^`UAg%y=pj-yh99i_Y~ zzp|>lysW}onO|C1Rw}IYj;p7apt zVKb^qr&UV9$p}ZPEKIk=N{@`iKufeSX0)lZW?LdTtdSUGi4w{l;AhO92(v^AWpCC! zB}LX)5!sW0mS~~0*h0ejbA+;8F>96uJUUdJ`ISW_(?c@vE=nJj;|6Oj2&5Q7$e9I+ zxw-h}+?hAeyxAK=8ZpSr#pv>kjl@D&r7WAsSyEPQ=*vkN#Lrx2ri>={IaB^(TeC&9 zMO}x5;Bz$fI~^2s zgfYhIm$~xObD7DqtA;bV^ItkM(=(TjN?e>gzOQe!V;PzEXSfSGHT{i(j%mm-3BiE&cMe zZqJQ|lfJ~B<&2B&J^Q=J?%>QjMM`oUJsUiCF2dGpntMN+kvK-Io*QhwcAF0F~WP*eJa5n-mELN8(|8!!r^;n9;NYus!C&B@N<=vlZT5 zqqThUW83rl;^SV}8*h7IPrUX*x32Mmh`L0Z^CW+5>DuF3a`nyMLwcI+x-FmlIO}=D z6Lih^jB@~Z*xG;CW}E+%HGXs)GfcBNUpZH~y5nsvAKn=E{3kz-fBq=)$8_4_ZO@xD zn7`I(+T(5Gwa51|pUB^V{?3z{?VIaA`SJMYKfH0=^PnN~YN2%?KK_NHC^PA`Bi=K} z_jhfzPqpdVNytgvaM0b;Upv_k?-TG26a4)yo9`*Y&Gh!fC%#}mTY8GOCU52;jd)wl z7seZt2l2>Lfp*yz%7KstRd*(TM?)qdd&NA+67JpnR^N_Q03P6ja*enIE2{i=^E9l-nxZYu|#4@7;=+cvmM;+~9qIxaEp!hJH~>4c>T3GvGkmM5%$8W2x8GRcN@eAbH<|39Me?~d}n z7ZrbHRQQcie(Mr}w8lm)LP%Sz0EX?=J?zsVo3&}tm1_Z}A@>YR7|~#rRx~(l!48cK zXvj7fbE1vdVWiNSM;N5a*mLnjrQI`tDfNVgvT9inB7cBQqrBA$-y`wZ3p6TcEd0pn zWs!?qq10l8ncV3Si2*$={UL~TIVU=c*0l$#gvfkYAxH$sX5K5ss)n$ps)Y%~4+Io@ zOUfPMe;Q=)(~b|XHkZROJ}fVu>Yahz7bB;AapOa_BR=N%(DZ-S@u6Fi6ZFu8wJx+v z^iMIyhtn#(71-0L@T13@CLJt|if5ly8301s*5_Q~L-w20eT;{GQ0Q@v0cQf>glioi zGQa#WN%kjENdt23$2tak1Y@)V@!Cnms>APbj@>Y(dpusN-h^Q@RqYavWe(`hR}%U< zA8hUO%*EF6+{_0?mX-6H!l}h0X{KlDSK^1fjbK@jKlecCUfU;bAVEAMA)^1oSU~h= zCSN&jX8&gTSw^eAx5FP%?lY~Q7Uz#dMQZt#b1J8Ki|?6(Zz2k#WFDkGwcL-odUR+X 
zh|;ZAvDaeFi$vedjN@T>ABiY{0EhnZtP(yM6bL*|D!(cB<~ZVf=S7Q!kV|?b*$iP5 zeQl3<(X_JCN{));6D{{99oc9!Z54#{EXAP0n(CcuQtLU-s#E1M`~4=oFssQJP{*Q+ z4?4<9N~DaDYV^F#TBN zBGQHTyq*tleC_#Rn)8(=G?9L2d+nHWTyIMV{%4$)Hxy%aLvv;#Y0NdW$1k5Vb2jce z98H|dzo*SM(2g(1kNDrSX?Ym44NQDqyMW=tF48iFS;C>O@^TRO^27NV`SZ2=v%lqf zumSVHU-zlL`HI7rWlXTyF1Q?V5_@SE4AZn%?6&yzAKuvT9OqsWv;=?u1ns7P7Uu^q z>J}p}zqvqr{J*p2-#n?;(uy*qlZGF3W1S;TdwlmW?RU?C_MZ4*myZL>@>8|@*Y)DD zUe%lOF~3Q?VB&-O{_B~GZ_xTq+-%oco`J5}_svBYKcXe}UzPl`r-onf@KcccfkPj> zXZBE(a~x>bCTNc*+T!}}JK6oxWw!qJ9sGutH?C-U#=@d|GFG4N9{qjfeW(@hboa|R zQfhGOgVSfHg2uD9-u~JF&df!K)2F{S`Uk*IwZ%h3Q)Pd&RNtJo2>~Wq@A4?_S8`!>H42@b3Glqur2! z+LY`EZGhK4r(Hn)?$Nqlo(%jw@d+=~;vM&!4*ovMy|TUgB?I-IddJM!zk&=dye2ul zDJ!u*r;t{~^sWDt+`-W3JA~9Q%*gXg|EpnKcfd&0T+k;hHef?U{6A?oBsO z&b#H-+os%p$DMcG{ZIJ?Q}3BJy>P~lW)|IhUvWukS^51HmENjZ56qtP;Ex~r$=rF> z^B=CMU9fP`Bfg*d7Y81FtnTrD{@Id$`S}x1{_9i!_VoYx_h**=;+OyN?0^31zn**k zgAe}I;xCtq;kHwRucXmHB6F8=l<-?{WM`;ejEP5s{Ye=uygsj#jUJ-X?O zcCYx@qGTGzD`2)ou39TLlbZ9yz(7*>r=8GYRdFI@C(BRDTd|LlH4 zy4Nm;i+N0E~QJ2P?&_gv%MW<^L$f)GCu`J$*9%8e#@uJNwPCzC&n&+?j2 zgscL1uJLXG@{tHU(qJBE!q4$9&*iBHG3$7j{NxYgQJ%RLkuJRF^wk_hBr01>6a?IRAaNxb^e;YQHN< z(DK06f&ImJIq?N+SRh`@I~3plg=skE(~SR2FYR}~K%5C2`ySKoPsBTE-Zy03Qv<4N zHx1DC9hqkvaL|dk)8e(|#JQJgUi8ex_ac3cs~117-E<}LRNPB@oOuYw4|w6=$K81z zTYvwH$oDhDF@E`$=RvKzJMS69pMSbL?>^wZ*vpx@*k-#4#|#eci~NFr11EqM$J)bf ziR;M^uC+jao=g<)aematNe$_9Omh6TKVyBuhP912tZl@-@FCVWj$!>p1MU#~!1N`L z#F2*Peej`!K!}?R*Z!pGx6r%9kqT-AVeRY4m?XoTRp`7HqnK!pB(eDRr-n+K@v1dN7 z7ML0Os!_)Ork?uQJ}@{G3pjLT4_R=i(^a@~z~1C7fr(0-?#=tjLbGnqdp=1~VA z%ex_;_kpfihvaEuuScH3@u`CoLC^jn(gM9_P}k(!9gu@W&?-0zIA{^hH2u%pM46b^ z;NWL2PPFx3-$WXHwa0N}?_iUUk63taS3I8xH&*?Um;Dvp%SiW|&+M3ox~A-qx6ypX z9QY>Ae?=V})9#;%^2_$#6w%(7B7Xeo?&YKjr7Y_47jTDxChPiav}1-Lp8rf`I1A00 zXA8qT8?rfY=0#x{r7W3pYK5D0On#niuCo$)pfBe02igAdejnPuAM?%w&@Yhj`zdeW z@%>ZLE}<_n?*;!E)(?E8%>g=hJ!Rwo5#{TTvJu~uuc@Fj z9AVdi)&bIIxreNMs(*FurvB(N@@@SO+5!JAXkUkTPP`MBdEvTy5%OlHVY>gfy>Eew zvP%Dd-**O>fngjjDIqX}mjV>!CYNGz04c?3B(O5K77#H&7_Swm<*1~{pn~h}&t=`b 
zWV=df_FFC6t;BL$MKg`o{FOh&OKIYbbdxES-}ian^9}>zZMXXD|G%Bjyz`vryyt$N z^PJmro|kw()u9}35&CO5+|+QB{fy`mwx|m}5jgb|st<3Jw z^tbJ!2k@Rwekb#iT!dMP{C4Ff(Aio$5&VSsaTVI@EaH`Uf(CKVMwkq@s=AYK_v5tp z2S&TjLOKusvoO%1jUy)mVy+Om4s!|o%xfiR%mf5oN77saV~Q4balJsXP!JDctzsP} zN}J%%b-nN=;h3z*^R)UdIK@xv19{%{65+_EJa4)WxSqFBGNL(;f5M~SPG&70!fgx( z9{}FKfnSNHmcqNl_afmg>CyQB;ms%%8H$GRgA51XM0_}yX7QymKLDcXkADi|G%mMO zZhYF8Gq&@+kS@aWkP++Vq7j?cI~OMZiirIWcnzT2TTASk#1C<6>7Tj)g(aJO<|f~$ zq6oiA*qNxMO! z6k4~25q<-+95hJgAoF8mIFThiY+L@e*IRKe@lljx5CO?9ao4T4N{XfzOwB8aj;FVX zzq1^diN#|O-(n4dcwD&(tutKBt=30!xoFQ`HY^Dd?bNs6a;x=QE(=WjiA*B$Cpw|h zy2eHCk-wXsBNnpw0HJQ4+lB5 zW-rs*u4lb+JaQb=R?hI@kbgsXIyde%tcxt2-}ss=zqj6<+KzMQILu!-;2dxsVFjfv zt1sNZI>daq1A34jtZk+AXf0d8aN8%?f2cuSh%$HvzQIU%D8ZP@tc71}oa%}2UjJLx zbVlJ{e-?AajXdYKm-qAB>-KQQ>uy5!q!*TWo_DiktnpS_QiBYZ)a9!zeS*eWKDx_2 zJM#HOp639qFD&*RahgY_S(wY_aW$v7hK9pzO^Sv^|?BNyu;pFbnkENF5*+_K{c6@zI~nZ4xvN2gFpxO!7~FM$B=kpPT7BEr3Sa zzr)W=@`LypNuCTUq$B$QVjsb5kf4#|2Yunb89apCX(nDx-yTayIG}m&SKh8ls zuJ|s(QDB*m`iO~eoQW|}oU4lSO&KRSI+}lGviJEE9%;#Z-HYDiosGSBBOHauWq5lR z|3nC`>7NBo^TkT$AL|=&zA1-qX7AGm#0-uUVs#UV+UImNZ4+t0ED#|^}|u<;#}=nUc_;g!R7(@9TY zG1n2=DQVS7Bt+amkEcvbD=Eq^NX(f|>S#IY796hbmy}E*fhu6^ZZ}Q!hKLisxbCKX zX_+Hpb7Zlq5)q?eRNQxG8|k+-|BG|Z7IQu2lLNW|?wklxq%d9QgVav~s38q_U}!$L zT(ounNB+qqvc{KjnhVk+)5PuWzsun%K6H-`(`7z*qxlDgq4|c1h0g^v0C$~lz)8#( z!A@xazg2%KHatEL204+;$b9dCPPOUS}^Az#+{(oY-hM|&w ztagQr_$}MD^IBPKTXrR!H7??~F6uG5Rkk6~!X48=LcGZ5 zUCvF>&;K*&0Q1+I_VXWt4w7#{2l{U4Af}VL5s4ODbwbD+V_h|@bZ`fWKgd0v8r;?1 z-M&uT5P27CJ=}olh zxQzdA-|G zxY@d(Q~SX0ZJJJWa32D>(gE>t)h}MNM{LB}+ z^w--Y26*d44_((0v@dWgdgxd#xG(^kM>y9PuKMjH&Tz+c(9JsHKa~!Cig`oVbp-7r z+=>o5?<2VOeGo(Ei9;uyhVocJBzvcHF(e@#T8YVY@CQ2W@4_Cgn>WyQcmQph8Suam zombw3p-wiCFRbO+M%!d|5G(Hw%KLq{yFa)OLRxZs6y-V{$EA@&mwkquoSUB)%kiwD zurQEKqjLzSx&FNP=6XNma?#<0Y+U2(T19mJd^7ps$uxkGVf**%kiV{RNFJpB8|{Y< z!fR9!*MNMr0aBgOoMkHxSD=4^gM6E~SL!-m;uW2}i0KiNV_AUh_Dx&PyO0&-Au}pK ze};6{7EWi5I3me_OeoT;fgGqq!_Cc#U}tc2H?0JBZDcnIF#n+yq;eJ8#PZZPlE_nHY{^{4bV;mC^ZGcnZ!zah_&?%-Ewo 
zDm&ze68w;_Hw8cY5rIqE$*bb=J^0%9-MOUSgf`Vp6MELaDfruW125!t37`jm=(SFY zMcQ(h%4wF4BYY?c*uy+j1mX z;R?vT>IZUDszsSRQSMii?YlDR&+ET?nuk9D{uQ7VCFGOc)<-~wu$XI$$N6j4P{^kZ z=ahCm$4{9D`MZ@I-(IHUhm0yc;OuOkqG3W z1#~tZdR|%;%H8!iPsLfMKi!WcypiXIyur7B@Y)CXwTd7tv? z%yiV%Fw|KZ>Mj*^n6m!Lqz^^8>{oG*Z6xkeYH-gIEJKvP-iY`hLsxSi{Y#B}*W=Dx zy?{HMh@0vfW%QBqq^qy_zWq&n1GrYn``G8A{&INDL_6ezbLg%eaL10~p?C*DKTyUx-2ZgrlENVaD8{YEJyOsF-80?P*1kE)ldGMJ`-!KY7+2Pa zdVB--2U9JxKSxz@}sCsYj8%dI3bAk zwXl=hD-8Yt@Q)|q5A=aPTeA;1!A}_6519R>Vw83j?wsz z+?*^Ozj|ux2V4wzCVfV}26SAT&AZ#5{iSI`lZ~rAxY%HI;Q4%hsO5V+V|0O*Cv?3n zJ@tJppN0I{5)XJFW zAuAtVDR8af{IBaayDh3)Ab7l*4*B@6z?c5?UHe<3P=Cw%jILk4Em`~P5Ytje`9G3> z^UKVp@5;YT{;!vEo4zl=kK;Z!1uP_2e&U2 zHskNIGMheC^tLY)0_`gVoADAC^y({+**^(+ii614r#jAO*$aI3GVSwAKji#(e~A9r z&bhZtKk%!%A#U7$vWNTpx^BKN?(*^;yFcVSc0Yvr|8~4-!)NYMmNL*pVoWbfz>;`t z!h%ps(h`I930*JL(^kujkUZ;S3*uq#hx+Pg8N2O%Joi~1Lftn#z-v#saS77|ZA2{S z?0&S4Qy}%s)JXia)o4t-`vt&dD~L6Gt5IqXj5TQghPpmE%cu9rSpnJe1>70t-FHtA z+^PKb2E?}+{dsfB>Z#L10%GIPZ%%TiPkzBQ7{}`GM_D_UrNPlY`lR8Z`inuPrI*Vu zCSQCqv#E{Xirgj#AnHVW@({KE$>h&`dx>p+&^N$R?svE4*|gqb`+tkFJ`Nf;r2G#`|Hfiraua5``4VG{R(F@E&%N; z;63brMj3up_H5a&%bqEl*5`i$F|I_MRNNc!^gVwI{E!Q@H<`Fvl0`m)auSbq<OT9i+xPI?$ORZPoi?9={0Gt3?9fxOb6mLnXHdu#&myN6FXUVU=$13}MStnT1!JF0wt+v`K z3>GDLTtQGjF)oVZ9@xj@+3`+4O9h^4Zo*66$-H);l1q^J8RUr~B06?yw2|>1Dv`|DD|Qhw?MYD65v&@fOa+Sq0S5 z%Q}T+mC4QOu5-5-RJ?W07=v~oh955|RXT&oCc|s(oz2h(jLESv(NTj3MGoxS zC)A)12@dk}_0jdvdTSH{&zayMj(OF(bb8ckjE_OIVL z5Azh)w&s2kG^zmcGb<%%d<_#+@LBUZ2^v=cVhw6uAwgp%Abw`tT_I?U!k-HGKnWT( zfLK?WasP~u`bSBSZ?xk7@_b11#1 z!qNL`WS;z2O3+AqM%YV;w4v$Y#XL)bA}LTgY2OUc7zc=S*AgW6D25ZH_x%8IkG8}h z!KZ=%u`gm~=dniGufUc@3GycDpY#xB*`P8Y+2kQa#kngC{SrNdfFO5;&VQhXQ0Z3f z<{{YJE8IPV8jocj9zumjCI0uRKUE9DR`ok-C>v03R`bF-^(MHiQ@@7)W$NW{sa7w8 zy;=<|sX~Ri8ukixCH^m|+Mr+NjOvm~!2boBEH9cRnhch_yvZm7%m>fz{huZC_WvCn9MhL2*-YmXihUcis8o?ZesZ749{fv zCk)SJ_;$iUtCP`r$h?e4#Myft<;SJG7*adE2ij?$mi9HtO*W*g#1&uI1()PmWl77f z`W$YqsR0@o=!-8*mxJ`h7n}mPxKVu4r<>@bGNtM1rA66MIrxkW)KA~^Q%(?aO_)~r 
z%mi80Ll?~tNd;w-^YA_QZWTd5_%u2vi6bG@=$u3b6BvwV5TdN&d#5m~h&dHVh?rB6 zgowEcw?>P1o2ZUy|5`q=qFrfMd$)2BQEdC#W&awV?{RKl|GS@SQJF=f7C@PR}Efk`h^ycgVH;80QhqK^R?N z3A$sAVM{!h(7^76Co*(T;5J@W zd{(h2@KxR`qS~!G5chO^R)%s@24J6iWKo!?pMuU5=>9X#60-ae*JC(;cn;5UJa6Oq z9MAW7f^as0c}dL{=(R&y{_6#m-N5_X=PFdY=^O(ETDB~etDS~Dl9fVg>nfa6{E6cd zwiU-=|JRoFD^3*;`t+y#{)tm1P*#ZMCg;x7t@_-Tr;_=}3D_-V?- z_=`$Y{4}@W@fY1PqFn*fH)w= zgvl}2OVVZYfm??z8`5(!0y;)+N0&_`_7(f1EnW4$tDyf~jkt&Un7%RTpm*ST&L^X& zw{Hfo9%+6)w8Abm(A@b|BJS1YHKC17{KT_vs7{Is)E~ z1Pq5RfDOFo?rqSQ5D9%H2}l=x20ABiPlrw@!pJ&wDxrsg&iT6P&}l$PTR36Z? zBge+IzU)i7Crl1+wDUH!^V+)~oN)%aRYsng`Aq}pW82-_lyku8-C4koaQ~I|KhG*w zj$O_64e9K~guToDL8~q4*ehI3XD#Rp}|?#?rr2eFTN_5-&Vj6XGd(NC}z6wjY@ zoXywc1UCuiXEjkcXCW93_{mqwQFJDTy!sOlerA4u>u=`wwLFS*j&!6KK8LG~p3Ajj zzCSv4waNVK;!x{yJbw>))AFd!&(aHf{*UYYu@}F^k~DJlpuX7W|KMu-<}D%pESK?Y zc`pHHGW{%f<1FCvdr2a`<-H-6m$9e4_5qEy8uwsdbW=ub;}pf)(SFsl^x3`2dRjlj zzUOoE6RZbj1XvHCyuX?sZyn|aJ)(SI?2AFXWi$5cD<}?LIkLhq%&Q=g^ZoRRFOx3` z{+7|$OU&T=+4IYE*`GdfJb4=2vf(!GNnLgZudvHynu9%h>boxnb6Kx&%BnAm3KHih ztFk>p(lfBCM9HT5$oh~odM@l{VWFnp9`95 zy*xkG@+@G}{5Z?szz>Oju;)9a1np7nwlYv7?tOcS^puJ{{|M048uU@;C~{{OJ}2hP+ZlayX(Ad71(P&@eTHUt18g{n!!`){X|L&{m%>OVpZ(F0sZ7* zYR_yhDg#2zp|69xY-!|6&Fy_~Og#NdI^}j;IF&uFa&mO_6ci%|o z7U{|YpO?M`lE&|E%i(ceE8yH#firI9`b)S^{SWA+xd-s^SXnoq2Qf4moUcoo9V z@xAQ10_Rv+#kHmxxY6|fBLTcX%}ueP&CUwk5VU#HQ0Q$Tz9936)37h}9?C$)BB3`WB#XjBL``kd%3#7Jp2xXCZUJYeIwi;I8#*%dudEc-lj#gPw-%$yXM~T0@q^TCe&BSl?X|XMIb4i1kf*T z;M9zK@YOTI7H}qb>Z}C`mKlJLFG#e! 
zg8Y<1$5aXU*tQ|)d+LlCx#%aPOKCe`33$;;@S;4}NpBMAQQ~1YgPw?2lKwn$%LC7# z{(&`=*uTKjri=tW(aj#X5`4HtVGqnfA1DUzZWOq;i_zaP_p?Ke!?8?I?_Ng`I;ULu z&W(773Tu@@_~#J+)tSS<$I`&hQXzi=K9|P6c{EShfBt@<^lYr6v}J&@^y|KErENLx z@aF;lD)2q^`hS4$eG9&KiSfOiC|5K1-BIwYV~V@#ug)9+zrzvcA;kUQ`mg(@Bd%eH zGp+Re{i&tx^MqRPHhGq)`TFO23!??m`g;)R$4p`6ekBCP?cuuL6LRGOHf_OhFJ{ei{sw+r=8 zJTe7x0=s-nW%c(i5&572^X8#L!lul*7x8&>wFcx9JR=Kp%q$8w1UL&{E{9o#ekz8+ zTz@BIW*~Rx^ds|Envd*qGdaUF_m{%t-GeaD>xOl)942Wf=!RrUc4FMBM0?FfyPJ^y z6BtjNd54ZY9%Upy!Ei@;oN=e1{wN>NPeAT(hd;&nhClqvM}~&SiS$P0!n)mAE)IQG z=If-Vuils2pMi0LLmu~F?n(7|fsG}tfY76{oASehPxCLK`K$nHnsPIJd&jCc11&rgG&y_gTu+*5wH1N@upUo!rE9{gJ3(_F1I$He!#82{$s zFADM!1fv0I?kV%{Atx219s>`5{40-9tI@8+$Gwo>`xo@JKDnTur8oF^8u&b=zhMsd z2KH@6zmId`C&0^-!N;G%^AcoqeyO|L@)z*7UqB}4Fy^L5z}MTs%a4JV&jl|(tnUv# z(ckg~k|u-t!d!p`QYXG;N>&G%hM3oab7Vp-;m}t4D-{#u_uy0 zR|HxDMPA<5o{ITt;MilyNpQOxZsSB=F4*NV^5EqwP!ESuw;6sR)-dq$96X7Bfz|-< zbG1IqGGo-?JP>o$f%LDlfNho{Bt<2cNIt6jl9-$0rtm&$|Z=%Xo!zuj*f1kT?P5@(7aV z)b|umNX!SH_Xv6r0;PX4c=f{K3S8!etsa!wLiQw}qz~^V`XKAm1 z&;K5Leg*jayJ)j_!QWQ^t^oW!`1^Yck}N9#f4ks53(5WrLRt2rETd4C^MK*OgDl~X z_84_qVKauWH&_DngDn9lPXNlZ6J?z1`os&Yk4g=!D0`7)$&4mz@%&X_bfx< zrqL%@Rt9eq>roGUIQ-~=&qkVu^>CN#Qps%zL79dI_qSXFJsy@G?cvA!^q8rCPrC#3 zG?x=10x-6YMHwX?N4j(xjI@{59Y0uoqSV{5G zSSk8}8;zBeHySI|fI~4>(s)YydP6Z*eyykuoE^+f4aZnngnTUozoxbGLgaOe;JJGz z;6EV8+A)^gc?Ix{=;S^lguQ_{a{z5|_^eV}KR*4#hqUIrgP zzs4N!XWh#`>t6o$|0GfBxuwCQoi1aaXbR!Ot%F4k_4Z^$Gzx#yXH&@ z8qI*Act2c%Pm%sW9F?0*5;T(jz&?0S=kI_|(RXQa)?z08fq=#^Km*>B9znpTR)Oj< z4~?t@gp;{Kf}&g*mCFDe5Z@a#^n*AUU&*^Gnw2M%XP^ympYl_syJDwukMa<(251X( zS8Rozz=ObEQ*HvbR=Exz>lK9MUMswKp~h>umw^9eUJx?#s=$An=M_&uIOTc9lNSzo zo`U_5XEW?OJP*OX!*eJ8KlMN8&kK9}KlO)jnEyTqb#3*32mhP=H^Y6c|0cMTm=`ZB z_g@S9a{m>u+x(ZqZu76e|4zR>e()c(IS9gPhUEqbs~MIVAgpGnF?a}9^fvro(zod$ zl%_wWhftdS1ndX(hhTq4e-QR}^bPoLzURa}9>RslOOYJsKxMhggNz{@4Jpe7OptL9 zHJL96$0N%%kS;=3yp_G*$K)QG7~afqoRf?GFEISS7*51NrWcD($MCP&djrFFGdzmn z8yIe4_!5R^G93FPVtTm@@4@1q%nRYJmig;~?n 
z)34J<4$;2M?6{oL%;@+hAQw1H`~>gR?CE*ap<1$Y#4agACHc)$j`}fDmK34iBxR;4 z{a-7yv#N=bT)#*>)|^-{<-ww|4k1P;>YSXHKVAHq6+{kWb0i_f7zSf=Agl=Y7zU#W zN?*MaeM5Lrd@I4|99Ln+*c=Emif<(tO*q0rs8I|n!5Gru2|T((lP46p4}q{FDRhq| zDEgKlDR`HBNBt9WkS=;YNeStMNhRa)_2K_0Imj^Z8drHSnHLdc>=Cv*GqQe9V!y8A z9>wX(H^ttVL^YpZQu<6u4(WR9q_~A8d#Cb{aW49NT-r4!W=OoVZ7J%1cR5!RDvat3 zlRg^(*hT@T3P<%$M#eGs zr}@9L?6)fyD~G4}s1L5fAJzr8O$USE73(N&AHor&I7lYB85SyA6o{h`(3LL84?R=_ zq8+l|sloK=#xj7ANhDb=;ti+py@8=7uC)bK)$G9~m{h6+8}vV@hncpjM(hd<)j?hY zavdjd&KCaVjHRb>=GlTX&ypdg^{R*W)Tti+sIFv4iiM%-c zR~>p`E#Z0B?u}Q6ZC@w9=dbDG)fJ{T!na@T{6+w&8q$+JM>)ZBjy=4)7_gQK_BGG+hNWVktrE z!V&<2Pq{1n1F%d{Y1|c@5(JP7ch~-jp;E+&_miPEckQpEnH#+$L-bF6oQC>u_xi^% z(?Aa+b=}tdsRgA4*6DdA#OBR0G~c`K6?H|XDqMa!P5Pn>|5bi3mtWS`E#v=eKbOOIjSucN`5nPq@p}qS?L+*6Xu`E$lRG_RWGG>`|4-pu z=|`Nkp@Gmov2u+YU~YBqBsQT#|0>4&=|#^?nmRrwK8}9*B{@a0iSaq5r9YfLR=I>- z>-Rsnz2fAXKeyFh^0g9-n_b)M>3oc%rQ^|ZdDA+NrvLi9>gRT2HNY+ep`ZPr?RH&9 zW6Ax@A7#qBGk6pJ_=B}uk#+jiBI~pwe6OP`oBWs8F>+~t=9A9vO8fxr79EpF16v29 zq>mgp=OR?+KleA8^6m`Yq`%$KJ=o}&c=r8}Tu=0`f~gbo@(WyAsB8Ps+Nq2ZH%R%V zdGRU9+UOd|+v6nn1o8PM%{5u8aR2eR0^dDD`p1||Z?h;r+`VkF z4f*11Ic+9_$Q#wKDy=3-hWbv0dk(92RR+E(2Y1nra05T)rQL_MEbR!3e5z50 zUM93Di>H2MO_ql9#P^a_^(%R=7BysbOCiUG`*a!J8hmF^JxXGIsb5eM=hdw#??$Eo&eTrxBJQU5(uvdD3!TyK%S6ZoSKtxEJZ}h5n_* z<<_=Fq>KAz;`jev9kmqqV7=@Vp7Kcd0juFxqrrCzk;f|d9f&+$=Y~99=Y~9Xbqhxx zhobzHN3TpNEy5|UUdSt@t%18Yr{4V+~#o9k7qRYcet9Y%A&$HPu{aJl9g>6E88Hn8MW_X zZ~E5dEC;pkAhg+aZfLXX+|XuS-J~}27TautST^q#DjW8v#IkvFOR{hWt5aKbEgQAf zaFi(xWt%OR4SHb7ZC-o(mMhpVDn#3<(azz#(5TknJD#A~@S@_ju+ZVadEm7oj=NWl zg$U@ZyMrFUEi)?cO*K+ z>xOuhh*wcxN%6kSIpf9MFQkKb5%&qieXDpy8b=w`sLKJovQgum;wXmxHw*98SOqwW z*EAmCd$izA(kF|e?RDOeAtZcabakCZo#N0wM0`PO-^OcBpI21Zo#nk7&nxV8r$JYf zc+=94%O|z9xQ%Sw;F#TX_FDTEuizBNMBE*LZ2TwqhI=E%jmGf80g2)GmMZADmc#qm zJlB$syYj?Gu^l9R&wT{=Da&U#!b2AKlRkyviK$nhmyN<9|3kYA_h9#YSc)6>LFOs; zUzwzWE>tz{jdY^!arKtOjXan3i-2Xaj1FHYXYwcqFm!SSB9_Si4 zeNItX2fo~R4)wnqVHTs_wKJ`4d)&;EDm0sGZdz6N&={R@8X2Td_QuJ3oci}wT@8~X6;D{tZtI;0&9a~qO* 
zjmp9Wg<2ct&Q0E_mBQ`}!TbQ}-6)NQK3&6BBsa|6o-D=lBJA%o`~2jcAuP@Z5GV8| z+%V46u5nhn#JSKV&MMeHVDZ~w-@)wFu>Y0W7s37^voB8G`2fqq3&;a>$=)yzb2-P( zKFZot_jb+;hkF!Xj8oRiWz2=V|DiR6OXaiy|DR)oOZhSY|9c-d;8NaJL4W3F;|Q1X z*9?5;8n@b0eJLF&pOnVVV{p@x+uy{zi*9|~Yftqix1Aks7ez)eAJdpq1-VD)$n^@w@m4eL?h z9USOtJLoGk$Y9;sM}_kZn{hj6M8wC*`1=yRM#evo@MAK*gN0YK@PB3DW&A@HUdI2% z!przh7T%ME-^Ieqcq0ohp@P1Y z8_|&pytF|OL|R&iHA3hdiJrnkb+w_0N0kCS@Y{Q|hGJaWc`b$RM>p=grf^)q+DHez z*UJP$tsc6}FUFc2G!`8FxP5dl#3_#t{&4#iW5N~i*dlE&m+^u7C&q{AKic?^J-C;B zrmv}XwqUq!d}vk#Imd=v^x^Br2AB7q21g;!Nsqy!I3+!V|>V+H#fNu zbRdQWkGg(*a0%O|YuJk9+Mb134xZhUZwGqr1+l`e5E zbcwSHc8L$?&a=ZV@!;Hf)v!zaH+SA5*d@N3J8yAv;ZT-`*~r6n3v;`yqPGA{9a)x$C_@qE=N z8JBpz>Jb^2c)luI#wDJwdQ`?Gp09db#wDJwdP2q}p0E0aj7vOU^^}ZDJYSW=@M4El z-aHxqhlJ-#IMGC2@hQozfVqJ#oNiAuH_(RDt%$jSKAdh7nHy-t=~l|zKqpSONz4tj z;&hwB+(0i*w`r1Ft`O=Vx^cQc!~BAFoNmuaZiUx!#lEWs9px2&EtW}LPWJW5h1U=d z=*byorWA%~iri>CP(3fv*Y#sS2TkP)eMJ7hvxg1(!+X|;vhg&OpJNOM|5vHG);#e4 z!gh)O7q%-Lm%;lFfcLjE-XFTZee~6t4`U5s##&+|))X07Ta3VW7KTHwheFcp0XQ9MrZss6@eaYjJ5DQv-5Soew5oq&otKJt zVEsEK!)Ik3wig|qt5mgmf8H?=VKo7~V$Wpk zKd2Dq4Av0gtm2e$ z>zXR;4bZ)|BE%Vqa*O~Rg!Sh(UfV+JL^IY+d$49Cyg0h5j`F+`@t;Nf*duB@tEj45 zk9Fw7h+o_za>E`G;>VrREovR_PzR+rQ0As0CF$C-eR8WZ%WJtGv&>ZIXD|i}xvE-P z@6&qi>dX$^F+@wAh7?B*FSI~M*l40PYICahk9qIauic(!rgr@ulw|CMuV z{7O+#w;y5VbEc*4GbXg{P!4N+rEPZ8m#n=*5obTVU&$+5C{KN3>~$}reF>iuU0XLB zah*V1^EgH02}Nz)9FA}7CzzHhXN+&FRX);~kGPIgT)euf7WwKgC>x^z?-BUMZTNNs z<)?2{Mcq8y4JUj~)SNDH&Z(O z+bWcg=e!#Yplt!PHIx%t3^9x94Cp6he*<()@#s0#>7(FV299ey3fhiDm{8F6_VT>8 zXwWtX+HPk3g!=aq)Y)r*HH_C%J~yJi&VlZT-%bR-rF<_!zB}_=yU25C5ASuhhbQyg z4ZhH0h&U&K{$2%~%;Ki~sEyIYv!%Gb5qA&7y+r6C#(e?#)(YCj3utFA!K-l-;=X|P z@pkXv=T)fN62v62cY>B+QC*rrG&!E$bygY;UAQ6AHpuu^A#LuB~Q{?Aw z@;yaeOO$&G`Z z-+=E)iD7umeQu<~XN1^CNSB+saRB;;8oX#C>la#)=cqgNg-NK}+1UGRQS=b&b|U&h z4~@ot9&?0Es8b*G-wS{|>J56SS}ve&qo^;y{yO@;*cSvfc*nxJC_x9_v7{~%VZeKq z?k}IxHrRbsBk>sO^E5{|i~dM7RRWqKnw^BUTZw&aiDs+tjTRa|dVmjO{(<}+;nkv_<*uN?X*Yi05tUhPEhu(&0D|aW6(2$-IH~ 
z@`)#uMlZy8Bkk>n(^!N&Zv*Xi>@6?E97@xOv!SI{ocZl?8{r&>Y^YPVpN?tB&*opM z0o;Ss!Do^r~<43M3rkR z0U;WqwJ})6;A9318O&vH41<{r4rkEBU?PK242CgiU@(9|9fKZ%e}IRuncs?!TPzn= z2wz|k=_=p+cfyfzSx#mZ>V@uqP6i}19+gWt(F2*I@Jsk`hO0zedPo0P@j$kOXh$^L z*?Vd)GJg}}6YEgKhcO&9B%0OCzlz~+p;0NlOqQNM!_8kG5Jpp>3iFCoIH5bLD zr>9%8aZ55PYf?!Gt}W6Ve9{8WG11x6CrvF#!&Oce+0=5pP0xI`9OlPPJyA-WrAMia zFUfpXgn1IQbW`-$w5+uFw5bKSq?=w|UOJAROk555v2zWRUxs=l=EgdJu-;u>`veeb{k4URY)N662f9mvfFp13a z?4+C@n^PnZKYa!rmoBXRBq>J6rxoT|pDqx~`B+}bGsGHUka)t6UR;pPBx^p{u~lNp zKUu0S#QM{u8Ar5~UP^8H<5z2p)R*(7W5mv!R8nr8R9ci*lKzvV9W9pBg_?e%RI>|8 zCZrV>GtE~lJZBb)=5)K z^GcA-@iY*WVgh$VK7KtnU?Etb(T{aEvHF2VymZP|_bJe{AYInvA>E*nrpEsVA9M%Z literal 0 HcmV?d00001 diff --git a/librapidyenc_linux_amd64.a b/librapidyenc_linux_amd64.a new file mode 100644 index 0000000000000000000000000000000000000000..169b736a00969322c031f2159259f0eea653631c GIT binary patch literal 273098 zcmeFa3t*JhnK%AU5+LD{H>jo9ssxSJpv6oG5G?2n%*c#RFcrMiOSyzZpn)Jv5M0#I zNowB3vFxt8-LBpGwRYETyIa5g{Nr7dfFXcr0Ob}n0j)B|OHd00k^FzZ^PV$v@+O0V zpzZf@l6l{Ae&;#&=Um=%dxnpnxu|5$W&JP7?ymAQqpjDU+1c*AF$Fd@iZF{c*Qn>i3oy;`f6+ zMk;^7>;7-~Et2nv)Tw+YWF+Z;xDgw7{ z++$}~OqY3mYw3d0MRQB$7SAodYEJQ-`Ew21Vv!wFMFu~O>2X)WcbH@@lrES(eR0Xm z@|lZ*p;>Of-(Bz{c;XYeyU4#_b}_w%-^K1Rn#I)c%y21ua*s*)6!#_1ADWeSLVR)` zyH9qqR1T=3mN}rMdC|<k;4b9T zSXDBM7nkOX;t|E1ucEuSG(25cl~pb%Sxh^3ftIy%=g&s**cls|J1XBF0>LPaU~y@} z-aYz9eo=(M#WO>tOG?Z0N00IQm!Kf8DZi$8&NXwc3HMSuQ%6lNo;rGZvV!gz8@q6l z$41(Jjj=(9N%MxW!S7_mCO5DO~0RVY-0`F7pxz5e9qC7gD1c+55ND3O3)M~#HL zFu|OQOGhQRb5wWkq}|D2&(ZGuzasWLd9gVj_B;WxISOa|ZLsGU#hx>lEKM+{TW3z% zoD9|+J7(Np5o?~j*c=aQo`Bf=&9i2_TL%Ls>(+(UmtoE^1)^KmdRjU?O26*T%O86Z z*)wT;*q34~(xWduiSZ$W9yLB7I4SY@n`hB@=e}gt{J9#BcI(`2yOYDDqeq?0HuR`W z(JPaly!ad+lb(S1{LM3Iyd7OoTCpJB$GhV_|Dq*J;wTbpf1OU}N0V@nVBB&1Dk~|U zUs*0};}-e(aSMD6b+>bmrQOU$^Si@St|DF;o?GtDo5T;nlBrYu1!HWjzxb*p^XKC; zZAx!M1wnil`e!Axc8>`zreFEqB{S!i&zx0W>gN|Iu9`A+ zx*i+<{A;cfvC!f=*`5`E+u;29^FwnB{Qi6TWh%ak;q9EM1=Fv&3bf3P-jj3`uX~N9 
z^h}g7%Mxr9E?rbMv!v8r5S+iT+@1jnV=gP0$p5@v^3Xp8Ub*uUB@@IKv)}Lc<=cgK z5<_r*)euC~9R)Iq1%i2jUeZ-?V$yY7p~|#gb@H+^-!9#mVN|f7PIU>tL6{so{!6rD!kx#Y>hg6K&$jr z>;%+=Q`t&k0)ofX7wB4ubxfq@fYmzq#uniP0!mO}7e^1W1g7Y)^5~#SAR3J`L@roLcZy0ZEjj$sF-6CTe1}ua`a2wI zL?xKBFRLh@jafWNq#@HW$)7+mbGC72W*RXAq){P`-)PKsMg zus*;k^XW4e&rjkDX}-W4Og%4}yI}F$qY!e!9Kj?4)QUB}*K#CFIXheN069e8>pP~P zKrl@~{-}JLduH^%+aJ0j#pS}rUt771%Z!9cp@vaAD53WVt;ZN>3^1NFK)$AIq&ciw zpey_ERZ|!4yT%;mGmLe@Ods8KFOblW?K1dA8Y2*Se`vtk7qptaPtn(PzMO#qw+;oO;hcdlRKe8sEdfQ= z4qYSYYv3E`;Ryb!UBREt= zLPlVv4|rs4b|81hO-Q2w_ge$jhLBZHUy5q#M9k?NzR?fsW;{$^Q%zUEj;rzzpvY?x zURQIVo6qs|*!i*EnIBCjD?hrO| z$1u*YUxw?BYYf-YEZ27j_Df#{M#utwmbB|D-^8RdW4MNaErrUJZ~A$I)C+oFX7*1q z(?wVdC4L^?z<&&!{?Gb@rZIWuozW|j~B(7svPUmv$G z%FIjg2?EBeL)tsTPbc$3>EWLxF%>@JlqWGgPka&+>;f9n-Qd6&cDck<7=V&0lBOB^ zx_To>?X^WQj8wRADmF7N;OWvrQ7}Hlm07*ND{)C4w?7xaX(=)R(5v(pNStY=8g9P~ z96b>EAdT@C%Z2`UB%*j`vBVcjJWsx>@m79INW6ZIC;fR?f}%X3|EWN$6W7y~jcS zQ4;zilD=5dtMr}@b8q>XI1Vt9JIxoAj?A}TZu^y&Aozl_7Ha%-><2HjGXe3Tg*r3P z?ZAW`#B_CLK^nQ=J>kwCbBawGh8ll5qGZF)UH3h_K+uetr0!weZDW0$4)LBnotyZ3 z(5qOh2zHA#hm4L`0utfljlKVRXbG?BOr7;W6l_ujbSJaV3dD(yb{0g2hzO!?3a&N` zvQsL4svSJnflH(D&l=8w(%~*!J)H#KqT!}SzemFvu2LUMjf#)*{|DfED&nW(r|`Ry z;P+`bM{kOr^^l7Gd>cQ7@6qrxHT+`@XFL^s3O#^Q@jNK=;am+r8}^F+8VzTgrtt66 z6Da0;27U_voFEkO!T2eBko1Q*`znRIHGG(cdo_HxhG#=hMQceS%`SKQ7-2&ywZwssk^U_@@s1F^Lb9Z{?5bFR~qY zhcp*B@N7DS3OI1p&fo07J(7N*16S>~F>>*;`Qe$f@D4As-*c31QEBS#%VPoX% zncx%5oHvI>!j`~Ac z^#{{e?CC(~7-=&znn7@6VEv)A74LL_JhI2k_&9Q~uUY+Tnhs8{JS}o4<@Q0=!K#;B zH*s3QhtI$Rbu6p!dqh)cLUD$7dk-{wSF|DYv&@>`!?k<8>1wtb4>Uj8*R1&kfG1fN zh~|z1&A-18(KKrs0C@JVOEIgTgzmt$CsR?}E8b~?q4FONY+dmdn7Xd=p2%*`O_5I5 znRR(}eNxdv$aq8!on=#xGQpS?Cp~SPo-9+;*)fh5ioySZ~pWO*vZJ6)#G4@ z9J<-8A!;4G2~n5+2t#UO&Lr-nFP*$MJ#uJ*c~2Jf0cI{aHyi#pFM2xm*^#cUvQ=fP+}i%!vCb*5E#Wshl|}Ex_j}5m{+a2M2c%TuHFdyUcwRYR5}ri^ z7UP?q^|6=^pl;wpxVawo0a=p?c31TJSiYTErSxxG5jSAxNqiMOgc9LXqg5_UWFOnf zVmQyHA5hFk{*;;~H40bZ9<8tZCc8=Yam<=zedJ!5Zz_FRN+aLH@g@n|1zlaJXNqRw 
z@Xq)i?0W0tzN~xpk@S;%sVn9Fi~6|HaO8{={rSO{j6eG*x_6R$Bru1bX-f4`3i3n4 z?#QD6`Br_CN*CR+jFfxUDXQMWCjO`le<=-6sxNc8SGY};cxKB2RdXZC#+g?f*E`Fe zw5uej>O(U3@{8PFCqUPBGUn-?*3+Vp`7w@kMXy3PeRW?zLJ&Q87ajj2t>( zM)xC!eAbSuqT`47a@!&Y)6M(d0u;h*^}$#xLQl;5nyV+fytQWrqqn)t)n~b?Tg+-s zP1T+gh+YBY_7tFQq8&srRlX4D-ZvByuOv}TC1EVHnfaRt&EPj3zTMK~;2SZ3uR)v$}Fx~6|mcPi~Q zX+7Pk9H=>hZdF7k5uVD2nuE5T9$q{VA3NN|-evzR#w?+gDAS^v_2Kh<p6|yE&zr=EXmJLnJ_`IuYRitqQ zXj1Fn$y)KUAj=bEzYt{L8zZ(8pZW>@kJNh_JzW!I9@PfAV&|d&6ZYn6l)hcbo~HAn z+y?WBfwu*$jxuEWnPpc0Q1r?a8G{+{T8@Pxb%@86F7LB+RGSj>GgJ#pN_8nQvYjZX zrr0l0kSHxNNQybuXJP=yxY?uiq+;Qd_} zVWBFWIcZ>BPHE%oa?>zd!PlJa_)6g3)8f7LL302jh*)F86pRPV)J$m>dXRc~z)r%GJd`yk_dO zYp=Whh9BK{)6KX1c>0W)vr1-{mfbpM?!2Fr&tFin@U}&Z!<9>JU%Kp$zgvFiU3XWl z_-UlN=4bcRT7SRt-spWlU-b|F_)qsg@ZdlH;$MFGuMhp|-+uk@|N6~uANk$C|Hq@h z|Ia`C@xT82PmlfY|M@?U|9`8Wc=D;IpLzDV=U=F+Z&o;tCY17NEG;iLr z_0`w5ZQrqTSIg^fyxH2ed(T^M|M{JF_r~69fB%CIKkC@`@h6}DW&eSLorgX<{P~eC z!W@i07C6GDvzSJu+v!iML%nR@B@%0C)!^m98=Dg#W9y}EIpRuYi05PAD*G^Pki4bQ*3^Y6c=YAo#s6AG`< za1IL;4y{m3zWyDXZNnlTWD*53$APQ*aI|mq-siIg%K2?hV2>=Xz};Y&%dW6zyuY@| zZl=?jb|%ro2b3lA%{KL{>&fQ1F%c*f$NW@w`_%Ir^a9wCM6wG>g<9_jSK-n-WlTEH z{gPJRabjqk-i`s*TlmeAFy2l`Pn>4A^wT5l)hnF|hEm_!G_pP^_X@uP7{k|j3`V0( zMbwco*|gNu6zJ(*ed{Z$HaXAyMROQzznytXKR%J}m4E8(q%DUpW06yf-ij3=P-ZL7pUFnkF zdx4U)6#-XMSKBIcb=r{eBg6B?PcW-3xDG@P9l3K9(SWsi{2;UXJh-D}3E%zA>QuZV zd+}A#1#MdSH9>C4>DvD_-fBY9BU?{dlcz6VP7?V zNPn}YfDl8f`3`jO8(vGlW48mbsWM3w^$l^w2|zP0b~ZxAXL@SD>TJhXClYh-d-!CH zoelE%w`PMY_KNt9t)kae%ZIr7SH{fMz9A{d&l|>1sC*qRKR2r{6-n?7No4}~Eg20U z{taohCZc|S9g||Nt_$Qg#KdPYJaCO41RC>!x>{4&MyI8?Vlxn2(ApY|tVuPi{{o*F zTI%>g;VTi^5`=cxo--hv#?T&(ht{XoY+q&XeT7}nVlri6iG?Ukaa4nt1=f&eRxd zx#K-o>@J&}k&P{fUQCxPhURJdkH}o(wN8u80-%_HxSi72+bmqfn~d!y=RW~DxH$(2 zsJYyZ8j9A4oooAC`B7{ry=G|b{{fT1REQ-c}#3kFp&7tEmgG79xO zwDKHO@qo2m)pFL;;7V*`F37U0xLN%>K#_5%SI8#9Q)I8FeTzM}X0E=jtLjky@bs!fgTgmfVKq%O50UpqV8gVlc5oNQb`?Tnl%VP3`}t=i{|Ot0QP_nHpGmj%X2{ zfn{LjSTPa3DR5v%D0Ue+=3ANJ#Tunq{qHEqfVC3_ENd=&t 
zlZrp{q(5^Zf(-p3G8B`KAh~li+KV$%nOV0WvpDCtdRnRr=IA2S(t@DfCmEMFscy@(<6v*8fTsbu0+8DHky6bKVxxRgi%wKQS@>Dc5_wb!E z(yU=Niix*H&|ppt;Z8zDw5!>fNI-W6pfzfuS^YNhh4uAyzEQZ`J0)tQ;FB1=6?3pv zmyN7U7nzMIQDhNgY2ItXI(8KvtGrd?QxT2VP+6F)*h|oeK#(+48TyXU+GS`ji_j_` zM3NDn5jz83K+h@@sHt1_oigDcn(rET0}Cp8yYv^xj@U<{Mh*#pHKG$AtW6cs3gtF4 z*PjBl-Ssej6`+hgg2yTmjsJp9Rrx8_wMOhA0CrXkLc_t9cCcMMb%DrUSL}9JvjVr{ z?X18vfyK7qVb>G8y8nT})Lg(^@zRYbJ{5o0#@J|pA~DGyUt#b0Q%pD%hcId^n8W;q z%E8YkbcXRu<}=oYe}?_wLnEmvoBE3_-;+6}xfs84NjI18lQJihCPV1{N{x>=owyue zc85{Ua^TnlcaFyFaoV|9%vVa~)pY>QO-cBp^6O!NC;Sn42JAijPSfU0L>>V1$I_UFW18T{9Nd3Tbzs{C#@7cRz3lXfj}_sikLq*dY$xNZ&|R~~kED5kFx z4wK$Bt8FSy>{*WQMb;q4{o;ZrIP|6>zpy<}b3Y>pLJ?=`4VT7MK=nUu;?<_?0Fku@(NAq*pJJC2C_LKEyK=5#B0X;wsLx&gKJ3`EQ#Hqd;ni ztFj{$&~Of%m0Bn@q~|ZLj(Lx1e5P*XiL3wi{#1Kn??igboj>sJL^UpA!)-GEaN*{$ zN*b_}?VR8IlJlA)Z(USTsfsmToX4x-pqukFPUVrn8QPottm>P3Cv!hdx>Gz&f2ddP z)H|8`n|pE3dZXN_cQ!EA9i8W4G*kwu{+bw-s&BT*-X~ALJPMF+wU>02G&BCBQ|VFl z{{mnPUk=5fv(-+ealA>ZtBYZvpHBB2pHr!H+NPdnHAmaGTjjHE8#KIq|Q93a2}EoKT84f?M%D22{N%%!`-2W=wD|hPsj0#8r@(uEC>M`lwk^M%UbpMNP z)QR{j_z!U3Tl(u{`e#T&rl04LRJ2t386RiZbaI32WDm*+e+wAfFQNgT2;ua5y0zc$ULRQ5&%LFqct+fDH+tBMAfwY5ZojVL;y9g}a{+x8{6PfW`{x<_ zVlkf1#^Z(T$dez;Gv*n4VL&QyqTLeJRhT0OQ^JEH2VG&Fbx+4+B`NH!MDaOXX{K{s z9pkq+c~Z4+qv#LLY%jpR1>yxXLy3B+o$(IYuyG;Zz)L{SK47-QXWIDCD>Jiy$Q=}U z!OW~1u{*-2c1N7ReG#k+DWdhW0~cw4_7eQKPk{DR>P>s@bMO#^Qf-b8dg2QOYS?Lm zeyYSDmbge8^z7TSB&NUEm-L4JE(ucY=CR`vj+a?w&#&bpXvKSO{8 z1yv`WiM3Kyzf@nHY^>~cge$$#S|%GwHdYQu_s;mL@RfgRtnBRXm3t?>(;OcwD-Y#6 zT{bByo#ahI%Duv`0LG~1;is!&8UNFD5PFp@>V36_>7<-1>`gi0*&rgO*9E$pBMDd0 zpeYr91x<&BMeR|5d}qt_&yZ%OpY>U}SLyF9Y&ub6G?fnnfiWDX`($IafGp77=3wZ* zQpv_@RWkky6rp@O^Y0RUaE1|>ZnX9GQ{8=3=$4lt9m80ynUzc`k9d2~Cz=Ls=P|3- zAx7Toz1E?iwb6T>-+I}5Ee=*pjdTq#@8du|RB}Pc)qwkac*LUi_ZaU6BL_X^&;A*9 zkuKA`ZwDZI^3JYJudc}2nR_r$vg1hx$18uDL%CSkd)>4iK>>uu&CY2yYvx0X zjrF+oClFnV-2@`XCh`mdLMubuY6hWnBK0EzE8#&9ONHLrj8L@1Rc2O*;UV5)>JV=c zGT!Gi24z08LU{IxS>c;t0N#$@n}lBi6q=BQRo(0|GrR-8X29X5GP8XvXxkN_vrIT4-~*N4YVy{%WqC82ywMxFd^Ky#nz^VSekQ32 
z5d-z%IzF1P9+hnl_OzLU106w(IZdk!*&3+nGHX3p0Gb?~eQp=>-D}=o{}i**XI&7; z?Tl5RwZ#}TajGE3WUs=82(iuWbjeG0v=R+l( zlX>jGKW@)mBg_1VclCC7RMm-f|Cgb`nS_BsdRAryj=-DnczQ7cUZey{I@O52p+w|3ueNbbmCrGxHo!YYK3yS$vO`(L^62h$n(lQoy)|3WP_>|aTYQN5vjLtq z2~T}1LpiTcGFRU_hVn7)-!8V;nq%6wjfr%i*nn{+LM(@@bEWP`&yP+lwr(8it^<0m0?dIw?>SI~d*Y6z~kLTo+Y7u{bO0_6cyoDKs z+!V-tB@n$j8*NqWObqwSR@J7XZNnCeIv1Lk*m;Ppz3$E~2Gt#)eGI97ch-va;!srJ z^Vs1-tW;HCcl`iX>=L;4BmWVbGmA>rdn0Y=wblpfclQlO?;YeJKbRF~;`hTWn@2@C zxMU3xh0zksqH!y-b@5IUq~PR=2AWV*s513@fIklZ2z?by=3#Fh+Up5j=HQ{;XefOI zdXB8xKz1M+7>>?kxM>YQq|ma(p6L|(H`bw>P~CMEbYuZW54_Jg@iWfLRg17Jk>&C5+tDC_#-jllsx*jMt{ml+9fa3PMgQQ#;W=b=k!xqH z5dI-uNL}nablRBSqIM{?B?sksQ+9PraX8Pc`FHq>LvKaVt5PvW7(_OMk75%X${|#O z9;qp|Yd?6C?Mdu2aw&8>V(XE`VkE@VnHUYFU?`Ll>i{Kc^4mo&3?@Fx@~dhFM#=dU zbq#hMc2C5*m$5$fPskn)vI2HzyG~5;8a4$w2h%fBXymeZNpm23kY+!YfvhZUZ>)Q<#85 zJ1t5$Xsrucje%&DkxdNuOoIG;>FjLz<`k^)iFub84~DFEpY@TCW0v;<^>6hFSZC$n z>I<`aF(U45?A!4?D?||v?0TPN|5b)_z5}13&if3L_8cb$#NvimY{d~5FL)bSK+WFG z2lqFmgwHCo%5w%<2dtN^os$~VhU_b{b`@D~+5GLbUM1%8)*nvU-;fqQ-50$s1;{xc z(?LHF+&@Q}2xB;7AbukY&>eZF_=TR!y47h@+I*V7E&fSYp$dbjQ&W99fWcG*&y&x1uF}3&>(JGJ~q8$S2!dBZg}r+@SyA86bf2p(vd86fwYi#!odJWb%N9E8J@J}UTYZUOcy#uL z++E{3D*uBRJw`jX;RV(}MKq!PPmG_aN<~(Os9fG>S;fj&=+Ci!KZTf~-ifCue0jDP z4NqiUnqw|0DH^e$KANI&d*6&4whb#aR+DUl<%3g=#0+|@r)4~s5u?ZC^TQ&J;G z`k-AA76UYkyV4>@@a=P>@M@su)m?oeN3he{D=Y>jEFwp+OY|b3-o`7;t7F5V>rtNb zUO=m8#BM<2EWX>&ec&sm4|vgt;7jZN`iu==&R7ZD3VSav= z1-9zOUBTX1!6;08L9Vfm#NH7Bc#zLnp)r5m;LF_}djZZ6_)*oVGtWoOp1%sq2_9!O zPMvxFs%M^0cc`}ToEJLmj|;4wX7heynG%%i&h7El%QC=c@RbYd_Ci zPmG_r_S$EzU2AaWx)Do;FX7bnCLFtd8RxEF$xNR@>!0<#w@)DOM&byJAF zJmI}~?)=%o5OxHP(y+y)!JhKkA+!PF?B^EGg(To>78TFAX1?D)f9}-$=@1J%b@cT4 zb9wlEY^7$CW$2dO5@G%;`9{o5y z?e+AkK0stuJlyhGWz zMwFJlHpIanhIT@&((nuoe^$f!HLmE_YB;y>DEuW2=hhg7zpCNn4~4&>;p9q%e~9v@ zVs+Ky^A{&B&7VuwCB2@mdnBtn{drQu^>9rF21R-uzg5Ha_*6>%9|8Jr-)Y(Nzr)o^MR{!0zl)AhK9vyD{r&uaKU4PUF_db(cHaPGTO{=6;sWig&y zuT=P_EL5lw_??NL!Z*rtV0yVPM&a4eQ_&y(l+yW)xN1j^W363Y7a(^Rmn;suQN0o- 
zRK0`&HS}v+h8IcA#*HoS&VVgi=m^R$=s{(lmn+3pQ0jsz3fR_rNVc3oJ~tjVeU$}CvWPi zN-m-=)IsQ#x9U9^7}Ml*-yz*Q(?q>FdZU+R=nQ?XfJ-8@)ZMSEDwIPKWNp-$zD$L;$&S2w>L(gq=#m1 zS`KIj=76xGk&l;QJX?AprhRd|91Hn-t2jMwTRLAHvIOMnKR!q zbK$3X1S^Mg+kr`%icewpXJCH;_8#o9Vm9*~=rMUXGX{Jg<_*u6p??nSLtsZ>KibA- zegymP+xlDKPkckdVc_qVnYV4{#G{PdK?VX)nQwb7CE;56au_~Vz%_|)+n(xlfrUin zT)9whrt(fE+iNaCdJk)9JBU29C*;lCMIF$~ zyl9DGUY5iyZPa|o2ePu|6KeLewBFTliIr`FMcG=~zRJgkDDqj&pqtlba{z3~1}(c_ z+j7`v=3@FB&*eGUv_C>}uGh}%h?B1)#wg4?NFQIMk!x!6Vl>(X4~@+_@R>1ac@uty za`NI7A|MZqak`fTNinHgyVK3+M%N_h7Lx7_o$jHWDliIS)3O0@tKemr>?Lv)t(KRt zY-pDRRsna%-P0K(+G;Z!oOMf03u;NLT@G-zubZ=%=*;#K&NMF(!B^nS<_0CJ(beVwvG8mn_27PKv>7cyFA`$uN(A9EfPc)Tvy0U#@FZ_E}{ z$d=DgBH$6Aui(Lzd|8M6=8~6@+9i!3>a>YK$%0lmSmJY99HHwsU@@ze#rqNSh&ypw z!6I-BmTfH;Y=YHP@X9tk%D8>!eVPvOu^Eqfui!CjJA7zizO{yOJOEnWqkbAV0&5Ms z;piP|_QR(AHR#F&yX^(dET&-|B3tpeop*5iXOLB}v?@NLw;lA#mqRXr?_{9%#<*`N z@3I5PO`pxs?+E6}0u4Mi=dr1I+|EyU`6A$oKdNj$fb>0^Q^mYSn2ZYoS%|16+9weW zJod7}c#tQIw#_+gRt3W#z;c`M6>A}%S!+Sr&H$MAaZ~uOX#&*1$2=kVG@q&XQHFjGc(qcaisOC&6@P?rid)TDK(J0_#>(m3B1l2muWH ziQSBetRSDw!g^vNBs(KeE!o8|li)uRgMDZ+5Dx>%a|D8@x3ZhDBW$*R3WpVM*;#=k znz>={0WutP6^M+alZDo463~#{%UIv+CVicaA+J6|AhXuMRu&-=C|0oT zVs3vO)-5ce*0qdH8wu_90Hr1)2pw4*Wrc*&XXF>iSh3s5-(W>%8;sopst^r*+&9*C z?3+-2bkgN3aET~FN0w@D!h?+6!ZxJrMF#aTEw|y(&b?lCn}vXOFh~)&?LWL>ht}Rj zHwW-QONNjd=z6(bkKTlPyTZV;Rv6G(p_Z{#0+C&=W;?2H3-Y;@jKrE$2EJ%VosZYG z_Sac6vw^VmNk0p$Fb`~`MmuniNRU4cO1H9fkJxou6)*i21xZFSTUqvY{K!fOT~sUP z1~_uI%^!@&`JyT!e?%eghy8APBRh6v;6WNi&o260lA+8+Im6-|=#Hnb?Oq)PjvB#U z0tJq2vvXf$GMkN_<*RZfQM$;qLvV`XWjQB`*sS;qU=%X~=wJZokKzR`cnqZrcc|;O zy912CaTYG3^E$j|U1Kp~Sc6azAX@2os$_Af=AfkxG$?PB49Zm%vR!~ENn|`5UKDlr zQjQb!R?3PMkStLv(UEJV&9cSlSC%+D62mZAo-B5me%S0sv_#RW$}FjS)M!a2YIM)a zEUF(OBZl~5=OJ~d)~dW&TSNxLbE&sN7lo};a32+NEm`f37riR*1Pcf!$oJ1wsegh( zLZxHvWwXiVT-BOh>MkPj1}NS~R<*|)08#5vgHdtq8qC_O3e>LcJ&IAq3?9m+Le=gT z)dGn@cK4br*bG^YS}eK;+5W3CRz+*JsBQ&*fZQ&4QG~W*yNa`WT7p!xsJ=4M1Pl_5 zf^xBqgpwK=C>n&9$u02)VF^j}o_b}F>BhgXFk}{Gi3dM;unz}b%aJ&8ehZ@__SB 
zgLMr2UWP@5_C*6%pVH%v^pmbyIp&WKI_%G>_4G~pi@er{k+LO3)$}|p{4sNHcntaW zzL~qQ4tb8R{??bw+~Q_nn?Mz4!>=PSZo64i@)0i2!;D4$bmQq^s0Z%#t0JA)ZSw(U zzIb)w3z$V}G$3sVd(XJ}rP1Sp0UJI3M;KCIz+RBw!w@-?=6#BbA|CUB#shCg4)yWW zE==>7zi#x^22u(1ZSC^n;B65G~IRf7Oz!SqkD^cePZ{Y`P-Mx$h%09 z-`b3AhmraeU&e+=XTQ7N@z&mtDWY`l!*>NHaBiov!M(-%3}=*rPghA5qX0#mX=v<+ z4WCzI^Cl+|#l|7AFG%c_ayNU*OCR!Oj*1~$4Rh@Cd^fErUIn4yCzp8a)D#dClT zs(21efZP_?1ff_S+z%eT*xcWUQdx<7YfKwEh@1E?N2W3HRt$8FF4Kn{@2<|=`bcNG z8KIP={q<>P4MjuZD$haQMpxDT`V_O~Iyw_qaAvJIWK^LX#3goCGqj= zrNw?NH%?;L#lpa?jR#shL-%`X^CGzV?!I?XW`fsw_1&Ao0AB9|ufGNZ_9}Jb^-d?R zcRG2!Q|I+go!2{cUhmX-y;JA)PMy~~bzbjG%Ih&V){|Cw8>>17V1gLQ!7s(YzB~8+ z*v+uA_j-26^H4QxoWeSOikxY5WqPBYlt#b7Qb2L!)}ts`5I^OOaFDW+j@`o+u8Mo- zV39S=bugH_&*xen*?myZ%m)qhsBUDa^eNUEeaAH|`nY>3)!YawwWT&HI`jZUXV@ah zvFjFjj*YWPfpuc$Hn3tXCueWE#XdXajRqKLMjpFT*Egn_rZ;jBm;1F~BUsEGPbXUl zB&H?}IU=^#d)K*GmA&hH8HcHf{U;neSyhFM@mju&Be7q0+es!anr*^00l2PlDbM*w z3%(sVuu~jKKzjP5*UD=DKe%hsRY-g^`BE_MeVboEoQ<*|JY z(q}90^+BA>!KP{Nw4n7V1RH&hLor*iXZpgx%F6>Q$Kbw79J;$7H5J#3=A4CVC$Z;U zY~~4>zJrjVH_WVg7`8!d*I(H$XzdS1+S(utX-f3sevpQQy9> zAF{+;yBLJLfK~!bQ=XHK;e#CJCgvjIA75%bXm|4M!&dpt!56U8-o+iU?)}(D|6`nJ zYrsBI-sZuhB(uL|z{~CUamtC~Ld-`1zxi7jQHNi}j}w%K--ZUWBw^f9*^W6e&cR^V zV1_X#vK_m4F`Z_HIf6KhS$#}mn(b|{c(r~{Rz`zay+rWmc68s{H(=v>MpJYkB&hDI z*IxUzL}5u`n|(KD@6r?m++oPE--l5VPV`X4r6eqFaqJM(Bc*o!PFzd4D;|B ztNDEHi40#EjQ)W`0KpGD3Swc{Rj~(~ExlV2sl~6mP=RnFB+bWjI5;t8;E8D)3|7Qh zAIv)9beA6|avIFljmWVf)@wMYl?I_v4LHA&mg;rY6_wN#xjO1U$O>ayyE)8bh;TaL z5$-r`L`REoS`y*#h)%`5kO`U2uz2zZS%z_u+1}tEx399nW3K+7{=K0H%Rop47crRA zy{^WhlKn-lMp0fh#cWg}M_hrGV1m%fw45{RB5f(5+%@P~yjH)E%Wo7}1vwOx^xfb} zXja~dI@%eqc8LX^perpW;JSquWL${rMiYnf?DjDftr!OCVC2tj!Q9V_N!W~mCw%y9*b-dUS&$RE0BwiuKF+rGc46+P4OshRSq2Vl9*%w5>yftV0UUFP z10Fu>MV!(>e~og9K}e;4coPWfc!nsIM1ERe#FFmI*o1*j^y1oz}|-IAFc)lfE#1Bsj=7yaBB=L|-03)%j$SdDp#bxRwPBCzTL#me(9| zZ#y9dOM+@a01pmxBBkP05J(8@6?=#c<*B>4Pu<0R>Mrgp{;$1@8%OT13qoic58DT< zO`@}3@&1$Sd__vhrxRQS%KD`1_ z(S_sw{*p8PIp8|fwI>vvF$`Y=0V|x79*n$=CUp 
zKReI7rwiLJI3{ZeX9{P#Le>F6icuotrZwm4-Xo_)jB%_g#EtO*j1~gXsfX>)Ha6Vd zM-73TklnLGV^3Z{BXXvFG&+%LMvy~+!)wqFUXD+hAZ`PIcCuK@+k`_m z+3X2%H9kf!3?Ct5)FIYtDmrns0f#OVIbmFZb$cu=nH5|L#2IfqIF)sn^NwL2KEilG zTrhx19Lzp&@fl+ixTZ18`IL4pl(ciQa(lBJ!|HH~Se#wnjc#)*#$bujj9aAqj7(VKaH<;2v~TocfPwo28j+Kx*V=}@ zZeW@m$)tH*^*AJ38K9oX9-#yjpn<%{rnzdeY- z@v9fJ(T|TW;6UaKmYUbKuBfD|$n}aSz8Z0ZHqJoiQT}5gKH|L3i2*K59-h|6HDSwh z(kE6eH;)X$)dW5hHyAtt3L$;~Z`^eOvC|m;VDMx4jX>_kper+ET^8Gh?;d{hR)2>> z6J)pGZL&KUie@oCnuNv0<^ENLBhBgu$zCf8CoHRc2J<5+L94Gg6}!&9fIL>i@68p3 z{Xw}Nlq<7V>;ZQeD-QLau*|5eg_G3wjph?sWh)L1!qw=l!nZ>cmf^w)(6#@usu1^| z{}@_MzR+{9kpa)CVi(#Uvvz&>ggpH*pVy@FdF3afSwKp{sK$sSu#YhdHVyNs&~Ba* z%{hmbc9jMPwh13n5)Ocg`Fz3poHpFvWCubLZpOe`8GPbOo9zpH_vK_Iyb(0bxTK(F z6TesRr6869cqDrJM#2Y}T$S~)H;^3M4=;)wH_7LW#7T9t<_8$f#wOxH2)5#cD_fX} zRpT$jW%J*Kl{|b~7yAwj!IgbukKm}S`9!fxX6rvN{VSNQ)qf8P7P2f@G+g3CPPAQ5 zj>P`fczkf3ip~jW*kHA`*@&Hm6TR>}MR*>*Qg|G`)Zr=1$@aAWNQi*=Jz*n$c1J;V>6@ zf5FQbL1(bFyKSEsCX2d&sBEC*b1_QxVwC)nfg!QE8nV z7MqE{nqoapFp`kQ5nwA}K7o!#G!^82Psp{N@*QfnBSe0+o2WYAIehM17JD6s%F8gr z8~zraDV68IiiJiio;R13jK=@5jp(3-?QiK&M4!)m#`$HiemG|aS$`2{xRD5SAe`N9 zX%rwXK|tLohE)akkC-j9VGtxF=3*_t#2h@A$6Ij99&Q%W%^Hyp*sfKHT)FL&bYh;SDo{TNy%yn-u{a^8>NLHGi1~_<=o<42|jt)_7p+RVG(z_iDSNge-xx(pw@$rv% z+ik%F@IB(nj6h2xdh+J_cTR(0@vck;tHvJfw#MB4QvL3fjC~m!VvnFekm#c;d%jAD z0eK6Qjv1?w;0o_bOs=3q_m|1-nED$9KU=kW7xkSv5 zNdYfrb)&EY^>d0kkGVk+CxS4>Zb{6SR9y~PAZnh>o?L^hjV_d8Y#eA-AyR0an?(Co z?~1(u4MRaKG^_u`j)7UdTLt&uc5sysAO#4}5K?jMd70Ihb6zfb^@G~H90V&Pj#VH` z%+Mk96%74xh9u?-v33K!IK<3J!6L7f9{atFNA$`EYt!B9nKKX6GjV}P%oU@g5$4f7 z6nq_cN|V;J*^DfJWo#ea5A(_6&+UIi_@b#tD^tq}_1MqwU?OYs;YI9Hm}H@_FcD#; z`2FC8fEq|a?>=9MI~Ef2h1ue(z?XBeJzvNT_6Ft)_uBJ?7Yf^{N4zWjm~Yt6J}nd- z_-$tK2e2X&*Z80*TXA{+kx#NC!VLs8tHnn-&~{_>cvjbpr!br92yW5g`u zby=Kn47r+7(<^q#ArQ3Jbig;U^^Pm?Vyg~D>(IT;2UcLiT#F2IBiLbVVGD1ksf`cO z*t~|83)5#9g~P9AF^Wfv!`VRI$FzM5Jm+{F{n0kgJC-3iu-lGFOKs3zh@wqQK)wYN zXj;~@0o$ofKq5Y38mI-6k7%x5H~HjkM!Y&n4B)ZK$-eFIK7_{xQS41Qyj6|cr^F{B z<@}&nC~uP!k@PY#Ls*BHb0!jjaw?MhpK$LGl7+8nO-4@PWMmb~ 
zO3p{ZyIpcZ65++i@;VT7G6+t*7`hKZ<>%1-I=nfDsfO-e zF?Qb?a9!Km+$Fw`d(2%HcAvXk6^h=CdCOQZ@@`u&w+o*bA?r{m_cO=5C3srQTMl3H zkeIjpH+zVU`i^<`a0dH_nun2VNc{x7_&%F&douGGP^U&<4rZ!4@x)td)H`8e6EI)&pXO5f;B?WQA{$itPdKU* zPeJH`R{z;Hb@mHhS3#%$;uf~1&T-U#+k*~ZfH$(b`!pMhBS!bv^O`%fM~|Nlsjpp?9c8PM0^NA>dzJcR!c2Y+9x zzx(|KLT3DxnH{c_hkCtFATVHZuee`?tK>V>kl~=1u0ol{W)4;o;U?}e(9^Xrb#iL2 z>6*l>>s8VF(x!FxXHS^m3>A_9<+rO+XyPr&EOPmhi!~M&l9>cd)zT_p~Ppz zDQ%4y*}As#SRn+d@aOcTZnHnjPUjDDgH2{*c5) zBd>Y&8;N%~=pU81GhO@=6#ORZt}x>9xX^(A9sWEm@oWcuoy7ATIN?-r+EEa6d!&j} z`Q0FF1ElI1?n@GPhPy@L;-x*dOWdPj)V?mvXU63uDC^x4x647HhVhQXs}hjVwM+al z2fk0@4?FM!60dXMpG&;ifv1XmKeLBSo($lpX)*>y-vhVDdRIpf{$x4Aq;VQ>T{ffW zmw@j{LcdSY%LBil6YV|TcY_al^Zy@#pPQ1X7ouMReKrPhz0qF`{M?j8H=z2?TLqq| zkE+l6-vVccY%A3#DcOnI-6V+{L~!AUd$fJ+hq?g7EiMh=Z3VEiuMH75h=|KV+~T4T zab-xpc8`dCn~0#uw=IPy`NETY5kS81YLuWHB`8M;%2C30l%O9a=ttRqXhSNB_vbpu|^N5?nkpRJx?J+`jXL7rvBV zgBxGwTod;D-333w6)PaBm|eQazhHLp)KSxmc>zqZdyHl?H9Rw1D(+a3AqgRiy+zKJ z6PG-4(*C4Iu5`id=|_zqB9S!r7(x6_Mg&DJPv=E6f<@xxx}btFUR*M>eC8rie*XC= zrQ$ig7s3e?OA0Iy zkJRwLKu<+nr+1;r!B62AXt++#yP&8@pNF5K|AB_<^fzd@Tca=4 zaGid>hUaVal^U+ob2Wj=nJz9IE1Y9|70<^toMUo@iwl%trauK5zE-2x!+j|U{)UF@ z;l`5S`W;F-J#Sj2;!EeNKP17|Cc(ER!QW4Uf0hJ4gOvn|*8H?o`F4?p^Q%hX<1}2S zze2-x`e_=j)6ddyou2!7sGRY+Q^R%ol^U-5|1TP@)BlHt>-3LnxK6)b!*%-Y8m`m7 zt>HR-r-tkFgV?ai^m25h^7&gy@N<&j6O!QeozixG>h0ctllh>S&pQ5`hI9E^#Yf#e z=cNBwqu2eJ$VLi^^twNDli-ge!FMFVvsh`Noc>Qtf?uWKdOKgK;d(qR4cF=acM^PK z61)#9zl^7TSKG)W_<|((@+A2BB>3wZuJ=b7WXfLRbDoBCE=85QPs8>2>vy&3{;x_x z|9cJB={F_8zspJj#q!nTb7K-*-38~Qe>w^MJ4tXg#Xx^}*N@7FJo)KDoL@Z(FVt{7 zo=cM8Ki6>GpMOn)zmNnMl@{@E#`8K>!d~#DN$@`;!CRBy{aHzR@qa=R+^^vr%c*>u zqT&3?Q}~ZHoNJ*9U##J}KUNa_mr3y7Xt?gbl99{&)bTw@=nL5>K#^XL|BXrTN0Z>& zlHh$=*`S>MUy=l$pyB+ASNT?=;d(s(q~SXK#w7T@BzPJtGZf>g`*TYY{Ej5}?~>r< zEDWd-_;F5ErE9Uo8SbSTey2wN0}Zd%=*MgLeHuNNrj-8=X!Mt9_^&m3&gCllCD2nb zKHtGl;nf78h-c%c@CTu%B7QD@3ja5PP{fDhr||zE2u1un{1pBeK`7#SxK9bx#&e)o z^!j+4LlT9*NDrVmjPT&6aPbk<3;um=JTpwA_iDKA{}URn$CKXzR1BWqbn$QrLJ{XR zavuDs?`ilO8m`lu+PGAw 
zFVb*5K5u9^+Zq+_`x>t2!z^uFs;Bou4cFeekLNDXP%+tF z{8T)NQ87Qc<^_?%LdjPgZ<6`0*B8>~OG3TA5O0?BdVL{&lcZPkGb&t4w4>7N3+WYq zSJ4uR^m;zm3bZFb{941ap;zgBT*HTJ_$CcMSHpK`_<0(>Tf_D8dQZc1H2Ox3|MhbJ zq0A>Hf1*ZTn*?u=IP*a--whhB*LVH9Qs;C1drs{Mh$)^SG+V>ZlzP7O{*L{EQfsA#{*d34x=<^x3l)g> zcn#0h@MFIlIb6ClJn}0{wT@g=T0V26^yRNdY@CeNhX$U zPvvFN$h8ydZSqW%^X*^%e7qyQy_D}a7qu_u+@sIWcVmzwDLdK{5>pQQ6Tb+eT}IB8 zzb|N=L4L|BLUU_a?D6fHox(AGWP-LUiH&-f9Wx}pC;X_ zew*&S$KZZWFYYfp2KURQdp-RVkHLMlbg!qM-*e9V%Lc|KUJl2gv(+m3R_pBw&r%xs zX5T_TJ<_aRZI|1+M`Z+1T(ismG*ywf?TGoaw(E7QMNz<3~P}+D~Acdan5|f`!OGNBFfN+lAU! z#{6Ses`#t?%R=D2g|B3Kt9=5tZ#}~w0LQ(BUsq(iP~k9qPQf_C=Q@EFOQ*0Nt zGF=Rx^N`N)uZ2Csccz~bUa7t){l#$KTlncewB76JXItb9zg>p!O#ciSetbWO?Ol&O zCvH@n-tw=5gHE4Me_X^)9-8-UbTqxaFIUK0;EBEii zy~sZW$oHdDZH-?3Ni{J@47KV3PJ zlT@Zb@ig0&`o30vINe_|%9c=&E!7vvG#Hy-ART;pnFhrwvk(c7h=>REz!c(%tH&bu zc)8NYI4{Nf7Q~UMYkjzKA<{L#ye}QrS4PJ}9Q_a+o5kgQwT&n)9Q(#?OKEf(If9G* zX5nk@NhVERG!u$mU5C>kzT6I6I`<^Qs7vOvv#)4B)I@{OH_Z*P2#wigX5m>l1XXC@x+UCAfU^m>3#jlc9Co6N zb{ob`MBCR}8|&Z7@@1@v{-mqs4f{4Y-0UYrAGB`3eP$29-KA!SX+GCp-+pE&>dg+6 ze8BT=k+(d73>==>6sZ3*#9sgLqAFBsIJ_3;qFRdZC^Y;qANC_It=n+s0l^(E%EMWm zayNun8`1K!A;UU=8wTg#1Xo?i)nL|Mh+QqVq|;z-QwUcdRA(ZA=CA889IZk(Q~*vNxIJM3)1WowPYnFq+0 z_i-9lW=*V39E0&_w_C)R&8x%-NaSaBf-67oH(ABSI%Kszu%f6A=D5&Cc`t}7V$J<5O%lx&9W z8O3F|N^Ld{rGY99A+6XTM3@&J7P5@(a4Tzh=CkI5@DOu#psT7g!>k?#Ca=Qvj@3h; zxha&pH*sTAZMwK`uFeJDVv|rX@~n`Qmkv}Me(xqO{VYLNjTkfi;@l@w9t#4;MNBvp zk2{c1mh~T`*SGViAb6sp!)xKjr<-#+Le`5E+#GVXfTj9`tmQdvA(s%IaC)%*T?lk8 z&k;AI;wA=bQ<19`T;2W}NqI!aB&iprN;bPH3$pDWN;R@sXMH(s2<1+mdc$S!B&icm zo(w;HdzRN~wJ(DLF}VVFlpVenC(OzrI@<;5=c^wYK!12bd@oFYyoe^yc(V}aHlGXk zh^&6a^#(ZF99)n-(HwjRGy{hQTzdnq_srUBF^3e&eK8Qdr7MuT)vP@Y8BH0-ZHTnE z=HnyW7v{~_3rWjcA-cMrvV~C>gv|S$IPxT<-HVj?Q>wqqu+7zP;FvseQ2}_1C?X+4HFw&_N1H;fHDroIjFVeZ>w&i79e_lS4m!FdG6r#Jx#oxMk{uQ9`cKQGK`MloTzMrg@^ z-BlBeS=}LF+KVUUx93SGY7adpX?vsRnVsJ7;YsjIlHi*B_Qj>6zKQ_%(eC^cDZpJE zh!iIz#feAzKx6#aqd4(spNb?l>dT03FHDGTM^aCK>~TFIZ>kHzId6`xxqo(7C-911U$IQ{9b;qx_|^Y4oO4h`4idB28p4NlQN 
zq~Y9ltnf!Pob^QEf7I~78ori=0mXQ-EmZWcvM`{SUQVGa{0Ah2VtVzx|JekgXe~*E z`BCvq;HsV<`yL{DKa4I!{clOWdb*hRk(q&)vkgnOmR=?IQfJ9br8i`4@+_5F-namI z;^ZA&1svfjJ6S!HQJfE@pUK>-@RS3mnM-!`Q!jhc&QReyJkF-2rnqmN?wxCaGh{|B z)M%+y`KR8Kf%WFTSrRzorOfI*Es6Ub(!Ddi%BfPAZXEBFp*Sf@P-kndG;FGHR*vpCsgDEEs0dJCIQ_Oh{o zxy=|0(7q~{E}HVk#f);9IbD+c3WF+r;KVpz(dd^_{+lF`Su776+4W5&`<8ECFO}|;cc^`CbSX=8&@cPkZrc=-!?qNpY$z-M{ zq_0b^!)=-gN$UxW*L(hr%2r1nD4FWAlu;I9EQ^xHeS=cSI`W7&s0&e_+#@i7Rj#Ml zb;u>QiG*NlP3onP-6=$QO9AT?eJLcqLKY`+dooLmSu|?m#rQbtIkGf=PB9h zB6qQ$MqDavw?ep&_^(*Dx{$ql6p`v*uQYXAlDh1nF~*TwxghvfN>f+;EZTNn1Dz17 zjys>RtA!$bd&*W9+@8fYiPu#NIou~xL@Vtj3p-JQ^BT?g!VbF?iZ{giBVj&wC? z+bG*pvenxu=}`q$Z&9O#IQCY`6^9sbK@xWb%_-UH$WrDFE>Led5wg|edC!f9 zUq-Gv3(+3fh;irNmP~aeNq(b{4iAy)sH>ibk(E-Wx{$NJiE`E{Umcd3Z1r1v$W|A7 z(@x1(KP6wi(RE6`It7wL@EpVoy@@6YlF3iWSN|)PuZ{`vQ}We0I>C18>mVAbfB2h| zul|L?)p-)aDO?@GR-0^LNf*I3Xtk`wiH(ngk+<4H)|*qJ1BV3;yeXu9g}p7KK>oK2i}p?I~5t$6I>huf}Z3_1#p9RxW~SZD}&i<_av0U*dQspDfk z9)~FtT?i({5g|~o#XfPub-oOUf?9;NeViboT)mfpf({ReVrk3rXEaZao?!%ChkP}V z&2uv&M#(g0%O*-{39Yn;x9|}HP%bs=O`pYqZCnm14qwo-!!H3mOmU?U2Iw)X;GAimJJS8U#aJ$xN#4(^Y`8W)cMQAbxlN|rA)ss#a$S@rk3_7;`Y1znKW zO<{uXLHPDy9G+R7mIA41c%N~as{uRjalS(2X3hUlWKEDiE%-eYy?q$VJ{WnYEtq=*B8)=T$D!O$_R?0!NQa{E7W!2s{_ zoC51rep~Y$dsF3csePnuM9gQ`ArB5Bd!D{b5}O|eYkZjcFLGMGg*kJ@@)C zR;yXBm)eI*M{raJipMe#SAh&&)Oc&@-qVPhpg}+C009pyxrc?suVmR-ConL$3LbPJ zAxeJ|D9~$Z$ER@OFw)DMN}A94&_Cxx|C|r~&zldOn{z)TpW#VRqQ41B^!47Q?=_$a zkd)}Z;9aP?;ALI~B|7hD&`Y;SO7v07uktOw-4Q$uO7v}_KNnL__7QKzfXBVguI`;` z52HUc8*T13**w?D=2^7OAVnYPFZ|FOJOw`V2e4d%eCP)QgCXp-4`8Pk_M-{Gq+(~? 
zW8G_g+qw^2o1T=qr%|z$RPdqyJ3RA-%6sEO7wUmGxUfe1hDa~~c?BTnf^Z|`+eVA=G`26sT!SyA zGO&Xx2K5U;gFEBE?rF%NtT_Rnr_MMGWLh@b7)ZaK~Y zvP8M9n4Nim3I%v$B`5kx%yczFs5t_iA+I4&4N*bYh)u@eU33LD3VUc3+pv|iG;do; zgRAB(1YH^$kjHqIJ0qnvz{;_14<5(qINlIRK*NwzUq~DHX|Ho_0`WKr2PNq_*%MFJ zQo#V-W8t@V^r!;IxI>zx%=}u%z04!-7k^i!`x%B$z1Ny&pDEdTO~;fZJ>B2IqjUyxsZLL zU=^Y&L(G>BfUYR0A__@XWE2a;H&J{=!Ae-DaE+3SZ=x5{7A~-J9*_Lwj(cH+ku5wr zgrE32_ybl};YkkURPn=s$|nv>EAIgvIfR8rPs`|FNB+|?y0G8}PZ3O<+~K&Vt2VL+ z<33d@Y7Ve@f`1~*npM1}fWV$Fb~76RN&S1sitH8?;M)e3<*`|4NtEWX-Kcys@pJ5Y zF1OdJF2r|ii#3906U2FJz%Y)EEIV#rRA5Eg`dP=el-hAO+L7nt%P%Ux`0809QD(o7 zVNE{-l3SV==aao5l8{54V_V8bfaCnRcrViEJm|EZ5={jSh^Ta1;T@S>hhn zI46LepJF?GA_WhHlze0_nRw?cB

>pW_|*od5hY$A7NI3m7bHkp~o{$j@g8{l6#q zd1^lIuVCvB{pIQ8gx)QovBODyelt1+L4E$WkdV~p>=#`%H^73ej_9_`ZWbW=Sijxw ztFM*B=92JS2>J_K$_shu&eiY)^JD7MGW;2+0HLURLyw8+JE5$%6|AK0=(n4oq9Y{N z;BVnn7bvdIPBvcKT-7%z-Q&@A#^|f?77BKy;0EXFCh?VhJ5NJ7xfeg%YpJ4R5)KoA zxDg!18}LMd0GXtzAva@rZgZF_dV63guu57~awQ;U3h@Ky%?u3$=X=qDZ72bY^gkgQ z(9*Bb8hSIcOzEmpL*^_oV!zqL{Slc2-hBpxD(TZS`o+DB>_we&U5&dk1t3~3zlJS@1g>3 z%|3%aob!PQB*TOt9MU63L(T|}@Y&-F=Kq92ByK~0r_bI=@`yjf>ZGfqJRyaTqJv0O z3KykPLg=&Xn(sWy-}{8lOSLROJp)|U=&!}H0->sj?vrYE8<+Sszg|XSac(_G2A0S} zwyF3~<=rT9QK`J^@Q~PqA9*GWdBv%xx~e14*OFeCuTP>DHc~+j!JXEMQ7$AF|1SMo zO8*e`C8D}t%T!cS=Nd0GkPm1j-Xh!t3Iui-agV=2!3TuaNi8KnJzuUX=Hip)Hc=G^ zMczFZcng=w3MCX{RlH=)R41%FDh!EX$W0tgWUb+>tJl7*V5m|XyqJNtH~VqKR6dSt z8S$adZuSTw`{$l6WS?$dQD7giKa3pchw0bNBM!)V`1D|5cVNpdOups~Dd6O5iyz+R zm)pn7?0OzqHPkxRY>lYLpumnf!=O5E!t`tDh)&BsW*-3=y0vs4`+ZEvBF(ej7W1uL z?EfJUIW!b0GZh9aa#+m1itZm;Nw=7N74p;$pbAtUp<_I(P0U4c!0Xgr6Kcu%X@8i@ z5PM1Vl{sVnaci0R=Wyoq$;}%uuqw@HyeC_EwNJB_aVhBUhJC24f!t~9#x z;c0Z`{nF^l#gGE;zu0fXYZ!ktVeyNry3yB^g%m(TDE8^-tKm}lknW44&V8Yls~P#`ZGw_rvH2zeCP5^hckJ;f1%=|_x9?r|4rfP zn>l}@@bviqR^e`TD29<|Hw<$WrQvIY4C-?l9#eSwxx4EXJ|#__@WgYY&TS>L*9kr(urP z6eRA>QFwbAp0DuPY3b$J7QOMElXo^h%@00W8SpzZ;M0JceCMJcgFo#V3Pe4fBl@1s7)e2u_)`hqmc^Q|Qn_9Ni; zPM+n_gYaA&ry&m35ZvLCHe5WLbmx-kT#}rNV>iY~5^^52F(!2mM-RGlNpvo}8Y8@w zq~~KKX}Y94H;K+A^|?WJew94TL$aGo-f|%?3DCh|j=)pY=R&v#;kj_4XwQX1N$x)O zYBF12mCP7He$LZBJ_GFM^1zhODEs-R8M!|S`}wC3!9Oqd^Pc?YJVyd!RAKaaSZroF zO{ek8W%D7rAv7sT{7wUB>!aoGGjO(J8lP+6)J-)m^%dE@e$~K#V#rg~*7CnF@V*8v z`)}z_KLf8dDNp_j8s?`dpD=J!{-}ZT1B1@b(*|z(GbD$;OaD#z zf(-ce8Sol5Qpz8*UHx?i`~?Fy&$fEQz)gP+Wx%gzBdEeP{h6KtUzGuWBLhBwjdCyk zebczygmbt$>wA{c}|Wl_scTi-^qYKmI04tz*`L5>>vBE zv4df~HRE$_2K;sdH~m>`;HE!+Z{ViSOYiX`A7zQxq+Mh{Cx)er40B<12^OO4K^Y$jHihQ zGT{G~0gq?E2e2`MN%#MJ10M=|oj)TD+&sJQn+DENu9nwlmZkf@*pN5<`I`**zhuCl zGjKE9`V9C$Hg+(Kk16jqa5KGkWWaxw0pFYf&thW)!*EUiFVBFN7&vB^lKFX`ft&vP zhk={&Yct^QX23f$;J32Tgkk(m|7T^uf0_X|`PIJ$zjQq@`PIi8xXG{1PmfyO*jhp=H9s@V|)hz>m1ot#dFULOGpDNtb5dRYXG`@@=4Dn0xr}32p 
zVTc#tPvbu!2t(ZTe-%L(;v?~=<)0!5L;TD5)A)QtU#04y@tY|KLp5R|{xoi9z<;N3 zUPc)5+YQ|G|9(TyG2=f5ZfWR@-x74V#At|Lj6VP#hJ3eyoB2Fk zrH}HL8uC{g_?Haa9N(Gs>Msm=vs}Ju;AXwzW-=OvYt}3B5z-L95`Q`$#u0$w#jFq4 z8}-3V?=1#y#>cG+p8kx&pY}hL0e{)RP5&nv`sx*iKermV8SZ8SFEr#o%z*#W&{s|Q zO&RcEhTeFs;r|K)@8M4d{I3mt&&=mSL%%Z9wH;y6Fq*d+clg`4jk{TZ)t)yya5*DBn6?$H~rV`2jgkJ7uyW^k%m7{8Se{|s^i1OZ8Xg9Qv7LrT?V|r z$|qWz_54BuA7S`&rGcC8>0<_N`two-TrSy`oK6KPu_sd9wJG*oEqzm-`L;iw6FTfwSG#CaP}eH|v`=CLcR>0&T_{<#o9Rc^y4> z!NY;6({K$uBCikXyCiYCWAyz~15+KNr_G(~&`j!M#-s?#XI1u~EH5f6dzcjE4?aA( z_?o+Mf_SNzHGbrqfvJJ%IE49+!^i%KC%1bgbd3_ff-tkd9$Y z_Z~cdRQC(%7)I+E48IhAAJu&|9m8mSgzm4$U%GoYFgEc@O~aiudOY@r^08OlApOy2 z&@+6ha5_C&zoE{+@Ktw?K{7=fqK1GGE*MsQr($oKZhVV5Xdy^^`)?d!+ zrq|z0eC&Hh$q>($ey)4!E&nIqD&0RNFJg{>ZAp6gnfTa+5(@d`dKKkQdipc*v9DCh zo?d^WoS(6#CKDffr;2|oUv3!YALkL%HU{OnmG?<=%A5d+lE1nfTb3Dfd-r4s`mp`%HZ7E#H(9X8E%&Y4^JQ zr1xoABpl{mE!{t+8g{-8Y-u~(@M^sIdB*M2+> zglkI1D9ZMa&Vi82il>Z6Ts^4AijVy!~ zqf>O(f{WHgcTIIP#*PHm#TGR0B+^jaD6y_UBd{Z-b~`p@mFBg+?^@kJ_I2b=Uz}fr zZO*R8eaMtj*DGPtu}Ag~!_i04-3wgDU9lrO!ogOLKQH358*q?P->-+N`;neK8oB#^ zZ+^7)AxC~R^3ci2?%44q7esek{xF^wob(i7cSz~iy30bPj>}+mV}9wf8{ObQb}i!$ zfYom1ZDE7AqzU}wsH(0facC1r@9VvV&3;gsW3$!Qz|juAXYb%IA3 zIl);T=D}@WbIPm$C42n{?ssVtnGrkk-iBm`Bo1KL56?PC5ltr$JT|EeujUmebKuC0 zQq+%QE#-C;wWa!1_zw>0($FMqmKPPGCqKJ50qVT!%{}<=R3Jqeuh06{Olcp z>*zxoGI^s(gkwiedh?6Ju`W=lzi+u#N8x@?Fih(971ui4q;3xswBqUM zukeW#_Ktj?{T}Nq{>G2S-lzuL zI#i4oJqXiVc-&{d;XC%0%YF;-^Hg`ZYJLQ~yk?JUS$`mj7=DTyYZ~za&|zDX{Wh{d zct?Nnra50lHaLnK+kP$I8#Ee;Ng}_{h$g_Jj=~-5v|gr^qEYu1z5yon*=SIYx@ww% zi`LO{DmL|HgGCjS)aTf_1~T)FI7$K4$8Wc`J@bME7x1*yiKEpbno3C(40DENOE=YS|7K)S!SNmByp2wX=Tg+;KsRrw%HbwfS}-*B-NnMj zyIM1m#x_Mcxr(}t-l)~P+M^5~TVmU0?(weHq@;wJVmoKXV~LrYWxe#?UhdXGd*T;G!A!%z^>fv$(f+OYj_Hr910|~x)`we1FD{?Zcqc$gS zUTNr#EFhO0rGxT{PtLc#ncZXPzMS$3oZxUzL4jp&u_7n>Fw`6Z?v7e_AU{~&$6L{D z)#Y6Op?xe^=Y)$s_M6JhsK}|ll<(6~>!E00CEB4x@9b;6#P`M8vbz*pIIpm`$-{k0 z``?3;^tSABJ?AedD9hU}rSKwN0ZD?R%JQ~IQApX_k|MD7R;))v>v9I2m2dqzB#@pB 
zuDx62tj5?>eF8Arq17Kt^U4Euto&I^1g^vq>=D}$%qt{?mzg7vO( zVlqKA6|P?%a|IUzvs}+du*bE}1joR0@XkSfu!5~lE4at$!e}bJzeIDjk}O#v-`O`G*m$7orWcgqDI+mR@xuWzfS)?b_mS+c#HQb=5A8 z)4JCn9o@lB=#qtu^s<*-9;ikkZ+FQC_SMz@fT(HV%hqsu9!<9v3)VV=o#>Cb5;Syk zD~^IHX4enICp~~?)YtULc5>tel2t5$J1C-G|sC$lJSB~q6NZvYB#>8AW zlTx*|BB$OUn(r(vVuAkSI z7B(m9;S7QIY9`F;6;|McMZeGXbGtuH4ovW*hx0j~`lrpOepgx+Ypq3J|Cs#fZ&7tg z@uN>1j^nY+1y9 zYyY63Kg>>xhuClZ7uaw8RG)JOjYHJZ=+-T@`Z6se)gzjE%w-L{OI!qE}Bf-!LXBp`9A(W4nO)6!W9dur}?RP?0d%C=RYaj-BaB0 zONFP;Ny@oAJp-3=^c!)@uOmIp|KO2%k#05vAb)wQimE76VEggcoBZhvcpeI@H~BAQ zz?1ytQ|CYSN!ZJ;y{6>TWG_!~mWw@+ACsy48qHLWM14v;<@X`KX;=-ZL_tLo=Km3d zA2o7*X#HUR5*h<&%n=DIQ?P6q2(_&aDJcE_+9&0{w$4O4=9#wq<9mRmKh9z&M5y3N(sn}GG^eu@%EkC%SdSAJ^g&HaPQ zeR_KJv%XGM?u&3W&1~hK@3d(c7LC5jepzDXZ&0~6G2Ux;8egI?ogR%ZQ|{X}psqUJ z8vZVBdaHll3#3Gm5@d0*9n$V~{bPL6!={tR2J_e9=u{DA8^d-Z-96jvv?>C`;h3#5 z?n`M2PJ7Jw4ec=%e_QyL`bl^H)sk!6rcocr1SId}?=@q`3aq%KXq-eo`ZqN1#8mhm zar-CX5q}VA-Ou1KB{`DU7g#5Jc34eHayk;{gvGj+;9%xh*Q}lRO!oro0`9lPx@PZj zyui{D_HI1y78WnCK@(U}3gh@$V4f5Pu`sY-6%-*J0_L>kHt!3taD?$EEIqW`IdivR zM@!+^tk?tB^;} z7Kq4nRj!L2IemXY5q@#VcWln-r)3h3#Oq>-)3+74W830~@QLg480OUuMI*lWhwvF5 z#=0c&V0725mmi8PSaL-{Y?GuxhwtYj5{S+NF-WOTi*uUrEoDwqR#GYwms7~vRid*a zmc&jTrf^OYXzctjdB15+Q+Co9?Rt}R(Jv(hfUY3jH!0mUr>RfsK_);aB(~{f7?q*z zrLc(m?1K)t4@akV&07Cb7#K3*6p}u%J=S$NjLblU=6A*#=eNh^#ADIOt?`4glKJ7N zRXnpXwrT$Uq%A-`N2e_qc`O#0yCau(d!trfd=v6@WOsDpf|2lN?mNQb?WpC9Cy<{{ z6s$x%qLu}*NXgumTncvxi&es6SbS4#)7-an={_v*X9Vt({vL!YNYJCOz()YLZjIy9 zETASrNZOW{L+A8hDFY) zY=TUfG2i`=|5=fSDIyb5XJw|qmuCxBQKG`>e_2Gniec2#lY`Mo5BcL&q7;RTRtO^6 zFH0A@^ZZX2FjbR9!3wWfCXjftKoky~A4h(X@HuA1a|)srd58wnyb`r&N9^u+Gjd`h z96T$NGGcy9tZ;r48;lb#x#?H&{03%2EHbk@8nN2mj42Uq!F0YKjf|KZ&y7|LMxhXh z$E>zP(Mdl4!Dr~`17h2w6~p}R;Mq5KdoJwXM0|fGD;<*B5wqKH0(NxLt^UnZ@Xd4N z5juR2Zr+`O+CFz%F639pghL+H&1yRm-CgYOKe8H76cn0d!C1%zu|mFp4}fHGU-a-*L1xtT!SX)H}#GY$9rbdRLII zM^_@FnFPRLQF{irtz)H0RK-TL)v{1G%J*^72!F%MSYcZ&V)SG(2fCw^3jIy9DOVT! 
zY1?sGt#%LdcUMKNhuYT4S_QXZrsPSH6DYW9=>c5D!BAK&w!qMD%Xwk*yc24|t(a19Ctmfja zH zV+&@jM>sDRP#uX{3uZSu@DdLxh(_kb8~K8x5U0ks#g3rNx6N;j?L-Yv%x_A24o4$- zZOsf1tBzRYxZ4v&o4a`>>sn*f8r-&#wY@PPuS45L)Vj%4(F&2;BXi%%jYbOF_TdHf zN^#uXwJN&1(7z|2K5aqnzOv(bzRac>i?!*DpeHM;v&e;bN-$%F0Ql4e)UBOPGNfF62IuH(hwDEQ(=eESmo zupeSnfu%~-+%#0@=nfvow`^|w5}cXG!I}A@cmQYSaeQ$4Z#)E#=U6Xr62s&AQ5~rK zj`#5_o1Nov{k*PpnKzq2-#U-W0k9820{!sNS3l(9p`U)p!$YoqI0p}T`k_A_&e0D8 z@X%jB48+3#{oumGKz^0Qx}M+x=dpLNWF#NhX?(46)eJ*4v0_C~$KXR)HIj}MR&~Xj ziyLQv)JHzMe)&01?B_hOs2HNo|DWcGEyR2wzS&o`_s5VD$-!Ti1E2MaI}*3SN`4FO zi5KA&kvJJ*zK8F&b}vJf&E?J}5V;ACSX5uh5qm9t zSiMylRA~-OS}@%ZPoX3ag_3Y3Ym+6$EEH-7B<5vsmE-;el1BoHf%v)*Yn<89HYSyy zvBXLA$<|3hcZ!g(Q1XseIDQ8=H@0&Xva9+qf`C3zdD}R2`3owCj6*NHy*9RKI~FJ3 z?ZCM2ZmdxL_dqz7z>4IqX7tL7?$V6j1Pi{smPS(bb z9Dc{HUAjw>LF~$j?s~}a#Q7W-g5BZ30L1@-rMt)j0ZqYyV>e(i152cvT+eTwf%;sy z)o*XdGwy1c)_l-Hh~(A`#>?Zw>S?mClJa$nrhF}T4gty9JO{WP<~@pKAZJj!mU1=R z?BRYcbd=Pt<%NPfu<(7byNY=;pR>$So;?;65GRJ0gHM%#V2RKC{}4nDyYcSH_6dF@ zJGND2HAo4p5nHj6kPCBf z1aHyRWF9ERI=);i*|qGgf*JM@nPJ;#+rAfq5PY?-~0 zHA;~az}B+sKu=J6tO-m=lAoa2_1uOsa1=;_g6J>4ySVxk=L{{oIdTBxNQd)doflNU zP6)B;_P(Is1B$K?q6nf%R9Z2^$M+)Ee!+@2=>%(qQQsEK+8`^-k;Cysv)Q11&J+7N zPweMBvHw5hiDhfrlPC5qB7!HDcQk1{vE`B{wo&xwqBpgVdMk$fz1MlXd)LR}iRGz? 
zzlSH5aWj?_a_uMBV|$_rMo{p?E`w*@(CsJD)Qb=ms&(oKhgj9eWr25bA7{7D&YegK~VsXuGb5zJZDM1t+m1DdLv;#;vUgWuq?`c9VD&eX*tV z4z81dRbTww(eAS`HMqS6y2SPbHI6uc4#v6ySg;UbN92sXAnajvO|3|5Lr?NrB;!lH*FvWZ#< zl6`l+7keO(Em951LWfRj%e%;GImK^xmlbc8G)~}!{le*ildx^#3!tNV3h5yo6+3*Y zEyKb}UrF)8mOB!Q(Fw9%30~M&@xU5{{6BlON!1uu0$Gb2VUqa_uy3Nf>(@WZRyq z8V4RJA8t9GNGFVa5yKiizkxa9!0Rzz;M+$@7>k=bvZB_A__nCEAn-cgfcP#vw!MLQ zru#7${9Vw=!lrpU?(WDIU{P$CBzOgmuXYPifj!ZRsjfHSPhosd%n2WX78JyawH#nU z___?Sjty%Yz;*SC1C~p=@KS)aE5Q}dWW$<_!5~<#N^LL}`*EN6rT}1bJ#Ik%+7GN& zSK(F>T{R@e;6c#8J|I|HCxiY~C6ai^qdwM(?;ff6%ZU4gJNc3mmq=BuFH#vzeRxt9x z-YSvogcmo=(MymU7a;$zw*q$;!zTb43v--SPou z&B4YBKuu)>%Ypy56%-jZ&%g~I-;jb~dVxrNfC?tscNxtQZR#tKRoEsXorB<2yy&CH z6vx*`OdkuM3NaflLSW{_iWy_R0hC~3M<8Eg!{8B9fRNg@q~-GMvIfhLnE7Z<;nHnI zcHxa!&>Tn9=VI3gm{g;ducZ#w~FDxq5Kqas+26V-=QSLEHV#{*ly*$Q);ir zwlcQDAWq#i)XIb7;evs66KGSxm}+D5P#lzNO{MnsrMuYEV8iL+CpSAVhHyRC?0qRY z>WECaK*ErW9LVSJ27?SybFq7XV+{;AFftKNAtU+*L^T?jeSrkgb8O=~ww~`8dX2_J z4Ew!A+wOWE-L2m~CffLN=N7?Sd=c{wn?YUmin$o=ckCD%QA}6|&d0C8TnwvU!%7~5 z<+Jhl58@GX*O+F&;|e_D{-c`Dd0dfGpYynW#(7+!6Qy`ud9)+v+x%4KabZO^_<+6@-m3mV>h0x)+j{C&?P7ASSnNeYGqWl70@Y;l zymAxs1&EGNkh>~3aKZ{(S36zRo$wq%_QT(u?XiedklW6tAY)!`K>#1A9||0ZQ9_%!6->SvoSa=0X>=s{UFsvlgMPp9EF-D~LK(;U&1HGqY|nX77G4GwWSF zm|6WH$;(=FR$kU3lb7{pVmhxHYEZAdtUU%;au6r>j8lneLH9f}E~eo*pQlGsR^Yy) zRBqozryKZ?I_+CAEARmN;9+qra~grwCXofrhiVSGYW|rO?gdjZg8EB8M?wiRcv;E9 zYE^v=swhsjcnuGUE%=f1j458$L(ryD@kJ#Rk?dJ>F)D!nLWSHVlyL|Ag+|LyC`yeD zqqNA^dOySYEBqj3OWN^sHDXJh32TrRRNjD_g;@2L7g*xN&!b1gvl}`SQD=N1wHIgn zB!B{;W*@@`@VPsne(Z(%fjr^QJIskTtm$gm5qUSOH19)f?7j>|QI74>a*DY=ryv)h zVj>hXqvJ6J>cy0&n>)UFzD3WO(0m*RQh*K1WO6^Yb<2L7d@$k&_|iEUI*|GW?St?T zGp1WwF`bJ^(R@yd)?-3+d>O)o-$i_%;haIZtr2yw%C{qOW)z(6m|lf`gE?c%Zm^c_ zr8&gyeArid$X}wc znEgEs-X5|3mO8}8?ov z6yCr*&q6yJf9I6!4(v@EH|dQ1WH+6am-PI+-{-&BH+S=(Rau`3FX?0iHBO~*8avao zrE8@#`<~v3Nq_%FIe5)rGVhgB8(Sb_nrwrIRO2k4-uOv*RuoU5r@d)Lh=*Ygw+X8I zkHc$8C)p0Jh(FVO8vK=C7TykXtV;8qdp&yNLmZ>h&h>NL${g}c*m!XJmH@($%sdi?KExTrAVUj9texPkHZ0fiT(;WHKPNyFzT 
zT&5#s`KZEYryy~+Na0h`@Swu0(r{bhE7S1D6<(BzfHip1S@_fJhvx6xam(z20UTloXpVvd}!dNKjLI7 z$T2>b81igaX?QW^znB3pGH@=w)!|MsaMPbM19uzp7+ta4+#l{AvHcsc<^` zvVq@g$bZGa?>FQ}8Tc$izR@g(ENTaEX|t3&zTcy);P#;Zg8HT==2Ezn8-d*jui z{0Sl5lUIjYicW8hvS)r?Y2g2v0Y9v8rpwGX@_N${H|vARdt=rgli$XyCnlegSx>%a z_;1z|&hpVPT<*Ko>EiLLG`yJQcq0=EL)<)5U{(fvC>u=}%2Qj@{{NSOvn(_|iZ3|~ zuaefGfxS7w}1uE;QS?&rJ^3)zHnRzfO zdy;s8!j<<){O1a19iw5m^oQp;(`emRrs}FW5t^uEfo9`L%(Y z_3(EFZq~zB4BX70jRtPk!)60F>*0@#ddR5ja;#tkVVHlT@u%^hWx)3+oEP)`%2D}2 z+^pxj4cx3J-!SU486P*?)6gFklz5&jekEqk65Ma#x~vI3W8iFOw27)4`pt6GW|WaX z;_HF3FnM57qYqgA+3>)8V!ifr9+>GfKc!6kWAMOy@~C}Qd0;+y%KmThz)+u5&1UlY zad=?9{4w0$rrJN(xeYJA0`+2$_)r)&K zFgE>4ZNZ&1E7euc!)cs-uxT=#0pt;%+rVd{Ix&XNeoLoE>kTXuhOfFw+)4BO^netu zx+yIt#u)T7-Mv0TMu(>zq(2wKLZsj9Y=Ny*;p=%h?LIwPk7jVcib@HLo@bO#Dg8=6 zKY|;UsV6P`hHIrnu1YZTk7cRjuk(-d(!Irhit5Dm{Gs&i%C`;(=LNA|*x{^_Ti&dia@mVD7q9x<6a`Gx5Nz{+e`erk`z5dia@mV5(I3 z$$4Vw-5Kf6!~>JhNe>u3?=7FstUsA}V5W?hF7$UbmLJCf>G99R15?Gu62`;AHH?1$ z^feO?O!#u?=xk?LW#WO^qTCxni`%s5I5P3Tv?%vnt7{saevM_~f%(Yymb_ z-%EO*R`rF$DeSCZ9#kC$=g3$k(%o}xkluCzax}1Egn2a0GA%h7&#>}bFWsM&2d21q z?8iC7YFyFSaZ>)HpJDY3J1J#6;_AU%*&xU5Ui)3^PU{ZqcI&opdV_mfxq_loY&qQq zu`+vu;DX`M)L$_OOS-bk?b4F;PZTKvv+SWXnkhDft(*)wqK4NZ0eD$TAc$LZM>QATo@Dik8lnDzy0Nj@N`2P}KyDX{~Rt0osO*bqp$0XmI)3OY+emEHc3Xc7F9&ZO-s ztQqoF2p*F!kjJD2?t_u<)HxT!c(+=tx872hN?fj5B($etlr!3%Pkz;=Lt z2G*}W3NjN*oKw!V;0RPwnjmP~dlJs(eP6CMA~i^R8N9i`^_*fgX%xX>?`aq}%omW# z7#SnoEyL;YlHde>CwTb0#7RMc4Fv$@)|egyC)p6wYqBD8d$PAXajkSLwKK)*A?wln zsn{jKYV`R3VCo9xfD*t-_~mvJip6D@!6Ebc1T0xYmwlV-o4!i#l4mZ6vI^FE7~48e z_k?9Q9W3lbHY6^k+)5By_-w4pZ;{(O$_tyY2x$l?h~Ovc%f#UL$*7_Ta$MP>hHFi~>hkE+rQuq+M@iKp5Ef&UozYML0r|R^1u4+H-h=*>) zy2yIxGAFCHP4;Pj$wIiUc^?fFmN?~{r=sBxO><5}UH9NbH$jCo1hU9!UudH6!CH1M z7dP$awxXukro#vEPQ4a8CD%o=%7(p5>l`_5vAeXvXLm$@vAA;Vm$0d&toUHLy_1jk ze97$#Mm^D8i!Ry_TU1%c8yo<1FSZw-syxA)aO`z#C4LusjZZZb+=Ocr7LKppR2KS{ z8*Uo-aG)&j0O=drvGcY0mQc=6i${Gt{si8>;_ldoJFv29apl7P-Nha6yI$!+L|Y^K z`r`KJEuq``4@YCOCz2Qz+1oc7x&8jJ*9UictgLe)2ZjYBmqjCsE6ey 
z_rrosY0=5jYuxZ~iNojI=vp=dO}M}0xNAiTJ((b`29HN_6TCsa}9F;3mO*RfgV?ETjjaH+~y zP)rRKS;}ds;?1$0hp`P{ai!yC@Y1-Ekz3J_wnldKEi1<9Sr6~PX5H!ckG&vrXjtrY zWgdKcrMnclzQ;ng2lslSOQ+v|(e;tT!(>h_^4B6KkvkW-UC#{;?(;A|LxBsJp^n(^ zcGM#uap;MABAVp$Jkj0r{NWEtY_(tXSUhsLKb8sd zHVo#jJ}98Z=;HmDOXY#oJrGk-dG8Q{%QdAvCVZiOi~e zIg57D-KQLlhxi*l1g|#Z>_R$RAswEM?wan{5v`r)XpqrBLfUEngs^{=+EBFiVaF!y zjOz%^!J$2i9_v8;a@2Zl98)&k|ITa{(1t2j`mHNbfdLvP-JgKja)KhsL#2U zrId4(6$(snd!6k*=P}o^+fiIrXx{MB!qcv0yBQGbBaR$M9K-&bMa_-gyme?jqPx}~ z+x)(tG2i7ZckU{69=A7x*U4A7Inja~79GDRzGSzL#t@ycI)8(Ge+%hz_9q5n_Y*jj z{B~2K7`I4jE0}Na(pe!8lJQ-v39v-L7?t1+8f|hB-H7cPJ`x&*6JNtEj?obH01abS zA_2e)|6af@3{lfq<;u~Na_e2P(urMKi4~}kOjwf>xqjr>y4b1aW~32BEI#LJ37$&8 z_II+VkO;(y9w1A0ul3nz*&K<5*j0kHywE?$L!|YOldI7ZIHdmJ7SX9qe2MW)_D{Ei z3vC$=O?uTczzqfCT~qT4zK3P8Xz8hWj9cTzbw3}9Hw!8vOT;`$NEES~4Az(6ar85n zpc4xHk0ge<%=Lo<@G3g7@mQ^I1Qu~SVHrGqnX6_CU|+~)eJt+qu2C#So>*5@1no%h z^Cdd>YKHBob3*Hx>a<>kxWjry{PN@`MLQ(UR22KuK#^16@O0E=1 z1C+!7NVuiMz@8EVmBc_uc+lEauS78h!-Gmg-*9<<0!wCmXyPEBbDeNF2y#`FTS2*6 zXB7N2SvaLD-&M01h|>S+c%+Pb>1j?uxT;sf!fp?OL8zt{ub9t%%_0vVYL57oHAPe! 
zaeAwK86%L?^$le@NW~Koo9x0p;-g|xLsl!h)~d^Lt>)mju!*O?R%`}QT(C3O6}%7D zPvu}yKCaw(av$E~x-8FAIBkquTdfy89;8IZcm^1vvQDdb*LYzqS}&+IV%82jBB6Zx$BDrM+&2Z~#F6p*jbmMmohnK+GSBr*v;-b5-|7=#L=vmNI-?fMB zHCE!!!f|Sg7Ief;VHJLK;-beki}8rvRk$g5+-g_nTfzP{qy_ON3 z8Lb+HRqgc4cOcjqr-;Xi$73HpupWWWct?i9&aWl9`%3=+Y%id1+Q~@@ZISMTC~P-J zr}eq~IDFZHhaKn{FMm5a>6D`z9?<)!RUYqV7;ySPbL`XuYl&|bw&51^$WU6({6qRr z*0N8GxT8xKFR6CAk)J-W5CxBqaMdh8AE?@2oQZ@tH3p3&G`&@gg?Jd02fG8%+g^9G zz3|*lf9M;z*yq&gFWl^G3buB#3V!cB^i{aW_Hlblur~KE?C}?t)IM@F@dj|ZmoHIb z0;+s{7TezX(yWubCjJK!Ggg+`Ehz5N&|S&$#@?ynuId4Z+@#Rh?9!5L3*(|fWF;#^ zVk3Mr2$w87=WlT?vU926^+G?v-l1j}dMr?a^&uZv+gmJU(|pd8Wah?a^L~OSJzk&l zBz@XENv&Hu&fB_s>(vYEaFe>CVK zsCxX#!)Axq?Q+~SY0`D>Q8(Rkd!f7ds?lRc7XePL{c?Wy8xH4V`OYs6=$E?^5gi9R z#-C;eGezvdnKZC(`QzC?%n9{b(KpyH>+N$OeRKcGym+`D03W!`&2+cQmXJWVXGERpk4UZe%L596;lo+fHc zf{#gxCnv0bsBsn`jS+=xQHvz3f1eaLOnguC3-Q?3;qD1L{#88lt7A{|AL6mE!_yO1 zYi9a7rljGV0A`J@64%tvi{iGgV^sv20tiE^$Y| z7@xoR>buKkRg_Mz@Xej-pFV&3%;KUk*Pfjssg?(F`)|LE&n5DaPow>l4+(pNMQY5aL?h+6N0eTwU4wEY%)c8rhDyn`YAi1rzD6c~Z91-=&*69k zCSCq-GvFHxoK;K5=OH%qF!YD*dlF|PV2IPH#{W*$OU^g3ZP)nE70$k6sDb}h;pzU6 zubhTBTRY652s1?J{}N|Eqy4|gzOpWneJOANF}#{xCrp<`4Tvjk|GAL;v}wjah$)>-ON|uh~13zv`+n zj?t-ALwKmDeE8w%vjUFMvnm7AN24daa&BO1#a}^c`pl`Lr$2b#j7O$EJY7fa6J6bK z%_^MU19*VZbgKN5PiMAa)E}6K9xb4*Ev$4f`ddHi4#QE^Sky_{e_7$`bs!|nxZWB| z*r?Y{Ny+u_^fQxt9iDcO{#>m5G|QT@9vyy~$8K60hD#b4C)P#Nc+^L7Pg;cCN=RRC z!gFu#tJET?^myrKeZ5V&FH-*M%lDM~vwf?-SGm{j^z}!|y?)l$wB$G*fdw%gA1$Z( z%BnP=uIc&zF!0{$k5Eiur_;sy!zN0*SJkW6up#CcDn5|lJyj87bHXI0yJvHlR>VNi z&$BUwxj8MtX^$Dd)fe%_fLSOn$=`JMUoF1ss*m%nzWC~EOU4KXAN^Z>k(hy!QSgY1 z2kRC_h`eK4&p0uEnMb^5Q_9z#uK?R#VyxfhSb`YG6TIp%#qF>1NQE_YhEMUt}i}2-{O?>>w*G$9J*rA(%lY^!+~k; zd5_lRTt5(ps#kvz*~Fmj1f=SmJXPK2JW?3JDdcnKbVM){ItCJ09Wc-34~)qjJ$wlRT)v&A3b6d2A676%%OY1(2Og&0mO1U_RWj$-~m0fFf?49?(joZEf{rqL3D7x#tJEpd|6(&JT-cYXh_(R8FA-tKpmwt711Z*bLoAGSEW z-CCYg;I)rpqBZQdH{C3c#1}`n|04u~oglvFT7%RXIr`Kb%&p%YUp*Qj`IigjG^Q3v zVCuYO^;VQVr{_5-Z=CebqvO;BzL>4=nzbJicXxPKPf&&@@`!K42N^&(QHU2xt#OI{ 
zE)a|TCh%r#Q8T8Xn~(b@`1@F~a%)d~A;H9_ep#bEQK#A5+{shJDe=h}Xu4W_+$*x{n93^I*jH`+6h$ zasXChCVWBVfD(wrMjn0#!3kdH4p*IRhpuAhN5 z1G^WMd?Vip>ux}B4yPCS>=ShDa#%}u(-Yc8m%{&HK6|Y)t?@bU7W@xDbPSDwV@gfe6U{6i_@v-Ztl`1KBMCZc{tJoA2zDSP~v z=TRk3lgE5`LAmpG)TRq?oP1!IZ}~O8<%_V)XQ$WRE;>D*v#-y2uUP!lV^NrFAN-*u zH$d1I+!yv0c9&Ol`|acY!V^iC^vj8FU5n88oVa2BKfwRQM6@M{8>i_UvJXql1{9sP z`sfaIF5}A>-{c||3k!0~?Grw`+ZQ>U%hMI36W!@TSU8bex@=)VKVM{DZh0Q1x%ej+ ztA_3&csTzfILzo`JbPk|M~g(f4=AEXV90p2z%*!Ra6%2Ckh_q zRclQDjE2D*sxDYaP#mYvI~wXu-sSzCx?x_SwR~)WZ}|nm(|uh(ARjPR98tnP;;k6) zxYya$y;JEc{?KEoeyv{iYob%D3D5hAzwl)l_VQ|Z4R%g~e`wWsG^Vt3%$v>i!p36Z zKmJrc%K3OmYQFT54WWfeTclpx=E#msDAa13Pw8t6cB#x8axLAdL3?qIEX9&`g z9r$vYCFqa+c8A|SEFzp13vqP+F+|n7{87_d@*6pEzZmZ?E7vPZxo4`0ZCt(!dp4s_H9 z;nCL?zb%t2)_xdz<+Me2pAZ5@EP%Z)FvcT}B_mH3c#si^TktbuEfp z@&N4z_@Fdi36jCs+b4HXC5>Q(j1TG;RWPZV3PKoa zrZ_4UN*;KOt`mau4sSFy(}%|mxz5l`v62zWW0t9zPW-5vY0ib2G}A@naxR2sdat7u zo=?-7=`=$##mcY;P;`UwcXzALem@r5zAN_bb}Wg$-GNm*w?i>K7}%p$LZ3v|rW8}I z8Hv4%Bd4jOr}WZqK`&j>rSk;(={KOC)_LarG+FQMZaK&Lyr)20X%w3&;@|I z06|E7R=*X+acI#!Uxf|Hw;@SspuPAUM0kL}nzFo1m15>kN7svO~pRCwOo^*O~+xr;X9m2xu zIoWs_pDk#m=UhCGSAdy;^|)4f0K~!N0j%7;8sx8U$2-b+PAty?iv6x_FWkQ;fVl)t zRZ7zG#1tfugazlIKH)f6S4|!(mKe+whebO$5I`auKdJe*svF><6{89e8Mxr)O^!J_ z9~Jo`tBR|7E3gEasj#NZx4g`ectT8y{q(FFvXvBVA*jm~ z%>h~rY>#z0pe zcP4&}b+WGKy&zl=5%Z)X_7b9>j2N;FJ=&SgcX9Xyj`b7MR1HbkbVa6wCV7*Scoa9* zE1Z=^nHA!%S`;@6?_E3%X94bsTIUD$qXtZMvId}Lj0|A4*~Rg9W4{9<#XrWu7<_Mh zckK1H4_FmMQ4U9zde`VvQJL0bOkhXiZ;&Jv;Hk>5umT>mUcozo^>u+?$*@rc4~h!8 ziqSeh@X*fKj}XqH%BKqivZykOS~7w*plrA}-U{JTFvBE0h)kHXDHq_c3aaP>ZQUYi z!uk2}_hJ(Q)*g{YL}~%jHxg;NcOJ$sZ_k^W6MK8k2iQ3=a?bm?v5{@}!0jopb$9@8 z%|%SOOcC@Or$D1E-V6roKoKw#TJh7i8(}@E?BXg8BLmg?%}^ZDBLjD1l43%9Eu=3h z5CM7c4901IQLOoE!`MH5KhuR*WI`LDMa}y~=GJ00w|GyOJi>2bTIJH%spiwsibW-R zo#_3IAWV>tfw zbz)c;Y0HI15jos1TI-QwuzOP2fzJcsV(9%gTtWg9Ao-c3FwRa%4b3ElL3K*Xokz$DlZT^aRJlB5_{(gd7j=fE~xf!QD<@UMFjEy)hV` zhWRdHdN3@;)f@;fn6*(1dxhFCL5+Mx<2p?}#ZIA^BZ~rSqig5+BaYZB@z>CF?8E+v zz+y>4AI-6-bC8e+v$8$`!0 
zI`ZOac$gNB70<*K9eTyp;~JcjwxJc>u^&6sz}kxO7-9t01~p0|Cqu~V76DU_kaO~C z=M`q$%~4Y)bv39L<}7?GJ6Yqne9qJDhuqzJ_;WD9G|X+X|M1SLzx?dm&sw$ z20og@qzTDkQXL|&QH+rGa`Ik{koM*vv*ie>$ZOZdKI8!D(14|T<1NEq!x<9O@?&bG^ zAO=X|dJK?`(qlP55=6tb9<{mj(MLiY+ zq}VGQA4NRmmN>oSLR70@tp`4Cl!GH@!@O03VuCyKMxkRO#l%G@Kn{U;K4=VjEy_cz z$*<`)hCrC*{{!dtSAMK{eQW;DIj{d~DC&Bz3cf-G^7sX+=6?{g)UX}N60;5BD>iFo z11n@>;(&1Laae1__@q2=D+toicgD&CcW1GJ^Lt2OXY5K$C1c)rCk7qZ)^R0utmczt z*fU(iF&uQD%GVQjK$qo!;VtAZc_qNL$DEeiC@C7pBfi{j!$rrpIER+%m z9`!{I;3ErEyorvZ$0_4qKBNG0FAF(-WBH2w#&R5=5%A!*NkmL%_{4HHXVCo4a7LQl zvxEj!@C;}8ymB^Y?p~bXFGXT4)6m5|uSY7u{xA5K8M7~OTH(jZau zI1&=8VS@Xd(P>x8uP<7etfMly)`1E_WpXc-NznlB(aPj*sZ1`xFpO2@$GS(!a2<^Z zNF(_y(x2%EjZ>K{Wi*dQjxv3e>8p(9)zDv+GF?Y=0Bf3FSJr1v5MA+ zAZ{-ek7x2=d`00^;+pzl{&O|1M_f}sT#HG3in#Xt`8fO!*9fN<_VfTxtV<>zf~eEL z?!q7INxGQ6kI5J9Z-7i6oHgHzifrWKX>`Il*1#Gi<{AY=53{}j>t zNZbX#H;RbnA0-F>jMxk0T3NnPT+ht4#?Tyv(fjJ~diB`5@e=|tm*J0ZotFQ(!g(?G zbo{#^Kh%)tIaf5iU=2{xAF(?g&-9aUC!EW9GB?{0EG3m+60e?;5 zjE`Bq{aKk{h@1I8)WFU1Dll-fT-@{ohW_wR8^5|Crm{vLQw&^H8G)=ca21q5er4dy zXKnVV8~U%yh)}12Q-@BP&+Iws8T;5jo|f|Gc#?X?Y@{~z$5B;eHmY5EJr&Kl4?TRp zPlU^`ANt>j+@I<+_1~MeKaQ4d?2PQWPqG-Bjz8i?$vtQEb(f$0FfiZm5&EifPy_Z`*>#I@+^kc?z)ti)uIy~(l z{h9M4BK>CP3rxG$<9qEsJ^hbnaNnvjSkG5va{mY{DC$WI|G_Jyi?F>EXYs!ta|Fem)}$qvvzv)0y>`yXbmL|9%y|KG&Y%Q}0Xx!2G7%DgsBKV+V$KPHHKuX68BbD;B2i}T!a z{C9XXpsur3g3S8E{!_cx?{#ls(~Ad3Dvyvp8aMu!7wPUfrcffdOPL2p$@`IMmTAe! zxYv8AoVa!jW#SBz?*6OASIe#2YQuMtvGH5p%in9p%H!Lwx#sF?q{v6#>s^(azE%0` zL0dRboNryd1ChJkvau@MB5mCvX6}AK&XL*5f1R{aIDs(6_31Zgi*DxxuxJd?i?!;l!E0 zu;R((FJYMIt-!InVz`Jy$H&f|0lcOK-7XHEqyiqlYWMxA}rsl*~! 
z@!Y$fEA$hPmuoYM!#Gy26j46b!hrTeSMb~IWufor!`8WSvCA)a1GkM&?nU=jG-8#& zfd18u>fH5up1Y0(Xk7B?S@u}Bwfu(Aqu3dKEw=o*suv+VoMAj5%ZHQoy!OV@l0yq$ z_E$9G==A}9tK{h7D{-tmj(v|ouBt1?RsAw<#Nq1nZ-(u~(d+TRb@1Ur_<)nwk1!Jg z$b`VW$X=c?jx*O=mTus&EcWb^B12@@^OlM9AsLIi5QK5=`U?!}xZ73z3~rb|{=)Xu zwu!ZJt@g59Bm~}L#dAyOQD@?E%uq5vaXvOstgCKeFXh1L@AANR+j}a}4>G}eao##} zBEdz|R=pUxtC}%?N^yrcXZ>;%XbrYew0m4n)MGQVIA{HK$a|`>hG05ux%hb5On%hA z9&j@MgS8$Wug;h)n~eQ){hWys(;g@EVt)qCS$91*K1*-9fOjbA!iE{a@sD9?O_!_Y zuK_VBp&Omc28om)qD)ZR57b)qk|VC_zk}2ZOMzNg?S%8yBRpUI1ZtL5brR>^W}zJr zHonke?rg_d?R{Bo?R9v;y(Qo?aMjGfE$Vtf|Kt(tmN;U43O166Bi5@SAYyWp@M4+k zh072iSqj?M;HC;6qDAzl9bGi)yNMTYVxuP->6(Rwq&o;L-1));h;fy}k=Tj9YZhWh zaLQ!mfPVZf?dL9TEGyo_(o+9m67; zE%-Y=?fvPaM!wdGRXpqL^`)VQ+&mwAUuoX%Y41NgYGkw%r-P3wx7)B%()IW_}r59pB(f*IOYh?M0jlr)>&GMgXpd;0CO{$+w(wpZzw48iaj=aGjHj zeA3e%Id$Tc9%C#G#d1~qmvzi_T|V_7Qi zc23)ZOpSD6Gr7pSF67<+$KJcZM^#<><1-;bK;%SgE$yXsY-3A&W(aEHPcJhx$Qhhb zd{t|;KoF{4FkVR1TG3z<wFZXR+Hc zUWS!LyQPI*(QWJ3E!N!!04Q@hA^$m$|CC$wurn~Zi)rg+4+Wf}3l7d8(Bt(X-!VsM+k%jUBoZ@%dibFVdN#*4jhH@k(#c+IR9(9Rb&{PxhMASPd77p zJk&LOoZmLx83^^8qlT=OLFfm*zt}3njV*J6IdK5CdoC|Hbzs9xpR+W*&LYkNG-PzqqTd{W`c2 z|FbwVKS*ch)EZ+Gd)s4mjaD65RReGxKYb*wpzZhr1eWiZW2+5A_S^*PhDh0Ww8ncy6rscXOqP9fJ0Q?I3 zK$9TbP`-GMZUM5ttD=;ZYW%Kvb!eR7Aw zw(~j8L8Q1hG-_PxDBq>nLPl;AI&4Nk;RE288&AH%5&T~&-w2+zkp~C1=yEu@E#Cx8 z773v9IWD;9ns&?#9oS|nA>zOzfLJ_^2SsLvSr*a=PUGJw{{tA=sW1k0Lb{x7EFSXB z#Sw$pFM)A`V7wJ2ghBRQb3znAm6Yokg7Nb5jbIAI3|#CIG@A|H)kwnBZO`%55-=vN zzeBl8HGaFQM0zMzOC%GJtqP+skn4a-v_w#DJwWE1{NjP~9e3JyO*m+mF16~KWq1vKUHomOI|Gx2My%~zp`r~5d%X&TV%6k3XuXR?^d|@Ka4|!4fyI<>1 z>ereSO4 z`u?4R@q}Ud_vBn~%)qht4;Xh}E><4IGxYD9#=baq`Pda>SB4j3uji#h2jl^9OyWfs=8(bXD`^kquE7No216{6zmkBFXK{>(>(O>nHk2z%fMixmp3C zKjmk1g+fcfI`Q9M`aN0Tezc>%^x6C}r~E13QaBTG|0B?rlmfzG@rCS(nUQMr!N^5TUb>xJv^D_YLH>93*E zz~zPi%mt_ZF!hQpt1StwKda^M6r`t)GUdFa;p`di zy)RuF?k)d(B8@B6EX8hG z!R^tht%V6xONYVr%3*NC`Kn#>*1m{yjelRH(B&|=&gC$8f-|B&hrzR8^L!$Q!5H3B z4uiA%3*LtYnb%)sLK5*8T!7WqWC_>qz5aq*kiqRQ_>Txn2bCZAols7LZ?~OSqVe3X 
zz%vex@LRPHA-bj%ewcrY2RV9u;NTtK>@L%OjjG=rQaPk%tNM`gL@XrpOLhki&N&fp zLXR)1e)pKlx8cp9ssq70;NA++@hcm;S1dr;l>=M+REJDoIVf7#inp$KzbT)99#VM* z^A;j+1e0&C$p_&%n;X8Vodc|lnsw+mlM z*JsVY25gcKCNtCB^3gx%*yJsM3O&+(5Abq3;_Rm1;P*eoZ}1xs%YOU@_bR`^besi; z!M3v@TtCpRd9wiyt!@8ad+!>Y*S2ac=ppeM%v~wz1JC%nY5C3%Mq=sh4B!<0O9;Ps zJ@IuSh-KfAx3`IV0xWQiyuB-(t$Y^pw8|I%Bvr$64QQ35>P`LudHHUIr@A|MqPq>@$6L++v342$;iC6sy68O_p3t_y zMejDa=$-Rw0&m$6et$W>j2md)_Wq*iV0@U}LiDB7ze-S$5TL)N8B= zzGwW^2D@-qO=k{N5?)h9{P0=mTmh$s>ni^tQeOpMRp%8%!MK!z-f?hpHll#9$um6k zlD_I3|BfvFd6AK2(LwKd1>&IhUsyr1107erV@6X?CZE+8R+H}{BC1}8EgJje&Q=@} z=jX56a7g@dm>P81I|7Hv*TkGAcmcv;@+6LT7wuf8^DucjY5mZGvU%4OSju@2^F$qh ztc6j+x^brbFmvN*d7s=kT7HKC`n`yA=V!YheQB;l+}4z*Xp`$cCQOSrQxEfsQjM=5Gk& zv&HN!cz+DvMJOnM4tDDMtEF>E$65wPC**6<6bOCU_{Mkb|3x5Le`2)$gc!dDzIQEk z*~vAfaI(qsK=uA;T&4z*skw8b{?>z?X+Leyv$^@_ol!aKyz{ME9s<((951>K%sES9 zBThR#U3?QIG0}~@!z~^bUVJrOQ234b?t3HlHH?MUfA_#0%Q95@y9caVPC^j{2Mj0R zDe4T0IwxRn4Rczx7Xze_fJ5C|(Lo8Wx~av_$+=3~&EATn+&&~pW5DRgR_z@K<7a zYPcFD<@F>t{L~@~`PhgZNa907QFb*n(uMJ|yrWDerV^`HzDXK&)+V2!8DcFA9iSMk zNA!8;SI#(ZNafKGA0s-9htrVCJAw7>6tfLT=Za4@qY&~YRS!*uQ7!L=^-e-LvX?x7bSS13O%fq&;bNmNxfg3VNK@6D+=eGa z-kXn>*nd8_4!45`U&q#oMysYBaml?X6Il-PO4}>x?;N}Ydu5sS(-cVg>%AK#dMy>( zD(g_5dcRX&Z2a^tK?6V3Pp^$ndx-yekDuP-V}C|1EiG2Q;vN#guA;!=<2dxEqNNJvSO3 z??>kIiL$n@#JzfW|4n=iJo5Hl;(gO%k3rU%Hpt-9#yDn_3c8Y8P?j3^>5y(Cjk9)G zGq2%)DZki&@g`c@iXCGh!o~3s7h^ZpV)=Z-Y-r2y^Bi8r$43taS{PtfmUE=R0P>Ru zS40xY6J-!ti#S+i?>)E#o={-0!G>E36Ko$-(IrZm_<>R*6_Re}o-ipVfxS@WJ7Qr~M`!}FZKoj9h3%DghM3SVS zF$e4Kt~XdAth$y^)gvM{N`hj}YBWMk#^KO*h;nE<1XA=g1c=IM1?~_oYroA~i z1MK>;f;{@NjTA1TBfrzpvBixocfxBowp{quc?i4xIDvpg+UDQ{vBFlFcm z_aq-i`Py;qKLaa|?;}eAihc$2ce>`;1W|fQy^ylBy z&`Z7*e*VYIh5dnJR^c{h zxT5;HN-QH3GD-SxF#Yk8Ui{GVi-mCaYW&b1lcHqpe*s5fZE%TfN>c3ADg&Fnw8J>H z9TQvB+U&irQ%F-RUNp5Z(C%+8oYF=Yb&A`~h2;prM_u6z-07q4^pZAz@`#6TO~gZPvJsH*^L*!_xv~pf zaZmx72G0EaB=tD0aLiKj@6<7k!U%M|A7WQkonI9T$5}g7 z(c$OCsqE>Ms~G;t>gJhMKQ2Her;K^P)OY%rr=&h~_qUt3RAE=>`Y^UQ?ha$-c<0EG 
z?R0iquxR54;pRmfw}iJrA{Oi`-?eSztku|MhXmLzbIiz-U*1=~ddtXJuLCyov~Mph z-*x)PlV96czH{1{-(H8p#w|tWaokKwae53MSet+H&}$dPPy1hN>Y^#{Jp@a9Gdy*5 zFWUU$VCy%Kuxv^BC3ihl(H0%tk-U$a%ILE774!01GVIRK2qy{1=XLGJ=VKy&A(NXAWbH7~jr(e{C4vxWIG0Efz+ZVhesiiuv<#r$qVisi9e_&9G1M#}Dm{k&h!92M@Gh*E$rNlSl6zLn!jSKJ zzbx$h0*`cRS^FPIR)^l%YSoSf3`<@I?l`&o+Vp&^|8+Dq6;tbIF|&?#d66G$WAWQD zFK7(SD}hk@t%a*>|0{a&%{vu4aG>)0vc~pfwZ>M>7KW>_95wEH$u`CGNc;Z1c(W+e} zkx2f$NzTu~rC0cpSD`wr1Hvm{Ri0V5Q;U6=fAvsdbN?mu`*}ss>VFW;w%DKiEgqy6 zvE+ZQP_Sw@o9doc)m8mW8{#{Vp6fK$!ZBPYjNgh$LTa5bRDBQ_)*(x3#SoR#$zbyT z=zLPm(+J3hRIXLVoM7^Q0w7q#XFU>{R}|cXh>-QO#pID)v)!MpL;@1&UU+jmX#dG& z_P1L#*CH~h5cA0>gAyZvwfHH&JUuW^JQ>~z80)VqLLtfTND3tIk-fb0O3w!lZ~byI zs_@PV;I~z;U*3e+X#BhI+d5Fbd)5h2UW~6E4uUa*{qwavH7m|rdHfExmfL%4G2&9* zTj5fbi*V^-v579(WjJgzj@gYv_Do;w=-Xm)%SbDBxB6h3nB3mg&#ttw-KkbTXTcA+}VQ_P?Hao0|3MSefp`f2!wK9^%sy1hLoP28K{p^Y5J5D~O^4@RaiB<({qkqRErXQ~)*QDyF zrTP+jW_2mF)M`9{Son)!eM;bPgSraP)cT6}K@Dq1s5CLsD4W*+z3FbFI*dLjQH|Pt z)upKLpIG6K{itz@sxfWtVp*&lu;f+vaan+?vE%sEG5;IPqy;$hlrfK<#77!o_{hr# z;YJus8)zi$gOQX&m!`Cm8pKL!!1vHVfCXLU?RWi0#V%S)Ys5B^e#&KPV;gl~oPXg& zwM=~;mZ{&vBsJs1a?yIpqwOP|w0;DZsqf&Dw0vF;dAP`pMORt6c70v$P`P0>vBr$t z=}*iZDmSdCKDpE1nLAYOKtH*$Os&6Ccc|R3vij7GW$MGs4XX>e)u-pMOic?@*H^7a6W*~E2ClBsWCKwjC!+*{+!B)i_oH~MnTM9$$jQ6?3 zkVxiZN>8I(3^aAK_wfFMEVZ&<%&=2XVv>(#43_qktOV?1}2){1S9; z%I5F$Wbs3)e3^28B(FDFAFlK8{{$a!UBJyC{re2f`MKH*YW$FX-|P+L9)wQ(^LmVm z2-j)&e@C8tzBybE?Z3>#OqX{`r;!`uZUy_@np_9VzAX^&iSNxo-cVB8z}sh=20edwm~({@~o}(;xhH zz}@~sMOFbD0tWY&{;Pny{fCMSAw8F1`b+zCS&QtzFU3+h{I1mjM(_?6{+heHOm-LHKEnW$OX)YY=a<8ef&UWyix>A4e+mBv{-lQU z#_g~M{(Dz?n#>0NCs%s*Rs(NxrFZ)sUFu5D{gfvCORn_P#s>a4o_R5v`za0lM4g`P zb^9Zwp5f(%*J?O#9Q;fS?rKjn{MgZhCN zDzMD_zt z|5=+jEXU2?UuJ=?%mV))3;a|~Xm9!7&jP@m{eeJ*&~x&!dPkw4W> zY=HdTpXx2sW&}SaGJZDxR6jX#UtE8xpS*3F=2K@^{x$hirM;-tcj}(apQx?%m>Ku5 zdMP%};pV?w=QraZ^V6O=-2B(~lb>Vf;pV?X=Qray>*qLmxcO^!es}w6=X>)90Hdkp z9!n5R`BOFaF=4!ny`VyAnuH8M-yi#qdg#z$;?Hr(w8z*H-v*5J>){UzZs&~R-Q znq*ne6(vQ3}_#66V^QXE%5B6qVMwQl=f7$%0E+|wvOg*fh_JFtk9|1q2==G<1n>H#a 
zv+m2tpMjMlbARoBQrG{aPRRO6dvE=G#mxG7^1^*Rq3bv6TPDuSzij?gCvt*~>v|TB zi}iEW%v*mpf2v)23G7hq&*o3{xl>uI&*yGGb&@pz?>*E%h7FPv)M@pK7&E;N`aon|l)doG2FQ za?Ndl&d)U#_obU7c?i$uPxaRUcdqoa$!{Rp{HZ>v^9MW`4F663Z2nXqJ6)x4%g@&V zGWkuvX7Qe`BNkL*v?RcT1D3Sq5ch_A`lze5sZGx%@6)aSn~-~SKrDzX4&5N`B=t4Jxa zzaOT_{!XWF30yyD4`^{zK`WvvhvkGOo)rLg|1P{|Sg9EQ8kwU!}8n@iKj1NBhR%LB?(a(D!^@ zkiAyTw2 zAZ`4+4hm~JcyBIokf^`wp#F8xa7`#<=g@9B684Xt0;bsL=TdH) z$NijX|BNmQeqdijcHKeaqr9xNUXz+s>}NZPTrr8tj_41JkVvY4LRH+%omb z43JiamSdARe)FeWJ&Ebo?2bG{wBbmFb=Pnd!;emU0J{TDTLM48;pb{r19#gL`g-szee{UiLG#ZJ=Sk0ItRity`0NN)J|63hVugn zWH`61H{ZJQ6@cGipwq8FsOypj>&iC)-$h7A8a}1Xx^e^HI|y%0!@u8YUAY4JBE5EL zqUupQv1uS&qyWJ)t^2qtCPkH56`}m;_+^tdu0%oAp$g7GMB$)O*0_~fJ&P31HwREw z&r;O73&$aftlGyxDi%Mn2&_OA2k_r+CpPEmPa;eYTNO(H<5Yzh6%5DETI5HiCB7H= z^BR9%K_Ml+74rMibgO$QGqy=^G2>stBY;-Ls|=wQf=H{d)%`Bx8WCD8_zH#EK;)g) zm3sk|oyoW-@c(6n#<^uJLV=Y?+tZ3NdWa(uoLfeaiiW5lq5%;Q0e~dpb;=OJJ>10EMxY*T!I>rQq?DNHQicAvOyeGJZ|ET&CEbC6&&Af!Qw4^m#@+X%{+_!{IF#4hpeN18VL zr)q^zR6zKF`<+*(iqO4Mk+BVA zWREODJ9mOw`IX-%hU6zH^^&2KbfiS`>o_E^0C05q*5rJYF73rvTh3-KprFvyxn3j? 
ze684@d;@72N|@4LOkN`g4$!v&PNiI$97ixfWY)6eIS8dZQhhlG8?9C@lvLVCr!^86IqX%4!P3(NU85>-E159oC6+W?>RV>igB_LF&6@_fEYl4_tH z`_oyj({NRP8h(R@Pfmm6=_dT+1!UMX_Y>}i!|l9PS2 zbRT?t2F2M^&@}lLDeth8#FjPtwE_g3=1mYr`YlJ4f*)QgW|5|r4!f7wR zt;>1F1?T%?Q_iVOh>JJ({=;)zaQ8m3|Kozwq%`Ghcfs9w-T}zaAtF- zf5-)=6r1$FbHUy1a_=K`w`;E}y}SH#8NlVO_ZL~dR5;pzIz)e5-==;Ico)e$zTfQIR>%D@@C;15=Geya(VM2iMvOFru+xN}hkC3EJ z|2#vX$JMFjlrqOh@|u^K1AZINglqOPO0J7N^%}S?&IGgWb2!}mL6pQY-MnStlXd;Q z47jd<>zR+G_-Fp~$q_<*KI3!yW))2R8v2?0c*HP&Q7VOcXwVEf)Vq#}I=?5oZ?%`9 zI)Rsd({z4s`#W_0$)17@{|uP%>{228yRQXgI4;vJ1}PFIzu|v>v}e8&c79w&&rvr~ z-+1%0Ot0(!o;ipc^=7qdGnt+>TErCchB$L&zW=m;v6aL3HEZSU!2>!n4 zZ{FRr4F4D7qg^dk%hR*<6RrDs7F$*!e#Z*;>k9W*bl^FOB1;?m{KZGlHcu&g2jxvE zd(DsLEyjNno6I_X%!)Vmtj9mSOk~mFxn!N^u=~)5_?ayWUCJ@Y| zTDQ!kg!>r8HIkT3_)j2&X=;ji>a4cV247Lc`SrVG7KwYbAYeNOY^McjyI8l4&uFSy zAQn@aKd8tpXkOFv27G5)_-f(RLv!R)K!D$YV)oy}rYqiVnsXbzW;n-}ShVR{-^S+M z*C8~{N3vYdnrGe1e$%{r-7<#XKWWkCYcJS#5=wao|H3!Gqb)^?Hpimt@mp2)Hb|w} zv79VP&c}q+B!(ccEy#m(oHoPnLx3TW8)sviw}8A`damk30NxVuY0-<3nG&JgGy+^y zvq{=>+OdD;E_nHh9guHcR9=N2)s~oVck|0=hO}T&)0_+T0q^rK*nSe{sMdYM&?4X% zZ7o81EjX6mV%l&;bVL7bXvG9ka+`K!U>THs+9F0=hBfuT?tj&b`VEO&);Sj}BFEkrR~fQqhR{3YDehake>!~u?z<&C=PrB;4J4nQ_H!b0@A{+I zXo%M;@7x$Fy9omiXRHGUgU6Fs`t-&F3IxUb4kGoFl93c zZxHhN0`dz-9CGhFXH@IWdO*}^&8@#VF&G`g#oy8Gyde z#68~wW3UqcoHGVUdt7GDxB$=nrT@1q@E5Yc-E+oCqs}_}u;-2LDdVh)O3#$Bm~+G; z{*302cgi?wRBFnIIzNqBqgn5u%|$6Na}7}$2op{_$IKb4@yv^Gnq?;a4gzrzPCLlJ z$3au_BHTR(G-u-or~I4r37wv7bI$>nxyo_R#rC=2Zv0DFFfQKQcuva#|BVaoE}!4I z<;7?>{1+~`yZmoydE?y8ea33E3r_hn?G50W7t7ZuNO+?Brr}18Kdm`k=IMgZ>+3Q~ zzmR35nfnUqfH{k4;=Fw|+Z=D7R!P-b9w<9}^xEj()^N=}Q;qK~&7TxNKj`98;M4HL zGCywqY;(M7U4I}|lX~!mv(53I(D{wrm|_h5%zZo}*e35BQ93{10sOnKfDV}Z6$mqb zk%pUFKo22@nj=#0duhrtigkWdp1FI|&PD`b?!LmbB|86P23(inzX5TLk)#S4fWF_R z*9c6zC_mI!CjX)4h>z*|AJYhPe#kPs`LoRti?sZ4 z@5Sw_JHsi$Wj=Uim%5iQ(??BelmAeUZQ(@Ma0%ko9a8ek7pWBGDsy6M5x^ti-82<*BakVseRbk2 z+?_@s&|ocm4XptJ55LN$*u|IRPhhhI0k+VC0HOqe2895Ef`EK_LsbKiAi%HIXapKk 
zF@gY62m)CspxSi=DG)f)6c~A=WuWLtQ=sJ2q5yk9hWMjb(BJQ&dO_$&Q^5YTD1h$$ zvGl;iBTa#;jx+@(A886)f21jJ^O2^&ZAX*>uGVu1Hltx`H2bth!kT2YUAn@mc>_+= zZKpR9kN9CJ%!Nm4yQZ_+uIXa*pQ>U1*w$ElY%vjaIwTSn&Amrq!9rO%2Vu)ZijTDx z?y(jwwd?1vlu^7ml4lps-%C3JuGVe1$qylgfPlaAN@DwP31A#o@qtu4#&HE2nF4VD zXDM5YP*RC#Qj{&0!+fzTtcAO+h3#Tba0rJz0kY}ZSEOqPh_$bf+7Xc28Lw*xNNOiY z*A9@>4p36p4oeD9sr})VG%EVEU9~M++hNWHK;FQoPJE&*c1W38T*Q&g)1oFAjJSX zp1Yeg{763%BW;jpG>e@os+Wi`8FrOT@D*`i)M*!F{K!#`~LyXa?j`wtiWPhI3a z-9B(cyZy)`s`84CXxmFZgSKy1g;LYfoWx-Ys7(wCFr1FbXHjlw?>^NyLj#sIS6ASe370A zM~g4XixxlJL&a?^+%AtYio9$mws8dACLoSsFDJGQmw=36jOQ4p;$;j2h{NSJPQ_C( z7|M`IhRmqbh!mKEKP_``D7-ei_^GRapIwjn2C6F{AT)4w1%QMIeY*l;NCZ6}k2Y2VuOH$4gj%BP$e{w3lbxAP>W*_ioU`S z4hXZP`XeL>b8nU|0j3ES(2IB!BBb~rpwCbk8gjk>!f?q*;g}HN2uQ-*+oVSrHXJEo zXay^dsAjl?mX=WqT+04rYJnY6=|mKY-bgipsn9AjL_jT2Crr=-HP+8LP*;N&7)E8F zL<$VVvSyC)42dyJy{?K-gLT+x3}d*I^iU3}L@wQ<5~L^>5S0V+QaPkzfDTI5117;7 z|f>P5F2ZIDhN(67m z_^ZqYdTzKV^%A8EnnKe-y~U9zp+gs6qDWBaz_Q4O1_22j#ApCm#4;5ngn)?7VmsS7 zV8SlQ8x9*cE3$V#Nxu|;vr*NTHvsf_+Ifm17y z6X`D!6gf%t7Xbl9bT&_akr-fTrf0UZl$8@5g8`OuAVW%PeUc6f0<`hPs+n=2EaAliU0{#FLzKAcvtDv2R8mS@ zp^~A*0TPbLqz7$CD{;gKN0iJ8E%3%V6BC1i(#a|H#oqU+D6WXJ6y->#0Vxu|Z};=WOkMfW#l2{9u8|n)b+Z4*2rsBu%=VoT)YrxBGj<&&e4H{&tEKaUA3mJ)(`G&=;2xdcL!MaoY%<@2hXj>>Lfl`7;FeGi4-3fDgNn`T&{Fp zi4@;0g}?P*GupKfg*Yd9s%@j6;CaUC3=7$ zOX>A?$P$&XiV<0Y$uNs4(7VyCoNl}RSv`esj}-s@86sE=h~V(DJPHm>4$A~82a!Z! 
zDFi_Rf&!xhCR#9{_P{tmX;zs~JC^yagqpEhg?rEmWAlS219*EFVw`pA{*D1aJid9MTxP zEsql@FsV7KfQW%wIzUpvAT?)|c)`FsXO$R1;92RCV1Z2CVU>c`dhsAqJXS48h!B8Q z7-t26lrT^D7CwGB>+TKtac?J;5CT= z5>;%rAfR?_5+O!vH)8=-aC_Zj0eG)#M!WIoZ8t!yS+yGhsoB+TfTZTW?M95$tgI32 z`m`3p5^%Ts?oOouH3GYZ3W9)ZOMq^=+7e-!&Uj6D({!`{L61UUNvi336iP_b)20Yi zPrB)N^fn!!bkh;=HXWcWO-GD0eZIB;^y>>41hNSaA0Fc)ZLu#P{_<-tTrHxkITCN7nO>l~UkG;z|+2RxI z;`<0t?;Y*pdpK>tM9cNk5%$DbTGpTK3rAQFQgBRBnb$8 zQbv-1gg#y)Nn$`A+(kU2PCF;q;EY+bNo{`a!yvtAh0mzxrI-LD)WVgp3|M@;HJd0l zT-9BVStHp&e3E1ox!RXh(|k zBgM}=i70KLJG&yq^CsfBm%DdFibI&L8U}pUWAjewpBaYwf@g?fdNp4jClF~@A4T_| z45~hgfVYnVl&y~<>PD$R`Y7iK*v4k&&6ut{tXF~-Tt&*H@c3*e44kkz-bIv1@kCBs z)2138g%GYe8$cw4nzJDwgmBH-01`sXN<=IcsyzTc4RMOn6q)08GH6V31Ns0wM-#)`ob&Af=Eb zzF&nTF@nIedc4Fs29^#mUUJ@QmS=5@L&jD^f54+4@N#;@0Eh^vc`E{f0OR$D0gxa7 zkZ0bC7(oE5tFtKLeB11{RU*X;8cuPGA*O9$oFMSINx}8P12AhWN2DVl1vdwTRZ{&C z5rKKF!US+ja5w@|aC3$-Is#GzCe{BP_+vQ%D=rbj+Pi z(>XWh+(}V`2&lO;;spWM+!-K2pzqunF{=kHmmLxJzqVA6OlGGUxgJa^9HpgP+p+O zyrKV{2A}O-Fpz3HH?rb%47ilunwZi`vJXi}lrmBUL=thnAORr}^!wXKeWNL8 zYwh4p;ptA)H@yDYKl0rD*y3mJRoSs@hwSDRDxk>j?BcZ7|Ler)(8=iZ2;Fh9lKh#|zny*C0ATDRnLZv>L|vpvDg$0Z7^)q9B~ z21#+YBxt2JO__+!;@<^Vm zAU*XiNo(QX=D7+=4yPMXY)9%`1-Rgv+5U~5tB|gMg(dI-D?Bu7^fHrPcvl805@4t` zi|z#}kPcz6I0C!r!+|3%33G3jE@7{OdVak^Bp`W86j6Y+Cg%9u3ec{VvUwz|>NX?7>Q0*Q&{@XlQIhQ@pSiJhRj=i@iC;fRh z+Zxxw-MS&beQvljX&*oVfFjO&5ods1*HAeYYjxv-OZ0j2y$R&2wrXFJG!2npqNdXy zX~(gj+QyM%X%=CNdG6q-lZXb}vDQ|#BI>k8YMSz*rR%HyBN~6OB*en^vHI%p^8wr< z|A`-IL-)`KmXRXu-Plx8IV-%U$zQoWQlDqXuRmx9o1^i_!ANjPv~>Bb!Ptxdv{1BN zx5TR50%U-q!R=}UvFBIeX1BvGds=Yg0%-iS!M2l(29wd!Z5lUIW7U1O_2+#j+Ho7T zB_gFS&O%ar2cpAv;DNuPG8`e&f+*m@22)+WkYbG;?5NocQY53r^Vcd;EQ`j6f)q=l z!9)z-OnNfpsO*!DNX{0+8$Nvy7^a$qF5ED~(AOS`Jx ziq);NYR90SSbXe3JANUgz9R-7gxG&l+B$0>R%;_z(L>4S1efUcZU<607zTL@*KDFJ z;2DxxAAts%R^0%6 zQ6O1_A4nEbc*Ke+<{%OjO#T*kkh+gBNHHxe<{kX#{uFhV^}{89gH?A17CynBPb$&V=REWlZH8~`GPPpmP7=PlLhUs(3 zSm~n5TkZHiLHO6h$D_aUtw=rAN2D{@>~rqUVAY-k9;H~&MY(X+flJcr?r8Djx(lp> 
zlS*))BN}Xrm9DN@X~%B>1((^u<*KW*+A1fFRK$5HxMW;#ooF2Bzwyq{$Duy_k-6`& z=UKH)!tIX8>Q3w}Lbqsu*jP3H3Mf<$az!BBb?Yiy@EG3P0C08XnGvH5BJJ4irIbO` z>AE<68f2sk;uR}>x$tZV==% zU{nZvn*TV@Rb}vsPM~9smsjkz8N6!x$h>;sFnDz%NQ$QR&#P^jy!tBArg`6Rr=V!zrJX<3B(!Q|uYbWT@t z8UpmuHg|DK|K$S&{l}%b|GWzA2n&nS23N#+mu8!@y?ZeBKj*>FsQhs>{zI^67l*3u zNPSKeLls&GKWQ|NV7K)~vHOAM(bveKb;_RBHp4pM(+)ehtL9B@v+Y!TS^=94WNgLA zLYr-6H2wo1y&)1@rz@_y7ctJ8FmUYnV<^TSb55Yh%(Ra}Bk*paBU;*F)x3r%`EJt& zKG2BVfh-G*(fF_+kbv1EWTw%mS<>n;8Y9>a%>3@ksvfdyw#fj3-L`hk2g9)i8^ncU zxNE}V9M0L%r(MFI4}?FaMae>79NZIWX~O+r@>hri<@z8bhRb)6LZgzTV>{3n?|zaa z@rZUMzlS@=x66X7#7eY&l}H}{1E&>*HMu^av;d}m_)6P(6>h7;-wR(6zWj>IBF-~; zXhy_Ykk8iCA8Xg&=L_*^VLs#maPCEp#&423gG-|@UK4K(h+w06Z8ZkaSp1iq3JF3V zP(BtxKDJd}AB|rHfoKZtY4%rcjnyAt5Q$$6gJE@~blt4MF}M(bc?e^$Zk1L04&K@Z z*G21IsT^nrn<9K++$3b^juk(iP=e7B^RKFUjp86Rp!;FOXx~=#STueq>RE;UL&0n0 zc4oYVg^79!Wu$H~?b}s$us7nY7Nd=2wyQGNf+;AICyRH&=y@LP^mQM@F%)Kdq~^Wc z%IjnjbYXq%8fjzvMwn+WN8=MwV`H@R<=Lol6?Rm@F1G7dRW>6OT&3HA{=PG1tVW8T zmUf{3NjqG1R6UM2Z&Gz?LwQuK;Jr5A|i1Uu=yFHaNBJpSec(F0Gr^R2Hl-?`Mioy{h zW=tC+&LW&Fv0D-ltAnw+H!7P!o@m{hq$T+C4zL1$pavqv^L4vgWBxa)UW;JLz>XSo zK-M*Odr#HF5eF_9e@+~n83iv!2JeQ==v-5P#=RN^1k-`tVE4G-9w9$^(;!Qxz_xQo zL8tAM6<`NMSwXMTAJO`S4r4cd6FDo0;KHy6xs=bV*IsH200lb?^>jdJ(1!UbQ|~Cl)*^+cC@i24@{2+ zUj-4W9t3i#{;0DlQu@KHfg%%~F$hWwg;5KRq821rxUObfCU&}p~8}H<2m9S zNS=cT2oD4yxf>-w$dd~&Ph;8Yf_{=KpzoYY>&I$+ms<)x{=45jH!yPiMOPFCg2iWy zIs;=rFSsI*>%+eFos1#L*kSqirFO%+_3!nbL?Tm?;9|7SRkG|cG$@IM}>zK-7_#etfXepfU9$d z3@fsSjT}EL5HWR(1CAedK^$Q;>~K*yoqB+*&7{M8!juFT=DD8h%Mw1S&o|V*rr`lk z`V$d$e`7^x4cIsh?iUa3aMK6P zeGKB+E^oTPWR>wy-|C@GWPa~eJlP9TVQ<=N0Va%c>Ap%Pt5B~Sh-X881LkC^KmJ|c z5C53gLkPw=9QO_ zE06opoBunAr2Kx|{M0op)7#fWfblw1Uq1_Jyf`u6W&G*q@}_0j+s~PeeVCWQ3mJZFPTBG%k`l>n6+;7K+o)E3)Vi;aK*mK{d2E1joeCp~9c;26NboVGav8%lo zaBJZSIB@O9Bq`CAhdtAg;@WoKZ5Z(IVXUe(Onp42V|>5lG&H5p8Hc}zH$}FyH-h&! 
zT*ZjQpW+yKS)`_WOr-QZ>z-R6g|XlpcKsl`eo~%Ye{sHDf93F)vjuBM_r2t# zX1<6F?t}w_SbZ*Bitmcm7vQ!dR(~39J7e{uaeD`t<;RL^cTl*kg?sHpH-f*>L2Twv zBKWip?!oqN1fSKxUdflwe7hu%JnoS6^0-sd%4517D1Uc75=_@4!E`+mRP_YX^~j^D zM;=u@@|dn?WM(}QOxGj9bUhMO^%SM+kw;aJJgR!+@f~S<3FamBgU}plzJw-$&%1>L z61*6vTlNSUBzWa;^6%cOFvH{9CGZ9_*4-e8;Eng(yU7QE^Escmq@#wrqSCFETxdWl zmqJj$s1$;L6~51}wj_JY=hikXWMG}*^Pwbr7M8Y3z`|Q9+4s0i@;TLwI&Yni$_KIX z09ZXJ0^z~gk~}-O7>M`{yP2>PZykSdEp&=rv4x_NclyvV`aN`;UGwgl*1i0|^ksJa zy;N;j8=VjzishHahDWPuU)^f~otnvgE}hy7d#!lF7> z-J$$jy0-{F(6an5K+EuT$A=W1hkpUE`OdA!fldK=xs8Y6$@`!*L8$-&BY_`Ic0wv^ zwN%!wR8mPFBH}It`!rT^LLPS7!#LU{L6J(Ppj0jqkVqv#lu88>sccV4WfD?}qC_fd zwNy4H{XizG37%~|Ed=VqwWeH4VIKH$W^sY+LN1VD5o>;KmURzRcz^3#-vr#fE+sev zR<7Y~Yy&RAgAd-h#E+)pL&`I+!>nS;Y<^CeD?F|xrSa2lnl|f(saWEP%$zwhdc){( z<3@3vDQ2c55e<;#$=Id|WqQAPCrB_!3 zT_cy7bD8>wd*C{lx?Rr~(z+G~;vtKkCHP;3f9yHyc_WIgY4sEGqV<>NN9(U1 zZr3yh0A4iRw(i7Se|AS+B-nw4f(`&%ZT~v17vPvjZU+-AM}jp-kVJwc5@3hTHcW(= zU}zT;tVV*hNU#S5nZfS)+hYLw6=qb zS*(dwARS=yyEJSqOGV*;J#5l-M-tsS-Qy-*W_f{r%2Tk+@)RtyJO#@v4^~?r{1hy+ zJO#@vPr)+F3-wc;f@PMcV43A9SY~+>`zcSsGRsr2%<>d0v%Jaul&4^sEEDhvDxK?# zjwoL${8b>@ggyLUd_<{OahRz@6!GtUSedNP0W9yY0CV=Si?EXn=nn$=x118S77};A z;Et7KsNLBe{%EjCY2exoly#)M^%V$wH7$W3V3XdpR^gzg1^5Pb&D+B{E>X!p!bonV z^tU6FK=MYbFt>nT&c1cLUAlDknbQ}rmWEL|QU`0LTbD#Iqn#zB>(C=>73)~L1D_R7l+RX8$Ly&@ zNta%TwAmNMp-&VdiLhpUW@3zQYU!`=+>87v3_ z*8u)08`<_p#5Em|M6P`EpzoaXkVYXLHn#`>&vL!I1IX8)6TZlR>+L$i<{(6Ca9*?`NpCh z*M5122ST}jPv5V^{VM%TUUG{u)BOPd*XU>NC;Xv)ruJq0Wc*Lj&*ivJ)z8=AK21MQ z$Nfk8`Fh-Mz`y&#n2dwo??vOs55e>pAu&8KN-wqN?q{#L<-0Tw{xMp*YS!_|h3E!) 
zY%j$Ti2uzXjF2UxhQJ6}<5xz=5<9VxM#y|WjS$)itC*WM!crXE30s9n^JRIhpb@?@ zb(|=E^)Gq811H?qT6bTLqH&abe-wuqYSdAzS81C_b#}4E`trC-z*@K--g0?FKCyjh z1U1inv)2Mzu%l}Iz6F?%IGonk4)i?%Zql#r_7cpKN7#usbHj5tN?PDLkhm(hyof*s zm|PWrKErJyJ5U;%(|{6GEMls4ammwAWPvJ4*PY7UMshJ{r#!SqOIxhFOL_JnK4G6- zh&Q3h0e1ZIrSP(_%Bm>^Q{CjGFIG1xZTPc{!se{WPbHk-7sB_GGDlLr=`%%6pq;n| zHe#PL2S~~n(kTaIOPMPvA55nNq53cLD5RV{Lp5f?QQ1-rWU32vs)5;34PvT*weade zk}|kVCtPhOHi?|&kawVS`wt*q*FwI&q&8=SC&8))7@TYbbO2%@cs=W)^+Tfdw_{~z z7FKp{9FFMC_{Y(p@_=1`<48R3!hh5BGX8+`t+sP{K`(^6NDyH4SecqALR=`&&iU|+a-2lOD+>i zpUBnI?c9-N&T^!N)7ePkZFGk%=nik_?$8PZYLDW84`k3=P9qs=bA2IpfNcf3-v9-; z#Lzf_Oj3troXCvLlF3gs#WsNI>7*JJL3j}mMrA-4h=}J}7DR-Y>T!ec2q4@Xa~AcB zF!);WxJH=%X$4_qO(ERLmZj>~$$=)_zd6#s9~Xs+LqdvNT_8GmVpTjVhfQqfgYaA_ z1Z^o{%bxWSW$cz{5mcrAo7slWTqNdd%n0m|Kxx}X@B_-KZpNbAUD7hKs8AsQz+F)7 zISt4c%zca4>iS&CfiKezB1x@<9}rjUWMqYj9-=G7J}n1QR1t7Q4Di!~6mh#sLe|0q zD*aoExOg!uWsWG}mdM^j4}_s>1bc<_h6IvXJ>90|s&k!3)WcmtJ(NI5>rzv zvowux5lV4h=jXo?J)+E0wW2dspeDf)HFclHng=QRLQ!uPNl=4?)uR&wMWz}c_|;!i z(1{TC_Tl@n2ce^f5(8DcUVIvDOj5s(a3jT&TZ;MYY7H0g;vQ%YBp2pK}dIM@cH_EWFC8x3OW z4J87nLJkyS4}^C6P;A~Y_;bYOc=rT(wFe1M+B4(^$0(sMk;5pu0LaLwiIP;Z24;bC z;_Xm08x^B~4F=z8oqsDv$YjCXO+t7z;sSf9O*6>$3iVNAn|jF6<7I-VH%5c>p9Ce0 z`k;A^kL)v^x@+Ewj1~6{&3$!+$rj` z6Y;r&BoxJVDoJ$Pl#wVlqLLHDq~s?h@=!`2ihF^(te)TW#U6rz!b+6vYhw;7hrJ5| zg@;89WXg;k1l?g#0BdU@?Nz8B7sjfSrh4B3-TT<{wvS*d1PiBKu}Io2=D3XM11aw+ zk!rTcw6TJbl?bzk;j<1H_Gw|IY-B0Gn6kBEblLSVj8I%FjHAiP=sP%V>%+Amz26=d zSUSLcnx1$&eiR|LgB4KP_v!h7J@@T-jH!RRpaJ)2{j&u?4!#*M@z{+_SZ>5gXbeMP z+A)jhG@|C+kacefbV0Q}3nzhXgU1vhu0 zXn0MUINXk3l{a_OL5#nZL+rxU_S_~lDt#AcsrOXEd%?hHe0DxetL1jQvf$<@99*a- zL@^*b15`Qn59L>*xccV$k z6?EASmU-hf!&v|@3y5oied-K8yp&bm7*+nn#KpG^G03DAQTsk|A)FOU=Xm~S@KezIR@SaA0<+cg+waeJrC=tN%bmPaZRlBajfpz5%uJi$@I1)voCMD^kry^Tf<#0Li8!hxsc z3vsSPQK>Bwgk9B+Gxqe0Ry=<-ONK{fdP9=6@Bj^_2wySbcDi z#{I|Sk{!PSb!`GUD+i0vF-5Ns%Ot}Doo3`S14M{59=;TQqmgHUvj)VvSp z2tOaJM5-hTxeBRye5`O2q=jA)fd9VeNlCoi>i$x?%hF!~goTcEqygbQ4e-hlsfS~P 
ztHG_+F*<$_w=FnmvoxX{KZw-9@dNi`lcq=~+#*etqdlQ1G6=U~;`@Pq5#Taz4Qg}w zMSxSDXz9=batf#Q;Rg`SN4x15;TcF2BulkJ>OpSQ6LVfy{sww3Dt=)di5GL;6KRaZ z!#Eq#YSTf&T;ztk82BNqJVo$aNjw|u;Qsz;zohE5SP%AN^n zeiyD)QqEtrG_O#8BZ4a~a-MCP{8RKRxJro3-N)r0tL|%3|3mp$e}uxAOqq=TK|;<$ z5)!V~Ux_aD{Pm2jc?*jUknM-ALQ(N~T&T1Q7h?@#m;JBFU!#A*Gs@g|(G~GkjEC_n zstDdRFsrRNufQt2qu^$%_E(62-bIu1#$f_N|15t3h*ve8jh6bU0o!qF`KkSD`L|e((ew&&tKtv&UDliO^T9<2G4^#|r`55N?b@5as>pChNG3cFk5l`b*R*TZ%TL(Bm~>P~}FX z9s$M_aNF*hrdNvZYDTcRx!>!uV!!Ma0*_i$y32pwNgW)+^MGJ`s8Oedoq^Rpn6a6|zoh*J1k=)qb4VAcNS zIN@PCB8V6lLn@Y|%y)s2$~zc5{H7K^Oe1?5zzqz1gc$xIeP^17-KA!Iw-4zSY0@b! zw32kSizx}H5JkU-J1Dqry>;AbRxTSDRvGR?+IxUn&s)}UuQC?XuZk8vz?;r?&3)+a zeefSd7dKR0iu?9pSP?Fk8Sdk?I7lvwG8K=fm=AY7!hGi+X+qVsVv)51cpD*51(8*N zu=b^-N8Pzd4BUFj!HLksli9C9Qv9#L|As$7-TJ!oVJf3TC+*T#tXeBa8r~*uJ4wqS z;QW5D64U|LaWq!faL_t#ANkU5Ll>&v++x#Hk#nB#w}nG`9yr)#nI_cJsdSUFo=wMj8!dUzuEsy&G&vq8wDumSBeKy5i7*Y+e z_^f#)rfXkFZ?i&btKuao4$UOyVrDj(gWAbu4^TG%DSL zn0;V_>HeN>jg~aB;Kn123_s@hL#u-Qh{Qe~j?^5~6Ea_NFoGKmK%Nc7pg!8u`%hyp zZ#A7Cf~Tw_5W(u%z`nDFogafbKM*eXI2@~>IN7aRtm8y!sDZS_s_iC|h#9gURD=3T ztF~2GS8+0K?yfHb_qz4y`Ma2>gE|A2U4mn}ETWNlFnsVn`{P()M6{zU~g)I*!gD@x|z$ z)J?s~uxJ3-(lRP2Nud6aGDL0I!YS$>s5Yy1ky?gboO~blsm_KPl>lKbsH;}Fh&{RXybE@jKbcSi*el4am+v+pD zYAQ#GZ_3TVHsmSzLWsC@6QYJSUps3)lnH5Yix$=)hSC|cs&4;x%|@v^)K5IC!o2hS zd3Mq!>f3F3Gm?b~lB2e2eiAk_%)v%%(@&Igk6EGngYg%!FdEOCwvU1(SGXav(fFOi z4PTMsKdqLJbpL2CKkupFoYkPM%nOn(HDkAI8+LxYtmRFAg9~f)b4c+sY&v|?lvFkQ z*^mU@PNbo59`pE>l=_bYD7yisp1&O@_@?EHWWD^7u*eyskV?q_rp^ zQI!TluNd&z)YR9d8LdAW76*@BP9)dtwVtJ+s(l-@p7c`V2~>;bPcMX@_lUmF@Ks43 z3))LiY}e?A?E>IpNBM5GR_pt#iXv5+LAwt3lyrw;q7olz+UKV4i$Dzwo)uc?wIixh zPMv2MbDTQu^9hdk1AZ@yl)!1FaNsxygaQu_sr`M{ zZ<0@yDzG=r0iUYtB8*$6n$TY#tD;5SLih>cwmpY6{Ri-2L!Ijdg4Ls`KWKZ&&B)k1 zg3*tu)a^)vpzRHKS(u1B26h?f8G#`9!^U^?GkIX!KPsQ4RAwn5A6Iw4(F5Cs9McO3 zl5*g2<84n3c?7o`S>Rq@7^@EOEznKUC-tUBZO;vFhhm1+iD66cvWS$3qN=>&xjj@( z9xl@$LPq3O6Pke8$fO0FD$IlNG(~M}f0X2E@n%i;b=WD-P<`_FJjx>7L@rStU1(1= zcJyOk>W%I7C-v5kT3gfFU()i7hDkWE8RAo44pzS7} 
z?Hj*$s#aj2alhdL;#h%Fts5DRucv|}HpSPOqgtMGk}e|+!_P{vtqHqe^rI$L9IH~2 z=KDu&W>d>mEVDn_fzx3aq|A8q$9i(T6wZ1|g>n)Z!G_ecoB?X{J37*9D`WUkEpM>*Ga~AX|^hn9tnBs#xn5 zh`tdn>e()h70j5y4cNmt!MX*@nD)w;4Hu=Ry(;pC)c9Z^k+{NUeQLfTr?iSQbS&{T z-$hs8EKVzSQ)=grWW}JkiJZtuH{ZUD)jj%P%M017UJUhKbxo3`u!a;KQfdTGOkN%8 zGAM(7#)|GJul1U^e8lL7NZ6qSrfcnx78j_D$AsWJ`q=I}XPrtk#lKgYTwT!9t!Y43 z%`JUKKcVo(c1Bo!SqC1Jy;VY4NYGGl^5ZK{#a^T0)*$owP?>YB@NJ534!Z4WOx@X& z$wQ2`&HE7C>Dvnl&K+l3LvTr0mD$`p`O93-7x0g^^0a7=ml zG%>X*<*w`uC<+;#;;+Z#k4Y3s#MW2APBfc~xKj5A<7;`N5-o|F)@C2l@4<@4Y<#ru(xW;e+G@uRFq%2Tt_(s+b+Xdmg9ar$t+ne!UWyfr?jeA)dkUZc;oW zIsLRUnV5S@LXr+7=xg+jcN39dUuDx1!=y`Pbkl=*yUY*?x$-RQfV_010Jp;Dg(*XP zhGcA+l_N-{R0{-1&^}*np23I3x~o52=4gLdDXv3=j9UB8BGJ>?e)?6UP>a+BPwQg` zQ)#gJ-aQojPkg9A&L)=hx+l%4vlDEP?KXV~`@dkyDJ*h&y-Rv|_1>*KIsYQ%mDttS-8#063mxB(RbQSQ?pigQeWpa- zsAE#=trgtheSK2+;->g$b=Sf**BpOz2-2y^+T6$eIyxB2RRO*e_tU;{T9kHNU#WkY z@?@qoha6RsUyIfpi0@;V6iDwZ+%f5Is3Uq&i4}r-%tuNpoxlt^W?D&H`#C@tLnW=*Xdc?0~_PzD)kzc!5b=91Y)u5tg zE)G|2Sx{-e7ynY^Tj|Vzb`&MoBiJlcC(B)5+|J)uV&PK??3BhYBb;v}g+S096eE-C z>y(Lw?%iXYr^T9fzWfoy_8HD9y`vT9KQ5S*YDH?QN+`k=Y=*n zk4t8rv$E(M(CNGqH5b1~=a1BxzpJbase;Z?VF>dw#%R=f_qYhMpbtfu7x2}8yJC)R zjs<105dK}(a;Z#ja00IQ^$zh4>Jp(TjrNqemWPOo7CfRbp1iR6Ab7Pge#iCZ`sw(C z+&;-nFJABDaLP@0+-< z#a&!;e5wD!-mwdcMB%jnh0k zU;jLuohEVhsY^}(*vY_o95~MnR?r7QFf1I6fCOW>T8^1)yEij1r0iSb( zyab>8L+PnI)h=;3-6tFy))tEl3rVzek-3u};xd2-E1NwW1sGhTX&lw?3yB3hsJ%%hBxtqg#@@ zJThbO*=m}DW7z4y1t?Gsy+d@f%OkR*jczjGSZ`)nJCU;Hd}92G!r8;woyp8=bJ7Gu zj!W_}5qT*B)^~h3XgiKl>pO4t%Ff_;hI@!-$cFUW#u0zvT|r_*Prql?xNX+H-A2D( zg!D6iJ@gX|GV5$HExzb-?c1HFU&^uWPEMvgW4T@(_ z62+qA`LBC_{HMrworfnnu7iU6vbv%ysa~V2g!H!TN+6=T9t7C+(~zpN2KP!Gy;}3U zY&9&$WNOy68JaVBeKZ)Kt}=%lZvVNuV?*;N6Xr%)548kfiHhH1k2FFS)lJPztmdJR z%YlNFmgbO^ALmjq+2n~~y|Ud(WM0^!9J(Sdmjl|k9G6joEOeCely_5HMMqN>s|GFa z^>?p#7O)Q86106vzmpzSTuXdbrV>#k`-9x_{71`e(*C4GD(8UiuhuwYxvgqFT3TVN z0YQ_ezy>Q!N*X20_l=jN=Mn7Nn^c`GLgf|A*$A*B5AkIn2g?&SAWO_XH<=Bxt(;YYhQf>CWpF7tw#bm#4&yVK43r 
z+V)V4Dp0$6lT48ok<_cD==fmtYr*(Nc$MFG@%tZT;WrL_IoWwJYZf+fC>Vl}f9*3O zB0?!&_7M_}oz=sA;z*_@GRw3?9}R2{Q{ydCnd%C`yHx@Md5at;7nd1tF)*}&1Yf$m z8ZUqqkxQ z-`H!1j7}6oJfuZ_gqZJe--#K>H9!N}wUeei6eeV5P|Fp�#y&5URFXsT{4mMGnI2 z4fm#u(9V0UM?$HU%GR?LWkAhbVja9 zFRy4oL{xRIupW3h$>m*r5!uAi>&WDuP%P2?*#3+Oj47P_y)PBiKIka#L@)Jj(wt@E z*#t0QEC0y}rS0k+v8?{zsc;hC7y?^{h@}q<7E2Ggn_Cf0tNN=Kt`SoXl1ER4Y&u9D zxd^6Q!~&jdv;S{U#n1b3roBcIM@6&9|vE(9;>Vj=ty;XB@U2q~d0T#-n zoU#NOFCe}rg!ba$$)$Be<Nu}9?rP88(*^MTZ{KUw=oW7)GkF9ptnKu_lD)#PI zYJ%^5(WYoAjk~djoeo%Q(nmEX-Q_oACQrEj-JXoLF8jfvg5hTj7X;h!OFH|<(O`rOTlTS27`Vb4Xol6MW;g<^~8+_ zt4KW@wryC7)Q_CZVp%9+NkzG5kRnwTkyyKIRv#@09g8E}G^xiUn$&5&-9zM7f2`c{ z3z$%kVy1t}jYFFMR5%Xh6W4kmUX8-t36~r>J<1T@kKpop)H}%gZC^x-Ivqm{2#XQ! z%Ta2OBK61q$Bu%!Jgxo0R;Gw5&4&7wR(#=Y74If36sRa^S^%1dU zG9&v>!Ep27&NL^+&y|X$U8QB`b3ekAROWiw zG~l|W>}kJon%;v(_UnJ>>XE!pYkNItyMrN;!eo*jwK3WGRV*j>raJ#+!&`ZTV}ECts@Y?E*O3i(y&P;)eZ*Sj{2?`xZ=s#^53=5t$JPB zZI`>WvV??Udqfl{Z}5)^R;BHq4BfZtXVR zo5?PF8m-!>{bqO3r01LG+b*q_QXOS!#jE35F}w6S_M6Fg<@cM(>d^K@3gBAe_B&PW z{di%pR=dq+RQrId)@?TPg%fSwa!YC|n>S>rk@3hBwEE1w)jc)82SguiK>ub+YB^4J z$Sr+l?R>Sax~mT3L|_JU#kD|u?o@So=nf)$OSaN^X}7)R4R-cGA)-;(7})l2l$+{Y zw6DH%4%Wpd($)9WSMT6bUfp53jx+Ps;6PpqA0cDUa8Cz5jo0YPP7p?BfKE+r`y30zdH)d<~jb^rg9is^RDvPC_Y#ZPC z{c8RtTi@E(@&RqyBi(6()6p{+OE9@I020$Q03NE(+&qR-*rI+E)JS)()`}yYA)55& z#3#iMID#IRm94F7Bwv|?*VJe3PiNMOYq`yvE^bM|O=;Z>yIwo;m<>zP?h@wo)F;)+ zgqL&e?4PWYk0M;m_I9nJ9`2RJ?@(X;3Z2ac$Qqc8*n9jA#$?JOzeB2X2Ccf2%Y7j> z;*U6{^(`F=&%X()I2VNLFaZVIsTsXTnRO;T9*^~|x9Zlk zE>Ok{Cf%nOgK%i)|CvOHcEjD`cD?-)d`WVLIQ!P>U3OV;J?>1=4Eb=o?h@{jbWy1^ zqPD6VIm0!{Hb11tKb_{jVV3P0>*u2xHRUAsp+bl zmb;sSv74PmwYVhd?^RS3+LQgD#Gk>SeAxEk?cyq>N8F%%UfuLqlJSlIC^fZ<^LNN= z&VzmK{@_&mUJdGX>FNhCi)($tnByWA<;N|WEZIl?>esYC?iTJ3PECC*+#d&tp&sOi z%(AB1riSj?%+B$T%5HxX*9fQCuiE)tIPl3N{KhDJv>r)`^qQLL_=7r%nAcFRX=k6F z?ep6^=w9I`g-E^b72c>ZxmiWun#`R11h=w3Zj9CXR>|e*Waq3uC0qBFUjA_n1dOI< ze1w6+l74S`{GI$z5wtf*QwGmlF@TLsm9S$${gmvOnyel;J97p*Fz)2Io{cTLl3OmT zx)lp{(0(BR(;Gc+JI&BYLIu^z{M@;$9TH) 
zf0c+PjJLk}%*#gK`i_6mxJ_mr{5Oqqd{x9Prdt0*2s3--ZKTqj4+6K}>pgWT;&u)0 z6#i0`*QNS4q(@ONj98m_R2yJ!NsoWc&ujetV(O$vqxXK_6^>uK_d6WFA1>~c{9UQ= z7vmRCmUb!)8o$pcry>Xa`0cA_<5<1l;5wg*s!_yTJ?4*m z1b<}oK6*cX-F@`Q$o=1^l9#@blg;k`u9uu<{MNI*IA;8E!m#|AuO>BgUkp|&@?61% zw`#=t@hj@GqjW>9_6s*x-84R%)X>eQk?x$?TaP|~5WnH!jhAk7O=_2*j9BEXwjS>5>8?kSeTs}S7dA1v|ABDe0b9hY|zMkax zy&O+gF-;MS=ZihuP-R`4?kK6R?oDS#*f>SCV4Rk0wNd(G&Al|BTcdQlADGhOdNtg! zNDUB6^919A0>ja(a|C{!-Z`@+l(B@VB<^ zp@=^cSCze;k2OwL$o`b~Bz7;Uy_-=HN2W_CAA?ZiMwB%M7$-hhd93l9Mtp!p#ht;2 zMg+*73dtS@9D%YMOQ2)W)=W&86L*tOzTborV(kOSzCT&Bp9i<>*GZGZ&)t(=N)v>i zHY6QIcW1i+Xp+OdpS@eOVu!bM_4|ZheF?km-xWVJ_7R-O<`C?QBKTcOPj`+uHa)pY z8=B9_RIM}2Mnc%^H~Ep-(C-}2eg;HsrnY&ydRO*A$Qaq_t1aA8Ur<9C_cz&=DKSg< zBs-_G@~FvdFl+7{;_ZUAHSA|0=C1i$gvteNuYOS9rUh-EV2hK2V|t}$>ZMPGFmYZt zhF@wjciP&84pnF~H@D$UYu`v{^IDj-MzPJ$+JaG%rkCYoGoRRTb>7RfjC54hev6Hz zI+ipw7VbwZp^TPngOh>MrAl|6eGGXfBr`Xz)YOyN7>xgyO5U_Y3daAcLa&+Ymq}{3 zF{}TOGi-^z3&vBSCb7Ph`iWH>zFiJ3lJNy?KB?`#VAiMQFKy&s|J~TMzo%2Vn)akI zFT+&0Uab^ntzi6_!T76ZQwzv^LM`hJ#@{@j@2}rg!ciE(@B7X)s5IQpc@?DkxLTI( zyxK6#Sp;b~O0|(jbwix~FAcE|!XHA)ewI%pXU+-QcTuSJN3p(ej(lOQt$gLC)bu!N zHYv^;hc7Xis0P!7v^=m6u`++&Sun_Dme)~WF@RZR;6BorTce9aJ4yT?sH)5Gzjmkm zabs;?G7_N#~NK;9g0Ayi0?E`6{DzuW-zqm|v7DVX?;I+4!)n z#Rt}A&)`MvAgpww@K=;i(5|0DTC~5)NJ+KsWchhBn|gG%H*Q*1gE~~B%Q+i};ua;g z+bouaI8AJiFq7#$idTh*;3;};kz6;Tn~faTU~YS{X@S-|pwVskQ>2U4M{dWeNx!j zhQjt4XhIawV~IlhvTv&84%#am`D`PHtmzninjTd2G*KI@CBIK~Y7V<~ba;WoT8NrM z0QwE^>Y8pYmR>bRZ9q*>8(7ew8>cad?1X~JF84b1*j>*2`jmQ1HJaH~ByIX=F#0XE!KnF!4RNL5oSCoF61M)y ztBV&`gPm0xI2-xijF??TetOJzmF!tcO?&SRQ+wfHj2rfnc~Nlf_G#M_won%L{1cnu zLwQT~Nkaph>nHR^i?O+`F^Mt2Ss$xtinV3CZyg!$3fey*&URZz-?9;WregwaDPcS^ zE`7v?wPFuONB9Jhte>}OeD|0f)>jjygUoo0YeR4wgzL!x5HT|Zm1LYJnJv6E0tu2kx0NaJmh#n(kkcPZr* zu;8BcP_gG8cCzdiW@`;SS$Rj&3U)IM*j~cBNi?vQM0;|HtXJAkvYvcL7@O!I#0CcV z8IArLlI6yUZ~}2$H5b=>UE}cHIBcS*Pl5Nf{vZPD63(*U-k zmwgZtSIFW$6ht>-w9pn17bLUCBr+~a^JrUGkufNWInCWo|K7^dY=udPrhBvB;sr^g zogpfe=5Jw}cuovj%+dyRBjc!uEYz>q9GrC1=re)H94g@$3d9|fAz}QI4PnPJ;$I09 
zXnUv8RTAZdpskjLHW=^a&rV?2gnAnhDM8yqepJz}fqgbQ)(S##>(O_~wu7ncF__HJ zOl8wCYi&hFnN)O?MXIRtoX_#v zDK=fg5sENndAahO9A}3l(NU&qxsx>{?S^k!&V!nE+QVi|Q#)j*XfkC*LX~So(G4{p z+I}18Ze`eji4>O89L#0-OB#Ex?axq)o!TWf%$+pD?>+q!{kqrL{V{dXonevmqnMkC zh}elqw$&mR+lABC0-KByMqmvZ%^}FIJ<8_H{f=9WwlLV<{m~YFLI&p5hcIp;Pu4-^ z{yMnW$8|YDCk zrD4DuztDui{hDJHe1=fxpE+s493j3}_vWGo6){rInj`zBH8tuc+3J`>n^2R=KS{?m zFhO;t*56I_`>^SWQ?q~1Ie1WA!(}Z5fVHhj5-W7qp6p`f%dx0d1|`{@uS8+eF!G&5 zQHn`i#jsD9s8`dXe5=t#6P;wBMjLwSp6&UN+)=gCMWouQD%sh- zQf7$G8>$kkSomfBB-Xm4s?w#wkHJc?#f}ZM!=i4hQd}U$MX8qBV~%J4WvWuQzNMBw zAZLf^m`|#tdX|t`?W+}`HpqHlJH41>2b6jvrb?8|u2fLVVs9FzHEoShB9d#{vo_^w z(`O?)B`{;uxA&woY`bG_WBRk;mb(<@1FLB_p7@Q74=1NSt_|l4xiG@vWU>z#X^Rlp zE-QSWPmkhG^}B+$*&M=X_yGvo&rvQd_k{mmYgoN3Pp-M_&=1ojs;JCQYB2r%`kH#q*WzH>1vm^-Rg0SLw)O;BoM)v{FUKFp7|jwT;f z{@RG2%-m3Q4T);(G#}xE1pjr_8bI7%#R-nV=xxFoN8hzoHv!d`?j+W2Q!6r@&sSTu zD47d=tRK%2AE&bArYC5h$d()#-4-R+kEo(4V3gn2-2W`oyke^!GGETaTnZt~D^i`m zw!Y^V7K7-O7jgIyq`xqYC3Jo+e%75h0$1$yI2i#jfPZJ7i}!#W6OiV6(2+KyBe|XX z{no>x^#;w-k2|*Lfh~ynn&H_(-OQMK2Ml!Vh|XEKQf#pc4uUwG-M~!0IUUrR=D1mS zjTH6S@L8*HZj2(&rKS$(z=a0&I6rNdKL?s)<$h2V_X{gg@`2%?*P)lQAZz}JXttD&~sA=szuv5rrC;D>YnzyTI63H|N z8!3kwNp!A<F!QFy9D+h`3ZXDMNI<}+++J|r z(6+CvnkXbMV^@S76eDxcODfKG@a{CeQY(|0QyHCf8{_0Ym2$uBo4*#425sNer(2rq z7{8!xhi4sWqJ;DU*&qlN5ojCk7iN<^jJk<-Sp?iB-?%NNkD>$)Y_kkL)Km*hBl>fi zdx9QfmdwS4x?MbsG7lCnv8oP#Z{j&VO zrV>k8vh&nYnsP>J9X(R4KwPdh+7*K42(_z730eaW8RJVI)gcOBREc1<;_;ESnK$$$ zi~W{EH!WP4T^$~^ZSH(RigjENG;wx{ZhH76N^G*j35+D?*GfuoaA!Tkw8KdUb_b`& zZ^+zbcFGgzbL^Bs+lls48MMg|X|~Fb@FLvWyL9ftYr@4$SGzfsl;YgE3tjbg?n0N2 zbYQ!@?P7yKLdWqj5crt<#m|?3ywhNLIv1E8E`LBTV ztP1T4PMf(OzOse<2JzJ+$x-VSDQG7Gij~+%Y=>v51%}WT(>Ha{O3wynVdh1)BpKex zH=a!5%^hXO$90>Sf~X45JQ)V+GMT5N^6e;w8Sq%8cua1%Yer(mkr*Gz`u$9K6g17{ z53^F|bm?@o_T!WgX|qg?yDQl_vjSY%b;zBGy(6Ym^3nC3$1z0lOnxHYPDE#y5^f+x zI>3}G3sQw_RXdRIuk8^p(!L2 z5(OGWC39PSXPZqR_~b)N;sEEXP@ZUzS01LlzP_Z{l!uWK-C<)+cVOlVb%(68l-E9* zR?tpHhX!2)k)X>V>kg+e?@53>P_MJSo6sG!m}rXV4jk`$FJX;dci5O_8&LZz`iA%$ 
zq&v)#PZPW80B0x#?E%vREl|JCcXKft0J2U;V4A*CRiGct@%q8TLg2!9{h*Q&B`JU5F!Y0a5E#Yu zgG%WK6EzRdK4ATTDAR+~4>)fAkoAM}but-!a3K1@EyudPAC7(?X09|PLHa?Z^n;1g z4`vt955~pm2bIFGk+Y2sNI%%1rtL>R_>`E%=?9IPvBaUGMiKQOSzJAsrhJ!GO*<&{ z;BTO4(DpTbM%4rJ%z$8CJ=iEALPd||{XaDE{~Gw%hZB^t~Rxx!d_4dK4dS`g0`73jaLgk@6`hHez6Y~ zpQTp|wz}h(w$?DmJD=r9Q=y9K1$5!dbd%`?=2OsAC(euN1)BW|^a3<|GsUho)x$Pq9A5{<`*UZCpCaIs6=!Z*_k3}PIW6QE=ore0v?z#)18vsqp*z>Z93 z_iVsDCYN`?X~El3OOhF{I|--I+>t*cQfjBE1o~M=iXtL{Yu;gY2vvj8Ae`321_9ONZJX#Gs4HRl(VKI5l=+|9N{MI^k3L*q0pjNOQSz%d4U7 zTLWfWb~}^NAzxCSx5NZW4S=0pT=6rL3qL+>YYD(+PiMtPz*E1iM$loL%AX#E1CaFt zi8NAZYYBcn)EuH_%&EeX3K7@a*2vYLPZgU`lGpTr7^UbKq&pw6vntc9xE zMw6kT!*@ENK6~RFlhU=4x9RdbpHdDhc7rg`<>bG_EJP1 zj?T5g5l=j59*BKr%0?cZu9riUho5?lGKR>*SH*}X58sH9hZ1_Cr>T(U4>ozo@Wgf* zEAv#`8xo$9l{P%h05dZxG|f!&fOV0qFb6CTFVY=C?yfaT$< z!NiZAA97_J)pADXE?laCv8%6L7OSKgP(!xRvhkBtptX zCTBAnv^{8|H}CEA0?sbQK)`LnrdcT97*I~Y!Jhp({m=Hq;`-u_Qh5nrdxjjnP=-;F1R>i<>@UUoJEsyks^{c!)43bEE*8dl*+rLNac*euH zImjNR+vDG(b?7uIJ<)WPF|boTESfhNlkOr9GfO1ps@`QWur?3-52~C;>2C6{8iP2P zqV4oxTD3>%x(Z-bz#j6jFDX?`dBs(wwdG@4%g!mU{6_g0zSoqGOq|M3Z~JuCFVo8_ zI!kAkkGZz&tn$jMMx@HewH|eL`Lxm*<>P9~D{IQfkYIZGNR_WW3woyN!^%&p{Ikj@ zF3gp${=dkln~n7&QM$Uk;`-9*Mai*7{3K?flSy}_Pj`y?fi_%QN*z~~CCe*YN0h#Ptkna_XOi67d|R|!Yja9% zrhBd$0qI(gDhsF~-&eI{6pqjMeBbbG7mj;8Oq+~c`(~?sWk;7e9M2}N=&S&DJvZoo zy?BJ~@paC$I!nl*I?poxs3V`sztHFVx&9md>aG0G`0{H!ZmWE{=oqoO$mZG8+Hh-3 zx;s3KqU@>Vv)jX(#dCFHIcR!a*|}o5q*(6kJj;zc?%48)HRa>r#~jZIuQSLx#v}9{ z?DL74OKX`O{>%(C6Pn?1%dH`qK1Z)@QU_ zb#$#*`>gWblCp1A99uqxGGoxuz08SQeR*G!P`2`l4yCioyGqJVJeu-qbwJ@hF{*a1#Meke zS>g*t;8TjgO;Qf|tt@f+5SRdEi7ys`i!+BlWeJn?Lw+broH+z0Kv|-u2;8LNkl)G@ zGlswfC`;58fuB_bo+<*LH3Y6i%yV|2ae#5?m5;*^hTt(`c!skqq zjxEj0fiUOa7J-*h#MsijdhOxThKfyb5efe$KuDpyXUe_LhnSvhz}@s|uGN{trygDkl4DErawc3qv>5#ST)n0n^6hzp=jBY8ejx-ITY5n*eW>HTVeq`1 zF+Gn|-y@P_Y-wJegu2!-9~L|>PfTwk{f)p!myS<_@;B`Fb-?53{IX9!pMKC2j_VH{ z@5(8JPxSC*K7CmJxgNgC!(jt^mI6Pi^fLb~k+{XbU0Wpm&w-CE&C5g63(50{!RPzj ziG=BSeER?cWNhgrIk@R>d`klt9oBe!Ob-M8UEpza_yO>fN*CqIxy8RdW$=YV;1`^L 
zuAPHMUKTHHZdkr>!KDooF1<7{@2}38^@U59U)s{VxUqWTSq)1YmM^|^#_~&3=ggT` zJ*kDX)sv>oTd|^HQr(>Dc}voZPphk|ordS>5bemoGVQ7QaqdykhwR z%m5S8GbXCw>agH>xhktqO)Xj0*s!EwY4d{S#Z607=Y(knRlZ~iHJw^lS9cD@R!=;y zsj+eK(r*Z@Ig{rBuKj%7<%d>-6vUovb-Rvvod|{rAts__UX|)aCSl-a!y7LG& zv3l~11xuTjF8)r#R~9t3G`uHmt3Fjjeopg(<<0M5rJq-&7c5?SS<|Fb|^i)zj~(TGy^R>m^r69dH$~Jh}ad& zk|R6`R-Zbjp>g4iMGcq!^?Rqpd|^h@l4ULQx>jX*>b$p{7uWLlWe65hqvrjX`-gDs zA!D(>F*2#zDSNPjUu!SFBGmkjxb9b7ZOY$YH7svJg}&mw&~2%*Y3Zd4nj4n#S9iq~ z?@gtr%~-Tx={Fi8s}yQJJar$8xIdzuelQ3PWrX)d2E;MP`-4bvhH?1B2HD+H2O7ve#T?KO`deA(iBTur0Ck>b{W-NTijS=KYP8dNZ~VF z;d8$dX9!gOx*{e#{Wil_oo{5sHW|vlQ&US9H`kpvYsr#JR8*Muw6hkTk2y2OgjpBn zkS%jiomADWFK>Pn)s4!3m=!mt1khl1pYYAA{r41=2d=NphHMqoV2|Rd6t4 zol56jx^lt1Wi87a=3Tycd2`ExMsIR%_4aMtZ3%e;{MUqb3G2a4mbjEZc1+td)5Dcs zR{1dfmm}$AeGk*`#{ZzFu$+_e8|YCv4g7`We9^;|egS`BIc?bQ^(g(P_zU5+*wOVA z!Y>{Z51*s>44%UD7b?yo2EO6r@$fHugoUSKpF(`T6@kkK7Q#cnh|&)SjyugBrJu=P zSWYqcSeM-BEU#CXzPboJT?GD#BJj_6xag+8u>52Z_@|4&LyvA@IY)T{2_EfV`Hl4y z!YAPGje{RsB>ki!=|5g1{aHoQ|HxH41fL~E@PEQPxC-$<)^}tfTxTQ1wd)t2uL{%O zSp@z`1yt}9ra!L;yrBquyer1|YCi8_zt8aS!t#Gvgbwmo>nTkC^r(1v=wnlHdG8r< zS}wWy8KdyZBJgoV;1i3$L(hC+z0-=M5Ba|^{p=#?B`fq4rk`)nq3}gT;LD1@R~CV< zDgwWz2s}#vI-f@WqV&&4CFFgXYF9ogA(4pEKOdElkLy+X(3h|Bqx27B3k0904<4lt zWtYB3>7V!BCoG0ezbpMYdht(`{vi+Rr}_SOrJw8H^)E{Qyf>b>UY|zk&-d^87p4E4 z2z-@)SNhrrJWBt3v_&GZCXzms;R4+hflv4E`WNl5e3ZoC{>n$e4DPSVk#eH_b!r4Y z(Z37-&qv_V{yHrJpXT3{{_F@m+F$u7jKTdCrct(Nf90cT2KU$ONcoHWyXrkJ0$&z^ ze>nn=_E$d2Bav7YNguXRplc%VFm^tCgRmpt@SjX^@13$D38 zd_tNC?xz&{=^~#%@T2{^{)O!ooY11&)8^j_fwO7@3T(__o|{n-j2Zi6l9PJbs&$*A*3evR7KzwIVf@?0w)wE_oO3m zuZr4l^CNJtavHoj0zW=i3jH2|6Vj1;eiDIuRn&gFJpvD7!t}%45%`!~DT&1P2>c@v zxDkNwqJvjm?bj&+@aW%1?ah76kHGz_{h+Iw^yr^gY3=)~5qPNk>4%Sbrqs`$w3P0n zHUh7Vz%PuzPl~`-Mc`v2@Y^Hszlgy5BJipR{Ot(*FC*|OKS+dUsB`M)iz4uGk@Rg5 zxblQ?PB#_6=X>V8tpFazEp-*ZSNZf0Mc|?Cr0PEw(E~$WTJVb^@Xre1pW7mE;Tgsu zB_^!6VoCFYOZeZs-2Gn^zAbNPTri=zVP$h-!etAZ7bGTJvSLNT027XEypK!?hoPQ> zS=yS7BvVp1qLG6<>W8VB2O}|x4`l4Thl3CYgVf<2+y{d}%z%F%=@BOc-cNLnkYG5N 
zjv6$MM3daKe}rdw`4bLTWe3x6i;{W2253Y?zmI%aD7Ft%*B?x8iU zSu&F8ZNl;<#AAF^hKqz5n!d5=(glr0a}6_CR?6P78Fjd@SwO+cS zx%#xkgiD*2Xs2Le!qTSZh6$WBbjph61(*J{$FE`0yoJjbENMtg_{Q?4me3s$nl}&n z|36`rkJ|Ucf~NVG&?nHu_mlba=Gff-V)0TE^UEJN-2B&A?!o!x?;CFZH8J_+w;OK$&`%e| zU;e$}=D#JTe)-3ToB!6B{PLp@H~;N1`Q>jPZvNlKie-25A2^2;AS z-2B}!`DYwJ{_QdOYY!lQug@R)JT?C74j}&vG5O`kA8z~i#NkrR(i{h`ik>Tc_=If8jk8=+oe|Ww_H2>@a z$Ui%#e#JQq7yk=m@+*E~xcNhWdX#>Oe;IE6#+d$_djR=Y#^hIA&T#7w&*Y2ZuXvx~ z=3f&-KgGcgH~&pB_|HFp{6CG!zu*A!hw%#0_FsGe`R|CSzu^G#Z-~i%Sxo*)U`iT| zb>VBG*XQ3nPT%rRMgG;^6f2=VnP%VeAIZOS?~8mnlHc;*hvRXKPx>kTD~S(B@X$o! z7g48v4(ifIzM4*|G7mO?HF>T53JQokxBL1%`ng{U+y7&JiOb(L%=~{w=D7UbK7TaU zMMxCZ|6hva-|h2<^bQ+Pn12NAj;lXB6m);}f3HaXN*{c&zx;cPqab$@Z83Pet-i^ZECO{|^bqi>rV3FzY{&3gYrF z^Z6mqpr?@jqsedbuTXycy~`HLImtii|6zMVcw3SBR}HiN?-!|mLrneB{|oC^aAI8h z*ZBIQF~e$6Vf(KtlE2&MuTiGRQ<#7BvGMg+%2>ctXqza^|3UJ{(Z4sQ{WB=9u>Cg` zsejrq>;F-a`uD`tUl&vVu^)(U|9oG+cqaN3w*M&d$F;xWr!H5>XW~Dte+t`wb&>jS z@%1-GE9bqi{?$e5ALr|z=<}=o6n};FA9GxM`zt26jM122K?>_HC4XG|FN|sbteE<> zUn`FPXZ!jWJ2Y~ih4p{0Nd3!V+Fu`2|A`-rufNOJKRs8odnv4cH2LG&e@jgJ&yA`7 zXGQ9-_?)Zn?ETdLcSY*|ZA|@IKNa@>fiBS;BQ-SU_R^ z&jm&5mtx2ho*(GG4duUoERuhXjFmk5tG^nA;`p!EZz2fKBXp$=t^Z{5$Mt`A4E-;l zkV5)zE>iz&zxg0M_pn&~8;aE5=j)&7GdA)6t1F z!R3h7|FxL`(+fe$ySfu`BO#NE_71BRL5Pe+xC)TG9v~GphgQ^H+%fz9RKc)MA|{Jpa*s8%n=7i`2j9f4Ceg zeMZ%P34ew4&tkKI=ojv5^vM(BzT@6*2?73pm_G#1B!67{dwu<@R5*|7zl^^ye>ksx zw@CdPhFSmRMe3h_t7=UoLOX)$U&vo!{Uh<;#I=9Xca=MTzNh<^&mVp-BY#}`EB?`? 
zh}ORdP+|R7`ugd{K~Ke%?rX&$XukhL_z!*li3RB@eSXL@=<(fQZPpJz^7#iP;>%>8 zzYreg58=1^`~~WJVv5hd=cq8H`yaMHg#JDGjea4(|L^*_dzqpidDMSP_zUxg{3Cn{ z>jspVxLTi_TaSLNXBmG|2@3P;=Q92ZpY0tE^3CWb8bF2ZM|{QR-}5V%|97-6`V{8> EKcqISA^-pY literal 0 HcmV?d00001 diff --git a/librapidyenc_linux_arm64.a b/librapidyenc_linux_arm64.a new file mode 100644 index 0000000000000000000000000000000000000000..04059c9ef9bbbcb8437b53f0b9273f21bc5095e0 GIT binary patch literal 96526 zcmeEv4M0@Kwg24R1y(>^l_bW1>>?@>MLwAc?OYKZDRWNDPZ$9r|ZOO8#vX$j7XYq>i+PbQxF6M^- zy0omkxXM*k-yd32(okGo?pj=5*)Odhpt!uDKWs^Pt*g9R11~ENmYoXij~2kvN9wFC zuPJxcmjvP4FF$9kQ(rcKG&vE1b7@U!m8)C_y|=t}X`K$n`4@jZZHjZ*J;inREL*m; zw!W^|Rld}XOnoK_@AD7hp zd8MhJ3a7Md=cE=(Xh75`*MO$XwI$zDl)^!Qw+y91wM|O*Rh$o?)|s!;Ihs(hN>t_Y z8xSB$t%;hct1C}es)s5wJza=fS6*MNBvstwa@Hvoo#iK~E1%@2nG{GvX?~(iK~%D$ z!B3K=C7~2Q!4wTaD8WFHt|9RWwxYDAN<$MMn3@(OA2MX0WNAKmYgerBK?BvI5cnBo zXe_R(rVLiA+S|OU%#8ebFi8ypk@^twSHP4PRxd29Tv)lVK8SrTN-r*4lu;bY)HQtT z)Qna0yiCU2em|3p!8>$`DPuI|PnI#N!RIDiO9LDQns<%rbAtPYPtU_#MhFz}vO=Uy zrDp1^uDPeWnA(@mhSJl03F(tF1JmGkK~z#(69~2|@O$d3s$r$|<+T+h&hoU3IW^0w z)t;grecl{9(GpHt>MYeY&!~0Isi~=%SCx^UpKo83R-Bf)aA|dQm1{AjESgjda9(;o zwfE05hcq>Zxu0Dj($x}|)T3yHORHD9mey32ROc72Tr{Pa3ilaiotam@xVF6fGcRJ= zq!}fyrLL-b%fC`meNXx4Qns`VG8go=we_E4rjsetuU5IrmVPUDWqohk)}p{ZpoDzC1X zQCaR>^7)zllo?BFmfeHW70z8jmN@mZc3e|^ZJV&SAc%ZaIq3%~r8D?gu1GiOwmxE7adyf$*yEVOq#K9Mq1{a zx5dHYy>6{177+g_vi3 z+0jtO9IOZAFPvLbQ+gRznKNq3)vBv0UAeNR6thtpD+QP9QyP!uvx)j;Ce12Su*;08 z=^X5c6w@i1%%wP;DN83t`X*m~kRJ=vw|;1Tn83pfOo!!%XmIp7(!`C^2`pt^rsJiB z2|SuQ=}0j}(-9IbC{T3v<_V1Gz42HJk;q;x0Eb_FE{==D|KT3TCHoS8xY zofu?K%E~ORsS%~tV=Q)Jj9ya)+>*K)&Nmp&71&@XUt8s>tHKISsQl8>vq&U=hMJ1? 
zt#PGLJzK4zRwFYrG8FsB$jtDqMy|@AH`5?VSd>(Q#D2~EcLK_ob+G?hRxV{0&7zo< zwJz*17QYs6HU0CCXagRvsp{qke7 zS$c(;_qWI<)<`t$;>>PwZ3MqdgGXNIH5%GSgI5KU`@HdN?=hp)e$>d?kJPbdW^pg< zVAEf_6p?#@=)*b9bjHpd0RBnf5X}*`xL(mB*C#E`FUwaqghh$HJcooBKvLWx*2hdzCH_>gi98n!X^C5OSjC za6FRbBBdh)lVvz>gj!j~bC^}3QgtVD7|Bn8;vx=1y(@uWl{#1qVE1#F$V0^r{+Ccq z?3_1Psxhd?V9bbr!GaN-^kJX^-~wq9)pGU69|jyr!$)9@;Bav-VjG7GdlGQU%E;g` zbV1EGLNR7t67VDRAm!f3!ur9fZigEDPg)9^v z34#9uj?eH+j)Ev7$tSMW6!q_KIKGAB3;QJbg5e+G_%S;C_e0>DfJgeYa(q$Vn{gkE zzyEPiijyTau*5PvjBu5wq^nVy8m0QtuJFW7jOVWX@tfw6k#C?Q9w`MpHdC`G7EiAD zEj<$P)2U-%Wrd%K@vu%L6|8?K(~kwRCdoKZxOAx;NR zRKfk=qP>pL*{_4+6MypgA}r$LHJFA3)`ba;U9^AvJV?PIe#jb7A|xc1~^Ais8TttfLr)|o(?YX z?$p5@{9g2Xf{(E8CLMkUzu%&Rzs&FV=-_Ah{p&h-D$1b*9*ca1eZ=bE!VUzTz!&wM zr^6RYx>E-i^;E5ci+XbF;G&*bO5Mtu`jS$_^|fkTDXwbEt4mVq%Ny!hN?A#L2}>!h zt79o}fV8QCrMQ;Xm#1JdGNG=%#JPl}DCp&t#TB(BHRW6jpE$CD1BtF5Vak9}7_T)N z+*=f=|IaPBG~p-vLCTdJ$pL(bJpi|qkC}souOXW!xPByFt;?5_DY)XA;_>(q&rtMY z96`7gNLFn~!wu!1ss-X&skajhiqKdY`%{`s6tU%BU{Zq0&Z-AJEE6U7cs!p0f>gkEEUiSd8;T%!;C4y1V6^9K0 zEEs)+3Cw2ZV&s}dZMSG%^fPNN`M5k*~Fx->Zc z0yDS|0kn~sq(zq^axTzXO0abVA`@*#yOu!pAnHPia|T(SFytw7exeB$NC*Fnxh~1_ zS<3$1USD!g!wPIvqabYf7GxErK-52>`pW<5 zK3wQg6jiUwP(J(uuL?nbtv>utk)ba3*RYxp%7c4sV_|;+9Lk4_F^5+F*W$xlc?MQ4F!^x$iTt&EgZf}jm%;gnNMHU?K0Jy0%`5qE zKKE9iPrx@{>G{BNb91zbEqFVOy)u0a+Zj2G6|Iphb5BY`8jG!Ld36{odJNOa6DE0TUJT1J z{k|frH;iSRF#GRK;kb|X-zU<&#ebhi_c8waGP)n`zxU96tp9!s?kTS^39LDfMRl1B zrl`Y-Y-eXUD_Sp^=AN{&=3*OLeh2Q)8coUPjN$EPu%3T{$y1LSquNPE&xhX0i7P7_ z6uA>w1HsA!jAZAqJOLy5IV@4Ys0c{pqfhbiR&Ok1YL zK5x-eSkm6aqL+->{7_42+nN^BX3vPzC=bEzHnO4vCf58^RYg|B@0VoFGO&zUH?S;^ zi48ewjBGz}$-6rr&n)D##aZ!gw%jy)&b|Aw1WoA}jSSfu;a-@3Ha<&+K4oU?O2o4u z>chvbT+-k%vmvt}&yO#9cfaHH-h}5OOYW9f(HZ2`3EQB23xJ2Uy5(gz=iOTZTUfju zG~iWn(Yxz3@LmGmE5K9q-*9)sLy%>2&gz9T-mdX1Lk z6UHHUb`;hA67tLizb(LDBO8)SC$l|6jBHmb@V`lEs86y}+#mWA@)=@K?Kcn4tX^1I z*5JW2F5+D_nl;QN{B3MVDReU%*99n7_&>b6_u`uJr2Kw`Yl;&=^Gkl6hwB}<7SBtR zF%QN&4#uX4@eajkQZY)i;(w;Ow0HXJX%FZj*Y7`n4NQ~uc}|-%jXEPX?GAlPu={C_ 
z{?O!G$4^L^n3|S8DPwZxl&q<@eJPvWK3&eqospM6bJpxR1#{=!VV_^PVBw;oFMs8$ zcYf{b-?;0W|5EHIDRq{WS1hiqx_e1=jce($<+XM7_pJCIU|+VXEd`u8U`wYF`3a?92q|76?t_NRXO^v{02 zW9Kh^xoh{Ye!XY!Z+`pCv%lNdvH!W}UwHArO9y}7+4YA%{^`#zzjCPi)&F?yFR%aS zU;p2K{r4M(dyX7E)_eTK$y0BhKJ(Vwe|zWbxp&XM_x|5MxX}0EKQ4ZB$;%{z(G(UQ zVIIJuqS>IqmLWrj#oTb?O~Y>t|8pkw@qB;yP(@Y^ zoR1k21YFF$cvdPOJ}FhR^ZgArH&OBRs~akbksib>==qtj4L=w48Er6lBaqER#B2_y_1S=qkJpFbOsQUqmzF-qMMW#hf}#i15pJ1Ge-P$zy{UKo zZ9ddo^kv>uJ2)ZvAIeAAx3HTr9InfTZO{WaAL5}6q2{83UbHhoqrVq(QEpBGjWvn< zMa)z5ZToh>NhTc{YvM*P$}1S_@24VxWRe8}kDF-XhamJs`XiQtUcl{uQU1a%ajAqK z^8B@JfD(lMMEq3^Od;fdp5pXc7+niG0k7sTz5aG^`a*%oVy9-h4}inA6hoDz$HsztA6zeo4wW^(@B8klZ#=tw0ETj*Lskjt#v2lg>ZfU*rGINJDVQg0f_G)1- zUDs&DZ!$yU42kW+p1QqQ{ybGLwG?`;`J8I!aUv!o%9%3krz7Ik8(W}&33j9WJO~zH$B`dEq9O}k*CLk zJ$^TBYB>OTJj~GbW9WkBeZ-Ua=YlVlZ6>9gJS~*I4e9H^&kBBHt17d+NbfPJdD*~s zjfL%$BPp*^C9hUoD|yBA&+Gf(bvOp~2l}JNu=c~l+49w>?=h%5l5_vxy+vyvM;UZj z1|1|whDNm0J}cKL)dTWt7I4xt;Zi!`UjToUai{ydSFs1Gd+8l-(Q}YROCtlHbWHqu zjpp{Hqyw@o;IGE~oMfPMK|}bI*L%PtJ9+9YZ&5AL94XEseYv3rvbPSDi+DJ&_CWSV zww7=XIDq91zvEsRbwqenA0oYiWJbN)V8hQLAF@Sl-J_0gS_Ao?M@%}~jd;`_vnZgnj|wBtzadX#wGHflR%_HvM%b;JF+>AJ=)fCO)|wAA5~BO>O9_aQ3l!Mw6;S>O; zx3WBI6XbM2&P|YIHQE8C6Wqh31@A%*YVV{sDkss&pc@5QIvDG6FxF^A9W$g$kjn;{ zFuv#>P)^Mr- z--@>9L48mes<yZ%71{FyDC|DV-m`bebapj z$b;G)>4M7t6D~W+P5jB&ty6J%& z`=Glv)Xix4oUQOT`vCt5bV2^+B@NvY^3NZ7cN32`=q3?ujq2BGR`qXrqntZE?|F-;p2(LH|EhTcySSlNpi}IoJYwKYyWM~1+vGd+t1gG*j(6>=N z5nL$`U=M-^wOP{Zqa03sCY99+e4=aRcG-shgKUWG?mNIG9hd?0x62n=oM$64e~Ywy zZWF6C*QP%2uc<5#0QUSx-rWs|6S<9$Ph1B(SpgUe6+Y&g$a{Q1L}O(dpHmR~IS3{b z0O5x)ObUtk6)9JI-uS7motQZ1z$n<-17U>Tiz`Nsem*oN^^Yk1aJmY%R-xjfM+g#v ztyQS_+7X#bsKWb~OC9F%fv+wPW5APBHBrUaJZRC-7?s@v8UQX;{J}xz&qkgaNI`nw z8yj>9q4#)mqv(gH-I3tFNP^-FNA6BhPI zYfNDp=BfOe;C?G(EKdXXTRmfUYTyB_>YDes(WbYQPw#*oWfL9veOe}WS$_Pml505t zNZq>kG1uf$7S}GlXPKtjKK7;J2ZB(`9473F)*#5kE{smajQ(vI3JAl|(J`|~<3^YM^tE`s>qtC)71`6dm0^Kdcn;pJ& zO)J8c_6cRAW@Hhb)q3@NJuT{BSdmBOe`%l4*thO~^r$f*If6wr#1S&DA1YAD}R(NFm|ykG-(H-h(rJntpQoA{C}1;CkubdsY0 
za*zz=$S)T%ltBhHznJDq@Ff{M$jdB+72J*dJIR4942+zAiva5vL^xZud=dp1F%kV#2O?mk)5+*o#9AgkhCOrPR4xy zNK%lr@$8%f^Y$a-gQTTGeylAWNez;g0r@e{Kavq7Et{R&k16qyEPon984M^x4{Vy& zambeGJ@y_WOFm+hl6wZTzFsG5cA(C}uwHQt`>hO!!)d;sO6y{Hhx`_KuXr~bKFvpO z1Rl}LzcCaU)&xzDYm@vddBu(GB@qp;QxfnPZ2C@-&(^_dVGOzuMxV)pB2-#vey zv>kS|5k9(!-m5P$;Jte0Ih0_cwJ5fcDZod6%r%=H-pNtlLE|0@GW}8BX}?3+z(Q%W35JkCe>CsHC*rn@ zU@E(s zawAjf9k3-FPMsVie-}b|SpvE1PjU3WK}$;-{7!dPYYg;2f%ee|ozcA*D~{B`Y0Zj) z9$&nFr^gp|Bk+ZuFL1vi@C97hlO8{V+mjyO!{M|RBg#VZt>bXPhhP-M{tf+mpg+>^ zxj{u${A z@|OVTs#QQ@E-m&Y3A|K(A>g8(=IP*~J}Px^(Y`f#j=l5 zy%^5tqC!ukKl0*&Ucie0qx^+E<5CG(JpU`L1<-y?qSuGHSE>J65E8w%%@MCcPQQ+B zG(pHOKsC4sR{m68eg{XS^3xLvK`+W5EN>!tUF6RNqooB}PtencS_^P+#IwXq6ZEI9eoakJKT$jR#|&S4t6r;r#hGGhI9QEV0G6@H3i-I<3OdaeU zPtnFh?d{gN1cjsa50&xS$E+G5HOzvYZA3Z`ZneS3rm7DJ59l977Qx1vlosslBgTfjvtqPJZOoh-baf3ujC`uxe6HQ!dG?o;#Mzz0TKwN zx9~eTL6RWkvG7p>oR15#HCX_7yoYBbd?C>V>m!2YO)T8>2f@twkS`48V?+5!amGoC z!o~lGv-1H$`N)?!BK2X~pr@ahHdXO>;?I?RWM+nnd-dz3shY}>6=AOXNKq%BijU+Q zdX=wU1U#q@`ArNZ`|G|^tp(w$M4gCO-&U^s%HY1TgDdKv?JFmVisP}q{zCc6*E!-9 zedQ$nsE9kuEBeZeRE~1>zOo^RucR|-5n{eL<4OH!K~;FLah*E%)7ON+*N=J_w#ZZX zXFg@fu`>pJxUTEY0|owc4+DwfGW(ecLzo0(-Z~r zaeVFngtMP$?4%8P-G*AjIK@pu_zW5c5lD>B1bhsK>n-kTeKw6xiRZ_qPviN2-0^Lw zv!AYZd^?}#uW<{2`1e^QDR=%qLVp56<728LSpY=L845j<{%H+T&+MKKs)<`w0?V_t^m})&9x~ z_MD4sE||}zIilXLD4hJ`UY*ZQ5BPqX>bEtYtn$(6z7*#dPr178!&QZZ^5GYFV-vpO zx(`?NrT0Ofl@GUkL45cEUAxhjKa>v_Yo}NA;Zv^FhhHCetNv176KX{TTXRs?=|1_m zo1P~I^WiiO3+ltgAX@zhi+|MZ`*H>%wfL{~E#*;t?wX^e$IY<&E%6k6AZOxV*Y}R zx9R*HZMa(BZN1txle9Gcj8Apm*VnnV+77;df%;3;>H=};F4%Zm-8b>c$J?RyP0(}W zpuSr(-lp#v`Ul*;@L!kI=|0;K=!MMK-UiaOqE<*Y4 z>*H;|t!S@4FW>!tXS_W*{ThAu^?fCZLFz40wbEeY?SJ~d67gYh|A5=KmaQcDN8G^% z;=bm0-Dj(+2<5Y{kGFlM5^$k!DgWEY+n@QqlC&w;=(DfyEBT`B1-#5iXO6b{FTm|*&oA(Zer}*>XD_co`_8?qoO4{Hm5d! 
zv%%oXWvtOXvdjg0ek!-Yu`Q=GR{+92Yx5; z4YNex=M7^S;kl;dS+I+ld2A2mM|qfW4zEl1eX}FV@wbo*XT-EY-W)Cu)w|3FcAdX( z|2E|vR(h6>^L6OD181kx_srPEyvI7CSy#M)o#{zsMJHcYH>j19eXI zO*T_%%C=|07HlJT+HXcI>Ry^;Q0m^cVU&yNJpsSlW82jFN8Mi4pD-k zB-2^ghk@(^*SA9s!?#&;YkZjtXHzw{#XDgqgB#-jH^Wxnn3CYKnVD-dVmx4ly*OQl z*fQ5&VK3i^-E41ZW%KA9<6e9Z@F?2s;sGqhiX5%6#><&}Q-5LQ+J3Ti9au z+XC4E*#n&;J_@!*a*jrw+TaJaFq~bk*qfulox~D&ZzulwgG3akxk~@y)(NE*A|26sgBC6*-l)C-EUcBLOkXo*vGporAK|-d}GAu7{0|+8WD-dSDBWq8;O)t#f(!K1SL|oMi^R zkgXg*IjjxK8*B{~4b1u?j;EE9S>mA)EZQ{a6!d@|?oq5@H^T3Bk;^mAVLw5UJgjH1cy=&(6M(O>NbN`vvHC z%HXGSOg6jdiwe=tgdr_2U9u1Lq$%y1!Iu~T^K3|PwOWSS!_Y6Kj_k#`T&;FD&gRPX zq@u4GWgi_o+MWYn!qB%o8T$)6d&Ua-iYIz*Vp9z8El&=__blLBaE^a6j{bS24fa^f z*y$X!yH@xV3H~J>{soWJn{4neB1tRiI+?j2IE%vNt zuN?GrEkE|gN6LBeumkx}9{M2i(XHsST65ZJjaFlFZtOI>8~HH!A7u;^$(pT|4#f}s z8MJT2%a7v7il(iwVLF>HA_scFd2S~$zH&0l+${~?ZfI*LX>eORxJ^Glg1&|61V0%v zoy0iJR_ScWfnRgL4h)o+Rc?pP45Ipny*EdoUZ@?LP%n)8462Wu%9GjTJMy9LFw{v7 z>Vx4v7rx>M>M0LCL_$5us0SMhi&}>Iq57hFp!&(TgxRTn$}A>(CF*A^>W2IiEY2Q= z_EbXkfqJ1jDMx*5MwvvNn5urFsgo_)ccV z!kVdVQJcuIqMf6?pnV^jb{V~x4tjt61L%g%%v1Fu_nlw#>~^IsLr~OW{Jd_-!(=i(;0t1fh-GvLo$C4zn|gvNBrKxZ{#5NP!sx> z8TigbJ;o^K;Zrt(rUJh?_`Pq8Nj_x^Pwp`eNd6V@N8z{lMwYe8z^0lcb0*FQOg?73 z5$R^&-H<%Ca$MG%_y+C?V@!KGuJa^I<^VI!&$AsI;e-z$d}lTrs^Sj-{+O&W7+^nX@t`wU*$`8Yv>@?dLRNe%zAJ&V zBn^px{TGMfyE-z#k^p<1!(!O-R`fN7@qoR`Vf4Kvf~5lXGKWRu8x$C`8ZrR;1BaRM zy%UT#4OxI4RADk>lW)&vQ=KbEH*Ce2(kA0uEX<^wYuSrFe>1j`m!FH6G81)KkKZ^= zo&Fv5O7%^3Yec;^!X|T3rm-m7OxuAG70~_jsH^c*J}bWEf^!e``5eLbm)`$I&h|^g z$VQ(RV_mp$HN56_zjnCi82;nJ?RF=p<_3hn_Z<5>zz8P%hPlz7@qjD*mkcJe#VWf+ ze90Rarr$m-ds2E@>co@@<3}aftnnk`VuuYKVi`PWV6;*5efwz`MT8F5zmXeSH=ke1 zG+Ce5^>5^U;&0?$!oLp z()E1xT7etpsyE9iVS#!*hpuO;*I%aVL}aspSNkq>c5KF=)W^a%hRyuG=?BIq42CCP z?SA7wUK_+Q`bX#!(FL6IyA2IW=J|LCvwU7UYzc>#<2j9lwgirh4Q~CW49@E*FTKFyPH?{tF3RQRaKT^T|4;`P_^)dCXzk>64%h49 ze(oaldh>9&9$$R)MvpJfy42%ut_dt`|vs3LNA3O z5I)(J2(cV4^d{g|9lnJpU~Nzdf{&HM$LshA{A?Xu;LAEbshrPj9ln4U>F{Naf0qtl 
zzz^#19UT8<9ef#wzpCRS@ZZwG1%4`5qEG+4zA`x6r+*F?-@71ut-Zac!Pml5>E(9> zq31&NmSuCes4oFurorbXsZi8&xKAF==b#>+%lQXAK8J7Q!s+GN!r^*(L_akJ<)wFc zMd;=Dv}mk_zop@?wezzaPJHOy1QFiT@X^9A>iKX2Ge10sDS>h&3`vhjUWo!`b9&Tot^3X*=3icSF0H94ch$2L*V6j( zlv(yKPpGReaW3J96_-~QSJal&(4goO1}o^HBJIdZ;V3JA@i7GcJs5u5Z%h*E+5GGq zREh@fw@B@jkML1q96pA_xqDWL@b^_+z9NBmXhW!TdwK*tt+;A}MRQG;Y4{OO9U2q# zZlr00pcnCUBoe*W#t7dbhV;6E2|B`|FbhATPo*18aHs=wd5_Ex!bb^kJ}!d! z$UQtG;R}h5x=TSX;KA}H-fsGXfM=#kAYVws5Iub;AGwaxk$$y-gMFY-KJsObSmeW0 zFZA>i)6=J3laHL7mYSguT>W{N>PLUn{ADGQ(N1qs>P?J2s1KoG0m(<*GsT1Pv+!#w zC&8%e5jJ&x(eJjS?wUM!<^+u&8jI%$m!eo7z5$hNz_o-QJ;yp>-9Mr{Y5$0=!&oNv zgW!8g;mOnS%mn)#iVT&BSulWR>`6M8gMB;bnwmD^n^VI;TWVlKkKkF-Bnz7&gFihR zDg&IJLrurNhHhq_g8fU$*oU$3sL_O&O) zA(R=04t)?h85UMKrIn{AhO-RP#}Q*#JK-Dyd=&I#G`JVKfs5ztb0rJyi(?z>@Jw`L zE^A(uATM8yy$)fhAADQAF9OdSF*d+n54jKRvI(|9&zA8Wd$I-fIVIV~r6_!p3^4cy z^%*aoHzweDV;O!w#_#;1=alD0z5nn|E`@D$a2byvCfV8`*I)5$%8L5WgI?)b)f(J8 zfV&?!rFbq#IE25t)0#yztMDy$8Q<}u@5S3tPUX8@X98{Q$Z2m;Inp7kFPy-)%NX)W z2xGfm#9o75qoHe5INRmGzQTADJ>!$wB|Miqp!v4^V#tHNX$!((kBd<@d_!#KT;QOM z#1!M(@NPV_%Z+8ja<{Ps2H;@rq?vFEfU~)>EUOr@I?xV^-@`LD*gvHc9QyzkybJl9 zSoSPM9;KYOL)~-MMSF8LpDoU!Jo1L$ajygXh(`xvkr!HQ*WbZ|RSaF`4WD`M ze(c#aD0bssNc%npuz`)pwuxXnus{j~&@xQfSvM{?B`wK2&&&Ed-UaueJQ@y zH5$J$_}vy?=ep1HC0D*@v}=rKjBAAFHrLv$`@22Z!?LcSu-m=I(f#m+W!}DzXuh{qCvF0^Ciaio?<*DqKSz;r7%W^&TUql(A z8$H;Af%a@)E=4xh#Y*-AmA}XyV=y(+K8vk@hu!_tY#Dnq?gP*Js@k)Uyyq=)8|1#} zOn&#&*cmn0SF!N)DET9sY14@P2C}t*jdg~BUGOn|KgRDd?7t*@>>+4xLwwXY40gsg z(zOlyPTPP(>F9A9iH4pvqkS&CbQ3*q?o|AS=^|@BIzwvq^xpgE2iOk*U(-eWimYtj zo@4(%*gn|}+0k9NCRv0XQ#$R@J885fV;s?h zZy{r*+u(=M@W(O11!?M z6~Ft8k@l~DFWX)Kc@J#Jwr|DnKHT3HceiT{exvdG5`Lc8N;}yK*$mkZ*-RbEu?BuF zg2{d4=SCxb6FzSS>ZAhx?lA33z;`uI4Pr%y(Oxn5DmrY24@8W;vy&6B2eNn%_A_9Q z1Nj{p``k8}+0IV*A4;SB*W}v>UrEQeLdjQIaZmn9@pJHpMAHa81o%0?+(W<*KFb0B zf-zbj`gYiXdm;Hf(C)+`*GfK=hXlVyupG#`891fzYeYl%M0Xx#JprG66#d;x+oT0p zSC~rsB}j(DpwB^HVTxh9lrlla{rK)9@uP1&!nXzR>H#mJdl7MhWNMs+Z>oAhQvx48 
z3*S)nLMJ`g&*6nmsD2-cW;savk_t!EY?~EqjsPgYTPCUN-PuV_`dGe6Mv(RjHC!E3TEi zV*2OxeegOQgZcwK>E$r?tE@(S(>`jFbN}DHMQb2O8FYxgdT)YcXoO$xvvQqMJs`hk z0Vh2ZE~OLx1@K21ce*iV5PFr~@fJM?S+q1V@JYwSkM_eZB^{7$L8q(VR%M`cK|}bI z*L%Pt+j;6OZ&5AL96?_QeYm;3b)a0t!$JKo>}@mjmvGMCU&^Q>vP-HDkzPSEquy<> z;pdQ#-i}bmH?4vE&m$(C?M6K6jCU86oAM@}RQ4jm3qrpDaFQVpbxQbKAXD$KO@EyU zcrM4!$8{dAiBB%aUyC+LWogA2hUgd{FOc0PaJnhLebVdQJsB~Ts}eSBbbB5>=KaUJ z&ZOsBjvbH4Jb*O#8edTQ?-8u%{G#^l)Q7)^em)F-;AmY~Gwm^3gueWJ^#9b?{~dj| zw^-V{4}E(Z+8+7D8iTyI(;)Zl8v@yGWkqv_v!Z=BK)wlxZ^pM$!MhdhE}rauGWl25 zcY30!=B+7VHGi8PUIRgzJVV(l15pO@-${tg_?~PV_TR<8*S(3o&V++GO4Ck^2bRFs z(mqgvy@v5X3X?ET=$JMS%o{{EE~{x!Z@J42jhY#`&Zi?GtFNpgoeu(F^rN~mn6EemI8{#HmyfDfBG~y-royPUc z`27sOi3EFgJjMVM>;=!J+W+wEL|osBYl0=)AH=wz3H^1vA?%EVF%<2UGnm5Ki<#wg z$y8Rfy*4)NR*vIUs z>d9UpO=&#)FNNJlpKx?fG*}uxctag0ybB*Ov+8%*1%{g%9aSfkF~aD(cVs)Nj%SaP znElPF&DkXwBbpt!8=6^Df^kJF#s&tuwQucM#D{An+8eePTkBfeRnV1h_Nx@9>rYr2*wsY z#v7FJ1m!__spAnfAJ}G0%@CHcZvcEUd{7&cSIYYzTsR${UbUiqpgru(X?u8~ZE$NX zyNeCQyB142P;PBIAX!B_(6$${W3u1d(2n(OiRh^fQQIL}YE$GR^?3Wj;HP@MMU?gj z@Ot_OXF^{7b1Qt?rD1g;D3vEPctJv zyD#kN8q3WXL*ME$$EDck#--ZZ0l#6m*IcoJa^#tl)s%H^X~G%etX~a?oPscL00J70b3*+)!HZ7mSc3aFq!EnLj~w+Dbzq!AX{&La19}@X&y4}jRzSA}F()|*{Fel8y;EW zI*m3t75oNUCD&=>^WC_oU44t()80bbJJT^=Mt)PR2G{$GI;Kqp-I=&#`#WFinD(#0 zn|F)Z^*7{Cc0YK;Ew0FsJM5O5Ew1;G_V)CdPrnbDR2e7R-=E(xZ8u~hS))h3?K+G6 zQ{#8K_QE&Z5qBTt=$JM;ey^(!ylw+dVgB%?LP!8C!s!%8k3S~U1~Ektfw`pT+G8Fa6b>@gTFzKbUg<) zaj>#9>uua0jjv43#e6Z>n!lcKUjqDS{Nm)3u?^ktFuZd}X>wrNsn})R3fvJ0KN)Mr zyE+jHjaeGpp6gcA{|ufd8QLudE9Q=ipO`r$(v@$E zbj?kRbj^|mVa_-RbH+jTCAdDhVS@btep~Td^2h}H!*Nxvhwyt4zwf}#Jh6BC=8H6r zpz*_5yxU+fv0dSOT?}iyyULlocOR{bVJx-JjD8#KZ|eXy`8fLb4y<93U+~0uT4)aBJiY~fiq?@C#tejqIqOs_uJP2N zi)d)hWnyYRx55veLR;03{lTZ1@;yysd%o^Q^IIA#w;`_%_!&mN4C4>OHg(QLa|S_2 z@01!p^zJ4eH16)exP@@Br>7_$JYNa=P=)B1uczh2xg-7Ih0?)gP#}aRPN^> zE0uxxMt*?#5tl9JAF50Sng@c1gggzHU%KJfi+OpwI4y%d5}^;8N0RI$w;~(Lr`7pf z%x5=&X6<{l_UB$mI31Y3(>#Y{p)^Hit^=Bz5U(F_xD)dbDvuj9L`!`Rm1#BR8mNzj 
zD7(Vv5#SU2Ao4R{4ssN8kldt>mfQ;wQz)MYkuK&tM{zB4+QhToUo(`0`nI)r-;zrJvO(e5j+PhyRc<~B!viS@~!Yt|?E9ESRq=L}e%jAXCS`sCBc?|pPI z)~QZnow78ZWfYrHKCEpR&^PB`99N9~)n;Qw68y()@Ea0jr}ZMtjp@_n%DQDC<|8wZ zM(dQcZb@m&ai0r6B-Sl4r-|8!bxVdh%nc8Z#&}!WdE>)ll=aJ-9vE7mYC0mh!W z-5!Sdi3xl*v9L4yvED*!pp}@P#7m~rZ5ZEXH^eIQlbi<0)rR>=>3s&*H}1W~l^tK} zvf-D2-N@Aw_8VXnm*=7U(nB;Dj=U2+`OFey(b z<|*_(n0bVh>|tSw&W6QFSkE+dy%4aL`DXSjnzhVZ`C2CQZ?D2y=5w@;iM1PAAH_PO z-KMN#9?!-%=I!@m9dkA2C`YlL&_nA5uyYyZBztegdzQ4eNo^+=^Ab14IJBOr&PxU= z^OEN~Mz4rA@4`g#QzP=fbxeV5VXe>tSo5g(n{Rm)Wn>LhZ1o~E5 zgSR97udwIs7`rrL?2W?=T>ou}aIM=g&hyYj z*FJMN=qI?$7^iy?Z$NB@t!{`L2R>=`OZb^FHg8!z!Zj{#8sJ!Ok59Ib!#KbUJEb|H z8SCukXOrw^j0q?&7P}4W?8f%5pj_m;_MgL;R-&~)Ng30w#{GAZCc%eJhhNP>A12!N z0Kf^C=yJ}2E=-);R^G2J;A_QZiPnmf)>on4$oCkq zRxDwyI2LQgO8~n?8eq?51MD{ZeqX!c{1@z7jf79n zq$y~>|DA|?u_xdK;+r$b`j}~>F(2=dY>32*B!gHEe6B((Bc>3Iil%_IHgw8r=z$_Z z+bTU4vvFA7_lN#qNZu2JXNS!h7&d!QPFO*-;oISJ!e{4(7kq`7(%UIHQF)JB3jdM= z1VSr_p7FRPd^WHOa{XjEe*s&P`x!MGhUaQZKp)Cp=CD*OT;Uy<9>ZhfH{Lq)`^i5T zm;c0AFc}>-d(0h=C*_3A9TirPm=kt~Eo@#wPT2hTFuT?8h$_$A@PeH1JHqGXgwGGR z&-}{_zjQf|Cw=8Fxm?&>&YhS)F&2EmeZk{NGlH?6@t5#|@VVjhz}6lZtET%yHV8#`E_*|ZgK|g zkRH2n;|+P=kNLr{yeEdh<_3q&wjfj3)F4B%ubLP6t2XcPp^D*I2)AGmP@Q3SM1{?Z zE+SL-=Ho+!GN1$vGsPH@WvD>SBJ_Jzm^lRrq2Ko*qYeysHjdDr5A8vdXqOIem;T*4 zv_FbkZ(vaQ^oJ9ck*gUbJ=fOljBhB090 zx_}a&8~it#tI+T3@kXM-rAs6J1q;F6<5Tgek@XM3-m}XD+E9bvK9&9iu!Z=EzDLXH zYa!s&uLQ%d13batx5FM@l($0QqiYSyCkpkGVDQ_6;zxk%QQ&?%RQeUbKT_cS_9FVf zET}la;J0(pH?2|Ne)|-@i;1(e>D{!`yu??zus%OOE#qr=?Juv~xwNdjHs4iNh*$aw7iARJ6sBcrQWn*h z)R$}JD0h{qvhYl*>Pmvkm3Ae$RD@3^S1^STU;IKVL`C?78KW9SH5I#-l2+=RCDkRh zio)}2pzOlR;JW;Jda9o^fG{mcwGgAwoI(o$UY8L9?ZAW|1$r+=T+_YS8>)ZLMT`x7 z_@JY3ry42m(I|X4oW^t%Zb6)&UMhdz}I6O-S$9kF)2tQGSZ{l#m*TThF z-y|oVk^4flhL7sA)g-KyDuMWD;V~RedeHJ2uHiG*#~pjVHGH&iD~FRjNgDhl4Sy|s zyau28A`wzG_*!@wM}*Jej)G$e2WfW=xv(@u9S=Gc?XC4%EjTkI9%`- zeD-MgPG_`hef#VDQsT}{JhQC(N zW-N425cpQ*ji0fFtvr!@AuQkg!FL8z}xA9oaZHHEnB)PhR+38xk*fp0foPMli8vH}5vSAU 
zPyMZ+6LBc|yEwfze;PLmIuVDWuMCnu`FBAl;!yOWcC`5?1);YBMqLc=_Hm^KH;=`- z1Hbi+GsS(V_sg^RAo(CMM4mI$N-(SM115p#CpmXS_~r=DK) zb4(IBsw5U4rFxNuNGU(}<04tmi}eWs*VBJ11igb-P>)V9J$)_c302nRF9vsRjsB=E zMQMco{vINKnInpI6rd?VpZ{F&4_1De8;)4F2}M5>#KGiuQ^g>Nc^1__^^bb_u~$E6 z{>%7MpKrZ~m?7xpr}<+r`4xi$zF3b^h=b5afj*f0+fr1D9%7C_^3y7pUjFa#{QWCn zgw5Epkvvu--(MlWi2t3_r}{9|iylr-VU)CGr;=kp5c6R5uD|{!rcX{)UWMgCs=@A8luLj)tZ}jpPAc{^g$c)tg^WU_MQM9kG3AIXsJujrbl}6Yqor)*^ z20QPP(t`O~>Su$U!9vd?#h8U?eL>?pi_y1f{bzc*&wnCC>q4;*5yH0$zSqBxsA?kA z{OT+>4dElGjT1=U_CwCEgraq^-r~sj63>t8yT$zK<4D;v{%0(M&7>IEcaE$$bGr<4z%RX zd$FZrW6t*6c`vmr9xnGevA37t74G5-lorN}UAglP0`DdK4z%DL`=^d$-@oVL%x-)? zbB>p**>Hbj{#yvCmWP7|{bA(4KQ6@N5>8b@uZk=p9-r zAL^>f#=sx%{X=a{py@-kDrcM|FxH1QtL|67hBLUy#|xW|<;FmL3ZKpgJVOOIXdtrC zd$;r!rn0z}y)@AZNq;`HzMwr5TgXr7!TQW}h0iDNGiiKDeWqy3NeW-a<{>71>O(02 z!Qjl_*y!uRINUj%&W8qcQaX+|bJmvAk&`9W)F2DzuBf?NJ@Ywj3a1O`VS^De!3(=a z5QMz+Y+u!>Drq8DEj^Rh!pCzs;cK6Pq-yZB@C**8XE@y5C>ao63iOOf3y)R7{m$}C z)xm|_*ZwT!D%ZDpexEe5f&mqk#`M}C^PAaR4@V}wvZK=He@gjOa(3d|{-|yh^U#ah(m_Ak8-CwcqpPDYvufFfU zQzNvjlt#w$SZE?dpA}GG%?+`+_C81Jef-Qq_eK2SvmaHt3cBA@@!HPfy1H^@bK>QE z6+IkfDP<+~B`l@1u8yUIa#Ol|Kh7=r6w*~FpQPuhsU9WC#_9F*0a;JT@AdSd)`G5< zemwXRPto>xHcZf8$)|)`3tA)?>0-V7bRW!W?&Ngacuf8vm`@1RrjottkmuZBSmGb;A9jjNl>T9c9bydzUM}t*56=;SbfXeqH_bnLhFP+NjK?vMgnc zLVIy(eliL_DjV(jp;*`@J%^+FI3CkhV7iazv6aU*9+SRC^XoA@CVxpZNjx6M<76Jw zyA+fz)^jKDd)jtAkzdmqZyJ7q!L6LXPS3yT$IoMn5%=KP1i6|K_fgvUMZd-QY=-t+ z>>K#NY1m$Tk)5;tQ2y}JEvygUcRBRIt#V(n zItORFzxzP9X@K1K(14u2$;{k0#boZwG@JYWjB>OYOr4`Auw9YHrbEjNk&S<3gFDX{ zE$uy!`?N8P@`@?=D5ragjWu`Tj6@@7ux`KiQRKgkiDCaD|PBKHMml@CCdnS*{ zZg)$<>NPF$B`?n7cORO=9_aoPXucy2Xgti==^mU9JnDv=KBDR9Th(1^%I)mAjQ`K3FJXq@L7F+7WEgd+&Zve_id5l6HoycR9WK{C# zz**g;F_eeYDI<>@Q;w2{2fQO9nhu>o`Kk>Ojgg=~iSiDR+=t%0nN2wf-H$_gy)637 zYMe{n#iBY_BY#I2d&R~~$Reuqs4=R2I%8*+q07glwfOnIGWP}VcBqv&xteG#B*W6@7_SXt3=$nhrRBAweV=5%j` zEN#&F1myj+G@!EtI&YgsHO==JM$gc|5E=E~MzI{^InR}3COLMpEHW)iu>UUb) z!`QAEjJ!p4ymc0S= 
zFEV4N0q2DuX2#R4IA1SEu|3xLBLpYQ|B@V!F@wi^C`H98E&M)Eau)^`t`7w*j6e(4ta{>Y2U`Fzwa z8F*5gl#d3s$y=~jh}smTlTIAH-rdxm#sl61yDWvB=EC-mp?!$*;C#t5QK^kBj#PKc zt;fBvgQ)f>UZy{x{$?m;GMsi`UseftFOgX1b7-%mv&}e@n#!)lLpc?^T&~yWaIWSs z5Vn?xYqBXDaLJxZ-tya1HPWa) zojlLAXT7`WOi;2Bp^Kiky}PycF%D9;$u>H=K%O<^2^H%0~+53@894&K5c)-`!65J zXh7XF)VC?8&x$?_{$f`#@5=~DQqrYHX3gnSXsvQH!ydO_xP-6R67gfRgW^bJYw&x_ zJX_%xhI{zTX7qK9<*<+NhLQ_yIR6*>-#TBk=65pOBdyZ`nergpCXH;Ap9%7~6QN`B zuh2(A<)|zt_RZy9oZU_ERq(-1_-IjXUwJLT%L|r)Tbi-QPDZ{2Cmxhvn9Q2PA@@(_ zHXVv);f>hu+_)Y(*@u4T4En=2jnSyn$o4hZ8(4<36qgKP&6TedXW>l7g2gyv(}VI= z;G9hl%WmG%Ptd#^W64 z#)0T3QCEMG2Eva-mR#c{FUVwwp#6-W+|+I z&b2g4+0C<8R$d6R<#(QBVFh%)^-18(TDkbbaoFzxzzzU*05JQ?iVJ@N-YRLpnS-dW zT-smx-nSn`+zfkf+$2e7svr-Yjm#j=X4GLWWT5k~b3t!tD7|1u-kZ=@Q5`1WoaE6s z8#xpX)nNk8Pacgk-9zC}9onMhsW#+eLq0a-Lv3yl@*y3$bnVUryEma;8T7dxZTLH; z*?s2JIeqQWvDHR8j#P9!5PIJxnH3$AP15%-NWbUqC=#BaGHU@g@u*#k2w+pB)s2)b69$EoI|K_vtq?KhC%r+VNjZpN9 zKKc~~{bG{?^ve|e&Vekwki`w%QvQ^;$k&W~soy2tqHmd+2;GLlA>CS`+eGL#6b|Xu z3f%@vC*913ZfC%^z(=XR6;37sdOZXCJO=;oK|lE*`h4u4!kC4|mB!8!@ag}*y{iw7 zqdM}lGi&YIvV|~EHi<#2m4hXd<0PZ_BgK_vtpu_hjIbR-2v@Sak}XAFjx2Lh2xVaj zFo@Jym=Ci@5dkKLm5}06_FYn!Is|rbScebWgg8K8gB@eEhrwL9AfV9wesA8}ecGMD zN(yb|kDDrKW_!9{_v_c)uV3%%yeApcikVm58aalzcM`EI276)tbt~q=Cc_`iLR{Kn z_(#@4&R?RfYL7QsfVpoMa!ckJj88GdN6y6|w9EL(ys)4#xESL#5Jvpu{aP)4vK`KE zY-1ntChn`319|h`yXi<3#`{_1g3nofLDhitT;Ec7pq`xkk#`niYP zAjXq&-HDu_9{n4{e0d!G>qDLqLma-vm@e{G^1K-`djLOlv{4hp?hB0px%ePzlxQl38GP?kQtKV6sQ z_s|*JVNSp{hNT{v(=wm^8SWp49;mNdkT<=A&oGO`L4Ajz?>nTv3!&?M(6b3WXCsDS z?;N4$F!apz*gojlM0=N`FJZYJ<67*$kz?EIv8;Hx(DR*|gi``LvE_K`=!7CPpE&HM0i*c{Yu( z{q{)xZWS_4AF6;IC-_0XL9}n{m+M`xT<<3J3tJNU<(e0DBB5W*lL`G|uf%8%bhk{d zc^~&oi9QFtUIG2W-i3ZC5A{n~s9#+cUB7HY*Duy1cX8j7ppbnkWS^SQZyd4L10Da* z>Ywi}4EP8B2s=Cte=PGFS7Xo6fq9U7Jz@dtBX0$UBdnp6eG)n{+HCFqc$0VcrxzQe zSr-_)jq|MC;d70JG3WtNbI6agAN>qKcFVK<@X~WVqhH24#=C6$Vdi7lP^$kJ#tZq_ z=-k?H0%Zha!h*s~4&3t_zdm}i)W z<$0`lNYp;=h&%#4Gp^i@zU6?n2z!v_VGTTteIyFex5q)V4?Mh6?768KdB-s;3wp^O 
zIssYZn5*8zIE_G8e)Qo5*cQj;aXBu3_x{-42w?j#w$+fM8h%B6b8M-%TI5?{)L_bs zzaQx}ZCR=z%e#9kRA01$W9xng6t@QBy9w;TWGTgv}+-|s+GNoecoef1c}t6>vf^zBOTm12G`N4xL9E>^+T68sPs z|M$G?gR_jY55Am*xg4?CkKZZ<(QY2%0_U5>SjT>Vew_n&%-ZnaN2rbF+?1Mis&%Yy%y2!`qH}j}ZtP4fI>GL-hz~|BL_2?)4 zq#6CLx5`Fu!{<-Y@3+wJG0ca3J-v}|&x4WU^Nl^Mmuy{PuV(j+8ewf!hpAM2>V}G{F!3t~ImbuR`8`#T@&oHD~BXD<>L;y@v{oJ#*0h z3DLe4<^FTE9{3(=(EHK;1C}ppq5a#uR|tFWMf+i6PIQa$#rR{uIfk}=0{>kPdhDqq z>2pQ`1vY&%{Oc6x86QtUzB;_0-pA8ecYXHA3SM1CthKFQBFvNmY*+l1QO0qOT|k)LVvGrEGGT#k7g z`#hXpj$AE--|211HBLVZyWVe_{e8ICYs@;hrOY_}P(EXr(Qm;%eAr{79`iM8RP|lK z4>_N2!T2mgT;tq__r-mu?$j7+@__;5Q-|h3#ysQP__C8@YknRx20j|Q?XEwJ?{E8SWA?FPE1@rm^!Y`%{tum(yzYzRjA6>J~KX|=0 zrGK_BF!1l_<9V?EV(9uw=@)+wzc_`uEcf)K?f=O-YyKGOw~ujeRrH7V2*>@y*qG$d zCNoQ{=bAld9c(mnP}j{yyqFFgyEe`GDs%1wGB?6HC%OT?P>#MU+@Fj5`whb!iD8aqoeuj+M4$e6%aOG*KTSh;GApltI3ep#KB*rJ_GP|D~5AW2n>g!XM(E z>=nb<7ukd!%Aps=LCQ%Pz3>C-8}o_q1M)e5m|^qrj;xh@nCl#ud>9uz((eYlia)XN zT~1z>?Faag@1Z>D2d0(VUk*PgGv*x3GiJpf$~CYD@s$2S*aP>PG2sV{%Vt+G*5Igz zU~K3Ixu|dN%`(382l&CG@Pj1<#v!hqo`MgE{UA{v9&HXGKdV4~#yar zt~g|`2~^D9fx0B)8}Bo=d5?~dZO`I9wa_z=18uY z<|2nQz5bPK_XO;ldxz#XVLzQY!M2+lAGs`CX2*^?=MzJ*!PD`SWkHN7x= zmoWu!vxIR@r=1RB zKSte7-yUII**|m=ZG0WL%u68iU98R5Vy=G(d+J^#+o1m0?tZk}4wx6;Ujz{^*-z5- zqMzJ@=&5hVL_e1`4qj%UpR^73vj_e3zUy1aeU@k=4|kz{gYOV%Gv$a^4|fGV%>TW; zUomsQnULeZ&}Vi10rpsXAG&4!u*&pBb8+u3@!5;d-soX`PAq=^|KhLLfXnaO)bD@v zePsxnEjb~d69-}Z?wp7E-7H~c1mIBZ@Qle)9I6e%u(l9~PWwFx(`mO$7>aY^;Jsb) zTJb0BXA-8j_iG8$WsXUh&f`xK_Jqg2;621sanxuq^?UB?HCVB{e!@l<{jQL(dJXSd zd9Bm7IKX-&%&)a~w}k2az?>rv9rgM(7%xg#>2!Pui= z_DfT~u@1^!i- zPS8xwBP!zW74YPIp?Y!w=HH;Hu#$Ng9vlNa8UGbVmPL3n&Z~C11UwnfRX&C9zJMP{ zbW(Zv)#ymjOvY!`{*wZpjKiwG`BRcPU!pNw6apSJ=O#c(o_}`1zgOTVLKNrqYUGDEJG2&oq;9 zPkGL#60Z8G^mD;f8$T6(nS|SMg}IP@d`rRu^0T7*F5vlQG9C`f`#W9m{{!%#nT)q0 z4+PCIiC-+~)&3nbr#X+~Y{2tPg>4v$Z?_A658wgW_PCS&@G;*qG`Aiv8d@pdA+))( z2|s984=0zDE^pn~V!v{zvk7}yt>k-#h*MI0MMGOdM^iY|5nkTf+WNJo(#p!pm8(l? 
zONv*ux3u6js=TxM(prEkzEXL^By(6W=^RRI4l5>4O$%(D)^$R8HSI0kZSAd1buE=O z-K!VXQhk%idSOMwhK`1YN$+CGrJ=gE_O_<)He6HJvbiB6eJd%YL1G)Vj?N4+{R)|W zqp7XF{h!LaI~z7lx@tu)D_dJLVJ?eTceZylz`-kPZs_iwWZPT}K3A<5%#*EMvsBfxZL!t=qhy)BF#DckT@+1;HP zrVB%jb!{6Ov=|P~8gGYqaA{~&sAS>tO{-T|mM&O!LtRr#-MSVz!MUJUxF->8twLr1 zZq#<{9kLr=bB!B9rAvk+h$ZXK%)UyOw>5QEu3FLBy6!WYS{UkRu!pX7U3Yiu zx*9kN(A(--@!wgx&nXM6G_bvDcq68Z+HhB0?Z(X=4YfBkb#!j7Yq7Iam4EZS&ug@V zH6X9g0X&Rrm4~Yp4c{TD!|#;%+)r;+A+;T~?o5T3xwKQ|0jJ0Fd8Ep-(FUi%yIkNsF7RzGa9#gjmpDA>`d^{J1M-@3eocex`oB_x>-t}* z!FBy#rNPVOHQWD&2G{kkbj#bi{=cc=D;&0?aCm#I#3B4!@|q`I|Np4LH_B_`U$4Qt zH28HIyhnpyt-*EuuhQV#HT-G~zEgwizXw+J5ALUj5=G)DmRE$MgpfEYG&q*pi9_j! z_Mwf;K}M1 z;*SPTR;%!yUbM@D?I){Jr~zp3Wc3LT7HaT(n^gR-(%=OeT-9CJ{+BfPMh$T^nJz}W5e<#rDm8f({g z)U`GkpW`mVhOq-lzMhAxl(0-*s63PVSn(+Yxr|So-RNs1yD78er|0@{ zqV#Kl%R{HeSJ=dcXrQhg4&volW>AM{0sndzl_c+jVeK}G3Z4UHYhfb7! zyQH5k&m4c&3nohcfTYvMkL&x1((iJhr%g?i{>KjVtQSm_evbn^*ZmWvSLTHv(7P5{;CeB%g=h?MCo6X{B`?by>g=TCmrY;9q0pqaY>BFF2)~n zoxDHgM(%hl@dPDhDocSMrNQGE7XQV@RHA;N^f6n)FT$CuEcscT_UlC%1;km3m4JL1 zS8VgIr7w|fX6eNSz+RO6mA(NIM{57?2UO@Ug+3bWyF`432XE5&}d$wwIn6`?#U`&Hnl0dTVaUrGDxVdrqr{`vHrqE~+L zXBYm3vS>XB8vRi83iiGW|EeW6hO%GsFUFsuSNxk{=M<6m^y62Ozh2Adec7GFxl_{X zw7h;*FS=_t6}XLsG!U%T7duDja@i;rrX1PKon0w@pDnh32jj7k&>1d;qdpL6cLb7v;u z?PK@vh1~hxbH3+$&iS72d4JFOj+#1qN%_2M23(mP`{X)4CG31>XXoWj%AX`~$_6Nk zvQ|-YGv(jJ5=C+TO`o7BC;PkWDn;q-Z+4fWSbq+fihqMYWRddD=y z{#$%pQBU`;wLnp=zkRuidYXSpV|FX{pZi56>2&`ZzN{oQ;_v&}O41Kq(Z3&GuOwN2 zzuc%KJr(9!MDy4QPK^y@nmd~!5y~O9Aljrf|O}GO@+`@B-XW`ruk_PRPyh#p^ zS%KMs3Q)?M6s2TS$@Tc>ZX3OG$nMsQSjyx}!{cXV%(<$~-L9W2_@%g3Y z)%VVszpx_eaDT;;>ZOV(5b_v+Vxn4+$bj<~mM^Kew_;&nb`|+F*(#Z(ODpn4@hnru zEvcwn6j(COQ8u=dS;B(mo%#7zu9j8=N(F_=nuX;{g{0(1OJHGjOlhJFmP^6tqwpp~ zgD#za?_3CgEIR-E@%bJ*u(TrTpEvOij}Q{y(%JrsWffKV6DN5*%MjVjs+lG8 zX3m=#=%xH;jV~>kHLdX&ddyyS-x*hqJZI$~fzMghm^^99 z|H`UyKJxXss2b;-kx#N}*uyGF_Bjr#1cgtya!i^ahEd|M; z`WjiCmz;ef738E@>1}Lv?pgUHtB5_gS~ln2`Db27Vg^@|#OJP(OdS6|qYpW$WTB!Y 
zjN0?gN6tPcmE@eW@<~<_yB}FtQN7R{V{vuE8hI^Qw#;^oosEkCc8$oJ!j*yYs(Wjy zN;y|IJ)B8TTpqA}O)oCjV9sw-!sPlKiRDcuvDr)RbJp5-%d5UFsab%tE`LRjDO<;(r^CwM&h%s_EXRbc+NtC!53 zQC_a!I~UcBc{r``dHK%prw+$d=GV+$7|6>n5GyjCS$U;qQYJV_SxK3NbsvzOH)}%a z%o~79ZuFj|laTH;l_oJ!#w<$+YM^3C>T=SGmq z%Z-+d4=XkvkH?)S3-9b>5QO8{HJYb+F3b)<5KF47 zmM^TncmC`uPs#FGlV$mQa#2rmSKPCt!mJgaU>5VnPoKT8dg1*0D{h-zRa5b)m8O9G!_q!StG*XJ*sTm9{6%L5flrObYUtQEQREMNXHk@@dtDlK{B{sEZU$AGBe>65#6#;W(aQ0E3YU~pe%{dhD zsp6Spj|DiWMqfGQTwCx{#h5o?Rz+3i^m!HK-}>~1ACsq7-@B*=#l{)O`N-wjh)!=g z|4iyoJU8@nE%USCMuOy>b1F_HdHH#Am-h|_` zNl4!UBh%AMCp&)n%;|ltVfBIT&hK-OMrtQnL=oP75AoaIGdX5QO13{V)nPjm6n#@H@kFE&60}JW%HK=YGzlB!)jqK ztT<1Z6|%Wx_M-W7mt(=ZbXmocrC2RgqAqe7T&#d6y52kczS63Sh4%#JCHN&clvdoA z==QCOB?~L69PXwvkx*icCPKO%P%QO3(ahAxs!CaEBE4arOmoCZ7}NyLp93b-(=NQF=rdBVgPFD*T+vEwDPB#Z$ zes73TM+;MIWhS7;x>~s0%%d@aKH8m6Eb?b7u!2;3s{Gl#-@#bCxbWDH1$uiFaQt zK^#s{K1_-YS)}9^jaw9pHw-8#NWUL{e3&ERU5Qf+&Ll*KM@u&Ebpn;G1gj&1|>k+2}T!nvZ2RA5B}iw@Xu$V5gSeCa{iX zG#^XRS9anMgc97Vxzk&MNBe1Y|H99sr8SwsV@cm0=r%UizO3H0P8+G{M_LqQFYu0V zOX8=JrUdrsN4D!L_aYU;wfZL!IB%1twjpsx+MevE)jtZCXZqu%{nXffr0tiNBfVPv zGjPxkHz#Rz|4Hab+cU{2_+8oAiO-tVM|P~-rzmPu&3A)u76rT2OPX?)y?hc=?P7HJ$Q8Tx3Bd+n_Qc$v?;3;WZgm8(te1oveJ4%GfNMSliN zRstQ77gozhRaHfWFj&9vRlbLm@mY=Eu$jsk)2CmRefi9aW%*;Wr`61_3S`e&o?Sfa zj#>HHiNj#@39#Tx=A9#z3n5;M)Fll?Oq5c45$Vrnx3B&*C{?BlG-WL9zlhN3uYF)K!| zIF1@j9we*GYwY8!y@E$7>#uBuoS4^|AM{C9zvlE3@b!3)Tf&yWb^rbMW9WKjhdvxZ z&CbVaPt7({G?l%!pW`P?k;kY+`^l=PD39Sp)d&G4wx31F<`{7FFX;k7$U}B7?I(Fv zh3wAc=`1`f1hh{H?PnrSdv8Cp5q?D+WBY199|zvuX1oj{@C0}9VLd4JH3{^}dTnRd zn0d8=vYpkxuHUBLs^4Bxp`=k$htzR-=Tzj1{t&2Mb<=Nu)N zc^$G?fY;O{PhF2z_g&z-8>aX}y6SE8yM}3NGt%7f9Uy#5;Ol89@P)i8!(~UqeJL6a zL2{!(-bIETL54+x{2o4zsIp_Ds*i@7hNvzF#S|p5DNyQeP`yZ=5B+E8`n2MlE?wOn z>;^mcB8YAr^aY#KJUJWmU=O&ug^?KR(9`;iR$W`04l)DknzZ_NPxSO?Pp8}jo+P0- z`-S(>VPug4#~GCdqezPUGx|-G{Y^kfL#C6$i_wK>BWpoaxlSOu4Hdjf3SY&(Mf4{z z`00J&??P<7;Qw4Cq8IwrHa<)3^5Xi#fgF9x{uMd8vDNARBDHg4t;B_OOE0 zRe+hus~H{vQxLEj0aJ8@X$p+hn>#c8p+y)KZ5)Kw#cha+S@#o=64f+Zf1Jfvhi^d~ 
zxXI0LX0F^sY$FYk1aAL51X&5+Ef8x3Uegt*>ME-`z4Qh(JP5w4gDpj^MLmTUuNz8w z!hd5Sxs73DXxN9;qHs_$;pgFQ^VI0F*eb^m;UD+qiv~dn*TPrG722@N_cK9-NoW-x z$}LPnd-KpGg%~?24^1oVLE;|#8j9fZFp(-(+n_10r~Pe?TS&$EsF~?X=A4Y&fn@_0 zrI(~N^=nJ%OzJ|vfx&vpG>Q2VyJ0+S_??ofDC}#kr*NSK_4Zr%KyR*JxP7`DC@hMP zuh3U`J>3e6<9&MjE-XuQz2a+Q#*Vu(H!pwugo%?T7fiY8>Z!^#*B0s1rn^1Y&G7oJ zzrkO8<4q+u&zyD3t+##s_B-yp>+WxSv$SmXobtI9mG{h>zu;R{_b#knw0OzVK+Upm z-?#k!zhCjdgAdiN{7$g0{<{x17~gAL75e@UR{!uH{?8--_~`$A>_`9f&yWAhkAL#S z|NH6B{`KE}{);Dn`Ky0_>OcPTzkdCj-~MjRf3JP|nP;E-{qryU;g3ztE$d!f-@0Mr zrp+(C{K}TCZQHi*c=ffNyLRt+z5P#rexsxF&At2n^4GWC-XA^?dFS2t4tBl&!J!ZT zcKFEA?qeSv{}^LtH7PkIwO{|VbY)=g7= z|L&`o=bY)d{%Ww0bK?}{VH^6s4K<>+OW&W_&}*n|7mE@njMFM zd4glb00{w3;+#WH2xL%^FV)O!9JM>mZ^7|i<|Xv5FmcE&_8E$@SiI$zbXj4#C<fHSLQVcdk|a#l)k8 zoXH$7eod~mXuvtFhP7*pxH(=n{D%WC!=%R` zoV#6_H)*0HsAzqr!gHpHC-MP$SLRQc2b|+^7vfRu^xWhmq4$i=(8SsTIsWXZ=2=gglb?#l#QG|U8M~(r2<#5-$Hb&rZO}21KYC)t7Dm}$ z-=I=1>7vzL3W)%O8TB|oKH~zPF)*(wycsmO7K>SpR(At^%#;k$>X&fB0FRpcL5clF zUXu(LeuOcLASoh9m)Cd+smq_bT&tS{m(Zd|nH-`NHH@DMTH`?rQzETyxQwHAYSI9$ zeuNo=R{s%Z6^I3qL}M9;Pb5d8egFh-$epB`SUw{OIS5acI5PgE$ah6%SG@$}Fa|~T z+{#!67Va2>P6VhZOv(t{O% z;5rav?`5WK>Y#uR@w_JDNzv-q`9xDKJe;N$2JV1N4T#DVe0$)s2uZA>;QbL;QQz=? 
zfihCTHs6*IAv+sXj!0~`Z=$qGs@GBo?pMQiOG1zkC447fNU>ITGabcV1frkmG1Tz2 zZ~+7IDGC&DQxuMozaUT9&!0L#t9uG~2=gT*xxSc^wn3$Ug?lht22#zz@ZSK*_3_Bh zf%WiLf#rLqryV~%Jsm#nXH8mg8*svd@oi2+#gCelq$K64We^#q9z?Ig-;eP32EG^L z_Y(YB&tI5EJXrN>%=Z~DdCg{mI~o4QEbN^edA(R|5t)4wZfGe`7ykgrYrG#k*0W-G z_(6QEjxCd5iweI7=6K6nu!4f7V^H87h|lYAx156eiy~Vp$JsKdt(}Rbk^zC+YL5*H z++2%oiMk*N2M=7q<*SA)Z%&stIoWHpdX4dUO~q<5#0j!Sep+n_q6|FkH8w_eU_LKW zr>(jlyotPpeFx@~$Q`<&MqXd-<su z>mxt4*=+F`R24RLe7JPlcok~9*VrlA+F!uY-*^cs4cb|ConYdXK-L?xg2%wdb%1@w zG3-5Qbw2>iWt++PWh;3}WG1$3z>tApfe(z+8)o*f!JXtSZ^McrQa*GDd36VPRr9L1 zyqS4Po;oO0@+cY)z2O1|($+Sq8%b_+45N;o2+Txt8(8eiIp%G=#;@-38rS4C`N>r- zM9Fr+TfR-NojPPdpdWZzSGhV=l7vPEoTKYFt!^RMV{8oHgCE%+Xltj7ifSO=tcnwn z7YN2cR7aVEX+22fbW~TZu7^0N)JII*+nDbtUU-h6S_@yaL2V+&nlw=q?i{rA$JlSI zdLOkaJkE;W!qe)14|g>0;osp`#3)$5o>3v{ZvGJ)#;~g1L1J%ES;Om_mR|-L z5%Hoc_?Gm6hXS(qJqn(MH^WV?A$lM4hrZ4N$w&_Ak2dr-+K|NB?m>ayTEfJ%vb_uc zy$mGzD-v}-T%MzFqK@3C8(HD`aFAR|C`vX%B*WIA4Zq&!tkwW~b}d?P9NfZB<0aGe z9CH%+cScjsExzzzc*zn+!k8Hpd^ib?QmobFWF*|da*BW@L>vimFef2~l@RCT65^awLI4CLXZCcHGy5R|qOY7;Gd>A1j!W<-n^JDB z$gmQkF9%f8SFQjstc2)0O86$O3Pk5cK(L>`GD&zyBUxqpEs9dOnO@3785wZ0mA-Z& zA2M;6f@IZd!vqIcRW5so=i6bPbm-IO2KZ6l8KJTRFTz%nsQ+&RN!{_H_ zT&z32?f4SsHzp3bBGdSS>B2AR(gqinK-8rJE>?|{I0wv7m@S*3qJ?3(jIfKxa+#9g za$ADSk{Fi|zJxxFi}w%O*L;b~yX?FD&hRoe$@B9`P%Mv~PlD7S0=@HflcAmC^G49h z@p&V7vK-6%<<969$dl*zYzUq-$7e$@s(W$11^hf#m6H|A`MXl{EGSt#>s0{HqB#1f zY-d5q(z}#vyh5Yrd^Qc19G>^quvbo=P03g(JZ}Uo9`;J`EJ{drsC~7sXG1Vrm#r&o zt(AN!4fbV|yjIn8io4yF0vL0@E+~k*RAVkXT8fqW*qbEfrMTh_6|v}VU;C0D6EN|X zBu7B8{2vY2*Y$h@pebzc0C08xl$x@WCKeDc+o37@a|}JWd&kjnyrygshRM{FeN_%J z#3&@Y4^1yOaJX{M2SF69vY(Tt*DYq8f}o(6s41HZ_WQ&MJApe7Kt#yokO93+590+)Lm)t-_PAYYE?3b4FT z%kaRzJ#V}IEcvTHla2SY*}-StCH*3NDEJk-0dyd56gy@D*>u^@m)8&(6malpQa}qH zRRfnIrrV`P4WTd*?Rd>^do4!-L~%Z1*83flKQ%V2-#O;@PbKD6W`H96a?g;gtHs|h z@y9Xkukn|M407&rrZZ;BG3UA|iqD2t+0ci8E)u>U*=`h7xwAdOj59?U3y5=5IR)bI z7Y=v`oz31gb0Wi9bEIS?8*Xwo>#}_*A`)+m?VC8mm31BnF^_ZKyEjc%B6EYP2!ASK 
zPr@(wi@#d@Nf(|wU?<5EkS@;lfILW6Su!lK=#x0>q(8BvPvWeTB^~=XXI|udvYH)> zfG_9Fixa|Rsy&N6v{=;J%uh~=dYgG6rvig1Wz!+KFVvpZgyV*P~z94$JcptZoJ1Lr^%+2cr!q|fD?VuRX z5cYA%uJ5bn`@Vt<9cb?ton7hdrqCBTKP4>R*7=Ufc@rIb*IXq)PlHO8Euh3+D;w%7 zE5PG6R(3pW6?*HnglF9ATzV}(Xgz*GEWy6?llKGfRW>|$D5bsWC)ZLmxrF9;ij>~< zlWWbqTByhe_PArVxMk}Kyi7Joc)%j;4Fhii*8`&u>t+-1E{(&B(RW_?P6N=itbi+5 z%Ua7xE3|8aUBH;6l1;6^6UcW$1*>6fDR#M=qC@&x&cMgzCRysVh!8hI2JUN;!s(W-`MKTY4evKeWpl|g~G8n zXJfJ2p&8R5AO0zD(CRY)Qu#B*kF|eqNUR`1(K_XJD1%mF>$3hSAW#D|G(&CByh3d` z^pJW;t1e`#fZvIk@^@^u{2iUkU$<7BT0~&FR-KYjsa5a7-y4;1ewj}Gj7%s9XIE;0 z-S}&RdthUwh7A_2`p?2q*CKlaws3{_CJ|oSwHe-fB)z~kN02?0B8Z@|>zHnQq&M%& z)YD(mL*MFg*Z*0oKLBnCoz^=bOF%x}#Mee>iDqkAMV(rfx62pYkgXYSgWp`+ZYaXq z_KeWoS78sgy-0gx!?R?h+ql%9vni~i6B2sLnJnB)6s{L~u+(y84x!btgH5@J- z1cr+0L7%%_!&tuS!F-Q#$WwnU#vU>AYRb%G9z_(tK(wCH_@3b0j-dRK0m%iZApZLsYcbj9CHoLz zs$v}7(dkB`i_FL@tV3KKfX#e};sDoPU|hrCyhkwXIx{jE&uY?-CGB>SZqxJ7V0572 zY)5;$^cXp`8ALY+u#h zS{f~@AO121gG9Dp#bJ<-*wZwuImVsAk9@(1TAcHd*02=30?IH{jOQOvzN#oY-Zr{r zw5CJ)RxkmTa>qfVyXf!+RjXfxFkWm?LZr2|-H_;EcA7j%kBhWwYd>xdXKHIVSq}Ie z#7rPJFZpsd`9i*IpRp-?A=DbAJT;`DdduS(#+LBVk1Th2*dduMkT_@%J(%GAIQ%Yh zW7V(VdT}a(4P|%~f?Mv|3+QZ2M3?QhY z98|N$h{9<1W%6?e__}nD23Y`Bbql{xRH!`7d_g9T-+{gpU)pg!ce=LLpV6ab4bwyZ zfmfg}%WUvwdqdt)=*~uI#xW!sOjHLsX84^rbAv*T@eT`s#RxTS9mKM4MC0O;KSZ69P>HUzu78s)!Nz4%ne#HtCf?Zf793L#a z9ly})@f!y{M%ZoacN+)Tym@iJFPjIwwYtT?&|CX;>CbSoQ5?EW)t_e%hqFYGpFa}( z15lI~xWD==Au%wKs#JPw5=Qac^=-!vwmfjs(SPBq{A&KfuYcGOoGd#4qW( zl{@{A@J7q--|-e*yyUhhhkR*% z^>FRpTA@Ux)!&9Z35jj(*MG;}^w&8ot>*;>-bUFg?_4oCt#Z2YNHt}f{nTK6<;NW; zB)1XPp`nPc!}vnSw?%uT1!JsNa8@6UE85U@4i=80D{Oz!Y*br(!H97L zFFB;#udiIk*`!P(hJ?XRRa<+DD>vjWNOP-g2;^2f)i%0Kz=wp21Y{f#KhqoPuB5Dc zxjyO(u2+3iTDAHs*b+SlXVjv0x!?&ocN^RDnj*KY)?4c|jz!Q|AtzAu!fde&srEFi z(u$zi9`g2JDCRBStkpFmbr$*+`m@xhfm~RKQ2HVp>UIDtbbTkv^{GWnq?li_qQVl|@H>K`(5VnvT^P|?j~MWL0l!Y~zdDNWkb zBT5i;r2&m)oC2=<9eEn^rg=hMREJHThK#`gQL0D#W0D~~(~${yga?%S2GXPTI_;*g z3W)PjE0YWKXVFqB;j7SK(;eLjW<9BpQ6-4k*(1&An<2E}8$>Yl&pLWjJ=ju&A-f*@ 
zC_9j*8|%VP!`E!4Hwdxk+_Z$V-7>6x-{+jhuJfQz+9V>YMZ5~iLB~Z6KUt^e> zbj($8^~#~jl~49pavtlae5D~(8F62VGHhP5GDOW%;!icImkv@cy|15g;k;BOb8oya zV?mq1rEENz_`L*cT3i*jo-fWiTAcX&{uUO+r|pZgj^}#TaY)>=FYehH_jJZRSHwL> z`pYu1PKkEKJwd&5cGjbpFC5{o{GyKzxJ3rgILV~nv+ z;%=Nj76D(0yK&qwAQz%fqMm52WKk4FKtoM-usZg(V@ITsCJuQhd6sXwF#Xcyx+q&s zm%D8jt1P%TWIZxeLyXJ+!Ixt<%>?bc@Ek`&bS0xLNtN_SLWws zJD*eS8RkzY$d@=g@5**YQMSxblp6(P^!n6SCVGM^Bs+orJXq$AhdYU2NhVk2oiqH$G<5knw&9ye?{N7 zcD#AxV+yVb`6!E{C*z%vo9m!QzGOQSf^Qk`gh}!7UJktPC&W8(ymR;X2{ivKp{Gnd z_WCm3Nd?Y$r^5Z!LcAQ0jJF`)$w1~e6Lqoyb(c>xzf&eV3xu?Xn{`(p;wAUMr^FNp z#}Izt{oJHSc`4!^cz$l4Grz>U8+gA?#GB~M2l1+a_go@gOn!;C0(dXR;fcG$u$PW> zXjyAqoj}vFo|l`gT2`G)fd{tJSGicR0X*W$mcZk#TnYTEt4IP*xk@Ck##JVP=Us~c zh*ijEm5ek68&(Go+pCYMMTpF2w3thdTx`U}gnnatap-%DmK*xnSZ0J88v<6h^0fuu zBgznt%a4oj>kYPGTXTOcH^oOVMFa{7$n~cagGzu^sh!|re<%L-S8B8OQA;2jj%}6N z{0K-5suf7pAHwsmaQhn&55OP$IJ4gZ`~iN$aDN}`Sff`<+>hb*9^6hK{KQ`ix7i2b zKUVtR^H;c!eL~_MhxW7`jP#wwoc3gJLy86M6Bj z6e~oRKwA7-ax{4R*v?)?0&lq4Dd;@48~dbJ<0uszy7maNC`=uG)* zCZv7z6C|%|%T8deUgXUr^C3u%Cah&G=^`(rdwbYKZto=U9$#Q(`ym2{xu*WM!`*bN zd<7nrFVgo_zTQKb&uIhRf=(#{@TDBIzYgE_Q_%hfX!>0xHin3(%$70Yy(~}`5xc{Q_qeMTjEX_s z2-H?v%oF_!{Ge5>j_|!-eyqhxUhsw_h2K}Mq&gnV>@fK@WKw&n^7+_xgBGz zU@}jn&0t#)Y@!wH{g_}cGd4*Qu{lU0*j5B9w1RcBHTX4i1i=x6b*-J8>_`@p?h4I>y&FycTT5YtAlEXn&u9p=$z%_5*}(ah1Tm`b`9Si@;&{L|y|_DTHo6aON-% z3tExei1ms#7Dxw7H#ZA zA|cKySqa82Vj&4`vjzh*LQLjRU`j8ufY4Z0J0q%mliti4+YT@k0GWn%4KI4m5lsP$ zbHP^T1sgMf(PEmI9sB4y!c0WuuZTOsGH$0DQ^zZ0+J1aiOYyhRB6NBKuS#l4L^cC$ z4&4^4lOB*T(cHWNkB$S3;X)A<#OWS+m2?l_C7JjlKhc6!zRlom#CQ$)m#jfx>~DxB zEyD7(S{#{)j<+E!Y%?KxTGm`wF2KBL z!4gDM!0v`j0)1o#>J4TjWsIaGE)pcY<8OitLu-)_5wLonWCgOw&BHP{v5~I+*v2yp zEn@jZDF`1?K&%)5DD;)Ab+UQiAl<;Lj}Xb6bt2`plp|!YN9K*i9QhMo?LwL5MW&{c zUa0~Ro@N38ttWYExwVrr_ysgp?+8U4qXV)HpRo~DAB~~LI**-wi$*s?Zbe8W5fDWx zvJo#()7sgRRIX=KAJB6rUXfkkOQD&xIlCFhW=Xq$*MVzp=rBdEdc=O@i)9*LlQ95ut5rF%lwX~?Ze2IvA&)JqHnI^JR=k^%%1v5RU&Za^aEO8H7WvURG$mOVTQ95sS{ z#ezeE`wT9bEaWM^e909n2D5a*v|}g_6fes;TEtrQ-$XGpf-Xj|-w{~I7)lr6P}gO+ 
zBk_eRBI>npe;r@AEJxNgvlubNL9FNo(Mm%YQlDV1P#mNvM>r6sCg7mFQ8Fl3tB_>@ zq9nn1HoPcmyOc$GYy>tJW+{_A;*z6O!pN3Tb=UyH(b8tw9steJ0w09LkjyMk6uXsw z_#8&EMA2H6*~B($w8+0GWD`x)=)}q_svi;~#{0XGEHUz*Lyaw2-t5WA0Kp}TYJ8!C z7m>pG6r% zEJOo9RP?P-7!Va#*5C!`1grv;wcS~a48oK(@s~|S8;i`W-S|!I%y`vqWnZoh1}r9PQVfw@J-5qzyM{tz2rzjH1pLRU_2|h zqq!j4Eg~FCMogIifvse*Nli5UXeAOG2sR6#AUgwrEkwnVTaFXZO2ju1Qeq@^AW)V# zVC@hCDJdTy+)2Xh7%ap&vV-Z8#S6N~8CxF__8=LA4P%t7nAn+#5wRa9aWPa89T~d; zNSJ7t=dg!bu)*en7>BW_zzP(o7;L37MlI{UT1F_i&ZehB3^m7Esc8awwWyfv!{@L? zkA$Hlx0>l1EwIq!vk&QLrqdwdgdDO109Cv=?SYO6NSotvBw#y}A~e)D17!{eZVR>p z5~Fp9AyBC3+OY*5)pA~ArRYP_e2Fv}s$&OZ9btJ2?buDEX3|AU9K%R(UE~Z^v*-yy zN6WhX*WlYbTGpZ_@Ej`u9WQHHC0I3xyaFt5r%-I4vQ4YM^&oV&Lf=saq$$sF69^_I zM%LmN3IR~v;HnYt3F^Ras{(5e(1>a$T%_Uyx(3_DM-o1uk+2CLK^VUHJ(Ws$sTgj2 z3m!{#Hx#AlP_TA4+@364`n48mxL&*nDz-n7IQ{X}&;CloSAY8}9bako*8qH_+g}6m z1^pV6N(R0L+Fyh4m0^Er_`*eJk2FC~EqFjhI-qfw3C9yb=!n-r3-x{R1CfCvyO8xT;)dKCS=FFAH z6bLxfd4O!RQQ@j?9#mAIMJn}0aqQHSxBZU@poEvg!`OoJ6T=|As1mC7{SVi5xtz#|ITrAGC7K<~j#o|nBu{hIOEY5t~TrAGC z7K<~j#bS^tg9PCSj$AC}DIR=N{Ru~bp>q|wp3ERqppm%=dW`8B^r~?aSOAJo;oZoJ z;2+x~aH;-rDC~xRi#y*y%xEeB*%kI;ZA@c?+1JyhwB{#V&X_3nLk8*;W z3wU@RAus9-+<#+esEX4PNA~bwOWh;zlXvfN&)$n#(%pkQ;yk<|ZU8UILj}a8`XcE) z59ILbHe3*Q0WXM)@PfEq@Ua|&2Z|yun{6tHNAJ(;?g{kw=j;Zvs4Xq;;~Pg3IUo@z zk46*OW3|C%N`4T5jwbC4iL1;=^l98>7WpmiP}}4UO%u1(4G>4Y-DnsNEYKT2{GKhecr+7;(NkXe&NJ}juo+|L72?F6+f|0%6pLYaKjNx| zH-N{*niYV5bv*$Wud&5vyyk68_cmVOH`e)rN1}Pf#!l#sb{3Z(gHe?0Lu01E1_}TOE|qJp->lW^NG!%j?c4ZjZr(rA--x|P&NQkyP^;dp!$gccJwc^j9Gz&_tfMZ? 
z$l7sE?~fyy$JZHcqkDRAJA%JB^t_AXqUbpQ?S+kQ19OP#y~c(eNcz%0(OL%1a-`&S zQM)(;G^??ig)yAg!6~vhi#X*(O{=!H6->Jbf<7TdTib%)loYiUI?d2$)=|pM?_~x& zZlg=ewTOo^03>_E4vc=6w%ZXMWkjnoM8ZS_?ciGn&PYaD+*5jLUc|wO!_5bV;phzx z#7vmLn1Ub&;>}R_GWH_8GhPld(S4qj=RXQTMZmx}PO^P2yxCDg+KVqC%hztpy$ct3sd+ zeF83MC|QwW^hC|}0p?mpqJ!x4;YezEQ#78&!Ty|eC~2=T*c<$4r1sqfkOU?u{3^Pq zX!)Qf1cr)QkO_H^!>aHH)C`vQoqCONJ_Dh05!`GMET<80g2fV3Y`jOdjcUfpA#iU^ z28VXyz=v)eX@0LiO61^?ouf_^fzkAzK!4-Nj<;dkMo-63AiHLd+i0C0O1ai;bRXH- z93DApr_ll_T(}3C|3^4USbV4-z7LLItsC`$(Yo@j6JQBS_Q6#jD%eZZ)_$mK7fIM? zZQef&^1Q{*i`!2u*V%>`bD7@ioMCV}X)w+i%?u`SKtDi|%@}EqDer@~(bX>rfBanD0> z&&aq(i+lRVJ;`wo^XB|(!Z)8L@y4$SSieyZi9o(M{~RGr3h^OgF|vW>{1bti#9JU1 z0d)RJpChW37=RSxPu0BLWi&h;{WIj)xwKE%)-_fkDMvgXTrV|sC84b}-@0^wTJ z0e>i!%7iePP(E}uj3o5z^@eUA=?(7d^yaL?p*w55Ro_(e6>rWP-r$L^Xb)fBk8P3{ z*Bxj+*eo*+;IN$-Ev@G4R%8{k(oWFD@XN|7X15etF*1``#mG!%6(h4;>1AaVWJ;uq z%qj+LbldF^6i!?9r{@D|8f5zozWwp8K|&Blytm)p3-RmHhcHt^)kiB=qM z_l8!aWpeIR1a}e|O`2YuO)L{bPS$7wV=bVt1TK+$b8O4`#X8IZuHi)L6&%XrsX$-w z#ca;0yul6l#hoK{v_gS_KjuMVL%q5igHlqRK@Fw;aOL!aN*lNu1ut$l9OphjfTtHR zX%g3_V4)o=#8ei-WC_vnGr%?P$U45bp|q>boK(@=FD6tRpYGa*S(Q7zZAyz)Uyrn7 zdPVZ_=cJdUb`^I^2&Qr};_UNyg)8MqFId+oWEj0bZoGa`F(#fUJu8r0k5%7duZVRH4Z zoLsTwU@tQ;7V){U`ho&<%(76D^^-8m@*BrE!7B0MmWhJAO@92`&af300gEh+;y+AYbgyACXKH=+*|EtjLoQa3}EAlx<6 zg?Ns!3nDO56bQCiu7NnA1Sy493Ess^Gi0T7Lv%`k6BZ?_rL(2)3Li2}Tf8HUECnOkAMp#d|1Bm%tPVeJDV~Bd z(!v!R6bPc+LzGoArC>V9A@w2;*+3NA$xyWd&bL!!P7p?XuW>!0AP?xgO~p*aNA*7x z9=Dt&+0{e=hq!|*7Ehz4T7Lyf2GL2L0BKhnUe%D$y%zv`>Fa|T|AnGKMI@rt!l>Zo z<~Om+xh;KjxC><_R+ZFmeM$M&(TkKh`!7`P*gaG!**HX*@yrFvHBV+K6CcY|#x!Wk zSMD3ETsChI)|amAb$#hBobBKPQ(@9h*d^#FVEqS6d@umSaoYkocp9^?VS@hcHGJUW zA$(xVjg1#u*t%-C29KRs;fl<$+B)2Cz$P_-1M{>3)}&&87hBUO9=O_H-r^3;RMBIn zsO(q+e!L57wm<^0bCHE@2X|;1Oxd(xn~wo;4BoA7C;9bAPju7b*onaXwANGiD3XEQ zi@U^LPjPt*nq+lAU@%e;`M%xuf+qTGT+z`)?{PJ6hO5zHnH8jd`|{D&x~CaltN)tx zCv(u*YYpE6kk@n<##HPJwY4{&KvK0j1J1%~(I7A(ZdV3Kv~8r-?G#ABe#OH68d+v< 
z--M?kGC>z+l0^E_L~r8GX#uy9cWu>rNKLpu&}97JqR5C=AA|=IC``;SLQ!Z>hZbuEd;#21+Gaa;*E(x`6`4|VkwQG;Mhg7K%|Ca;>5TV`&`dog znh*eMTumT>T_mruD{dDl3(VK_DF z<;p1-tko}u8@8FyNQ`&_?WI;X3l8`iE&BC$vGv4T*e-|5#BBIS_mnt?gsa#~DprrN zElTA86K;hJONY2^r7=+%;FbjTmufgxtKOUUF6KqIVqUa9yLiejSWO~rcwB(yRw!Y; zi*~B|Lpn|4ig-0usyQbLaGNJUJ@?~K;|@@B9+M-yL<9L6x;X5`V3vb`ZDN*$CX#y6 zyj^nFc2MVh2p#G!ZZyq9nZs`vs@<+lVyB5yqVFLum=bx_FlR%H$T>8?o7m#+aqKuD zJybZW-e*t;cj=!rzIp2$XGr23nm%lqkH3%j-XfqHdmds`?L{V7g^fD$iUXfHusdnH7}U6B$Oly zvLze)P-Dhmr-b`Z>x;|ZEyf#VTkm2m#9gP9A6~_W8>iNO9WMR+YLj03&vuGliKDc7?n~KfiG$cc_MX8!vWjP+{llY#4N+Om3 zsL0p;XNUw&FiABxYToVI$mT2vA{OEBI1J%xr;MvvZr1zNh()VULkxc78n1D2_?MzR ztVO)+WgztU)@!dtYf%j-Z#zmutD};PFeE1W5_ki#L?Aw-0#le?G&T^mo-Hu)C#NY_ z6X(e5fs3v5;MZ(y!r#aA53Mcg7((O$fI%q>5+t5Ia zEF|7A8yagvSKE-+hQ47#9Hz*y%Wdd8HuV2&=n$5O%{U;j68fqQO|zl9ZRjZ*+F?V3 z`&(hB+0gwq^pp+#w+(gJ&>#ZA1ru$k+J+vpq1`t0mJN+hx58d$LrZNaWJCXH zL)&cVm<^C~y$!u+LvP!V3v)DyS7Jj8Z0MhC=%+T+X+zyMl!|RUNhQaI z7T8de4ehm|3vmI04B@k(fDQe^hW^Wjp0S~gHuPs3I%GpBxRO9p&ak1YY{+jzKeeG3 zZRi5r!z05&b=ia#+0c({=odEhiVa#_43NkDJas9uv%{c^LDPn(FLJ&36mlUMiP#Yr0q#O6; zz&M1>wjq}dU2a2TY$(@;CfE=FK~JWclqJV`2}rW?TfE2PFGbmbH;Nb#Z!?n6#deDx z?E|j2-R9WPw`^#+4SmOk8f|Ey&C}6#>p0$q-m#%{yZswsLpRybY8$%CZsDSB;xM#a zvfO@Qw_!u=R;AEx8LqP-&hcb^k#WIs$t&_+@`~4NAnAB65fG(QLWn}STDV1Jayz~y zPyb>=$84yd-PaDbp^I&Za}gPq`V2B3b%>g`kEAH6h*&~A2O#lov!N;*`h^Ygu2_jT z+2-!GHZ%v2jAMuGR&R4R%4b9_ZWWV!p0VUq=`w;Y7?4}(p&tZLJS$8bk%K;P{>j84pZlWo zyotjby!yh~W#WLHz_B!XMKbL43`76Oryp}FpjoC1f09+|!LdD*kCltJZ5%694dj~- zTb9186y`A!JVWr8tXiv1!pY9p5*x>g>px5v zQKwXn;R(ft(#@=1B;rcEGmTo~4_kr6?T0u-a{7AerO>jE_Z>yOLG)>?S3efRb zT+Kqi&W9orZ-dstnPuXz*pgMY=h4T)*>B>okouzY06R83OqP^Y@hM)Ux*yI8zzSeysG&VM6e*-|dUe5fewW`RdmMe*7zm6#@_2A4&T{(}k^#bQxp1u%1bm z8Kw(c66tc6=>iASWpRSb0|_px6I^~0JStQIreM7xzY!7a%oCR?k^_@IU|c{BtoA zyKnC7z--JXd#OZb1NZ4SfO3`Q^Oh)!Q|GPuvIZ*Wji8mnUdr;bYl5s?ey$Q_aq6ra zMq5PBo`RKzJ{a@aG)bo@trVi0dmLVj-mPgs%WDrGGN;*1g757tEx3)nl6B2QR%-hPt%+ zTtLuv%t&?^Sf_bSud!L!SN|jYgDtT2IbCfS;%yx6@`h%qxIM;W@X~T&%aY1+*(d}2 
zj?KhxF>Vnt_D8ZTU3e-l8$H;yk%nEY@LguFr3={{UD=I__SDus@QmEhr35&Oc7Y4pO=O}Ht zOG?h*y@#XXe;var##WeeKhnXI50j7!KkRCUzS(11iREorD4EDrQN3y$f`xz$rYZML z2(AZTTmVzV0P0Tx`}ru(USYiQ+7P`0t~s&fFbw4*Qe}! zFi#qCUkglO$o)S^leTGbS|Z7^e=)kw~Vv0P5+Xvnp{Z%N!_I12F}0uG}mv z*OnX?N}w_YHQ%XR4ZYr|?e?!CGs1TJmjL*4_J=px<5*#~w{2|Oa=@=hAri8=IOI?741a@a&$dlveFNgOTGZWu(B`5={RcpaEoy`< zDhyAjxSlCupq!227kJDNdeFhW?6V_<=wfZ*@D;j+;Mg_!! zF*p$;gjpOL5=N+ZhY#aKq|aDqbVvSEe^v(~2uV}ZUL4K4ys_@PZ1{A;qrld*u?|+D zgX@z0Y9zc00g(`+JIq^5yyb1)kk~5Mo*sHL>S)bFum&pXq%ZW1WF7?1`9K(%#vvV2 zqj~9TK?2gbYolM?MM>F)uR&OL zakARS+K~?b0M*-Q6AJ!_oz|m!iM=_-CUn}QG7Agvi!(!C*pU9hhV&OUq@knwg$-#X#)fn*+J`S}NT1AxGzT9TU7w>3 zX(;!Md9t7KcaNni*$pYM5uFSh(Mi49h&~+~(r;0L3{Mw+yU|gOk)PuIg$-#3Gd`za zVNra-zOW&Et~R7QcOD+HbKlOLogF)eq~JI1+O@RX<5+&CHlpK9R1b+WIeo?XK6ADb z>E*gpit+;9T!~9j-m#$%ZRofSb)&74cnX?z2~l@jQsD_A={6CqhJ*$|$&}iKDasXq zsEe4Q(Bg_Zj4hxTu>8Qq4+&B+t5oZ#*Y($@N%1gs$@f-({hdd{jvE87Ltv4ekI;c zOVH2J1-SuBO`MH@*cy{EJn(di_sMNTA2V^7_uh2Ho$IqqZnDbOo_viypPZhvKN^V% zCF(hIMV4HMK2jq?ehdDnwUUid6aigm@*hk?zIxZ2%{FnkaxJ;a^N@VVG3ml{k=7$! 
zzG1sqW$}RRVudndTpq!fsYxPjA&cEhdker}y*NSN#IpQM8^~v68+u}1zQ?odyxE2> zkcs8F2}pkF{Dt!a%dEWF8NxP)L`(2V%<5-l92z!(vz^bW_B!$>OrAVm1c=&({_%{y zZ9{jV8qqd%#x+uU8u9aK>~$0%o>kGq6e9B8_Bz<^y$HPfrwbI(&JZtt4?f^Fz`fcL#b zyae0O$AR}q9NzyH+tApB!1mgSI7`qc+Dt3Z)eZ~L1`O#-GJXWC{ntw47P$EX!(iDX)V?i8Mu zHRwL9K(jpsIcWu22Di;OkCoq6NYe_m<%k4IW4^+$bZ=Imvjy$;(Yf;1B-cJ#P=IZ= z6WApYwgZawGu6y%Va8W3mq(-NyD_b@JQN)7Ji@((;pqujx2AgiwQ;xwjGw) z-iS1eYX!q*b;ra-q>o_Iewr1VvfZSxQrX*Qz$LFng*gxRB=DMkPw=YC6X}5D{b`&AIJ@jQo<6n zX&jmc|12jVsxTNQ%*B3T7usb>AT1c42fNU#Su`f^KQp_~W+F~y7urHQ3%k%FC`9^9 z>_VGiPHh*OmUVMcMPShv@6im$ih}!&WzQqcL&IyVAn7y{4MCs7>oaL$9ZejAjabAs z#n}kMrx9?$8B~~ORbu_-8y_VfE`Zfp65Zbw0f)){%=Ao_N@g{;%52U4EkUlHB&?e#2 zvI{M6;|o`Ct4(mrQOZaTZX@f{E_4&{k#u2jSPpDZv#IlPMs}e^dPL{Qf{(Wu?6ee& z&hun;p_v>O+F9F$7Wo&YYZiTskzcfErjd6@Pb(U+)3gh1QH?Kj2%%{g+QJZ*S)QI< zXc6QL?Lvz%y;fpmEov89j6d1_PhMl_UrRLOc)a-8cGNdu64wOrShEYLW|-6Q+U7NsqI1w>@%k5VvU)+Db_ii#^^ZG>b!AzRBWt*n<{<&?=sHd(c*D zdbI~_d7g$nXrfwHuwSBOYxKwOpsp4@0alv!poNX-S7;-ewxQwaunT=hFLt4&P3WVb zOfA%1ST(Y2Lhm4K*@WIKK1`d?&G`7=W)pg~enz&SH+^9XTK&QnG)$p?{%t{D28CDJ z<8AHL7PMRVVr?E(-DxBGf2J*HHjudBxf8Qa+HmHoCUo%se`gE&J#Hr8`N9^oR<|6A z(L&++)V83X1j|nMUVm>R>?ge}tw8&Y_0n40V8I?;72e5{Uboa2=p%jaKthw8vyJ(VemVmh;F!mzeN6y zrWd}2U1A{$PAu+8{{^KBDv?E}Wm*}=`6!Mt z_tUSYEoBH#FXog*5^~lv6^PRt{CMQD@A(=%O89$J+qcdeA?$uKk%pd&)f88_3dLZW z@$eZFkX8@W-iu&g>Q?+Q?fY$fpE|~6S{I>U1~v+z%!P!>Qxf7#g?YWZMJuw_PdiPb z;9zHB3~zYfgEKj>s*bqq=b4I+6OzJKno?bc#FP zH}H=49WW~}#$&uOJ(K}s%pd8n#vB=`k7_kGLLjuYe@1wiJsSfeI18woR+y*Z?)D=J z>?^^oIXnznT*kP7qlnZ>uKyiWz?FXv|$1TF`n`oc3wbOdsp`u6K z4XP)vY3d+d`^P3P4`=Z(*4?Ou2!vKh3Ev4=SP{CJj=alWnfm{+_criRRcHSAoiKrf zw+R9Yt;kS=B`TVrNYaHh7bbQ_CoNjrN~;x!P<4?&8K_W!p-D7Xm@V77-FDY)+11^3 zw|3Y5Y84ReOfoklAdm!skWmN$kxM|{G$a_v|NA}X-aC^wa23VwpZiJfJolXEo|osm z-1G9B=Q|*9PoCy5Zifp{b;ddD4hmoIe#RMe-V7dZ1;$MT$kB`7!#meZoa8FWkN1s6 zJ4VT1T6oO(5R?SHYx;L^J@r*^`PN#nLq6AYVOLQNc#MJYFd>B`Mnh7Pl9cHX>Nh3$ z4t)*(#24TRvHAEVT-GbdJRsZ^2eeP;;rD&~j|1&w{t_;*Ds*BN(>nd*6(#XM1<#59 z)Q?L1R~*jdl+^wMf}JkQy(3CIXnq8e>n)`Hu 
zX%_0I%)j*?2F)uh?)Wtc>o@Kp87Q~J|AaTq5IVonk;4tH#DDQpSlk6;F37}x=@XVe z2s#KV@!$54#07DhJHag9bQ9{Cy!y7c517>63C4L+Cz$4NCm81e9YC7#&>5DwBWg3? zK07C%lLNES18#FE%8cMw4PYT|vz(q`Tb_l9vFNJwG)X)o@Ppv;tOeKnyam_%q6OFd zvVdy_&xZ7Qc+0D8^CsXnQ$v`z&2Qru4Hvl0q!o4wj$Zz6ni148`_C#35A>I3Qz)CU1v z7eq{m>NB4MTTzNb@B+X2Zb%^XjkcnF4c>y?;%F>UMyLCYLdpY%y)1ENknakYXjXDeH0~PjKupf z1`?|@QIj;zx(e$l>oJ@c)d*~|m`h@lVLpUyoi^YfwY{lDFr+?2NNVOgVQ$0|>Z^db z6rUUkcrSByL|@`Dp<2E2CQmr`_W=K@^8^VVP5_bB;&;f9_!J2uE%`mZvR-D%yAmMB zr}MH7sNyuZA8ylO*};EZyWdgg&)731_{4QY2zwuK1zSN3KmiJ%V`P*0v`TUd4*dsPfiISddLn_yQb4T} zJj6`yji; zrlWGoAtU-m36YE*ave|CB=(RtPO>GKmvOqD)uVfyWZJ)gR18!`FgcCyAjg9EZ%OnL zILR#En|Zs_!8=!f0t42_qC)*a0dJ!FJts)A)MVKXwOF_p7Gs&m2=xwoE>^h~k3p$; zV7X$B)wJ^*?}`Gy7oiI<;UVCVsLpXP;o(R%${#H8@`nikv*uelUk@iH>nh%^N5yJ-Yda^@xyfP5DbJ8Hr=0*?;_*k1rZ2*r8CRcWDxJ-ku3#E z40F=w39i+M7=&y;g$6^vQ(zV!7WLvKnFc$73klRa{{)S0n9M}|g)5$kO*y|dMFbm0 zund3@=i&vpu1a46S8}oup?Kg?L-th?dss%DP=Ya`fhm3QM$;CCh+THl!pkpOeSqo^ zu}uib549|Y;;#k&XQr5=HEo#9Dsp!W{r zFe-$vV2c;FDW)4ywu{r-&F>$^7wV`jF!pj3AFT~Ha=|E1I}7;BKI-Wm9<9lZj_s8u z_`N9n;6lI1{uFwrex$+}k^wC6V`+J#&YGb}&OGgy8H!4CNu!XOk67WyfNKN3hgeIB zyaz<^kMQzHZ8w&pkk+|-9aZvEjIui=V8KP-pr&rY(iEc{1$53W>WX*IL1+R}@Q5Oo zrM8G<93|%9s<-LH#E{_`!N3nVyq>@A{P z@N!Z`rhG>kNI7A`G|8GTCDX4d9rGp550I-bvGc7K+eCN5)K?kco2y;Lc^H`SBXT`& zGt)4s;T)z=3?Az7$tu+GT0I{*^{B`GO}2s^n?kC#Wh)Wf1ERm*Oo}L|*s=0({vk_ltf_8M$KA$=|Aq z4I^Q`k7=ux8%7?vVN`Ft3DQ-_%mCq@GkCq}T5b@ep~+eX4u zZW|5sbiZ%ZTkacOX-$eK#FE!AHu7SB#=BA$6{byI%z_T7!N)lRBA}?40i6UP0MUtt zoGb>Zm<0JVV8tv?@o7WSy|8Ij@+K?t$KG1Pj<{8oJEJ1Q%dE;PqHG-ql&C-6F! 
zDh6jj82h2*U~7pPa-#Zi{|OmenUCj|c-Z8>3jEY%z!qtsuh=pzNr8mW9{d_0(BFns zIm8&6DQmlcFMkaTit#Szi^4BRKUA(qIsO2;xGTWHMxYT@A$C-FTp`>r$EJrE5JQ_E z+n8?>Y?*-dqt!6O@N<>?5K&sWGBS|z)nb}Mh-{Vf%#v$C zWmR#kT>Mp##t7U@yya|sZ=8bhapEnvA|9U>7Q^b(ppjcS&pAf9#_g}2uE!cSUK1H* zbGA~7xygRaP1t0=!rPD1cwaJzITsVmMr1JdKHnR^2Jg|GCs$&}1RpFb?)s2!ry~ci zpisorLCgzwP%@$i03dte5h502Q?`C!DsA4?9Zsz@?&B1N`+%Z7dNJ1?gN;C|=6mcN zLA0gB9l?{>?JA$>IQ76%gxFdL_+z~vvy;1B4?h-qK*gNEIV4n7o7bf_|U`x>I z0@)G3jGfpXM7rm0M;~yS>nHP1iow}X3aRT>VKYZra;uPggcyo(Q}7&J#ipR}M<{KZ zu_?Hf&U}a%uR_KNV=~8s*C-FR4EUJX%pdz<=`O%Zin z^3~eZ9-AUcr1a&!i=_J7)DW8*ZBtn`HOZ##v#A+26}73;Hgyd;KN$y&N=eFRQ?qSq zsZFi4DSq`}D2V)Bv-VmG0u&D_)m2XpVq|BMN+v7F` zlTBuL|7ugmuuN>mftoF;Yi;VUZR$Rof&nO#Z<|eVHB`R)*EY4trvA>RUa_f|O&znT zt5U2`vTW*aZ0Z@CddsF7ZR%zi*N`cgY*U3c^)EK{XPY`?Qw{(S%6CWGRJKi(+SEpy z+HX^C%+KVzew&(YQ@^yS-`LdKHua%RIj{w2rog6V+Z3j^rtcn`8Z^Li%d@FPHnrTQ zer8jz+tenTI%HEH*pv&V17upa-G)e9(yGTx@47KZalP_(tz$%2+{-!(FBngg`s};-fkL(N1$cC=@K@ z!Wn_2uD1Jen%k9m`)ixxQmAyh-==2S)MGZa$flOt)O1_Ae{W0oIh&ei_o91jYJlB4 zZnmkzHuVp7ueaW&9<+P2)i$-?rl#5b)o*QTI{G15=DqE{v?(4lktw|olBA&*rxHhf=2ud>8K7i%Cv1wC(9+Fs zQ`F0lZq&+O}-JW<}H+jBv9-car2SP+R-D5ouN4?|~$9lfkbm0%s za#23`RG62Q3x0=rr7CVSXNyp)dEsM+x2U~<%41&ETNO5s^;Q~M058$YIk;}ho?c$o zThH4()?4Ek39iBT?+NR9fz4yR_1~roJ{Wk5a{^NGvf`SIFDkE|u%4+EzEZN7_s`A- z8m%>Tyt>*z6VmR;gr{mV7wDvrQxssrW##k=vrt(@dcqgJ*W_Wb^>Sn;greB>*)Vxn zNIiwKjGsMROqQ(A;!)g~o0nTQ?aaLuU84iDXip$Nago_Z%Jz+in$>b4M z{Wy#>+b`d<|3OZ*K}b|B3ha1HSG2z=&T6gt3c{uqZ9@eZjDvKYgOkgT3@gq1ZlsqAoJ}W4uOQED%UbY8MSf zt>^msNkP;;eJNZ3D+~$i5v9*ebQk!-PoF4D@;v~UU7AE$_4P8=^V|Z@tODPH8U9&| zX3hEZrG0@a_(_oAK7FDtPz5hdq7vciI@+gAfRpRSKCS5+hpFrI^2f(|zC{KF+p@C) zrhVDj3+EOrSOnv@m%*MdCf4bvuEG};dG%AL#vXBAq_6WA8tnN~ORL+myL5wuPy6Zn zVu3w>cBOM^z6#d&^E`mD7nsahAG=Q;f|Z7DMr@aEhGAvv3zaBKQWtGjVde6o1!$$P z8?jxwF38H|7b;Pfq|VcBE&ye3W$TlNV5Ol4!Caay$;#FjDp84}XA7s<8u?=Fv$N9| z0PMN%&DpmJJOk@?Aq0fm#!WC;;%Lt=V(HW^mjAkJ9;cE3rgzzMsp#xVVEN_&T9IbBD2|OL*O*J>QqG8p<$*rT?t)B<+g})1~11 zl^HMhP=v+!TgTZvB*^zG@cnBiz7FV*D~|jMY*1t270>biSh2 
zT-TGop?^D2O8)~+5~Z}D#wReJ-_2<8OXC&(g5zs*5s_aDq^I~ZfR%aDrS*kiy*Yq< zb(I{!yEqvK=;x^p6ALsE`gsvK38X3TpYt50FbNX68{p5Lbhp0y*a1~L=UrzVq>k=u zooo1coKHU^VWIImJ`IJX*UUNEG}N}{;n(VDAx3*1n5F$69_g|(4NU!3NsC4vX`AYfMwToe6y2}-!*E#KmYWF$TF>D2ypTt^o_=atUa9H^%Bib#EHot7 zx*=pjYg5tKgM7K34Z{?O&r#;fsP$!R2h^@}aK5%%U0M8;G79>ZS}!1?;zXU#5fQSW zGBl-d?_h@%27sW;00fOv65`NL9NB7%#5e?1$+-5U-U1a`R5sv2=lX#M&3crt?b9Dm z6ZH;w(9<v z;udd22K`z?=t00lbL$2bX)O0Sn#Qe$SsK?X_YPHxYhjiKK59{gfQOEl@X+(x5t6F) z6Y$W~R`nI^$P48Y9(oAs%+8$6%QyH7&JZ4YNzS$o=+I}JuHxS_NC3;;gHvWehn{*4 z;RJ{djWCD~Jp|~`n+P9$=WKY)#F17WM?;h=ssuW;w@s+5$^O@(@)Q@7l~;SlzxaFEye?vOuAgS3$LTZ3Hc;XrBYti;UkvD_5c#OVDQ78}6oD z*&F@?2=kQgB#?kEzAx<8&?6GE*^{0kyF*|$CnFs)oT9Q^8?t$7JNh?O8*F3=o#r5e zP(%JJ+x@WK`>E@4;h&#TgR+!W-KFtlr-j?hbA_d;a4;%y%c?4= zLK7fv^EPUWcL)-Y#%S>lq4b?q)%368G((fa*nngrXuq~kJ-s`>ph~3jqsZs=v9 z?@m!OcFdkk;ATI8n}s0CuJ_{nxw=spohTh7s+oz~T;|md8GnVdrKm!An$h%P0&X+# zr-|EqeY}@)rW3eX_X4=77AByZACE&f&q?ovE;{2( z9J(3m^MGz%jzCz{n>6E_p?i~#dmSw(bo8^kTGiB6!vWEYug+|!)_X&FN%-a@8BgJ* z$^1AWTj)0cV~Q`9nk3c9e}PwQ%;q`i1a0mbZ3;r5+&g`*OA(m(UQZR@Yks3W=o?oG z39sizKQHU3H#A}>&aIii&BNM77l!ZKBr3n+cYH2>dj@XSmn=&jL(ZD$*pc|fNdr%M zws-Au^WwL&!Mirsy!cqB@1AqOyVimsq*JxNZ%#}%<^U9Zq=((24Lx1gG|ynu^}X)wLW%<%hoGP&ZH|L7d{f@y{gRU@~5(mVr2@@fu|+Py4DBN%%n3 zsx@bt%aQ`4X2Y%l>{dlotr8II70zo;c(qf04HoZ57ULbfrM*RGhZ<>1YA$mIhNz+Y zlHhV1G~xM{x8RhvTiZ^Tq{v6TFha;|E=vhqp%z7Yp+--sa1J~rIn~hOUfzPe46T>8 zJ@Szgytg)&rMXJ>B6(_P8XY`1zd;TsIg;U!+kA1*p6B-#tq(&b|H?r6jobHYtpX%H zb&qi0zMrV{p43y@-y`AG&UN%nJ%zXY+KKJ&c?-6xq4|AMKSEG9YP-Az&5`y#en)e# zF^Gf@>@CdtsUaT*VD}(j4DH5>-hH+=rq=TtSMoCyjTeEr(MWcBCZx=sqCHcnd{qon zVguC`XZJ$t9?bv*d>Bv7c+w*Q^hsr&)H1ylM&SsZI2s()bh{M~lFAj7c3AvO zDJ>RX3@`fDY+sS?@a0svN=BorRM)$aDOmbb#mGw5caK(^6>j>@YPAAZ?=lpnU0sLV zv=_CY+XuMrUozUWhL0MWgVSsdAFw&_1U+dgD#&8w0}aN@JxnhFIVx6(sMw9Cp?l| z#e?Zlafj=hBI9=dVwU%#>bgf!D~zv+Pjjw<4-LlH$*8B8C^=ovL{c}Q;u>E^Ao4U) zXbQ4emYb*5;mDOckfLhk#;fo(`}U2>3N$n^#zohU(KEV!44FAB<~Jd6SX`cm&(?Y~ zUX+IlGmXD3S2K1SG~$tPa)zYiYgZ~?TbrU}z0gOw;a9zt(WS}CNE|L04#nfaj=^0{ 
z%Q!}-fu|36dV?n!JcGLMAU(>9R4v2x5`G<+%V)|*alecLogj=aC^;#&bHXm8KwoSW z==(Z}_(G`9ol%`Ty?)V8uW@?9A*EBq|I+e4wZ!%O)CEpdGWGG*?-EOw2IC}FId{IY;P;fI*nK^Eh znwxmxGeB>ivFP!e3JMq>FOU4eAj1s}P4l7C&u`3U2+7nZ{V&sn17Er-%6i_Ao489Q z97HdQkl}%gy8FDO{xx$Kk5F=8%~5bjtAyQYh<8PAW-o23kecs)cACpvW$k!%)m$D1 z21+4*C29c+`8cLZp~YDQHP{pfDiq4%hEBfn@fwqdGL$mQGF>Q7>B2+!)+JrOZ@XA! z@e|v{dW(Y}>q=67jwe&^<#R`Go^V^v#YAtu%|dTR&HqcrZqCZ=uyt{vor`f<<1;&B zH`jlyXYA$%bXdf0zW+8^3#do^dF)(dBc3P3RNtz7iqhTA1^dL8!T0C{$;T!{zV5J_ z!=1ttn7!R#H&?T}PPlr)Zhi}VrHS!&gWa47*)@Rg-%UQw`bE6mU^k~xxCvKJ*v+}% z>q`OQ68UAk-C#HS!8gX_V>1@_qNG*6~Ytg%j$~Ve7sYq*AsSg=8cL{mVdRy zyLlq`5-%BVSM27tn-aN@U&^@~>}Kc9DB~{icEoO8jV?Y7eCJF)ZlcL}Z|i_%oD26g zrZuM2kbMDSH~ z6gop(T4$FHsM%*FG5;jxZAlyI9GcY}dP={7y?2DUKx zo)^|?g)IaK z#X5a=?0syf-JoM_?EPrO7yA&wBUHULmOK?HqfU)_tL ztf7bXw)sY%I2VH8TZp(fxx(abonM`7zk(c83&&XUbpF<~g;68kesWoH-*%}sG1t9fnZ0sM<*{Hjc^w15gzYERY%N#AfFM20Hdl3wn)x`I&Nx? zZWbO)U{oC*tI=x~ypzKB!@`}piK;#s?oqr24hGa9+(*G(y*pMVoQ~58S=;%BsD(?G z%RNw$+o+l`rHHAtE z&s;ouE2SHqGRS{yH6P!l3@WiDw?PO^3R4Pgio<^}EJSC@2|Pj``ycVYEHW`&ND-Cx z8DXUwp}d?PX66eVerd5O$T&7KkfYHlkN9IjQH`n1?C$YL90@v_Xzs6};p#-eKW z8|U6|BMd!jQ6=S|N9MFgBWlwQ-DI&mn9)q6bl_C> z_*coCgC7-6Z8{O%pXF~x`NBiwoHgJj<{SYWCx^jXJJ~**4mRwxGpxE4QCGs`6k)0J zV-c1>`&1|z#9C#BQRa~uu@m|qV6s!z3n2%y8M2X0Mm;c_3j}*|HAKxkz*#gxeNfXT zRvROzk7_Dov2K%1a>^)wI^j|lfkriwIaDRI+sy@rvEpm*vblE~9k2IZ<+1fwK-yF2Fs;_7|$xNP`igW^o84`acJqQ_~ zD!hZLz)tr=QM}A&jh$<@1Qt47*?`QP@3#saISa zrJz0Ei=HqtKUT%YjzXLf+pV8Kq3>SQpzlLPHx^ZQdJab;sZF~WAyl4N=g9N;Hzu^d8!(SF(e;Z%uT6hW|hsL)s4U z;PrwWe`F1UM@NcIYcIl>{*nkC?72{oQvI9L_>6AJ2k%yXrT6>%r{gx z$qf$7ti{rVR~u$-3`5yRxNDh~gY9MS8C#11usgb%L;uHt*11L!)?z@KwHN@U%WCOm zEe61T>2c{|Ee0SQE(T!lnX3)ldoG{7S`FQqRFS7(&wVoVVyd7#lIl{6&cM)W0Nc(+ zo!F3uT%l)J)Cz5qXQ3=+{6E~FBNfD@ZuInPBVl;Jzcv_z%A2Zr`kgaw72DR1Szl6S zwTx2kKjc=vRX0+(v+^3~0F6+xU$|Pi`Bz_7t}FeLGUD;!%8-YLDgA)G=gjOiJ!zg} zDy6e0jdyB^3XSD(V97T^LmYshftnf>KA(Y~T9gB2B<{#~wd$hQ_9uon}bzVIR z7M2C>$emyzbWpL%3b%Hrc>$!1^RhZBnF$2X{0`n29gRJXB;j{7CyMIpZandg<8Ye9 
zi6TZq1JdMgawdPP<}^2DndJt(QVc}YHlRS31H%(To+Zn_g-Led9#T_GIL{Lda}@QU z(@fRy$ldj2dQCkP!lx;i`%Z&a_7s^eV=UEM|$u7Ke@Xv1)NwX6KJN=$`g0zP?H zx_sL4LlBYTMQijrCJ5yf)FddcOoh5O)mNtDF6t7xpCCB#Xx61}kU7zAh|TFMYH09f zR6b8>(RakP&xxry7&Tx-10s_EIYtFI&{`lL zG<;ZI#!D0Nw%LFzCz=-aYZ0?30B4lWl}nh|g>K}HqiTS+NyGwSUDu;Mkv@ica_L$% zU<&||uxtAvSiTiWrVZYuVm_gA3`Y07;^$-EydyL5XoAYaw1%M{R_|7%k!m zYh|aafluTa)uL*9Zv1|>r?$O+A$?y)(?Z~++CWo*vN*qoQKIpvt-GB&5g=6nNv!&LnD z#V*o)_&tsPzXIJGU-!g|oi&{}9Qxm4XLiQQ?2M&(A?(b~Sec!%G=FJzvF2}QD8~z7 zNp^aD=Px^5mk?X>1;}0OEpjv=_7=qmihGM(?swF-H)7o2JrB0zUZ5yk>w0QFsu{5*+XEQ4;KWa(S2M%_V>L!BnD~esbK)a% zg9(((ahjE5j9_GrF@ljf1^~o#$!z5q;RL0NnPZG{@Z3RpSB&#N*C-9jRG>@xAcN3% z+lm^2E?IxNo)GNXv*|&23xLUq0GQm)b5X@LkaSiI1E~QG@D%%;-nCXmpN)5(hBLRc zD#b=teBAC0eGk*F7t>e4AFqqSImaD1Bu4B)I}G@Z=K!95VGPxig|wFxPNMjMRe4pK z-a4lm9^*49&?SJ1;4e02u*`?e)28uKjz$~y@D{h>a3w}i?yP3`4#F^g8?cQ9KqH=2 zjOGbS$q^vNyM;---E%x&tJACM>o7*I$C1uyN{qLodTRiOS_1gpRgV81ayb7h#1Yub z;ruZ;ZpVqwqZrP6QV$b_gdLS9X&k*gp031mi z1$w4aof736uUJ{ltSXiIi8)n92=NGv$$v)@XRpYfw0L1D&r>p09Y7|(!qmN*HhaYx zf=tqoG)RI>5qt`fBt_LxvE5jQr%zU_5syu8VR;)m0a0}c&?n)w z>oCcCdI{(0`tZPLb}jIL_P`R<^Lf#N8Lk}&DI<0O+t&zGaAxXR7AHIeU|>I_N*~?? ztU3lOQ6s6Th^7NXIph0@lOVI~J|t%38RYItR17uv*MLYF3QDXUV4CMD8H}f_TBE=8 z24gcr2~Hdd&L)s?PJ&6Ur(BG^pyKl}RyQMCfJaURc%&!>cDk|{ivJNI+Z9HJ*0DdD zZB~o*gpkzF6ef#F|KjK-ocTE{Ixw@xDiK(fMH@sVa=1zgQD8_qLCm@K?|OAA;3N@! 
z;2{xp;9f*45166q6^plIuehEXPMvzirV@LyXo_wDgC7EvvU zUdBccEaq!?y8;DxKenis6polKih59>7BwSqr-PMb6Ulo{=z{{_p~IUj`t5oKwsBCf zS=e2&OLr6G?S0Mal-U{M$SSl7j6&2&Ms+1`eG#98MC8+*C$M`Hs73V|AY;#<>H^#y zV0%Wa@*wVK($_!;`ly(kt6SV)c~<%?Da5fkrVO3$sQKGdkUl^2WU5vmb3^Bey0;w8 z;9Gd-2iQIYprc4k7=a5%C*r!7yVSN31Rt zX5tv;yhMKJWnizCVPP}&8L2HmQR-PtHJc?l%)E}Gr1jwl>?IL-M$`6Aa=%C94E$>Z z!(x0WD37UXqyY_e@vbIhU;$3X0f-U(KwVW{>LE{R*w~FKg+01S!dLTW+*S3p}cQ{E*=Ev;$fT< zR5kLvjE@B&7Y_n*@f1LLMr!pQt=UnlrtWaPvd2PJ+@VTjMXn({dq}}LfvW8gaUI2$ z-}Eb=t)m;G^uA_|X2p7g;N_|Y))hv$At!t{=T?ZXn^<$G=6m???Pgz&libkfX+2hD z^oD3WE9IcU1VYVI@*FxkwuN_zo=8QNo^7@6z_k_$P`8=r;?X+U_t+n$tFiwFH#o)J zQn)8ru)~WEB?xyT331|d)MM8ukF^X}9y&Bk`F`D1$~~1=D!#QtmE0GGC=*~_^$53p$)m z$4qZkdXQe67Os|r*%?0$z(;nZ@Om*lEQyz;{t~B!4R8^Fk6XPsnoPjQa=ERrpC7l- zz3G0=3_a<7@NrtWNo*@T3D3~CfcJ=Pg>rnh{RNG8hHe~{htYTOPcd^5=Qm3lz%_eC z4n}w-XWLw58CvM0Az;4#YLn&k- zrza}sC%+c+lkt=<6%p&j{A5!&Ppjqxh5dJ@844y8iRg}E_dx(SYLSRx?mq03yAPP0 z2p>*PDqXKuiQR|Ie$9}(57-}Bvi$qleb~&rmpD1bUfJ7=DG7z{dga@gkTe9BaxOx2 z6fqZ}wFPsNEF4G{Lq<$T{2BYbT5msA_X%~rkwT;y8uK8n*o&}gD9_Yl7s9Hin9Q#s z`{S6*%X*64(6Dr|>M0fl8sLK6P}ctrlX-9G5sBsaZi6>ti?`sY59UoWn#_%hKHilB zy$-{x4fGa$WCA%7O+6O_LRl|>18MU*t}*U_2<)NeO$72@c@@F{Kx8798(ii-7S~0H z;n;#9?=Z*+Lt^a^#D#Hne13(xB&P9lKZF(jI5)$LG$g~?55q(|7w_vifk%^Yn|TGR zQ?ovZn#qen438a*Mg}!~fXcO%Rsr@=Ao0N`!aOCy{DlZpVmUqq9@)@>2-;PA9m+1= zzF6_Cwr7-xX=Ej4! 
zEO}4RhvL%_%+Cnv_*@5}n9(1PCrG9Sf)R7fQy6hTaO2RPCrzhh&Mb!w1i1VNLm(d{ zk{aMMsYhy2+VY#1vXQw597asXu);OQK%C`CwX}6T}X1*n}Bxoq@Dnv z<0Hs&e8UO5OcSx$gs#q=LeZLyz|(c-DVrNhZqJ3bp4?bRaNAo?ZfreebL*)LJ53X@ z^~4#D=-SXR*;`QAs#bDFpf zeb8W}+hCg-ZBt`yYP?Nx6J5SRTON}7fld95O)apgVw?K4P5rw~{kKiMVpB03)X5WR zZUn^>rB1~Fb&a4-fx1IbXFv@TlrzmjZ5s&6E8KAV#8_=_;$4}yw`}S?n>uDwr)}!j zkg1fjuyF!+r}96@5t7K%+j%2Go8*Isg2ZA60$UU&#g;i%xX~7$OaX|$5Dd0I*=eo^ zH5C8I8RR0Vuh=q(BW*6V-JZ6opWD=PHbnq#DUC;L`TZPAp3-fDP5l8aPr5DbV^JU3 zR0hN=eZOZ@zqhG(Y-(YuRYvz?<|W@aXy*gj5@*Xl33}Qdyeq@&Yq!BT&gw|p?RuNK z&87@cvgTw%20XUtR35Uar)`QVr815qHuZHBgmmj|%W4*=YeguyeWti)9nZOc9Ne>S zqbztK1L9v%W|%HiRqO6FI=O#h@?3o$o?n|hlwVKbyk_!z={!6;O&)gjJ>i~Rg?hv* zj(c{G>B6604(c(OQ+qt4ii_)@&Eq!R0wxnrwe^I17PWb-w+7<0Coj>*IJkzu^Pcju z;`&#c$9gN1k>DDP|DJHq=G#2hTQ8a}SBkgz{bRo;FDtIx0rI&g+%sy}u~g#MXJb)rI~(Dn>_q>^c2pUCJ*a%PwAOq8l>Pjp{H<~O`hR`=LX!nUJf=0TB62wx_r-cfrIHXuZznQU0icYJHVQ@0AU+o`m2a8b zK99lf3xr_V#{T3X^o%3*Np0;*8x~(M+_O)gC`;7cm?6J(lCr=wE)o#R|1WXRK7G}& z+Hp5kNiA{e?GEc;L006AO)+_S7i+%sBcx{P`@%N|X2 zQ&%q8=e1kptV@;}d%Ss(KK5T|)U!|BBX(PMf4LvNFBa`dX_cre`Py#!Y$)X{y6GcBNj8=TtLJQP@)xH z;e<6&qMp5IF81@stxTPsxRx&Xy1C^lQ^PmST(BU~%UsS)bW(!k6SXw%{4mHT%ym@D zwL9CPT!%dXzMqr8#r+e(*BSK;OY3c2!s{OO?Ed)1h6oe+fX&G9g0^nV$13n`HRFYd zdPF^Y7JPM`_&TAe{0grNJ+)lipMV}vcjwwY(6J!knYPVq;+=UlAY6HAFQ7l%i`s#A z_D6(-L;AkZkQ9GGS{@;`)?lgFyP`D+AJ>!D3Pu=~`8T+Vvj^UlT!h9sr6oZC3jy+s zj6D64=GQg@m5bw;WC|p`Aw4CfC*&Zl>F!w?i zN+hvI{4Ea;P9=tz7|!|LFOS=r(Y9)v)zf<@z+#{az;a|IHGNY;h3#fW{h`rZa&Nkg z`ueml0XU)l#mIOaIVHSID7?L^Fnt(jQ#uifPa*; zWBF_Tg4R0#!?nzlvuoMSv-3*YUBz#LGYEKw;&KoaEp_*2YhJ)`g#&l`3Mvss62tHp z)ZpDA-@$9Of%#OY11E5}DpU!N<VN4b zI@iPPMt0nNBO7p4pVM9EspS^-jWn>7#&viXhp_+x_(r7>;w-_frS_DJ0tMoyt zTis@-hX7qEMUh;lmhe=iDM%g;dR=7@aBlW$ZF9c1>GW;KQLO>|Cj6JSbuI@X# zXyScn0=r!$AHi9aJ2L2_VT;nfGJ_k1O?3k{LBx3te-0Z-@jx^Mb^Qv26uRSjU&f}1 z16)r|L^aP1o*V2co&pN!WWLZeqQM}nEVdu*n}|ccU($?j$`^Xn0sZBixc`_m^z zBg+bJy*aBnE9TRn^S`X{{+r$2B0V?Sxb*6+`qIJ*5-?=<0E|SOE&PZ?SlN6r=m|{8iJVD;Bo-2e=`OQmEqDLbzgSnzeuX_3QUE-LHgbyQQ?U*j>)sJg7s 
zGr^Hnn-%kh=BgC$6B9;7%a&eUtsgxy)J+zM7p5a0P->-UU&pzYyg;SCbXQqaUAjv^ zwN(?ZZFgviJGEZO^XlxPGmvL4(wkMQ?>iQQ6dX8iIKd6Z*Q-(P>5<0Xc{pqqd2Da> zsXWM71!iM3GV7thP!u~<{-D>_TLo?-WEx#D>!JI{^4rHSpt(#3-Z}~C)L`z#2 zMDy`x0GeyC(H9DJ-i?uSG5olmi@1LJIvgSgQ_zHxQ2&!cPfmIiO4F+5F#bSRBq&X! zYNhi@3nbF(C^ue!r*OBvH|l!4hn5X~KsHrEX8ou-T(7Rb9`10eQCBoHwBw*06^c=E zkH=J?F-DAjq{F^QW3swp9~saDs-mNo6+SZ%!zU;zFZFMB^IpC>8d=y@sh@4zNblm_ z^oE5L0B0scirKpz;L5HBVCAyH4Ohx{&ni{#NB5sq!tW#0T3F_dHqKJ^M#~l|m8eNd z83S4+16n%?>15)LN6Q{lYCIto%u63{hK6Qz|1AHWc`T`|LDv7Bt5K4}n5K#~h;4a) zCIe1Gvb~Otba3{$BjmZ(?RSLp9Tl!pFN)3+nldsksbBpQHfVR(d!!}sF}=DHR8;3G(;OuoUYZ+ z?%IYd8pGJNzv{eCR7zB_B=rAD@)IgC^b8nKYR9mksGNv-<^ZQ{En2!F2RL7qi8QWf z3gc%C1&ZFc)q73**oVM>!Lc6(gywlK9#B}rS9n0-B}tfuGCacAbJo;9KXoPN>!4gNKg`9JF!`(2{5H!BX)#^MrnC!k%64 zda55BqBGZ>-kv{dldI$_{Mr?@To1)&yy*+AWGzNd?^|ag@#X5R(d64H5W|fx0`!_I z94M1V9Iw$$eI4wIgw_*ydVdCwBE~2cdGKfsCj|yoICFqa=+*XWC;eKJM>_}y2fUnt zw9t~I3g>Y6Y9|4Er=7z?5`0nu127Wt{;I<15PqTiVErow&tC9M3k<}t$nq>IbA;}5 zN?#{@-SkcF;F~Ocli{07-`*X3drRNm@D0+pPY2&V(iir2!U{TYGNg21NRbRFWI*1@ zklKMERWhWK0m33f-wq6YB|~2_V7yI+ejOP4Nrrx8z@&u?{W~!9mkj;M;Fb&nIxq~7 z3!EP=zuB({!?o}@PZ8Yj`)iq2ABRyck-@`~TFLHy!- zzj$utTH2X^&5eY^ee@do1MG4w92IpPZH2;lL|8f>mmh`6OYg%m#knnO`6 zGPEN!m>^xcxEWnPY-vX)FMXU)n8jeC=&Lenih#uH*_;(dJG~?1Ov{P5R#8tChOJOq zQ!0v~qUu(F80cQufbeTrwg??Yz8+ijHg6UB(M1Cqkd~zW(cgh-S9Ovg!cp~s7&_o@#|%CO6`GzwHW;-7C;yZl9JJBg;mG(M|EkD-INbu?pcj;$;SWfp^;Tx{QM;{~RuKy4~ zTNyvZ5e&taEiWGIhCpFDA-CwvC|Akjs0n6Yy;12!F>p|XV?pM;xZ7DQ9IX!nP6Vw% zQNqRgFu=S=Sh~m$15O0-Yh#@VDv!5E?Kny~G|w(&n7A7$&#<5-T$yv?i9MngB-ypV zs7K>Vzz|e5=Pz*3%CRECuFm{`l>8ZANY|^8WWr%Ky`6A0!g1@69&B`VgYfsj)ye}c z1C)CY^;hny>!(bv?5pI!##bh6e0>!*zDB{u*Oy`AYbb1dxf~;ul*~TUd(TUr>I^6G zYp9$sHDIY6`ysl38|i3Xmy!Q0NqljCC*^id+hydxi;eu(u(Nsa&iy-gHtgLw*ooi! 
zI*Yv1b9-DNyOWQ6#CMLLDE0*$*(mrZuwz$}@sClXLVah9R|H}lnkAux1U4S^j;T$| z;Q6&4gnusEQazRWz0}{ORydbvhvA<(w$z5Djx052xs-Vg{&`)hxcODS*k`r4Vn}&7 zm7Ta#0#1b^AxTx()Gr`t>GoBKTT&0&)NzPkx}AgYWq5@UKPBi?mV=@UoytFelD(QE)DbB%fatj z$8C$m9U9RBrC`k1LDHc3|726yjIn$pIlB$8=fX zaTY;+*-R&Uh@P+#uQPf0VV6?PF5`w2U+Gd{yI5sWWV={z{j7t_KjKNIJN;r~ zCEof_;e)a=J_sGTaXx?6_-T1_3p}$5d<$myXDymFCo6N@gbSl3W@XJOEc}~=^A@Pz zxo@2E;P?DlnX?uY%$PUpA^*4U<#UdFluua`5*xr@G-l$h6Wj^cg)kGdvM1!ojZf}- zvbRBBm)SFBB5^#4nb>9y7FpBWpToW}v8_dLE);QUWTVmzi(qZG1@m>wSKw`YzU?zs~r5OXhpPPI_erZ#u-+asXEHilV zla2c!d?ykw5%2iSafxn>*Ntz-kc%A8ra%Y!#$KRSLm`nti2EK<)zIc9b%Sq(RMR>V4 zTA|Bd8EZPK(6p6Rox|fk{&kaqqFl)LHuzGQ09*;-<#gsd0=_Fc@rgCzq9bG0GoW>}eI*xbRk@ODtpFdw9aKwKA;=UMM#Re7BqnTO zSyzY@4enxnx`GS3UdNwVp;WQ@rV4xh(wLxFe`_vFTIi~9z`SDFU7`NN^0nPsOLLiX zVVb|7vci!BXG8^`l;*M&)3H~gW18vcOmuXcj>(CRxu#?9M8}}%*eAiU!eKh5BswOU zj;V=`RyzA8I$G)Mm*{Avvwz&tUr?v*x+`?!uods2J5ZV{+zXdZsc@zY;GVr3GNh0- z18fxzmv$oGQ4Sb{)|3T{nj=nENj4Y&nJ_@D=oM^^z$Vy4zEOr9-Ei89GfI$)r$2+- zf%<^u(BgSm7s9rVtO9viRoKT6E9J&d;9;(Rmb5SY`w9o4E(m#nC=qo=1o0x)A3dEP zx+_Ji^l4xA742*BY4`VP$b2mloDI7Wc- z{n;|_mzNcbs;zVPVwrndOsGg<6GBBPXS&3eP9PR_z&T~))#r#ou7L6 zo1q)OWm5 zeSkhgYPSo*Pr8@m*TCJk=Y!(@wY+#s~u9ABr8lGJW*1JwAYKfKQmZ zdTwDhJRGy3gR!j9T;rv(d?5p*rWDGzXLl*4Dxmp&*2-^EFso zL?A8&Mi&{D@E0+SMNGOkawyXmIeH^@u)`)tl(CIfxLcq^5XWRUf@A2DGabP6z_M{J z_JVQ)1yrK&r?~ku1UPH6>Z^Fe_B&Kkcnd?MRS;dx>mtZ>_~c`o3mfz9an8qsG@?)6aVteh$_SP-n zGV3E+L0~Jli*ms)za~Z#w$!CaY%^YcnqG-cUUTv=&1L3mySN<{#tV1A7$@3IisFkL zOUajJKckb=P97s4JU0l>eC@0+@?J`Q>gjxKCvAHABJZb>?R}F?hJp8ek%MU+*oO7Q zHmn01HmB(B;^O>PS z2yZx&oGEGU+#q|IgJ{O6irMS}4xovn(e}xQkelAepD=&p zbr}=oGA7DpOcXg*yo`w=F;Rl(8{AmJJ&4~i`28#V{{a8XF`lumUs~g^9)3Gxj&y#u z!f}{al6hHJCrm~$H<{}%Rygk{VF|qNOy*J00T~5oyO7G z5EDkwFH7XZ7U*oP!3P{eW&j3D0=B{>0o&h_cs9L363?5+htT~}7!EN@hjx zoaR{x{ZYc1+=T+S;Y=dFg8onY9X0LyfMs&KA14ylASh_YguNN%(7>^5fR=<84)bQb z<1IQn#r4$Rp_&lWk^ z9EN#Un&klJ;s&RpY*dg_6z8|`wpFx;0Ep+;_8o$UV!Vo>uK-KQfN(?kZJ5^#$b|=1 z3dEvO(Y~;^;DkRkG1IF_0Ct~tDsF65m{-NPV_I{bR*%^ga1I52;bRz_i0Kj!UNG_A 
zm6LGCs?d)doGFPZ6R-$mMiA-P>G0K|WOP@^f0diV8JKjnnm=Qy^J_BuLiH$N3N>@- zw2uTsLaN|)O1Lp;9Le9G?l$g+D1hs^jtSuqXdh!apg$IfLPtGHCF(lN3@Uw*{TK}H z_U8b$@EB?VCX9Fas6BrJR?MUO2V^%m)U}*$K`RyGCs$NVwHj2RKIJthTtJXs=#PqM zDj5yb-`mRd4+A~`){g}?wEj+8qfZM%6q~(86=DJdm0@BW3S>H{`8t8?QYCO0`+yPJ zz{j@vB8M;qYmig0$Fhg?ku$I`W?-B|DrJ}*CHXVTR4t4d7zcMbljRI73|EAL05AiK zh%h(<6NZa90}C@ECy;_7aspsKU%E-ur5Li7a8@glyLEYb@B9R_hy{6FkNUq8HXr} z3G0dmB9v?Ki+Nfg**v`@J1Px!n9*~vf;yb(Y%azxFdP=BE3e~afx6-ld>jBKwaqxWeab*!?L2{jTm_&6&g2kFzAjjfnDnRqPQQ7u=dtk{PdrL1Tr9?Qai z;%OM4=1jNRGnT**wMep&jD&FwOjtpli?@tC1m=1*W%i0z;=Vj0(%VYh7wR4&sHCPp z3Ro9aKEjN6V#o+sg1i{NgQoFn^|b+T(DlIlMs)*Cw_*zS$bK#pYzEfLRcw7#`m*J7 z0A!)g3haZ{Twphzfx4nU5b!nX?*wKf=_6n#7$GF-qho*=9no|O4GZ(=!uh>Wk}szR z87z;CpcQ_=<^i+8`GKR+NPggWR2?7N0{8OQb3 z8EpE~s`RRvc$gW6S!(>wLF3QQKJGAP%aj0&Mx^9B@s#`;sTV1Myv&rWL|(ZKAV79lyNnkR6bPr`1EIc#uMsOv2L3(ixh!K;pJl*o;Amv# zfrZl!=JsXyR64wiIzX$-zNqn#-peJzJhX&=J}~JoN74Fk^;eJ?QJ*t_N}7n%8B_n2|xs;WB2-Wy~1vcCA5O z5W8J$Z6M6g5~t0e~-uoWxYa*TcfH)F?B=q_SEfwKLazxC<3@TUwc=C(P8uK08lCloj3Hb zcUFX{+YKG2ZjBwMZr+?Nfte_}yWO}A*SFL+a?>o*&AjdGuBbQC;PwH?9n&{|YKyCc zx&@rN>92F<7Qr;OEEhwW?}brMaNc{Hn6@E?g})KFHm~GCzk$dPoRbYf*4+ELrYrttXy++bdX1BTpZ^A^2}rKs~`+^DW6zt1TZFm8UqxrmrQ zBR%;WL{cN3;>?iMyVZivb$o1u@5y@liXGrq!6=DYgXrXw8i9Tni3>4*zY zg>M>v=FC!lvw}I83;hp@!c5RtnRB~UvxDHEvO@=9)MFUP$rm9tCB#i%))+DQLV>$V z`r#2Xj8yzsI!F@hhWL^a>!uDA>j|n0>kD5i-VHZn8*V5sxIwt|wQ5Zt<5vmciR1<* z$CGL$F)mGZHL^>SLcA8qmcb%N+#5B%E|VSK?T5QOC_iPwjV_(-|i*K=Z3QcaYT5Rzx)=q2~Tb~o@ z|FbnVMi-gQG4y-U$*WJYSDkhM?>v7kWrD8L7de3O_6N`|z<3)Iw${1P{L}^xvT?9+ zEp}MNqF%IdtT~ECX&#Es9IFgn1L4Ziwir;0qQ$7Xf}?6((SL7_s`Ir&99VDV_d*P; zRp?)FaQ&_vT)&BVGxk?62ByzgDq7;Y*yluKs6F3@z{`h!r!oq62o}_D(e&~JMI0TT zrf@1Rfx1IbuY%%<8mAHg^>sm2f*K~MT2Nj=;r5B~abkE^re!}UD(X6wnY}Pt5R|aV zAt(Z=Nw?jgBwt?)3MBPtvPI!Apy`I!#7UO_JE%AuR?7UI0R?3Q$m)A21xdwml8Cl@ zt?)erO1=@tuBk$wC*5ws%uZ4hZ0a0(O&J~n5GQv&2|5k8K4i$;dXi}w07|+YLJ59Z zq;CjHP*PvBsl|4w_O;8h1p6EE-M26UmsF!oF{M1cBvUFWHnQ7=8*a*-_($}(?y@Pr 
zP2~%!>xm)DnlVpl#7_+Q&Gze1aF#>{iuXHS51Ty5sCgPFV7iQ;i#Tf&e-(u!uD>;T zL=Jl3dC}yd)OreMtI5N&T|My_CJzJ)9!u{}BtxvflDeL%`?z{Dr-GVgy6~r$gPJ0y z1D>s1d}Q-jZ>{8478e&1dP1Fv**w-;qx*@MWE*yH6~c6YvlZ8CHjnkzU5o_RVEp%l zI@j7be z0*wbbMN><-EUDf+P&%@T^n`RW-{fJj^>Sna@rRS8%!_#{`q0T(bgy7;gx2JGMv9jahiP)ab&E077u>X#^|-ole@1-xeng$tM}w#=QL$K>h- zLa=O;e)14{hETA~3g32Xp1-5rVR;IW_zL{*=``SHj5~8Fl4Xgz8~e1EQd$<4$3-$n z|9=8|rpM}GntlG9YprE$`cfq8Jhk>xCd;a|J$IiMus8iNy3Y%QU`ggY-RA{D=(+o( zXwfM=a^4dE{JYOfnJi1(-E^OqGFgeXv@Z<~%{Xi3wqRC3oUy+QL=*T}Kr}rzF7tEz z8T9fUhe8V;dF&ydXz1r}?4MnF>|y9d`ucyaU^G3JpZQ_>yvomv?);rKm_CE_*yZ<0 zzk{DGAkBG8=(G8Jb=$%&yr=Et&Dy05Ue!xAR1Kno?D>Vdwmw44=ZC|uw*3; zsyn06d@%uPZqB+bJHbqo`$Be_Zp*ywR>>d7r{TJwn@f$@=}NSGMwjVWGA2F^F<^Lc z;r8>e)8s}xZ+BXN?#@mVS8UtBS8T3ya;HV`b;73!Vv&A`xq1y2apfxWy4Z8dgwHc( zUQITH2fH@i;L}9G7dGRCh^FG}s=XnxhXKfwyJy3>uuaJQ?-EC_9Wj19-;xTZENxmR@GUqwV(KR1~uY^CZI(&x;vwgT7iZe|TuEleaw6 zuWi#dpWe%K?Lg52rz9^_01M}1`Hnv{iL2vHw^Aob;KK}Y6(gyY@GGhyJPdX3b4%y7 zt1G`2`fif8SJkedW*^QF;e~CX2bu5wDPC=>C#Ug=4gLZH5HE8cQ`DU80w3n6tC+S5 z1U}5~K)7DLHy_y`K+M#0I3e>jZ^8D^gGmeo7O!jI{cQ!p3e1d1bQo=~#D)PhjF6j% zeqjl9^zurYi3c;aJ$MFqF#pY1U>v&m-#`!#=9o~=@hD1Zn!0ivqI76`L*H>2ejJ%XfRiSXEk6@_!cv7+ye)7BP4}fOPHcX)3b1R{v!auj6-Q#_%FH? zwo7{uFL<>Itu?p!=)#FGxl{Ti(qnkxY?W70SuSB;7)R;UcC{$UVR+zem%p~zuWc3h zFQP7*P%A>*8G8Z$;)T-CCRa%S(F1YB3!t9TD@9h2P`0HeP3nsIa(20j325;83b+FF z1sZ1reK~{dNYEDtS~kI80Qv&uI)J`7SdFzR?7De#hNQbnencv?(BXRJ#+%J_0CwPE zm=6+&FHgbSM10}P%>`^0&yw-OWgS{!+c z>1P2|@?qN+D4XlhcRHn?-BmyH)T}Y1-)tdX$yVSOg%-PMOQgw@dUWQg$Ht6~w%mz> zRry+5(OKY^OoBLO{^9JH!s=+`CqLf<4OGwO&?BReyo{MOGfynPet9XSnipE?7HpN! zJ4{0c%BK*a1l*N^$vH;6QR&gng2CktId6s;cUS3m@Uk~&hig?9WC5I+Sm-W?H)oTp zcmkeXzm2$Fsq#9ungmJH0f^2?Q9@MX+Kd93D(a1!_qNr(V2f(IHrwf=VCMe2V6hyS`&(x-ccrv66)zD*ZQ4w~*v&p9& zgBoW&9B!Do8!{Uv7@a8$V2gL<#B?E0qJdliXw<_Jns)d@@8QijVSE<~haDhW+Trm3vG+ajQB_y|Ga+C? 
zu`l4C(k`u|jW$>-nM?>77fXf)eGg}B>Yr*`Ef9o)MZ-d(*hLK{QKw;SRO;$3+ofH* zKij&WwXI8OrFN3=C*hw7_$Lt15KuD&On?dj5q{tAx$n)q`4jwy#BO~b$lQ1Dx%ZrV z?z!il`{$h-?B$Iukl(cWj*OXuFTYi;Y&l5KT5gUyYSZu2YtPQM-`UcI)M$S-+fMsK zH$Ei6^(>okq0un7=*W6bYQGb*-7K}*r5%zb?3`4a|FhDa$8+w@WfL7Q4*gRWC>m4RQu=d5UXtq%?)L@Xl9%j4 z12?ZMy64`r$6Xm(c%jSh?33Z{A#fcxhn?pe{gU?J|9~w-*<8EyKS*lr;-3h`ovnx4 z_00A9;myUth~C<*Z|T;Dx9}>Kc761#kuRgML{t5_5(TKy6l3f*ISRX351ySJc?jnZ zakmlgTlpUZ@%iYV4}Bx#ALsNt-(t6^X=cOl#uWyjD1%2LoAIR$-e~yk;dXz`7OiFolJPO)#kk7?@pA8k9(tI~SGw~ay=-t1Vs*I6 z2Mv%RePM@u7z%OG7l!4Wti-mhKn4ZS0{<;$ut^SFmzDyyX$7ky!SD(ULJGETMUfTk zT|t9W3hROj(#x7wM|w7k<%O0kfG$}+9p5SC zVOT+i-6&P>-X%qBw~#)1y?)`Y<0wi;Q4fQtuG0Nq^8xq4>jf*ReXDeut#B&=I7otC zwx{%bV0z8OMAA?_l+jgs{`LCtmxS2OrPD7gx^T;J0xfJ?!82jPf_96J{ns1FKo;x} zn64E(C?hcY1bw$f5liS=K`T#ci%g(IGF{ni7SGc_X7T5rLnfFUHjz^#NapU+Y3Gp4 z{eV#{0y$qSx^Tzw(noLBFWjc6?O1`6u&TJ-CTTo-Q+iM}3mbBu@Zoh7O@Jp@9{8;+Omzem4!@(Iy}t7VX4=fX$HgV3EY)L|~*`d*{$_KRTp!!*u(i^7y9 z>-LLw=tzuJVRijQyUjIIoii)bk#)ha+|NuMIybW%v2 zc6mH4_`MEH<6P2uKE1cX;>0KS_wu4d+CIIvVxaG>V2jY&pfy1&f&(o3Hitq-Xc0e{nXGI;IF`|r8d?;hta z94m|cu{#-BM!)x+3=dvJ2~z#_bgcEW7k`jD89Gh+*qKh}oeYkzkSj{hiSPWpT(^o* zf2Tr)^G=2@nQ$E2rl~Gk|;6gky&myeaNv zSZ1Dvw@?$WT~9H-1OJ}_-1BC69Gi>!xs%~R3{;bI0Dwo@BgLHz9pDM~RA!lQ9Ou>D z2=R9^Fu9z}u|4XJg5*0HmICg0BHk2tGCY+~p2C}aC&R4sY)BDQc$4pBm z^>xSJ$uJknej(s4GvP7-tMJC($>2i#-V5=^_9(o`cQOov%>OtEZ}OcCBh9{H6)AgH zDeh#@0r!+y9_6K|pF0_D1l;eE;garTm<70%$#6+`GAsbxngqDheJ8_nR5obVrvSLlyZj9Q=r-)^;s*HT*I@5+ zX!)g;N6#!DU3v8K^66E3WX}@&iQQYReMTL3bR0Rc@F(33YClkxrr}++{13k1@b2H$ zVc%B|P2@iA*+b?2lww>2vgcOcgJ;E%YKsF{4v*Y{Kgjt3?qsOitE&qcutTf|U(stb z_1Y2sU}Mzf>mjI4Jye`{yF$~UdU%(i$Aw@Ue9IWTXC&;O-gUU`>d@Jv_32IYV4+ph zp{jVXs2+ll-TeXhBev7=Zduibt=2iHHc^MmJyRdJDI}~=i{F$ABP&V#RSf$nc z1~n`W<&VH7SBnwcqSx;^kGn6Mu-HBNSPNq7SM7Y~5%^~4o`ALH(YTWLokzmB+JIY% z1mq6suco(Rd#43{GhN8=I=21p%`ir{1or^8gVz`kdc)`j0p;H5y*hR&ufOA1LH7Pks-8tUTD*+WbB-{#u3Uj8D_43`5tQ}`Q3U7AssFzO+f7r^hzMjaOztloEQ zmCj2HG;o2%!v{w@?t;PcJa(>j3c?P#z2I@IJ>s8rdqKDD_JT|O!87O;^K8BLbGT2y 
z^1iD(&wvjTb(?{lVbJYI92)C*C&7DPNN~q5QL4D>;2i>It;Wq6MQA4T4uRv?umM#N zXpYqI`4U-xiYQW}{x&`rnT|-m@*%sbblHJUIKS9!1YgDl0U2yPIC@wKgN}|Kj``~M z!l~TI18?|i$D&9FLFqgKgEQ0N0pbn#mOG5xgj81yoD>=^=4{Y#gK<)5dc%m74R07H zg{D)C7&M$>oD`bAF=Ej0jd4?M)iSmvg0i*rBY zI2ZQ_j05TVx&_t&25cQ*JAw^9Fn$yeot%jhW( z316+#JguWoPhX@vn@)7p8Pmhs^7ORvmzGzIzg(+ch7y$T&Z@rScYBaJDRiS#AmC_- z7lab(EBKuhY#TqcysfJ1VmOn*q{D| z@$d*rcdf;t8y^#bkiGc9&*KP4jT@y+qOY4TD=d}c)8I(wA50ut%@J<)u0_E%JeS}V z1S!y7?yQE2)0`%h9vp-!L@rY>pn&}8u94aY;OO9e-NPoQTVQlwoO=NA4=WBjkGw zoJjXQnC1>w9ywE65|(<84o8Ymsx>a6nIgHiclhwBsz+hFP+NU^y&|mK0Gpt2EVl4k z`Kmq|cOBwb7TRR|nepx6<39U!svHQ4BXSMGVh7L{>mNCZ1NLit!OTb@A|{85zd(Ut zyeSX|$Y&JJF5rf?+}mZa`Tlxd#(}OXarxen_fJaT=GY9se_?;HI6J3{#lWL}j=vUfxHJgY1;MTUI+Ore!|)FW;LuG+{lSQU z*2%~Zf^m-#?=KR9af{Jh_{V2`;*5b002?74;&grz^ki}9d&h9sO?XmpyD|Ns@9=ir z*&CR?TPMYVU}vCaw^n@-;9~x|-23F%on=UfLD-_RPI8iMpO}$@rviz*AD$c<<`N^* zs9NO|L$RrU%d*X8S;kNbj*iGKlxg}y!>P76V3X0dq^sfvSQQw{d;MOxHLFBdygHPB zB~QE=HEZBH9(~;RJjdB9L*pG7GPnAJYfCxgzDzrkp54!r;dC3)U|PQONRwXQGXaC` z=ws`AV&Ed-U5vzi_w;fkt!6v)3hU34B5s@qsxJbb$zicdcM&tuDxdCk{@QZY zcK7U)b3KPqE8j|Xd>nhyM<1M6xzKt1ObnUDP7KJL2~q2A#V_nc%Y&d^b0X$J&~R2b z4A_#q`A@vKzXf2^i#rXDnROI9_dW_9qz1FMU__05f%sba>_(kmmUn{ zOb9Y3-9{UOl=MmL6BjVn_xxS>i!%&46E@@L?wkpG90K%4>6V*EPiS_O{(jQM6Ly+K zG7BQUHUWNxkC)x`<>CZ70c`*_JU33RH0ivON zv0qLhky!~^PpkPlUSVX7SLuNKB0|!!CsvMk!T;cy@$jF;s|?BFLC~~aY9~1&?KsE< z2kcI=9Q>qP{6@ek)>h9)kj*dt&gJ-+DF%=M5de1>e9I^sT**rEbYKevq;%J94p*xa zLQ=Z(HpjFUx?YE#;!7!BO2d?to%V1Xjfs2lR{I|3b{Zn-G^rKFlNnUv=M6g zZNy@&fPUgZX1Yn{D9~oU(qH@r0EwhF|5+MciK|7YD76T^{8Ka{|Do_eGAJ4p|4YCJ z-}7?Fhsx#+w9@DaToGhLqrsjyUj=83p0JK>{?!Q$(r&nxVF;j%KU=+^v;mXumm*)m znG}k~pmwJ!7FX(_d)$+g(^`5B5UhdM|INPV>4LrV;N=%jP}^?^er>|91l6jYJA9q1 zcE0Ody=v#%KFp+F_rdM|jPgEuQO}0R)b&9F8rHmsj|*OAqdI&1=sP zTw6eU&V{e_l{O--B?I&0ODlnkaH6SZqynCx#F7V)lB70|sWWPvh=k&S2N7OZ492C3CALW*j(d!V{DeVH51Q zx=Zg}WFRCm!HN1wp)t|5L1UtOQfN$cZP1wLo)p>@2aSpDNue>(wUv#D?n$9B(S6tC zfvRa&AurtP1v9EmW~@=}*1Is>SMJu=a^UU5z>CXf!Uw{C;ovL-Z|MODGOV$~GWs@P 
z@Edb%Th*IRj?9s{XexCQ%lXw2xDW3P+=n*=_u&o3eR%2ce?B~=|MP8`l(O& zPM`We|3B*gylU?(`y>a`6|O8}eK~$*`EjU5J>*ZE-ZVdjh2`-`Gu@Ws8!`A$g?-kg zhlb^d+wzN`GqI`+x8+L!r;M$dW=0-v!$J@OF}qCea8=ijDg#Z1OH77jesJbgbV!*D zsQY{I-I-cLjuoJce%pK?w$j`ZGY|!;D-uR=w8OAQdmKhFZ>)K9^}DaUXs$WyUM$hN zKy!5Q`OhG>-^J(6&^8H+&keBSd)4CeChD$TP*k?XXD<12v3ICg*nP(6&4BOwQxR+V z+q<)m9p&)$0vDtuU;Sv9>}78<$7Gk`-0b((m(RvqS&6Je<4cbju5IgZh12TN0~N9e zjlW@3t{Ih!E<%_7_O_g3n5uNTVEe0OXs#kzw4}*Wbc4_}xkCus4MY|HQhC?$GqvSD z81o^R88qhq7rz)4En_|qy`l;3!1dAHx)V#(ykNItp?#vs$hzCcI+=ETus#au{8IPBYN!Smv0#F=@#N+4 z$PY1c#P%Gx`(gR-&PAm}Ft0-(CNXN|8)mbtgNtl-MW%>3ScK(RvD-Ie5ZW!q_;#35 zx8+QD(NWqu#5;7f)G#u8e@WO_KamL zO7>zRJ7Jou`qMz?m0aTQ`&I1Fu`nKve%Fb47L~i zaGRS9++V;gY)gc8BlsdT12z*5z%w^D6O7C)&`Eo^jUbmI`<@e{i(70m(QFR{cd1PU zm|kBywuLt!Qx8VcU=v&C-<_k*bc}+%buR3!tcKM4470eTisw|Hw^gO~|MzSp8TkLn z@{i5+4v$cE36uxVC#5^4L_=H>P^k?FeOXd@a#)327^GC_>S$GD~n3eALGQK^ffp;d#exCt2+U&^APe~*UNM?*)V zp|4=ppg4JbG&DaN;>L=?3o_&k_J{GZ{d441H2*Og`ZZc#o#cKo8af;eU5=JkIi8D# z-i>Q>9!mST{AkCY^L{^mv}c2#d_b)HcQ`7{7@l!SbsC${KQUpXF8%n?{%;e8yy~x< z7fcwQpzjZ6j|oGi)t?{jucF=g#QbQ_G-LQP$jO~mp2$=W8cUvIQ5frl^kR;McnCYl z$-N4Qk@;A;wnt&CTo>_U20T1kHpt1@EVYPwtXwOjFjg+VNzhpU?9Y#OC<Bo=uT=T>)N8VI4v*6#EFw~a)(eqamM)}cpFr(4!j9+j z_}|~x4qr22sDD)1MP>}!PQ~0{#!$DYn0aOlHHnJ3-;6)e)`8ik!5%JH!yy*J$66Z>KnzG6}L~He+NEZ^p7*YX0$%aymVd) zzk~^NmrYS6PMJ6D&i?aFnQ{9qsUc=eFS}*Nl-cvAeRM15q;7uS!@T)K8Yd~;{Qe&r z{y*W4v_G;MO4)vY%LCy_d)VLN_|ji)EE6W&9USwr84FTtWA#o#%R%EzkM zCyXqMTc^zLwN=kkR-i>;3dRkjGP3IV2_wtm)=B!$duTeVVjsQ)i-vw!Gf>LOs@Nxt zEW1^m!eC8|GVRc$=$MIWCB(@(;Px4}Ou22^oH=FF9hT`!>vhU4 zQA^k=$QDK8IQHroIwsGZC(bg=KXz9eDy;q%oe76y9<~b9l5Dak zZN}5@kNx>5-ZkvySm8=XNm$K{tr+gV!Si(j-c+8nxv9WI5qkUkk+g{V+Stq+RWksSwxrWewkve2=W<<>mGnWw*!fku?|Ev4`VwSU!gA zlqdTYvgnOLeK@~DvHVuZ-ZE03gquMLm+V)F?ZUQ*e=ag#DR-$>(cOr*`SF2KHO|hU zcrQ3V`q7Ul{~pf=lmCKG<=Dg_p*RIo?LSXh}|Xf3!Hr5t}X2j``F5uT%7b-h~agKd0c_ptS(2%RQvc! 
zxKs~hEPQF{{Q0TU>~WPTEFc#Eo>m506f~6u#9iwRfV;*#6T&P4mvEQJRMcU2Qh8HT z-X&6O*40AHzLM%L5#C(?kcpQTi1=g5=O`X~XhBv3?(qaT_}*SssSPjB&eeu5%q}X{ z@ZD?){`#}0;_r3Ya}R1|m-7@$aWL!)wjOSosofr)sa;k)xQB($)XJLp_5woqn>ADG zsh_FMZ_7YJZ`D!l-Z99>D?rj=hGqYYR*DNch}zR6BoU{V+gz#)FJW>lG2L{ z9qNl1>d4Y=_$#7!@ukhFqJZR|!rBen5#Py}mRS6ETC^KpMEp+1H^t(=)uG+60r`wx zooB4;JBRp2w}P@U+CzU()O6z0(6XW|{>_LHNeUDSRc;&|iUgBlleA*3rx7HhgsHBX zc-aJ`oxZ9_u2#JiENAg0xyJOZpm8t$@7L>hrQ>djUY?WoX=TmGcwrVO!tbb&0C*Ys zfwa)Eo`1IR&tFk)p<{#mMMU=|X6%*MRZKsKAR@K0xA_Jvj1pFrZ6lR!QlkzhvUTs# zZrDZeHQEiY1LfCvDjTr%2!F9i!j~1V7Wj=|W=|9H^fVKI1u;q`LvD}|Mna23Ny1CW z*Ms*SHpn^%);0*%WyR}(pl2NroKs>VLsDP*OsyR-mlZcM7vxBxSPS6Giq|p@et;;R zmz^6g;yHGtv4_;TZ#Ep>= za*(@{fWBZD0Hb}QQJ=C7zq8(y0DOl9KJ;upI8iGbhANb8V{wBi4uu54L4)z4Rujd~$a9JeM4Ol6He^~nZ$aIv2z|J_xO%Xq%0mQ~y7x_BgqAt45 z>6F_FvdoCJjv}a^#yaQu!yM;7b%x^~9v|X3=fS}a4Net?!Y9k%15OIQJXsj4`0#fO z{`1_?qWh|C;#Z4h5#eW!UvD6M%fEcSg7MaS_Oy-DEj~lxMV}9|LcsF-doDx* zPrao(E{lf#5Dnb~5>)ECXy}z_=o%a#Q>ovNhW-)_eHL=0Qfm?7d9!rK&!TY~qM5=W|qylW%lhqu+12+W@bE`G4|&f+A_mv1fhy}g80wABJ)g%vBZYP zN`L2i*9xR{c9XSJ6f+vkjefG|ZByCHA6s*`NMqNf{9A|1#;LjHj9sCBiJOwkf$+2qJKNWAvT79Iae)SPdm{uPt z3s=2!-o5#pFyUjsX}~St24d0#gjp8CEQJt0f=Ca1Y|$!$g`^*DGcZM~>NroU`6YU8 z)jOCnZDAqDVTY5%{XNYvx{4B~Ow7M(*WJZivN&U6s1cuq_2B!}9$$FXt_mdZ?|SFz zw|my%ztO?@&&KHd=REBvbV35dOsn|?@f}q@CQ%`{>VS*HU3YuKPX5KsUAtx%ccZx3 z#ao=H`D*+Zz7WM-#2e{(5&wbqE`#RKCAlIO7cNMQ8pBwPM9&fb%e<7B%@I;?}5WaF?fWsV@FMQ#=%?S2__gyH< zQMF53y|bqs4QSr~2Yvz=x6FNa*hsn6hM)CbfJSaX+=9*c-{g{Z$G`Lu8KKJas2Pe}(*tYZsR^CM13fa&uO1?5zA zYOGB5@IO&opdM0%RjW!X9IMP4UGLbtdIu^a^;p$-_Z5Ah{PHVaKOfdVZVmk`>I7Q- zO>}d#cJkwRi=lHoUXw!3Mv&D-@J?S8wgvO==pzT01plVJFC%E|Rs3%6{_)H@{O)BdgBPphK2F`0i zKdWU8A92>%=bZbwk)Qv<(&also-WtLyty7{6bHqem`oBZ;FPlH94Q|xCNmBu)L?_jkP%Z z;Vo~_nBk^9&)Td%iCqb5DsF8=c{osKD-T}{Csh!lmkKc&;+pIm=4A zraBySlI#KuAJ+YJ%sz{O$3Kl2V^6BtCnsoifYVL-$oEvUPr`+h%EPQOKUO|NgzHTK z=etgBC8_-*N{0u!&67jUFs{v@3 z+%XG*6JWP?|7HxDzC#UXl;<1X1M{-F$BQ3sgthA5Uy3Acc^Vx&yWt;yPlmQUtb3N! 
zm;6ZOsc0kIeUAZ7cf-%<5kD;2+lWRGeiNwup{pwcxIfpEg{u;ZwdLEi<*&lCX`xa8UckqZv|8JvtDu6c&zlXgkE2G}s^t?wQwuiq6&K)9k1;I{JqpNr@m zZTaSC%B>*HQ{7rJ55K_%5YVPA-%fYxdhL={*2D0WWKH%gF$riy^%4lcyAaT*2*9fl z&=?~CQ9?i?DrOQ8RsNS8Ia1AsEat2xkatE3Ng{O)G>;ZA&U%w|8 zJ>orkLD4{SKtEL+K==ModZ1*WIdJ1Zb71N~b70m$b71a3b6~-Ma==!5=#eaIG>ndB zr)VTJs=6uzUfeRU`!Kw)1)BmmUIR;Ek$*HU)aj_yRkCkICCnc~n;6Doso`u0jBXK| z=7Hf@A3+Fq4%b!;&j^E;VZ5p>hY$A%7P??sa4_{u%mCrdmQ4Z1Hmjk$)mIb-cEzvAdGL$olT2w`03An?w5n+cDfA4|$7jPA~ zU-R4VeYv7j5BBNq7Uev%@zZX=PMT5;^w$kewfOg^21L1>E;W!s4T>fiKsA^fFeh*%u$kDqb2;B~N z0A@i~N(>Bx;1XXhRm6pZkN`)WrO0nV3WMfug5w zByAo<0X-nJ}i+=t06B_K0>X1vFr7 zV6x}wtujlO#etiL1jK*@j&2MJ2O3b4!jBkoph3{%ML57@g&r|L4A#nh(Uv7#K@aepTIyLmKdpiM|~Hn4```A z(}7>5OZ7Y8#})XySpD5rid4S~CdE!w{VudV==l*=uC(Q=As$oF1b&i;`s=|i&>(NZ zf-aK=i6INRK!Uso3z$q2Bt}?(n0-P53g2g?2nlTfijV|tc{j4zmWV2!Nh6c0BdP^@ zBq->2ptLNOBvHh_qkbP)08qitbXPQ82tX7?U?19`J(@xSXbk{6ygHbN{-JNKDA*I;l1l~Z4f-z}96lg(|>J0m&Gbmz^ z1Y%?s0Ah?_qqZCuE_4@OZL$K=K*!M1Qo@bB5)^K%wzwft6e7Z=Sowc(S(435QcCXg%vFY3Q`FJg&^81 z0EU+MxP;-x9BDCO#p#!X$_3IE1E&@qZ<3}MKw&7!J((m}V(E;>Sga5(1dT?1HM!94C1vhuCcWjg6;kTnu<|yI8|P zOfEc+$b7={uXU(A?h(gy!SLMI!R0v^a95jqd;!mWx@ZR&Si&4kA`q0;iA^F9!^TmQ z2)qbynCz0d0%C+UyyF`tLu?X(1Zf(z$%i{Q{lNyPsxX^JtR!W`L2A^hAT}dmHe1bu zmLjmE0=+NT;)4MzuOI+b;{8%l2vC41Qh;chaM2WyfFdmuj^>-c78LymMPeQU4m=JH zq1v{2jJ$yfYM#P*gv5XfHII=u5TVk+1(gmO5JQ~|ETlo;fS-uC!71A z-XmZjWD@Pqz4m=qWJDG=h#HE(42e?vu568H5;y`4fuh8c1Y?+U4{{L zPDYr73DRmXSf(dra*@n3)p%%06=z4#2l29Dcpg@Z7JwCoU@TDLp$DUcNED>R91D;R zB2;=D5fT+QN0OY75&W70(v6M=n;@Pg-{P`?Nx;W#6f~+9u^Rs`2bOZJ$d;6 zDIyGN_)9hoA&p^M=lJ+7#8kqd)-OI0!r*gBi>Zz&!O2#~lZ7OS`q(64QIoU<8chfo z0AWz;7XzpnzLR4O6$7mnTF@2)Dh%=K7pGqsz7<_PI0b6Kny^5$sE}Zva#WyU7o9xf zfLz=d(I%Z?Ak+c}h{?52%PKc5Jj3&=N(>U%1BjY+!M(6OX<_NLPs?yYBf#%|U0|Rf z;2uHQcgO;D9cSMm3hFwQeTNiL*Z9R~rS_${D_~pg_HlbDI#&s<(w5`uTatlD=qg1< zhrkFK=8OfYLWXU|f+!&)dB%biA;TIql$mR@sl>v8j$*ZfFJjk2k${s8Q4%^&g{5m* zF?z6#CVD(47Ii>CV$8tThhPToPCf}U+EklZ%xD8JXA5B|MVQe>(L^*?y;U?gLEHj@ 
zMnRP^c>zRp0t+Q!&Rn-k5G>F!gORxlw!e^wh6pxddLb_|cr)1*H*;m>U`=x73Ir-2 zP$P>e3*0Q>rbBe^#qc~~uC0hTsJ$0xPmu$>0Uoobu*8spF4_J;vQWTeP@s}gb(MkG zB?C~y;>D^$eNhqic_a?CM?)kmxyNjmk9WB};N7a#Jj7*%MGc&81cLBCW8P8uJ|-ck zKx|Y+P#Vc*Vj+q=`p7#pV(SLCVW9zS8QmkYu%Fg34Kr*C6_z==t@G6VHx?Q?0}Xu& zcgh0ZR;{KKA1LAgI#+(`_l&F^^~6Na(ua9FnMA&fNKR2VODs;fFcF9L{q9EirQr@Q zM7H+qhbMZLEK-@`=e3AsrX^1>lR2*qeFGEPdNrYiPo}|`)DGf=3zOQ2XFtDnY|-6) zk=M7AuwKMkQ{DT|K`LKxe#}`eN7O-X&AFrBDwLktH zOmI(+_Jz=D`MV_9rS9IOqG&(;4J z0D(P)`ahK6f!eJwDRF!-=PZuQF3FA0lUq%AB0PQO}s zVu2bNPK#ZZQZvM6xTz!o_ODv=u^*dxZ%K%4&Z)UB$4pFh)qFxN%5vIdn?PWYXhT~J zs4!Fx^cYciy2gm)MNn&OI0efb31`R5)sEN_M~bEJ4x36mz4HUvACH^Inq3k5<4_Be zX`w`q2}l%86dn_h7co(oE?W{3B{oXpF#$=rRph(h{qZLrjMD`l%l>#h#sCJ1gIY%pX4s zmRKpE(JX*-T5R2+jFi(_^cgjQm1*h&-@ybnvXaLuaPk8FeF8XnF_cJJ%##xZA0|R^V0u6)ZQfY<1=aUyM^W?=P6j~hLia&XgW1hUoMuUB#PF{d) zIb$JY$FES~JO;?-$qP^&J9&X#mwfU9f@m&B$qLk6ukx_PkEJ6=Kgp=Z$|v%AviMyF zN`Y?TcNxfrQC2 z$!tH{x)Y*k;B$&Apdt%QHw}EGi!dN6?#z(@Lm2Sek8KSt0$<9MeDXBaDZuf<;& z(x2mBY@j`eH{*4{+zeGjiNo)l?%>psKtw2;PQG@(l}^ch!8tFL?4Th-TVokr-NPx##^s|3|=@Oxij^&3$406uQ032W7_ zK>{YZH~)%7YSq8Nn?Lwkz`e~^waclu_FUyz*+zPd;5&L~JU9c_BVD?;r(zQeh+?dG zi0st<)O(&3`aFuEkX1gv;2#g&H_5%*_b0^I8cPqpi>gU8`VKWZD?X#QM)cOML-o$` zx83cNYBrZ^21E>>eYEb;Tf6=4msJVfnYuF^aBkL}&HkE~%ZKRhO}e{L_jXmhsn;CR zswa~E(6uM@&^K`)q|b0;jqQlxT{mwCCNw(gfTk#LH=DIRK$^G=w-uFDVknjM2e&{B zMH<~*0q>rQ7Yz3XJ=p6Hz6w@hAqPhejNmanxHeLN?lkk#=y|1JLy|y)RIO$Z{C!7q zc?(c*H6Rk?iVzEt2p&d$gH7w>J{%Ge_f_64ligt2_wVF$CJ_jun`3`;| z-I3Rj$sLYth5vEXIH|hes#fy`U?U??A>a=LA0LTk4}_LYEs`!L%^A}fzUR1pq52%f z*85`loqDJY!v8KCwERt@HqGn|x_4uIXV9uIKr=;KA%cqN!Q*HsR2l+oc%GFmpa&1@ zp?oCavz9j9`)b7tdT2Hn_=4`<$d0GbRs>^21%j*G&F({@Y${PIz9bs;EYycnUv!*0 zLaS~>q!Bz|Z0R&Y6VWkRAUayrFA)pXgM3Ets9tlTd@X{$!wrb8EFWW7+Q4Yt7cjMf zKiKun&?O@R!95T$!@HyWrzUs>Jp8kT<6$AO75@{t9jrw~lWR%l_kj7a`bGW$uNbYH$TUQt-x-TmIdC|t>WE9L$wU-dYF1IKrF_p-asZ6#_z&Asyi+;a zQr%$4sc)%bw$svQ`FU1cTdf7QUC2Sb}2*Z?F3Ee4^4x3Zp5%;YC@oeCVU2+&Yw1;Mp#x7S9Y{?MJr~n~es70X^>f5p{c4Y1v#&g^UvVTBV-16` 
z;aO^0L~U1vM!^Z+Hp6{D_Z}>d^(%nOC})RNI#Bhix035e9r!l7PON5;^VzTV8EaSJ z_e5kVJ6*6VG6OHPI~FxW^xln+`QN%W;Z!kUw-ITCCCUeU%0hK2vRieG+RST=91v^pt5g><|vF*WE8v?KF+H zCS|m3h0z8!wqazU(S{N0P7r-WcON$qS3KYj2ZC?7+w~BwAEy!g5+&w#eK6_(cglUb zw^yrr9Z6!GzR3WM$aUhkNexNrwoX!M+RwBaP5ZeG1dPCk!hZK=Wi`@xMgewg6_4mu z`(yyYAzQub-H}?&HZWH?l=I!}`#uiou1~6&mhJ$-v3;n@GS*R17Li_mv0}RNAGx%ORJWyMe1@S-xPZk@swT>cMH?v@K zt@XQmboV-(DIR15kJFi3^}k?b4TS!avmX@X4{q$vG^$oXaQ2nY(nE70IBSXyt#+30 zF=|I;>oBo(_iMWMIL(|_c{m#;3vODfcA^;f38SWshAPk?VU6?)+Bl7>XZPyP_KNM~ zsbHXeeH?fy9@j(PMX_7Zu~;m!%Ez9tUq{1C!4Hbu;&+Gj;LBB=rn12t*t${D=58uM$w_Q&hmXmZS_G`Q4ie;OfTu^0%A<>m=A>V=`*KD zo6A?>4Q8*ZMuSu%X2hD+=u*|7t)-Au{2TW2bj-SR7+Sg;odvd5BddaXpkeA+@@TxV zWU0+MsSO2BDVg=EHe01OM(uBk(0+z9qE++8UnBHyBpnu?>eEJRr2L0Q$e#_PZl{m) zhzNSM*;cxD|9niPcH%=43f$Uy$k~ZN&F=Em?n8l^4zhAhz>TzeX`P7SS!`CTDd61A z*17rw`>zrFtM1+pR24rpf}4!cb0i@+Cp$fmxtcApFdHDPe#B%2f^Va-Czi^!pb$uVFgwWvI50jemh<#DyZh{bK_tC1P|5u2>S83M<6rJ?}h>T33prQ@W60;FnICN|`+?%xJ z{_J#p3#Pd5YWFok+xTnA&02C39Lx#0Hz6V&RF6KQZ+SzS&As36jj#p>{J}RUb9rvu z$r{g}{S?*U!Kyb*$vdbd?^WRs@CJhU4Y>Zwsmp!UTFnJOZG?V2lA0v+D2E;23hEQw zcwHFNqP{7d**G8=rhfb*Ze&vW5o}UI82JKqW_13jSG{vWt2qbK8R&VF8Vl!40a639XvIC?x2#iFou_*XE=uDW=_qRdq&BS zaC%Ex2jZMghZ8n|Gb_`PRXQy9GgF7o%`C|X4{1s7Nb4oV4#$_xOMi0ly9>KTS%~Y! zZ~FHfj%LEgM=a#qUt+S;_zjPnaaATRNlX&Qb$JP;AFu}bwyKIW0fN(8e+{=-b-iB z#-)+!Iex<7_<<9bWq9(YPnkb|M!xL^Q|$fA)8B0(OdN~<q$~SJG zT|TL_zhrlTF`s4S`R{%ByJbp2KL1UhGiA1? 
zuwcqH=6#u6vvUp)|06IUKt)l@6t5uyclK zMNpFIR2{bG936ENeu#RMNE|sl1Y+ z*bE_1o2ldKoJz4hFxA2Y;p)(+G<*$6g?Yz#UWYTz@7*@YfmQ!E^r|Fj)ic z-9{~@9yewfwR)COTQV{bd>wN^IQVkGFoH)E?kuM}i8E1Qi|$#_(Wpwd0Z37#?|z;gMH`CpU&i zf(nlW6&?xxRq9@dmZ>EgslL43h_-xNSRk)DPW&DcHpnX)3GMg5jj+IIyrOMjbH0rd zgl@lQ@!M<%iDx*Aw8WA|+LB_mm0YMmG?yY!qJfmIakzqTy0AsK*m2sh9l(_DbYWx9 zGO8L2(S&!Dv+vt{#u+p$1NFSv?$pbtp3#%>OMv2Z$-bG*@}(!^@43TuF9`I|IZ_wPJFtm|13w zx`T*cPP5*jwMKGN}Bmt2A7 zQ+J?QpLwwo_=2FKgI6%%ifqUCBL8~CK8NFiEXtVom+l?rx8&DAr3@^eab-%K;`7LX zCgm%4jsS;Tk)J}NA}}(tm;ui<=%@`j_?!YqDPM&TMUlp<@d1MxOyMv2(J*AfMhG%t(8Hy;2llvokTYEvW|A}8Xe(=x9<*&G@?b?n8U z&!-o^?hJLS?+3MLnSU_;tGTNRp(qFL)I%I+Pbw}e8sgklrM@3)7{GF;Ab*_>mm>|` zKh=YdDfJMTI7^Wk202;3_~%sF(01#P3?1a8eHi~F&n-s+Kv}qU$fwK~rlhjPe~);Z zg`H@N|6n>UbG_{rSmo2^^Ga9F>TJ`C=78F4Id+?cOuLJI|g!n$Je#&@uV$JZ06Zzwb2*$S)``!9Sf7TC!oE`6zj|QewZS zN}gHb7_(R<&&lf-mm%HO#;N46pGo1E{Cw{Rl7?}X@O&x_W}~FgZdOX{_f$#)*Y^1I zCk;hNJBc*RzP)_j?Azzhp5EU=!I*rn)yN_m@1-H~ydJlbq1ZaXO_)D(tu@s;K^V($ z1F=rPk#1GOxYOS{K^x#)?_-^yH>tdmBHpaaEko=e=-i|VJ>sZzIB>$zX;eipi?4^f zAdJE@wI1vr^lZlecKpYbJ|iGg`WrE&pF5IM4n(h-scZMFovF=lgSmeM6TKsd#D2>O z%+E7sYGvtt1lS0Gtr>>98&iXB0JQ4PeXvXjFszFJuK-{hN_Yn)yaRx3Qi1@SEhX## zKsx~50>E3ywhJZb^>49+QC&n!e8_~n)qR|u5))AX4snaS7{|h4?VE5*1dipgAO*M7 zf{QQDm7+YvC6%Y*;>%NU@#W>FC=YQ-<*B&%@>E=Wc||G8LtIjMDlWb}6&GJ#Ns96i zmsFmLi!V>b#g{iVMR|xzDo@45m&Z6KC9%4R^C^meiRf%lq6AVs`r>uW3SCon3beF9xhkn z;^B%?z$J6%sLU!=4cC!kAjWKWt8}bSIGxx`{F|T?hYI$8;`yl-aq6*%0}cFJhln~C zaz62dOZVcD7P#QUDp36$7eo1GaOp>pt}RE$o8RWdF(Ok#AC;c#t-teS2zw=`YS;kw zX~WaF^wVfR8H06)YjNPEh}k-37iYuQD&)D3Ub zd^hqDdDxeS{si2lhR!qUJJQ*)vmCuxn(@|GY{Od%%ECd7Hf{K4u(4f)lQ(KTW6=>= zE-&&O3ZGGa8H$~k+1)m?4%y0o_@0F`Q^kvlEadzG@d%EyOdPSj@DNE4Hlr`ed5Ny? 
z;etDWns9Zb4!N5m-vNs;Q+KAPJJNFpO--BYEO8D_a}1v6%!RZ#er`?>pTX~+VZ(4X z{Yjjk=aE=EPAJm;=I0@#vmOGI!h&V};-6Dx!6whoX_v)6?{|K_&CJ%{{QPyKr?iCK zd4=38i%rn6#h!Q489KLG^D1CFGb^DdUF)7=PTKSBlWuF$el1~2u9|gi{*-I30&M@2 z_mdQzaz4+(Y_%FXW~}D}+3SVwf;=^+r@el!@UUo^!|{)t%BI-Yqf^VhXE_`h|01BW zKl*X(>m}~PWdYrsuPdC^UsqPi)G_YdLhGHsoChsTfGf(vzFwZ&V~1mZtpwc9%z9ua z%^`3ecU*bIyEvgd!JD6NbuB5Dc$X%?NqJ)n#tDG=hj^C(Ze=3g0&iS-F2FsVP@do| z%yXMX$q(^vNPrW(-m!6T#M=V6ZHainKRY1nyFH;i#b>wI4oAFu6W|1|+mmar2=Vp; zt}h91eq4jN0Cy~*JcW0x$C~2;MA~=M1cqXd!do!b-f+b00^GTeE4XO;y2r)U*M;Ou z63SC}y}9`+tNdks{Rwb_H!n9Y5pN0L@)PmqxyRc1LO$P=P@dq;%Nu8hBi=a)a0;)- zYZqwNcP`+rNy0nUZLdcW;1(v7r|=fU)q{9fCcr7Ynh532$Lsd*q_L4GHBb zyxwv4@`(3X0-WIW)}G$$|C%+JqoWkPVo}&_97b)f`WIf+illt#M=S5>_og{ z^Wxf;sV$(z}X*#w=hnh z67MAmaH_uEvFwZ1L%g|wo0^EXAh*ztm-QVFxO+@EC*B2bLGrlZPldN2FHT;Fw*+tx zCgIJ`wYLZH-UPV+O2UfIHdwC@&Z(%}ts=nS_uZ0x9 z#JeE@PWX&&ZkJQmw*_#U67i06kF%FYyxSAX6TIU*aeACfjh9TsAKRnw<|oxRA8>n< z@Qy9C%byG7@F@Jye^muK@uTpLbKCjC`c6%Ww=mxh$NJs^xUPDre_b(P z&GJUzo$K*qyz%2q9paY)uI}Fuz!T-47m8O!$zSsCl1a7*1ciUzf>T` zgQ9SSd3OFyMZ6BU1`{sYKfKU)J2x-i21mSM!0k!I z>(0f=P9&NS%X3{}y{TWRuRC|Fy&{CW1aN;%##>NO7|Z_*f`tj?iJZD~$9ZjxrXK1_ zfHU!8Cn#1P@%934#8MR(<+B@Xu{B5$7<(+CJcZZowkrgdH=aUcdlX(wKoa<(11>ii zZ@%5&CEn{2$}{nfwa=s|S4#{VQUp!B1$M=Bb|i{h8Hkn7kg9K9zFqMW z?~8z2l8m>&F3-!L$U6Y{I};}gulvjBTvBe$}ZyaBUcPZf3B;obsje!{@^9Y5Q`sD2gR z{9L=iOT50`jG79!2;JQq>!ALUkddAwy8;|&9OJeyLGV$iy4NBH`JK(@# z32^qO2wyz_@3;aRU#J?|QotpA<2p+lUY_mJhA+&{E#@+M5&rtKOSofQyCk~>g}_6` zRiXS0>;}}Cy8=VGD}c3fTn?+&?!O^x_m>;q-g(0#>mljb7w~)6Xw`o}m}~eAzPv2k z1_68|cghAq8aD`J4Znq%JuFlkeukDsnzBF>zBMk^vV^QGl2x^8R_xrWZ%X6LIz9CL zpX!ofg22=^N8*ohow z)bC1Pw6hl@Ws3(nP*)u913inKj%8#*E5Hh|)jdkD-n#Jw81Vv^q>SRd@cPw{{Dm=~6A%S{2nZD19 zoe4ek?LPl#Udb~k5GroT0Nb>xuS1E)v02O+X`# zXrU!({ecb=(C2NSgZcwa7tlv-px_qvj8YIV7@+g#m^CUH+#lEwf=#l(hV%z^2Eo2$ zft`^O44iM#hJP+{pKM=?yqG&y+Hfs0M{R1i2Wm$KY8T*u#@wtx?W~dL2D|XTJ5W2- zWz^2fLHHp4ub)}0mEkCFHL400LIT2me4)#;9JxXTDZnueN>|fNL%Rq2PB^y_a35vi 
zh}^_T?8QqYKG&FwzSFajS=URX-_6q<8yKOW@QpGe)-z%qF205bNRV+v#}~o)`WH9) z?i+@DuR-EpUi7-iEeOOZ@E83!+Fidp9r2X<>U1iXKyW+Csvhicp=ZkJj=hC?{p;xj zl)jN}?w$|JDGu%cG`xTs^?T7RUPrgsZFY-#bb`f$c}Nt*px0bNHY`qe6tN@h%QpLu zkg(3A3H$MaLo${y<4clhkYlQ38V2siKuuDF;1@wKkbz(jQV62=upkmdsHGOcqd@RW zf#8cN30@C^mzo5>1Q+Z=FtSDoR^3#w54=|3AQJ~$wB@gm(xqvn6d%|scI^Sv1@76w zy%?1>iKXou<%3UnRd=SM+=EiH zG%^?n5dqpkD!vD}1jK{CqRUK#Hh2(>^ZY`slhhixQ~)I+-!PW=bQ(!DbHft{a}xMz z?hvB%J%u<@yo$pDM-k$kBp&!p)`5B) z8UpoDiK1L}e1e@}k^(xq%}i1Q9#mN>f}t(nL60GzpC{2#m4qBmN{w(3i)fydYM=_u ztcn4`n8Re{T=Z458qI*WszE4V+hvk(C%xHDIvEerO}@(yv)QS}#xp*H5V=??+~=r5 z;&+;34-yM)na~vSH9ngG8PEd4{GAYO{@*j|;3RYwr!+vZ)-x=F{AYw#F)eT>C#zC4g_` z`>cf4v(j!9#7IqH9vrumj{|{hkq)U#y;+3=C#gc@gI3|$_g#f&-**+>wpU?Isw(U` zNfmZ}&?-FrzN-KwSE;%M)c;9aVXwUkt5a2>lT~0Z;DJoutMRI6#V(EsTPeO~HK6sQ z9e~{`+B4Ahsy9H8o;+z4vTenobm+LGT0q)qy;{SMQfR6UszWpCwia2isGm812Frxcq;gZyd73=R<)^*9Krpsb;|Ci8SAKvegBqt?kO={4~f50 zJ?Lz#eR zfzpwU9;zCN5*RLmdrKtJCC_U5XDpxNSB^);6WSiVHh{^V51wqA0^WxCa9tDTx2O0{ zdTj%yN+;l1rhN4zI7g|eFCPLRe5;9pT|80k@q3o6F9e134NG@20TfIuaE1b}F~C)^ zBYeoRTg9R5zR9)K?W`=Z(^-w>?sc7D1TFBOEB?uoMS8 zd#;9K8=?(h%8r_}6AiP$^Cd7WQ&~R`9*po!`DQrCq2HX*@FP~! 
zO7A``gbcQd7pREtS-gdX!^e{Hngh>0wf|W{#yQ{As=GgrB%E+B5}#Jee``S|8Y%2&Bf)bTT9xYyRwL~zhhrl&QiP2Z_PaMmefHqOHK`h*o%`tX zhmBN^Mp{i5JFPtxKjkFHN=`b2vY3(C%wQ zUU-&+TR7!2$v^=7hY^G$l5{%@pGk52Z$>9^zf7M=xyF_*!~0k5zA(PEO`i|TN;&2cVF#UYRYsQ zohG%zY0}lfXSYoKF|}QT(^z!W0S&0toGS!8%JoF3jj0sW(C_6f_^7~cfW|uCe&Wyp5^-u=h7Oea`z*98292k(@q?j48~$7!f=KyI zZn~4U`*DmV%MisimbF@SRvLzWtd{inL;vYQUYK1Q4LE9s>)=3eCv|Rp7F4rVjT5}G zM24ATS-n5>trJ*vbe4C}kQuIx=+uTojgUXEX3C)AZpzN#TcJ=(du$JSX)9buAG z2ET)Gjno9ypjB^@ygge49|{3;+jnRqwn!!UXERWhpUL8ZcO&N_9X@SD%gg{85pQK- zvZob&2^lfQHo^6uhzB(e#iPle2`+Mw!U`A@qFYdxdUCTi>E3K-Gmt_Op@U_FgqIIfp82hP`D8N`qpksRf>mHZu)V z7?}%x$hYmD4~$gr{Mg!Vct1fbqRJl)_#OEf%Gn=!mK7xEr7zIbI@VdD1AX4 z3@K~hsErWXnuz`YRRvcvxPxJekPLDpbv=awPEDyNUVv!5hpM2KM3Iglbv4o`48NC89y}#AO7pee^+$M!NGAt@<-i1A$-%$b6f&K^Xd> zN2?x0nzQnhKyH9OFK0KY=bkgA5F60Z5lu1CVtjxBh0e)Xf~8-(8C2rHYu9!IO*VW zq7cMs39@YmH&Pe}PzLr%C079#6~d1jNwRDYl7gyscTiaAb1375|b%*$hp4Y%fw960^w$R4Cu@2vS%4s43?)7{;92ErgD8r8xCm*)W|$4hjVqU zxHx;NP8ku-fT1E5-PqG>-^$_pApg8$j7BfL2zXgO<~R$pXQ3V#2G{sOpmY8ssUJAuDZh9o1`3kZvk$KX*@3AbT8R8)S{vhbal&rao*(+MM6Kp1 znw`45h)a)pF--OnR8fg2?ROIJ25Y!Wh*#!(m{z3urs*U;L;31-K!hDUhCwd-NbL^c)=K89)oE2DVf>2of8l3}6V%mL2 zV7G`y;k;-8IRUp3P#QS=;{X5cy$yKO)wSLE9dLq%!{)54Qc^+UNE@$9t})*K>~N{M&oH=h~NZP5>=$wFdA7 zh_;H=GN1;qDq@xU`L4ZxznNra0u5p2KKJ?mo@ZwE?ANu|UVH7e*IIl3#&*?rlu!1J zb$;h!zhg7b{Q^DU?MsQ3n~*rW6S4SO&g|P({uOH?n3J?)bRi0sav(NphjAHCvw*LK zdxuSF5NUF@yjb9$*03WbWMODXw~}k|S|4x%#{$SL-Vy7u{p3qrLKj=n%|0&H`U~ym zw7o<)_w@Y&TLq@O$OLR@&FZ$8r-(^B%9ozy6nIJJix~mQs@+dd(~g*dusz8x32xfU z;(?(Bi-xtSv7r#5NQULlPWq}wSj1t(Ir6OLmyI(*tg^4AA7D$wVk|*(;Rm4V#fi?Xn&aKW*Du_VUYuuhP5g9ddos>`UVh9d7NTa7DD9RDfOV{H$kXhDHh z$!v;ud|yfs0GZ4k;gzwGk1j$zfJnIH)%2HNBb!DjjtH$Ow%4@@lwjYjav*4zNpNo< z1T$xPjE6pD7-UX(41yAJt!;^^mgfbX=4a1#&?MMxTWT2D&JKz^-IDSDAcZvgCus!) 
zgmwhXKFB@OahgY^+@*2aMLz|WHhsPDD7uxYA~imc=7O$dZToWjgn)Y%eX_SiWc0Is z-8bxK$@xaT4kH>@s3q3gq9q@UY)b!k9D1_lhG4#DZ7^tGY(w)!oq5N07(;m9m1;~jIGV=KFbcxrrBQJtW*NzAe7O$s)xP5+xxjSK3k6&1DRW;=^u zY{e+(AbpU7=@jUBO+~v@>T%5js@O?P_w&{yAlXQ`=w-kT5B!O1hwvCDRQf znpxu|e@KVjHced~!&mlr1U2*?4*?O3hjx4|KNXr@O25ds(yy1)H??30?Pjfwt@sCU znZ8d5rOMZM+a6p38R=kpn4SKYx(BzF&Xor%T0Xb(VC9koL-WoMW!{wD5RR0_H{e}j zZ7a{Soo3VTlWyHkpJV?QzY}#O@1^9*gH=mns!d)B)OLr()h-#QxC$3Hk2v`Yb+pR} zL}d*%7)Q*ULSNH&zU<)Ml>T!DKlGCtb{OTnq@mr!P9e5p4;Ih#Hom;v{%&m4pOt#Z zQh&yq9Mk`qFQtN^LrUeB*Tb;>{A53=-{Xsl-Rk^`T2ZeJiOqXSzlkrbJn7Z1s20Jn z(-!hws)u)RP|t<1r}P=(QK11b{boA*g#z5Gur7SNfD6F?>Du_HdQb&Kp6i4-gRR`r&xYF1^sr$P}t!5OS%dn!%|*7}w+YC7kwu3uF? z#K-BK6BzObZcF-F_#_EeRAQYR)-na*axhe^O5>eqI> zAU;SR&H6`u&K7VK_1#}>`DH0Ms`h1F=&$`7fRFa4>*t95Nvl^`G#%g*N!gwwrDK8k z4itdkT^Bp8Op^}9-@oGED?dNnV-amyPn58r*V(X?>VI@KpuzK>iPptVFAKY3Pz3Ey z_3d-5%jjgPygJ!_*K$h9ep7*PQC_X-gXL5$b~+-l3?dS1I60-W8J%TkeC1=Kvzmxf zN%_ZI<)07QXiprJA#s>>c3#0bK5~Hhzo^S;Fl^l=zX9!V+wanx;yql+j+C2?Ys`cc zPw|S_?v`Qk^@!;Z@yf@>-1N0|tD{59B1115GCn%f0T3KB*Bxzo&l4|7tmKxPcyZIe&^6IvmPO^NVm+yQw95P>kbpb{!+3M3AYeCZ5ZEhR90H>i zCqyb{2e-;7JrcRq{X`rja08i<7U>mZn63;KTU~wSWx196v9UhR&h+uqzTW63Mn*ny zKi;vc$|E0MS{C`>EqFw(&3#Xc=%&}_)-xlw9vl^!y?bQj#_g4n=^J8^%O5>C^4W(@ zid=aAh{*Y?J`g!)>4}k#+%i0JO7xSF+!`a1G|DSI`}lp6k6{r@Rg{nTO(nk{n&5Cg0o&UlAiWQ*zX)G)~yH;9r}J~q=?Y_rIJb@arckrV$% z_FHixkv8DeV`F%O&1m`ZtR^aU+E6)JbbO&wFpSr@N+}ZOHlkY2Cdg?6Mv$+DWLv!z!N^MHi>`T;iCECBQ zg-$h{Rnuv@2H03x5bf8x_>`_@gwZZTVsyxmNPxY6_I)rO(EcL<($EeU_^n`E^I{pc zWu`;Cqfj)Qh{IBl?M>4q|o7`0!m?@wRnpk2hd%hak-VM9xpoxb}GSvAx^v$wX?hLb;0Qy#-Z$laTGB(bThq&s>63upmR>|m_B~lDS!?CPd5d0hdc1uB`my%=gyUrU zF#Mf)GamEcnOG}x5Ul;kiN9UhemC}lG`*|L@XVOhtg-j7{W0EtO0r`%{zOkDr@Yvk zq++c<0Rr9+&3-8XT|i5Z@#aD>BP$kYy=0E^~`>E8Ff&?r=; znbT*p_d+-nAHF5lTBUTVKrh|XZTh5>iIKa^FUHe%^2_z*Q^H&#!OXI^FX+iyc)HAiT3YFB%y$O+1F_~Iv1%rymwI5udQ2kDhrck z4CQOBOuJ%PCmp-`o^)L2cj@fb52+frTfZJ{N4kzLFB5!tc;v&#cc&uXos4`p9Qm$% zt;Qu1`G+Cy*UPza(KQ-H^jfHN5CJ5qh~n<@Lwb;12kyDNuN9NzZ`qH0h#3=Wy%H5J 
zg0}pt_KvjqA9z*mqkcgqe7zuhOUA2eY;4qLB$4?DW{T*Wf63GtBH|~4tXPa)o>LWV#^WvVDO+SS{i<~I0x0!+ ze(*JwLGxAM!F^KDV!!TcF~D|>fivMP$T>CM2J1OH&t zz7J?RbjKtzwVdgl*;#D}I^;XJQ~%AMC;;BE0{#KPQ$gog1&ERJWIP4Ya{ID6X`1XE z>LlI)|H*)EdJ*nSyG&E8TfS|Zu6h%+ADW&&Sj>rb%%9=fzGRr%70;C$<41-DZkUK+~&GYi04Ul@o z^LBd+m>9Fs_8(b69ok-$RJf%_0*BrZYg+j2@!{|~H%d~!8^(;XqCtI8symJW%WbW1c+v7+}L`2iS^o;ol01Z#9QFN348`{Nr=T{p=X)pT<$KAF zneaX9XfpM(Dsp@e$i<8^-4SG-y$LvHD(?=!8&S|NYzyJnWzoVW zt)s>ptRuL?{k`LD+cgG|yZx@AedQMOj6Xj;K1_L?e^r(&bb&KP<=rJd&!rq11HSTjzNZk~3TlcZ}c zba@KL+bk@Ys4D{bWVeJV8Oo-bWMbS6kQskt(gnEN=g162a|e5U?3rQpvD1h4WpJ=L zn=6%?D_K`6tfN=G8D6$E_SL-;0~^gz@VlSp^1H1H2nu&TJHIs zUKPFNRrsA$IS!AkL~QB{@jFDk5QTl%-DQTzK*Z~x-Mub`HiZIWcg*H}0lSk&>59Ko z>b-pBO}$#oZb~+@6PHLoN`9})(NA>D$jF$7PKbQ${_@Bf@W0dGeZ?u5&O<(LG->P!iA2LX_S%qVyAt^hzo)|qdMS? zZa%{j;YqF67&rWpal=!b%)oJ@T%Nc)iA!{bC!T{NactD9v60m#b!9}sJrV_hizetAIfG1@XvBvv{O&jr2o(cw}`fQVowh6CT-1MqJxl zMtnJt5xQF9P&tyB`s>JYMgB*He%9YD z#kZa~?+3-zlg+Kth)ksJmSI&I;VrupsUcE!)u29;O08Z7Thr4%-XJ~gW2RGkmAA^S zSADF24W_3}?M5==&Ywd|{yo0(m4mU?8-}Q*^Q-03!MR0$PtPWiJ=escQ@Y8uY=;)wNo5LCl~FKd5&zwM(pL?utbC4bXEi<}unOUl7j zT~D;~Vd_cX9nq=)$`@2i>K-RMH%n8>$<7^EJftZ_EV_E)J=Qf&$nxG9PWYwnmU5(1# z1veW}BGN2x?S0Z=)Rc(N`PwPD)A~uluCs4v3W(J2Y7W=^f{=y{r7lwbk`YLF zjG$>c{)Fvf8XBJ%j=Oq4^Oildk*wL~$}k)_j77IHc1dadlQruwZh)dCf5b`|Yt^+$ z3Ea)OunLdneVlwv)Er1odAfNhSV^Sz%R+nJw3@9+eur`@SEpmSMaI;juU$FuVe3gE zl@@JuCu=ycfiv4?3YaP2U_AA9oFh>Zc{sthi;0$S8t0mg>1q5^H`~67A_=M$?)9Ww z`A_cSgnrTM0VbhK{=7@t9Ya*RhB4mLvM|x{d5Cl$sMIN2Y${Qm#?c#Y zr|j1#uSur%HP|8SSnKN;SZm%|oow4K%)Z6B)`rynIy;Ij>Uvwlth}Zxrd~CYNWF!X zil(S)$@k0_o|KL|4Pkmu5`^#*w6VUSVeC8jhR4UQ@#BxIY);+rj&C4|K3Lspy+ib9 zk)`gs>d{=h0{?Gl|L(bQ$e0H9HK54XiE&N5@e9PX%#2*`Z#Z$i%^8$|NRrBaX>imPrjx%I&!S~>B^!A2$$JOu0 zSH3gk_K(JK@rWloMtnwsHCBz}*exVf#@gnUYchO=(j9SLx`owkxymQohbMS{duqfE z5iSU7f?T$h@jt14Qlb>;$ z)?yU8iH;VGwd>bLFp(rX{=W$1+0gOB2(2c^9)j&8wq$BgL+W`m9ktWPnzx*M+191``Z}i5hSc7= zl`q4V_g+2m$31SoJ}<)S=9W7nyhQ5Fc+CMb9&KB4ul$*zMT8r~o^Mshuj)9p%uUw? 
z)zfX3GFvsNYi*)7aJ%1np^_!=XHl^n%7d&o+2iJF)V8PIJg- zS2L3t{Veg8-q8L-VOLYMN|QMu%PaMbS9_C_!?89%h^hUn<;qi;9Q$UxeG&^8QpfJZ z+3)Th#iVOBSmG;xZS$7<1mX~&pSSV$)8d?>h}UdO$d)Txm}Zykv#4vW-{GCZdW{s~` zJJ>f9HCwR3VX0;QvIr+8K4SAX4iC`$O)!7CZy+vj?@5#eU>wnd`McB4U)jNdQ(QND z{&wp=fyd(8dJi75v( zb>B&*dNg$}^h0D*w-)%#mw@{aK;`?~hv48pEHcx|sCnB^^ISu!x{E3Mv{1{@;^^&{ zVN-Wg_9MxTQ%~W7FlOtPna(UQQ#LbCqg!p3 z{>Wx2f%ldzk;VX z$)(lkIAcfNgOljI)d@Qv{W6}V>7ViizebeUn0}COd}U8mBJR_OzHl0rw|5U?;1NG+e_mm9V&eH5j48p_Vppcl-(cN7 z1Z?TAk;lUqz&wO5=seQ9oZmiy;ZAoE0er06o70aGmivU{aOg_#!HWdn=6>KiF)w`I z5-oL{dSb)GYR!kM8Dax$ICxFpN+OG3;MDLb>kA9BEz32prJn+e**dYhuW`+&F3ym! z!rd86{)+Vaw?jQnv~qY}e<~5we0-ktWbH`qdUZ7s`3=mbyq?MTSE?8B{b2QSvL+ zr$ZiUSS++RMv1hC3S~NMn6fF>D!aJ=-Y_jTa=5YC=N%fGbblbGcYN46*l2b~XAgfW zS06B33SDey-1_MaLY-tnN8P;*K&73;8s3%gS6;TBBm^k-8N{^&hqz@Rhl84dd@TwZ z0R$~$RtppWAmUU|g{}gcNqsn@2{8VXSChe!iqlL1(6crz`&)<~9Pu~8Xs~`#SKCOE z0&KJ$VEQFiz695MfU}G2S?SNO8`$V&j4Qi^9$XVlf0)*FfTMH~0lj%#9{$V>EH<30 z>(WwDp?1tbfN9lT)lkR@g05MP*POymWl`U4Lf?i+G>z5V>5~yJNHTPQZ!^AamD2=v ziGN{E39<%shYa0X2Bm2mFq!Ro1BbF(J}SgYyB%x00=bF`x-F9%Uu0?NO08_HZM4Ik zozIZ{0QC|l-C z=nlZ(!DOe&p9I}BpeuOz;Z49zoMboi+;#yzN@X43msl@*w#G)@R|^@jqQ_b{yu4s`OBzV;P;`sD-~6fxbu=;Z0wh zhIRL=h_O;hFVoj0QuA&OvKBIZVGVzm%^+Rc%)X8vVH11NyR3Z%0jkfIDuuR73EK8k zO%8kCa&SklCAp4KbX~0N9L5HiRmP5~w8q`g>mU~f3g)5rma(Bfr~z-)rEE%mMl-rQ zWSEZiX7sEyz(h6}pMes(Gc%?28Pxzcn8t>Grkp-Nqz&pJWH|bR7E5>c$nvW(7IK4Y z8O)EH87;=vMQ+2Sle)-a`R)1!$isB!7!{%5chb6#8BStW$gQFm@s;&3I8-VLu3kU@ z* zGQx*YhoD3R{ao*pF0 zJ8p|A--Nq!7SoBfc7af#f%|=W4%;NMtHc@A_0j=Qjw@xmpM%2eeBE|czDpEv5(TKX zN9P)K9l|^6ou-u?0o8QTkYITLqf7_Q87Wqr^>9O$6&BL8877>b!TRSMP55(9g{`3g zcTmecx0fWsN#Kx5hZqwhSjLcyU2vBr4K&;%ccCX>15GmdGFuMKK#l#( z5t(5{qsN(av#f=v`JK&J-7C<&rI3`R8E$ML=)B03>&0I#2O=kKNlbO}njt3J2hl+< zsx5$>huMaBha6igbT^F%k*zTn>UXsd`=j+T+d^ZOCid*;-0D>RJifGI;WPUfqjDGp zAq^i)|J|RAXqcM1plZ7a*}5Z<11>i@J%fVbXi$dZkLf>f0x=UOaU(NI+)}gmQzqvI z<74y4ca8;u>N|~IR|q7V;CA^8RB5hn8$cDfWx}|5Zr0%zVV*EAx{pi)}DnB~-`SE+=1>*wnW1 zEjj}6JR^O7+uhor%TJUTT*SEB=WcHH1+hk 
zGxr9>8eMkVNs{i&71w8xYOQ|31?~wh#{EGxE~ZNQad0v2gVl0GCg$Pl@hNng$yD{F8;xrL^%5_q|`4d>EjH8y2|R z6MAB!dX$;#VS03OlU1`Pp~E4lEvt05%NPyuYgB-;+8lC|D7ePNSH6b8g$vaJyGvmW zg72Hkh~|}4a-$kE%BL~Ztgq`Xk%b(rba1757*3QrJ2iiq04R^m10Bmh;`%A4l21S; zhVO~BjnO~bc#_w6#=F{ld*MEjdN0;?BD73noz9j;hjc)qI>L4V_Z)jcG>OXobR6EHtLVqSj4I7A@bM# zQl$hRdJpHi(!kxSDGoJ1BYeE2>yX7*^au6bRRF%51s(&FbJ357T*vrL!&q+fLeP_S zv28e~5aaFlm@B{ZBvteiz&Z2>g69gsdtX85kmuV|X07_kD3Q!{T%^L>MHkr6(P~5@ z%g@qy>Q~jvfuuh5NcAdy_g32Z3f(M)pI{Vf6O6V~+@3^d?ED0g492kD>_c380^-(i zGw)xQ24jp3l4OVHb(I_-<2iRoCPKjg@hA(K-v(L;U4GnEo_KmBtxF$mckakz@NG4Y zXQQZxaZ&H;aVXI4`IW?!_jT*e#7aOUjZLjdhmWJ}RqJ~Nn_$_P9L|}j11;yM)ji0& zv9=cGxkjhkGR|A=QaEOjuq(B&rkS{h`Yr)D78nV)akBX#yBfqehPubr2|ZV+9w2(90JR zoOS+Cto45NQX4ONh7}rhCW2+tdX0e!?B@>DmQUD75sI~a&b{2H(+~q}UWFNgs0MGq ziOAiU1mN75#350oZxoo*9WXf8>3*=SjD#J76IggBYk+9t9wjs9S zO7I(N9jkArTi7V5TD!rPnvk@NOE)na6+^%Aw)XlX97eh~(XIUktMA(ulb2th*fW+} ziw%u+8b%1bD0w;7jq}p3WNH&jsBJ)m`L}n*&t98!2P!t|a(78Tp5(etInl8O);ePd-$7PD>v_se21aS8?$)Ujn1HtU z`HE@yXwX7h`+~TD)Ts|9nFBE~ysHjeFL_xwWs-;I~U>UE09e6c0hK1 zjBsdHSxp*Z@ofk>y7C8u1>Kj#ROKM@bj{F9C>ZsoBV+n0RhYcL9=CZf+OXOko~_Z$ zMLZbAnbbDV*EIMGCfB;_n*w^W$-CLEf7piRHrDzG-!vfyIM%0qLhXU1wg<0zx`G~b zt0XZyiO~}INU%iiifd!0XF2@M=qA?vTj*?yVBDaowk5%h93!YgH_PhgCN7q=%M=+u zHbb-~Ri!j}NX*<{j$I1#-;1zq&b}w!-XYueaJwwD%keNutO1D*Ybw7)&()yZJ8>S44~!m!=@mNd$Oc?UQL^8&IH*$>^1vch@prz@gKRQt7+0E}MjfU2L0 zaRI`)t#s_g&y?|i4JS5O z7!G7A@5-OD2$=a~H|8Gn56dMtd$ltbw8?p7k!M(P8ajU9Wsx;m%F(w4H@$gZ6ZuJA ze&*aPf3HeT422OgAdL9MWN#phI1g>~IrZ;`SGKwJg#0}Lok>DzUjTI?y0H4l$XP|l z*Fc#)YMD#ONhP-!fy8j)o{mp>Pr}G6tgQwZuT^8z-(lSdgb^((#y0>iL(~#TR#QVCSwnPTvIfl10De2=Mpxi+cc%{KY6JANAXrZ3 zjHr>>e`e$jOkF=`+KJ6r=A)4}zR2RE>-)wBf(DqB+>vDIuN60$u0dP6_q0&oo^Z?V zUn+0(nA~79OY5SSHx9#34#>o6@`lL{n%-|Q;w)5N-hi|caJOFa2IcI^$lnUka5=8` z^2Q$mAV({2fG6GTH(tQjO8hWCiKPW3 zjd-@C@fOH7Nu$FL{IYfNN^Kyn*7^gbjoa z`ET~Q(EcEzP9P(IE&&$kgtD$7)^WqCK)&S)3aNr)tG^<0A0!Q2n1bwv0(3z;wEG7zz-rg|6_-wZMjCK7pcHwAwAXyx*)j#TyBZ$9Qh}m7I95_rtT&S 
zeW2_pQ+PM`@L(7uyBqc-`ymZ-3AQw7v7U5#)tdxt4FH#>Ti?5PvWMr&j!UQl5S;9% zwahdvoVx~1i0Ta9S*w_+5gxwqXC zmz$%8q|8eqD{{X`#0x6R%)>`bMs^A1;iidgET~}AwZd)$7WK{~SmEjOye7dUSk=I5 zV75wBV{D?r_0DKQicTG4xW{Dg;(o$Uc;PV=3eu@M%aFpSo9riZE9G(yn!D zGBc(0|8}3?O9a-WA0R0}(_G1Nz4ab#6}m4DwaIv4>vstpfSk2HJ)1@iZ|3wb*+Ap{ zBnpMtieKxdxrh03V@1gqjQHLDl)rWj;j0vXQyt%Jk<3X9YUbQ zZBi+)i?tfd4!tzaFg+>nKlnh~L3OuVh|>PsSu{2ZU(}NsxJ-@Ky1M%>qQfWBd(e*< zD(T?904X@7q6Ce~uKwF*jHEPiiD`ddC!o^E!u@lN`LSN(W7VL1Id0}MgESCPJ2SxhvijrcF&A$pdp z<#+a85RaOW$h4!cY8=6h?L%hTJ((DODAxL4xH^zA%`!jjQHN|T=}8WM7VU)UBMaUw zJs2@|3)5OY%m#(uD?ZEJ7+_E=?MZBTtF&ZQRyyRB;0mA<_I##rmKdl-)kZ69LbzwY6D&HNWT zl-k0V=_ih!KQ=j2$C~s*FuJ__Q7cRie-tIk;?VlHW>bJaZkM6wNc_?GwC5|CfdX9{ zjXyppW<3D@Xm>PA$JyQOdE*Y|uUTtL$$16R{O`!tFNB#Ey4 zmd4fb$9)XS`t)K&^yQB|o)<>bPnw(}{`j~Lm8XAcA)I8I;ld#o%(?_3;DYwjfPev_ zE^!P{PQE4wnwF!rT9x$Sf`DsW5Ut3z{9I6qG&56j=%r%X-PdLBovL*8uv{cPrR@HFrjn_`7TGZV1^~O0ZW2Ox9=AWQ`fKGlVkDkX@k zd32cSLP~uoOa)24tP3gi;V|{gFx7>W`qMD=>@d}Zl=?`RS{D=KqSt`LEu6vvD@OR_|DXPrNyac7gD}gEsn3qcP*q; z7gFjzixbVexXLiZME0qoOqY5_7^(_WtHZc4Vd(rYRXuWLCxxNOVXE-%QftFdeNJk5 zWS&nI?2(9Yc^if?z4QWK9}zri&NNzl<%O$)$*OmcfG|ap9?A9pDm8B$PE@! 
zstYL<-xwg!a}ys2iwh|gdz?@GTA1oWO05V}XN9RQq|_0XI*xD035kl#4fDB>@=0q| z7SYv3E?+(1WJ=r|mT)1J;O=ZnREH&Q>s8|GVF?#fiP2W#@~}jzSBcxg5-y|?x{t`= zWoek|LQ1W&)D46jq<_PI7kcM}AQWfnxwtQ&1a_g{L!K`5Mi`2-3c0vD!_d89h$bR; z@^K^MLV}>$AXrA|+kDE6|GQeEVK4PEDHpdqgoVtJ@9=T;xsd7`WA$myOd%AFj1EIp zVQ5Sk(p+%)CWoQgFcc3%)5FjWVQ6L;x+M%P3PZxNt9f}CS`~%_UmYI@-#Hel;`@O{Zbg6}gHn#uQk3kkj^3kkjj77~1#!_mlc zzCUXr!8gG|tNE_5(EWT*wooPC^%fe5z>D!v(FLSy*Gj7exb-{MpbX1(no8mZ;` z3Jb;gZm`gFzAv}X4SavtLNodPs)cUhdya(`@jcT*OZon?g_iS;kV4I?_^z_hYQD=Y zbU)u0T4>z7`3o8e>3RQfByw|f{=$~V=6SQ`G>)62>izRSwJh{31gWjX#GH#Cn$BBST)Z{e9Cl+ zIO_>Yb%%L^Qd2K=*(&)d)YiX+d4f_kA`l*&;tG~}ILs52I$Mh~&uM%LLHMIEPf+Ru zwJH;dAed50_>K}c~$E?%`U9$fU+Nz!EB0d(DdeX;y+)CviuPrQt$ehaaJlngm-Jo@{9{hofLO5HT)E6>;Db&1mksj z!j+N?QWiad@2HF5JHX*zg?WNfFZ!5^`ziHRF&dtWLkPF|7)`vgs2>K)lWFV5D_z8= z!%{nZj3z@_)T5?PshLR^acNlUMITdZr5s1}g~EQ#76as|4NE=mV+31SG=HfpuChXr zsbQ&~`53`g7S%+Gx-5JLI2tB>`$n%A;p5OZzt#)P68~M&{{p;7N zj8?=dUuTbx(aKmBy@~JKQg^CKo-c)^qSw2aFY{9t{b%>be+RI(hIxXvqF;1bzG|gv zGY#=SD0N4eCn)urkC|zuf}6hi!xNOcMd0(y3?Y2p$IKzNESkU63@xfWbHY;HK4u=V zWl`P0oB=Xut68Y!nHQES6B>Bt^C^oyzycZM&4gBmd4hhOE@bm8;8PZTJa4I=gn5Ee z7ktGEnXFP4eK~Kb$HP29sp~r zdM?Zpl&YNJa!4{Ni%Od)PU@bZ)R)3ML8<$R(VI*%Jzekfe4C<-GT)z822-7?yQzsK1T4BDPx~!-0a1RgDAC~FOSmyai+dq zkISMEOV$(g{Z!xBP}0hx5J}jZNJz)u_Ib446{_cu&ojKAdcHZEM4k`uDOArpK950C z0G>NFt$84Ug7xh2d4%0U{kn0kYH?C|A$Xqhc{JLE>bbhnsxg_qP(7P{9?2(#>bYEu zhv#%Yh3e__c}82F{Npu68)%*n^C?u%!#>YPEKmM=&bmdlxP3&SdcN=ToJqby{Te3L z$Wz6qP(9!BdCs;x`Qh2`^PJO9J$HYdM4oCsh2Z(9mUNy^@F@h(13u3uEl+-U&RL*Z z+)lJmJ^#b!VWJm=r|veZ#`dv=>e=G+e5#*%W-WB}TwrO zdGhz`OSh{Qr*m*HyZ`zQ`qkz0#EC1^uf#W9xhpJBrTgQ*gL)qKd9JiP`QfS4qRrF5 zrw}|p_j#@g^JqnMkJ=iyG_#;y&Hu8fFW~d&XWXJpp6QlHy%Wdp{nRXT|F7YvESkSx zYkZz-El>V_ec=vQ&-Gy*_g~)uexCPvB>on{^L&Y2JU8$ugrB#39*M1m>RBm)h36(d zg|raEB>3=rg-@Y++I=1gVTIs1;XA4&5|KDnsGh&_c_dO4!t=X6&+LBc`Q~4d$TOEu zp?VJaJX)R$!Sn6qB=Tsn2kCv#i=ovv3_8ZjmnH4Tt z%TJ+tHu^kT63U`uTmkc#ie*u8^2`&|^O^6uI8C#B z^^9xI@C1=sU-}7ZT zEL*lLIe*f$Y16K{wq{1n_-mRLESSIWW~E#^aR$-#HPdeQ9J?3|8QU*LL)!NpTRVoZ 
z2}?kD&4kG_7B6m`Fb$M}&5QD4bJCzk(KzH0VoN!L3V>6p@hJdKsm3ag z_*>G!DgaN(Mymikr5dh$0(7aE-ErGS421LM1*Sx!)Ju#i?mcEpHqz@S3NR$>EB^r~y z%79WWzkT5-)u{A^BN&!}Gs1yPQ?@+|8e4kXUP=Z*I20v=;MfP@NG9^ZMfhLP+8`kJ zFK8`5vnM!l3$WFdG4V^{_g zKP4KjzLZlpNUOj=P?HaqQVml+SV}ZRIV*K3$0#o_r5dEXzzo7D9Z7_lP~%L{EHd8f zvw4{rO|eH8%OZUw7zSfZjuj6@7>b%2Gvt1`vAGFXmSXx^MgSt-rShwBPSkHRLu7eLQw{we-H*?90nq<9Ek;& zN=J7jxCUdyjt9Op0Iw50o$oCQtLlxoQGt7L=MKUl#S_It2wQexr!mTA{qxop|&EVtOBFB10s=!^7)>!Qo&UC-IL#8ubM zsOf!RYgzyuhu`wTcl1Sb;5_=Gec|nOKra_)!D^?>js*Mw>&fZK0PC^inz$Zb_r-kY z6DLl+X6m$vJJVS((@`E`9LT~ySZyQs_IrRbZ0C+U?kL)U%_1BL4T!e5hCMrudQ80= z2(7q7utATHqdBOs=QW2HkIjkqNOan7)8Rqmv12-{B0Jc zw|SgIkAHXbH(Heb=J7&3zSMiXygQG?9ED|r^w(x4?j zASJ`gps)4=VI#*&0vHG^eNQtKpNI5?Af%0f?@Ny)3fTz&dU~8&7=)e-WJr&_cLQ!U z?C~+|G6&Llmb_UKHrKoTQemNwSXzRq+~*F3K;JU(c{UJYKq&F<^T#yBkFDvD1_s{u zBXO}o>gUln6ZYvCX7`a=%W=NK$@bU?c_1woqo?^?NRRu*yg;MH=x<-&$KyV*1scq2 zh3dyfQ!|0haKn-XE%U$G)I8UGzpuFx3(mOti<@Whro_R(-9UIvzM(A1;#UBWQsJfV zu;bTyYz$-|gOdk{QsET8atN-=tyuhF9Px3F`^@$YuOZ4XB82f}ub>uf&`0F??)xm=+%l0y!s0E`miBxfR* z3M=^lDH&c0&-Buu=ScA6*>THNspg`@;%l#+HfciL?X%`Dm^FKW4%p|{5GZ^7g#MFt zt9n1Bbz@86|Xak#gcbd6UZllo0&ZLsPL%sh9no1IBJi$tX7V? 
zOipV9^IFSldcYMFAfbz-|^Ah;Z~UJktRu;<57jaS%kuf0Gx$R&fI7&yo> zju)fDSvn;*i_-5rhV!CK1nFnT(Wb-(axto`<2pknZ$DiyA>meEonCJTg8 zVIj{FTS`(X08YuUlV>F^DS8UPQ!@M zL#}$f8ybWj4P=Pj^}>VD%Yio@_WT(3*8^!iOMopUZTA7ARM_Y%=Y}LvOJ(-F%(D~XOjQ$RUb>+i$>EZv zL`+^_1_LukS}Ts=lrt!la;z{E0}r`QH$_`Na{O!tOB;D4ywm3#UzQ^FI&a%W8SBDM zDn;vl{~!zo76!tfi@^*o|w&|QDcT@DBXtjFE( zri;xA`IQB3i23h;8zkSaX$6}<$_56)@o{VWX5*||$IX~AXX&gNio`LuzLVfV@_l-kiyP06@?9{={SUqs|L6(9KlclKwHMS}*eg=-?Iz!c z!HJ$IpV~_DO{%f~!I$zKBwuykd>2m+ve>WkRe-y3ee>0f@71vKjV9mZoO~0fWLKzs zW5{<|PQHtB>YGfyWKO;*Ir-w``(odG6DCaP+wKkIo7pGdqe9WTdE;VdwOaP}6Ds&^ z>uediE_POZB(nYl4Wh7^R{H8>NB!{2?mD!baLXz2Ihzt4wf|nTHU9SgL`Qk;x_PVP zYtlmr#wo2lk@(720OO)m3%lYEZ7`1Y^GQRS$ z__}!kJXXGgobk4;L4kOzzN@{ydMWjdi4#~9SIxc^UqM*jwcSo1tv~J7pX~n8XjcEO zrNLTXRXmlf9v@F_i+Avw!+>bmO50)8^EoUf8J2LzMjW48l$?M>^fd@{AJA+;gd zo&+>tKR?La(6&)EQ0G^wCnsa|8`?Iuj0+lTk66Xl*x|0;wPwaAlY4Jh?5tYh7l3Gk zx`Sp^b2c_6XN|9q&%%d#k&TZ(A3l6IGu}aahpQa^j0)yu;OYB6Q~zpvR8#mVk4DN@ z@IUws(oPr>IiWc^J{tTgeRfv4AfNx`Cju$kqxp0JA5rOeJ~#3a1<&O3Lp~$;Jiuo= zpHum~z-JGiGx+S|69c#^ek=K%&ZnBNet*SB1Ki2)D}2O`_VIg&k5TQANaQR&wZvEP zdoCZQOJqF17xIY{uH|bT%a+Vv&{8#fSyjWe zH(ooTYEg63*Bj@wjIFAfIBo)Gng5S?MghgSe99vq3qv;(5`!s^JQ0RoA~ezB4)EdaJkl11{)Ld(S$U+J&_xz{gV4nm8Unst*->HW975vI<&kR%iM5tT&g1*j z7Wx2XKVzYfkg9PikJJ#l)IxQHK5HSqBA?(>MtTqu)o8@ajAmSFWtb}3ajB!jIMI;H zR~4p;eq7v`Fr;#>tOnGjik@6Zy>Y36*TvO_A<>je6-Bw!WEc`Pxzy{zxEsRISHjep zVcgs>bW51JAdFiShFZeZrD5E+!qDesup`V7SkA!iLhM`}DsRSaQ;{!SH)beqmx-fKE7@8V}glm^?S{VAgg~rXBzo3zj zp7#$&A~#2~5?@Jz<%!H~TG%*_iBv&Mng2v0Rpc2T<{=Qd zgqWGVVj6t?k^#%2D(@o*LFo3}Ow#cwmEhI!yW&`+te$ET~(ub46#(b|#8 zGg{5^NMOzfvz^+q94e(eT6aF^W86=+^iLvrdL`SRY{}nx8r3-VezK*0qlT#XQ12&O z;y;0;vS=sY9)($1C-{C1wLIeKxzD9Ok7!UA-5eT<-WF?aeHQIw0h*T;xj zT+HA57*T?Y`8OY<5p*%#K1P^#F|Yd=M&HN0+bd>H^PCy;xvZZ8JVBlcp~{{h=0iC# zW5SrAt%*4?iJX}0b7F4EiCLTzvmz(v-d-_3;-5boX*aGr9mTlj#(9fcn#awclM^S* zn7?>dkSR==+Y}@&oYUNRTjRo(SqpMAHO^_8+gPalT$kFoaPExeS>Mbp+%&`GD4deF z92@R_8g;3ez4?sW8=Du;Z(5iY6I7tV4cCGhKK8a*OJ^)-TzGTKEjelV;LK^xSJSvO 
zr}&Ip8=Dt4<{~iwhe60d<8fyM*f%yG96rDB+&B9SVDTi7J-QzEIl1TeH{QXJfh{+v zImHWfGU!jQPKHw~Qwve~fc12m1zN8nGv+kiHhccU#;m@p?mKwp+~PiQ1>o}yWF~@} zEq$p7|a^AFpg|l>:${RELEASE_COMPILE_FLAGS}>") + add_link_options("$<$>:${RELEASE_LINK_FLAGS}>") +else() + add_compile_options(-Wall -Wextra -Wno-unused-function -fno-exceptions -fwrapv -Wno-expansion-to-defined -fvisibility=hidden) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") + if(NOT ${CMAKE_BUILD_TYPE} MATCHES "Debug") + add_compile_options(-fomit-frame-pointer) + endif() + + # posix_memalign may require _POSIX_C_SOURCE, but MacOS might not like it + include(CheckSymbolExists) + check_symbol_exists(posix_memalign "stdlib.h" HAVE_MEMALIGN) + if(NOT HAVE_MEMALIGN) + add_compile_definitions(_POSIX_C_SOURCE=200112L) + endif() +endif() + +add_library(rapidyenc OBJECT ${RAPIDYENC_SOURCES} ${CRCUTIL_SOURCES}) +set_property(TARGET rapidyenc PROPERTY POSITION_INDEPENDENT_CODE 1) # needed for shared build + +if(MSVC) + if(IS_X86) + set_source_files_properties(${SRC_DIR}/encoder_avx.cc PROPERTIES COMPILE_OPTIONS /arch:AVX) + set_source_files_properties(${SRC_DIR}/decoder_avx.cc PROPERTIES COMPILE_OPTIONS /arch:AVX) + set_source_files_properties(${SRC_DIR}/encoder_avx2.cc PROPERTIES COMPILE_OPTIONS /arch:AVX2) + set_source_files_properties(${SRC_DIR}/decoder_avx2.cc PROPERTIES COMPILE_OPTIONS /arch:AVX2) + set_source_files_properties(${SRC_DIR}/encoder_vbmi2.cc PROPERTIES COMPILE_OPTIONS /arch:AVX512) + set_source_files_properties(${SRC_DIR}/decoder_vbmi2.cc PROPERTIES COMPILE_OPTIONS /arch:AVX512) + set_source_files_properties(${SRC_DIR}/crc_folding_256.cc PROPERTIES COMPILE_OPTIONS /arch:AVX2) + endif() +endif() +if(NOT MSVC OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + if(IS_X86) + set_source_files_properties(${SRC_DIR}/encoder_sse2.cc PROPERTIES COMPILE_OPTIONS -msse2) + set_source_files_properties(${SRC_DIR}/decoder_sse2.cc PROPERTIES COMPILE_OPTIONS -msse2) + set_source_files_properties(${SRC_DIR}/encoder_ssse3.cc PROPERTIES 
COMPILE_OPTIONS -mssse3) + set_source_files_properties(${SRC_DIR}/decoder_ssse3.cc PROPERTIES COMPILE_OPTIONS -mssse3) + set_source_files_properties(${SRC_DIR}/encoder_avx.cc PROPERTIES COMPILE_OPTIONS "-mavx;-mpopcnt") + set_source_files_properties(${SRC_DIR}/decoder_avx.cc PROPERTIES COMPILE_OPTIONS "-mavx;-mpopcnt") + set_source_files_properties(${SRC_DIR}/encoder_avx2.cc PROPERTIES COMPILE_OPTIONS "-mavx2;-mpopcnt;-mbmi;-mbmi2;-mlzcnt") + set_source_files_properties(${SRC_DIR}/decoder_avx2.cc PROPERTIES COMPILE_OPTIONS "-mavx2;-mpopcnt;-mbmi;-mbmi2;-mlzcnt") + CHECK_CXX_COMPILER_FLAG("-mavx512vbmi2 -mavx512vl -mavx512bw" COMPILER_SUPPORTS_VBMI2) + if(COMPILER_SUPPORTS_VBMI2) + set_source_files_properties(${SRC_DIR}/encoder_vbmi2.cc PROPERTIES COMPILE_OPTIONS "-mavx512vbmi2;-mavx512vl;-mavx512bw;-mpopcnt;-mbmi;-mbmi2;-mlzcnt") + set_source_files_properties(${SRC_DIR}/decoder_vbmi2.cc PROPERTIES COMPILE_OPTIONS "-mavx512vbmi2;-mavx512vl;-mavx512bw;-mpopcnt;-mbmi;-mbmi2;-mlzcnt") + endif() + set_source_files_properties(${SRC_DIR}/crc_folding.cc PROPERTIES COMPILE_OPTIONS "-mssse3;-msse4.1;-mpclmul") + CHECK_CXX_COMPILER_FLAG("-mvpclmulqdq" COMPILER_SUPPORTS_VPCLMULQDQ) + if(COMPILER_SUPPORTS_VPCLMULQDQ) + set_source_files_properties(${SRC_DIR}/crc_folding_256.cc PROPERTIES COMPILE_OPTIONS "-mavx2;-mvpclmulqdq;-mpclmul") + endif() + + if(IS_X32) + add_compile_definitions(CRCUTIL_USE_ASM=0) + endif() + endif() + + if(NOT APPLE) # M1 Macs don't seem to need these ARM options + if(IS_ARM32) + set_source_files_properties(${SRC_DIR}/encoder_neon.cc PROPERTIES COMPILE_OPTIONS -mfpu=neon) + set_source_files_properties(${SRC_DIR}/${DECODER_NEON_FILE} PROPERTIES COMPILE_OPTIONS -mfpu=neon) + set_source_files_properties(${SRC_DIR}/crc_arm.cc PROPERTIES COMPILE_OPTIONS "-march=armv8-a+crc;-mfpu=fp-armv8") + endif() + if(IS_ARM64) + set_source_files_properties(${SRC_DIR}/crc_arm.cc PROPERTIES COMPILE_OPTIONS -march=armv8-a+crc) + endif() + endif() + + if(IS_RISCV64) + 
CHECK_CXX_COMPILER_FLAG("-march=rv64gcv" COMPILER_SUPPORTS_RVV) + if(COMPILER_SUPPORTS_RVV) + set_source_files_properties(${SRC_DIR}/encoder_rvv.cc PROPERTIES COMPILE_OPTIONS -march=rv64gcv) + endif() + endif() + if(IS_RISCV32) + CHECK_CXX_COMPILER_FLAG("-march=rv32gcv" COMPILER_SUPPORTS_RVV) + if(COMPILER_SUPPORTS_RVV) + set_source_files_properties(${SRC_DIR}/encoder_rvv.cc PROPERTIES COMPILE_OPTIONS -march=rv32gcv) + endif() + endif() +endif() + + +add_library(rapidyenc_shared SHARED $ ./rapidyenc.c) +target_compile_definitions(rapidyenc_shared PUBLIC BUILD_SHARED=1) +add_library(rapidyenc_static STATIC $ ./rapidyenc.c) +# suggestion from https://stackoverflow.com/a/67027398/459150 +set_target_properties(rapidyenc_static PROPERTIES OUTPUT_NAME rapidyenc ARCHIVE_OUTPUT_DIRECTORY rapidyenc_static) +set_target_properties(rapidyenc_shared PROPERTIES OUTPUT_NAME rapidyenc ARCHIVE_OUTPUT_DIRECTORY rapidyenc_shared) + + +# binaries +add_executable(rapidyenc_cli tool/cli.c) +target_link_libraries(rapidyenc_cli rapidyenc_static) +add_executable(rapidyenc_bench tool/bench.cc) +target_link_libraries(rapidyenc_bench rapidyenc_static) +target_compile_features(rapidyenc_bench PUBLIC cxx_std_11) diff --git a/rapidyenc/README.md b/rapidyenc/README.md new file mode 100644 index 0000000..12ed51c --- /dev/null +++ b/rapidyenc/README.md @@ -0,0 +1,65 @@ +This C compatible library provides functions for implementing [yEnc](http://www.yenc.org/yenc-draft.1.3.txt) where speed is important. + +Note that it only handles the underlying encoding/decoding routines - yEnc headers aren’t handled. + +Features +--------- + +- implementation uses x86/ARM SIMD capabilities, with support for ARMv7 NEON, ARMv8 ASIMD or the following x86 SIMD extensions: SSE2, SSSE3, AVX, AVX2, AVX512-BW (128/256-bit), AVX512-VBMI2 +- CPU detection and dynamic dispatch (i.e. 
select best implementation for currently running CPU) +- incremental processing, including detection of yEnc/NNTP end sequences in decoder +- raw yEnc encoding with the ability to specify line length. A single thread can achieve \>450MB/s on a Raspberry Pi 3, or \>5GB/s on a Core-i series CPU. +- yEnc decoding, with and without NNTP layer dot unstuffing. A single thread can achieve \>300MB/s on a Raspberry Pi 3, or \>4.5GB/s on a Core-i series CPU. +- CRC32 implementation via [crcutil](https://code.google.com/p/crcutil/) or [PCLMULQDQ instruction](http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf), or ARMv8’s CRC instructions (\>1GB/s on a low power Atom/ARM CPU, \>15GB/s on a modern Intel CPU) +- ability to combine two CRC32 hashes into one (useful for amalgamating *pcrc32s* into a *crc32* for yEnc), as well as quickly compute the CRC32 of a sequence of null bytes + +Building +========== + +A build file/project can be created using CMake. + +``` +mkdir build +cd build +cmake .. +cmake --build . --config Release +``` + +After compilation, a shared and static library should be generated, as well as a benchmark and sample CLI application. + +## Build Options + +The following options can be passed into CMake: + +* **BUILD_NATIVE**: Optimise for and target only the build host’s CPU; this build may not be re-distributable +* **DISABLE_AVX256**: Disable the use of 256-bit AVX instructions on x86 processors + +API +=== + +Before any encoding/decoding/CRC functions can be used, the respective `_init` function must be called. These functions set up the necessary state for computation. Note that `_init` functions aren’t thread-safe, but all others are. + +Functions documented in the [header file](rapidyenc.h). + +[cli.c](tool/cli.c) is a simple command-line application which encodes/decodes stdin to stdout. It demonstrates how to do incremental encoding/decoding/CRC32 using this library. 
+ +# Other Language Bindings + +* [node-yencode](https://github.com/animetosho/node-yencode): for NodeJS/Bun +* [go-yencode](https://github.com/mnightingale/go-yencode): for Golang +* [sabctools](https://github.com/sabnzbd/sabctools): for Python (Sabnzbd specific binding) + +Algorithm +========= + +A brief description of how the SIMD yEnc encoding algorithm works [can be found here](https://github.com/animetosho/node-yencode/issues/4#issuecomment-330025192). +I may eventually write up something more detailed, regarding optimizations and such used. + +License +======= + +This module is Public Domain or [CC0](https://creativecommons.org/publicdomain/zero/1.0/legalcode) (or equivalent) if PD isn’t recognised. + +[crcutil](https://code.google.com/p/crcutil/), used for CRC32 calculation, is licensed under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +[zlib-ng](https://github.com/Dead2/zlib-ng), from where the CRC32 calculation using folding approach was stolen, is under a [zlib license](https://github.com/Dead2/zlib-ng/blob/develop/LICENSE.md) \ No newline at end of file diff --git a/rapidyenc/crcutil-1.0/LICENSE b/rapidyenc/crcutil-1.0/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/rapidyenc/crcutil-1.0/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/rapidyenc/crcutil-1.0/code/base_types.h b/rapidyenc/crcutil-1.0/code/base_types.h new file mode 100644 index 0000000..5b74364 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/base_types.h @@ -0,0 +1,73 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Defines 8/16/32/64-bit integer types. +// +// Either uint64 or uint32 will map to size_t. +// This way, specialized variants of CRC implementation +// parameterized by "size_t" will be reused when +// parameterized by "uint64" or "uint32". +// In their turn, specialized verisons are parameterized +// by "size_t" so that one version of the code is optimal +// both on 32-bit and 64-bit platforms. 
+ +#ifndef CRCUTIL_BASE_TYPES_H_ +#define CRCUTIL_BASE_TYPES_H_ + +#include "std_headers.h" // size_t, ptrdiff_t + +namespace crcutil { + +template class ChooseFirstIfSame { + public: + template class ChooseFirstIfTrue { + public: + typedef AA Type; + }; + template class ChooseFirstIfTrue { + public: + typedef BB Type; + }; + + typedef typename ChooseFirstIfTrue::Type Type; +}; + +typedef unsigned char uint8; +typedef signed char int8; + +typedef unsigned short uint16; +typedef short int16; + +typedef ChooseFirstIfSame::Type uint32; +typedef ChooseFirstIfSame::Type int32; + +#if defined(_MSC_VER) +typedef ChooseFirstIfSame::Type uint64; +typedef ChooseFirstIfSame::Type int64; +#define HAVE_UINT64 1 +#elif defined(__GNUC__) +typedef ChooseFirstIfSame::Type uint64; +typedef ChooseFirstIfSame::Type int64; +#define HAVE_UINT64 1 +#else +// TODO: ensure that everything compiles and works when HAVE_UINT64 is false. +// TODO: remove HAVE_UINT64 and use sizeof(uint64) instead? +#define HAVE_UINT64 0 +typedef uint32 uint64; +typedef int32 int64; +#endif + +} // namespace crcutil + +#endif // CRCUTIL_BASE_TYPES_H_ diff --git a/rapidyenc/crcutil-1.0/code/crc32c_sse4.cc b/rapidyenc/crcutil-1.0/code/crc32c_sse4.cc new file mode 100644 index 0000000..13aa84a --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/crc32c_sse4.cc @@ -0,0 +1,369 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Implements CRC32C using Intel's SSE4 crc32 instruction. +// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, +// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise. + +#include "crc32c_sse4.h" + +#if HAVE_I386 || HAVE_AMD64 + +namespace crcutil { + +#define UPDATE_STRIPE_CRCS(index, block_size, num_stripes) do { \ + CRC_UPDATE_WORD(crc0, \ + reinterpret_cast(src + \ + 0 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \ + CRC_UPDATE_WORD(crc1, \ + reinterpret_cast(src + \ + 1 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \ + CRC_UPDATE_WORD(crc2, \ + reinterpret_cast(src + \ + 2 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \ + if (num_stripes > 3) { \ + CRC_UPDATE_WORD(crc3, \ + reinterpret_cast(src + \ + 3 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \ + } \ +} while (0) + +// Multiplies "crc" by "x**(8 * STRIPE_SIZE(block_size)" +// using appropriate multiplication table(s). +// +#if 0 + +// This variant is for illustration purposes only. +// Actual implementation below: +// 1. Splits the computation into 2 data-independent paths +// by independently multiplying lower and upper halves +// of "crc0" in interleaved manner, and combining the +// results in the end. +// 2. Removing redundant "crc0 = 0" etc. in the beginning. +// 3. Removing redundant shifts of "tmp0" and "tmp1" in the last round. 
+#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \ + size_t tmp0 = crc0; \ + crc0 = 0; \ + for (size_t i = 0; i < kNumTables; ++i) { \ + crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [i][tmp0 & (kTableEntries - 1)]; \ + tmp0 >>= kTableEntryBits; \ + } \ +} while (0) + +#else + +#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \ + size_t tmp0 = crc0; \ + size_t tmp1 = crc0 >> (kTableEntryBits * kNumTablesHalfHi); \ + crc0 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [0][tmp0 & (kTableEntries - 1)]; \ + tmp0 >>= kTableEntryBits; \ + size_t crc1 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \ + tmp1 >>= kTableEntryBits; \ + for (size_t i = 1; i < kNumTablesHalfLo - 1; ++i) { \ + crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [i][tmp0 & (kTableEntries - 1)]; \ + tmp0 >>= kTableEntryBits; \ + crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [i + kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \ + tmp1 >>= kTableEntryBits; \ + } \ + crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [kNumTablesHalfLo - 1][tmp0 & (kTableEntries - 1)]; \ + if (kNumTables & 1) { \ + tmp0 >>= kTableEntryBits; \ + } \ + crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [kNumTables - 1][tmp1]; \ + if (kNumTables & 1) { \ + crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [kNumTablesHalfLo][tmp0 & (kTableEntries - 1)]; \ + } \ + crc0 ^= crc1; \ +} while (0) + +#endif + +// Given CRCs (crc0, crc1, etc.) of consequitive +// stripes of STRIPE_SIZE(block_size) bytes each, +// produces CRC of concatenated stripes. 
+#define COMBINE_STRIPE_CRCS(block_size, num_stripes) do { \ + MULTIPLY_CRC(crc0, block_size, num_stripes); \ + crc0 ^= crc1; \ + MULTIPLY_CRC(crc0, block_size, num_stripes); \ + crc0 ^= crc2; \ + if (num_stripes > 3) { \ + MULTIPLY_CRC(crc0, block_size, num_stripes); \ + crc0 ^= crc3; \ + } \ +} while (0) + +// Processes input BLOCK_SIZE(block) bytes per iteration +// by splitting a block of BLOCK_SIZE(block) bytes into N +// equally-sized stripes of STRIPE_SIZE(block_size) each, +// computing CRC of each stripe, and concatenating stripe CRCs. +#define PROCESS_BLOCK(block_size, num_stripes) do { \ + while (bytes >= CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \ + Crc crc1 = 0; \ + Crc crc2 = 0; \ + Crc crc3; \ + if (num_stripes > 3) crc3 = 0; \ + { \ + const uint8 *stripe_end = src + \ + (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) / \ + kUnrolledLoopBytes) * kUnrolledLoopBytes; \ + do { \ + UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \ + UPDATE_STRIPE_CRCS(1, block_size, num_stripes); \ + UPDATE_STRIPE_CRCS(2, block_size, num_stripes); \ + UPDATE_STRIPE_CRCS(3, block_size, num_stripes); \ + UPDATE_STRIPE_CRCS(4, block_size, num_stripes); \ + UPDATE_STRIPE_CRCS(5, block_size, num_stripes); \ + UPDATE_STRIPE_CRCS(6, block_size, num_stripes); \ + UPDATE_STRIPE_CRCS(7, block_size, num_stripes); \ + src += kUnrolledLoopBytes; \ + } while (src < stripe_end); \ + if ((CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \ + kUnrolledLoopBytes) != 0) { \ + stripe_end += \ + CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \ + kUnrolledLoopBytes; \ + do { \ + UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \ + src += sizeof(size_t); \ + } while (src < stripe_end); \ + } \ + } \ + COMBINE_STRIPE_CRCS(block_size, num_stripes); \ + src += CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * \ + ((num_stripes) - 1); \ + bytes = static_cast(end - src); \ + } \ + no_more_##block_size##_##num_stripes:; \ +} while (0) + +size_t Crc32cSSE4::Crc32c(const void *data, 
size_t bytes, Crc crc0) const { + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + crc0 ^= Base().Canonize(); + + // If we don't have too much data to process, + // do not waste time trying to align input etc. + // Noticeably improves performance on small inputs. + if (bytes < 4 * sizeof(size_t)) goto less_than_4_size_t; + if (bytes < 8 * sizeof(size_t)) goto less_than_8_size_t; + if (bytes < 16 * sizeof(size_t)) goto less_than_16_size_t; + +#define PROCESS_TAIL_IF_SMALL(block_size, num_stripes) do { \ + if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \ + goto no_more_##block_size##_##num_stripes; \ + } \ +} while (0) +#define NOOP(block_size, num_stripes) + + CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(PROCESS_TAIL_IF_SMALL, + NOOP, + NOOP); + +#undef PROCESS_TAIL_IF_SMALL + + + // Do not use ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() here because: + // 1. It uses CRC_BYTE() which won't work. + // 2. Its threshold may be incorrect becuase Crc32 that uses + // native CPU crc32 instruction is much faster than + // generic table-based CRC computation. + // + // In case of X5550 CPU, break even point is at 2KB -- exactly. + if (bytes >= 2 * 1024) { + while ((reinterpret_cast(src) & (sizeof(Word) - 1)) != 0) { + if (src >= end) { + return (crc0 ^ Base().Canonize()); + } + CRC_UPDATE_BYTE(crc0, src[0]); + src += 1; + } + bytes = static_cast(end - src); + } + if (src >= end) { + return (crc0 ^ Base().Canonize()); + } + + // Quickly skip processing of too large blocks + // Noticeably improves performance on small inputs. +#define SKIP_BLOCK_IF_NEEDED(block_size, num_stripes) do { \ + if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \ + goto no_more_##block_size##_##num_stripes; \ + } \ +} while (0) + + CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(NOOP, + SKIP_BLOCK_IF_NEEDED, + SKIP_BLOCK_IF_NEEDED); + +#undef SKIP_BLOCK_IF_NEEDED + + // Process data in all blocks. 
+ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING(PROCESS_BLOCK, + PROCESS_BLOCK, + PROCESS_BLOCK); + + // Finish the tail word-by-word and then byte-by-byte. +#define CRC_UPDATE_WORD_4(index) do { \ + CRC_UPDATE_WORD(crc0, reinterpret_cast(src)[index]); \ + CRC_UPDATE_WORD(crc0, reinterpret_cast(src)[index + 1]); \ + CRC_UPDATE_WORD(crc0, reinterpret_cast(src)[index + 2]); \ + CRC_UPDATE_WORD(crc0, reinterpret_cast(src)[index + 3]); \ +} while (0) + + if (bytes >= 4 * 4 * sizeof(size_t)) { + end -= 4 * 4 * sizeof(size_t); + do { + CRC_UPDATE_WORD_4(4 * 0); + CRC_UPDATE_WORD_4(4 * 1); + CRC_UPDATE_WORD_4(4 * 2); + CRC_UPDATE_WORD_4(4 * 3); + src += 4 * 4 * sizeof(size_t); + } while (src <= end); + end += 4 * 4 * sizeof(size_t); + bytes = static_cast(end - src); + } + less_than_16_size_t: + + if (bytes >= 4 * 2 * sizeof(size_t)) { + CRC_UPDATE_WORD_4(4 * 0); + CRC_UPDATE_WORD_4(4 * 1); + src += 4 * 2 * sizeof(size_t); + bytes -= 4 * 2 * sizeof(size_t); + } + less_than_8_size_t: + + if (bytes >= 4 * sizeof(size_t)) { + CRC_UPDATE_WORD_4(0); + src += 4 * sizeof(size_t); + bytes -= 4 * sizeof(size_t); + } + less_than_4_size_t: + + if (bytes >= 1 * sizeof(size_t)) { + end -= 1 * sizeof(size_t); + do { + CRC_UPDATE_WORD(crc0, reinterpret_cast(src)[0]); + src += 1 * sizeof(size_t); + } while (src <= end); + end += 1 * sizeof(size_t); + } + + while (src < end) { + CRC_UPDATE_BYTE(crc0, src[0]); + src += 1; + } + + return (crc0 ^ Base().Canonize()); +} + + +void Crc32cSSE4::Init(bool constant) { + base_.Init(FixedGeneratingPolynomial(), FixedDegree(), constant); + +#define INIT_MUL_TABLE(block_size, num_stripes) do { \ + size_t multiplier = \ + Base().Xpow8N(CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes)); \ + for (size_t table = 0; table < kNumTables; ++table) { \ + for (size_t entry = 0; entry < kTableEntries; ++entry) { \ + size_t value = static_cast(entry << (kTableEntryBits * table)); \ + CRC32C_SSE4_MUL_TABLE(block_size, num_stripes)[table][entry] = \ + 
static_cast(Base().Multiply(value, multiplier)); \ + } \ + } \ +} while (0) + + CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(INIT_MUL_TABLE); + +#undef INIT_MUL_TABLE + +#if !CRCUTIL_USE_MM_CRC32 + for (size_t j = 0; j < sizeof(Word); ++j) { + Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + 32); + for (size_t i = 0; i < 256; ++i) { + crc_word_[j][i] = Base().MultiplyUnnormalized(i, 8, k); + } + } +#endif // !CRCUTIL_USE_MM_CRC32 +} + + +bool Crc32cSSE4::IsSSE42Available() { +#if defined(_MSC_VER) + int cpu_info[4]; // __cpuid() fills {EAX, EBX, ECX, EDX} + __cpuid(cpu_info, 1); + return ((cpu_info[2] & (1 << 20)) != 0); // SSE4.2 is CPUID.1:ECX bit 20 (same bit the GCC branch tests) +#elif defined(__GNUC__) && (HAVE_AMD64 || HAVE_I386) + // Not using "cpuid.h" intentionally: it is missing from + // too many installations. + uint32 eax; + uint32 ecx; + uint32 edx; + __asm__ volatile( +#if HAVE_I386 && defined(__PIC__) + "push %%ebx\n" + "cpuid\n" + "pop %%ebx\n" +#define EBX_CLOBBER +// ^ GCC complains if we say we're clobbering ebx... +#else + "cpuid\n" +#define EBX_CLOBBER "ebx" +#endif // HAVE_I386 && defined(__PIC__) + : "=a" (eax), "=c" (ecx), "=d" (edx) + : "a" (1), "2" (0) + : EBX_CLOBBER + ); + return ((ecx & (1 << 20)) != 0); +#else + return false; +#endif +} + + +void RollingCrc32cSSE4::Init(const Crc32cSSE4 &crc, + size_t roll_window_bytes, + const Crc &start_value) { + crc_ = &crc; + roll_window_bytes_ = roll_window_bytes; + start_value_ = start_value; + + Crc add = crc.Base().Canonize() ^ start_value; + add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes)); + add ^= crc.Base().Canonize(); + Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1); + add = crc.Base().Multiply(add, mul); + + mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree()); + for (size_t i = 0; i < 256; ++i) { + out_[i] = static_cast( + crc.Base().MultiplyUnnormalized( + static_cast(i), 8, mul) ^ add); + } + +#if !CRCUTIL_USE_MM_CRC32 + memcpy(crc_word_, crc_->crc_word_, sizeof(crc_word_)); +#endif // !CRCUTIL_USE_MM_CRC32 +} + +} // namespace crcutil + +#endif // HAVE_I386 
|| HAVE_AMD64 diff --git a/rapidyenc/crcutil-1.0/code/crc32c_sse4.h b/rapidyenc/crcutil-1.0/code/crc32c_sse4.h new file mode 100644 index 0000000..ac3d842 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/crc32c_sse4.h @@ -0,0 +1,252 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements CRC32C using Intel's SSE4 crc32 instruction. +// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, +// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise. + +#ifndef CRCUTIL_CRC32C_SSE4_H_ +#define CRCUTIL_CRC32C_SSE4_H_ + +#include "gf_util.h" // base types, gf_util class, etc. 
+#include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics + +#if HAVE_I386 || HAVE_AMD64 + +#if CRCUTIL_USE_MM_CRC32 + +#if HAVE_I386 +#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value))) +#else +#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value))) +#endif // HAVE_I386 + +#define CRC_UPDATE_BYTE(crc, value) \ + (crc = _mm_crc32_u8(static_cast(crc), static_cast(value))) + +#else + +#include "generic_crc.h" + +#define CRC_UPDATE_WORD(crc, value) do { \ + size_t buf = (value); \ + CRC_WORD(this, crc, buf); \ +} while (0) +#define CRC_UPDATE_BYTE(crc, value) do { \ + CRC_BYTE(this, crc, (value)); \ +} while (0) + +#endif // CRCUTIL_USE_MM_CRC32 + +namespace crcutil { + +#pragma pack(push, 16) + +// Since the same pieces should be parameterized in many different places +// and we do not want to introduce a mistake which is rather hard to find, +// use a macro to enumerate all block sizes. +// +// Block sizes and number of stripes were tuned for best performance. +// +// All constants should be literal constants (too lazy to fix the macro). +// +// The use of different "macro_first", "macro", and "macro_last" +// allows generation of different code for smallest, in between, +// and largest block sizes. +// +// This macro shall be kept in sync with +// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING. +// Failure to do so will cause compile-time error. +#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \ + macro_smallest, macro, macro_largest) \ + macro_smallest(512, 3); \ + macro(1024, 3); \ + macro(4096, 3); \ + macro_largest(32768, 3) + +// This macro shall be kept in sync with +// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING. +// Failure to do so will cause compile-time error. +#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \ + macro_smallest, macro, macro_largest) \ + macro_largest(32768, 3); \ + macro(4096, 3); \ + macro(1024, 3); \ + macro_smallest(512, 3) + +// Enumerates all block sizes. 
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \ + CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro) + +#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \ + (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1)) + +#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \ + (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes)) + +#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + mul_table_##block_size##_##num_stripes##_ // table name encodes both literal arguments + +class RollingCrc32cSSE4; + +class Crc32cSSE4 { + public: + // Exports Crc, TableEntry, and Word (needed by RollingCrc). + typedef size_t Crc; + typedef Crc Word; + typedef Crc TableEntry; + + Crc32cSSE4() {} + + // Initializes the tables given generating polynomial of degree (degree). + // If "canonical" is true, crc value will be XOR'ed with (-1) before and + // after actual CRC computation. + explicit Crc32cSSE4(bool canonical) { + Init(canonical); + } + void Init(bool canonical); + + // Initializes the tables given generating polynomial of degree. + // If "canonical" is true, crc value will be XOR'ed with (-1) before and + // after actual CRC computation. + // Provided for compatibility with GenericCrc. + Crc32cSSE4(const Crc &generating_polynomial, + size_t degree, + bool canonical) { + Init(generating_polynomial, degree, canonical); + } + void Init(const Crc &generating_polynomial, + size_t degree, + bool canonical) { + if (generating_polynomial == FixedGeneratingPolynomial() && + degree == FixedDegree()) { + Init(canonical); + } + } + + // Returns fixed generating polynomial the class implements. + static Crc FixedGeneratingPolynomial() { + return 0x82f63b78; + } + + // Returns degree of fixed generating polynomial the class implements. + static Crc FixedDegree() { + return 32; + } + + // Returns base class. + const GfUtil &Base() const { return base_; } + + // Computes CRC32. 
+ size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const { + return Crc32c(data, bytes, crc); + } + + // Returns true iff crc32 instruction is available. + static bool IsSSE42Available(); + + protected: + // Actual implementation. + size_t Crc32c(const void *data, size_t bytes, Crc crc) const; + + enum { + kTableEntryBits = 8, + kTableEntries = 1 << kTableEntryBits, + kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits, + kNumTablesHalfLo = kNumTables / 2, + kNumTablesHalfHi = (kNumTables + 1) / 2, + + kUnrolledLoopCount = 8, + kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t), + }; + + // May be set to size_t or uint32, whichever is faster. + typedef uint32 Entry; + +#define DECLARE_MUL_TABLE(block_size, num_stripes) \ + Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ + [kNumTables][kTableEntries] + + CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE); + +#undef DECLARE_MUL_TABLE + + GfUtil base_; + +#if !CRCUTIL_USE_MM_CRC32 + TableEntry crc_word_[sizeof(Word)][256]; + friend class RollingCrc32cSSE4; +#endif // !CRCUTIL_USE_MM_CRC32 +} GCC_ALIGN_ATTRIBUTE(16); + +class RollingCrc32cSSE4 { + public: + typedef Crc32cSSE4::Crc Crc; + typedef Crc32cSSE4::TableEntry TableEntry; + typedef Crc32cSSE4::Word Word; + + RollingCrc32cSSE4() {} + + // Initializes internal data structures. + // Retains reference to "crc" instance -- it is used by Start(). + RollingCrc32cSSE4(const Crc32cSSE4 &crc, + size_t roll_window_bytes, + const Crc &start_value) { + Init(crc, roll_window_bytes, start_value); + } + void Init(const Crc32cSSE4 &crc, + size_t roll_window_bytes, + const Crc &start_value); + + // Computes crc of "roll_window_bytes" using + // "start_value" of "crc" (see Init()). + Crc Start(const void *data) const { + return crc_->CrcDefault(data, roll_window_bytes_, start_value_); + } + + // Computes CRC of "roll_window_bytes" starting in next position. 
+ Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const { + Crc crc = old_crc; + CRC_UPDATE_BYTE(crc, byte_in); + crc ^= out_[byte_out]; + return crc; + } + + // Returns start value. + Crc StartValue() const { return start_value_; } + + // Returns length of roll window. + size_t WindowBytes() const { return roll_window_bytes_; } + + protected: + typedef Crc Entry; + Entry out_[256]; + + // Used only by Start(). + Crc start_value_; + const Crc32cSSE4 *crc_; + size_t roll_window_bytes_; + +#if !CRCUTIL_USE_MM_CRC32 + TableEntry crc_word_[sizeof(Word)][256]; +#endif // !CRCUTIL_USE_MM_CRC32 +} GCC_ALIGN_ATTRIBUTE(16); + +#pragma pack(pop) + +} // namespace crcutil + +#endif // HAVE_I386 || HAVE_AMD64 + +#endif // CRCUTIL_CRC32C_SSE4_H_ diff --git a/rapidyenc/crcutil-1.0/code/crc32c_sse4_intrin.h b/rapidyenc/crcutil-1.0/code/crc32c_sse4_intrin.h new file mode 100644 index 0000000..b57a7c4 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/crc32c_sse4_intrin.h @@ -0,0 +1,102 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Provides _mm_crc32_u64/32/8 intrinsics. 
+ +#ifndef CRCUTIL_CRC32C_SSE4_INTRIN_H_ +#define CRCUTIL_CRC32C_SSE4_INTRIN_H_ + +#include "platform.h" +#include "base_types.h" + +#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) + +#if defined(_MSC_VER) || defined(__SSE4_2__) + +#if defined(_MSC_VER) +#pragma warning(push) +// '_M_IA64' is not defined as a preprocessor macro +#pragma warning(disable: 4668) +#endif // defined(_MSC_VER) + +#include + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif // defined(_MSC_VER) + +#elif GCC_VERSION_AVAILABLE(4, 5) && !defined(CRCUTIL_FORCE_ASM_CRC32C) +// Allow the use of _mm_crc32_u* intrinsic when CRCUTIL_USE_MM_CRC32 +// is set irrespective of "-msse*" settings. This way, the sources +// may be compiled with "-msse2 -mcrc32" and work on older CPUs, +// while taking full advantage of "crc32" instruction on newer +// CPUs (requires dynamic CPU detection). See "interface.cc". +// +// If neither -msse4 or -mcrc32 is provided and CRCUTIL_USE_MM_CRC32 is set +// and CRCUTIL_FORCE_ASM_CRC32 is not set, compile-time error will happen. +// Why? Becuase GCC disables __builtin_ia32_crc32* intrinsics when compiled +// without -msse4 or -mcrc32. -msse4 could be detected at run time by checking +// whether __SSE4_2__ is defined, but there is no way to tell whether the +// sources are compiled with -mcrc32. 
+ +extern __inline unsigned int __attribute__(( + __gnu_inline__, __always_inline__, __artificial__)) +_mm_crc32_u8(unsigned int __C, unsigned char __V) { + //return __builtin_ia32_crc32qi(__C, __V); + return 0; +} +#ifdef __x86_64__ +extern __inline unsigned long long __attribute__(( + __gnu_inline__, __always_inline__, __artificial__)) +_mm_crc32_u64(unsigned long long __C, unsigned long long __V) { + //return __builtin_ia32_crc32di(__C, __V); + return 0; +} +#else +extern __inline unsigned int __attribute__(( + __gnu_inline__, __always_inline__, __artificial__)) +_mm_crc32_u32(unsigned int __C, unsigned int __V) { + //return __builtin_ia32_crc32si (__C, __V); + return 0; +} +#endif // __x86_64__ + +#else + +// GCC 4.4.x and earlier: use inline asm. + +namespace crcutil { + +__forceinline uint64 _mm_crc32_u64(uint64 crc, uint64 value) { + asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); + return crc; +} + +__forceinline uint32 _mm_crc32_u32(uint32 crc, uint64 value) { + asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); + return crc; +} + +__forceinline uint32 _mm_crc32_u8(uint32 crc, uint8 value) { + asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); + return crc; +} + +} // namespace crcutil + +#endif + +#endif // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) + +#endif // CRCUTIL_CRC32C_SSE4_INTRIN_H_ diff --git a/rapidyenc/crcutil-1.0/code/crc_casts.h b/rapidyenc/crcutil-1.0/code/crc_casts.h new file mode 100644 index 0000000..a14044f --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/crc_casts.h @@ -0,0 +1,68 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Casting between integers and compound CRC types. + +#ifndef CRCUTIL_CRC_CASTS_H_ +#define CRCUTIL_CRC_CASTS_H_ + +#include "base_types.h" // uint8, uint64 +#include "platform.h" // __forceinline + +namespace crcutil { + +// Downcasts a value of (oftentimes larger) Crc type to (smaller base integer) +// Result type, enabling specialized downcasts implemented by "large integer" +// classes (e.g. uint128_sse2). +template +__forceinline Result Downcast(const Crc &x) { + return static_cast(x); +} + +// Extracts 8 least significant bits from a value of Crc type. +#define TO_BYTE(x) Downcast(x) + +// Converts a pair of uint64 bit values into single value of CRC type. +// It is caller's responsibility to ensure that the input is correct. +template +__forceinline Crc CrcFromUint64(uint64 lo, uint64 hi = 0) { + if (sizeof(Crc) <= sizeof(lo)) { + return static_cast(lo); + } else { + // static_cast to keep compiler happy. + Crc result = static_cast(hi); + result = SHIFT_LEFT_SAFE(result, 8 * sizeof(lo)); + result ^= lo; + return result; + } +} + +// Converts Crc value to a pair of uint64 values. 
+template<typename Crc> +__forceinline void Uint64FromCrc(const Crc &crc, + uint64 *lo, uint64 *hi = NULL) { + // "lo" always receives the low 64 bits; "hi" is optional and is skipped when NULL. + *lo = Downcast<Crc, uint64>(crc); + if (hi != NULL) { + if (sizeof(*lo) >= sizeof(crc)) { + *hi = 0; + } else { + *hi = Downcast<Crc, uint64>(SHIFT_RIGHT_SAFE(crc, 8 * sizeof(*lo))); + } + } +} + +} // namespace crcutil + +#endif // CRCUTIL_CRC_CASTS_H_ diff --git a/rapidyenc/crcutil-1.0/code/generic_crc.h b/rapidyenc/crcutil-1.0/code/generic_crc.h new file mode 100644 index 0000000..06af21c --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/generic_crc.h @@ -0,0 +1,687 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Defines GenericCrc class which implements arbitrary CRCs. +// +// Please read crc.pdf to understand how it all works. + +#ifndef CRCUTIL_GENERIC_CRC_H_ +#define CRCUTIL_GENERIC_CRC_H_ + +#include "base_types.h" // uint8 +#include "crc_casts.h" // TO_BYTE(), Downcast<>. +#include "gf_util.h" // GfUtil class. +#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16) +#include "uint128_sse2.h" // uint128_sse2 type (if necessary) + +namespace crcutil { + +#pragma pack(push, 16) + +// Extends CRC by one byte. +// Technically, if degree of a polynomial does not exceed 8, +// right shift by 8 bits is not required, but who cares about CRC-8? +#define CRC_BYTE(table, crc, byte) do { \ + crc = ((sizeof(crc) > 1) ? 
SHIFT_RIGHT_SAFE(crc, 8) : 0) ^ \ + table->crc_word_[sizeof(Word) - 1][TO_BYTE(crc) ^ (byte)]; \ +} while (0) + +#define TABLE_ENTRY(table, byte, buf) \ + table[byte][Downcast(buf)] + +#define TABLE_ENTRY_LAST(table, buf) \ + table[sizeof(Word) - 1][buf] + +// Extends CRC by one word. +#define CRC_WORD(table, crc, buf) do { \ + buf ^= Downcast(crc); \ + if (sizeof(crc) > sizeof(buf)) { \ + crc = SHIFT_RIGHT_SAFE(crc, sizeof(buf) * 8); \ + crc ^= TABLE_ENTRY(table->crc_word_, 0, buf); \ + } else { \ + crc = TABLE_ENTRY(table->crc_word_, 0, buf); \ + } \ + buf >>= 8; \ + for (size_t byte = 1; byte < sizeof(buf) - 1; ++byte) { \ + crc ^= TABLE_ENTRY(table->crc_word_, byte, buf); \ + buf >>= 8; \ + } \ + crc ^= TABLE_ENTRY_LAST(table->crc_word_, buf); \ +} while (0) + +// Process beginning of data block byte by byte until source pointer +// becomes perfectly aligned on Word boundary. +#define ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word) do { \ + while ((reinterpret_cast(src) & (sizeof(Word) - 1)) != 0) { \ + if (src >= end) { \ + return (crc ^ table->Base().Canonize()); \ + } \ + CRC_BYTE(table, crc, *src); \ + src += 1; \ + } \ +} while (0) + + +// On amd64, enforcing alignment is 2-4% slower on small (<= 64 bytes) blocks +// but 6-10% faster on larger blocks (>= 2KB). +// Break-even point (+-1%) is around 1KB (Q9650, E6600). +// +#define ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, table, src, end, crc, Word) \ +do { \ + if (sizeof(Word) > 8 || (bytes) > CRCUTIL_MIN_ALIGN_SIZE) { \ + ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word); \ + } \ +} while (0) + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif // defined(_MSC_VER) + +// Forward declarations. +template class RollingCrc; + +// Crc is the type used internally and to return values of N-bit CRC. +// It should be at least as large as "TableEntry" and "Word" but +// may be larger (e.g. 
for 16-bit CRC, TableEntry and Word may be +// set to uint16 but Crc may be set to uint32). +// +// TableEntry is the type of values stored in the tables. +// To implement N-bit CRC, TableEntry should be large enough +// to store N bits. +// +// Word is the type used to read data sizeof(Word) at a time. +// Ideally, it shoulde be "most suitable for given architecture" +// integer type -- typically "size_t". +// +// kStride is the number of words processed in interleaved manner by +// CrcMultiword() and CrcWordblock(). Shall be either 3 or 4. +// Optimal value depends on hardware architecture (AMD64, ARM, etc). +// +template + class GenericCrc { + public: + // Make Crc, TableEntry, and Word types visible (used by RollingCrc etc.) + typedef _Crc Crc; + typedef _TableEntry TableEntry; + typedef _Word Word; + + GenericCrc() {} + + // Initializes the tables given generating polynomial of degree. + // If "canonical" is true, crc value will be XOR'ed with (-1) before and + // after actual CRC computation. + GenericCrc(const Crc &generating_polynomial, size_t degree, bool canonical) { + Init(generating_polynomial, degree, canonical); + } + void Init(const Crc &generating_polynomial, size_t degree, bool canonical) { + base_.Init(generating_polynomial, degree, canonical); + + // Instead of computing + // table[j][i] = MultiplyUnnormalized(i, 8, k), + // for all i = 0...255, we may notice that + // if i = 2**n then for all m = 1...(i-1) + // MultiplyUnnormalized(i + m, 8, k) = + // MultiplyUnnormalized(i ^ m, 8, k) = + // MultiplyUnnormalized(i, 8, k) ^ MultiplyUnnormalized(m, 8, k) = + // MultiplyUnnormalized(i, 8, k) ^ crc_word_interleaved[j][m] = + // table[i] ^ table[m]. 
+#if 0 + for (size_t j = 0; j < sizeof(Word); ++j) { + Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree); + for (size_t i = 0; i < 256; ++i) { + Crc temp = Base().MultiplyUnnormalized(static_cast(i), 8, k); + this->crc_word_interleaved_[j][i] = Downcast(temp); + } + } +#else + for (size_t j = 0; j < sizeof(Word); ++j) { + Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree); + TableEntry *table = this->crc_word_interleaved_[j]; + table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0. + for (size_t i = 1; i < 256; i <<= 1) { + TableEntry value = Downcast( + Base().MultiplyUnnormalized(static_cast(i), 8, k)); + table[i] = value; + for (size_t m = 1; m < i; ++m) { + table[i + m] = value ^ table[m]; + } + } + } +#endif + +#if 0 + for (size_t j = 0; j < sizeof(Word); ++j) { + Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree); + for (size_t i = 0; i < 256; ++i) { + Crc temp = Base().MultiplyUnnormalized(static_cast(i), 8, k); + this->crc_word_[j][i] = Downcast(temp); + } + } +#else + for (size_t j = 0; j < sizeof(Word); ++j) { + Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree); + TableEntry *table = this->crc_word_[j]; + table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0. + for (size_t i = 1; i < 256; i <<= 1) { + TableEntry value = Downcast( + Base().MultiplyUnnormalized(static_cast(i), 8, k)); + table[i] = value; + for (size_t m = 1; m < i; ++m) { + table[i + m] = value ^ table[m]; + } + } + } +#endif + } + + // Default CRC implementation + Crc CrcDefault(const void *data, size_t bytes, const Crc &start) const { +#if HAVE_AMD64 || HAVE_I386 + return CrcMultiword(data, bytes, start); +#else + // Very few CPUs have multiple ALUs and speculative execution + // (Itanium is an exception) so sophisticated algorithms will + // not perform better than good old Sarwate algorithm. + return CrcByteUnrolled(data, bytes, start); +#endif // HAVE_AMD64 || HAVE_I386 + } + + // Returns base class. 
+ const GfUtil &Base() const { return base_; } + + protected: + // Canonical, byte-by-byte CRC computation. + Crc CrcByte(const void *data, size_t bytes, const Crc &start) const { + const uint8 *src = static_cast(data); + Crc crc = start ^ Base().Canonize(); + for (const uint8 *end = src + bytes; src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + return (crc ^ Base().Canonize()); + } + + // Byte-by-byte CRC with main loop unrolled. + Crc CrcByteUnrolled(const void *data, size_t bytes, const Crc &start) const { + if (bytes == 0) { + return start; + } + + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + Crc crc = start ^ Base().Canonize(); + + // Unroll loop 4 times. + end -= 3; + for (; src < end; src += 4) { + PREFETCH(src); + CRC_BYTE(this, crc, src[0]); + CRC_BYTE(this, crc, src[1]); + CRC_BYTE(this, crc, src[2]); + CRC_BYTE(this, crc, src[3]); + } + end += 3; + + // Compute CRC of remaining bytes. + for (; src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + + return (crc ^ Base().Canonize()); + } + + // Canonical, byte-by-byte CRC computation. + Crc CrcByteWord(const void *data, size_t bytes, const Crc &start) const { + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + Crc crc0 = start ^ Base().Canonize(); + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Crc); + if (src >= end) { + return (crc0 ^ Base().Canonize()); + } + + // Process 4*sizeof(Crc) bytes at a time. + end -= 4 * sizeof(Crc) - 1; + for (; src < end; src += 4 * sizeof(Crc)) { + for (size_t i = 0; i < 4; ++i) { + crc0 ^= reinterpret_cast(src)[i]; + if (i == 0) { + PREFETCH(src); + } + for (size_t byte = 0; byte < sizeof(crc0); ++byte) { + CRC_BYTE(this, crc0, 0); + } + } + } + end += 4 * sizeof(Crc) - 1; + + // Process sizeof(Crc) bytes at a time. 
+ end -= sizeof(Crc) - 1; + for (; src < end; src += sizeof(Crc)) { + crc0 ^= reinterpret_cast(src)[0]; + for (size_t byte = 0; byte < sizeof(crc0); ++byte) { + CRC_BYTE(this, crc0, 0); + } + } + end += sizeof(Crc) - 1; + + // Compute CRC of remaining bytes. + for (;src < end; ++src) { + CRC_BYTE(this, crc0, *src); + } + + return (crc0 ^ Base().Canonize()); + } + + // Faster, word-by-word CRC. + Crc CrcWord(const void *data, size_t bytes, const Crc &start) const { + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + Crc crc0 = start ^ Base().Canonize(); + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word); + if (src >= end) { + return (crc0 ^ Base().Canonize()); + } + + // Process 4 sizeof(Word) bytes at once. + end -= 4 * sizeof(Word) - 1; + for (; src < end; src += 4 * sizeof(Word)) { + Word buf0 = reinterpret_cast(src)[0]; + PREFETCH(src); + CRC_WORD(this, crc0, buf0); + buf0 = reinterpret_cast(src)[1]; + CRC_WORD(this, crc0, buf0); + buf0 = reinterpret_cast(src)[2]; + CRC_WORD(this, crc0, buf0); + buf0 = reinterpret_cast(src)[3]; + CRC_WORD(this, crc0, buf0); + } + end += 4 * sizeof(Word) - 1; + + // Process sizeof(Word) bytes at a time. + end -= sizeof(Word) - 1; + for (; src < end; src += sizeof(Word)) { + Word buf0 = reinterpret_cast(src)[0]; + CRC_WORD(this, crc0, buf0); + } + end += sizeof(Word) - 1; + + // Compute CRC of remaining bytes. + for (;src < end; ++src) { + CRC_BYTE(this, crc0, *src); + } + + return (crc0 ^ Base().Canonize()); + } + +#define REPEAT_FROM_1(macro) \ + macro(1); \ + macro(2); \ + macro(3); \ + macro(4); \ + macro(5); \ + macro(6); \ + macro(7); + +#define REPEAT_FROM_0(macro) \ + macro(0); \ + REPEAT_FROM_1(macro) + + // Faster, process adjusent blocks in parallel and concatenate CRCs. + Crc CrcBlockword(const void *data, size_t bytes, const Crc &start) const { + if (kStride < 2 || kStride > 8) { + // Unsupported configuration; + // fall back to something sensible. 
+ return CrcWord(data, bytes, start); + } + + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + Crc crc0 = start ^ Base().Canonize(); + enum { + // Add 16 to avoid false L1 cache collisions. + kStripe = (15*1024 + 16) & ~(sizeof(Word) - 1), + }; + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word); + if (src >= end) { + return (crc0 ^ Base().Canonize()); + } + + end -= kStride * kStripe - 1; + if (src < end) { + Crc x_pow_8kStripe = Base().Xpow8N(kStripe); + do { + const uint8 *stripe_end = src + kStripe; + +#define INIT_CRC(reg) \ + Crc crc##reg; \ + if (kStride >= reg) { \ + crc##reg = 0; \ + } + REPEAT_FROM_1(INIT_CRC); +#undef INIT_CRC + + do { +#define FIRST(reg) \ + Word buf##reg; \ + if (kStride > reg) { \ + buf##reg = reinterpret_cast(src + reg * kStripe)[0]; \ + buf##reg ^= Downcast(crc##reg); \ + if (sizeof(crc##reg) > sizeof(buf##reg)) { \ + crc##reg = SHIFT_RIGHT_SAFE(crc##reg, sizeof(buf##reg) * 8); \ + crc##reg ^= TABLE_ENTRY(this->crc_word_, 0, buf##reg); \ + } else { \ + crc##reg = TABLE_ENTRY(this->crc_word_, 0, buf##reg); \ + } \ + buf##reg >>= 8; \ + } + REPEAT_FROM_0(FIRST); +#undef FIRST + + for (size_t byte = 1; byte < sizeof(buf0) - 1; ++byte) { +#define NEXT(reg) do { \ + if (kStride > reg) { \ + crc##reg ^= TABLE_ENTRY(this->crc_word_, byte, buf##reg); \ + buf##reg >>= 8; \ + } \ +} while (0) + REPEAT_FROM_0(NEXT); +#undef NEXT + } + +#define LAST(reg) do { \ + if (kStride > reg) { \ + crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_, buf##reg); \ + } \ +} while (0) + REPEAT_FROM_0(LAST); +#undef LAST + + src += sizeof(Word); + } while (src < stripe_end); + +#if 0 +// The code is left for illustrational purposes only. 
+#define COMBINE(reg) do { \ + if (reg > 0 && kStride > reg) { \ + crc0 = Base().ChangeStartValue(crc##reg, kStripe, 0, crc0); \ + } \ +} while (0) +#else +#define COMBINE(reg) do { \ + if (reg > 0 && kStride > reg) { \ + crc0 = crc##reg ^ Base().Multiply(crc0, x_pow_8kStripe); \ + } \ +} while (0) +#endif + REPEAT_FROM_0(COMBINE); +#undef COMBINE + + src += (kStride - 1) * kStripe; + } + while (src < end); + } + end += kStride * kStripe - 1; + + // Process sizeof(Word) bytes at a time. + end -= sizeof(Word) - 1; + for (; src < end; src += sizeof(Word)) { + Word buf0 = reinterpret_cast(src)[0]; + CRC_WORD(this, crc0, buf0); + } + end += sizeof(Word) - 1; + + // Compute CRC of remaining bytes. + for (;src < end; ++src) { + CRC_BYTE(this, crc0, *src); + } + + return (crc0 ^ Base().Canonize()); + } + + // Fastest, interleaved multi-byte CRC. + Crc CrcMultiword(const void *data, size_t bytes, const Crc &start) const { + if (kStride < 2 || kStride > 8) { + // Unsupported configuration; + // fall back to something sensible. + return CrcWord(data, bytes, start); + } + + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + Crc crc0 = start ^ Base().Canonize(); + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word); + if (src >= end) { + return (crc0 ^ Base().Canonize()); + } + + // Process kStride Word registers at once; + // should have have at least 2*kInterleaveBytes of data to start. + end -= 2*kInterleaveBytes - 1; + if (src < end) { + Crc crc_carryover; + if (sizeof(Crc) > sizeof(Word)) { + // crc_carryover is used if and only if Crc is wider than Word. 
+ crc_carryover = 0; + } +#define INIT_CRC(reg) \ + Crc crc##reg; \ + if (reg > 0 && kStride > reg) { \ + crc##reg = 0; \ + } + REPEAT_FROM_1(INIT_CRC); +#undef INIT_CRC + +#define INIT_BUF(reg) \ + Word buf##reg; \ + if (kStride > reg) { \ + buf##reg = reinterpret_cast(src)[reg]; \ + } + REPEAT_FROM_0(INIT_BUF); +#undef INIT_BUF + + do { + PREFETCH(src); + src += kInterleaveBytes; + + if (sizeof(Crc) > sizeof(Word)) { + crc0 ^= crc_carryover; + } + +#define FIRST(reg, next_reg) do { \ + if (kStride > reg) { \ + buf##reg ^= Downcast(crc##reg); \ + if (sizeof(Crc) > sizeof(Word)) { \ + if (reg < kStride - 1) { \ + crc##next_reg ^= SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \ + } else { \ + crc_carryover = SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \ + } \ + } \ + crc##reg = TABLE_ENTRY(this->crc_word_interleaved_, 0, buf##reg); \ + buf##reg >>= 8; \ + } \ +} while (0) + FIRST(0, 1); + FIRST(1, 2); + FIRST(2, 3); + FIRST(3, 4); + FIRST(4, 5); + FIRST(5, 6); + FIRST(6, 7); + FIRST(7, 0); +#undef FIRST + + for (size_t byte = 1; byte < sizeof(Word) - 1; ++byte) { +#define NEXT(reg) do { \ + if (kStride > reg) { \ + crc##reg ^= \ + TABLE_ENTRY(this->crc_word_interleaved_, byte, buf##reg); \ + buf##reg >>= 8; \ + } \ +} while(0) + REPEAT_FROM_0(NEXT); +#undef NEXT + } + +#define LAST(reg) do { \ + if (kStride > reg) { \ + crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_interleaved_, buf##reg); \ + buf##reg = reinterpret_cast(src)[reg]; \ + } \ +} while(0) + REPEAT_FROM_0(LAST); +#undef LAST + } + while (src < end); + + if (sizeof(Crc) > sizeof(Word)) { + crc0 ^= crc_carryover; + } + +#define COMBINE(reg) do { \ + if (kStride > reg) { \ + if (reg != 0) { \ + crc0 ^= crc##reg; \ + } \ + CRC_WORD(this, crc0, buf##reg); \ + } \ +} while (0) + REPEAT_FROM_0(COMBINE); +#undef COMBINE + + src += kInterleaveBytes; + } + end += 2*kInterleaveBytes - 1; + + // Process sizeof(Word) bytes at once. 
+ end -= sizeof(Word) - 1; + for (; src < end; src += sizeof(Word)) { + Word buf0 = reinterpret_cast(src)[0]; + CRC_WORD(this, crc0, buf0); + } + end += sizeof(Word) - 1; + + // Compute CRC of remaining bytes. + for (;src < end; ++src) { + CRC_BYTE(this, crc0, *src); + } + + return (crc0 ^ Base().Canonize()); + } + + protected: + enum { + kInterleaveBytes = sizeof(Word) * kStride, + }; + + // Multiplication tables used by CRCs. + TableEntry crc_word_interleaved_[sizeof(Word)][256]; + TableEntry crc_word_[sizeof(Word)][256]; + + // Base class stored after CRC tables so that the most frequently + // used table is at offset 0 and may be accessed faster. + GfUtil base_; + + friend class RollingCrc< GenericCrc >; + + private: + // CrcMultiword on amd64 may run at 1.2 CPU cycles per byte which is + // noticeably faster than CrcWord (2.2-2.6 cycles/byte depending on + // hardware and compiler). However, there are problems with compilers. + // + // Test system: P45 chipset, Intel Q9650 CPU, 800MHz 4-4-4-12 memory. + // + // 64-bit compiler, <= 64-bit CRC, 64-bit tables, 64-bit reads: + // CL 15.00.307291.1 C++ >1.2< CPU cycles/byte + // ICL 11.1.051 -O3 C++ 1.5 CPU cycles/byte + // GCC 4.5 -O3 C++ 2.0 CPU cycles/byte + // GCC 4.x -O3 ASM >1.2< CPU cycles/byte + // + // 32-bit compiler, MMX used, <= 64-bit CRC, 64-bit tables, 64-bit reads + // CL 15.00.307291.1 C++ 2.0 CPU cycles/byte + // GCC 4.5 -O3 C++ 1.9 CPU cycles/byte + // ICL 11.1.051 -S C++ 1.6 CPU cycles/byte + // GCC 4.x -O3 ASM >1.3< CPU cycles/byte + // + // So, use inline ASM code for GCC for both i386 and amd64. + + Crc CrcMultiwordI386Mmx( + const void *data, size_t bytes, const Crc &start) const; + Crc CrcMultiwordGccAmd64( + const void *data, size_t bytes, const Crc &start) const; + Crc CrcMultiwordGccAmd64Sse2( + const uint8 *src, const uint8 *end, const Crc &start) const; +} GCC_ALIGN_ATTRIBUTE(16); + +#undef REPEAT_FROM_0 +#undef REPEAT_FROM_1 + + +// Specialized variants. 
+#if CRCUTIL_USE_ASM + +#if (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX))) + +// Declare specialized functions. +template<> uint64 GenericCrc::CrcMultiword( + const void *data, size_t bytes, const uint64 &start) const; + +#if HAVE_AMD64 && HAVE_SSE2 +template<> +uint128_sse2 +GenericCrc::CrcMultiword( + const void *data, size_t bytes, const uint128_sse2 &start) const; +#endif // HAVE_AMD64 && HAVE_SSE2 + +#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER <= 150030729 && \ + (HAVE_I386 && HAVE_MMX) + +// Work around bug in MSC (present at least in v. 15.00.30729.1) +template<> uint64 GenericCrc::CrcMultiwordI386Mmx( + const void *data, + size_t bytes, + const uint64 &start) const; +template<> __forceinline +uint64 GenericCrc::CrcMultiword( + const void *data, + size_t bytes, + const uint64 &start) const { + typedef uint64 Word; + typedef uint64 Crc; + if (bytes <= 12) { + const uint8 *src = static_cast(data); + uint64 crc = start ^ Base().Canonize(); + for (const uint8 *end = src + bytes; src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + return (crc ^ Base().Canonize()); + } + return CrcMultiwordI386Mmx(data, bytes, start); +} + +#endif // (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX))) + +#endif // CRCUTIL_USE_ASM + + +#pragma pack(pop) + +} // namespace crcutil + +#endif // CRCUTIL_GENERIC_CRC_H_ diff --git a/rapidyenc/crcutil-1.0/code/gf_util.h b/rapidyenc/crcutil-1.0/code/gf_util.h new file mode 100644 index 0000000..43c1d6b --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/gf_util.h @@ -0,0 +1,304 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Defines GfUtil template class which implements +// 1. some useful operations in GF(2^n), +// 2. CRC helper functions (e.g. concatenation of CRCs) which are +// not affected by specific implementation of CRC computation per se. +// +// Please read crc.pdf to understand how it all works. + +#ifndef CRCUTIL_GF_UTIL_H_ +#define CRCUTIL_GF_UTIL_H_ + +#include "base_types.h" // uint8, uint64 +#include "crc_casts.h" // TO_BYTE() +#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16), SHIFT_*_SAFE + +namespace crcutil { + +#pragma pack(push, 16) + +// "Crc" is the type used internally and to return values of N-bit CRC. +template class GfUtil { + public: + // Initializes the tables given generating polynomial of degree (degree). + // If "canonical" is true, starting CRC value and computed CRC value will be + // XOR-ed with 111...111. 
+ GfUtil() {} + GfUtil(const Crc &generating_polynomial, size_t degree, bool canonical) { + Init(generating_polynomial, degree, canonical); + } + void Init(const Crc &generating_polynomial, size_t degree, bool canonical) { + Crc one = 1; + one <<= degree - 1; + this->generating_polynomial_ = generating_polynomial; + this->crc_bytes_ = (degree + 7) >> 3; + this->degree_ = degree; + this->one_ = one; + if (canonical) { + this->canonize_ = one | (one - 1); + } else { + this->canonize_ = 0; + } + this->normalize_[0] = 0; + this->normalize_[1] = generating_polynomial; + + Crc k = one >> 1; + for (size_t i = 0; i < sizeof(uint64) * 8; ++i) { + this->x_pow_2n_[i] = k; + k = Multiply(k, k); + } + + this->crc_of_crc_ = Multiply(this->canonize_, + this->one_ ^ Xpow8N((degree + 7) >> 3)); + + FindLCD(Xpow8N(this->crc_bytes_), &this->x_pow_minus_W_); + } + + // Returns generating polynomial. + Crc GeneratingPolynomial() const { + return this->generating_polynomial_; + } + + // Returns number of bits in CRC (degree of generating polynomial). + size_t Degree() const { + return this->degree_; + } + + // Returns start/finish adjustment constant. + Crc Canonize() const { + return this->canonize_; + } + + // Returns normalized value of 1. + Crc One() const { + return this->one_; + } + + // Returns value of CRC(A, |A|, start_new) given known + // crc=CRC(A, |A|, start_old) -- without touching the data. + Crc ChangeStartValue(const Crc &crc, uint64 bytes, + const Crc &start_old, + const Crc &start_new) const { + return (crc ^ Multiply(start_new ^ start_old, Xpow8N(bytes))); + } + + // Returns CRC of concatenation of blocks A and B when CRCs + // of blocks A and B are known -- without touching the data. + // + // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0), + // returns CRC(AB, |AB|, startA). 
+ Crc Concatenate(const Crc &crc_A, const Crc &crc_B, uint64 bytes_B) const { + return ChangeStartValue(crc_B, bytes_B, 0 /* start_B */, crc_A); + } + + // Returns CRC of sequence of zeroes -- without touching the data. + Crc CrcOfZeroes(uint64 bytes, const Crc &start) const { + Crc tmp = Multiply(start ^ this->canonize_, Xpow8N(bytes)); + return (tmp ^ this->canonize_); + } + + // Given CRC of a message, stores extra (degree + 7)/8 bytes after + // the message so that CRC(message+extra, start) = result. + // Does not change CRC start value (use ChangeStartValue for that). + // Returns number of stored bytes. + size_t StoreComplementaryCrc(void *dst, + const Crc &message_crc, + const Crc &result) const { + Crc crc0 = Multiply(result ^ this->canonize_, this->x_pow_minus_W_); + crc0 ^= message_crc ^ this->canonize_; + uint8 *d = reinterpret_cast(dst); + for (size_t i = 0; i < this->crc_bytes_; ++i) { + d[i] = TO_BYTE(crc0); + crc0 >>= 8; + } + return this->crc_bytes_; + } + + // Stores given CRC of a message as (degree + 7)/8 bytes filled + // with 0s to the right. Returns number of stored bytes. + // CRC of the message and stored CRC is a constant value returned + // by CrcOfCrc() -- it does not depend on contents of the message. + size_t StoreCrc(void *dst, const Crc &crc) const { + uint8 *d = reinterpret_cast(dst); + Crc crc0 = crc; + for (size_t i = 0; i < this->crc_bytes_; ++i) { + d[i] = TO_BYTE(crc0); + crc0 >>= 8; + } + return this->crc_bytes_; + } + + // Returns expected CRC value of CRC(Message,CRC(Message)) + // when CRC is stored after the message. This value is fixed + // and does not depend on the message or CRC start value. + Crc CrcOfCrc() const { + return this->crc_of_crc_; + } + + // Returns ((a * b) mod P) where "a" and "b" are of degree <= (D-1). 
+ Crc Multiply(const Crc &aa, const Crc &bb) const { + Crc a = aa; + Crc b = bb; + if ((a ^ (a - 1)) < (b ^ (b - 1))) { + Crc temp = a; + a = b; + b = temp; + } + + if (a == 0) { + return a; + } + + Crc product = 0; + Crc one = this->one_; + for (; a != 0; a <<= 1) { + if ((a & one) != 0) { + product ^= b; + a ^= one; + } + b = (b >> 1) ^ this->normalize_[Downcast(b & 1)]; + } + + return product; + } + + // Returns ((unnorm * m) mod P) where degree of m is <= (D-1) + // and degree of value "unnorm" is provided explicitly. + Crc MultiplyUnnormalized(const Crc &unnorm, size_t degree, + const Crc &m) const { + Crc v = unnorm; + Crc result = 0; + while (degree > this->degree_) { + degree -= this->degree_; + Crc value = v & (this->one_ | (this->one_ - 1)); + result ^= Multiply(value, Multiply(m, XpowN(degree))); + v >>= this->degree_; + } + result ^= Multiply(v << (this->degree_ - degree), m); + return result; + } + + // returns ((x ** n) mod P). + Crc XpowN(uint64 n) const { + Crc one = this->one_; + Crc result = one; + + for (size_t i = 0; n != 0; ++i, n >>= 1) { + if (n & 1) { + result = Multiply(result, this->x_pow_2n_[i]); + } + } + + return result; + } + + // Returns (x ** (8 * n) mod P). + Crc Xpow8N(uint64 n) const { + return XpowN(n << 3); + } + + // Returns remainder (A mod B) and sets *q = (A/B) of division + // of two polynomials: + // A = dividend + dividend_x_pow_D_coef * x**degree, + // B = divisor. 
+ Crc Divide(const Crc &dividend0, int dividend_x_pow_D_coef, + const Crc &divisor0, Crc *q) const { + Crc divisor = divisor0; + Crc dividend = dividend0; + Crc quotient = 0; + Crc coef = this->one_; + + while ((divisor & 1) == 0) { // Shift divisor and coef in lockstep until divisor's lowest bit is set; never terminates for divisor == 0. + divisor >>= 1; + coef >>= 1; + } + + if (dividend_x_pow_D_coef) { // Account for the extra x**degree term of the dividend. + quotient = coef >> 1; + dividend ^= divisor >> 1; + } + + Crc x_pow_degree_b = 1; + for (;;) { + if ((dividend & x_pow_degree_b) != 0) { + dividend ^= divisor; + quotient ^= coef; + } + if (coef == this->one_) { + break; + } + coef <<= 1; + x_pow_degree_b <<= 1; + divisor <<= 1; + } + + *q = quotient; + return dividend; + } + + // Extended Euclid's algorithm -- for given A finds LCD(A, P) and + // value B such that (A * B) mod P = LCD(A, P). + Crc FindLCD(const Crc &A, Crc *B) const { + if (A == 0 || A == this->one_) { + *B = A; + return A; + } + + // Actually, generating polynomial is + // (generating_polynomial_ + x**degree). + int r0_x_pow_D_coef = 1; + Crc r0 = this->generating_polynomial_; + Crc b0 = 0; + Crc r1 = A; + Crc b1 = this->one_; + + for (;;) { + Crc q; + Crc r = Divide(r0, r0_x_pow_D_coef, r1, &q); + if (r == 0) { + break; + } + r0_x_pow_D_coef = 0; + + r0 = r1; + r1 = r; + + Crc b = b0 ^ Multiply(q, b1); + b0 = b1; + b1 = b; + } + + *B = b1; + return r1; + } + + protected: + Crc canonize_; + Crc x_pow_2n_[sizeof(uint64) * 8]; + Crc generating_polynomial_; + Crc one_; + Crc x_pow_minus_W_; + Crc crc_of_crc_; + Crc normalize_[2]; + size_t crc_bytes_; + size_t degree_; +} GCC_ALIGN_ATTRIBUTE(16); + +#pragma pack(pop) + +} // namespace crcutil + +#endif // CRCUTIL_GF_UTIL_H_ diff --git a/rapidyenc/crcutil-1.0/code/multiword_128_64_gcc_amd64_sse2.cc b/rapidyenc/crcutil-1.0/code/multiword_128_64_gcc_amd64_sse2.cc new file mode 100644 index 0000000..f94fd1f --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/multiword_128_64_gcc_amd64_sse2.cc @@ -0,0 +1,291 @@ +// Copyright 2010 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements multiword CRC for GCC on i386. +// +// Small comment: the trick described in +// http://software.intel.com/en-us/articles/fast-simd-integer-move-for-the-intel-pentiumr-4-processor +// (replace "movdqa dst, src" with "pshufd $0xE4, src, dst") +// did not work: execution time increased from +// 1.8 CPU cycles/byte to 2.1 CPU cycles/byte. +// So it may be good idea on P4 but it's not on newer CPUs. +// +// movaps/xorps vs. movdqa/pxor did not make any difference. 
+ +#include "generic_crc.h" +#include "uint128_sse2.h" + +#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64 && HAVE_SSE2 + +namespace crcutil { + +/* Forward declaration of the inline-assembly worker defined later in this file. */ +template<> uint128_sse2 +GenericCrc::CrcMultiwordGccAmd64Sse2( + const uint8 *src, const uint8 *end, const uint128_sse2 &start) const; + +/* Entry point. Inputs of 7 bytes or fewer are folded byte-by-byte with CRC_BYTE. Longer inputs go through ALIGN_ON_WORD_BOUNDARY_IF_NEEDED and whatever remains is handed to CrcMultiwordGccAmd64Sse2. The running value is XORed with Base().Canonize() on entry and again before every return. */ +template<> +uint128_sse2 GenericCrc::CrcMultiword( + const void *data, size_t bytes, const uint128_sse2 &start) const { + const uint8 *src = static_cast(data); + uint128_sse2 crc = start ^ this->Base().Canonize(); + const uint8 *end = src + bytes; + if (bytes <= 7) { + for (; src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + return (crc ^ this->Base().Canonize()); + } + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64); + if (src >= end) { + return (crc ^ this->Base().Canonize()); + } + + return CrcMultiwordGccAmd64Sse2(src, end, crc); +} + +/* Assembly fragment: XORs the low 64 bits of %[crc0] into %[buf0], shifts %[crc0] right by 8 bytes, then XORs into %[crc0] one %[table_word] entry per byte of %[buf0]. Each byte index is doubled before the scale-8 addressing, i.e. entries are 16 bytes apart. %[tmp0] and %[tmp1] are scratch; %[buf0] is consumed. */ +#define CRC_WORD_ASM() \ + SSE2_MOVQ " %[crc0], %[tmp0]\n" \ + "xorq %[tmp0], %[buf0]\n" \ + "psrldq $8, %[crc0]\n" \ + "movzbq %b[buf0], %[tmp0]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp0], %[tmp0]\n" \ + "pxor (%[table_word], %[tmp0], 8), %[crc0]\n" \ + "movzbq %b[buf0], %[tmp1]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp1], %[tmp1]\n" \ + "pxor 1*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \ + "movzbq %b[buf0], %[tmp0]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp0], %[tmp0]\n" \ + "pxor 2*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \ + "movzbq %b[buf0], %[tmp1]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp1], %[tmp1]\n" \ + "pxor 3*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \ + "movzbq %b[buf0], %[tmp0]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp0], %[tmp0]\n" \ + "pxor 4*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \ + "movzbq %b[buf0], %[tmp1]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp1], %[tmp1]\n" \ + "pxor 5*256*16(%[table_word], %[tmp1], 8), %[crc0]\n" \ + "movzbq %b[buf0], %[tmp0]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp0], %[tmp0]\n" \ + "pxor 6*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" \ +
"addq %[buf0], %[buf0]\n" \ + "pxor 7*256*16(%[table_word], %[buf0], 8), %[crc0]\n" + +/* Inline-assembly worker. Local labels: 1 = main loop consuming 4*8 bytes per iteration into crc0..crc3 via %[table], with the shifted-out part of crc3 carried into the next iteration's crc0 through crc_carryover; 3 = loop consuming one 8-byte word per iteration via CRC_WORD_ASM; 5 = loop consuming one byte per iteration. The result is XORed with Base().Canonize() on return. NOTE(review): the asm statement has no clobber list and no memory operand for the bytes read through %[src] -- confirm this is intended. */ +template<> uint128_sse2 +GenericCrc::CrcMultiwordGccAmd64Sse2( + const uint8 *src, const uint8 *end, const uint128_sse2 &start) const { + __m128i crc0 = start; + __m128i crc1; + __m128i crc2; + __m128i crc3; + __m128i crc_carryover; + + uint64 buf0; + uint64 buf1; + uint64 buf2; + uint64 buf3; + + uint64 tmp0; + uint64 tmp1; + + asm( + "sub $2*4*8 - 1, %[end]\n" + "cmpq %[src], %[end]\n" + "jbe 2f\n" + + "pxor %[crc1], %[crc1]\n" + "pxor %[crc2], %[crc2]\n" + "pxor %[crc3], %[crc3]\n" + "pxor %[crc_carryover], %[crc_carryover]\n" + "movq (%[src]), %[buf0]\n" + "movq 1*8(%[src]), %[buf1]\n" + "movq 2*8(%[src]), %[buf2]\n" + "movq 3*8(%[src]), %[buf3]\n" + + "1:\n" +#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 + "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n" +#endif +#if GCC_VERSION_AVAILABLE(4, 5) + // Bug in GCC 4.2.4? + "add $4*8, %[src]\n" +#else + "lea 4*8(%[src]), %[src]\n" +#endif + "pxor %[crc_carryover], %[crc0]\n" + + SSE2_MOVQ " %[crc0], %[tmp0]\n" + "psrldq $8, %[crc0]\n" + "xorq %[tmp0], %[buf0]\n" + "movzbq %b[buf0], %[tmp0]\n" + "pxor %[crc0], %[crc1]\n" + "addq %[tmp0], %[tmp0]\n" + "shrq $8, %[buf0]\n" + "movdqa (%[table], %[tmp0], 8), %[crc0]\n" + + SSE2_MOVQ " %[crc1], %[tmp1]\n" + "psrldq $8, %[crc1]\n" + "xorq %[tmp1], %[buf1]\n" + "movzbq %b[buf1], %[tmp1]\n" + "pxor %[crc1], %[crc2]\n" + "addq %[tmp1], %[tmp1]\n" + "shrq $8, %[buf1]\n" + "movdqa (%[table], %[tmp1], 8), %[crc1]\n" + + SSE2_MOVQ " %[crc2], %[tmp0]\n" + "psrldq $8, %[crc2]\n" + "xorq %[tmp0], %[buf2]\n" + "movzbq %b[buf2], %[tmp0]\n" + "pxor %[crc2], %[crc3]\n" + "addq %[tmp0], %[tmp0]\n" + "shrq $8, %[buf2]\n" + "movdqa (%[table], %[tmp0], 8), %[crc2]\n" + + SSE2_MOVQ " %[crc3], %[tmp1]\n" + "psrldq $8, %[crc3]\n" + "xorq %[tmp1], %[buf3]\n" + "movzbq %b[buf3], %[tmp1]\n" + "movdqa %[crc3], %[crc_carryover]\n" + "addq %[tmp1], %[tmp1]\n" + "shrq $8, %[buf3]\n" + "movdqa (%[table],
%[tmp1], 8), %[crc3]\n" + +#define XOR(byte) \ + "movzbq %b[buf0], %[tmp0]\n" \ + "shrq $8, %[buf0]\n" \ + "addq %[tmp0], %[tmp0]\n" \ + "pxor " #byte "*256*16(%[table], %[tmp0], 8), %[crc0]\n" \ + "movzbq %b[buf1], %[tmp1]\n" \ + "shrq $8, %[buf1]\n" \ + "addq %[tmp1], %[tmp1]\n" \ + "pxor " #byte "*256*16(%[table], %[tmp1], 8), %[crc1]\n" \ + "movzbq %b[buf2], %[tmp0]\n" \ + "shrq $8, %[buf2]\n" \ + "addq %[tmp0], %[tmp0]\n" \ + "pxor " #byte "*256*16(%[table], %[tmp0], 8), %[crc2]\n" \ + "movzbq %b[buf3], %[tmp1]\n" \ + "shrq $8, %[buf3]\n" \ + "addq %[tmp1], %[tmp1]\n" \ + "pxor " #byte "*256*16(%[table], %[tmp1], 8), %[crc3]\n" + + XOR(1) + XOR(2) + XOR(3) + XOR(4) + XOR(5) + XOR(6) +#undef XOR + + "addq %[buf0], %[buf0]\n" + "pxor 7*256*16(%[table], %[buf0], 8), %[crc0]\n" + "movq (%[src]), %[buf0]\n" + + "addq %[buf1], %[buf1]\n" + "pxor 7*256*16(%[table], %[buf1], 8), %[crc1]\n" + "movq 1*8(%[src]), %[buf1]\n" + + "addq %[buf2], %[buf2]\n" + "pxor 7*256*16(%[table], %[buf2], 8), %[crc2]\n" + "movq 2*8(%[src]), %[buf2]\n" + + "addq %[buf3], %[buf3]\n" + "pxor 7*256*16(%[table], %[buf3], 8), %[crc3]\n" + "movq 3*8(%[src]), %[buf3]\n" + + "cmpq %[src], %[end]\n" + "ja 1b\n" + + "pxor %[crc_carryover], %[crc0]\n" + CRC_WORD_ASM() + + "pxor %[crc1], %[crc0]\n" + "movq %[buf1], %[buf0]\n" + CRC_WORD_ASM() + + "pxor %[crc2], %[crc0]\n" + "movq %[buf2], %[buf0]\n" + CRC_WORD_ASM() + + "pxor %[crc3], %[crc0]\n" + "movq %[buf3], %[buf0]\n" + CRC_WORD_ASM() + + "add $4*8, %[src]\n" + "2:\n" + + "add $2*4*8 - 8, %[end]\n" + + "cmpq %[src], %[end]\n" + "jbe 4f\n" + "3:\n" + "movq (%[src]), %[buf0]\n" + "addq $8, %[src]\n" + CRC_WORD_ASM() + "cmpq %[src], %[end]\n" + "ja 3b\n" + + "4:\n" + "add $7, %[end]\n" + + "cmpq %[src], %[end]\n" + "jbe 6f\n" + + "5:\n" + "movzbq (%[src]), %[buf0]\n" + "add $1, %[src]\n" + SSE2_MOVQ " %[crc0], %[tmp0]\n" + "movzx %b[tmp0], %[tmp0]\n" + "psrldq $1, %[crc0]\n" + "xor %[buf0], %[tmp0]\n" + "addq %[tmp0], %[tmp0]\n" + "pxor
7*256*16(%[table_word], %[tmp0], 8), %[crc0]\n" + + "cmpq %[src], %[end]\n" + "ja 5b\n" + + "6:\n" + + : // outputs + [src] "+r" (src), + [end] "+r" (end), + [crc0] "+x" (crc0), + [crc1] "=&x" (crc1), + [crc2] "=&x" (crc2), + [crc3] "=&x" (crc3), + [crc_carryover] "=&x" (crc_carryover), + [buf0] "=&r" (buf0), + [buf1] "=&r" (buf1), + [buf2] "=&r" (buf2), + [buf3] "=&r" (buf3), + [tmp0] "=&r" (tmp0), + [tmp1] "=&r" (tmp1) + + : // inputs + [table_word] "r" (this->crc_word_), + [table] "r" (this->crc_word_interleaved_)); + + return (this->Base().Canonize() ^ crc0); +} + +} // namespace crcutil + +#endif // defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64 && HAVE_SSE2 diff --git a/rapidyenc/crcutil-1.0/code/multiword_64_64_cl_i386_mmx.cc b/rapidyenc/crcutil-1.0/code/multiword_64_64_cl_i386_mmx.cc new file mode 100644 index 0000000..af7352a --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/multiword_64_64_cl_i386_mmx.cc @@ -0,0 +1,304 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements 64-bit multiword CRC for Microsoft and Intel compilers +// using MMX instructions (i386).
+ +#include "generic_crc.h" + +#if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && defined(_MSC_VER) + +namespace crcutil { + +/* MSVC inline-assembly fragment: XORs CRC0 into BUF0, then rebuilds CRC0 from eight lookups in TABLE (one per byte of BUF0, sub-tables 0..7). TMP0 and TEMP are scratch; BUF0 is consumed. The register macros it uses are defined inside the function below. */ +#define CRC_WORD_MMX() \ + __asm pxor BUF0, CRC0 \ + __asm movd TMP0, BUF0 \ + __asm psrlq BUF0, 32 \ + __asm movzx TEMP, TMP0L \ + __asm shr TMP0, 8 \ + __asm movq CRC0, [TABLE + TEMP * 8] \ + __asm movzx TEMP, TMP0L \ + __asm shr TMP0, 8 \ + __asm pxor CRC0, [TABLE + TEMP * 8 + 1 * 256 * 8] \ + __asm movzx TEMP, TMP0L \ + __asm shr TMP0, 8 \ + __asm pxor CRC0, [TABLE + TEMP * 8 + 2 * 256 * 8] \ + __asm pxor CRC0, [TABLE + TMP0 * 8 + 3 * 256 * 8] \ + __asm movd TMP0, BUF0 \ + __asm movzx TEMP, TMP0L \ + __asm shr TMP0, 8 \ + __asm pxor CRC0, [TABLE + TEMP * 8 + 4 * 256 * 8] \ + __asm movzx TEMP, TMP0L \ + __asm shr TMP0, 8 \ + __asm pxor CRC0, [TABLE + TEMP * 8 + 5 * 256 * 8] \ + __asm movzx TEMP, TMP0L \ + __asm shr TMP0, 8 \ + __asm pxor CRC0, [TABLE + TEMP * 8 + 6 * 256 * 8] \ + __asm pxor CRC0, [TABLE + TMP0 * 8 + 7 * 256 * 8] + +// frame pointer register 'ebp' modified by inline assembly code +#pragma warning(disable: 4731) + +/* MSVC inline-assembly implementation. After ALIGN_ON_WORD_BOUNDARY_IF_NEEDED it runs main_loop (four MMX streams CRC0..CRC3 over the interleaved table), folds the four streams with CRC_WORD_MMX, then runs word_loop (one 8-byte word per iteration over the word table). The remaining bytes are finished in C with CRC_BYTE because the assembly byte loop is disabled with #if 0. ebp serves as TABLE and is saved/restored with push/pop; during main_loop the END bound is read from [esp]. */ +template<> uint64 GenericCrc::CrcMultiwordI386Mmx( + const void *data, + size_t bytes, + const uint64 &start) const { + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + uint64 crc0 = start ^ this->Base().Canonize(); + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64); + if (src >= end) { + return (crc0 ^ this->Base().Canonize()); + } + +#define CRC0 mm0 +#define CRC1 mm1 +#define CRC2 mm2 +#define CRC3 mm3 +#define BUF0 mm4 +#define BUF1 mm5 +#define BUF2 mm6 +#define BUF3 mm7 +#define TMP0 eax +#define TMP0L al +#define TMP0H ah +#define TMP1 ebx +#define TMP1L bl +#define TMP1H bh +#define TMP2 ecx +#define TMP2L cl +#define TMP2H ch +#define TMP3 edx +#define TMP3L dl +#define TMP3H dh +#define TEMP edi +#define SRC esi +#define END [esp] +#define TABLE ebp + + + const uint64 *interleaved_table_address = + &this->crc_word_interleaved_[0][0]; + const uint64
*word_table_address = &this->crc_word_[0][0]; + + __asm { + push ebp + + mov TMP0, interleaved_table_address + + movq CRC0, crc0 + mov SRC, src + mov TMP1, end + sub TMP1, 2*4*8 - 1 + cmp SRC, TMP1 + mov TABLE, word_table_address + jae end_main_loop + + push TABLE + mov TABLE, TMP0 + push TMP1 + + pxor CRC1, CRC1 + pxor CRC2, CRC2 + pxor CRC3, CRC3 + + movq BUF0, [SRC] + movq BUF1, [SRC + 1 * 8] + movq BUF2, [SRC + 2 * 8] + movq BUF3, [SRC + 3 * 8] + + main_loop: +#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 + prefetcht0 [SRC + CRCUTIL_PREFETCH_WIDTH] +#endif + add SRC, 32 + pxor BUF0, CRC0 + pxor BUF1, CRC1 + pxor BUF2, CRC2 + pxor BUF3, CRC3 + + movd TMP0, BUF0 + psrlq BUF0, 32 + movd TMP1, BUF1 + psrlq BUF1, 32 + movd TMP2, BUF2 + psrlq BUF2, 32 + movd TMP3, BUF3 + psrlq BUF3, 32 + + movzx TEMP, TMP0L + movq CRC0, [TABLE + TEMP * 8] + movzx TEMP, TMP1L + movq CRC1, [TABLE + TEMP * 8] + movzx TEMP, TMP2L + movq CRC2, [TABLE + TEMP * 8] + movzx TEMP, TMP3L + movq CRC3, [TABLE + TEMP * 8] + + movzx TEMP, TMP0H + shr TMP0, 16 + pxor CRC0, [TABLE + TEMP * 8 + 1 * 256 * 8] + movzx TEMP, TMP1H + shr TMP1, 16 + pxor CRC1, [TABLE + TEMP * 8 + 1 * 256 * 8] + movzx TEMP, TMP2H + shr TMP2, 16 + pxor CRC2, [TABLE + TEMP * 8 + 1 * 256 * 8] + movzx TEMP, TMP3H + shr TMP3, 16 + pxor CRC3, [TABLE + TEMP * 8 + 1 * 256 * 8] + + movzx TEMP, TMP0L + shr TMP0, 8 + pxor CRC0, [TABLE + TEMP * 8 + 2 * 256 * 8] + movzx TEMP, TMP1L + shr TMP1, 8 + pxor CRC1, [TABLE + TEMP * 8 + 2 * 256 * 8] + movzx TEMP, TMP2L + shr TMP2, 8 + pxor CRC2, [TABLE + TEMP * 8 + 2 * 256 * 8] + movzx TEMP, TMP3L + shr TMP3, 8 + pxor CRC3, [TABLE + TEMP * 8 + 2 * 256 * 8] + + pxor CRC0, [TABLE + TMP0 * 8 + 3 * 256 * 8] + movd TMP0, BUF0 + pxor CRC1, [TABLE + TMP1 * 8 + 3 * 256 * 8] + movd TMP1, BUF1 + pxor CRC2, [TABLE + TMP2 * 8 + 3 * 256 * 8] + movd TMP2, BUF2 + pxor CRC3, [TABLE + TMP3 * 8 + 3 * 256 * 8] + movd TMP3, BUF3 + + movzx TEMP, TMP0L + pxor CRC0, [TABLE + TEMP * 8 + 4 * 256 * 8] + movzx TEMP, TMP1L +
pxor CRC1, [TABLE + TEMP * 8 + 4 * 256 * 8] + movzx TEMP, TMP2L + pxor CRC2, [TABLE + TEMP * 8 + 4 * 256 * 8] + movzx TEMP, TMP3L + pxor CRC3, [TABLE + TEMP * 8 + 4 * 256 * 8] + + movzx TEMP, TMP0H + shr TMP0, 16 + pxor CRC0, [TABLE + TEMP * 8 + 5 * 256 * 8] + movzx TEMP, TMP1H + shr TMP1, 16 + pxor CRC1, [TABLE + TEMP * 8 + 5 * 256 * 8] + movzx TEMP, TMP2H + shr TMP2, 16 + pxor CRC2, [TABLE + TEMP * 8 + 5 * 256 * 8] + movzx TEMP, TMP3H + shr TMP3, 16 + pxor CRC3, [TABLE + TEMP * 8 + 5 * 256 * 8] + + movzx TEMP, TMP0L + shr TMP0, 8 + pxor CRC0, [TABLE + TEMP * 8 + 6 * 256 * 8] + movzx TEMP, TMP1L + shr TMP1, 8 + pxor CRC1, [TABLE + TEMP * 8 + 6 * 256 * 8] + movzx TEMP, TMP2L + shr TMP2, 8 + pxor CRC2, [TABLE + TEMP * 8 + 6 * 256 * 8] + movzx TEMP, TMP3L + shr TMP3, 8 + pxor CRC3, [TABLE + TEMP * 8 + 6 * 256 * 8] + + pxor CRC0, [TABLE + TMP0 * 8 + 7 * 256 * 8] + movq BUF0, [SRC] + pxor CRC1, [TABLE + TMP1 * 8 + 7 * 256 * 8] + movq BUF1, [SRC + 1 * 8] + pxor CRC2, [TABLE + TMP2 * 8 + 7 * 256 * 8] + movq BUF2, [SRC + 2 * 8] + pxor CRC3, [TABLE + TMP3 * 8 + 7 * 256 * 8] + movq BUF3, [SRC + 3 * 8] + + cmp END, SRC + ja main_loop + +#undef END +#define END TMP1 + pop END + pop TABLE + add SRC, 32 + + CRC_WORD_MMX() + + pxor BUF1, CRC1 + movq BUF0, BUF1 + CRC_WORD_MMX() + + pxor BUF2, CRC2 + movq BUF0, BUF2 + CRC_WORD_MMX() + + pxor BUF3, CRC3 + movq BUF0, BUF3 + CRC_WORD_MMX() + + end_main_loop: + add END, 2*4*8 - 8 + cmp SRC, END + jae end_word_loop + + word_loop: + movq BUF0, [SRC] + add SRC, 8 + CRC_WORD_MMX() + cmp END, SRC + ja word_loop + end_word_loop: + +#if 0 // Plain C version is faster? + add END, 7 + cmp SRC, END + jae end_byte_loop + + byte_loop: + movd TMP0, CRC0 + movzx TEMP, byte ptr [SRC] + movzx TMP0, TMP0L + psrlq CRC0, 8 + xor TEMP, TMP0 + add SRC, 1 + pxor CRC0, [TABLE + TEMP*8 + 7*256*8] + cmp END, SRC + ja byte_loop + end_byte_loop: +#endif + + pop ebp + + mov src, SRC + movq crc0, CRC0 + + emms + } + +#if 1 + // Compute CRC of remaining bytes.
+ for (;src < end; ++src) { + CRC_BYTE(this, crc0, *src); + } +#endif + + return (crc0 ^ this->Base().Canonize()); +} + + +} // namespace crcutil + +#endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && defined(_MSC_VER) diff --git a/rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_amd64_asm.cc b/rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_amd64_asm.cc new file mode 100644 index 0000000..a697b27 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_amd64_asm.cc @@ -0,0 +1,298 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements multiword CRC for GCC on AMD64. +// +// According to "Software Optimization Guide for AMD Family 10h Processors" +// http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/40546.pdf +// instead of +// movzbq %al, %rsi +// shrq $8, %rax +// [use %rsi] +// movzbq %al, %rsi +// shrq $8, %rax +// [use %rsi] +// it is better to use 32-bit registers +// (high 32 bits will be cleared on assignment), i.e. +// movzbl %al, %esi +// [use %rsi] +// movzbl %ah, %esi +// shrq $16, %rax +// [use %rsi] +// Makes instructions shorter and removes one shift +// (the latter is not such a big deal as its execution time +// is nicely masked by [use %rsi] instruction). +// +// Performance difference: +// About 10% degradation on bytes = 8 .. 16 +// (clobbering registers that should be saved) +// Break even at 32 bytes.
+// 3% improvement starting from 64 bytes. + +#include "generic_crc.h" + +#if defined(__GNUC__) && CRCUTIL_USE_ASM && HAVE_AMD64 + +namespace crcutil { + +/* Forward declaration of the inline-assembly worker defined later in this file. */ +template<> uint64 GenericCrc::CrcMultiwordGccAmd64( + const void *data, size_t bytes, const uint64 &start) const; + +/* Entry point. Inputs of at most 6 * sizeof(Word) - 1 bytes are handled here: up to five whole words with CRC_WORD, then the leftover bytes with CRC_BYTE. Longer inputs are forwarded to CrcMultiwordGccAmd64. */ +template<> uint64 GenericCrc::CrcMultiword( + const void *data, + size_t bytes, + const uint64 &start) const { + if (bytes <= 6 * sizeof(Word) - 1) { + const uint8 *src = static_cast(data); + uint64 crc = start ^ this->Base().Canonize(); + const uint8 *end = src + bytes; +#define PROCESS_ONE_WORD() do { \ + Word buf = reinterpret_cast(src)[0]; \ + CRC_WORD(this, crc, buf); \ + src += sizeof(Word); \ +} while (0) + if (bytes >= 1 * sizeof(Word)) { + PROCESS_ONE_WORD(); + if (bytes >= 2 * sizeof(Word)) { + PROCESS_ONE_WORD(); + if (bytes >= 3 * sizeof(Word)) { + PROCESS_ONE_WORD(); + if (bytes >= 4 * sizeof(Word)) { + PROCESS_ONE_WORD(); + if (bytes >= 5 * sizeof(Word)) { + PROCESS_ONE_WORD(); + } + } + } + } + } + for (; src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + return (crc ^ this->Base().Canonize()); + } + return this->CrcMultiwordGccAmd64(data, bytes, start); +} + +/* Fixed register assignments used by the assembly below; the same registers are listed in the asm clobber list. */ +#define TMP0 "%%rsi" +#define TMP0W "%%esi" + +#define BUF0 "%%rax" +#define BUF0L "%%al" +#define BUF0H "%%ah" + +#define BUF1 "%%rbx" +#define BUF1L "%%bl" +#define BUF1H "%%bh" + +#define BUF2 "%%rcx" +#define BUF2L "%%cl" +#define BUF2H "%%ch" + +#define BUF3 "%%rdx" +#define BUF3L "%%dl" +#define BUF3H "%%dh" + +/* Assembly fragment: XORs %[crc0] into BUF0, then rebuilds %[crc0] from eight lookups in %[table_word], one per byte of BUF0 (sub-tables 0..7). Bytes are taken in low/high pairs and BUF0 is shifted by 16 after each pair. TMP0 is scratch; BUF0 is consumed. */ +#define CRC_WORD_ASM() \ + "xorq %[crc0], " BUF0 "\n" \ + "movzbq " BUF0L ", " TMP0 "\n" \ + "movq (%[table_word], " TMP0 ", 8), %[crc0]\n" \ + "movzbl " BUF0H ", " TMP0W "\n" \ + "shrq $16, " BUF0 "\n" \ + "xorq 1*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \ + "movzbq " BUF0L ", " TMP0 "\n" \ + "xorq 2*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \ + "movzbl " BUF0H ", " TMP0W "\n" \ + "shrq $16, " BUF0 "\n" \ + "xorq 3*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \ + "movzbq " BUF0L ", " TMP0 "\n" \ +
"xorq 4*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \ + "movzbl " BUF0H ", " TMP0W "\n" \ + "shrq $16, " BUF0 "\n" \ + "xorq 5*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \ + "movzbq " BUF0L ", " TMP0 "\n" \ + "xorq 6*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" \ + "movzbl " BUF0H ", " TMP0W "\n" \ + "xorq 7*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" + +/* Inline-assembly worker. After ALIGN_ON_WORD_BOUNDARY_IF_NEEDED, local labels mean: 1 = main loop consuming 4*8 bytes per iteration into crc0..crc3 via %[table]; 3 = loop consuming one 8-byte word per iteration via CRC_WORD_ASM; 5 = loop consuming one byte per iteration via %[table_word]. The result is XORed with Base().Canonize() on return. */ +template<> uint64 GenericCrc::CrcMultiwordGccAmd64( + const void *data, size_t bytes, const uint64 &start) const { + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + uint64 crc0 = start ^ this->Base().Canonize(); + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64); + if (src >= end) { + return (crc0 ^ this->Base().Canonize()); + } + + uint64 crc1; + uint64 crc2; + uint64 crc3; + + asm( + "subq $2*4*8 - 1, %[end]\n" + "cmpq %[src], %[end]\n" + "jbe 2f\n" + "xorq %[crc1], %[crc1]\n" + "movq (%[src]), " BUF0 "\n" + "movq 1*8(%[src]), " BUF1 "\n" + "movq 2*8(%[src]), " BUF2 "\n" + "movq 3*8(%[src]), " BUF3 "\n" + "movq %[crc1], %[crc2]\n" + "movq %[crc1], %[crc3]\n" + + "1:\n" +#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 + "prefetcht0 " TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n" +#endif // HAVE_SSE + "addq $4*8, %[src]\n" + + // Set buffer data. + "xorq %[crc0], " BUF0 "\n" + "xorq %[crc1], " BUF1 "\n" + "xorq %[crc2], " BUF2 "\n" + "xorq %[crc3], " BUF3 "\n" + + // LOAD crc of byte 0 and shift buffers.
+ "movzbl " BUF0L ", " TMP0W "\n" + "movq (%[table], " TMP0 ", 8), %[crc0]\n" + "movzbl " BUF1L ", " TMP0W "\n" + "movq (%[table], " TMP0 ", 8), %[crc1]\n" + "movzbl " BUF2L ", " TMP0W "\n" + "movq (%[table], " TMP0 ", 8), %[crc2]\n" + "movzbl " BUF3L ", " TMP0W "\n" + "movq (%[table], " TMP0 ", 8), %[crc3]\n" + +#define XOR1(byte1) \ + "movzbl " BUF0L ", " TMP0W "\n" \ + "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc0]\n" \ + "movzbl " BUF1L ", " TMP0W "\n" \ + "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc1]\n" \ + "movzbl " BUF2L ", " TMP0W "\n" \ + "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc2]\n" \ + "movzbl " BUF3L ", " TMP0W "\n" \ + "xorq " #byte1 "*256*8(%[table], " TMP0 ", 8), %[crc3]\n" + +#define XOR2(byte2) \ + "movzbl " BUF0H ", " TMP0W "\n" \ + "shrq $16, " BUF0 "\n" \ + "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc0]\n" \ + "movzbl " BUF1H ", " TMP0W "\n" \ + "shrq $16, " BUF1 "\n" \ + "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc1]\n" \ + "movzbl " BUF2H ", " TMP0W "\n" \ + "shrq $16, " BUF2 "\n" \ + "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc2]\n" \ + "movzbl " BUF3H ", " TMP0W "\n" \ + "shrq $16, " BUF3 "\n" \ + "xorq " #byte2 "*256*8(%[table], " TMP0 ", 8), %[crc3]\n" + + XOR2(1) + XOR1(2) + XOR2(3) + XOR1(4) + XOR2(5) + XOR1(6) + + // Update CRC registers and load buffers.
+ "movzbl " BUF0H ", " TMP0W "\n" + "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc0]\n" + "movq (%[src]), " BUF0 "\n" + "movzbl " BUF1H ", " TMP0W "\n" + "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc1]\n" + "movq 1*8(%[src]), " BUF1 "\n" + "movzbl " BUF2H ", " TMP0W "\n" + "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc2]\n" + "movq 2*8(%[src]), " BUF2 "\n" + "movzbl " BUF3H ", " TMP0W "\n" + "xorq 7*256*8(%[table], " TMP0 ", 8), %[crc3]\n" + "movq 3*8(%[src]), " BUF3 "\n" + + "cmpq %[src], %[end]\n" + "ja 1b\n" + + CRC_WORD_ASM() + + "xorq %[crc1], " BUF1 "\n" + "movq " BUF1 ", " BUF0 "\n" + CRC_WORD_ASM() + + "xorq %[crc2], " BUF2 "\n" + "movq " BUF2 ", " BUF0 "\n" + CRC_WORD_ASM() + + "xorq %[crc3], " BUF3 "\n" + "movq " BUF3 ", " BUF0 "\n" + CRC_WORD_ASM() + + "addq $4*8, %[src]\n" + + "2:\n" + "addq $2*4*8 - 8, %[end]\n" + "cmpq %[src], %[end]\n" + "jbe 4f\n" + + "3:\n" + "movq (%[src]), " BUF0 "\n" + "addq $8, %[src]\n" + CRC_WORD_ASM() + "cmpq %[src], %[end]\n" + "ja 3b\n" + + "4:\n" + "addq $7, %[end]\n" + + "cmpq %[src], %[end]\n" + "jbe 6f\n" + + "5:\n" + "movzbq (%[src]), " BUF0 "\n" + "movzbq %b[crc0], " TMP0 "\n" + "shrq $8, %[crc0]\n" + "xorq " BUF0 ", " TMP0 "\n" + "addq $1, %[src]\n" + "xorq 7*256*8(%[table_word], " TMP0 ", 8), %[crc0]\n" + "cmpq %[src], %[end]\n" + "ja 5b\n" + + "6:\n" + + + : // outputs + [src] "+r" (src), + [end] "+r" (end), + [crc0] "+r" (crc0), + [crc1] "=&r" (crc1), + [crc2] "=&r" (crc2), + [crc3] "=&r" (crc3) + + : // inputs + [table] "r" (&this->crc_word_interleaved_[0][0]), + [table_word] "r" (&this->crc_word_[0][0]) + + : // clobbers + "%rax", // BUF0 + "%rbx", // BUF1 + "%rcx", // BUF2 + "%rdx", // BUF3 + "%rsi" // TMP0 + ); + + return (crc0 ^ this->Base().Canonize()); +} + +} // namespace crcutil + +#endif // defined(__GNUC__) && HAVE_AMD64 && CRCUTIL_USE_ASM diff --git a/rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_i386_mmx.cc b/rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_i386_mmx.cc new file mode 100644 index
0000000..0ffda3b --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/multiword_64_64_gcc_i386_mmx.cc @@ -0,0 +1,284 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements multiword CRC for GCC on i386. + +#include "generic_crc.h" + +#if defined(__GNUC__) && !defined(__clang__) && CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX + +#if defined(__PIC__) && __GNUC__ < 5 +/* workaround for issue with PIC reserving ebx: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54232 */ +/* a little dangerous otherwise, since the compiler could allocate ebx to something else, so we only do it for GCC < 5.0 */ +#define PIC_WORKAROUND +#endif + +namespace crcutil { + +/* Forward declaration of the inline-assembly worker defined later in this file; it carries the GCC_OMIT_FRAME_POINTER attribute macro. */ +template<> uint64 GenericCrc::CrcMultiwordI386Mmx( + const void *data, size_t bytes, const uint64 &start) + const GCC_OMIT_FRAME_POINTER; + +/* Entry point. Inputs of 7 bytes or fewer are folded byte-by-byte with CRC_BYTE; longer inputs are forwarded to CrcMultiwordI386Mmx. */ +template<> uint64 GenericCrc::CrcMultiword( + const void *data, size_t bytes, const uint64 &start) const { + if (bytes <= 7) { + const uint8 *src = static_cast(data); + uint64 crc = start ^ this->Base().Canonize(); + for (const uint8 *end = src + bytes; src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + return (crc ^ this->Base().Canonize()); + } + return CrcMultiwordI386Mmx(data, bytes, start); +} + +/* Assembly fragment: XORs %[crc0] into %[buf0], then rebuilds %[crc0] from eight lookups in %[table], one per byte of %[buf0] (sub-tables 0..7). %[tmp0] and %[temp] are scratch; %[buf0] is consumed. */ +#define CRC_WORD_MMX() \ + "pxor %[crc0], %[buf0]\n" \ + "movd %[buf0], %[tmp0]\n" \ + "psrlq $32, %[buf0]\n" \ + "movzbl %b[tmp0], %[temp]\n" \ + "shrl $8, %[tmp0]\n" \ + "movq (%[table], %[temp], 8), %[crc0]\n" \ +
"movzbl %b[tmp0], %[temp]\n" \ + "shrl $8, %[tmp0]\n" \ + "pxor 1*256*8(%[table], %[temp], 8), %[crc0]\n" \ + "movzbl %b[tmp0], %[temp]\n" \ + "shrl $8, %[tmp0]\n" \ + "pxor 2*256*8(%[table], %[temp], 8), %[crc0]\n" \ + "pxor 3*256*8(%[table], %[tmp0], 8), %[crc0]\n" \ + "movd %[buf0], %[tmp0]\n" \ + "movzbl %b[tmp0], %[temp]\n" \ + "shrl $8, %[tmp0]\n" \ + "pxor 4*256*8(%[table], %[temp], 8), %[crc0]\n" \ + "movzbl %b[tmp0], %[temp]\n" \ + "shrl $8, %[tmp0]\n" \ + "pxor 5*256*8(%[table], %[temp], 8), %[crc0]\n" \ + "movzbl %b[tmp0], %[temp]\n" \ + "shrl $8, %[tmp0]\n" \ + "pxor 6*256*8(%[table], %[temp], 8), %[crc0]\n" \ + "pxor 7*256*8(%[table], %[tmp0], 8), %[crc0]\n" + +/* Inline-assembly worker. After ALIGN_ON_WORD_BOUNDARY_IF_NEEDED, local labels mean: 1 = main loop consuming 4*8 bytes per iteration into crc0..crc3 with %[table] loaded from table_interleaved; 3 = loop consuming one 8-byte word per iteration via CRC_WORD_MMX with %[table] loaded from table_word; 5 = loop consuming one byte per iteration. A separate "emms" asm statement follows. With PIC_WORKAROUND, ebx is pushed/popped by hand and used as TMP3 in place of a compiler-allocated operand. NOTE(review): %[end], %[table_interleaved] and %[table_word] are "m" operands accessed between the manual push and pop of ebx -- confirm they cannot be esp-relative in that configuration. */ +template<> uint64 GenericCrc::CrcMultiwordI386Mmx( + const void *data, size_t bytes, const uint64 &start) const { + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + uint64 crc0 = start ^ this->Base().Canonize(); + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, uint64); + if (src >= end) { + return (crc0 ^ this->Base().Canonize()); + } + + uint64 crc1; + uint64 crc2; + uint64 crc3; + + uint64 buf0; + uint64 buf1; + uint64 buf2; + uint64 buf3; + + uint32 tmp0; + uint32 tmp1; + uint32 tmp2; +#ifndef PIC_WORKAROUND + uint32 tmp3; +#endif + + uint32 temp; + + void *table_ptr; + const uint64 *table_interleaved = &this->crc_word_interleaved_[0][0]; + const uint64 *table_word = &this->crc_word_[0][0]; + + asm( +#ifdef PIC_WORKAROUND + "push %%ebx\n" + #define TMP3 "%%ebx" + #define TMP3b "%%bl" +#else + #define TMP3 "%[tmp3]" + #define TMP3b "%b[tmp3]" +#endif + "subl $2*4*8 - 1, %[end]\n" + "cmpl %[src], %[end]\n" + "jbe 2f\n" + + "pxor %[crc1], %[crc1]\n" + "pxor %[crc2], %[crc2]\n" + "pxor %[crc3], %[crc3]\n" + "movq (%[src]), %[buf0]\n" + "movq 1*8(%[src]), %[buf1]\n" + "movq 2*8(%[src]), %[buf2]\n" + "movq 3*8(%[src]), %[buf3]\n" + + "movl %[table_interleaved], %[table]\n" + "1:\n" +#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 + "prefetcht0 "
TO_STRING(CRCUTIL_PREFETCH_WIDTH) "(%[src])\n" +#endif + "addl $0x20, %[src]\n" + "pxor %[crc0], %[buf0]\n" + "pxor %[crc1], %[buf1]\n" + "pxor %[crc2], %[buf2]\n" + "pxor %[crc3], %[buf3]\n" + + "movd %[buf0], %[tmp0]\n" + "psrlq $32, %[buf0]\n" + "movd %[buf1], %[tmp1]\n" + "psrlq $32, %[buf1]\n" + "movd %[buf2], %[tmp2]\n" + "psrlq $32, %[buf2]\n" + "movd %[buf3], " TMP3 "\n" + "psrlq $32, %[buf3]\n" + + "movzbl %b[tmp0], %[temp]\n" + "shrl $8, %[tmp0]\n" + "movq (%[table], %[temp], 8), %[crc0]\n" + "movzbl %b[tmp1], %[temp]\n" + "shrl $8, %[tmp1]\n" + "movq (%[table], %[temp], 8), %[crc1]\n" + "movzbl %b[tmp2], %[temp]\n" + "shrl $8, %[tmp2]\n" + "movq (%[table], %[temp], 8), %[crc2]\n" + "movzbl " TMP3b ", %[temp]\n" + "shrl $8, " TMP3 "\n" + "movq (%[table], %[temp], 8), %[crc3]\n" + +#define XOR(byte) \ + "movzbl %b[tmp0], %[temp]\n" \ + "shrl $8, %[tmp0]\n" \ + "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc0]\n" \ + "movzbl %b[tmp1], %[temp]\n" \ + "shrl $8, %[tmp1]\n" \ + "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc1]\n" \ + "movzbl %b[tmp2], %[temp]\n" \ + "shrl $8, %[tmp2]\n" \ + "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc2]\n" \ + "movzbl " TMP3b ", %[temp]\n" \ + "shrl $8, " TMP3 "\n" \ + "pxor " #byte "*256*8(%[table], %[temp], 8), %[crc3]\n" + + XOR(1) + XOR(2) + + "pxor 3*256*8(%[table], %[tmp0], 8), %[crc0]\n" + "movd %[buf0], %[tmp0]\n" + "pxor 3*256*8(%[table], %[tmp1], 8), %[crc1]\n" + "movd %[buf1], %[tmp1]\n" + "pxor 3*256*8(%[table], %[tmp2], 8), %[crc2]\n" + "movd %[buf2], %[tmp2]\n" + "pxor 3*256*8(%[table], " TMP3 ", 8), %[crc3]\n" + "movd %[buf3], " TMP3 "\n" + + XOR(4) + XOR(5) + XOR(6) + + "pxor 7*256*8(%[table], %[tmp0], 8), %[crc0]\n" + "movq (%[src]), %[buf0]\n" + "pxor 7*256*8(%[table], %[tmp1], 8), %[crc1]\n" + "movq 1*8(%[src]), %[buf1]\n" + "pxor 7*256*8(%[table], %[tmp2], 8), %[crc2]\n" + "movq 2*8(%[src]), %[buf2]\n" + "pxor 7*256*8(%[table], " TMP3 ", 8), %[crc3]\n" + "movq 3*8(%[src]), %[buf3]\n" + "cmpl
%[src], %[end]\n" + "ja 1b\n" +#undef XOR + + "movl %[table_word], %[table]\n" + CRC_WORD_MMX() + + "pxor %[crc1], %[buf1]\n" + "movq %[buf1], %[buf0]\n" + CRC_WORD_MMX() + + "pxor %[crc2], %[buf2]\n" + "movq %[buf2], %[buf0]\n" + CRC_WORD_MMX() + + "pxor %[crc3], %[buf3]\n" + "movq %[buf3], %[buf0]\n" + CRC_WORD_MMX() + + "addl $4*8, %[src]\n" + "2:\n" + "movl %[table_word], %[table]\n" + + "addl $2*4*8 - 8, %[end]\n" + "cmpl %[src], %[end]\n" + "jbe 4f\n" + "3:\n" + "movq (%[src]), %[buf0]\n" + "addl $0x8, %[src]\n" + CRC_WORD_MMX() + "cmpl %[src], %[end]\n" + "ja 3b\n" + "4:\n" + "addl $7, %[end]\n" + + "cmpl %[src], %[end]\n" + "jbe 6f\n" + + "5:\n" + "movd %[crc0], %[tmp0]\n" + "movzbl (%[src]), %[temp]\n" + "movzbl %b[tmp0], %[tmp0]\n" + "psrlq $8, %[crc0]\n" + "xorl %[tmp0], %[temp]\n" + "addl $1, %[src]\n" + "pxor 7*256*8(%[table], %[temp], 8), %[crc0]\n" + "cmpl %[src], %[end]\n" + "ja 5b\n" + + "6:\n" + +#ifdef PIC_WORKAROUND + "pop %%ebx\n" +#endif +#undef TMP3 + + : // outputs + [src] "+r" (src), + [end] "+m" (end), + [crc0] "+y" (crc0), + [crc1] "=&y" (crc1), + [crc2] "=&y" (crc2), + [crc3] "=&y" (crc3), + [buf0] "=&y" (buf0), + [buf1] "=&y" (buf1), + [buf2] "=&y" (buf2), + [buf3] "=&y" (buf3), + [tmp0] "=&q" (tmp0), + [tmp1] "=&q" (tmp1), + [tmp2] "=&q" (tmp2), +#ifndef PIC_WORKAROUND + [tmp3] "=&q" (tmp3), +#endif + [temp] "=&r" (temp), + [table] "=&r" (table_ptr) + + : // inputs + [table_interleaved] "m" (table_interleaved), + [table_word] "m" (table_word)); + + asm volatile("emms"); + + return (crc0 ^ this->Base().Canonize()); +} + +} // namespace crcutil + +#endif // defined(__GNUC__) && !defined(__clang__) && CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX diff --git a/rapidyenc/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc b/rapidyenc/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc new file mode 100644 index 0000000..9aa74d4 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/multiword_64_64_intrinsic_i386_mmx.cc @@ -0,0 +1,243 @@ +// Copyright 2010 Google Inc.
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements 64-bit multiword CRC using MMX built-in functions. + +#include "generic_crc.h" + +#if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX && !defined(_MSC_VER) && !(defined(__GNUC__) && !defined(__clang__)) + +namespace crcutil { + +/* Forward declaration of the intrinsic-based worker defined later in this file; it carries the GCC_OMIT_FRAME_POINTER attribute macro. */ +template<> uint64 GenericCrc::CrcMultiwordI386Mmx( + const void *data, size_t bytes, const uint64 &start) + const GCC_OMIT_FRAME_POINTER; + +/* NOTE(review): the enclosing #if already requires !defined(_MSC_VER), so the #else branch below (the MSVC warning pragmas) appears unreachable in this file -- confirm. */ +#if !defined(_MSC_VER) +/* Entry point. Inputs of 7 bytes or fewer are folded byte-by-byte with CRC_BYTE; longer inputs are forwarded to CrcMultiwordI386Mmx. */ +template<> uint64 GenericCrc::CrcMultiword( + const void *data, + size_t bytes, + const uint64 &start) const { + if (bytes <= 7) { + const uint8 *src = static_cast(data); + uint64 crc = start ^ Base().Canonize(); + for (const uint8 *end = src + bytes; src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + return (crc ^ Base().Canonize()); + } + return CrcMultiwordI386Mmx(data, bytes, start); +} +#else +#pragma warning(push) +// CL: uninitialized local variable 'crc1' used +// Wrong: crc1 = XOR(crc1, crc1) sets it to 0. +#pragma warning(disable: 4700) + +#pragma warning(disable: 4619) // there is no warning number '592' + +// ICL: variable "crc1" is used before its value is set +// Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
+#pragma warning(disable: 592) +#endif // !defined(_MSC_VER) + +/* MM64 casts an address for indexed __m64 access; MM64_TABLE(byte) selects row `byte` of crc_word_interleaved_. */ +#define MM64(adr) reinterpret_cast(adr) +#define MM64_TABLE(byte) MM64(crc_word_interleaved_[byte]) + +/* XORs crc into buf, then rebuilds crc from eight lookups in crc_word_[0..7], one per byte of buf. buf is consumed. */ +#define CRC_WORD_MMX(this, crc, buf) do { \ + buf = _mm_xor_si64(buf, crc); \ + uint32 tmp = static_cast(_mm_cvtsi64_si32(buf)); \ + buf = _mm_srli_si64(buf, 32); \ + crc = MM64(crc_word_[0])[TO_BYTE(tmp)]; \ + tmp >>= 8; \ + crc = _mm_xor_si64(crc, MM64(crc_word_[1])[TO_BYTE(tmp)]); \ + tmp >>= 8; \ + crc = _mm_xor_si64(crc, MM64(crc_word_[2])[TO_BYTE(tmp)]); \ + tmp >>= 8; \ + crc = _mm_xor_si64(crc, MM64(crc_word_[3])[tmp]); \ + tmp = static_cast(_mm_cvtsi64_si32(buf)); \ + crc = _mm_xor_si64(crc, MM64(crc_word_[4])[TO_BYTE(tmp)]); \ + tmp >>= 8; \ + crc = _mm_xor_si64(crc, MM64(crc_word_[5])[TO_BYTE(tmp)]); \ + tmp >>= 8; \ + crc = _mm_xor_si64(crc, MM64(crc_word_[6])[TO_BYTE(tmp)]); \ + tmp >>= 8; \ + crc = _mm_xor_si64(crc, MM64(crc_word_[7])[tmp]); \ +} while (0) + +/* MMX-intrinsic worker. After ALIGN_ON_WORD_BOUNDARY_IF_NEEDED it runs three phases: (1) when more than 4*8 whole-block bytes are available, a do/while loop keeps four __m64 streams crc0..crc3 over the interleaved table and then folds them with CRC_WORD_MMX; (2) remaining whole 8-byte words go through CRC_WORD_MMX one at a time; (3) leftover bytes go through CRC_BYTE. _mm_empty() is called at the end of each MMX phase, and the uint64 <-> __m64 transfers go through a local union. */ +template<> uint64 GenericCrc::CrcMultiwordI386Mmx( + const void *data, size_t bytes, const uint64 &start) const { + const uint8 *src = static_cast(data); + const uint8 *end = src + bytes; + uint64 crc = start ^ Base().Canonize(); + + ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64); + if (src >= end) { + return (crc ^ Base().Canonize()); + } + + // Process 4 registers of sizeof(uint64) bytes at once. + bytes = static_cast(end - src) & ~(4*8 - 1); + if (bytes > 4*8) { + const uint8 *stop = src + bytes - 4*8; + union { + __m64 m64; + uint64 u64; + } temp; + __m64 crc0; + __m64 crc1; + __m64 crc2; + __m64 crc3; + __m64 buf0 = MM64(src)[0]; + __m64 buf1 = MM64(src)[1]; + __m64 buf2 = MM64(src)[2]; + __m64 buf3 = MM64(src)[3]; + + temp.u64 = crc; + crc0 = temp.m64; +#if defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4) + // There is no way to suppress a warning in GCC; + // generate extra assignments.
+ temp.u64 = 0; + crc1 = temp.m64; + crc2 = temp.m64; + crc3 = temp.m64; +#else + crc1 = _mm_xor_si64(crc1, crc1); + crc2 = _mm_xor_si64(crc2, crc2); + crc3 = _mm_xor_si64(crc3, crc3); +#endif // defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4) + + do { + PREFETCH(src); + src += 4*8; + + buf0 = _mm_xor_si64(buf0, crc0); + buf1 = _mm_xor_si64(buf1, crc1); + buf2 = _mm_xor_si64(buf2, crc2); + buf3 = _mm_xor_si64(buf3, crc3); + + uint32 tmp0 = static_cast(_mm_cvtsi64_si32(buf0)); + uint32 tmp1 = static_cast(_mm_cvtsi64_si32(buf1)); + uint32 tmp2 = static_cast(_mm_cvtsi64_si32(buf2)); + uint32 tmp3 = static_cast(_mm_cvtsi64_si32(buf3)); + + buf0 = _mm_srli_si64(buf0, 32); + buf1 = _mm_srli_si64(buf1, 32); + buf2 = _mm_srli_si64(buf2, 32); + buf3 = _mm_srli_si64(buf3, 32); + + crc0 = MM64_TABLE(0)[TO_BYTE(tmp0)]; + tmp0 >>= 8; + crc1 = MM64_TABLE(0)[TO_BYTE(tmp1)]; + tmp1 >>= 8; + crc2 = MM64_TABLE(0)[TO_BYTE(tmp2)]; + tmp2 >>= 8; + crc3 = MM64_TABLE(0)[TO_BYTE(tmp3)]; + tmp3 >>= 8; + +#define XOR(byte) do { \ + crc0 = _mm_xor_si64(crc0, MM64_TABLE(byte)[TO_BYTE(tmp0)]); \ + tmp0 >>= 8; \ + crc1 = _mm_xor_si64(crc1, MM64_TABLE(byte)[TO_BYTE(tmp1)]); \ + tmp1 >>= 8; \ + crc2 = _mm_xor_si64(crc2, MM64_TABLE(byte)[TO_BYTE(tmp2)]); \ + tmp2 >>= 8; \ + crc3 = _mm_xor_si64(crc3, MM64_TABLE(byte)[TO_BYTE(tmp3)]); \ + tmp3 >>= 8; \ +} while (0) + + XOR(1); + XOR(2); + + crc0 = _mm_xor_si64(crc0, MM64_TABLE(3)[tmp0]); + tmp0 = static_cast(_mm_cvtsi64_si32(buf0)); + crc1 = _mm_xor_si64(crc1, MM64_TABLE(3)[tmp1]); + tmp1 = static_cast(_mm_cvtsi64_si32(buf1)); + crc2 = _mm_xor_si64(crc2, MM64_TABLE(3)[tmp2]); + tmp2 = static_cast(_mm_cvtsi64_si32(buf2)); + crc3 = _mm_xor_si64(crc3, MM64_TABLE(3)[tmp3]); + tmp3 = static_cast(_mm_cvtsi64_si32(buf3)); + + XOR(4); + XOR(5); + XOR(6); + +#undef XOR + + crc0 = _mm_xor_si64(crc0, MM64_TABLE(sizeof(uint64) - 1)[tmp0]); + buf0 = MM64(src)[0]; + crc1 = _mm_xor_si64(crc1, MM64_TABLE(sizeof(uint64) - 1)[tmp1]); + buf1 = MM64(src)[1]; + crc2 =
_mm_xor_si64(crc2, MM64_TABLE(sizeof(uint64) - 1)[tmp2]); + buf2 = MM64(src)[2]; + crc3 = _mm_xor_si64(crc3, MM64_TABLE(sizeof(uint64) - 1)[tmp3]); + buf3 = MM64(src)[3]; + } + while (src < stop); + + CRC_WORD_MMX(this, crc0, buf0); + buf1 = _mm_xor_si64(buf1, crc1); + CRC_WORD_MMX(this, crc0, buf1); + buf2 = _mm_xor_si64(buf2, crc2); + CRC_WORD_MMX(this, crc0, buf2); + buf3 = _mm_xor_si64(buf3, crc3); + CRC_WORD_MMX(this, crc0, buf3); + + temp.m64 = crc0; + crc = temp.u64; + + _mm_empty(); + + src += 4*8; + } + + // Process sizeof(uint64) bytes at once. + bytes = static_cast(end - src) & ~(sizeof(uint64) - 1); + if (bytes > 0) { + union { + __m64 m64; + uint64 u64; + } temp; + __m64 crc0; + + temp.u64 = crc; + crc0 = temp.m64; + + for (const uint8 *stop = src + bytes; src < stop; src += sizeof(uint64)) { + __m64 buf0 = MM64(src)[0]; + CRC_WORD_MMX(this, crc0, buf0); + } + + temp.m64 = crc0; + crc = temp.u64; + + _mm_empty(); + } + + // Compute CRC of remaining bytes. + for (;src < end; ++src) { + CRC_BYTE(this, crc, *src); + } + + return (crc ^ Base().Canonize()); +} + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif // defined(_MSC_VER) + +} // namespace crcutil + +#endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX diff --git a/rapidyenc/crcutil-1.0/code/platform.h b/rapidyenc/crcutil-1.0/code/platform.h new file mode 100644 index 0000000..936cf7d --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/platform.h @@ -0,0 +1,245 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Detects configuration and defines compiler-specific macros. +// Also, sets user-defined CRUTIL_USE_* macros to default values. + +#ifndef CRCUTIL_PLATFORM_H_ +#define CRCUTIL_PLATFORM_H_ + +// Permanently disable some annoying warnings generated +// by Microsoft CL when compiling Microsoft's headers. +#include "std_headers.h" + +// Use inline asm version of the code? +#if !defined(CRCUTIL_USE_ASM) +#define CRCUTIL_USE_ASM 1 +#endif // !defined(CRCUTIL_USE_ASM) + + +#if !defined(HAVE_I386) +#if defined(__i386__) || defined(_M_IX86) +#define HAVE_I386 1 +#else +#define HAVE_I386 0 +#endif // defined(__i386__) || defined(_M_IX86) +#endif // defined(HAVE_I386) + + +#if !defined(HAVE_AMD64) +#if defined(__amd64__) || defined(_M_AMD64) +#define HAVE_AMD64 1 +#else +#define HAVE_AMD64 0 +#endif // defined(__amd64__) || defined(_M_AMD64) +#endif // defined(HAVE_AMD64) + + +#if HAVE_AMD64 || HAVE_I386 +#if defined(_MSC_VER) +#pragma warning(push) +// '_M_IX86' is not defined as a preprocessor macro +#pragma warning(disable: 4668) +#include +#pragma warning(pop) +#endif // defined(_MSC_VER) + + +#if !defined(HAVE_MMX) +#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__)) +#define HAVE_MMX 1 +#else +#define HAVE_MMX 0 +#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__)) +#endif // !defined(HAVE_MMX) + + +#if !defined(HAVE_SSE) +#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__)) +#include +#define HAVE_SSE 1 +#else +#define HAVE_SSE 0 +#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__)) +#endif // !defined(HAVE_SSE) + + +#if !defined(HAVE_SSE2) +#if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__)) +#include +#define HAVE_SSE2 1 +#else +#define HAVE_SSE2 0 +#endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__)) +#endif // !defined(HAVE_SSE2) + +#else + +#if 
!defined(HAVE_MMX) +#define HAVE_MMX 0 +#endif // !defined(HAVE_MMX) + +#if !defined(HAVE_SSE) +#define HAVE_SSE 0 +#endif // !defined(HAVE_SSE) + +#if !defined(HAVE_SSE2) +#define HAVE_SSE2 0 +#endif // !defined(HAVE_SSE2) + +#endif // HAVE_AMD64 || HAVE_I386 + +// Error checking +#if HAVE_SSE && !HAVE_MMX +#error SSE is available but not MMX? +#endif // HAVE_SSE && !HAVE_MMX + +#if HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX) +#error SSE2 is available but not SSE or MMX? +#endif // HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX) + + +#if !defined(CRCUTIL_PREFETCH_WIDTH) +// On newer X5550 CPU, heavily optimized CrcMultiword is 3% faster without +// prefetch for inputs smaller than 8MB and less than 1% slower for 8MB and +// larger blocks. On older Q9650 CPU, the code is 2-3% faster for inputs +// smaller than 8MB, 4-5% slower when length >= 8MB. +// Tested with prefetch length 256, 512, and 4096. +// +// At this moment there is no compelling reason to use prefetching. +// +#define CRCUTIL_PREFETCH_WIDTH 0 +#endif // !defined(CRCUTIL_PREFETCH_WIDTH) + + +#if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 +#define PREFETCH(src) \ + _mm_prefetch(reinterpret_cast(src) + CRCUTIL_PREFETCH_WIDTH, \ + _MM_HINT_T0) +#else +#define PREFETCH(src) +#endif // HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 + + +// If block size exceeds CRCUTIL_MIN_ALIGN_SIZE, align the data +// before accessing it at word boundary. See generic_crc.cc, +// ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() macro. +#if !defined(CRCUTIL_MIN_ALIGN_SIZE) +#if HAVE_AMD64 || HAVE_I386 +#define CRCUTIL_MIN_ALIGN_SIZE (1024) +#else +#define CRCUTIL_MIN_ALIGN_SIZE 0 +#endif // HAVE_AMD64 || HAVE_I386 +#endif // !defined(CRCUTIL_MIN_ALIGN_SIZE) + + +// Use _mm_crc32_u64/32/8 intrinics? +// If not, they will be implemented in software. 
+#if !HAVE_I386 && !HAVE_AMD64 + +#undef CRCUTIL_USE_MM_CRC32 +#define CRCUTIL_USE_MM_CRC32 0 + +#else + +#if !defined(CRCUTIL_USE_MM_CRC32) +#if defined(_MSC_VER) || defined(__GNUC__) +#define CRCUTIL_USE_MM_CRC32 1 +#else +#define CRCUTIL_USE_MM_CRC32 0 +#endif // defined(_MSC_VER) || defined(__GNUC__) +#endif // !defined(CRCUTIL_USE_MM_CRC32) + +#endif // !HAVE_I386 && !HAVE_AMD64 + + +// Stringize -- always handy. +#define TO_STRING_VALUE(arg) #arg +#define TO_STRING(arg) TO_STRING_VALUE(arg) + + +// Compilers give "right shift count >= width of type" warning even +// though the shift happens only under appropriate "if". +#define SHIFT_RIGHT_NO_WARNING(value, bits) \ + ((value) >> (((bits) < (8 * sizeof(value))) ? (bits) : 0)) +#define SHIFT_RIGHT_SAFE(value, bits) \ + ((bits) < (8 * sizeof(value)) ? SHIFT_RIGHT_NO_WARNING(value, bits) : 0) + +// The same for left shifts. +#define SHIFT_LEFT_NO_WARNING(value, bits) \ + ((value) << (((bits) < (8 * sizeof(value))) ? (bits) : 0)) +#define SHIFT_LEFT_SAFE(value, bits) \ + ((bits) < (8 * sizeof(value)) ? SHIFT_LEFT_NO_WARNING(value, bits) : 0) + +// GCC-specific macros. +// +#define GCC_VERSION_AVAILABLE(major, minor) \ + (defined(__GNUC__) && \ + (__GNUC__ > (major) || \ + (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) + + +#if defined(__GNUC__) + +// The GenericCrc tables must be properly aligned. +// Penalty for misalignment? 50% performance degradation. +// For 128-bit SSE2, the penalty is access violation. +#define GCC_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n))) + +#if GCC_VERSION_AVAILABLE(4, 4) +// If not marked as "omit frame pointer", +// GCC won't be able to find enough registers. 
+#define GCC_OMIT_FRAME_POINTER \ + __attribute__((__optimize__(2, "omit-frame-pointer"))) +#endif // GCC_VERSION_AVAILABLE(4, 4) + +#if !defined(__forceinline) +#define __forceinline __attribute__((__always_inline__)) inline +#endif // !defined(__forceinline) + +#if defined(__APPLE_CC__) +// The version of GCC used by Max OS X xCode v 5664 does not understand +// "movq xmm, r64" instruction and requires the use of "movd" (probably +// because of the bug in GCC which treats "movq/movd xmm,r64 or r64,xmm" +// the same). +// +// Leaving common sense aside, let's peek into Intel's instruction +// reference manual. That's what description of MOVD command says: +// MOVD xmm, r/m32 (opcode 66 0F 6E /r) +// MOVD r/m32, xmm (opcode 66 0F 7E /r) +// MOVQ xmm, r/m64 (opcode 66 REX.W 0F 6E /r) +// MOVQ r/m64, xmm (opcode 66 REX.W 0F 7E /r) +#define SSE2_MOVQ "movd" +#else +#define SSE2_MOVQ "movq" +#endif // defined(__APPLE_CC__) + +#endif // defined(__GNUC__) + + +// Define compiler-specific macros that were not set yet. +#if !defined(_MSC_VER) && !defined(__forceinline) +#define __forceinline inline +#endif // !defined(_MSC_VER) && !defined(__forceinline) + +#if !defined(GCC_OMIT_FRAME_POINTER) +#define GCC_OMIT_FRAME_POINTER +#endif // !defined(GCC_OMIT_FRAME_POINTER) + +#if !defined(GCC_ALIGN_ATTRIBUTE) +#define GCC_ALIGN_ATTRIBUTE(n) +#endif // !defined(GCC_ALIGN_ATTRIBUTE) + + +#endif // CRCUTIL_PLATFORM_H_ diff --git a/rapidyenc/crcutil-1.0/code/protected_crc.h b/rapidyenc/crcutil-1.0/code/protected_crc.h new file mode 100644 index 0000000..762fced --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/protected_crc.h @@ -0,0 +1,61 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Protects CRC tables with its own CRC. +// CRC tables get corrupted too, and if corruption is +// not caught, data poisoning becomes a reality. + +#ifndef CRCUTIL_PROTECTED_CRC_H_ +#define CRCUTIL_PROTECTED_CRC_H_ + +namespace crcutil { + +#pragma pack(push, 16) + +// Class CrcImplementation should not have virtual functions: +// vptr is stored as the very first field, vptr value is defined +// at runtime, so it is impossible to CRC(*this) once and +// guarantee that this value will not change from run to run. +// +template class ProtectedCrc + : public CrcImplementation { + public: + typedef typename CrcImplementation::Crc Crc; + + // Returns check value that the caller should compare + // against pre-computed, trusted constant. + // + // Computing SelfCheckValue() after CRC initialization, + // storing it in memory, and periodically checking against + // stored value may not work: if CRC tables were initialized + // incorrectly and/or had been corrupted during initialization, + // CheckValue() will return garbage. Garbage in, garbage out. + // Consequitive checks will not detect a problem, the application + // will happily produce and save the data with corrupt CRC. + // + // The application should call SelfCheckValue() regularly: + // 1. First and foremost, on every CRC mismatch. + // 2. After CRC'ing the data but before sending it out or writing it. + // 3. Worst case, every Nth CRC'ed byte or every Nth call to CRC. 
+ // + Crc SelfCheckValue() const { + return CrcDefault(this, sizeof(*this), 0); + } +} GCC_ALIGN_ATTRIBUTE(16); + +#pragma pack(pop) + +} // namespace crcutil + +#endif // CRCUTIL_PROTECTED_CRC_H_ diff --git a/rapidyenc/crcutil-1.0/code/rolling_crc.h b/rapidyenc/crcutil-1.0/code/rolling_crc.h new file mode 100644 index 0000000..ad4a947 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/rolling_crc.h @@ -0,0 +1,106 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements rolling CRC (e.g. for Rabin fingerprinting). + +#ifndef CRCUTIL_ROLLING_CRC_H_ +#define CRCUTIL_ROLLING_CRC_H_ + +#include "base_types.h" // size_t, uint8 +#include "crc_casts.h" // TO_BYTE + +namespace crcutil { + +#pragma pack(push, 16) + +// CrcImplementation should provide: +// - typename Crc +// - typename TableEntry +// - typename Word +// - Crc CrcDefault(const void *data, size_t bytes, const Crc &start) +// - const GfUtil &Base() const +template class RollingCrc { + public: + typedef typename CrcImplementation::Crc Crc; + typedef typename CrcImplementation::TableEntry TableEntry; + typedef typename CrcImplementation::Word Word; + + RollingCrc() {} + + // Initializes internal data structures. + // Retains reference to "crc" instance -- it is used by Start(). 
+  RollingCrc(const CrcImplementation &crc,
+             size_t roll_window_bytes,
+             const Crc &start_value) {
+    Init(crc, roll_window_bytes, start_value);
+  }
+
+  // Computes the CRC of the first window: "roll_window_bytes" bytes at
+  // "data", seeded with the start value that was passed to Init().
+  // Dereferences the "crc" instance captured by Init(), so Init() (or the
+  // three-argument constructor) must have been called first.
+  Crc Start(const void *data) const {
+    return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
+  }
+
+  // Advances the window by one byte: given the CRC of the current window,
+  // returns the CRC of the window that drops "byte_out" at the front and
+  // appends "byte_in" at the back.
+  //
+  // "byte_out" and "TO_BYTE(old_crc) ^ byte_in" index the 256-entry tables
+  // directly, so both arguments must be byte values in the range [0, 255].
+  Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
+    return (old_crc >> 8) ^ in_[TO_BYTE(old_crc) ^ byte_in] ^ out_[byte_out];
+  }
+
+  // Initializes internal data structures.
+  // Retains a pointer to the "crc" instance -- it is used by Start() --
+  // so "crc" must outlive this object.
+  void Init(const CrcImplementation &crc,
+            size_t roll_window_bytes,
+            const Crc &start_value) {
+    crc_ = &crc;
+    roll_window_bytes_ = roll_window_bytes;
+    start_value_ = start_value;
+
+    // "add" is the constant term that is folded into every out_[] entry.
+    Crc add = crc.Base().Canonize() ^ start_value;
+    add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes));
+    add ^= crc.Base().Canonize();
+    Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1);
+    add = crc.Base().Multiply(add, mul);
+
+    // out_[i]: correction XOR'ed into the CRC when byte value "i" leaves
+    // the window.
+    mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree());
+    for (size_t i = 0; i < 256; ++i) {
+      out_[i] = static_cast<TableEntry>(
+                    crc.Base().MultiplyUnnormalized(
+                        static_cast<Crc>(i), 8, mul) ^ add);
+    }
+    // in_[i]: copy of the last row of the CRC implementation's word table,
+    // used by Roll() to feed in the byte that enters the window.
+    for (size_t i = 0; i < 256; ++i) {
+      in_[i] = crc.crc_word_[sizeof(Word) - 1][i];
+    }
+  }
+
+  // Returns start value.
+  Crc StartValue() const { return start_value_; }
+
+  // Returns length of roll window.
+  size_t WindowBytes() const { return roll_window_bytes_; }
+
+ protected:
+  // Lookup tables consumed by Roll(); filled in by Init().
+  TableEntry in_[256];
+  TableEntry out_[256];
+
+  // Used only by Start().
+ Crc start_value_; + const CrcImplementation *crc_; + size_t roll_window_bytes_; +} GCC_ALIGN_ATTRIBUTE(16); + +#pragma pack(pop) + +} // namespace crcutil + +#endif // CRCUTIL_ROLLING_CRC_H_ diff --git a/rapidyenc/crcutil-1.0/code/std_headers.h b/rapidyenc/crcutil-1.0/code/std_headers.h new file mode 100644 index 0000000..5c7b6a3 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/std_headers.h @@ -0,0 +1,51 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Includes some standard C headers for size_t, memset, etc. +// +// Also, permanently disables a number of warnings produced +// by Microsoft's compiler when it includes standard headers +// (surprisingly, also by Microsoft). + +#ifndef CRCUTIL_STD_HEADERS_H_ +#define CRCUTIL_STD_HEADERS_H_ + +#if defined(_MSC_VER) +// '4' bytes padding added after data member ... +#pragma warning(disable:4820) + +// unreferenced inline function has been removed ... +#pragma warning(disable:4514) + +// conditional expression is constant +#pragma warning(disable: 4127) + +// function ... not inlined +#pragma warning(disable: 4710) + +// function ... 
selected for automatic inline expansion +#pragma warning(disable: 4711) + +#define _CRT_SECURE_NO_WARNINGS + +#endif // defined(_MSC_VER) + +// #define _CSTDLIB_ +#include // always handy +#include // memset +#include // size_t, _rotl/_rotl64(MSC) +#include // ptrdiff_t (GNUC) +#include // va_list + +#endif // CRCUTIL_STD_HEADERS_H_ diff --git a/rapidyenc/crcutil-1.0/code/uint128_sse2.h b/rapidyenc/crcutil-1.0/code/uint128_sse2.h new file mode 100644 index 0000000..6365135 --- /dev/null +++ b/rapidyenc/crcutil-1.0/code/uint128_sse2.h @@ -0,0 +1,312 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements a limited set of 128-bit arithmetic operations +// (the ones that are used by CRC) using SSE2 intrinsics. + +#ifndef CRCUTIL_UINT128_SSE2_H_ +#define CRCUTIL_UINT128_SSE2_H_ + +#include "base_types.h" +#include "crc_casts.h" // Downcast, CrcFromUint64, Uint64FromCrc +#include "platform.h" + +#if HAVE_SSE2 + +namespace crcutil { + +// Specialized functions handling __m128i. +template<> __forceinline uint64 Downcast(const __m128i &value) { +#if HAVE_AMD64 && defined(__GNUC__) + // GCC 4.4.x is too smart and, instead of MOVQ, generates SSE4 PEXTRQ + // instruction when the code is compiled with -mmsse4. + // Fixed in 4.5 which generates conversion through memory (why?). + // And -- yes, it makes quite measurable difference. 
+ uint64 temp; + asm(SSE2_MOVQ " %[i128], %[u64]\n" : [u64] "=r" (temp) : [i128] "x" (value)); + return temp; +#elif HAVE_AMD64 && (!defined(_MSC_FULL_VER) || _MSC_FULL_VER > 150030729) + return static_cast(_mm_cvtsi128_si64(value)); +#else + // 64-bit CL 15.00.30729.1 -O2 generates incorrect code (tests fail). + // _mm_cvtsi128_si64() is not available on i386. + uint64 temp; + _mm_storel_epi64(reinterpret_cast<__m128i *>(&temp), value); + return temp; +#endif +} + + +class uint128_sse2 { + public: + uint128_sse2() {} + ~uint128_sse2() {} + + // Default casts to uint128_sse2 and assignment operator. + __forceinline void operator =(uint64 value) { +#if HAVE_AMD64 && defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 5) + // Prevent generation of SSE4 pinsrq insruction when + // compiling with GCC 4.4.x with -msse4 flag. + asm(SSE2_MOVQ " %[u64], %[i128]\n" : [i128] "=x" (x_) : [u64] "r" (value)); +#elif HAVE_AMD64 + x_ = _mm_cvtsi64_si128(static_cast(value)); +#else + x_ = _mm_loadl_epi64(reinterpret_cast(&value)); +#endif + } + __forceinline uint128_sse2(uint64 x) { + *this = x; + } + __forceinline uint128_sse2(const __m128i x) : x_(x) { + } + __forceinline uint128_sse2(const uint128_sse2& x) : x_(x.x_) { + } + __forceinline operator __m128i() const { + return x_; + } + __forceinline void operator =(const uint128_sse2 &x) { + x_ = x.x_; + } + + // Extracts 64 less significant bits. + __forceinline uint64 to_uint64() const { + return Downcast<__m128i, uint64>(x_); + } + + // Comparisons. 
+  // Returns true when all 128 bits match: the XOR of two equal values is
+  // zero in both 64-bit halves.
+  __forceinline bool operator ==(const uint128_sse2 &y) const {
+    union {
+      __m128i i128;
+      uint64 u64[2];
+    } t;
+    t.i128 = _mm_xor_si128(x_, y.x_);
+    return (t.u64[0] | t.u64[1]) == 0;
+  }
+  // Returns true when the value equals the zero-extended 64-bit "value":
+  // the low half (u64[0]) matches and the high half (u64[1]) is zero.
+  __forceinline bool operator ==(uint64 value) const {
+    union {
+      __m128i i128;
+      uint64 u64[2];
+    } t;
+    t.i128 = x_;
+    return (t.u64[0] == value && t.u64[1] == 0);
+  }
+  // Inequality is the exact negation of the matching operator ==.
+  __forceinline bool operator !=(const uint128_sse2 &y) const {
+    return !(*this == y);
+  }
+  __forceinline bool operator !=(uint64 value) const {
+    return !(*this == value);
+  }
+
+  // Unsigned 128-bit "less than".
+  // The high half (u64[1]) is the most significant one and decides the
+  // result; the low half (u64[0]) only breaks ties between equal high
+  // halves.
+  __forceinline bool operator <(const uint128_sse2 &y) const {
+    union {
+      __m128i i128;
+      uint64 u64[2];
+    } xx, yy;
+    xx.i128 = x_;
+    yy.i128 = y.x_;
+    return (xx.u64[1] < yy.u64[1] ||
+            (xx.u64[1] == yy.u64[1] && xx.u64[0] < yy.u64[0]));
+  }
+
+  // Bitwise logic operators.
+  // Each one maps to a single SSE2 instruction over the full 128 bits.
+  __forceinline uint128_sse2 operator ^(const uint128_sse2 &y) const {
+    return _mm_xor_si128(x_, y.x_);
+  }
+  __forceinline uint128_sse2 operator &(const uint128_sse2 &y) const {
+    return _mm_and_si128(x_, y.x_);
+  }
+  __forceinline uint128_sse2 operator |(const uint128_sse2 &y) const {
+    return _mm_or_si128(x_, y.x_);
+  }
+
+  // Compound forms update the stored register in place.
+  __forceinline void operator ^=(const uint128_sse2 &y) {
+    x_ = _mm_xor_si128(x_, y.x_);
+  }
+  __forceinline void operator &=(const uint128_sse2 &y) {
+    x_ = _mm_and_si128(x_, y.x_);
+  }
+  __forceinline void operator |=(const uint128_sse2 &y) {
+    x_ = _mm_or_si128(x_, y.x_);
+  }
+
+  // Arithmetic operators.
+  // Adds a 64-bit value, propagating the carry out of the low half
+  // (u64[0]) into the high half (u64[1]).
+  __forceinline uint128_sse2 operator +(uint64 y) const {
+    union {
+      __m128i i128;
+      uint64 u64[2];
+    } temp;
+    temp.i128 = x_;
+    // a + b >= 2**64 iff
+    // a + b > (2**64 - 1) iff
+    // a > (2**64 - 1) - b iff
+    // a > ~b
+    if (temp.u64[0] > ~y) {
+      temp.u64[1] += 1;
+    }
+    temp.u64[0] += y;
+    return temp.i128;
+  }
+  __forceinline void operator +=(uint64 x) {
+    *this = *this + x;
+  }
+  // Subtracts a 64-bit value, borrowing from the high half (u64[1]) when
+  // the low half (u64[0]) is smaller than "y".
+  __forceinline uint128_sse2 operator -(uint64 y) const {
+    union {
+      __m128i i128;
+      uint64 u64[2];
+    } temp;
+    temp.i128 = x_;
+    if (temp.u64[0] < y) {
+      temp.u64[1] -= 1;
+    }
+    temp.u64[0] -= y;
+    return temp.i128;
+  }
+  __forceinline void operator -=(uint64 x) {
+    *this = *this - x;
+  }
+
+  // Bitwise logical shifts.
+  //
+  // Shifts by 8, 16, 32 and 64 bits are done with the whole-register byte
+  // shift intrinsic (1, 2, 4 and 8 bytes respectively); every other count
+  // is delegated to long_shift_right() / long_shift_left().
+  // The size_t overloads narrow the count to int and forward to the int
+  // overloads.
+  __forceinline uint128_sse2 operator >>(const int bits) const {
+    if (bits == 8) {
+      return _mm_srli_si128(x_, 1);
+    } else if (bits == 16) {
+      return _mm_srli_si128(x_, 2);
+    } else if (bits == 32) {
+      return _mm_srli_si128(x_, 4);
+    } else if (bits == 64) {
+      return _mm_srli_si128(x_, 8);
+    } else {
+      return long_shift_right(bits);
+    }
+  }
+  __forceinline uint128_sse2 operator >>(const size_t bits) const {
+    return *this >> static_cast<int>(bits);
+  }
+  __forceinline void operator >>=(const int bits) {
+    *this = *this >> bits;
+  }
+  __forceinline void operator >>=(const size_t bits) {
+    *this = *this >> static_cast<int>(bits);
+  }
+
+  __forceinline uint128_sse2 operator <<(int bits) const {
+    if (bits == 8) {
+      return _mm_slli_si128(x_, 1);
+    } else if (bits == 16) {
+      return _mm_slli_si128(x_, 2);
+    } else if (bits == 32) {
+      return _mm_slli_si128(x_, 4);
+    } else if (bits == 64) {
+      return _mm_slli_si128(x_, 8);
+    } else {
+      return long_shift_left(bits);
+    }
+  }
+  __forceinline uint128_sse2 operator <<(size_t bits) const {
+    return *this << static_cast<int>(bits);
+  }
+  __forceinline void operator <<=(int bits) {
+    *this = *this << bits;
+  }
+  __forceinline void operator <<=(size_t bits) {
+    *this = *this << static_cast<int>(bits);
+  }
+
+ protected:
+  __forceinline uint128_sse2
long_shift_right(int bits) const { + union { + __m128i i128; + uint64 u64[2]; + } x; + x.i128 = x_; + for (; bits > 0; --bits) { + x.u64[0] >>= 1; + if (x.u64[1] & 1) { + x.u64[0] |= static_cast(1) << 63; + } + x.u64[1] >>= 1; + } + return x.i128; + } + + __forceinline uint128_sse2 long_shift_left(int bits) const { + union { + __m128i i128; + int64 i64[2]; + } x; + x.i128 = x_; + for (; bits > 0; --bits) { + x.i64[1] <<= 1; + if (x.i64[0] < 0) { + x.i64[1] |= 1; + } + x.i64[0] <<= 1; + } + return x.i128; + } + + __m128i x_; +} GCC_ALIGN_ATTRIBUTE(16); + + +// Specialized versions. +template<> __forceinline uint64 Downcast(const uint128_sse2 &x) { + return x.to_uint64(); +} +template<> __forceinline uint32 Downcast(const uint128_sse2 &x) { + return static_cast(x.to_uint64()); +} +template<> __forceinline uint16 Downcast(const uint128_sse2 &x) { + return static_cast(x.to_uint64()); +} +template<> __forceinline uint8 Downcast(const uint128_sse2 &x) { + return static_cast(x.to_uint64()); +} + +template<> __forceinline uint128_sse2 CrcFromUint64(uint64 lo, uint64 hi) { + union { + __m128i i128; + uint64 u64[2]; + } temp; + temp.u64[0] = lo; + temp.u64[1] = hi; + return temp.i128; +} + +template<> __forceinline void Uint64FromCrc(const uint128_sse2 &crc, + uint64 *lo, uint64 *hi) { + union { + __m128i i128; + uint64 u64[2]; + } temp; + temp.i128 = crc; + *lo = temp.u64[0]; + *hi = temp.u64[1]; +} + +} // namespace crcutil + +#endif // HAVE_SSE2 + +#endif // CRCUTIL_UINT128_SSE2_H_ diff --git a/rapidyenc/crcutil-1.0/examples/interface.cc b/rapidyenc/crcutil-1.0/examples/interface.cc new file mode 100644 index 0000000..6c8a34f --- /dev/null +++ b/rapidyenc/crcutil-1.0/examples/interface.cc @@ -0,0 +1,307 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is the only file where all details of CRC implementation are buried. + +#include "interface.h" + +#include "aligned_alloc.h" +#include "crc32c_sse4.h" +#include "generic_crc.h" +#include "protected_crc.h" +#include "rolling_crc.h" + +// Align all CRC tables on kAlign boundary. +// Shall be exact power of 2. +static size_t kAlign = 4 * 1024; + +using namespace crcutil; + +#if defined(__GNUC__) +// Suppress 'invalid access to non-static data member ... of NULL object' +#undef offsetof +#define offsetof(TYPE, MEMBER) (reinterpret_cast \ + ((&reinterpret_cast ( \ + reinterpret_cast (1)->MEMBER))) - 1) +#endif // defined(__GNUC__) + +namespace crcutil_interface { + +template + class Implementation : public CRC { + public: + typedef typename CrcImplementation::Crc Crc; + typedef Implementation Self; + + Implementation(const Crc &poly, + size_t degree, + bool canonical, + const Crc &roll_start_value, + size_t roll_length) + : crc_(poly, degree, canonical), + rolling_crc_(crc_, roll_length, roll_start_value) { + } + + static Self *Create(const Crc &poly, + size_t degree, + bool canonical, + const Crc &roll_start_value, + size_t roll_length, + const void **allocated_memory) { + void *memory = AlignedAlloc(sizeof(Self), + offsetof(Self, crc_), + kAlign, + allocated_memory); + return new(memory) Self(poly, + degree, + canonical, + roll_start_value, + roll_length); + } + + virtual void Delete() { + AlignedFree(this); + } + + void *operator new(size_t, void *p) { + return p; + } + + virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo, + /* OUT */ 
UINT64 *hi = NULL) const { + SetValue(crc_.Base().GeneratingPolynomial(), lo, hi); + } + + virtual size_t Degree() const { + return crc_.Base().Degree(); + } + + virtual void CanonizeValue(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const { + SetValue(crc_.Base().Canonize(), lo, hi); + } + + virtual void RollStartValue(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const { + SetValue(rolling_crc_.StartValue(), lo, hi); + } + + virtual size_t RollWindowBytes() const { + return rolling_crc_.WindowBytes(); + } + + virtual void SelfCheckValue(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const { + Crc crc = crc_.CrcDefault(&crc_, sizeof(crc_), 0); + crc = crc_.CrcDefault(&rolling_crc_, sizeof(rolling_crc_), crc); + SetValue(crc, lo, hi); + } + + virtual void Compute(const void *data, + size_t bytes, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const { + SetValue(crc_.CrcDefault(data, bytes, GetValue(lo, hi)), lo, hi); + } + + virtual void RollStart(const void *data, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const { + SetValue(rolling_crc_.Start(data), lo, hi); + } + + virtual void Roll(size_t byte_out, + size_t byte_in, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const { + SetValue(rolling_crc_.Roll(GetValue(lo, hi), byte_out, byte_in), lo, hi); + } + + virtual void CrcOfZeroes(UINT64 bytes, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const { + SetValue(crc_.Base().CrcOfZeroes(bytes, GetValue(lo, hi)), lo, hi); + } + + virtual void ChangeStartValue( + UINT64 start_old_lo, UINT64 start_old_hi, + UINT64 start_new_lo, UINT64 start_new_hi, + UINT64 bytes, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const { + SetValue(crc_.Base().ChangeStartValue( + GetValue(lo, hi), + bytes, + GetValue(start_old_lo, start_old_hi), + GetValue(start_new_lo, start_new_hi)), + lo, + hi); + } + + virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi, + UINT64 bytes_B, + /* INOUT */ UINT64* 
crcA_lo, + /* INOUT */ UINT64* crcA_hi = NULL) const { + SetValue(crc_.Base().Concatenate(GetValue(crcA_lo, crcA_hi), + GetValue(crcB_lo, crcB_hi), + bytes_B), + crcA_lo, + crcA_hi); + } + + virtual size_t StoreComplementaryCrc( + void *dst, + UINT64 message_crc_lo, UINT64 message_crc_hi, + UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const { + return crc_.Base().StoreComplementaryCrc( + dst, + GetValue(message_crc_lo, message_crc_hi), + GetValue(result_crc_lo, result_crc_hi)); + } + + virtual size_t StoreCrc(void *dst, + UINT64 lo, + UINT64 hi = 0) const { + return crc_.Base().StoreCrc(dst, GetValue(lo, hi)); + } + + virtual void CrcOfCrc(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const { + SetValue(crc_.Base().CrcOfCrc(), lo, hi); + } + + private: + static Crc GetValue(UINT64 *lo, UINT64 *hi) { + if (sizeof(Crc) <= sizeof(*lo)) { + return CrcFromUint64(*lo); + } else { + return CrcFromUint64(*lo, *hi); + } + } + + static Crc GetValue(UINT64 lo, UINT64 hi) { + return CrcFromUint64(lo, hi); + } + + static void SetValue(const Crc &crc, UINT64 *lo, UINT64 *hi) { + Uint64FromCrc(crc, + reinterpret_cast(lo), + reinterpret_cast(hi)); + } + + const CrcImplementation crc_; + const RollingCrcImplementation rolling_crc_; + + const Self &operator =(const Self &) {} +}; + +#if defined(_MSC_VER) +// 'use_sse4_2' : unreferenced formal parameter +#pragma warning(disable: 4100) +#endif // defined(_MSC_VER) + +bool CRC::IsSSE42Available() { +#if HAVE_AMD64 || HAVE_I386 + return Crc32cSSE4::IsSSE42Available(); +#else + return false; +#endif // HAVE_AMD64 || HAVE_I386 +} + +CRC::~CRC() {} +CRC::CRC() {} + +CRC *CRC::Create(UINT64 poly_lo, + UINT64 poly_hi, + size_t degree, + bool canonical, + UINT64 roll_start_value_lo, + UINT64 roll_start_value_hi, + size_t roll_length, + bool use_sse4_2, + const void **allocated_memory) { + if (degree == 0) { + return NULL; + } + + if (degree > 64) { +#if !HAVE_SSE2 + return NULL; +#else + if (degree > 128) { + return NULL; + } + 
uint128_sse2 poly = CrcFromUint64(poly_lo, poly_hi); + if (degree != 128 && (poly >> degree) != 0) { + return NULL; + } + uint128_sse2 roll_start_value = + CrcFromUint64(roll_start_value_lo, roll_start_value_hi); + if (degree != 128 && (roll_start_value >> degree) != 0) { + return NULL; + } +#if HAVE_I386 + typedef GenericCrc Crc128; +#elif defined(__GNUC__) && GCC_VERSION_AVAILABLE(4, 5) + typedef GenericCrc Crc128; +#else + typedef GenericCrc Crc128; +#endif // HAVE_I386 + return Implementation >::Create( + poly, + degree, + canonical, + roll_start_value, + roll_length, + allocated_memory); +#endif // !HAVE_SSE2 + } + +#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) + if (use_sse4_2 && + degree == Crc32cSSE4::FixedDegree() && + poly_lo == Crc32cSSE4::FixedGeneratingPolynomial() && + poly_hi == 0) { + if (roll_start_value_hi != 0 || (roll_start_value_lo >> 32) != 0) { + return NULL; + } + return Implementation::Create( + static_cast(poly_lo), + degree, + canonical, + static_cast(roll_start_value_lo), + static_cast(roll_length), + allocated_memory); + } +#endif // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) + + if (poly_hi != 0 || (degree != 64 && (poly_lo >> degree) != 0)) { + return NULL; + } + if (roll_start_value_hi != 0 || + (degree != 64 && (roll_start_value_lo >> degree) != 0)) { + return NULL; + } + typedef GenericCrc + Crc64; + return Implementation >::Create( + poly_lo, + degree, + canonical, + roll_start_value_lo, + roll_length, + allocated_memory); +} + +} // namespace crcutil_interface diff --git a/rapidyenc/crcutil-1.0/examples/interface.h b/rapidyenc/crcutil-1.0/examples/interface.h new file mode 100644 index 0000000..2b3e2ee --- /dev/null +++ b/rapidyenc/crcutil-1.0/examples/interface.h @@ -0,0 +1,204 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Example how to use CRC implementation via the interface which +// hides details of implementation. +// +// The raw implementation is not intended to be used in a project +// directly because: +// - Implementation lives in the header files because that is the +// only way to use templates efficiently. +// - Header files are quite "dirty" -- they define and use a +// lot of macros. Bringing these macros to all files in +// a project is not a particularly good idea. +// - The code takes forever to compile with GCC (e.g. GCC +// 4.4.3 and 4.5.0 compile the unittest for about 30 seconds). +// +// Solution: +// - Create your own, clean interface. +// - Do not expose interface internals in a header file. +// - Proxy all calls to your interface to CRC implementation. +// - Keep only one copy of actual implementation. + +#ifndef CRCUTIL_INTERFACE_H_ +#define CRCUTIL_INTERFACE_H_ + +#include "std_headers.h" // size_t + +namespace crcutil_interface { + +// Many projects define their own uint64. Do it here. +typedef unsigned long long UINT64; + +class CRC { + public: + // Creates new instance of CRC class. + // If arguments are illegal (e.g. provided generating polynomial + // has more bits than provided degree), returns NULL. + // + // poly_* - generating polynomial (reversed bit format). + // degree - degree of generating polynomial. + // canonical - if true, input CRC value will be XOR'ed with + // (inverted) before and after CRC computation. + // roll_start_value - starting value of rolling CRC.
+ // roll_window_bytes - length of rolling CRC window in bytes. + // If roll_length is 0, roll_start_value + // shall be 0. + // use_sse4_2 - if true, use SSE4.2 crc32 instruction to compute + // CRC when generating polynomial is CRC32C (Castagnoli) + // allocated_memory - optional (may be NULL) address of a variable + // to store the address of actually allocated memory. + static CRC *Create(UINT64 poly_lo, + UINT64 poly_hi, + size_t degree, + bool canonical, + UINT64 roll_start_value_lo, + UINT64 roll_start_value_hi, + size_t roll_window_bytes, + bool use_sse4_2, + const void **allocated_memory); + + // Deletes the instance of CRC class. + virtual void Delete() = 0; + + // Returns true if SSE4.2 is available. + static bool IsSSE42Available(); + + // Returns generating polynomial. + virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const = 0; + + // Returns degree of generating polynomial. + virtual size_t Degree() const = 0; + + // Returns canonization constant used to XOR crc value + // before and after CRC computation. + virtual void CanonizeValue(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const = 0; + + // Returns rolling CRC starting value. + virtual void RollStartValue(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const = 0; + + // Returns length of rolling CRC window. + virtual size_t RollWindowBytes() const = 0; + + // Returns CRC of CRC tables to enable verification + // of integrity of CRC function itself by comparing + // the result with pre-computed value. + virtual void SelfCheckValue(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const = 0; + + // Given CRC value of previous chunk of data, + // extends it to new chunk, returning the result in-place. + // + // If degree of CRC polynomial is 64 or less, + // (*hi) will not be touched.
+ virtual void Compute(const void *data, + size_t bytes, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const = 0; + + // Starts rolling CRC by computing CRC of first + // "roll_length" bytes of "data", using "roll_start_value" + // as starting value (see Create()). + // Should not be called if the value of "roll_value" was 0. + virtual void RollStart(const void *data, + /* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const = 0; + + // Rolls CRC by 1 byte, given the bytes leaving and + // entering the window of "roll_length" bytes. + // RollStart() should be called before "Roll". + // Should not be called if the value of "roll_value" was 0. + virtual void Roll(size_t byte_out, + size_t byte_in, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const = 0; + + // Computes CRC of sequence of zeroes -- without touching the data. + virtual void CrcOfZeroes(UINT64 bytes, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const = 0; + + // Computes value of CRC(A, bytes, start_new) given known + // crc=CRC(A, bytes, start_old) -- without touching the data. + virtual void ChangeStartValue( + UINT64 start_old_lo, UINT64 start_old_hi, + UINT64 start_new_lo, UINT64 start_new_hi, + UINT64 bytes, + /* INOUT */ UINT64 *lo, + /* INOUT */ UINT64 *hi = NULL) const = 0; + + // Returns CRC of concatenation of blocks A and B when CRCs + // of blocks A and B are known -- without touching the data. + // + // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0), + // returns CRC(AB, |AB|, startA). + virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi, + UINT64 bytes_B, + /* INOUT */ UINT64* crcA_lo, + /* INOUT */ UINT64* crcA_hi = NULL) const = 0; + + // Given CRC of a message, stores extra (degree + 7)/8 bytes after + // the message so that CRC(message+extra, start) = result. + // Does not change CRC start value (use ChangeStartValue for that). + // Returns number of stored bytes. 
+ virtual size_t StoreComplementaryCrc( + void *dst, + UINT64 message_crc_lo, UINT64 message_crc_hi, + UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const = 0; + + // Stores given CRC of a message as (degree + 7)/8 bytes filled + // with 0s to the right. Returns number of stored bytes. + // CRC of the message and stored CRC is a constant value returned + // by CrcOfCrc() -- it does not depend on contents of the message. + virtual size_t StoreCrc(/* OUT */ void *dst, + UINT64 lo, + UINT64 hi = 0) const = 0; + + // Computes expected CRC value of CRC(Message,CRC(Message)) + // when CRC is stored after the message. This value is fixed + // and does not depend on the message or CRC start value. + virtual void CrcOfCrc(/* OUT */ UINT64 *lo, + /* OUT */ UINT64 *hi = NULL) const = 0; + + protected: + // CRC instance should be created only once (most of the time): + // - Creation and initialization is relatively expensive. + // - CRC is fully defined by its generating polynomials + // (well, and few more parameters). + // - CRC instances are pure constants. There is no + // reason to have 2 instances of the same CRC. + // - There are not too many generating polynomials that are + // used in practice. It is hard to imagine a project + // which uses 50 different generating polynomials. + // Thus, a handful of CRC instances is sufficient + // to cover the needs of even very large project. + // - Finally and most importantly, CRC tables should be + // aligned properly. No, the instances of CRC class + // are not created by blind "new" -- they use placement + // "new" and, in absence of placement "delete", + // should be deleted by calling explicit Delete() method. + virtual ~CRC(); + + // Cannot instantiate the class -- instances may be created + // by CRC::Create() only.
+ CRC(); +}; + +} // namespace crcutil_interface + + +#endif // CRCUTIL_INTERFACE_H_ diff --git a/rapidyenc/crcutil-1.0/tests/aligned_alloc.h b/rapidyenc/crcutil-1.0/tests/aligned_alloc.h new file mode 100644 index 0000000..37cefba --- /dev/null +++ b/rapidyenc/crcutil-1.0/tests/aligned_alloc.h @@ -0,0 +1,66 @@ +// Copyright 2010 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Poor man's platform-independent implementation of aligned memory allocator. + +#ifndef CRCUTIL_ALIGNED_ALLOC_H_ +#define CRCUTIL_ALIGNED_ALLOC_H_ + +#include "std_headers.h" // size_t, ptrdiff_t + +namespace crcutil { + +// Allocates a block of memory of "size" bytes so that a field +// at "field_offset" is aligned on "align" boundary. +// +// NB #1: "align" shall be exact power of two. +// +// NB #2: memory allocated by AlignedAlloc should be released by AlignedFree().
+// +inline void *AlignedAlloc(size_t size, + size_t field_offset, + size_t align, + const void **allocated_mem) { + if (align == 0 || (align & (align - 1)) != 0 || align < sizeof(char *)) { + align = sizeof(*allocated_mem); + } + size += align - 1 + sizeof(*allocated_mem); + char *allocated_memory = new char[size]; + char *aligned_memory = allocated_memory + sizeof(*allocated_mem); + field_offset &= align - 1; + size_t actual_alignment = + reinterpret_cast(aligned_memory + field_offset) & (align - 1); + if (actual_alignment != 0) { + aligned_memory += align - actual_alignment; + } + reinterpret_cast(aligned_memory)[-1] = allocated_memory; + + if (allocated_mem != NULL) { + *allocated_mem = allocated_memory; + } + + return aligned_memory; +} + +// Frees memory allocated by AlignedAlloc(). +inline void AlignedFree(void *aligned_memory) { + if (aligned_memory != NULL) { + char *allocated_memory = reinterpret_cast(aligned_memory)[-1]; + delete[] allocated_memory; + } +} + +} // namespace crcutil + +#endif // CRCUTIL_ALIGNED_ALLOC_H_ diff --git a/rapidyenc/rapidyenc.c b/rapidyenc/rapidyenc.c new file mode 100644 index 0000000..3f63357 --- /dev/null +++ b/rapidyenc/rapidyenc.c @@ -0,0 +1,104 @@ +#ifdef BUILD_SHARED +# ifdef _MSC_VER +# define RAPIDYENC_API __declspec(dllexport) +# else +# define RAPIDYENC_API __attribute__((visibility("default"))) +# endif +#endif + +#include "rapidyenc.h" + +int rapidyenc_version(void) { + return RAPIDYENC_VERSION; +} + +/*** ENCODER ***/ +#include "src/encoder.h" +void rapidyenc_encode_init(void) { + static int done = 0; + if(done) return; + done = 1; + encoder_init(); +} + +size_t rapidyenc_encode_max_length(size_t length, int line_size) { + size_t ret = length * 2 /* all characters escaped */ + + 2 /* allocation for offset and that a newline may occur early */ +#if !defined(YENC_DISABLE_AVX256) + + 64 /* allocation for YMM overflowing */ +#else + + 32 /* allocation for XMM overflowing */ +#endif + ; + /* add newlines, considering 
the possibility of all chars escaped */ + if(line_size == 128) // optimize common case + return ret + 2 * (length >> 6); + return ret + 2 * ((length*2) / line_size); +} + +size_t rapidyenc_encode(const void* __restrict src, void* __restrict dest, size_t src_length) { + return rapidyenc_encode_ex(128, NULL, src, dest, src_length, 1); +} + +size_t rapidyenc_encode_ex(int line_size, int* column, const void* __restrict src, void* __restrict dest, size_t src_length, int is_end) { + int unusedColumn = 0; + if(!column) column = &unusedColumn; + return _do_encode(line_size, column, src, dest, src_length, is_end); +} + +int rapidyenc_encode_kernel() { + return encode_isa_level(); +} + +/*** DECODER ***/ +#include "src/decoder.h" +void rapidyenc_decode_init(void) { + static int done = 0; + if(done) return; + done = 1; + decoder_init(); +} + +size_t rapidyenc_decode(const void* src, void* dest, size_t src_length) { + return rapidyenc_decode_ex(1, src, dest, src_length, NULL); +} + +size_t rapidyenc_decode_ex(int is_raw, const void* src, void* dest, size_t src_length, RapidYencDecoderState* state) { + RapidYencDecoderState unusedState = RYDEC_STATE_CRLF; + if(!state) state = &unusedState; + return do_decode(is_raw, (const unsigned char*)src, (unsigned char*)dest, src_length, (YencDecoderState*)state); +} + +RapidYencDecoderEnd rapidyenc_decode_incremental(const void** src, void** dest, size_t src_length, RapidYencDecoderState* state) { + RapidYencDecoderState unusedState = RYDEC_STATE_CRLF; + if(!state) state = &unusedState; + return (RapidYencDecoderEnd)do_decode_end((const unsigned char**)src, (unsigned char**)dest, src_length, (YencDecoderState*)state); +} + +int rapidyenc_decode_kernel() { + return decode_isa_level(); +} + +/*** CRC32 ***/ +#include "src/crc.h" +void rapidyenc_crc_init(void) { + static int done = 0; + if(done) return; + done = 1; + crc_init(); +} + +uint32_t rapidyenc_crc(const void* src, size_t src_length, uint32_t init_crc) { + return do_crc32(src, 
src_length, init_crc); +} +uint32_t rapidyenc_crc_combine(uint32_t crc1, const uint32_t crc2, size_t length2) { + return do_crc32_combine(crc1, crc2, length2); +} +uint32_t rapidyenc_crc_zeros(uint32_t init_crc, size_t length) { + return do_crc32_zeros(init_crc, length); +} + +int rapidyenc_crc_kernel() { + return crc32_isa_level(); +} + diff --git a/rapidyenc/rapidyenc.h b/rapidyenc/rapidyenc.h new file mode 100644 index 0000000..60e5092 --- /dev/null +++ b/rapidyenc/rapidyenc.h @@ -0,0 +1,188 @@ +#ifndef __RAPIDYENC_H +#define __RAPIDYENC_H +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#ifndef RAPIDYENC_API +# define RAPIDYENC_API +#endif + +/** + * Version, in 0xMMmmpp format, where MM=major version, mm=minor version, pp=patch version + */ +#define RAPIDYENC_VERSION 0x010000 +RAPIDYENC_API int rapidyenc_version(void); // returns RAPIDYENC_VERSION + +/** + * For determining which kernel was selected for the current CPU + * Note that if this was compiled with the BUILD_NATIVE option set, values may not correspond with any of those below + */ +#define RYKERN_GENERIC 0 // generic kernel chosen +// x86 specific encode/decode kernels +#define RYKERN_SSE2 0x100 +#define RYKERN_SSSE3 0x200 +#define RYKERN_AVX 0x381 +#define RYKERN_AVX2 0x403 +#define RYKERN_VBMI2 0x603 +// ARM specific encode/decode kernels +#define RYKERN_NEON 0x1000 +// x86 specific CRC32 kernels +#define RYKERN_PCLMUL 0x340 +#define RYKERN_VPCLMUL 0x440 +// ARM specific CRC32 kernels +#define RYKERN_ARMCRC 8 + + +/***** ENCODE *****/ +/** + * Initialise global state of the encoder (sets up lookup tables and performs CPU detection). + * As it alters global state, this function only needs to be called once, and is not thread-safe (subsequent calls to this will do nothing). + * This must be called before any other rapidyenc_encode* functions are called. 
+ */ +RAPIDYENC_API void rapidyenc_encode_init(void); + + +/** + * yEnc encode the buffer at `src` (of length `src_length`) and write it to `dest` + * Returns the number of bytes written to `dest` + * `dest` is assumed to be large enough to hold the output - use `rapidyenc_encode_max_length` to compute the necessary size of `dest` + * + * This is effectively an alias for `rapidyenc_encode_ex(128, NULL, src, dest, src_length, 1)` + */ +RAPIDYENC_API size_t rapidyenc_encode(const void* __restrict src, void* __restrict dest, size_t src_length); + +/** + * Like `rapidyenc_encode` but provide the ability to perform incremental processing + * This is done by keeping track of the column position, and you'll need to indicate if this is the last chunk of an article. + * + * - line_size: the target number of bytes for each line. 128 is commonly used + * - column [in/out]: the column in the line to start at. This will be updated with the column position after encoding. Articles will typically start at 0. Pass in NULL to not track the column. + * - src: the source data to encode + * - dest: where to write the encoded data to. This cannot alias the source data + * - src_length: the length of the source data to encode. Note that the length of the output buffer is assumed to be large enough (see `rapidyenc_encode_max_length`) + * - is_end: if not 0, this is the final chunk of the article. Setting this ensures that trailing whitespace is properly escaped + */ +RAPIDYENC_API size_t rapidyenc_encode_ex(int line_size, int* column, const void* __restrict src, void* __restrict dest, size_t src_length, int is_end); + +/** + * Returns the maximum possible length of yEnc encoded output, given an input of `length` bytes + * This function does also include additional padding needed by rapidyenc's implementation. 
+ * Note that this function doesn't require `rapidyenc_encode_init` to be called beforehand + */ +RAPIDYENC_API size_t rapidyenc_encode_max_length(size_t length, int line_size); + +/** + * Returns the kernel/ISA level used for encoding + * Values correspond with RYKERN_* definitions above + */ +RAPIDYENC_API int rapidyenc_encode_kernel(void); + + +/***** DECODE *****/ +/** + * Current decoder state, for incremental decoding + * The values here refer to the previously seen characters in the stream, which influence how some sequences need to be handled + * The shorthands represent: + * CR (\r), LF (\n), EQ (=), DT (.) + */ +typedef enum { + RYDEC_STATE_CRLF, // default + RYDEC_STATE_EQ, + RYDEC_STATE_CR, + RYDEC_STATE_NONE, + RYDEC_STATE_CRLFDT, + RYDEC_STATE_CRLFDTCR, + RYDEC_STATE_CRLFEQ // may actually be "\r\n.=" in raw decoder +} RapidYencDecoderState; + +/** + * End state for incremental decoding (whether the end of the yEnc data was reached) + */ +typedef enum { + RYDEC_END_NONE, // end not reached + RYDEC_END_CONTROL, // \r\n=y sequence found, src points to byte after 'y' + RYDEC_END_ARTICLE // \r\n.\r\n sequence found, src points to byte after last '\n' +} RapidYencDecoderEnd; + +/** + * Initialise global state of the decoder (sets up lookup tables and performs CPU detection). + * As it alters global state, this function only needs to be called once, and is not thread-safe (subsequent calls to this will do nothing). + * This must be called before any other rapidyenc_decode* functions are called.
+ */ +RAPIDYENC_API void rapidyenc_decode_init(void); + +/** + * yEnc decode the buffer at `src` (of length `src_length`) and write it to `dest` + * Returns the number of bytes written to `dest` + * + * This is effectively an alias for `rapidyenc_decode_ex(1, src, dest, src_length, NULL)` + */ +RAPIDYENC_API size_t rapidyenc_decode(const void* src, void* dest, size_t src_length); + +/** + * yEnc decode the buffer at `src` (of length `src_length`) and write it to `dest` + * Returns the number of bytes written to `dest` + * + * If `is_raw` is non-zero, will also handle NNTP dot unstuffing + * `state` can be used to track the decoder state, if incremental decoding is desired. Set to NULL if tracking is not needed + * `src` and `dest` are allowed to point to the same location for in-situ decoding, otherwise `dest` is assumed to be at least `src_length` in size + */ +RAPIDYENC_API size_t rapidyenc_decode_ex(int is_raw, const void* src, void* dest, size_t src_length, RapidYencDecoderState* state); + +/** + * Like `rapidyenc_decode`, but stops decoding when a yEnc/NNTP end sequence is found + * Returns whether such an end sequence was found + * Note that the `is_raw` parameter in `rapidyenc_decode` is assumed to be True here + * + * `src` and `dest` are pointers of pointers here, as they'll both be updated to the positions after decoding + * The length of the written data can thus be derived from the post-decode `dest` minus the pre-decode `dest` + * Whilst `src` and `dest` can point to the same memory, the pointers themselves should be different. 
In other words, `**src == **dest` is fine, but `*src == *dest` is not + */ +RAPIDYENC_API RapidYencDecoderEnd rapidyenc_decode_incremental(const void** src, void** dest, size_t src_length, RapidYencDecoderState* state); + +/** + * Returns the kernel/ISA level used for decoding + * Values correspond with RYKERN_* definitions above + */ +RAPIDYENC_API int rapidyenc_decode_kernel(void); + + +/***** CRC32 *****/ +/** + * Initialise global state for CRC32 computation (performs CPU detection). + * As it alters global state, this function only needs to be called once, and is not thread-safe (subsequent calls to this will do nothing). + * This must be called before any other rapidyenc_crc* functions are called. + */ +RAPIDYENC_API void rapidyenc_crc_init(void); + +/** + * Returns the CRC32 hash of `src` (of length `src_length`), with initial CRC32 value `init_crc` + * The initial value should be 0 unless this is a subsequent call during incremental hashing + */ +RAPIDYENC_API uint32_t rapidyenc_crc(const void* src, size_t src_length, uint32_t init_crc); + +/** + * Given `crc1 = CRC32(data1)` and `crc2 = CRC32(data2)`, returns CRC32(data1 + data2) + * `length2` refers to the length of 'data2' + */ +RAPIDYENC_API uint32_t rapidyenc_crc_combine(uint32_t crc1, const uint32_t crc2, size_t length2); + +/** + * Returns `rapidyenc_crc(src, length, init_crc)` where 'src' is all zeroes + */ +RAPIDYENC_API uint32_t rapidyenc_crc_zeros(uint32_t init_crc, size_t length); + +/** + * Returns the kernel/ISA level used for CRC32 computation + * Values correspond with RYKERN_* definitions above + */ +RAPIDYENC_API int rapidyenc_crc_kernel(void); + +#ifdef __cplusplus +} +#endif +#endif /* __RAPIDYENC_H */ diff --git a/rapidyenc/src/common.h b/rapidyenc/src/common.h new file mode 100644 index 0000000..91e8093 --- /dev/null +++ b/rapidyenc/src/common.h @@ -0,0 +1,333 @@ +#ifndef __YENC_COMMON +#define __YENC_COMMON + +#include "hedley.h" + +#if defined(__x86_64__) || \ + defined(__amd64__ ) || \ +
defined(__LP64 ) || \ + defined(_M_X64 ) || \ + defined(_M_AMD64 ) || \ + (defined(_WIN64) && !defined(_M_ARM64)) + #define PLATFORM_AMD64 1 +#endif +#if defined(PLATFORM_AMD64) || \ + defined(__i386__ ) || \ + defined(__i486__ ) || \ + defined(__i586__ ) || \ + defined(__i686__ ) || \ + defined(_M_I86 ) || \ + defined(_M_IX86 ) || \ + (defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)) + #define PLATFORM_X86 1 +#endif +#if defined(__aarch64__) || \ + defined(__armv7__ ) || \ + defined(__arm__ ) || \ + defined(_M_ARM64 ) || \ + defined(_M_ARM ) || \ + defined(__ARM_ARCH_6__ ) || \ + defined(__ARM_ARCH_7__ ) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_8A__) || \ + (defined(__ARM_ARCH ) && __ARM_ARCH >= 6) + #define PLATFORM_ARM 1 +#endif + + +#include +#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) + // MSVC doesn't support C11 aligned_alloc: https://stackoverflow.com/a/62963007 + #define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = _aligned_malloc((len), align) + #define ALIGN_FREE _aligned_free +#elif defined(_ISOC11_SOURCE) + // C11 method + // len needs to be a multiple of alignment, although it sometimes works if it isn't... 
+ #define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = aligned_alloc(align, ((len) + (align)-1) & ~((align)-1)) + #define ALIGN_FREE free +#elif defined(__cplusplus) && __cplusplus >= 201700 + // C++17 method + #include + #define ALIGN_ALLOC(buf, len, align) *(void**)&(buf) = std::aligned_alloc(align, ((len) + (align)-1) & ~((align)-1)) + #define ALIGN_FREE free +#else + #define ALIGN_ALLOC(buf, len, align) if(posix_memalign((void**)&(buf), align, (len))) (buf) = NULL + #define ALIGN_FREE free +#endif + + +// MSVC compatibility +#if ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(_M_X64)) && defined(_MSC_VER) && !defined(__clang__) + #define __SSE2__ 1 + #define __SSSE3__ 1 + #define __SSE4_1__ 1 + #if _MSC_VER >= 1600 && defined(__SSE2__) + #define __POPCNT__ 1 + #define __LZCNT__ 1 + #endif + #if !defined(__AVX__) && (_MSC_VER >= 1700 && defined(__SSE2__)) + #define __AVX__ 1 + #endif + #if !defined(__AVX2__) && (_MSC_VER >= 1800 && defined(__AVX__)) + #define __AVX2__ 1 + #define __BMI2__ 1 + #endif + /* AVX512 requires VS 15.3 */ + #if !defined(__AVX512F__) && (_MSC_VER >= 1911 && defined(__AVX__)) + #define __AVX512BW__ 1 + #define __AVX512F__ 1 + #endif + /* AVX512VL not available until VS 15.5 */ + #if defined(__AVX512F__) && _MSC_VER >= 1912 + #define __AVX512VL__ 1 + #endif + #if defined(__AVX512F__) && _MSC_VER >= 1920 + #define __AVX512VBMI__ 1 + #define __AVX512VBMI2__ 1 + #endif +#endif +#if defined(_M_ARM64) + #define __aarch64__ 1 + #define __ARM_NEON 1 +#endif +#if defined(_M_ARM) + #define __ARM_NEON 1 +#endif +#ifdef _MSC_VER +# ifndef __BYTE_ORDER__ +# define __BYTE_ORDER__ 1234 +# endif +# ifndef __ORDER_BIG_ENDIAN__ +# define __ORDER_BIG_ENDIAN__ 4321 +# endif +# include +#endif + + +// combine two 8-bit ints into a 16-bit one +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define UINT16_PACK(a, b) (((a) << 8) | (b)) +#define UINT32_PACK(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) +#define UINT32_16_PACK(a, b) (((a) << 16) | 
(b)) +#else +#define UINT16_PACK(a, b) ((a) | ((b) << 8)) +#define UINT32_PACK(a, b, c, d) ((a) | ((b) << 8) | ((c) << 16) | ((d) << 24)) +#define UINT32_16_PACK(a, b) ((a) | ((b) << 16)) +#endif + +#ifdef __SSE2__ +#include +#define XMM_SIZE 16 /*== (signed int)sizeof(__m128i)*/ + +#ifdef __SSSE3__ +#include +#endif +#ifdef __POPCNT__ +#include +// POPCNT can never return a negative result, but GCC doesn't seem to realise this, so typecast it to hint it better +#define popcnt32 (unsigned int)_mm_popcnt_u32 +#endif + +#if defined(__AVX2__) || defined(__AVX512F__) +#include +#endif + + +#if defined(__tune_core2__) || defined(__tune_atom__) +/* on older Intel CPUs, plus first gen Atom, it is faster to store XMM registers in half */ +# define STOREU_XMM(dest, xmm) \ + _mm_storel_epi64((__m128i*)(dest), xmm); \ + _mm_storeh_pi(((__m64*)(dest) +1), _mm_castsi128_ps(xmm)) +#else +# define STOREU_XMM(dest, xmm) \ + _mm_storeu_si128((__m128i*)(dest), xmm) +#endif + +#endif + +#if defined(__ARM_NEON) && defined(__has_include) +# if !__has_include() +# undef __ARM_NEON +HEDLEY_WARNING("NEON has been disabled due to missing arm_neon.h"); +# endif +#endif + +#ifdef __ARM_NEON +# include + +// ARM provides no standard way to inline define a vector :( +static HEDLEY_ALWAYS_INLINE uint8x8_t vmake_u8( + uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h +) { +# if defined(_MSC_VER) + uint8_t t[] = {a,b,c,d,e,f,g,h}; + return vld1_u8(t); +# else + return (uint8x8_t){a,b,c,d,e,f,g,h}; +# endif +} +static HEDLEY_ALWAYS_INLINE uint8x16_t vmakeq_u8( + uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t e, uint8_t f, uint8_t g, uint8_t h, + uint8_t i, uint8_t j, uint8_t k, uint8_t l, uint8_t m, uint8_t n, uint8_t o, uint8_t p +) { +# if defined(_MSC_VER) + uint8_t t[] = {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p}; + return vld1q_u8(t); +# else + return (uint8x16_t){a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p}; +# endif +} +static HEDLEY_ALWAYS_INLINE int8x16_t vmakeq_s8( + 
int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f, int8_t g, int8_t h, + int8_t i, int8_t j, int8_t k, int8_t l, int8_t m, int8_t n, int8_t o, int8_t p +) { +# if defined(_MSC_VER) + int8_t t[] = {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p}; + return vld1q_s8(t); +# else + return (int8x16_t){a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p}; +# endif +} + +# ifdef _MSC_VER +# define _CREATE_TUPLE(type, ...) type{{ __VA_ARGS__ }} +# else +# define _CREATE_TUPLE(type, ...) (type){{ __VA_ARGS__ }} +# endif +static HEDLEY_ALWAYS_INLINE uint8x16x2_t vcreate2_u8(uint8x16_t a, uint8x16_t b) { + return _CREATE_TUPLE(uint8x16x2_t, a, b); +} +static HEDLEY_ALWAYS_INLINE int8x16x2_t vcreate2_s8(int8x16_t a, int8x16_t b) { + return _CREATE_TUPLE(int8x16x2_t, a, b); +} +static HEDLEY_ALWAYS_INLINE uint8x16x3_t vcreate3_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { + return _CREATE_TUPLE(uint8x16x3_t, a, b, c); +} +static HEDLEY_ALWAYS_INLINE uint8x16x4_t vcreate4_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c, uint8x16_t d) { + return _CREATE_TUPLE(uint8x16x4_t, a, b, c, d); +} +# undef _CREATE_TUPLE +#endif +#ifdef PLATFORM_ARM +bool cpu_supports_neon(); +#endif + +#ifdef _MSC_VER +#define ALIGN_TO(a, v) __declspec(align(a)) v +#else +#define ALIGN_TO(a, v) v __attribute__((aligned(a))) +#endif + + +#ifdef PLATFORM_X86 +enum YEncDecIsaLevel { + ISA_GENERIC = 0, + ISA_FEATURE_POPCNT = 0x1, + ISA_FEATURE_LZCNT = 0x2, + ISA_LEVEL_SSE2 = 0x100, + ISA_LEVEL_SSSE3 = 0x200, + ISA_LEVEL_SSE41 = 0x300, + ISA_LEVEL_SSE4_POPCNT = 0x301, + ISA_LEVEL_PCLMUL = 0x340, + ISA_LEVEL_AVX = 0x381, // same as above, just used as a differentiator for `cpu_supports_isa` + ISA_LEVEL_AVX2 = 0x403, // also includes BMI1/2 and LZCNT + ISA_LEVEL_VPCLMUL = 0x440, + ISA_LEVEL_AVX3 = 0x503, // SKX variant; AVX512VL + AVX512BW + ISA_LEVEL_VBMI2 = 0x603 // ICL +}; +#elif defined(PLATFORM_ARM) +enum YEncDecIsaLevel { + ISA_GENERIC = 0, + ISA_FEATURE_CRC = 8, + ISA_LEVEL_NEON = 0x1000 +}; +#elif defined(__riscv) +enum 
YEncDecIsaLevel { + ISA_GENERIC = 0, + ISA_LEVEL_RVV = 0x10000 +}; +#else +enum YEncDecIsaLevel { + ISA_GENERIC = 0 +}; +#endif +#ifdef PLATFORM_X86 +#ifdef _MSC_VER +// native tuning not supported in MSVC +# define ISA_NATIVE ISA_LEVEL_SSE2 +#else +# if defined(__AVX512VBMI2__) +# define _ISA_NATIVE ISA_LEVEL_VBMI2 +# elif defined(__AVX512BW__) +# define _ISA_NATIVE ISA_LEVEL_AVX3 +# elif defined(__AVX2__) +# define _ISA_NATIVE ISA_LEVEL_AVX2 +# elif defined(__SSE4_1__) +# define _ISA_NATIVE ISA_LEVEL_SSE41 +# elif defined(__SSSE3__) +# define _ISA_NATIVE ISA_LEVEL_SSSE3 +# else +# define _ISA_NATIVE ISA_LEVEL_SSE2 +# endif +# if defined(__POPCNT__) +# if defined(__LZCNT__) +# define ISA_NATIVE (enum YEncDecIsaLevel)(_ISA_NATIVE | ISA_FEATURE_POPCNT | ISA_FEATURE_LZCNT) +# else +# define ISA_NATIVE (enum YEncDecIsaLevel)(_ISA_NATIVE | ISA_FEATURE_POPCNT) +# endif +# else +# define ISA_NATIVE _ISA_NATIVE +# endif +#endif + +int cpu_supports_isa(); +#endif // PLATFORM_X86 + + +#ifdef __riscv +bool cpu_supports_rvv(); +#endif +#if defined(__riscv_vector) && defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(13,0,0) +// GCC added RVV intrinsics in GCC13 +# undef __riscv_vector +#endif + + +#include +#if !defined(_MSC_VER) || defined(_STDINT) || _MSC_VER >= 1900 +# include +# include +#else +/* Workaround for older MSVC not supporting stdint.h - just pull it from V8 */ +# include +#endif + + +// GCC 8/9/10(dev) fails to optimize cases where KNOT should be used, so use intrinsic explicitly; Clang 6+ has no issue, but Clang 6/7 doesn't have the intrinsic; MSVC 2019 also fails and lacks the intrinsic +#if (defined(__GNUC__) && __GNUC__ >= 7) || (defined(_MSC_VER) && _MSC_VER >= 1924) +# define KNOT16 _knot_mask16 +# define KNOT32 _knot_mask32 +#else +# define KNOT16(x) ((__mmask16)~(x)) +# define KNOT32(x) ((__mmask32)~(x)) +#endif + +// weird thing with Apple's Clang; doesn't seem to always occur, so assume that Clang >= 9 is fine: 
https://github.com/animetosho/node-yencode/issues/8#issuecomment-583385864 +// seems that Clang < 3.6 also uses the old name +#if defined(__clang__) && ((defined(__APPLE__) && __clang_major__ < 9) || __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 6)) +# define _lzcnt_u32 __lzcnt32 +#endif + + + +#ifdef __GNUC__ +# if __GNUC__ >= 9 +# define LIKELIHOOD(p, c) (HEDLEY_PREDICT(!!(c), 1, p)) +# else +# define LIKELIHOOD(p, c) (p>0.3 && p<0.7 ? HEDLEY_UNPREDICTABLE(!!(c)) : __builtin_expect(!!(c), (p >= 0.5))) +# endif +#else +# define LIKELIHOOD(p, c) (c) +#endif + +#endif /* __YENC_COMMON */ diff --git a/rapidyenc/src/crc.cc b/rapidyenc/src/crc.cc new file mode 100644 index 0000000..c7794e2 --- /dev/null +++ b/rapidyenc/src/crc.cc @@ -0,0 +1,175 @@ +#include "crc_common.h" + +#include "interface.h" +crcutil_interface::CRC* crc = NULL; + +#if defined(PLATFORM_X86) && !defined(__ILP32__) +static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) { + // use optimised ASM on x86 platforms + crcutil_interface::UINT64 tmp = init; + crc->Compute(data, length, &tmp); + return (uint32_t)tmp; +} +#else +// slice-by-8 algorithm from https://create.stephan-brumme.com/crc32/ +static uint32_t* HEDLEY_RESTRICT crc_slice8_table; +static uint32_t do_crc32_incremental_generic(const void* data, size_t length, uint32_t init) { + uint32_t crc = ~init; + uint32_t* current = (uint32_t*)data; + const int UNROLL_CYCLES = 2; // must be power of 2 + uint32_t* end = current + ((length/sizeof(uint32_t)) & -(UNROLL_CYCLES*2)); + while(current != end) { + for(int unroll=0; unroll> 24) | + ((crc >> 16) & 0xff00) | + ((crc & 0xff00) << 16) | + ((crc & 0xff) << 24) + ); +# endif + uint32_t two = *current++; + crc = crc_slice8_table[two & 0xFF] ^ + crc_slice8_table[0x100L + ((two >> 8) & 0xFF)] ^ + crc_slice8_table[0x200L + ((two >> 16) & 0xFF)] ^ + crc_slice8_table[0x300L + ((two >> 24) & 0xFF)] ^ + crc_slice8_table[0x400L + (one & 0xFF)] ^ + 
crc_slice8_table[0x500L + ((one >> 8) & 0xFF)] ^ + crc_slice8_table[0x600L + ((one >> 16) & 0xFF)] ^ + crc_slice8_table[0x700L + ((one >> 24) & 0xFF)]; +#else + uint32_t one = *current++ ^ crc; + uint32_t two = *current++; + crc = crc_slice8_table[(two >> 24) & 0xFF] ^ + crc_slice8_table[0x100L + ((two >> 16) & 0xFF)] ^ + crc_slice8_table[0x200L + ((two >> 8) & 0xFF)] ^ + crc_slice8_table[0x300L + (two & 0xFF)] ^ + crc_slice8_table[0x400L + ((one >> 24) & 0xFF)] ^ + crc_slice8_table[0x500L + ((one >> 16) & 0xFF)] ^ + crc_slice8_table[0x600L + ((one >> 8) & 0xFF)] ^ + crc_slice8_table[0x700L + (one & 0xFF)]; +#endif + } + } + uint8_t* current8 = (uint8_t*)current; + for(size_t i=0; i < (length & (sizeof(uint32_t)*2 * UNROLL_CYCLES -1)); i++) { + crc = (crc >> 8) ^ crc_slice8_table[(crc & 0xFF) ^ current8[i]]; + } + return ~crc; +} +static void generate_crc32_slice8_table() { + crc_slice8_table = (uint32_t*)malloc(8*256*sizeof(uint32_t)); + for(int byte=0; byte<8; byte++) + for(int v=0; v<256; v++) { + uint32_t crc = v; + for(int i = byte; i >= 0; i--) { + for(int j = 0; j < 8; j++) { + crc = (crc >> 1) ^ (-(crc & 1) & 0xEDB88320); + } + } + crc_slice8_table[byte*256 + v] = crc; + } +} +#endif + +extern "C" { + crc_func _do_crc32_incremental = &do_crc32_incremental_generic; + int _crc32_isa = ISA_GENERIC; +} + + +uint32_t do_crc32_combine(uint32_t crc1, uint32_t crc2, size_t len2) { + crcutil_interface::UINT64 crc1_ = crc1, crc2_ = crc2; + crc->Concatenate(crc2_, 0, len2, &crc1_); + return (uint32_t)crc1_; +} + +uint32_t do_crc32_zeros(uint32_t crc1, size_t len) { + crcutil_interface::UINT64 crc_ = crc1; + crc->CrcOfZeroes(len, &crc_); + return (uint32_t)crc_; +} + +void crc_clmul_set_funcs(); +void crc_clmul256_set_funcs(); +void crc_arm_set_funcs(); + +#ifdef PLATFORM_X86 +int cpu_supports_crc_isa(); +#endif + +#if defined(PLATFORM_ARM) && defined(_WIN32) +# define WIN32_LEAN_AND_MEAN +# include +#endif +#ifdef PLATFORM_ARM +# ifdef __ANDROID__ +# include +# elif 
defined(__APPLE__) +# include +# include +# elif defined(__has_include) +# if __has_include() +# include +# ifdef __FreeBSD__ +static unsigned long getauxval(unsigned long cap) { + unsigned long ret; + elf_aux_info(cap, &ret, sizeof(ret)); + return ret; +} +# endif +# if __has_include() +# include +# endif +# endif +# endif +#endif +void crc_init() { + crc = crcutil_interface::CRC::Create( + 0xEDB88320, 0, 32, true, 0, 0, 0, 0, NULL); + // instance never deleted... oh well... + +#if !defined(PLATFORM_X86) || defined(__ILP32__) + generate_crc32_slice8_table(); +#endif + +#ifdef PLATFORM_X86 + int support = cpu_supports_crc_isa(); + if(support == 2) + crc_clmul256_set_funcs(); + else if(support == 1) + crc_clmul_set_funcs(); +#endif +#ifdef PLATFORM_ARM +# ifdef __APPLE__ + int supported = 0; + size_t len = sizeof(supported); + if(sysctlbyname("hw.optional.armv8_crc32", &supported, &len, NULL, 0)) + supported = 0; +# endif + if( +# if defined(AT_HWCAP2) && defined(HWCAP2_CRC32) + getauxval(AT_HWCAP2) & HWCAP2_CRC32 +# elif defined(AT_HWCAP) && defined(HWCAP_CRC32) + getauxval(AT_HWCAP) & HWCAP_CRC32 +# elif defined(ANDROID_CPU_FAMILY_ARM) && defined(__aarch64__) + android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32 +# elif defined(ANDROID_CPU_FAMILY_ARM) /* aarch32 */ + android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_CRC32 +# elif defined(_WIN32) + IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) +# elif defined(__APPLE__) + supported +# elif defined(__ARM_FEATURE_CRC32) + true /* assume available if compiled as such */ +# else + false +# endif + ) { + crc_arm_set_funcs(); + } +#endif +} diff --git a/rapidyenc/src/crc.h b/rapidyenc/src/crc.h new file mode 100644 index 0000000..8e3b91e --- /dev/null +++ b/rapidyenc/src/crc.h @@ -0,0 +1,27 @@ +#ifndef __YENC_CRC_H +#define __YENC_CRC_H + +#ifdef __cplusplus +extern "C" { +#endif + + + +typedef uint32_t (*crc_func)(const void*, size_t, uint32_t); +extern crc_func _do_crc32_incremental; +extern 
int _crc32_isa; +#define do_crc32 (*_do_crc32_incremental) + +uint32_t do_crc32_combine(uint32_t crc1, const uint32_t crc2, size_t len2); +uint32_t do_crc32_zeros(uint32_t crc1, size_t len); +void crc_init(); +static inline int crc32_isa_level() { + return _crc32_isa; +} + + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/rapidyenc/src/crc_arm.cc b/rapidyenc/src/crc_arm.cc new file mode 100644 index 0000000..2ae52f8 --- /dev/null +++ b/rapidyenc/src/crc_arm.cc @@ -0,0 +1,209 @@ +#include "crc_common.h" + +#if defined(PLATFORM_ARM) && defined(_MSC_VER) && defined(__clang__) && !defined(__ARM_FEATURE_CRC32) +// I don't think GYP provides a nice way to detect whether MSVC or clang-cl is being used, but it doesn't use clang-cl by default, so a warning here is probably sufficient +HEDLEY_WARNING("CRC32 acceleration is not been enabled under ARM clang-cl by default; add `-march=armv8-a+crc` to additional compiler arguments to enable"); +#endif + +// disable CRC on GCC versions with broken arm_acle.h +#if defined(__ARM_FEATURE_CRC32) && defined(HEDLEY_GCC_VERSION) +# if !defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(7,0,0) && !HEDLEY_GCC_VERSION_CHECK(8,1,1) +# undef __ARM_FEATURE_CRC32 +HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h shipped in GCC 7.0 - 8.1 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81497]. If you need this feature, please use a different compiler or version of GCC"); +# endif +# if defined(__aarch64__) && HEDLEY_GCC_VERSION_CHECK(9,4,0) && !HEDLEY_GCC_VERSION_CHECK(9,5,0) +# undef __ARM_FEATURE_CRC32 +HEDLEY_WARNING("CRC32 acceleration has been disabled due to broken arm_acle.h shipped in GCC 9.4 [https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100985]. 
If you need this feature, please use a different compiler or version of GCC"); +# endif +#endif +#if defined(__ARM_FEATURE_CRC32) && defined(__has_include) +# if !__has_include() +# undef __ARM_FEATURE_CRC32 +HEDLEY_WARNING("CRC32 acceleration has been disabled due to missing arm_acle.h"); +# endif +#endif + +#if defined(__ARM_FEATURE_CRC32) || (defined(_M_ARM64) && !defined(__clang__)) // MSVC doesn't support CRC for ARM32 + +/* ARMv8 accelerated CRC */ +#if defined(_MSC_VER) && !defined(__clang__) +#include +#else +#include +#endif + + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# ifdef __GNUC__ +# define _LE16 __builtin_bswap16 +# define _LE32 __builtin_bswap32 +# define _LE64 __builtin_bswap64 +# else +// currently not supported +# error No endian swap intrinsic defined +# endif +#else +# define _LE16(x) (x) +# define _LE32(x) (x) +# define _LE64(x) (x) +#endif + +#ifdef __aarch64__ +# define WORD_T uint64_t +# define WORDSIZE_LOG 3 // sizeof(WORD_T) == 1<>31) & a; + a = ((a >> 1) ^ (0xEDB88320 & NEGATE(a&1))); + b <<= 1; + } + res ^= NEGATE(b>>31) & a; + return res; +} + +static const uint32_t crc_power[] = { // pre-computed 2^(2^n), with first 3 entries removed (saves a shift) + 0x00800000, 0x00008000, 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, 0xd7bbfe6a, + 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, + 0x31fec169, 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, 0x2e4e5eef, 0x4eaba214, + 0xa8a472c0, 0x429a969e, 0x148d302a, 0xc40ba6d0, 0xc4e22c3c, 0x40000000, 0x20000000, 0x08000000 +}; +/* above table can be computed with + int main(void) { + uint32_t k = 0x80000000 >> 1; + for (size_t i = 0; i < 32+3; ++i) { + if(i>2) printf("0x%08x, ", k); + k = crc_multiply(k, k); + } + return 0; + } +*/ +#endif + + + +// inspired/stolen off https://github.com/jocover/crc32_armv8/blob/master/crc32_armv8.c +static uint32_t arm_crc_calc(uint32_t crc, const unsigned char *src, long len) { + + // 
initial alignment + if (len >= 16) { // 16 is an arbitrary number; it just needs to be >=8 + if ((uintptr_t)src & sizeof(uint8_t)) { + crc = __crc32b(crc, *src); + src++; + len--; + } + if ((uintptr_t)src & sizeof(uint16_t)) { + crc = __crc32h(crc, _LE16(*((uint16_t *)src))); + src += sizeof(uint16_t); + len -= sizeof(uint16_t); + } +#ifdef __aarch64__ + if ((uintptr_t)src & sizeof(uint32_t)) { + crc = __crc32w(crc, _LE32(*((uint32_t *)src))); + src += sizeof(uint32_t); + len -= sizeof(uint32_t); + } +#endif + } + + const WORD_T* srcW = (const WORD_T*)src; + +#ifdef ENABLE_PIPELINE_OPT + // uses ideas from https://github.com/komrad36/crc#option-13-golden + // (this is a slightly less efficient, but much simpler implementation of the idea) + const unsigned SPLIT_WORDS_LOG = 10; // make sure it's at least 2 + const unsigned SPLIT_WORDS = 1<= (long)(sizeof(WORD_T)*SPLIT_WORDS*2)) { + // compute 2x CRCs concurrently to leverage piplining + uint32_t crc2 = 0; + for(unsigned i=0; i= 0) { + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + } + if (len & sizeof(WORD_T)*4) { + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + } + if (len & sizeof(WORD_T)*2) { + crc = CRC_WORD(crc, *(srcW++)); + crc = CRC_WORD(crc, *(srcW++)); + } + if (len & sizeof(WORD_T)) { + crc = CRC_WORD(crc, *(srcW++)); + } + src = (const unsigned char*)srcW; + +#ifdef __aarch64__ + if (len & sizeof(uint32_t)) { + crc = __crc32w(crc, _LE32(*((uint32_t *)src))); + src += sizeof(uint32_t); + } +#endif + if (len & sizeof(uint16_t)) { + crc = __crc32h(crc, _LE16(*((uint16_t *)src))); + src += sizeof(uint16_t); + } + if (len & sizeof(uint8_t)) + crc = __crc32b(crc, *src); + + return crc; +} + +static 
uint32_t do_crc32_incremental_arm(const void* data, size_t length, uint32_t init) { + return ~arm_crc_calc(~init, (const unsigned char*)data, (long)length); +} + +void crc_arm_set_funcs() { + _do_crc32_incremental = &do_crc32_incremental_arm; + _crc32_isa = ISA_FEATURE_CRC; +} +#else +void crc_arm_set_funcs() {} +#endif diff --git a/rapidyenc/src/crc_common.h b/rapidyenc/src/crc_common.h new file mode 100644 index 0000000..bfa758b --- /dev/null +++ b/rapidyenc/src/crc_common.h @@ -0,0 +1,4 @@ +#include "common.h" +#include // for size_t +#include "crc.h" + diff --git a/rapidyenc/src/crc_folding.cc b/rapidyenc/src/crc_folding.cc new file mode 100644 index 0000000..05d9f82 --- /dev/null +++ b/rapidyenc/src/crc_folding.cc @@ -0,0 +1,375 @@ +// taken from zlib-ng / Intel's zlib patch, modified to remove zlib dependencies +/* + * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ + * instruction. + * + * A white paper describing this algorithm can be found at: + * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf + * + * Copyright (C) 2013 Intel Corporation. All rights reserved. + * Authors: + * Wajdi Feghali + * Jim Guilford + * Vinodh Gopal + * Erdinc Ozturk + * Jim Kukunas + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "crc_common.h" + +#if (defined(__PCLMUL__) && defined(__SSSE3__) && defined(__SSE4_1__)) || (defined(_MSC_VER) && _MSC_VER >= 1600 && defined(PLATFORM_X86) && !defined(__clang__)) +#include +#include +#include + + +#if defined(__AVX512VL__) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 +# define ENABLE_AVX512 1 +#endif + + +// interestingly, MSVC seems to generate better code if using VXORPS over VPXOR +// original Intel code uses XORPS for many XOR operations, but PXOR is pretty much always better (more port freedom on Intel CPUs). 
The only advantage of XORPS is that it's 1 byte shorter, an advantage which disappears under AVX as both instructions have the same length +#if defined(__AVX__) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 +# define fold_xor _mm_xor_si128 +#else +static __m128i fold_xor(__m128i a, __m128i b) { /* bitwise XOR of a and b, issued through the float-domain _mm_xor_ps */ + return _mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); +} +#endif + +#ifdef ENABLE_AVX512 +static __m128i do_one_fold_merge(__m128i src, __m128i data) { /* carry-less multiplies the high half of src by the low half of xmm_fold4 (0x01) and the low half of src by the high half (0x10), then XORs both products with data (ternary-logic 0x96 = three-input XOR) */ + const __m128i xmm_fold4 = _mm_set_epi32( + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596); + return _mm_ternarylogic_epi32( + _mm_clmulepi64_si128(src, xmm_fold4, 0x01), + _mm_clmulepi64_si128(src, xmm_fold4, 0x10), + data, + 0x96 + ); +} +#else +static __m128i do_one_fold(__m128i src) { /* same two carry-less products as the AVX512 variant, XORed together; the caller XORs in the new data */ + const __m128i xmm_fold4 = _mm_set_epi32( + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596); + return fold_xor( + _mm_clmulepi64_si128(src, xmm_fold4, 0x01), + _mm_clmulepi64_si128(src, xmm_fold4, 0x10) + ); +} +#endif + +ALIGN_TO(32, static const unsigned pshufb_shf_table[60]) = { /* one 16-byte shuffle mask (4 words) per row; partial_fold() loads row (len - 1) for a partial length of len bytes */ + 0x84838281, 0x88878685, 0x8c8b8a89, 0x008f8e8d, /* shl 15 (16 - 1)/shr1 */ + 0x85848382, 0x89888786, 0x8d8c8b8a, 0x01008f8e, /* shl 14 (16 - 2)/shr2 */ + 0x86858483, 0x8a898887, 0x8e8d8c8b, 0x0201008f, /* shl 13 (16 - 3)/shr3 */ + 0x87868584, 0x8b8a8988, 0x8f8e8d8c, 0x03020100, /* shl 12 (16 - 4)/shr4 */ + 0x88878685, 0x8c8b8a89, 0x008f8e8d, 0x04030201, /* shl 11 (16 - 5)/shr5 */ + 0x89888786, 0x8d8c8b8a, 0x01008f8e, 0x05040302, /* shl 10 (16 - 6)/shr6 */ + 0x8a898887, 0x8e8d8c8b, 0x0201008f, 0x06050403, /* shl 9 (16 - 7)/shr7 */ + 0x8b8a8988, 0x8f8e8d8c, 0x03020100, 0x07060504, /* shl 8 (16 - 8)/shr8 */ + 0x8c8b8a89, 0x008f8e8d, 0x04030201, 0x08070605, /* shl 7 (16 - 9)/shr9 */ + 0x8d8c8b8a, 0x01008f8e, 0x05040302, 0x09080706, /* shl 6 (16 -10)/shr10*/ + 0x8e8d8c8b, 0x0201008f, 0x06050403, 0x0a090807, /* shl 5 (16 -11)/shr11*/ + 0x8f8e8d8c, 0x03020100, 0x07060504, 0x0b0a0908, /* shl 4 (16 -12)/shr12*/ + 0x008f8e8d, 
0x04030201, 0x08070605, 0x0c0b0a09, /* shl 3 (16 -13)/shr13*/ + 0x01008f8e, 0x05040302, 0x09080706, 0x0d0c0b0a, /* shl 2 (16 -14)/shr14*/ + 0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/ +}; + +static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, + __m128i *xmm_crc2, __m128i *xmm_crc3, __m128i *xmm_crc_part) { + + const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080); + + __m128i xmm_shl, xmm_shr, xmm_tmp1, xmm_tmp2, xmm_tmp3; + __m128i xmm_a0_0; + + xmm_shl = _mm_load_si128((__m128i *)pshufb_shf_table + (len - 1)); + xmm_shr = xmm_shl; + xmm_shr = _mm_xor_si128(xmm_shr, xmm_mask3); + + xmm_a0_0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shl); + + *xmm_crc0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shr); + xmm_tmp1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shl); + *xmm_crc0 = _mm_or_si128(*xmm_crc0, xmm_tmp1); + + *xmm_crc1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shr); + xmm_tmp2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shl); + *xmm_crc1 = _mm_or_si128(*xmm_crc1, xmm_tmp2); + + *xmm_crc2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shr); + xmm_tmp3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shl); + *xmm_crc2 = _mm_or_si128(*xmm_crc2, xmm_tmp3); + + *xmm_crc3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shr); + *xmm_crc_part = _mm_shuffle_epi8(*xmm_crc_part, xmm_shl); + *xmm_crc3 = _mm_or_si128(*xmm_crc3, *xmm_crc_part); + +#ifdef ENABLE_AVX512 + *xmm_crc3 = do_one_fold_merge(xmm_a0_0, *xmm_crc3); +#else + *xmm_crc3 = fold_xor( + do_one_fold(xmm_a0_0), + *xmm_crc3 + ); +#endif +} + +ALIGN_TO(16, static const unsigned crc_k[]) = { + 0xccaa009e, 0x00000000, /* rk1 */ + 0x751997d0, 0x00000001, /* rk2 */ + 0xccaa009e, 0x00000000, /* rk5 */ + 0x63cd6124, 0x00000001, /* rk6 */ + 0xf7011641, 0x00000000, /* rk7 */ + 0xdb710640, 0x00000001 /* rk8 */ +}; + +ALIGN_TO(16, static const unsigned crc_mask[4]) = { + 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF +}; + + +static uint32_t crc_fold(const unsigned char *src, long len, uint32_t initial) { + unsigned long algn_diff; + __m128i xmm_t0, 
xmm_t1, xmm_t2, xmm_t3; + + // TODO: consider calculating this via a LUT instead (probably faster) + // info from https://www.reddit.com/r/ReverseEngineering/comments/2zwhl3/mystery_constant_0x9db42487_in_intels_crc32ieee/ + // firstly, calculate: xmm_crc0 = (intial * 0x487b9c8a) mod 0x104c11db7, where 0x487b9c8a = inverse(1<<512) mod 0x104c11db7 + xmm_t0 = _mm_cvtsi32_si128(~initial); + + xmm_t0 = _mm_clmulepi64_si128(xmm_t0, _mm_set_epi32(0, 0, 0xa273bc24, 0), 0); // reverse(0x487b9c8a)<<1 == 0xa273bc24 + xmm_t2 = _mm_set_epi32( // polynomial reduction factors + 1, 0xdb710640, // G* = 0x04c11db7 + 0, 0xf7011641 // Q+ = 0x04d101df (+1 to save an additional xor operation) + ); + xmm_t1 = _mm_clmulepi64_si128(xmm_t0, xmm_t2, 0); + xmm_t1 = _mm_clmulepi64_si128(xmm_t1, xmm_t2, 0x10); + + __m128i xmm_crc0 = _mm_srli_si128(_mm_xor_si128(xmm_t0, xmm_t1), 8); + + __m128i xmm_crc1 = _mm_setzero_si128(); + __m128i xmm_crc2 = _mm_setzero_si128(); + __m128i xmm_crc3 = _mm_setzero_si128(); + __m128i xmm_crc_part; + + if (len < 16) { + if (len == 0) + return initial; + xmm_crc_part = _mm_setzero_si128(); + memcpy(&xmm_crc_part, src, len); + goto partial; + } + + algn_diff = (0 - (uintptr_t)src) & 0xF; + if (algn_diff) { + xmm_crc_part = _mm_loadu_si128((__m128i *)src); + + src += algn_diff; + len -= algn_diff; + + partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, + &xmm_crc_part); + } + + while (len >= 64) { + xmm_t0 = _mm_load_si128((__m128i *)src); + xmm_t1 = _mm_load_si128((__m128i *)src + 1); + xmm_t2 = _mm_load_si128((__m128i *)src + 2); + xmm_t3 = _mm_load_si128((__m128i *)src + 3); + +#ifdef ENABLE_AVX512 + xmm_crc0 = do_one_fold_merge(xmm_crc0, xmm_t0); + xmm_crc1 = do_one_fold_merge(xmm_crc1, xmm_t1); + xmm_crc2 = do_one_fold_merge(xmm_crc2, xmm_t2); + xmm_crc3 = do_one_fold_merge(xmm_crc3, xmm_t3); +#else + // nesting do_one_fold() in _mm_xor_si128() seems to cause MSVC to generate horrible code, so separate it out + xmm_crc0 = 
do_one_fold(xmm_crc0); + xmm_crc1 = do_one_fold(xmm_crc1); + xmm_crc2 = do_one_fold(xmm_crc2); + xmm_crc3 = do_one_fold(xmm_crc3); + xmm_crc0 = _mm_xor_si128(xmm_crc0, xmm_t0); + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t1); + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t2); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t3); +#endif + + src += 64; + len -= 64; + } + + if (len >= 48) { + len -= 48; + + xmm_t0 = _mm_load_si128((__m128i *)src); + xmm_t1 = _mm_load_si128((__m128i *)src + 1); + xmm_t2 = _mm_load_si128((__m128i *)src + 2); + + xmm_t3 = xmm_crc3; +#ifdef ENABLE_AVX512 + xmm_crc3 = do_one_fold_merge(xmm_crc2, xmm_t2); + xmm_crc2 = do_one_fold_merge(xmm_crc1, xmm_t1); + xmm_crc1 = do_one_fold_merge(xmm_crc0, xmm_t0); +#else + xmm_crc3 = do_one_fold(xmm_crc2); + xmm_crc2 = do_one_fold(xmm_crc1); + xmm_crc1 = do_one_fold(xmm_crc0); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t2); + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t1); + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t0); +#endif + xmm_crc0 = xmm_t3; + + if (len == 0) + goto done; + + xmm_crc_part = _mm_load_si128((__m128i *)src + 3); + } else if (len >= 32) { + len -= 32; + + xmm_t0 = _mm_load_si128((__m128i *)src); + xmm_t1 = _mm_load_si128((__m128i *)src + 1); + + xmm_t2 = xmm_crc2; + xmm_t3 = xmm_crc3; +#ifdef ENABLE_AVX512 + xmm_crc3 = do_one_fold_merge(xmm_crc1, xmm_t1); + xmm_crc2 = do_one_fold_merge(xmm_crc0, xmm_t0); +#else + xmm_crc3 = do_one_fold(xmm_crc1); + xmm_crc2 = do_one_fold(xmm_crc0); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t1); + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t0); +#endif + xmm_crc1 = xmm_t3; + xmm_crc0 = xmm_t2; + + if (len == 0) + goto done; + + xmm_crc_part = _mm_load_si128((__m128i *)src + 2); + } else if (len >= 16) { + len -= 16; + + xmm_t0 = _mm_load_si128((__m128i *)src); + + xmm_t3 = xmm_crc3; +#ifdef ENABLE_AVX512 + xmm_crc3 = do_one_fold_merge(xmm_crc0, xmm_t0); +#else + xmm_crc3 = _mm_xor_si128(do_one_fold(xmm_crc0), xmm_t0); +#endif + xmm_crc0 = xmm_crc1; + xmm_crc1 = xmm_crc2; + 
xmm_crc2 = xmm_t3; + + if (len == 0) + goto done; + + xmm_crc_part = _mm_load_si128((__m128i *)src + 1); + } else { + if (len == 0) + goto done; + xmm_crc_part = _mm_load_si128((__m128i *)src); + } + +partial: + partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, + &xmm_crc_part); +done: +{ + const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask); + __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; + + /* + * k1 + */ + crc_fold = _mm_load_si128((__m128i *)crc_k); + + x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10); + xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01); +#ifdef ENABLE_AVX512 + xmm_crc1 = _mm_ternarylogic_epi32(xmm_crc1, x_tmp0, xmm_crc0, 0x96); +#else + xmm_crc1 = _mm_xor_si128(xmm_crc1, x_tmp0); + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_crc0); +#endif + + x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10); + xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01); +#ifdef ENABLE_AVX512 + xmm_crc2 = _mm_ternarylogic_epi32(xmm_crc2, x_tmp1, xmm_crc1, 0x96); +#else + xmm_crc2 = _mm_xor_si128(xmm_crc2, x_tmp1); + xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_crc1); +#endif + + x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10); + xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01); +#ifdef ENABLE_AVX512 + xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, x_tmp2, xmm_crc2, 0x96); +#else + xmm_crc3 = _mm_xor_si128(xmm_crc3, x_tmp2); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); +#endif + + /* + * k5 + */ + crc_fold = _mm_load_si128((__m128i *)crc_k + 1); + + xmm_crc0 = xmm_crc3; + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); + xmm_crc0 = _mm_srli_si128(xmm_crc0, 8); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); + + xmm_crc0 = xmm_crc3; + xmm_crc3 = _mm_slli_si128(xmm_crc3, 4); + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); +#ifdef ENABLE_AVX512 + //xmm_crc3 = _mm_maskz_xor_epi32(14, xmm_crc3, xmm_crc0); + xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc0, xmm_mask, 0x28); +#else + xmm_crc0 = 
_mm_and_si128(xmm_crc0, xmm_mask); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); +#endif + + /* + * k7 + */ + xmm_crc1 = xmm_crc3; + crc_fold = _mm_load_si128((__m128i *)crc_k + 2); + + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); +#ifdef ENABLE_AVX512 + xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc1, xmm_crc1, 0xC3); // NOT(xmm_crc3 ^ xmm_crc1) +#else + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_mask); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); +#endif + return _mm_extract_epi32(xmm_crc3, 2); +} + +} + +static uint32_t do_crc32_incremental_clmul(const void* data, size_t length, uint32_t init) { + return crc_fold((const unsigned char*)data, (long)length, init); +} + +void crc_clmul_set_funcs() { + _do_crc32_incremental = &do_crc32_incremental_clmul; + _crc32_isa = ISA_LEVEL_PCLMUL; +} +#else +void crc_clmul_set_funcs() {} +#endif + diff --git a/rapidyenc/src/crc_folding_256.cc b/rapidyenc/src/crc_folding_256.cc new file mode 100644 index 0000000..2d3657b --- /dev/null +++ b/rapidyenc/src/crc_folding_256.cc @@ -0,0 +1,231 @@ +// 256-bit version of crc_folding + +#include "crc_common.h" + +#if !defined(YENC_DISABLE_AVX256) && ((defined(__VPCLMULQDQ__) && defined(__AVX2__) && defined(__PCLMUL__)) || (defined(_MSC_VER) && _MSC_VER >= 1920 && defined(PLATFORM_X86) && !defined(__clang__))) +#include +#include + + +#if defined(__AVX512VL__) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 +# define ENABLE_AVX512 1 +#endif + +static __m256i do_one_fold(__m256i src, __m256i data) { + const __m256i fold4 = _mm256_set_epi32( + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596, + 0x00000001, 0x54442bd4, + 0x00000001, 0xc6e41596 + ); +#ifdef ENABLE_AVX512 + return _mm256_ternarylogic_epi32( + _mm256_clmulepi64_epi128(src, fold4, 0x01), + _mm256_clmulepi64_epi128(src, fold4, 0x10), + data, + 0x96 + ); +#else + return _mm256_xor_si256(data, _mm256_xor_si256( + _mm256_clmulepi64_epi128(src, fold4, 
0x01), + _mm256_clmulepi64_epi128(src, fold4, 0x10) + )); +#endif +} + +ALIGN_TO(32, static const uint8_t pshufb_rot_table[]) = { + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +}; +// _mm256_castsi128_si256, but upper is defined to be 0 +#if (defined(__clang__) && __clang_major__ >= 5 && (!defined(__APPLE__) || __clang_major__ >= 7)) || (defined(__GNUC__) && __GNUC__ >= 10) || (defined(_MSC_VER) && _MSC_VER >= 1910) +// intrinsic unsupported in GCC 9 and MSVC < 2017 +# define zext128_256 _mm256_zextsi128_si256 +#else +// technically a cast is incorrect, due to upper 128 bits being undefined, but should usually work fine +// alternative may be `_mm256_set_m128i(_mm_setzero_si128(), v)` but unsupported on GCC < 7, and most compilers generate a VINSERTF128 instruction for it +# ifdef __OPTIMIZE__ +# define zext128_256 _mm256_castsi128_si256 +# else +# define zext128_256(x) _mm256_inserti128_si256(_mm256_setzero_si256(), x, 0) +# endif +#endif + +#ifdef ENABLE_AVX512 +# define MM256_BLENDV(a, b, m) _mm256_ternarylogic_epi32(a, b, m, 0xd8) +# define MM_2XOR(a, b, c) _mm_ternarylogic_epi32(a, b, c, 0x96) +#else +# define MM256_BLENDV _mm256_blendv_epi8 +# define MM_2XOR(a, b, c) _mm_xor_si128(_mm_xor_si128(a, b), c) +#endif + +static void partial_fold(const size_t len, __m256i *crc0, __m256i *crc1, __m256i crc_part) { + __m256i shuf = _mm256_broadcastsi128_si256(_mm_loadu_si128((__m128i*)(pshufb_rot_table + (len&15)))); + __m256i mask = _mm256_cmpgt_epi8(shuf, _mm256_set1_epi8(15)); + + *crc0 = _mm256_shuffle_epi8(*crc0, shuf); + *crc1 = _mm256_shuffle_epi8(*crc1, shuf); + crc_part = _mm256_shuffle_epi8(crc_part, shuf); + + __m256i crc_out = _mm256_permute2x128_si256(*crc0, *crc0, 0x08); // move bottom->top + __m256i crc01, crc1p; + if(len >= 16) { + crc_out = MM256_BLENDV(crc_out, *crc0, mask); + crc01 = *crc1; + crc1p = crc_part; + *crc0 = _mm256_permute2x128_si256(*crc0, *crc1, 0x21); + *crc1 = 
_mm256_permute2x128_si256(*crc1, crc_part, 0x21); + crc_part = zext128_256(_mm256_extracti128_si256(crc_part, 1)); + } else { + crc_out = _mm256_and_si256(crc_out, mask); + crc01 = _mm256_permute2x128_si256(*crc0, *crc1, 0x21); + crc1p = _mm256_permute2x128_si256(*crc1, crc_part, 0x21); + } + + *crc0 = MM256_BLENDV(*crc0, crc01, mask); + *crc1 = MM256_BLENDV(*crc1, crc1p, mask); + + *crc1 = do_one_fold(crc_out, *crc1); +} + + +ALIGN_TO(16, static const unsigned crc_k[]) = { + 0xccaa009e, 0x00000000, /* rk1 */ + 0x751997d0, 0x00000001, /* rk2 */ + 0xccaa009e, 0x00000000, /* rk5 */ + 0x63cd6124, 0x00000001, /* rk6 */ + 0xf7011641, 0x00000000, /* rk7 */ + 0xdb710640, 0x00000001 /* rk8 */ +}; + + +static uint32_t crc_fold(const unsigned char *src, long len, uint32_t initial) { + // info from https://www.reddit.com/r/ReverseEngineering/comments/2zwhl3/mystery_constant_0x9db42487_in_intels_crc32ieee/ + // firstly, calculate: xmm_crc0 = (intial * 0x487b9c8a) mod 0x104c11db7, where 0x487b9c8a = inverse(1<<512) mod 0x104c11db7 + __m128i xmm_t0 = _mm_cvtsi32_si128(~initial); + + xmm_t0 = _mm_clmulepi64_si128(xmm_t0, _mm_set_epi32(0, 0, 0xa273bc24, 0), 0); // reverse(0x487b9c8a)<<1 == 0xa273bc24 + __m128i reduction = _mm_set_epi32( // polynomial reduction factors + 1, 0xdb710640, // G* = 0x04c11db7 + 0, 0xf7011641 // Q+ = 0x04d101df (+1 to save an additional xor operation) + ); + __m128i xmm_t1 = _mm_clmulepi64_si128(xmm_t0, reduction, 0); + xmm_t1 = _mm_clmulepi64_si128(xmm_t1, reduction, 0x10); + + xmm_t0 = _mm_srli_si128(_mm_xor_si128(xmm_t0, xmm_t1), 8); + __m256i crc0 = zext128_256(xmm_t0); + __m256i crc1 = _mm256_setzero_si256(); + + if (len < 32) { + if (len == 0) + return initial; + __m256i crc_part = _mm256_setzero_si256(); + memcpy(&crc_part, src, len); + partial_fold(len, &crc0, &crc1, crc_part); + } else { + uintptr_t algn_diff = (0 - (uintptr_t)src) & 0x1F; + if (algn_diff) { + partial_fold(algn_diff, &crc0, &crc1, _mm256_loadu_si256((__m256i *)src)); + src += 
algn_diff; + len -= algn_diff; + } + + while (len >= 64) { + crc0 = do_one_fold(crc0, _mm256_load_si256((__m256i*)src)); + crc1 = do_one_fold(crc1, _mm256_load_si256((__m256i*)src + 1)); + src += 64; + len -= 64; + } + + if (len >= 32) { + __m256i old = crc1; + crc1 = do_one_fold(crc0, _mm256_load_si256((__m256i*)src)); + crc0 = old; + + len -= 32; + src += 32; + } + + if(len != 0) { + partial_fold(len, &crc0, &crc1, _mm256_load_si256((__m256i *)src)); + } + } + + const __m128i xmm_mask = _mm_set_epi32(-1,-1,-1,0); + __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; + + __m128i xmm_crc0 = _mm256_castsi256_si128(crc0); + __m128i xmm_crc1 = _mm256_extracti128_si256(crc0, 1); + __m128i xmm_crc2 = _mm256_castsi256_si128(crc1); + __m128i xmm_crc3 = _mm256_extracti128_si256(crc1, 1); + + /* + * k1 + */ + crc_fold = _mm_load_si128((__m128i *)crc_k); + + x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10); + xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01); + xmm_crc1 = MM_2XOR(xmm_crc1, x_tmp0, xmm_crc0); + + x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10); + xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01); + xmm_crc2 = MM_2XOR(xmm_crc2, x_tmp1, xmm_crc1); + + x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10); + xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01); + xmm_crc3 = MM_2XOR(xmm_crc3, x_tmp2, xmm_crc2); + + /* + * k5 + */ + crc_fold = _mm_load_si128((__m128i *)crc_k + 1); + + xmm_crc0 = xmm_crc3; + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); + xmm_crc0 = _mm_srli_si128(xmm_crc0, 8); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); + + xmm_crc0 = xmm_crc3; + xmm_crc3 = _mm_slli_si128(xmm_crc3, 4); + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); +#ifdef ENABLE_AVX512 + //xmm_crc3 = _mm_maskz_xor_epi32(14, xmm_crc3, xmm_crc0); + xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc0, xmm_mask, 0x28); +#else + xmm_crc0 = _mm_and_si128(xmm_crc0, xmm_mask); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); +#endif 
+ + /* + * k7 + */ + xmm_crc1 = xmm_crc3; + crc_fold = _mm_load_si128((__m128i *)crc_k + 2); + + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); + xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); +#ifdef ENABLE_AVX512 + xmm_crc3 = _mm_ternarylogic_epi32(xmm_crc3, xmm_crc1, xmm_crc1, 0xC3); // NOT(xmm_crc3 ^ xmm_crc1) +#else + xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_mask); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); +#endif + return _mm_extract_epi32(xmm_crc3, 2); +} + +static uint32_t do_crc32_incremental_clmul(const void* data, size_t length, uint32_t init) { + return crc_fold((const unsigned char*)data, (long)length, init); +} + +void crc_clmul256_set_funcs() { + _do_crc32_incremental = &do_crc32_incremental_clmul; + _crc32_isa = ISA_LEVEL_VPCLMUL; +} +#else +void crc_clmul_set_funcs(); +void crc_clmul256_set_funcs() { + crc_clmul_set_funcs(); +} +#endif + diff --git a/rapidyenc/src/decoder.cc b/rapidyenc/src/decoder.cc new file mode 100644 index 0000000..722ef53 --- /dev/null +++ b/rapidyenc/src/decoder.cc @@ -0,0 +1,68 @@ +#include "common.h" + +#include "decoder_common.h" +#include "decoder.h" + +extern "C" { + YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar; + YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_scalar; + YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*) = &do_decode_end_scalar; + + int _decode_isa = ISA_GENERIC; +} + +void decoder_set_sse2_funcs(); +void decoder_set_ssse3_funcs(); +void decoder_set_avx_funcs(); +void decoder_set_avx2_funcs(); +void decoder_set_vbmi2_funcs(); +void decoder_set_neon_funcs(); + + +#if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 +# if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256) +# include "decoder_avx2_base.h" +static inline void decoder_set_native_funcs() { + 
ALIGN_ALLOC(lookups, sizeof(*lookups), 16); + decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_NATIVE; +} +# else +# include "decoder_sse_base.h" +static inline void decoder_set_native_funcs() { + decoder_sse_init(); + decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_NATIVE; +} +# endif +#endif + +void decoder_init() { +#ifdef PLATFORM_X86 +# if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 + decoder_set_native_funcs(); +# else + int use_isa = cpu_supports_isa(); + if(use_isa >= ISA_LEVEL_VBMI2) + decoder_set_vbmi2_funcs(); + else if(use_isa >= ISA_LEVEL_AVX2) + decoder_set_avx2_funcs(); + else if(use_isa >= ISA_LEVEL_AVX) + decoder_set_avx_funcs(); + else if(use_isa >= ISA_LEVEL_SSSE3) + decoder_set_ssse3_funcs(); + else + decoder_set_sse2_funcs(); +# endif +#endif +#ifdef PLATFORM_ARM + if(cpu_supports_neon()) + decoder_set_neon_funcs(); +#endif +} diff --git a/rapidyenc/src/decoder.h b/rapidyenc/src/decoder.h new file mode 100644 index 0000000..1614c7a --- /dev/null +++ b/rapidyenc/src/decoder.h @@ -0,0 +1,57 @@ +#ifndef __YENC_DECODER_H +#define __YENC_DECODER_H + +#ifdef __cplusplus +extern "C" { +#endif + + + +// the last state that the decoder was in (i.e. last few characters processed) +// the state is needed for incremental decoders as its behavior is affected by what it processed last +// acronyms: CR = carriage return (\r), LF = line feed (\n), EQ = equals char, DT = dot char (.) 
+typedef enum { + YDEC_STATE_CRLF, // default + YDEC_STATE_EQ, /* last byte processed was '='; the next byte is an escaped character */ + YDEC_STATE_CR, /* last byte processed was '\r' */ + YDEC_STATE_NONE, /* no special sequence pending */ + YDEC_STATE_CRLFDT, /* "\r\n." was seen */ + YDEC_STATE_CRLFDTCR, /* "\r\n.\r" was seen */ + YDEC_STATE_CRLFEQ // may actually be "\r\n.=" in raw decoder +} YencDecoderState; + +// end result for incremental processing (whether the end of the yEnc data was reached) +typedef enum { + YDEC_END_NONE, // end not reached + YDEC_END_CONTROL, // \r\n=y sequence found, src points to byte after 'y' + YDEC_END_ARTICLE // \r\n.\r\n sequence found, src points to byte after last '\n' +} YencDecoderEnd; + +#include "hedley.h" + /* Decoder entry points, selected at runtime: decoder.cc initialises these to the scalar decoders and the decoder_set_*_funcs() routines replace them. src and dest are passed by address so the implementation can advance them; _decode_isa records the ISA level of the installed implementation. */ +extern YencDecoderEnd (*_do_decode)(const unsigned char**, unsigned char**, size_t, YencDecoderState*); +extern YencDecoderEnd (*_do_decode_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*); +extern YencDecoderEnd (*_do_decode_end_raw)(const unsigned char**, unsigned char**, size_t, YencDecoderState*); +extern int _decode_isa; + /* Decodes len bytes from src into dest and returns the number of bytes written. A non-zero isRaw selects _do_decode_raw instead of _do_decode; the YencDecoderEnd result of the call is discarded. */ +static inline size_t do_decode(int isRaw, const unsigned char* src, unsigned char* dest, size_t len, YencDecoderState* state) { + unsigned char* ds = dest; + (*(isRaw ? 
_do_decode_raw : _do_decode))(&src, &ds, len, state); + return ds - dest; +} + /* Incremental decode through _do_decode_end_raw; the callee updates *src and *dest, and the return value tells whether a terminator was found (see YencDecoderEnd). */ +static inline YencDecoderEnd do_decode_end(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) { + return _do_decode_end_raw(src, dest, len, state); +} + /* Installs the decoder implementation for this build and CPU (defined in decoder.cc). */ +void decoder_init(); + /* Returns _decode_isa, the ISA level stored by whichever decoder setup routine ran (ISA_GENERIC initially). */ +static inline int decode_isa_level() { + return _decode_isa; +} + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/rapidyenc/src/decoder_avx.cc b/rapidyenc/src/decoder_avx.cc new file mode 100644 index 0000000..f1b84f8 --- /dev/null +++ b/rapidyenc/src/decoder_avx.cc @@ -0,0 +1,19 @@ +#include "common.h" + /* Registers the AVX decoder. When built with __AVX__ and __POPCNT__, decoder_set_avx_funcs() calls decoder_sse_init() and decoder_init_lut(), points the three entry points at do_decode_simd instantiations and records ISA_LEVEL_AVX; otherwise it defers to decoder_set_ssse3_funcs(). NOTE(review): the template argument lists after do_decode_simd appear to be missing in this copy - confirm against upstream. */ +#if defined(__AVX__) && defined(__POPCNT__) +#include "decoder_common.h" +#include "decoder_sse_base.h" +void decoder_set_avx_funcs() { + decoder_sse_init(); + decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_AVX; +} +#else +void decoder_set_ssse3_funcs(); +void decoder_set_avx_funcs() { + decoder_set_ssse3_funcs(); +} +#endif diff --git a/rapidyenc/src/decoder_avx2.cc b/rapidyenc/src/decoder_avx2.cc new file mode 100644 index 0000000..721e767 --- /dev/null +++ b/rapidyenc/src/decoder_avx2.cc @@ -0,0 +1,19 @@ +#include "common.h" + /* Registers the AVX2 decoder. When built with __AVX2__ and without YENC_DISABLE_AVX256, decoder_set_avx2_funcs() sets up lookups through ALIGN_ALLOC (third argument 16; presumably the alignment - confirm in common.h), fills it with decoder_init_lut(), points the three entry points at do_decode_simd instantiations and records ISA_LEVEL_AVX2; otherwise it defers to decoder_set_avx_funcs(). NOTE(review): the template argument lists after do_decode_simd appear to be missing in this copy - confirm against upstream. */ +#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256) +#include "decoder_common.h" +#include "decoder_avx2_base.h" +void decoder_set_avx2_funcs() { + ALIGN_ALLOC(lookups, sizeof(*lookups), 16); + decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_AVX2; +} +#else +void decoder_set_avx_funcs(); +void decoder_set_avx2_funcs() { + decoder_set_avx_funcs(); +} +#endif diff --git a/rapidyenc/src/decoder_avx2_base.h b/rapidyenc/src/decoder_avx2_base.h new file mode 100644 index 0000000..2ec719a --- /dev/null +++ b/rapidyenc/src/decoder_avx2_base.h @@ -0,0 +1,632 @@ + 
+#ifdef __AVX2__ + +// GCC (ver 6-10(dev)) fails to optimize pure C version of mask testing, but has this intrinsic; Clang >= 7 optimizes C version fine; functions added in Clang 8 +#if (defined(__GNUC__) && __GNUC__ >= 7) || (defined(_MSC_VER) && _MSC_VER >= 1924) +# define KORTEST32(a, b) !_kortestz_mask32_u8((a), (b)) +# define KAND32(a, b) _kand_mask32((a), (b)) +# define KOR32(a, b) _kor_mask32((a), (b)) +#else +# define KORTEST32(a, b) ((a) | (b)) +# define KAND32(a, b) ((a) & (b)) +# define KOR32(a, b) ((a) | (b)) +#endif + +#pragma pack(16) +static struct { + /*align16*/ struct { char bytes[16]; } compact[32768]; + uint8_t eqFix[256]; +} * HEDLEY_RESTRICT lookups; +#pragma pack() + + +static HEDLEY_ALWAYS_INLINE __m256i force_align_read_256(const void* p) { +#ifdef _MSC_VER + // MSVC complains about casting away volatile + return *(__m256i *)(p); +#else + return *(volatile __m256i *)(p); +#endif +} + +// _mm256_castsi128_si256, but upper is defined to be 0 +#if (defined(__clang__) && __clang_major__ >= 5 && (!defined(__APPLE__) || __clang_major__ >= 7)) || (defined(__GNUC__) && __GNUC__ >= 10) || (defined(_MSC_VER) && _MSC_VER >= 1910) +// intrinsic unsupported in GCC 9 and MSVC < 2017 +# define zext128_256 _mm256_zextsi128_si256 +#else +// technically a cast is incorrect, due to upper 128 bits being undefined, but should usually work fine +// alternative may be `_mm256_set_m128i(_mm_setzero_si128(), v)` but unsupported on GCC < 7, and most compilers generate a VINSERTF128 instruction for it +# ifdef __OPTIMIZE__ +# define zext128_256 _mm256_castsi128_si256 +# else +# define zext128_256(x) _mm256_inserti128_si256(_mm256_setzero_si256(), x, 0) +# endif +#endif + +#if defined(__tune_icelake_client__) || defined(__tune_icelake_server__) || defined(__tune_tigerlake__) || defined(__tune_rocketlake__) || defined(__tune_alderlake__) || defined(__tune_sapphirerapids__) +# define COMPRESS_STORE _mm256_mask_compressstoreu_epi8 +#else +// avoid uCode on Zen4 +# define 
COMPRESS_STORE(dst, mask, vec) _mm256_storeu_si256((__m256i*)(dst), _mm256_maskz_compress_epi8(mask, vec)) +#endif + +template +HEDLEY_ALWAYS_INLINE void do_decode_avx2(const uint8_t* src, long& len, unsigned char*& p, unsigned char& _escFirst, uint16_t& _nextMask) { + HEDLEY_ASSUME(_escFirst == 0 || _escFirst == 1); + HEDLEY_ASSUME(_nextMask == 0 || _nextMask == 1 || _nextMask == 2); + uintptr_t escFirst = _escFirst; + __m256i yencOffset = escFirst ? _mm256_set_epi8( + -42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42, + -42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42-64 + ) : _mm256_set1_epi8(-42); + __m256i minMask = _mm256_set1_epi8('.'); + if(_nextMask && isRaw) { + minMask = _mm256_set_epi8( + '.','.','.','.','.','.','.','.','.','.','.','.','.','.','.','.', + '.','.','.','.','.','.','.','.','.','.','.','.','.','.',_nextMask==2?0:'.',_nextMask==1?0:'.' + ); + } + + // for some reason, MSVC Win32 seems to crash when trying to compile _mm256_mask_cmpeq_epi8_mask + // the crash can be fixed by switching the order of the last two arguments, but it seems to generate wrong code + // so just disable the optimisation as it seems to be problematic there +#if defined(__AVX512VL__) && defined(__AVX512BW__) +# if defined(_MSC_VER) && !defined(PLATFORM_AMD64) && !defined(__clang__) + const bool useAVX3MaskCmp = false; +# else + const bool useAVX3MaskCmp = (use_isa >= ISA_LEVEL_AVX3); +# endif +#endif + intptr_t i; + for(i = -len; i; i += sizeof(__m256i)*2) { + __m256i oDataA = _mm256_load_si256((__m256i *)(src+i)); + __m256i oDataB = _mm256_load_si256((__m256i *)(src+i) + 1); + + // search for special chars + __m256i cmpA = _mm256_cmpeq_epi8(oDataA, _mm256_shuffle_epi8( + _mm256_set_epi8( + -1,'=','\r',-1,-1,'\n',-1,-1,-1,-1,-1,-1,-1,-1,-1,'.', + -1,'=','\r',-1,-1,'\n',-1,-1,-1,-1,-1,-1,-1,-1,-1,'.' 
+ ), + _mm256_min_epu8(oDataA, minMask) + )); + __m256i cmpB = _mm256_cmpeq_epi8(oDataB, _mm256_shuffle_epi8( + _mm256_set_epi8( + -1,'=','\r',-1,-1,'\n',-1,-1,-1,-1,-1,-1,-1,-1,-1,'.', + -1,'=','\r',-1,-1,'\n',-1,-1,-1,-1,-1,-1,-1,-1,-1,'.' + ), + _mm256_min_epu8(oDataB, _mm256_set1_epi8('.')) + )); + + // TODO: can OR the vectors together to save generating a mask, but may not be worth it + uint64_t mask = (uint32_t)_mm256_movemask_epi8(cmpB); // not the most accurate mask if we have invalid sequences; we fix this up later + mask = (mask << 32) | (uint32_t)_mm256_movemask_epi8(cmpA); + __m256i dataA, dataB; + if(use_isa >= ISA_LEVEL_AVX3) + dataA = _mm256_add_epi8(oDataA, yencOffset); + + if (mask != 0) { + __m256i cmpEqA = _mm256_cmpeq_epi8(oDataA, _mm256_set1_epi8('=')); + __m256i cmpEqB = _mm256_cmpeq_epi8(oDataB, _mm256_set1_epi8('=')); + uint64_t maskEq = (uint32_t)_mm256_movemask_epi8(cmpEqB); + maskEq = (maskEq << 32) | (uint32_t)_mm256_movemask_epi8(cmpEqA); + + // handle \r\n. sequences + // RFC3977 requires the first dot on a line to be stripped, due to dot-stuffing + if((isRaw || searchEnd) && LIKELIHOOD(0.45, mask != maskEq)) { +#if 0 + // prefer shuffling data over unaligned loads on Zen (unknown if worth it on Zen2/Excavator) + // unfortunately not beneficial, probably due to available register pressure; this is left here because it could be beneficial if we figure out how to use fewer registers + __m256i nextDataA, nextDataB; + if(searchEnd) { + nextDataA = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm256_extracti128_si256(oDataA, 1)), + _mm256_castsi256_si128(oDataB), + 1 + ); + nextDataB = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm256_extracti128_si256(oDataB, 1)), + _mm_load_si128((__m128i*)(src+i+sizeof(__m256i)*2)), + 1 + ); + } +# define SHIFT_DATA_A(offs) (searchEnd ? _mm256_alignr_epi8(nextDataA, oDataA, offs) : _mm256_loadu_si256((__m256i *)(src+i+offs))) +# define SHIFT_DATA_B(offs) (searchEnd ? 
_mm256_alignr_epi8(nextDataB, oDataB, offs) : _mm256_loadu_si256((__m256i *)(src+i+offs) + 1)) +#else +# define SHIFT_DATA_A(offs) _mm256_loadu_si256((__m256i *)(src+i+offs)) +# define SHIFT_DATA_B(offs) _mm256_loadu_si256((__m256i *)(src+i+offs) + 1) +#endif + __m256i tmpData2A = SHIFT_DATA_A(2); + __m256i tmpData2B = SHIFT_DATA_B(2); + __m256i match2EqA, match2EqB; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + __mmask32 match2EqMaskA, match2EqMaskB; + __mmask32 match0CrMaskA, match0CrMaskB; + __mmask32 match2CrXDtMaskA, match2CrXDtMaskB; + if(useAVX3MaskCmp && searchEnd) { + match2EqMaskA = _mm256_cmpeq_epi8_mask(_mm256_set1_epi8('='), tmpData2A); + match2EqMaskB = _mm256_cmpeq_epi8_mask(_mm256_set1_epi8('='), tmpData2B); + } else +#endif + if(searchEnd) { + match2EqA = _mm256_cmpeq_epi8(_mm256_set1_epi8('='), tmpData2A); + match2EqB = _mm256_cmpeq_epi8(_mm256_set1_epi8('='), tmpData2B); + } + + int partialKillDotFound; + __m256i match2CrXDtA, match2CrXDtB; + if(isRaw) { + // find patterns of \r_. 
+ +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(useAVX3MaskCmp) { + match0CrMaskA = _mm256_cmpeq_epi8_mask(oDataA, _mm256_set1_epi8('\r')); + match0CrMaskB = _mm256_cmpeq_epi8_mask(oDataB, _mm256_set1_epi8('\r')); + match2CrXDtMaskA = _mm256_mask_cmpeq_epi8_mask(match0CrMaskA, tmpData2A, _mm256_set1_epi8('.')); + match2CrXDtMaskB = _mm256_mask_cmpeq_epi8_mask(match0CrMaskB, tmpData2B, _mm256_set1_epi8('.')); + partialKillDotFound = KORTEST32(match2CrXDtMaskA, match2CrXDtMaskB); + } else +#endif + { + match2CrXDtA = _mm256_and_si256( + _mm256_cmpeq_epi8(oDataA, _mm256_set1_epi8('\r')), + _mm256_cmpeq_epi8(tmpData2A, _mm256_set1_epi8('.')) + ); + match2CrXDtB = _mm256_and_si256( + _mm256_cmpeq_epi8(oDataB, _mm256_set1_epi8('\r')), + _mm256_cmpeq_epi8(tmpData2B, _mm256_set1_epi8('.')) + ); + partialKillDotFound = _mm256_movemask_epi8(_mm256_or_si256( + match2CrXDtA, match2CrXDtB + )); + } + } + + if(isRaw && LIKELIHOOD(0.002, partialKillDotFound)) { + // merge matches for \r\n. + __m256i match2NlDotA, match1NlA; + __m256i match2NlDotB, match1NlB; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + __mmask32 match1NlMaskA, match1NlMaskB; + __mmask32 match2NlDotMaskA, match2NlDotMaskB; + if(useAVX3MaskCmp) { + match1NlMaskA = _mm256_mask_cmpeq_epi8_mask( + match0CrMaskA, + _mm256_set1_epi8('\n'), + SHIFT_DATA_A(1) + ); + match1NlMaskB = _mm256_mask_cmpeq_epi8_mask( + match0CrMaskB, + _mm256_set1_epi8('\n'), + SHIFT_DATA_B(1) + ); + match2NlDotMaskA = KAND32(match2CrXDtMaskA, match1NlMaskA); + match2NlDotMaskB = KAND32(match2CrXDtMaskB, match1NlMaskB); + } else +#endif + { + __m256i match1LfA = _mm256_cmpeq_epi8( + _mm256_set1_epi8('\n'), + SHIFT_DATA_A(1) + ); + __m256i match1LfB = _mm256_cmpeq_epi8( + _mm256_set1_epi8('\n'), + SHIFT_DATA_B(1) + ); + // force re-computing these to avoid register spills elsewhere + match1NlA = _mm256_and_si256(match1LfA, _mm256_cmpeq_epi8(force_align_read_256(src+i), _mm256_set1_epi8('\r'))); + match1NlB = 
_mm256_and_si256(match1LfB, _mm256_cmpeq_epi8(force_align_read_256(src+i + sizeof(__m256i)), _mm256_set1_epi8('\r'))); + match2NlDotA = _mm256_and_si256(match2CrXDtA, match1NlA); + match2NlDotB = _mm256_and_si256(match2CrXDtB, match1NlB); + } + if(searchEnd) { + __m256i tmpData4A; +#if defined(__AVX512VL__) && defined(PLATFORM_AMD64) + if(use_isa >= ISA_LEVEL_AVX3) + // AVX512 with 32 registers shouldn't have any issue with holding onto oData* in registers + tmpData4A = _mm256_alignr_epi32(oDataB, oDataA, 1); + else +#endif + tmpData4A = SHIFT_DATA_A(4); + __m256i tmpData4B = SHIFT_DATA_B(4); + // match instances of \r\n.\r\n and \r\n.=y + __m256i match3CrA = _mm256_cmpeq_epi8( + _mm256_set1_epi8('\r'), + SHIFT_DATA_A(3) + ); + __m256i match3CrB = _mm256_cmpeq_epi8( + _mm256_set1_epi8('\r'), + SHIFT_DATA_B(3) + ); + __m256i match4LfA = _mm256_cmpeq_epi8(tmpData4A, _mm256_set1_epi8('\n')); + __m256i match4LfB = _mm256_cmpeq_epi8(tmpData4B, _mm256_set1_epi8('\n')); + __m256i match4EqYA = _mm256_cmpeq_epi16(tmpData4A, _mm256_set1_epi16(0x793d)); // =y + __m256i match4EqYB = _mm256_cmpeq_epi16(tmpData4B, _mm256_set1_epi16(0x793d)); // =y + + int matchEnd; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(useAVX3MaskCmp) { + __mmask32 match3EqYMaskA = _mm256_mask_cmpeq_epi8_mask( + match2EqMaskA, + _mm256_set1_epi8('y'), + SHIFT_DATA_A(3) + ); + __mmask32 match3EqYMaskB = _mm256_mask_cmpeq_epi8_mask( + match2EqMaskB, + _mm256_set1_epi8('y'), + SHIFT_DATA_B(3) + ); + __m256i match34EqYA, match34EqYB; +# ifdef __AVX512VBMI2__ + if(use_isa >= ISA_LEVEL_VBMI2) { + match34EqYA = _mm256_shrdi_epi16(_mm256_movm_epi8(match3EqYMaskA), match4EqYA, 8); + match34EqYB = _mm256_shrdi_epi16(_mm256_movm_epi8(match3EqYMaskB), match4EqYB, 8); + } else +# endif + { + // (match4EqY & 0xff00) | (match3EqY >> 8) + match34EqYA = _mm256_mask_blend_epi8(match3EqYMaskA>>1, _mm256_and_si256(match4EqYA, _mm256_set1_epi16(-0xff)), _mm256_set1_epi8(-1)); + match34EqYB = 
_mm256_mask_blend_epi8(match3EqYMaskB>>1, _mm256_and_si256(match4EqYB, _mm256_set1_epi16(-0xff)), _mm256_set1_epi8(-1)); + } + // merge \r\n and =y matches for tmpData4 + __m256i match4EndA = _mm256_ternarylogic_epi32(match34EqYA, match3CrA, match4LfA, 0xF8); // (match3Cr & match4Lf) | match34EqY + __m256i match4EndB = _mm256_ternarylogic_epi32(match34EqYB, match3CrB, match4LfB, 0xF8); + // merge with \r\n. and combine + matchEnd = KORTEST32( + KOR32( + _mm256_mask_test_epi8_mask(match2NlDotMaskA, match4EndA, match4EndA), + KAND32(match3EqYMaskA, match1NlMaskA) + ), + KOR32( + _mm256_mask_test_epi8_mask(match2NlDotMaskB, match4EndB, match4EndB), + KAND32(match3EqYMaskB, match1NlMaskB) + ) + ); + } else +#endif + { + __m256i match3EqYA = _mm256_and_si256(match2EqA, _mm256_cmpeq_epi8( + _mm256_set1_epi8('y'), + SHIFT_DATA_A(3) + )); + __m256i match3EqYB = _mm256_and_si256(match2EqB, _mm256_cmpeq_epi8( + _mm256_set1_epi8('y'), + SHIFT_DATA_B(3) + )); + match4EqYA = _mm256_slli_epi16(match4EqYA, 8); // TODO: also consider using PBLENDVB here with shifted match3EqY instead + match4EqYB = _mm256_slli_epi16(match4EqYB, 8); + // merge \r\n and =y matches for tmpData4 + __m256i match4EndA = _mm256_or_si256( + _mm256_and_si256(match3CrA, match4LfA), + _mm256_or_si256(match4EqYA, _mm256_srli_epi16(match3EqYA, 8)) // _mm256_srli_si256 by 1 also works + ); + __m256i match4EndB = _mm256_or_si256( + _mm256_and_si256(match3CrB, match4LfB), + _mm256_or_si256(match4EqYB, _mm256_srli_epi16(match3EqYB, 8)) + ); + // merge with \r\n. 
+ match4EndA = _mm256_and_si256(match4EndA, match2NlDotA); + match4EndB = _mm256_and_si256(match4EndB, match2NlDotB); + // match \r\n=y + __m256i match3EndA = _mm256_and_si256(match3EqYA, match1NlA); + __m256i match3EndB = _mm256_and_si256(match3EqYB, match1NlB); + // combine match sequences + matchEnd = _mm256_movemask_epi8(_mm256_or_si256( + _mm256_or_si256(match4EndA, match3EndA), + _mm256_or_si256(match4EndB, match3EndB) + )); + } + if(LIKELIHOOD(0.002, matchEnd)) { + // terminator found + // there's probably faster ways to do this, but reverting to scalar code should be good enough + len += (long)i; + break; + } + } +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(useAVX3MaskCmp) { + mask |= (uint64_t)match2NlDotMaskA << 2; + mask |= (uint64_t)match2NlDotMaskB << 34; + minMask = _mm256_maskz_mov_epi8(~(match2NlDotMaskB>>30), _mm256_set1_epi8('.')); + } else +#endif + { + mask |= (uint64_t)((uint32_t)_mm256_movemask_epi8(match2NlDotA)) << 2; + mask |= (uint64_t)((uint32_t)_mm256_movemask_epi8(match2NlDotB)) << 34; + match2NlDotB = zext128_256(_mm_srli_si128(_mm256_extracti128_si256(match2NlDotB, 1), 14)); + minMask = _mm256_subs_epu8(_mm256_set1_epi8('.'), match2NlDotB); + } + } + else if(searchEnd) { + bool partialEndFound; + __m256i match3EqYA, match3EqYB; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + __mmask32 match3EqYMaskA, match3EqYMaskB; + if(useAVX3MaskCmp) { + match3EqYMaskA = _mm256_mask_cmpeq_epi8_mask( + match2EqMaskA, + _mm256_set1_epi8('y'), + SHIFT_DATA_A(3) + ); + match3EqYMaskB = _mm256_mask_cmpeq_epi8_mask( + match2EqMaskB, + _mm256_set1_epi8('y'), + SHIFT_DATA_B(3) + ); + partialEndFound = KORTEST32(match3EqYMaskA, match3EqYMaskB); + } else +#endif + { + __m256i match3YA = _mm256_cmpeq_epi8( + _mm256_set1_epi8('y'), + SHIFT_DATA_A(3) + ); + __m256i match3YB = _mm256_cmpeq_epi8( + _mm256_set1_epi8('y'), + SHIFT_DATA_B(3) + ); + match3EqYA = _mm256_and_si256(match2EqA, match3YA); + match3EqYB = _mm256_and_si256(match2EqB, 
match3YB); + partialEndFound = _mm256_movemask_epi8(_mm256_or_si256(match3EqYA, match3EqYB)); + } + if(LIKELIHOOD(0.002, partialEndFound)) { + bool endFound; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(useAVX3MaskCmp) { + __mmask32 match3LfEqYMaskA = _mm256_mask_cmpeq_epi8_mask( + match3EqYMaskA, + _mm256_set1_epi8('\n'), + SHIFT_DATA_A(1) + ); + __mmask32 match3LfEqYMaskB = _mm256_mask_cmpeq_epi8_mask( + match3EqYMaskB, + _mm256_set1_epi8('\n'), + SHIFT_DATA_B(1) + ); + + endFound = KORTEST32( + _mm256_mask_cmpeq_epi8_mask(match3LfEqYMaskA, oDataA, _mm256_set1_epi8('\r')), + _mm256_mask_cmpeq_epi8_mask(match3LfEqYMaskB, oDataB, _mm256_set1_epi8('\r')) + ); + } else +#endif + { + __m256i match1LfA = _mm256_cmpeq_epi8( + _mm256_set1_epi8('\n'), + SHIFT_DATA_A(1) + ); + __m256i match1LfB = _mm256_cmpeq_epi8( + _mm256_set1_epi8('\n'), + SHIFT_DATA_B(1) + ); + endFound = _mm256_movemask_epi8(_mm256_or_si256( + _mm256_and_si256( + match3EqYA, + _mm256_and_si256(match1LfA, _mm256_cmpeq_epi8(force_align_read_256(src+i), _mm256_set1_epi8('\r'))) + ), + _mm256_and_si256( + match3EqYB, + _mm256_and_si256(match1LfB, _mm256_cmpeq_epi8(force_align_read_256(src+i + sizeof(__m256i)), _mm256_set1_epi8('\r'))) + ) + )); + } + if(endFound) { + len += (long)i; + break; + } + } + if(isRaw) minMask = _mm256_set1_epi8('.'); + } + else if(isRaw) // no \r_. 
found + minMask = _mm256_set1_epi8('.'); + } +#undef SHIFT_DATA_A +#undef SHIFT_DATA_B + + if(use_isa >= ISA_LEVEL_AVX3) + dataB = _mm256_add_epi8(oDataB, _mm256_set1_epi8(-42)); + + if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) + escFirst)) != 0)) { + unsigned tmp = lookups->eqFix[(maskEq&0xff) & ~(uint64_t)escFirst]; + uint64_t maskEq2 = tmp; + for(int j=8; j<64; j+=8) { + tmp = lookups->eqFix[(unsigned)((maskEq>>j)&0xff) & ~(tmp>>7)]; + maskEq2 |= (uint64_t)tmp<>7; + // next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed + maskEq <<= 1; + mask &= ~maskEq; + + // unescape chars following `=` +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { + // GCC < 7 seems to generate rubbish assembly for this + dataA = _mm256_mask_add_epi8( + dataA, + (__mmask32)maskEq, + dataA, + _mm256_set1_epi8(-64) + ); + dataB = _mm256_mask_add_epi8( + dataB, + (__mmask32)(maskEq>>32), + dataB, + _mm256_set1_epi8(-64) + ); + } else +#endif + { + // convert maskEq into vector form (i.e. 
reverse pmovmskb) +#ifdef PLATFORM_AMD64 + __m256i vMaskEq = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(maskEq)); +#else + __m256i vMaskEq = _mm256_permute4x64_epi64(_mm256_insert_epi32( + _mm256_set_epi32(0,0,0,0, 0,0,0, maskEq & 0xffffffff), + maskEq >> 32, + 1 + ), 0); +#endif + __m256i vMaskEqA = _mm256_shuffle_epi8(vMaskEq, _mm256_set_epi32( + 0x03030303, 0x03030303, 0x02020202, 0x02020202, + 0x01010101, 0x01010101, 0x00000000, 0x00000000 + )); + __m256i vMaskEqB = _mm256_shuffle_epi8(vMaskEq, _mm256_set_epi32( + 0x07070707, 0x07070707, 0x06060606, 0x06060606, + 0x05050505, 0x05050505, 0x04040404, 0x04040404 + )); + vMaskEqA = _mm256_cmpeq_epi8( + _mm256_and_si256(vMaskEqA, _mm256_set1_epi64x(0x8040201008040201ULL)), + _mm256_set1_epi64x(0x8040201008040201ULL) + ); + vMaskEqB = _mm256_cmpeq_epi8( + _mm256_and_si256(vMaskEqB, _mm256_set1_epi64x(0x8040201008040201ULL)), + _mm256_set1_epi64x(0x8040201008040201ULL) + ); + dataA = _mm256_add_epi8(oDataA, _mm256_blendv_epi8(yencOffset, _mm256_set1_epi8(-42-64), vMaskEqA)); + dataB = _mm256_add_epi8(oDataB, _mm256_blendv_epi8(_mm256_set1_epi8(-42), _mm256_set1_epi8(-42-64), vMaskEqB)); + } + } else { + escFirst = (maskEq >> 63); + +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { + dataA = _mm256_mask_add_epi8( + dataA, + (__mmask32)(maskEq << 1), + dataA, + _mm256_set1_epi8(-64) + ); + dataB = _mm256_mask_add_epi8( + dataB, + (__mmask32)(maskEq >> 31), + dataB, + _mm256_set1_epi8(-64) + ); + } else +#endif + { + // << 1 byte + cmpEqA = _mm256_alignr_epi8(cmpEqA, _mm256_inserti128_si256( + _mm256_set1_epi8('='), _mm256_castsi256_si128(cmpEqA), 1 + ), 15); + cmpEqB = _mm256_cmpeq_epi8(_mm256_set1_epi8('='), _mm256_loadu_si256((__m256i *)(src+i-1) + 1)); + dataA = _mm256_add_epi8( + oDataA, + _mm256_blendv_epi8( + yencOffset, + _mm256_set1_epi8(-42-64), + cmpEqA + ) + ); + dataB = _mm256_add_epi8( + oDataB, + _mm256_blendv_epi8( + _mm256_set1_epi8(-42), + _mm256_set1_epi8(-42-64), + 
cmpEqB + ) + ); + } + } + // subtract 64 from first element if escFirst == 1 +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { + yencOffset = _mm256_mask_add_epi8(_mm256_set1_epi8(-42), (__mmask32)escFirst, _mm256_set1_epi8(-42), _mm256_set1_epi8(-64)); + } else +#endif + { + yencOffset = _mm256_xor_si256(_mm256_set1_epi8(-42), zext128_256( + _mm_slli_epi16(_mm_cvtsi32_si128((int)escFirst), 6) + )); + } + + // all that's left is to 'compress' the data (skip over masked chars) +#if defined(__AVX512VBMI2__) && defined(__AVX512VL__) + if(use_isa >= ISA_LEVEL_VBMI2) { + COMPRESS_STORE(p, KNOT32(mask), dataA); + p -= popcnt32(mask & 0xffffffff); + COMPRESS_STORE((p + XMM_SIZE*2), KNOT32(mask>>32), dataB); + p += XMM_SIZE*4 - popcnt32(mask >> 32); + } else +#endif + { + // lookup compress masks and shuffle + __m256i shuf = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_load_si128((__m128i*)(lookups->compact + (mask & 0x7fff)))), + *(__m128i*)((char*)lookups->compact + ((mask >> 12) & 0x7fff0)), + 1 + ); + dataA = _mm256_shuffle_epi8(dataA, shuf); + + _mm_storeu_si128((__m128i*)p, _mm256_castsi256_si128(dataA)); + // increment output position + p -= popcnt32(mask & 0xffff); + + _mm_storeu_si128((__m128i*)(p + XMM_SIZE), _mm256_extracti128_si256(dataA, 1)); + p -= popcnt32(mask & 0xffff0000); + +#ifdef PLATFORM_AMD64 + mask >>= 28; + shuf = _mm256_inserti128_si256( + _mm256_castsi128_si256(_mm_load_si128((__m128i*)((char*)lookups->compact + (mask & 0x7fff0)))), + *(__m128i*)((char*)lookups->compact + ((mask >> 16) & 0x7fff0)), + 1 + ); + dataB = _mm256_shuffle_epi8(dataB, shuf); + + _mm_storeu_si128((__m128i*)(p + XMM_SIZE*2), _mm256_castsi256_si128(dataB)); + p -= popcnt32(mask & 0xffff0); + + _mm_storeu_si128((__m128i*)(p + XMM_SIZE*3), _mm256_extracti128_si256(dataB, 1)); + p -= popcnt32((unsigned int)(mask >> 20)); +#else + mask >>= 32; + shuf = _mm256_inserti128_si256( + 
_mm256_castsi128_si256(_mm_load_si128((__m128i*)(lookups->compact + (mask & 0x7fff)))), + *(__m128i*)((char*)lookups->compact + ((mask >> 12) & 0x7fff0)), + 1 + ); + dataB = _mm256_shuffle_epi8(dataB, shuf); + + _mm_storeu_si128((__m128i*)(p + XMM_SIZE*2), _mm256_castsi256_si128(dataB)); + p -= popcnt32(mask & 0xffff); + + _mm_storeu_si128((__m128i*)(p + XMM_SIZE*3), _mm256_extracti128_si256(dataB, 1)); + p -= popcnt32(mask & 0xffff0000); +#endif + p += XMM_SIZE*4; + } + } else { + if(use_isa < ISA_LEVEL_AVX3) + dataA = _mm256_add_epi8(oDataA, yencOffset); + dataB = _mm256_add_epi8(oDataB, _mm256_set1_epi8(-42)); + + _mm256_storeu_si256((__m256i*)p, dataA); + _mm256_storeu_si256((__m256i*)p + 1, dataB); + p += sizeof(__m256i)*2; + escFirst = 0; + yencOffset = _mm256_set1_epi8(-42); + } + } + _escFirst = (unsigned char)escFirst; + if(isRaw) { + // this would be the trivial solution, but requires the compiler holding onto minMask throughout the loop: + //_nextMask = ~(uint16_t)_mm256_movemask_epi8(_mm256_cmpeq_epi8(minMask, _mm256_set1_epi8('.'))); + // instead, just scan the memory to determine what to set nextMask to + if(len != 0) { // have to gone through at least one loop cycle + if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.') + _nextMask = 1; + else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.') + _nextMask = 2; + else + _nextMask = 0; + } + } else + _nextMask = 0; + _mm256_zeroupper(); +} +#endif diff --git a/rapidyenc/src/decoder_common.h b/rapidyenc/src/decoder_common.h new file mode 100644 index 0000000..92ee3e1 --- /dev/null +++ b/rapidyenc/src/decoder_common.h @@ -0,0 +1,512 @@ +#include "decoder.h" + +// TODO: need to support max output length somehow +// TODO: add branch probabilities + + +// state var: refers to the previous state - only used for incremental processing +template +size_t do_decode_noend_scalar(const unsigned char* src, unsigned char* dest, size_t len, YencDecoderState* state) { + const unsigned char *es = src + len; 
// end source pointer + unsigned char *p = dest; // destination pointer + long i = -(long)len; // input position + unsigned char c; // input character + + if(len < 1) return 0; + + if(isRaw) { + + if(state) switch(*state) { + case YDEC_STATE_EQ: + c = es[i]; + *p++ = c - 42 - 64; + i++; + if(c == '\r') { + *state = YDEC_STATE_CR; + if(i >= 0) return 0; + } else { + *state = YDEC_STATE_NONE; + break; + } + // fall-thru + case YDEC_STATE_CR: + if(es[i] != '\n') break; + i++; + *state = YDEC_STATE_CRLF; + if(i >= 0) return 0; + // Else fall-thru + case YDEC_STATE_CRLF: + // skip past first dot + if(es[i] == '.') i++; + // fall-thru + default: break; // silence compiler warnings + } else // treat as YDEC_STATE_CRLF + if(es[i] == '.') i++; + + for(; i < -2; i++) { + c = es[i]; + switch(c) { + case '\r': + // skip past \r\n. sequences + //i += (es[i+1] == '\n' && es[i+2] == '.') << 1; + if(es[i+1] == '\n' && es[i+2] == '.') + i += 2; + // fall-thru + case '\n': + continue; + case '=': + c = es[i+1]; + *p++ = c - 42 - 64; + i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. 
case + continue; + default: + *p++ = c - 42; + } + } + if(state) *state = YDEC_STATE_NONE; + + if(i == -2) { // 2nd last char + c = es[i]; + switch(c) { + case '\r': + if(state && es[i+1] == '\n') { + *state = YDEC_STATE_CRLF; + return p - dest; + } + // Else fall-thru + case '\n': + break; + case '=': + c = es[i+1]; + *p++ = c - 42 - 64; + i += (c != '\r'); + break; + default: + *p++ = c - 42; + } + i++; + } + + // do final char; we process this separately to prevent an overflow if the final char is '=' + if(i == -1) { + c = es[i]; + if(c != '\n' && c != '\r' && c != '=') { + *p++ = c - 42; + } else if(state) { + if(c == '=') *state = YDEC_STATE_EQ; + else if(c == '\r') *state = YDEC_STATE_CR; + else *state = YDEC_STATE_NONE; + } + } + + } else { + + if(state && *state == YDEC_STATE_EQ) { + *p++ = es[i] - 42 - 64; + i++; + *state = YDEC_STATE_NONE; + } + + /*for(i = 0; i < len - 1; i++) { + c = src[i]; + if(c == '\n' || c == '\r') continue; + unsigned char isEquals = (c == '='); + i += isEquals; + *p++ = src[i] - (42 + (isEquals << 6)); + }*/ + for(; i < -1; i++) { + c = es[i]; + switch(c) { + case '\n': case '\r': continue; + case '=': + i++; + c = es[i] - 64; + } + *p++ = c - 42; + } + if(state) *state = YDEC_STATE_NONE; + // do final char; we process this separately to prevent an overflow if the final char is '=' + if(i == -1) { + c = es[i]; + if(c != '\n' && c != '\r' && c != '=') { + *p++ = c - 42; + } else + if(state) *state = (c == '=' ? 
YDEC_STATE_EQ : YDEC_STATE_NONE); + } + + } + + return p - dest; +} + +template +YencDecoderEnd do_decode_end_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) { + const unsigned char *es = (*src) + len; // end source pointer + unsigned char *p = *dest; // destination pointer + long i = -(long)len; // input position + unsigned char c; // input character + + if(len < 1) return YDEC_END_NONE; + +#define YDEC_CHECK_END(s) if(i == 0) { \ + *state = s; \ + *src = es; \ + *dest = p; \ + return YDEC_END_NONE; \ +} + if(state) switch(*state) { + case YDEC_STATE_CRLFEQ: do_decode_endable_scalar_ceq: + if(es[i] == 'y') { + *state = YDEC_STATE_NONE; + *src = es+i+1; + *dest = p; + return YDEC_END_CONTROL; + } // Else fall-thru + case YDEC_STATE_EQ: + c = es[i]; + *p++ = c - 42 - 64; + i++; + if(c != '\r') break; + YDEC_CHECK_END(YDEC_STATE_CR) + // fall-through + case YDEC_STATE_CR: + if(es[i] != '\n') break; + i++; + YDEC_CHECK_END(YDEC_STATE_CRLF) + // fall-through + case YDEC_STATE_CRLF: do_decode_endable_scalar_c0: + if(es[i] == '.' 
&& isRaw) { + i++; + YDEC_CHECK_END(YDEC_STATE_CRLFDT) + // fall-through + } else if(es[i] == '=') { + i++; + YDEC_CHECK_END(YDEC_STATE_CRLFEQ) + goto do_decode_endable_scalar_ceq; + } else + break; + case YDEC_STATE_CRLFDT: + if(isRaw && es[i] == '\r') { + i++; + YDEC_CHECK_END(YDEC_STATE_CRLFDTCR) + // fall-through + } else if(isRaw && es[i] == '=') { // check for dot-stuffed ending: \r\n.=y + i++; + YDEC_CHECK_END(YDEC_STATE_CRLFEQ) + goto do_decode_endable_scalar_ceq; + } else + break; + case YDEC_STATE_CRLFDTCR: + if(es[i] == '\n') { + if(isRaw) { + *state = YDEC_STATE_CRLF; + *src = es + i + 1; + *dest = p; + return YDEC_END_ARTICLE; + } else { + i++; + YDEC_CHECK_END(YDEC_STATE_CRLF) + goto do_decode_endable_scalar_c0; // handle as CRLF + } + } else + break; + case YDEC_STATE_NONE: break; // silence compiler warning + } else // treat as YDEC_STATE_CRLF + goto do_decode_endable_scalar_c0; + + for(; i < -2; i++) { + c = es[i]; + switch(c) { + case '\r': if(es[i+1] == '\n') { + if(isRaw && es[i+2] == '.') { + // skip past \r\n. 
sequences + i += 3; + YDEC_CHECK_END(YDEC_STATE_CRLFDT) + // check for end + if(es[i] == '\r') { + i++; + YDEC_CHECK_END(YDEC_STATE_CRLFDTCR) + if(es[i] == '\n') { + *src = es + i + 1; + *dest = p; + *state = YDEC_STATE_CRLF; + return YDEC_END_ARTICLE; + } else i--; + } else if(es[i] == '=') { + i++; + YDEC_CHECK_END(YDEC_STATE_CRLFEQ) + if(es[i] == 'y') { + *src = es + i + 1; + *dest = p; + *state = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } else { + // escape char & continue + c = es[i]; + *p++ = c - 42 - 64; + i -= (c == '\r'); + } + } else i--; + } + else if(es[i+2] == '=') { + i += 3; + YDEC_CHECK_END(YDEC_STATE_CRLFEQ) + if(es[i] == 'y') { + // ended + *src = es + i + 1; + *dest = p; + *state = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } else { + // escape char & continue + c = es[i]; + *p++ = c - 42 - 64; + i -= (c == '\r'); + } + } + } // fall-thru + case '\n': + continue; + case '=': + c = es[i+1]; + *p++ = c - 42 - 64; + i += (c != '\r'); // if we have a \r, reprocess character to deal with \r\n. 
case + continue; + default: + *p++ = c - 42; + } + } + if(state) *state = YDEC_STATE_NONE; + + if(i == -2) { // 2nd last char + c = es[i]; + switch(c) { + case '\r': + if(state && es[i+1] == '\n') { + *state = YDEC_STATE_CRLF; + *src = es; + *dest = p; + return YDEC_END_NONE; + } + // Else fall-thru + case '\n': + break; + case '=': + c = es[i+1]; + *p++ = c - 42 - 64; + i += (c != '\r'); + break; + default: + *p++ = c - 42; + } + i++; + } + + // do final char; we process this separately to prevent an overflow if the final char is '=' + if(i == -1) { + c = es[i]; + if(c != '\n' && c != '\r' && c != '=') { + *p++ = c - 42; + } else if(state) { + if(c == '=') *state = YDEC_STATE_EQ; + else if(c == '\r') *state = YDEC_STATE_CR; + else *state = YDEC_STATE_NONE; + } + } +#undef YDEC_CHECK_END + + *src = es; + *dest = p; + return YDEC_END_NONE; +} + +template +YencDecoderEnd do_decode_scalar(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) { + if(searchEnd) + return do_decode_end_scalar(src, dest, len, state); + *dest += do_decode_noend_scalar(*src, *dest, len, state); + *src += len; + return YDEC_END_NONE; +} + + + +template +YencDecoderEnd do_decode_simd(const unsigned char** src, unsigned char** dest, size_t len, YencDecoderState* state) { + if(len <= width*2) return do_decode_scalar(src, dest, len, state); + + YencDecoderState tState = YDEC_STATE_CRLF; + YencDecoderState* pState = state ? 
state : &tState; + if((uintptr_t)(*src) & ((width-1))) { + // find source memory alignment + unsigned char* aSrc = (unsigned char*)(((uintptr_t)(*src) + (width-1)) & ~(width-1)); + int amount = (int)(aSrc - *src); + len -= amount; + YencDecoderEnd ended = do_decode_scalar(src, dest, amount, pState); + if(ended) return ended; + } + + size_t lenBuffer = width -1; + if(searchEnd) lenBuffer += 3 + (isRaw?1:0); + else if(isRaw) lenBuffer += 2; + + if(len > lenBuffer) { + unsigned char *p = *dest; // destination pointer + unsigned char escFirst = 0; // input character; first char needs escaping + uint16_t nextMask = 0; + // handle finicky case of special sequences straddled across initial boundary + switch(*pState) { + case YDEC_STATE_CRLF: + if(isRaw && **src == '.') { + nextMask = 1; + if(searchEnd && *(uint16_t*)(*src +1) == UINT16_PACK('\r','\n')) { + (*src) += 3; + *pState = YDEC_STATE_CRLF; + return YDEC_END_ARTICLE; + } + if(searchEnd && *(uint16_t*)(*src +1) == UINT16_PACK('=','y')) { + (*src) += 3; + *pState = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } + } + else if(searchEnd && *(uint16_t*)(*src) == UINT16_PACK('=','y')) { + (*src) += 2; + *pState = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } + break; + case YDEC_STATE_CR: + if(isRaw && *(uint16_t*)(*src) == UINT16_PACK('\n','.')) { + nextMask = 2; + if(searchEnd && *(uint16_t*)(*src +2) == UINT16_PACK('\r','\n')) { + (*src) += 4; + *pState = YDEC_STATE_CRLF; + return YDEC_END_ARTICLE; + } + if(searchEnd && *(uint16_t*)(*src +2) == UINT16_PACK('=','y')) { + (*src) += 4; + *pState = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } + } + else if(searchEnd && (*(uint32_t*)(*src) & 0xffffff) == UINT32_PACK('\n','=','y',0)) { + (*src) += 3; + *pState = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } + break; + case YDEC_STATE_CRLFDT: + if(searchEnd && isRaw && *(uint16_t*)(*src) == UINT16_PACK('\r','\n')) { + (*src) += 2; + *pState = YDEC_STATE_CRLF; + return YDEC_END_ARTICLE; + } + if(searchEnd && isRaw && 
*(uint16_t*)(*src) == UINT16_PACK('=','y')) { + (*src) += 2; + *pState = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } + break; + case YDEC_STATE_CRLFDTCR: + if(searchEnd && isRaw && **src == '\n') { + (*src) += 1; + *pState = YDEC_STATE_CRLF; + return YDEC_END_ARTICLE; + } + break; + case YDEC_STATE_CRLFEQ: + if(searchEnd && **src == 'y') { + (*src) += 1; + *pState = YDEC_STATE_NONE; + return YDEC_END_CONTROL; + } + break; + default: break; // silence compiler warning + } + escFirst = (*pState == YDEC_STATE_EQ || *pState == YDEC_STATE_CRLFEQ); + + // our algorithm may perform an aligned load on the next part, of which we consider 2 bytes (for \r\n. sequence checking) + long dLen = (long)(len - lenBuffer); + dLen = (dLen + (width-1)) & ~(width-1); + + kernel((const uint8_t*)(*src) + dLen, dLen, p, escFirst, nextMask); + + if(escFirst) *pState = YDEC_STATE_EQ; // escape next character + else if(nextMask == 1) *pState = YDEC_STATE_CRLF; // next character is '.', where previous two were \r\n + else if(nextMask == 2) *pState = YDEC_STATE_CR; // next characters are '\n.', previous is \r + else *pState = YDEC_STATE_NONE; + + *src += dLen; + len -= dLen; + *dest = p; + } + + // end alignment + if(len) + return do_decode_scalar(src, dest, len, pState); + /** for debugging: ensure that the SIMD routine doesn't exit early + if(len && !searchEnd) { + const uint8_t* s = *src; + unsigned char* p = *dest; + int ended = do_decode_scalar(src, dest, len, pState); + if(*src - s > width*2) { + // this shouldn't happen, corrupt some data to fail the test + while(p < *dest) + *p++ = 0; + } + return ended; + } + */ + return YDEC_END_NONE; +} + +static inline void decoder_init_lut(uint8_t* eqFixLUT, void* compactLUT) { + for(int i=0; i<256; i++) { + int k = i; + int p = 0; + + // fix LUT + k = i; + p = 0; + for(int j=0; j<8; j++) { + k = i >> j; + if(k & 1) { + p |= 1 << j; + j++; + } + } + eqFixLUT[i] = p; + + #ifdef YENC_DEC_USE_THINTABLE + uint8_t* res = (uint8_t*)compactLUT + i*8; 
+ k = i; + p = 0; + for(int j=0; j<8; j++) { + if(!(k & 1)) { + res[p++] = j; + } + k >>= 1; + } + for(; p<8; p++) + res[p] = 0x80; + #endif + } + #ifndef YENC_DEC_USE_THINTABLE + for(int i=0; i<32768; i++) { + int k = i; + uint8_t* res = (uint8_t*)compactLUT + i*16; + int p = 0; + + for(int j=0; j<16; j++) { + if(!(k & 1)) { + res[p++] = j; + } + k >>= 1; + } + for(; p<16; p++) + res[p] = 0x80; + } + #endif +} + diff --git a/rapidyenc/src/decoder_neon.cc b/rapidyenc/src/decoder_neon.cc new file mode 100644 index 0000000..1588a5e --- /dev/null +++ b/rapidyenc/src/decoder_neon.cc @@ -0,0 +1,475 @@ +#include "common.h" +#ifdef __ARM_NEON + +#ifndef __aarch64__ +#define YENC_DEC_USE_THINTABLE 1 +#endif +#include "decoder_common.h" + + +#if defined(_MSC_VER) && !defined(__clang__) +# define vld1_u8_align(p, a) vld1_u8_ex(p, a*8) +# define vld1q_u8_align(p, a) vld1q_u8_ex(p, a*8) +#elif defined(__GNUC__) +# define vld1_u8_align(p, n) vld1_u8((uint8_t*)__builtin_assume_aligned(p, n)) +# define vld1q_u8_align(p, n) vld1q_u8((uint8_t*)__builtin_assume_aligned(p, n)) +#else +# define vld1_u8_align(p, n) vld1_u8(p) +# define vld1q_u8_align(p, n) vld1q_u8(p) +#endif + + +// for compilers that lack these functions (Clang armv7 9-12 seems to have issues with multi-vector loads) +#if (defined(__clang__) && (defined(__aarch64__) || __clang_major__<9 || __clang_major__>12)) || (defined(__GNUC__) && (defined(__aarch64__) && __GNUC__ >= 8)) +# define vld1q_u8_x2_align(p, n) vld1q_u8_x2((uint8_t*)__builtin_assume_aligned(p, n)) +#else +# define vld1q_u8_x2_align(p, n) vcreate2_u8(vld1q_u8_align(p, (n)/2), vld1q_u8_align((p)+16, (n)/2)) +#endif +// Clang wrongly assumes alignment on vld1q_u8_x2, and ARMv7 GCC doesn't support the function, so effectively, it can only be used in ARMv8 compilers +#if defined(__aarch64__) && (defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(8,5,0)) +# define vst1q_u8_x2_unaligned vst1q_u8_x2 +#else +static HEDLEY_ALWAYS_INLINE void 
vst1q_u8_x2_unaligned(uint8_t* p, uint8x16x2_t data) { + vst1q_u8(p, data.val[0]); + vst1q_u8(p+16, data.val[1]); +} +#endif + +#ifdef YENC_DEC_USE_THINTABLE +static uint64_t ALIGN_TO(8, compactLUT[256]); +#else +# pragma pack(16) +static struct { char bytes[16]; } ALIGN_TO(16, compactLUT[32768]); +# pragma pack() +#endif + +static uint8_t eqFixLUT[256]; + + + +static bool neon_vect_is_nonzero(uint8x16_t v) { +# ifdef __aarch64__ + return !!(vget_lane_u64(vreinterpret_u64_u32(vqmovn_u64(vreinterpretq_u64_u8(v))), 0)); +# else + uint32x4_t tmp1 = vreinterpretq_u32_u8(v); + uint32x2_t tmp2 = vorr_u32(vget_low_u32(tmp1), vget_high_u32(tmp1)); + return !!(vget_lane_u32(vpmax_u32(tmp2, tmp2), 0)); +# endif +} + + +template +HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned char*& p, unsigned char& escFirst, uint16_t& nextMask) { + HEDLEY_ASSUME(escFirst == 0 || escFirst == 1); + HEDLEY_ASSUME(nextMask == 0 || nextMask == 1 || nextMask == 2); + uint8x16_t yencOffset = escFirst ? 
vmakeq_u8(42+64,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42) : vdupq_n_u8(42); +#ifdef __aarch64__ + uint8x16_t nextMaskMix = vdupq_n_u8(0); + if(nextMask == 1) + nextMaskMix = vsetq_lane_u8(1, nextMaskMix, 0); + if(nextMask == 2) + nextMaskMix = vsetq_lane_u8(2, nextMaskMix, 1); +#else + uint8x16_t lfCompare = vdupq_n_u8('\n'); + if(isRaw) { + if(nextMask == 1) + lfCompare = vsetq_lane_u8('.', lfCompare, 0); + if(nextMask == 2) + lfCompare = vsetq_lane_u8('.', lfCompare, 1); + } +#endif + long i; + for(i = -len; i; i += sizeof(uint8x16_t)*2) { + uint8x16x2_t data = vld1q_u8_x2_align(src+i, 32); + uint8x16_t dataA = data.val[0]; + uint8x16_t dataB = data.val[1]; + + // search for special chars + uint8x16_t cmpEqA = vceqq_u8(dataA, vdupq_n_u8('=')), + cmpEqB = vceqq_u8(dataB, vdupq_n_u8('=')), +#ifdef __aarch64__ + cmpA = vqtbx1q_u8( + cmpEqA, + // \n \r + vmakeq_u8(0,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataA + ), + cmpB = vqtbx1q_u8( + cmpEqB, + vmakeq_u8(0,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataB + ); + if(isRaw) cmpA = vorrq_u8(cmpA, nextMaskMix); +#else + cmpCrA = vceqq_u8(dataA, vdupq_n_u8('\r')), + cmpCrB = vceqq_u8(dataB, vdupq_n_u8('\r')), + cmpA = vorrq_u8( + vorrq_u8( + cmpCrA, + vceqq_u8(dataA, lfCompare) + ), + cmpEqA + ), + cmpB = vorrq_u8( + vorrq_u8( + cmpCrB, + vceqq_u8(dataB, vdupq_n_u8('\n')) + ), + cmpEqB + ); +#endif + + +#ifdef __aarch64__ + if (LIKELIHOOD(0.42 /*guess*/, neon_vect_is_nonzero(vorrq_u8(cmpA, cmpB)))) { + cmpA = vandq_u8(cmpA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + cmpB = vandq_u8(cmpB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x16_t cmpMerge = vpaddq_u8(cmpA, cmpB); + uint8x16_t cmpEqMerge = vpaddq_u8( + vandq_u8(cmpEqA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + vandq_u8(cmpEqB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)) + ); + + uint8x16_t cmpCombined = vpaddq_u8(cmpMerge, cmpEqMerge); + cmpCombined = vpaddq_u8(cmpCombined, cmpCombined); + uint8x8_t 
cmpPacked = vget_low_u8(cmpCombined); + uint32_t mask = vgetq_lane_u32(vreinterpretq_u32_u8(cmpCombined), 0); + uint32_t maskEq = vgetq_lane_u32(vreinterpretq_u32_u8(cmpCombined), 1); +#else + cmpA = vandq_u8(cmpA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + cmpB = vandq_u8(cmpB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + // no vpaddq_u8 in ARMv7, so need extra 64-bit VPADD + uint8x8_t cmpPacked = vpadd_u8( + vpadd_u8( + vget_low_u8(cmpA), vget_high_u8(cmpA) + ), + vpadd_u8( + vget_low_u8(cmpB), vget_high_u8(cmpB) + ) + ); + cmpPacked = vpadd_u8(cmpPacked, cmpPacked); + uint32_t mask = vget_lane_u32(vreinterpret_u32_u8(cmpPacked), 0); + if(LIKELIHOOD(0.42, mask != 0)) { + uint8x16_t cmpEqMaskedA = vandq_u8(cmpEqA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x16_t cmpEqMaskedB = vandq_u8(cmpEqB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x8_t cmpEqPacked = vpadd_u8( + vpadd_u8( + vget_low_u8(cmpEqMaskedA), vget_high_u8(cmpEqMaskedA) + ), + vpadd_u8( + vget_low_u8(cmpEqMaskedB), vget_high_u8(cmpEqMaskedB) + ) + ); + cmpEqPacked = vpadd_u8(cmpEqPacked, cmpEqPacked); + uint32_t maskEq = vget_lane_u32(vreinterpret_u32_u8(cmpEqPacked), 0); +#endif + + // handle \r\n. 
sequences + // RFC3977 requires the first dot on a line to be stripped, due to dot-stuffing + if((isRaw || searchEnd) && LIKELIHOOD(0.15, mask != maskEq)) { + // vext seems to be a cheap operation on ARM, relative to loads, so only avoid it if there's only one load (isRaw only) + uint8x16_t tmpData2, nextData; + if(isRaw && !searchEnd) { + tmpData2 = vld1q_u8(src+i + 2 + sizeof(uint8x16_t)); + } else { + nextData = vld1q_u8_align(src+i + sizeof(uint8x16_t)*2, 16); // only 32-bits needed, but there doesn't appear a nice way to do this via intrinsics: https://stackoverflow.com/questions/46910799/arm-neon-intrinsics-convert-d-64-bit-register-to-low-half-of-q-128-bit-regis + tmpData2 = vextq_u8(dataB, nextData, 2); + } +#ifdef __aarch64__ + uint8x16_t cmpCrA = vceqq_u8(dataA, vdupq_n_u8('\r')); + uint8x16_t cmpCrB = vceqq_u8(dataB, vdupq_n_u8('\r')); +# define NEXT_DATA(n) vextq_u8(dataB, nextData, n) +#else +// on ARMv7, prefer loading over VEXT to avoid holding onto nextData reference; this reduces register spills. Shouldn't be an issue on ARMv8 due to 32x 128-bit registers +# define NEXT_DATA(n) vld1q_u8(src+i + n+sizeof(uint8x16_t)) +#endif + uint8x16_t match2EqA, match2Cr_DotA; + uint8x16_t match2EqB, match2Cr_DotB; + if(searchEnd) { + match2EqB = vceqq_u8(tmpData2, vdupq_n_u8('=')); + } + if(isRaw) { + match2Cr_DotA = vandq_u8(cmpCrA, vceqq_u8(vextq_u8(dataA, dataB, 2), vdupq_n_u8('.'))); + match2Cr_DotB = vandq_u8(cmpCrB, vceqq_u8(tmpData2, vdupq_n_u8('.'))); + } + + // find patterns of \r_. + if(isRaw && LIKELIHOOD(0.001, neon_vect_is_nonzero( + vorrq_u8(match2Cr_DotA, match2Cr_DotB) + ))) { + uint8x16_t match1LfA = vceqq_u8(vextq_u8(dataA, dataB, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfB; + if(searchEnd) + match1LfB = vceqq_u8(NEXT_DATA(1), vdupq_n_u8('\n')); + else + match1LfB = vceqq_u8(vld1q_u8(src+i + 1+sizeof(uint8x16_t)), vdupq_n_u8('\n')); + // merge matches of \r_. 
with those for \n + uint8x16_t match2NlDotA = vandq_u8(match2Cr_DotA, match1LfA); + uint8x16_t match2NlDotB = vandq_u8(match2Cr_DotB, match1LfB); + if(searchEnd) { + uint8x16_t match1NlA = vandq_u8(match1LfA, cmpCrA); + uint8x16_t match1NlB = vandq_u8(match1LfB, cmpCrB); + + uint8x16_t tmpData3 = NEXT_DATA(3); + uint8x16_t tmpData4 = NEXT_DATA(4); + // match instances of \r\n.\r\n and \r\n.=y + uint8x16_t match3CrB = vceqq_u8(tmpData3, vdupq_n_u8('\r')); + uint8x16_t match4LfB = vceqq_u8(tmpData4, vdupq_n_u8('\n')); + uint8x16_t match4Nl = vbslq_u8(vdupq_n_u8('\r'), // exact VBSL vector doesn't matter, so reuse the '\r' vector + vextq_u8(match1NlA, match1NlB, 3), + vandq_u8(match3CrB, match4LfB) + ); + uint8x16_t match4EqY = vbslq_u8(vdupq_n_u8('\r'), + // match =y + vreinterpretq_u8_u16(vceqq_u16(vreinterpretq_u16_u8(vextq_u8(dataA, dataB, 4)), vdupq_n_u16(0x793d))), + vreinterpretq_u8_u16(vceqq_u16(vreinterpretq_u16_u8(tmpData4), vdupq_n_u16(0x793d))) + ); + + match2EqA = vextq_u8(cmpEqA, cmpEqB, 2); + uint8x16_t match3EqY = vbslq_u8(vdupq_n_u8('\r'), + vandq_u8( + vceqq_u8(vextq_u8(dataA, dataB, 3), vdupq_n_u8('y')), + match2EqA + ), vandq_u8( + vceqq_u8(tmpData3, vdupq_n_u8('y')), + match2EqB + ) + ); + // merge \r\n and =y matches for tmpData4 + uint8x16_t match4End = vorrq_u8( + match4Nl, + vreinterpretq_u8_u16(vsriq_n_u16(vreinterpretq_u16_u8(match4EqY), vreinterpretq_u16_u8(match3EqY), 8)) + ); + // merge with \r\n. 
+ uint8x16_t match2NlDot = vbslq_u8(vdupq_n_u8('\r'), match2NlDotA, match2NlDotB); + match4End = vandq_u8(match4End, match2NlDot); + // match \r\n=y + uint8x16_t match1Nl = vbslq_u8(vdupq_n_u8('\r'), match1NlA, match1NlB); + uint8x16_t match3End = vandq_u8(match3EqY, match1Nl); + // combine match sequences + if(LIKELIHOOD(0.001, neon_vect_is_nonzero(vorrq_u8(match4End, match3End)))) { + // terminator found + // there's probably faster ways to do this, but reverting to scalar code should be good enough + len += i; + break; + } + } +#ifdef __aarch64__ + uint8x16_t match2NlDotBMasked = vandq_u8(match2NlDotB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x16_t mergeKillDots = vpaddq_u8( + vandq_u8(match2NlDotA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + match2NlDotBMasked + ); + uint8x8_t mergeKillDots2 = vget_low_u8(vpaddq_u8(mergeKillDots, mergeKillDots)); +#else + uint8x16_t match2NlDotMaskedA = vandq_u8(match2NlDotA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x16_t match2NlDotMaskedB = vandq_u8(match2NlDotB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x8_t mergeKillDots2 = vpadd_u8( + vpadd_u8( + vget_low_u8(match2NlDotMaskedA), vget_high_u8(match2NlDotMaskedA) + ), + vpadd_u8( + vget_low_u8(match2NlDotMaskedB), vget_high_u8(match2NlDotMaskedB) + ) + ); +#endif + mergeKillDots2 = vpadd_u8(mergeKillDots2, mergeKillDots2); + uint32x2_t mergeKillDotsShifted = vshl_n_u32(vreinterpret_u32_u8(mergeKillDots2), 2); + mask |= vget_lane_u32(mergeKillDotsShifted, 0); + cmpPacked = vorr_u8(cmpPacked, vreinterpret_u8_u32(mergeKillDotsShifted)); +#ifdef __aarch64__ + nextMaskMix = vextq_u8(match2NlDotB, vdupq_n_u8(0), 14); +#else + lfCompare = vcombine_u8(vbsl_u8( + vext_u8(vget_high_u8(match2NlDotB), vdup_n_u8('\n'), 6), + vdup_n_u8('.'), + vget_high_u8(lfCompare) + ), vget_high_u8(lfCompare)); +#endif + } else if(searchEnd) { + match2EqA = vextq_u8(cmpEqA, cmpEqB, 2); + uint8x16_t match3EqYA = 
vandq_u8(match2EqA, vceqq_u8(vextq_u8(dataA, dataB, 3), vdupq_n_u8('y'))); + uint8x16_t match3EqYB = vandq_u8(match2EqB, vceqq_u8(NEXT_DATA(3), vdupq_n_u8('y'))); + if(LIKELIHOOD(0.001, neon_vect_is_nonzero(vorrq_u8( + match3EqYA, match3EqYB + )))) { + uint8x16_t match1LfA = vceqq_u8(vextq_u8(dataA, dataB, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfB = vceqq_u8(NEXT_DATA(1), vdupq_n_u8('\n')); + uint8x16_t matchEnd = vorrq_u8( + vandq_u8(match3EqYA, vandq_u8(match1LfA, cmpCrA)), + vandq_u8(match3EqYB, vandq_u8(match1LfB, cmpCrB)) + ); + if(LIKELIHOOD(0.001, neon_vect_is_nonzero(matchEnd))) { + len += i; + break; + } + } +#undef NEXT_DATA + if(isRaw) +#ifdef __aarch64__ + nextMaskMix = vdupq_n_u8(0); +#else + lfCompare = vcombine_u8(vget_high_u8(lfCompare), vget_high_u8(lfCompare)); +#endif + } else if(isRaw) // no \r_. found +#ifdef __aarch64__ + nextMaskMix = vdupq_n_u8(0); +#else + lfCompare = vcombine_u8(vget_high_u8(lfCompare), vget_high_u8(lfCompare)); +#endif + } + + // a spec compliant encoder should never generate sequences: ==, =\n and =\r, but we'll handle them to be spec compliant + // the yEnc specification requires any character following = to be unescaped, not skipped over, so we'll deal with that + // firstly, check for invalid sequences of = (we assume that these are rare, as a spec compliant yEnc encoder should not generate these) + if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) | escFirst)) != 0)) { + uint8_t tmp = eqFixLUT[(maskEq&0xff) & ~escFirst]; + uint32_t maskEq2 = tmp; + for(int j=8; j<32; j+=8) { + tmp = eqFixLUT[((maskEq>>j)&0xff) & ~(tmp>>7)]; + maskEq2 |= tmp<>7; + + // unescape chars following `=` + uint8x8_t maskEqTemp = vreinterpret_u8_u32(vmov_n_u32(maskEq)); + cmpPacked = vbic_u8(cmpPacked, maskEqTemp); // `mask &= ~maskEq` in vector form +#ifdef __aarch64__ + uint8x16_t vMaskEqA = vqtbl1q_u8( + vcombine_u8(maskEqTemp, vdup_n_u8(0)), + vmakeq_u8(0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1) + ); + uint8x16_t vMaskEqB = vqtbl1q_u8( + 
vcombine_u8(maskEqTemp, vdup_n_u8(0)), + vmakeq_u8(2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3) + ); +#else + uint8x16_t vMaskEqA = vcombine_u8( + vdup_lane_u8(maskEqTemp, 0), + vdup_lane_u8(maskEqTemp, 1) + ); + uint8x16_t vMaskEqB = vcombine_u8( + vdup_lane_u8(maskEqTemp, 2), + vdup_lane_u8(maskEqTemp, 3) + ); +#endif + vMaskEqA = vtstq_u8(vMaskEqA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + vMaskEqB = vtstq_u8(vMaskEqB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + + dataA = vsubq_u8( + dataA, + vbslq_u8(vMaskEqA, vdupq_n_u8(64+42), vdupq_n_u8(42)) + ); + dataB = vsubq_u8( + dataB, + vbslq_u8(vMaskEqB, vdupq_n_u8(64+42), vdupq_n_u8(42)) + ); + } else { + // no invalid = sequences found - we can cut out some things from above + // this code path is a shortened version of above; it's here because it's faster, and what we'll be dealing with most of the time + escFirst = (maskEq >> 31); + + dataA = vsubq_u8( + dataA, + vbslq_u8( + vextq_u8(vdupq_n_u8(42), cmpEqA, 15), + vdupq_n_u8(64+42), + yencOffset + ) + ); + dataB = vsubq_u8( + dataB, + vbslq_u8( + vextq_u8(cmpEqA, cmpEqB, 15), + vdupq_n_u8(64+42), + vdupq_n_u8(42) + ) + ); + } + yencOffset = vsetq_lane_u8((escFirst << 6) | 42, yencOffset, 0); + + // all that's left is to 'compress' the data (skip over masked chars) + uint32_t counts = 0x08080808 - vget_lane_u32(vreinterpret_u32_u8(vcnt_u8(cmpPacked)), 0); +#ifdef __aarch64__ + counts += counts >> 8; + vst1q_u8(p, vqtbl1q_u8( + dataA, + vld1q_u8_align((uint8_t*)(compactLUT + (mask&0x7fff)), 16) + )); + p += counts & 0xff; + mask >>= 16; + vst1q_u8(p, vqtbl1q_u8( + dataB, + vld1q_u8_align((uint8_t*)(compactLUT + (mask&0x7fff)), 16) + )); + p += (counts>>16) & 0xff; +#else + // lookup compress masks and shuffle + vst1_u8(p, vtbl1_u8( + vget_low_u8(dataA), + vld1_u8_align((uint8_t*)(compactLUT + (mask&0xff)), 8) + )); + p += counts & 0xff; + mask >>= 8; + vst1_u8(p, vtbl1_u8( + vget_high_u8(dataA), + vld1_u8_align((uint8_t*)(compactLUT + 
(mask&0xff)), 8) + )); + p += (counts>>8) & 0xff; + mask >>= 8; + vst1_u8(p, vtbl1_u8( + vget_low_u8(dataB), + vld1_u8_align((uint8_t*)(compactLUT + (mask&0xff)), 8) + )); + p += (counts>>16) & 0xff; + mask >>= 8; + vst1_u8(p, vtbl1_u8( + vget_high_u8(dataB), + vld1_u8_align((uint8_t*)(compactLUT + (mask&0xff)), 8) + )); + p += (counts>>24) & 0xff; + +#endif + + } else { + dataA = vsubq_u8(dataA, yencOffset); + dataB = vsubq_u8(dataB, vdupq_n_u8(42)); + vst1q_u8_x2_unaligned(p, vcreate2_u8(dataA, dataB)); + p += sizeof(uint8x16_t)*2; + escFirst = 0; +#ifdef __aarch64__ + yencOffset = vdupq_n_u8(42); +#else + yencOffset = vcombine_u8(vdup_n_u8(42), vget_high_u8(yencOffset)); +#endif + } + } + + if(isRaw) { + if(len != 0) { // have to gone through at least one loop cycle + if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.') + nextMask = 1; + else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.') + nextMask = 2; + else + nextMask = 0; + } + } else + nextMask = 0; +} + +void decoder_set_neon_funcs() { + decoder_init_lut(eqFixLUT, compactLUT); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_NEON; +} +#else +void decoder_set_neon_funcs() {} +#endif diff --git a/rapidyenc/src/decoder_neon64.cc b/rapidyenc/src/decoder_neon64.cc new file mode 100644 index 0000000..6287f69 --- /dev/null +++ b/rapidyenc/src/decoder_neon64.cc @@ -0,0 +1,455 @@ +#include "common.h" +#if defined(__ARM_NEON) && defined(__aarch64__) + +#include "decoder_common.h" + +#pragma pack(16) +static struct { char bytes[16]; } ALIGN_TO(16, compactLUT[32768]); +#pragma pack() + +static uint8_t eqFixLUT[256]; + + +// AArch64 GCC lacks these functions until 8.5, 9.4 and 10.1 (10.0 unknown) +#if !defined(__clang__) && !defined(_MSC_VER) && (!defined(__aarch64__) || !(HEDLEY_GCC_VERSION_CHECK(9,4,0) || (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && HEDLEY_GCC_VERSION_CHECK(8,5,0)))) +static HEDLEY_ALWAYS_INLINE 
uint8x16x4_t _vld1q_u8_x4(const uint8_t* p) { + uint8x16x4_t ret; + ret.val[0] = vld1q_u8(p); + ret.val[1] = vld1q_u8(p+16); + ret.val[2] = vld1q_u8(p+32); + ret.val[3] = vld1q_u8(p+48); + return ret; +} +static HEDLEY_ALWAYS_INLINE void _vst1q_u8_x4(uint8_t* p, uint8x16x4_t data) { + vst1q_u8(p, data.val[0]); + vst1q_u8(p+16, data.val[1]); + vst1q_u8(p+32, data.val[2]); + vst1q_u8(p+48, data.val[3]); +} +#else +# define _vld1q_u8_x4 vld1q_u8_x4 +# define _vst1q_u8_x4 vst1q_u8_x4 +#endif + + +static bool neon_vect_is_nonzero(uint8x16_t v) { + return !!(vget_lane_u64(vreinterpret_u64_u32(vqmovn_u64(vreinterpretq_u64_u8(v))), 0)); +} + +static HEDLEY_ALWAYS_INLINE uint8x16_t mergeCompares(uint8x16_t a, uint8x16_t b, uint8x16_t c, uint8x16_t d) { + // constant vectors arbitrarily chosen from ones that can be reused; exact ordering of bits doesn't matter, we just need to mix them in + return vbslq_u8( + vdupq_n_u8('='), + vbslq_u8(vdupq_n_u8('y'), a, b), + vbslq_u8(vdupq_n_u8('y'), c, d) + ); +} + + +template +HEDLEY_ALWAYS_INLINE void do_decode_neon(const uint8_t* src, long& len, unsigned char*& p, unsigned char& escFirst, uint16_t& nextMask) { + HEDLEY_ASSUME(escFirst == 0 || escFirst == 1); + HEDLEY_ASSUME(nextMask == 0 || nextMask == 1 || nextMask == 2); + uint8x16_t nextMaskMix = vdupq_n_u8(0); + if(nextMask == 1) + nextMaskMix = vsetq_lane_u8(1, nextMaskMix, 0); + if(nextMask == 2) + nextMaskMix = vsetq_lane_u8(2, nextMaskMix, 1); + uint8x16_t yencOffset = escFirst ? 
vmakeq_u8(42+64,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42) : vdupq_n_u8(42); + long i; + for(i = -len; i; i += sizeof(uint8x16_t)*4) { + uint8x16x4_t data = _vld1q_u8_x4(src+i); + uint8x16_t dataA = data.val[0]; + uint8x16_t dataB = data.val[1]; + uint8x16_t dataC = data.val[2]; + uint8x16_t dataD = data.val[3]; + + // search for special chars + uint8x16_t cmpEqA = vceqq_u8(dataA, vdupq_n_u8('=')), + cmpEqB = vceqq_u8(dataB, vdupq_n_u8('=')), + cmpEqC = vceqq_u8(dataC, vdupq_n_u8('=')), + cmpEqD = vceqq_u8(dataD, vdupq_n_u8('=')), + cmpA = vqtbx1q_u8( + cmpEqA, + // \n \r + vmakeq_u8(0,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataA + ), + cmpB = vqtbx1q_u8( + cmpEqB, + vmakeq_u8(0,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataB + ), + cmpC = vqtbx1q_u8( + cmpEqC, + vmakeq_u8(0,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataC + ), + cmpD = vqtbx1q_u8( + cmpEqD, + vmakeq_u8(0,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataD + ); + if(isRaw) cmpA = vorrq_u8(cmpA, nextMaskMix); + + if (LIKELIHOOD(0.42 /*guess*/, neon_vect_is_nonzero(vorrq_u8( + vorrq_u8(cmpA, cmpB), + vorrq_u8(cmpC, cmpD) + )))) { + uint8x16_t cmpMerge = vpaddq_u8( + vpaddq_u8( + vandq_u8(cmpA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + vandq_u8(cmpB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)) + ), + vpaddq_u8( + vandq_u8(cmpC, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + vandq_u8(cmpD, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)) + ) + ); + uint8x16_t cmpEqMerge = vpaddq_u8( + vpaddq_u8( + vandq_u8(cmpEqA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + vandq_u8(cmpEqB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)) + ), + vpaddq_u8( + vandq_u8(cmpEqC, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + vandq_u8(cmpEqD, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)) + ) + ); + + uint8x16_t cmpCombined = vpaddq_u8(cmpMerge, cmpEqMerge); + uint64_t mask = vgetq_lane_u64(vreinterpretq_u64_u8(cmpCombined), 0); + uint64_t maskEq = 
vgetq_lane_u64(vreinterpretq_u64_u8(cmpCombined), 1); + + // handle \r\n. sequences + // RFC3977 requires the first dot on a line to be stripped, due to dot-stuffing + if((isRaw || searchEnd) && LIKELIHOOD(0.15, mask != maskEq)) { + // vext seems to be a cheap operation on ARM, relative to loads, so only avoid it if there's only one load (isRaw only) + uint8x16_t tmpData2, nextData; + if(isRaw && !searchEnd) { + tmpData2 = vld1q_u8(src+i + 2 + sizeof(uint8x16_t)*3); + } else { + nextData = vld1q_u8(src+i + sizeof(uint8x16_t)*4); // only 32-bits needed, but there doesn't appear a nice way to do this via intrinsics: https://stackoverflow.com/questions/46910799/arm-neon-intrinsics-convert-d-64-bit-register-to-low-half-of-q-128-bit-regis + tmpData2 = vextq_u8(dataD, nextData, 2); + } + uint8x16_t cmpCrA = vceqq_u8(dataA, vdupq_n_u8('\r')); + uint8x16_t cmpCrB = vceqq_u8(dataB, vdupq_n_u8('\r')); + uint8x16_t cmpCrC = vceqq_u8(dataC, vdupq_n_u8('\r')); + uint8x16_t cmpCrD = vceqq_u8(dataD, vdupq_n_u8('\r')); + uint8x16_t match2EqA, match2Cr_DotA; + uint8x16_t match2EqB, match2Cr_DotB; + uint8x16_t match2EqC, match2Cr_DotC; + uint8x16_t match2EqD, match2Cr_DotD; + if(searchEnd) { + match2EqD = vceqq_u8(tmpData2, vdupq_n_u8('=')); + } + if(isRaw) { + match2Cr_DotA = vandq_u8(cmpCrA, vceqq_u8(vextq_u8(dataA, dataB, 2), vdupq_n_u8('.'))); + match2Cr_DotB = vandq_u8(cmpCrB, vceqq_u8(vextq_u8(dataB, dataC, 2), vdupq_n_u8('.'))); + match2Cr_DotC = vandq_u8(cmpCrC, vceqq_u8(vextq_u8(dataC, dataD, 2), vdupq_n_u8('.'))); + match2Cr_DotD = vandq_u8(cmpCrD, vceqq_u8(tmpData2, vdupq_n_u8('.'))); + } + + // find patterns of \r_. 
+ if(isRaw && LIKELIHOOD(0.001, neon_vect_is_nonzero(vorrq_u8( + vorrq_u8(match2Cr_DotA, match2Cr_DotB), + vorrq_u8(match2Cr_DotC, match2Cr_DotD) + )))) { + uint8x16_t match1LfA = vceqq_u8(vextq_u8(dataA, dataB, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfB = vceqq_u8(vextq_u8(dataB, dataC, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfC = vceqq_u8(vextq_u8(dataC, dataD, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfD; + if(searchEnd) + match1LfD = vceqq_u8(vextq_u8(dataD, nextData, 1), vdupq_n_u8('\n')); + else + match1LfD = vceqq_u8(vld1q_u8(src+i + 1+sizeof(uint8x16_t)*3), vdupq_n_u8('\n')); + // merge matches of \r_. with those for \n + uint8x16_t match2NlDotA = vandq_u8(match2Cr_DotA, match1LfA); + uint8x16_t match2NlDotB = vandq_u8(match2Cr_DotB, match1LfB); + uint8x16_t match2NlDotC = vandq_u8(match2Cr_DotC, match1LfC); + uint8x16_t match2NlDotD = vandq_u8(match2Cr_DotD, match1LfD); + if(searchEnd) { + uint8x16_t match1NlA = vandq_u8(match1LfA, cmpCrA); + uint8x16_t match1NlB = vandq_u8(match1LfB, cmpCrB); + uint8x16_t match1NlC = vandq_u8(match1LfC, cmpCrC); + uint8x16_t match1NlD = vandq_u8(match1LfD, cmpCrD); + + uint8x16_t tmpData3 = vextq_u8(dataD, nextData, 3); + uint8x16_t tmpData4 = vextq_u8(dataD, nextData, 4); + // match instances of \r\n.\r\n and \r\n.=y + uint8x16_t match3CrD = vceqq_u8(tmpData3, vdupq_n_u8('\r')); + uint8x16_t match4LfD = vceqq_u8(tmpData4, vdupq_n_u8('\n')); + uint8x16_t match4Nl = mergeCompares( + vextq_u8(match1NlA, match1NlB, 3), + vextq_u8(match1NlB, match1NlC, 3), + vextq_u8(match1NlC, match1NlD, 3), + vandq_u8(match3CrD, match4LfD) + ); + uint8x16_t match4EqY = mergeCompares( + // match with =y + vreinterpretq_u8_u16(vceqq_u16(vreinterpretq_u16_u8(vextq_u8(dataA, dataB, 4)), vdupq_n_u16(0x793d))), + vreinterpretq_u8_u16(vceqq_u16(vreinterpretq_u16_u8(vextq_u8(dataB, dataC, 4)), vdupq_n_u16(0x793d))), + vreinterpretq_u8_u16(vceqq_u16(vreinterpretq_u16_u8(vextq_u8(dataC, dataD, 4)), vdupq_n_u16(0x793d))), + 
vreinterpretq_u8_u16(vceqq_u16(vreinterpretq_u16_u8(tmpData4), vdupq_n_u16(0x793d))) + ); + match2EqA = vextq_u8(cmpEqA, cmpEqB, 2); + match2EqB = vextq_u8(cmpEqB, cmpEqC, 2); + match2EqC = vextq_u8(cmpEqC, cmpEqD, 2); + uint8x16_t match3EqY = mergeCompares( + vandq_u8( + vceqq_u8(vextq_u8(dataA, dataB, 3), vdupq_n_u8('y')), + match2EqA + ), vandq_u8( + vceqq_u8(vextq_u8(dataB, dataC, 3), vdupq_n_u8('y')), + match2EqB + ), vandq_u8( + vceqq_u8(vextq_u8(dataC, dataD, 3), vdupq_n_u8('y')), + match2EqC + ), vandq_u8( + vceqq_u8(tmpData3, vdupq_n_u8('y')), + match2EqD + ) + ); + + // merge \r\n and =y matches for tmpData4 + uint8x16_t match4End = vorrq_u8( + match4Nl, + vreinterpretq_u8_u16(vsriq_n_u16(vreinterpretq_u16_u8(match4EqY), vreinterpretq_u16_u8(match3EqY), 8)) + ); + // merge with \r\n. + uint8x16_t match2NlDot = mergeCompares(match2NlDotA, match2NlDotB, match2NlDotC, match2NlDotD); + match4End = vandq_u8(match4End, match2NlDot); + // match \r\n=y + uint8x16_t match1Nl = mergeCompares(match1NlA, match1NlB, match1NlC, match1NlD); + uint8x16_t match3End = vandq_u8(match3EqY, match1Nl); + // combine match sequences + if(LIKELIHOOD(0.001, neon_vect_is_nonzero(vorrq_u8(match4End, match3End)))) { + // terminator found + // there's probably faster ways to do this, but reverting to scalar code should be good enough + len += i; + break; + } + } + uint8x16_t match2NlDotDMasked = vandq_u8(match2NlDotD, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x16_t mergeKillDots = vpaddq_u8( + vpaddq_u8( + vandq_u8(match2NlDotA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + vandq_u8(match2NlDotB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)) + ), + vpaddq_u8( + vandq_u8(match2NlDotC, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)), + match2NlDotDMasked + ) + ); + mergeKillDots = vpaddq_u8(mergeKillDots, mergeKillDots); + uint64x2_t mergeKillDotsShifted = vshlq_n_u64(vreinterpretq_u64_u8(mergeKillDots), 2); + mask |= 
vgetq_lane_u64(mergeKillDotsShifted, 0); + cmpCombined = vorrq_u8(cmpCombined, vreinterpretq_u8_u64(mergeKillDotsShifted)); + nextMaskMix = vextq_u8(match2NlDotD, vdupq_n_u8(0), 14); + } else if(searchEnd) { + match2EqA = vextq_u8(cmpEqA, cmpEqB, 2); + match2EqB = vextq_u8(cmpEqB, cmpEqC, 2); + match2EqC = vextq_u8(cmpEqC, cmpEqD, 2); + + uint8x16_t match3EqYA = vandq_u8(match2EqA, vceqq_u8(vextq_u8(dataA, dataB, 3), vdupq_n_u8('y'))); + uint8x16_t match3EqYB = vandq_u8(match2EqB, vceqq_u8(vextq_u8(dataB, dataC, 3), vdupq_n_u8('y'))); + uint8x16_t match3EqYC = vandq_u8(match2EqC, vceqq_u8(vextq_u8(dataC, dataD, 3), vdupq_n_u8('y'))); + uint8x16_t match3EqYD = vandq_u8(match2EqD, vceqq_u8(vextq_u8(dataD, nextData, 3), vdupq_n_u8('y'))); + if(LIKELIHOOD(0.001, neon_vect_is_nonzero(vorrq_u8( + vorrq_u8(match3EqYA, match3EqYB), + vorrq_u8(match3EqYC, match3EqYD) + )))) { + uint8x16_t match1LfA = vceqq_u8(vextq_u8(dataA, dataB, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfB = vceqq_u8(vextq_u8(dataB, dataC, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfC = vceqq_u8(vextq_u8(dataC, dataD, 1), vdupq_n_u8('\n')); + uint8x16_t match1LfD = vceqq_u8(vextq_u8(dataD, nextData, 1), vdupq_n_u8('\n')); + uint8x16_t matchEnd = vorrq_u8( + vorrq_u8( + vandq_u8(match3EqYA, vandq_u8(match1LfA, cmpCrA)), + vandq_u8(match3EqYB, vandq_u8(match1LfB, cmpCrB)) + ), + vorrq_u8( + vandq_u8(match3EqYC, vandq_u8(match1LfC, cmpCrC)), + vandq_u8(match3EqYD, vandq_u8(match1LfD, cmpCrD)) + ) + ); + if(LIKELIHOOD(0.001, neon_vect_is_nonzero(matchEnd))) { + len += i; + break; + } + } + if(isRaw) + nextMaskMix = vdupq_n_u8(0); + } else if(isRaw) // no \r_. 
found + nextMaskMix = vdupq_n_u8(0); + } + + // a spec compliant encoder should never generate sequences: ==, =\n and =\r, but we'll handle them to be spec compliant + // the yEnc specification requires any character following = to be unescaped, not skipped over, so we'll deal with that + // firstly, check for invalid sequences of = (we assume that these are rare, as a spec compliant yEnc encoder should not generate these) + if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) | escFirst)) != 0)) { + uint8_t tmp = eqFixLUT[(maskEq&0xff) & ~escFirst]; + uint64_t maskEq2 = tmp; + for(int j=8; j<64; j+=8) { + tmp = eqFixLUT[((maskEq>>j)&0xff) & ~(tmp>>7)]; + maskEq2 |= ((uint64_t)tmp)<>7; + + // unescape chars following `=` +#if defined(__GNUC__) && !defined(__clang__) + // this seems to stop GCC9 producing slow code, for some reason... TODO: investigate why + uint8x8_t _maskEqTemp = vreinterpret_u8_u64(vmov_n_u64(maskEq)); + uint8x16_t maskEqTemp = vcombine_u8(_maskEqTemp, vdup_n_u8(0)); +#else + uint8x16_t maskEqTemp = vreinterpretq_u8_u64(vmovq_n_u64(maskEq)); +#endif + cmpCombined = vbicq_u8(cmpCombined, maskEqTemp); // `mask &= ~maskEq` in vector form + + uint8x16_t vMaskEqA = vqtbl1q_u8( + maskEqTemp, + vmakeq_u8(0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1) + ); + maskEqTemp = vextq_u8(maskEqTemp, maskEqTemp, 2); + uint8x16_t vMaskEqB = vqtbl1q_u8( + maskEqTemp, + vmakeq_u8(0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1) + ); + maskEqTemp = vextq_u8(maskEqTemp, maskEqTemp, 2); + uint8x16_t vMaskEqC = vqtbl1q_u8( + maskEqTemp, + vmakeq_u8(0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1) + ); + maskEqTemp = vextq_u8(maskEqTemp, maskEqTemp, 2); + uint8x16_t vMaskEqD = vqtbl1q_u8( + maskEqTemp, + vmakeq_u8(0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1) + ); + vMaskEqA = vtstq_u8(vMaskEqA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + vMaskEqB = vtstq_u8(vMaskEqB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + vMaskEqC = vtstq_u8(vMaskEqC, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + 
vMaskEqD = vtstq_u8(vMaskEqD, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + + dataA = vsubq_u8( + dataA, + vbslq_u8(vMaskEqA, vdupq_n_u8(64+42), vdupq_n_u8(42)) + ); + dataB = vsubq_u8( + dataB, + vbslq_u8(vMaskEqB, vdupq_n_u8(64+42), vdupq_n_u8(42)) + ); + dataC = vsubq_u8( + dataC, + vbslq_u8(vMaskEqC, vdupq_n_u8(64+42), vdupq_n_u8(42)) + ); + dataD = vsubq_u8( + dataD, + vbslq_u8(vMaskEqD, vdupq_n_u8(64+42), vdupq_n_u8(42)) + ); + } else { + // no invalid = sequences found - we can cut out some things from above + // this code path is a shortened version of above; it's here because it's faster, and what we'll be dealing with most of the time + escFirst = (maskEq >> 63); + + dataA = vsubq_u8( + dataA, + vbslq_u8( + vextq_u8(vdupq_n_u8(42), cmpEqA, 15), + vdupq_n_u8(64+42), + yencOffset + ) + ); + dataB = vsubq_u8( + dataB, + vbslq_u8( + vextq_u8(cmpEqA, cmpEqB, 15), + vdupq_n_u8(64+42), + vdupq_n_u8(42) + ) + ); + dataC = vsubq_u8( + dataC, + vbslq_u8( + vextq_u8(cmpEqB, cmpEqC, 15), + vdupq_n_u8(64+42), + vdupq_n_u8(42) + ) + ); + dataD = vsubq_u8( + dataD, + vbslq_u8( + vextq_u8(cmpEqC, cmpEqD, 15), + vdupq_n_u8(64+42), + vdupq_n_u8(42) + ) + ); + } + yencOffset = vsetq_lane_u8((escFirst << 6) | 42, yencOffset, 0); + + // all that's left is to 'compress' the data (skip over masked chars) + uint64_t counts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vget_low_u8(cmpCombined))), 0); + counts = 0x0808080808080808ULL - counts; + counts += counts>>8; + + vst1q_u8(p, vqtbl1q_u8( + dataA, + vld1q_u8((uint8_t*)(compactLUT + (mask&0x7fff))) + )); + p += counts & 0xff; + mask >>= 16; + vst1q_u8(p, vqtbl1q_u8( + dataB, + vld1q_u8((uint8_t*)(compactLUT + (mask&0x7fff))) + )); + p += (counts>>16) & 0xff; + mask >>= 16; + vst1q_u8(p, vqtbl1q_u8( + dataC, + vld1q_u8((uint8_t*)(compactLUT + (mask&0x7fff))) + )); + p += (counts>>32) & 0xff; + mask >>= 16; + vst1q_u8(p, vqtbl1q_u8( + dataD, + vld1q_u8((uint8_t*)(compactLUT + (mask&0x7fff))) + )); + p += (counts>>48) & 
0xff; + } else { + dataA = vsubq_u8(dataA, yencOffset); + dataB = vsubq_u8(dataB, vdupq_n_u8(42)); + dataC = vsubq_u8(dataC, vdupq_n_u8(42)); + dataD = vsubq_u8(dataD, vdupq_n_u8(42)); + _vst1q_u8_x4(p, vcreate4_u8(dataA, dataB, dataC, dataD)); + p += sizeof(uint8x16_t)*4; + escFirst = 0; + yencOffset = vdupq_n_u8(42); + } + } + if(isRaw) { + if(len != 0) { // have to gone through at least one loop cycle + if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.') + nextMask = 1; + else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.') + nextMask = 2; + else + nextMask = 0; + } + } else + nextMask = 0; +} + +void decoder_set_neon_funcs() { + decoder_init_lut(eqFixLUT, compactLUT); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_NEON; +} +#else +void decoder_set_neon_funcs() {} +#endif diff --git a/rapidyenc/src/decoder_sse2.cc b/rapidyenc/src/decoder_sse2.cc new file mode 100644 index 0000000..f61268e --- /dev/null +++ b/rapidyenc/src/decoder_sse2.cc @@ -0,0 +1,17 @@ +#include "common.h" + +#ifdef __SSE2__ +#include "decoder_common.h" +#include "decoder_sse_base.h" + +void decoder_set_sse2_funcs() { + decoder_sse_init(); + decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_SSE2; +} +#else +void decoder_set_sse2_funcs() {} +#endif diff --git a/rapidyenc/src/decoder_sse_base.h b/rapidyenc/src/decoder_sse_base.h new file mode 100644 index 0000000..65ab1f3 --- /dev/null +++ b/rapidyenc/src/decoder_sse_base.h @@ -0,0 +1,725 @@ + +#ifdef __SSE2__ + +#if defined(__clang__) && __clang_major__ == 6 && __clang_minor__ == 0 +// VBMI2 introduced in clang 6.0, but 128-bit functions misnamed there; fixed in clang 7.0, but we'll handle those on 6.0 +# define _mm_mask_compressstoreu_epi8 _mm128_mask_compressstoreu_epi8 +# define _mm_shrdi_epi16 
_mm128_shrdi_epi16 +#endif + +#if defined(__tune_icelake_client__) || defined(__tune_icelake_server__) || defined(__tune_tigerlake__) || defined(__tune_rocketlake__) || defined(__tune_alderlake__) || defined(__tune_sapphirerapids__) +# define COMPRESS_STORE _mm_mask_compressstoreu_epi8 +#else +// avoid uCode on Zen4 +# define COMPRESS_STORE(dst, mask, vec) _mm_storeu_si128((__m128i*)(dst), _mm_maskz_compress_epi8(mask, vec)) +#endif + +// GCC (ver 6-10(dev)) fails to optimize pure C version of mask testing, but has this intrinsic; Clang >= 7 optimizes C version fine +#if (defined(__GNUC__) && __GNUC__ >= 7) || (defined(_MSC_VER) && _MSC_VER >= 1924) +# define KORTEST16(a, b) !_kortestz_mask16_u8((a), (b)) +# define KAND16(a, b) _kand_mask16((a), (b)) +# define KOR16(a, b) _kor_mask16((a), (b)) +#else +# define KORTEST16(a, b) ((a) | (b)) +# define KAND16(a, b) ((a) & (b)) +# define KOR16(a, b) ((a) | (b)) +#endif + +#pragma pack(16) +static struct { + unsigned char BitsSetTable256inv[256]; + /*align16*/ struct { char bytes[16]; } compact[32768]; + uint8_t eqFix[256]; + /*align8*/ uint64_t eqAdd[256]; + /*align16*/ int8_t unshufMask[32*16]; +} * HEDLEY_RESTRICT lookups; +#pragma pack() + + +static HEDLEY_ALWAYS_INLINE __m128i force_align_read_128(const void* p) { +#ifdef _MSC_VER + // MSVC complains about casting away volatile + return *(__m128i *)(p); +#else + return *(volatile __m128i *)(p); +#endif +} + + +static void decoder_sse_init() { + ALIGN_ALLOC(lookups, sizeof(*lookups), 16); + for(int i=0; i<256; i++) { + lookups->BitsSetTable256inv[i] = 8 - ( + (i & 1) + ((i>>1) & 1) + ((i>>2) & 1) + ((i>>3) & 1) + ((i>>4) & 1) + ((i>>5) & 1) + ((i>>6) & 1) + ((i>>7) & 1) + ); + + #define _X(n, k) ((((n) & (1<eqAdd[i] = _X(i, 0) | _X(i, 1) | _X(i, 2) | _X(i, 3) | _X(i, 4) | _X(i, 5) | _X(i, 6) | _X(i, 7); + #undef _X + } + for(int i=0; i<32; i++) { + for(int j=0; j<16; j++) { + if(i >= 16) // only used for LZCNT + lookups->unshufMask[i*16 + j] = ((31-i)>j ? 
-1 : 0); + else // only used for BSR + lookups->unshufMask[i*16 + j] = (i>j ? -1 : 0); + } + } +} + + +// for LZCNT/BSR +#ifdef _MSC_VER +# include +# include +static HEDLEY_ALWAYS_INLINE unsigned BSR32(unsigned src) { + unsigned long result; + _BitScanReverse((unsigned long*)&result, src); + return result; +} +#elif defined(__GNUC__) +// have seen Clang not like _bit_scan_reverse +# include // for lzcnt +# define BSR32(src) (31^__builtin_clz(src)) +#else +# include +# define BSR32 _bit_scan_reverse +#endif + +template +static HEDLEY_ALWAYS_INLINE __m128i sse2_compact_vect(uint32_t mask, __m128i data) { + while(mask) { + unsigned bitIndex; +#if defined(__LZCNT__) + if(use_isa & ISA_FEATURE_LZCNT) { + // lzcnt is always at least as fast as bsr, so prefer it if it's available + bitIndex = _lzcnt_u32(mask); + mask &= 0x7fffffffU>>bitIndex; + } else +#endif + { + bitIndex = BSR32(mask); + mask ^= 1<unshufMask + bitIndex); + data = _mm_or_si128( + _mm_and_si128(mergeMask, data), + _mm_andnot_si128(mergeMask, _mm_srli_si128(data, 1)) + ); + } + return data; +} + +template +HEDLEY_ALWAYS_INLINE void do_decode_sse(const uint8_t* src, long& len, unsigned char*& p, unsigned char& _escFirst, uint16_t& _nextMask) { + HEDLEY_ASSUME(_escFirst == 0 || _escFirst == 1); + HEDLEY_ASSUME(_nextMask == 0 || _nextMask == 1 || _nextMask == 2); + uintptr_t escFirst = _escFirst; + __m128i yencOffset = escFirst ? 
_mm_set_epi8( + -42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42,-42-64 + ) : _mm_set1_epi8(-42); + +#if defined(__SSSE3__) && !defined(__tune_atom__) && !defined(__tune_slm__) && !defined(__tune_btver1__) && !defined(__tune_btver2__) + const bool _USING_FAST_MATCH = (use_isa >= ISA_LEVEL_SSSE3); +#else + const bool _USING_FAST_MATCH = false; +#endif +#if defined(__SSE4_1__) && !defined(__tune_slm__) && !defined(__tune_goldmont__) && !defined(__tune_goldmont_plus__) && !defined(__tune_tremont__) + const bool _USING_BLEND_ADD = (use_isa >= ISA_LEVEL_SSE41); +#else + const bool _USING_BLEND_ADD = false; +#endif +#if defined(__AVX512VL__) && defined(__AVX512BW__) +# if defined(_MSC_VER) && !defined(PLATFORM_AMD64) && !defined(__clang__) + const bool useAVX3MaskCmp = false; +# else + const bool useAVX3MaskCmp = (use_isa >= ISA_LEVEL_AVX3); +# endif +#endif + + __m128i lfCompare = _mm_set1_epi8('\n'); + __m128i minMask = _mm_set1_epi8('.'); + if(_nextMask && isRaw) { + if(_USING_FAST_MATCH) + minMask = _mm_insert_epi16(minMask, _nextMask == 1 ? 0x2e00 : 0x002e, 0); + else + lfCompare = _mm_insert_epi16(lfCompare, _nextMask == 1 ? 
0x0a2e /*".\n"*/ : 0x2e0a /*"\n."*/, 0); + } + intptr_t i; + for(i = -len; i; i += sizeof(__m128i)*2) { + __m128i oDataA = _mm_load_si128((__m128i *)(src+i)); + __m128i oDataB = _mm_load_si128((__m128i *)(src+i) + 1); + + // search for special chars + __m128i cmpEqA, cmpEqB, cmpCrA, cmpCrB; + __m128i cmpA, cmpB; +#if defined(__SSSE3__) + if(_USING_FAST_MATCH) { + cmpA = _mm_cmpeq_epi8(oDataA, _mm_shuffle_epi8( + _mm_set_epi8(-1,'=','\r',-1,-1,'\n',-1,-1,-1,-1,-1,-1,-1,-1,-1,'.'), + _mm_min_epu8(oDataA, minMask) + )); + cmpB = _mm_cmpeq_epi8(oDataB, _mm_shuffle_epi8( + _mm_set_epi8(-1,'=','\r',-1,-1,'\n',-1,-1,-1,-1,-1,-1,-1,-1,-1,'.'), + _mm_min_epu8(oDataB, _mm_set1_epi8('.')) + )); + } else +#endif + { + cmpEqA = _mm_cmpeq_epi8(oDataA, _mm_set1_epi8('=')); + cmpEqB = _mm_cmpeq_epi8(oDataB, _mm_set1_epi8('=')); + cmpCrA = _mm_cmpeq_epi8(oDataA, _mm_set1_epi8('\r')); + cmpCrB = _mm_cmpeq_epi8(oDataB, _mm_set1_epi8('\r')); + cmpA = _mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(oDataA, lfCompare), cmpCrA + ), + cmpEqA + ); + cmpB = _mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(oDataB, _mm_set1_epi8('\n')), cmpCrB + ), + cmpEqB + ); + } + + __m128i dataA, dataB; + if(!_USING_BLEND_ADD) + dataA = _mm_add_epi8(oDataA, yencOffset); + uint32_t mask = (unsigned)_mm_movemask_epi8(cmpA) | ((unsigned)_mm_movemask_epi8(cmpB) << 16); // not the most accurate mask if we have invalid sequences; we fix this up later + + if (LIKELIHOOD(0.42 /* rough guess */, mask != 0)) { + if(_USING_FAST_MATCH) { + cmpEqA = _mm_cmpeq_epi8(oDataA, _mm_set1_epi8('=')); + cmpEqB = _mm_cmpeq_epi8(oDataB, _mm_set1_epi8('=')); + } + +#define LOAD_HALVES(a, b) _mm_castps_si128(_mm_loadh_pi( \ + _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)(a))), \ + (__m64*)(b) \ +)) + + // a spec compliant encoder should never generate sequences: ==, =\n and =\r, but we'll handle them to be spec compliant + // the yEnc specification requires any character following = to be unescaped, not skipped over, so we'll deal 
with that + // firstly, check for invalid sequences of = (we assume that these are rare, as a spec compliant yEnc encoder should not generate these) + uint32_t maskEq = (unsigned)_mm_movemask_epi8(cmpEqA) | ((unsigned)_mm_movemask_epi8(cmpEqB) << 16); + + // handle \r\n. sequences + // RFC3977 requires the first dot on a line to be stripped, due to dot-stuffing + if((isRaw || searchEnd) && LIKELIHOOD(0.25, mask != maskEq)) { +#if 0 + // for experimentation: prefer shifting data over unaligned loads on CPUs with slow unaligned handling + // haven't ever seen this be beneficial though + __m128i nextDataB; + if(searchEnd && _USING_BLEND_ADD) + nextDataB = _mm_cvtsi32_si128(*(uint32_t*)(src+i+sizeof(__m128i)*2)); +# define SHIFT_DATA_A(offs) (searchEnd && _USING_BLEND_ADD ? _mm_alignr_epi8(oDataB, oDataA, offs) : _mm_loadu_si128((__m128i *)(src+i+offs))) +# define SHIFT_DATA_B(offs) (searchEnd && _USING_BLEND_ADD ? _mm_alignr_epi8(nextDataB, oDataB, offs) : _mm_loadu_si128((__m128i *)(src+i+offs) + 1)) +#else +# define SHIFT_DATA_A(offs) _mm_loadu_si128((__m128i *)(src+i+offs)) +# define SHIFT_DATA_B(offs) _mm_loadu_si128((__m128i *)(src+i+offs) + 1) +#endif + __m128i tmpData2A = SHIFT_DATA_A(2); + __m128i tmpData2B = SHIFT_DATA_B(2); + __m128i match2EqA, match2EqB; + +#if defined(__AVX512VL__) && defined(__AVX512BW__) + __mmask16 match2EqMaskA, match2EqMaskB; + __mmask16 match0CrMaskA, match0CrMaskB; + __mmask16 match2CrXDtMaskA, match2CrXDtMaskB; + if(useAVX3MaskCmp && searchEnd) { + match2EqMaskA = _mm_cmpeq_epi8_mask(_mm_set1_epi8('='), tmpData2A); + match2EqMaskB = _mm_cmpeq_epi8_mask(_mm_set1_epi8('='), tmpData2B); + } else +#endif + if(searchEnd) { +#if !defined(__tune_btver1__) + if(use_isa < ISA_LEVEL_SSSE3) +#endif + match2EqA = _mm_cmpeq_epi8(_mm_set1_epi8('='), tmpData2A); + match2EqB = _mm_cmpeq_epi8(_mm_set1_epi8('='), tmpData2B); + } + int partialKillDotFound; + __m128i match2CrXDtA, match2CrXDtB; + if(isRaw) { +#if defined(__AVX512VL__) && 
defined(__AVX512BW__) + if(useAVX3MaskCmp) { + match0CrMaskA = _mm_cmpeq_epi8_mask(oDataA, _mm_set1_epi8('\r')); + match0CrMaskB = _mm_cmpeq_epi8_mask(oDataB, _mm_set1_epi8('\r')); + match2CrXDtMaskA = _mm_mask_cmpeq_epi8_mask(match0CrMaskA, tmpData2A, _mm_set1_epi8('.')); + match2CrXDtMaskB = _mm_mask_cmpeq_epi8_mask(match0CrMaskB, tmpData2B, _mm_set1_epi8('.')); + partialKillDotFound = KORTEST16(match2CrXDtMaskA, match2CrXDtMaskB); + } else +#endif + { + if(_USING_FAST_MATCH) { + cmpCrA = _mm_cmpeq_epi8(oDataA, _mm_set1_epi8('\r')); + cmpCrB = _mm_cmpeq_epi8(oDataB, _mm_set1_epi8('\r')); + } + match2CrXDtA = _mm_and_si128(cmpCrA, _mm_cmpeq_epi8(tmpData2A, _mm_set1_epi8('.'))); + match2CrXDtB = _mm_and_si128(cmpCrB, _mm_cmpeq_epi8(tmpData2B, _mm_set1_epi8('.'))); + partialKillDotFound = _mm_movemask_epi8(_mm_or_si128(match2CrXDtA, match2CrXDtB)); + } + } + + if(isRaw && LIKELIHOOD(0.001, partialKillDotFound)) { + __m128i match2NlDotA, match1NlA; + __m128i match2NlDotB, match1NlB; + // merge matches for \r\n. 
+#if defined(__AVX512VL__) && defined(__AVX512BW__) + __mmask16 match1NlMaskA, match1NlMaskB; + __mmask16 match2NlDotMaskA, match2NlDotMaskB; + if(useAVX3MaskCmp) { + match1NlMaskA = _mm_mask_cmpeq_epi8_mask( + match0CrMaskA, + _mm_set1_epi8('\n'), + SHIFT_DATA_A(1) + ); + match1NlMaskB = _mm_mask_cmpeq_epi8_mask( + match0CrMaskB, + _mm_set1_epi8('\n'), + SHIFT_DATA_B(1) + ); + match2NlDotMaskA = KAND16(match2CrXDtMaskA, match1NlMaskA); + match2NlDotMaskB = KAND16(match2CrXDtMaskB, match1NlMaskB); + } else +#endif + { + __m128i match1LfA = _mm_cmpeq_epi8(_mm_set1_epi8('\n'), SHIFT_DATA_A(1)); + __m128i match1LfB = _mm_cmpeq_epi8(_mm_set1_epi8('\n'), SHIFT_DATA_B(1)); + + // always recompute cmpCr to avoid register spills above + cmpCrA = _mm_cmpeq_epi8(force_align_read_128(src+i), _mm_set1_epi8('\r')); + cmpCrB = _mm_cmpeq_epi8(force_align_read_128(src+i + sizeof(__m128i)), _mm_set1_epi8('\r')); + match1NlA = _mm_and_si128(match1LfA, cmpCrA); + match1NlB = _mm_and_si128(match1LfB, cmpCrB); + match2NlDotA = _mm_and_si128(match2CrXDtA, match1NlA); + match2NlDotB = _mm_and_si128(match2CrXDtB, match1NlB); + } + if(searchEnd) { + __m128i tmpData3A = SHIFT_DATA_A(3); + __m128i tmpData3B = SHIFT_DATA_B(3); + __m128i tmpData4A = SHIFT_DATA_A(4); + __m128i tmpData4B = SHIFT_DATA_B(4); + // match instances of \r\n.\r\n and \r\n.=y + // TODO: consider doing a PALIGNR using match1Nl for match4NlA + __m128i match3CrA = _mm_cmpeq_epi8(_mm_set1_epi8('\r'), tmpData3A); + __m128i match3CrB = _mm_cmpeq_epi8(_mm_set1_epi8('\r'), tmpData3B); + __m128i match4LfA = _mm_cmpeq_epi8(tmpData4A, _mm_set1_epi8('\n')); + __m128i match4LfB = _mm_cmpeq_epi8(tmpData4B, _mm_set1_epi8('\n')); + __m128i match4EqYA = _mm_cmpeq_epi16(tmpData4A, _mm_set1_epi16(0x793d)); // =y + __m128i match4EqYB = _mm_cmpeq_epi16(tmpData4B, _mm_set1_epi16(0x793d)); // =y + + int matchEnd; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(useAVX3MaskCmp) { + __mmask16 match3EqYMaskA = _mm_mask_cmpeq_epi8_mask( + 
match2EqMaskA, _mm_set1_epi8('y'), tmpData3A + ); + __mmask16 match3EqYMaskB = _mm_mask_cmpeq_epi8_mask( + match2EqMaskB, _mm_set1_epi8('y'), tmpData3B + ); + __m128i match34EqYA, match34EqYB; +# ifdef __AVX512VBMI2__ + if(use_isa >= ISA_LEVEL_VBMI2) { + match34EqYA = _mm_shrdi_epi16(_mm_movm_epi8(match3EqYMaskA), match4EqYA, 8); + match34EqYB = _mm_shrdi_epi16(_mm_movm_epi8(match3EqYMaskB), match4EqYB, 8); + } else +# endif + { + // (match4EqY & 0xff00) | (match3EqY >> 8) + match34EqYA = _mm_mask_blend_epi8(match3EqYMaskA>>1, _mm_and_si128(match4EqYA, _mm_set1_epi16(-0xff)), _mm_set1_epi8(-1)); + match34EqYB = _mm_mask_blend_epi8(match3EqYMaskB>>1, _mm_and_si128(match4EqYB, _mm_set1_epi16(-0xff)), _mm_set1_epi8(-1)); + } + // merge \r\n and =y matches for tmpData4 + __m128i match4EndA = _mm_ternarylogic_epi32(match34EqYA, match3CrA, match4LfA, 0xF8); // (match3Cr & match4Lf) | match34EqY + __m128i match4EndB = _mm_ternarylogic_epi32(match34EqYB, match3CrB, match4LfB, 0xF8); + // merge with \r\n. 
and combine + matchEnd = KORTEST16( + KOR16( + _mm_mask_test_epi8_mask(match2NlDotMaskA, match4EndA, match4EndA), + KAND16(match3EqYMaskA, match1NlMaskA) + ), + KOR16( + _mm_mask_test_epi8_mask(match2NlDotMaskB, match4EndB, match4EndB), + KAND16(match3EqYMaskB, match1NlMaskB) + ) + ); + } else +#endif + { +#if defined(__SSSE3__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) + match2EqA = _mm_alignr_epi8(cmpEqB, cmpEqA, 2); +#endif + __m128i match3EqYA = _mm_and_si128(match2EqA, _mm_cmpeq_epi8(_mm_set1_epi8('y'), tmpData3A)); + __m128i match3EqYB = _mm_and_si128(match2EqB, _mm_cmpeq_epi8(_mm_set1_epi8('y'), tmpData3B)); + match4EqYA = _mm_slli_epi16(match4EqYA, 8); // TODO: also consider using PBLENDVB here with shifted match3EqY instead + match4EqYB = _mm_slli_epi16(match4EqYB, 8); + // merge \r\n and =y matches for tmpData4 + __m128i match4EndA = _mm_or_si128( + _mm_and_si128(match3CrA, match4LfA), + _mm_or_si128(match4EqYA, _mm_srli_epi16(match3EqYA, 8)) // _mm_srli_si128 by 1 also works + ); + __m128i match4EndB = _mm_or_si128( + _mm_and_si128(match3CrB, match4LfB), + _mm_or_si128(match4EqYB, _mm_srli_epi16(match3EqYB, 8)) // _mm_srli_si128 by 1 also works + ); + // merge with \r\n. 
+ match4EndA = _mm_and_si128(match4EndA, match2NlDotA); + match4EndB = _mm_and_si128(match4EndB, match2NlDotB); + // match \r\n=y + __m128i match3EndA = _mm_and_si128(match3EqYA, match1NlA); + __m128i match3EndB = _mm_and_si128(match3EqYB, match1NlB); + // combine match sequences + matchEnd = _mm_movemask_epi8(_mm_or_si128( + _mm_or_si128(match4EndA, match3EndA), + _mm_or_si128(match4EndB, match3EndB) + )); + } + + if(LIKELIHOOD(0.001, matchEnd)) { + // terminator found + // there's probably faster ways to do this, but reverting to scalar code should be good enough + len += (long)i; + break; + } + } +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(useAVX3MaskCmp) { + mask |= match2NlDotMaskA << 2; + mask |= (match2NlDotMaskB << 18) & 0xffffffff; + minMask = _mm_maskz_mov_epi8(~(match2NlDotMaskB>>14), _mm_set1_epi8('.')); + } else +#endif + { + mask |= (_mm_movemask_epi8(match2NlDotA) << 2); + mask |= (_mm_movemask_epi8(match2NlDotB) << 18) & 0xffffffff; + match2NlDotB = _mm_srli_si128(match2NlDotB, 14); + if(_USING_FAST_MATCH) + minMask = _mm_subs_epu8(_mm_set1_epi8('.'), match2NlDotB); + else + // this bitiwse trick works because '.'|'\n' == '.' 
+ lfCompare = _mm_or_si128( + _mm_and_si128(match2NlDotB, _mm_set1_epi8('.')), + _mm_set1_epi8('\n') + ); + } + } + else if(searchEnd) { + bool partialEndFound; + __m128i match3EqYA, match3EqYB; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + __mmask16 match3EqYMaskA, match3EqYMaskB; + if(useAVX3MaskCmp) { + match3EqYMaskA = _mm_mask_cmpeq_epi8_mask( + match2EqMaskA, + _mm_set1_epi8('y'), + SHIFT_DATA_A(3) + ); + match3EqYMaskB = _mm_mask_cmpeq_epi8_mask( + match2EqMaskB, + _mm_set1_epi8('y'), + SHIFT_DATA_B(3) + ); + partialEndFound = KORTEST16(match3EqYMaskA, match3EqYMaskB); + } else +#endif + { + __m128i match3YA = _mm_cmpeq_epi8( + _mm_set1_epi8('y'), + SHIFT_DATA_A(3) + ); + __m128i match3YB = _mm_cmpeq_epi8( + _mm_set1_epi8('y'), + SHIFT_DATA_B(3) + ); +#if defined(__SSSE3__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) + match2EqA = _mm_alignr_epi8(cmpEqB, cmpEqA, 2); +#endif + match3EqYA = _mm_and_si128(match2EqA, match3YA); + match3EqYB = _mm_and_si128(match2EqB, match3YB); + partialEndFound = _mm_movemask_epi8(_mm_or_si128(match3EqYA, match3EqYB)); + } + if(LIKELIHOOD(0.001, partialEndFound)) { + // if the rare case of '=y' is found, do a more precise check + bool endFound; + +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(useAVX3MaskCmp) { + __mmask16 match3LfEqYMaskA = _mm_mask_cmpeq_epi8_mask( + match3EqYMaskA, + _mm_set1_epi8('\n'), + SHIFT_DATA_A(1) + ); + __mmask16 match3LfEqYMaskB = _mm_mask_cmpeq_epi8_mask( + match3EqYMaskB, + _mm_set1_epi8('\n'), + SHIFT_DATA_B(1) + ); + + endFound = KORTEST16( + _mm_mask_cmpeq_epi8_mask(match3LfEqYMaskA, oDataA, _mm_set1_epi8('\r')), + _mm_mask_cmpeq_epi8_mask(match3LfEqYMaskB, oDataB, _mm_set1_epi8('\r')) + ); + } else +#endif + { + // always recompute cmpCr to avoid register spills above + cmpCrA = _mm_cmpeq_epi8(force_align_read_128(src+i), _mm_set1_epi8('\r')); + cmpCrB = _mm_cmpeq_epi8(force_align_read_128(src+i + sizeof(__m128i)), _mm_set1_epi8('\r')); + __m128i match1LfA = 
_mm_cmpeq_epi8( + _mm_set1_epi8('\n'), + SHIFT_DATA_A(1) + ); + __m128i match1LfB = _mm_cmpeq_epi8( + _mm_set1_epi8('\n'), + SHIFT_DATA_B(1) + ); + endFound = _mm_movemask_epi8(_mm_or_si128( + _mm_and_si128( + match3EqYA, + _mm_and_si128(match1LfA, cmpCrA) + ), + _mm_and_si128( + match3EqYB, + _mm_and_si128(match1LfB, cmpCrB) + ) + )); + } + + if(endFound) { + len += (long)i; + break; + } + } + if(isRaw) { + if(_USING_FAST_MATCH) + minMask = _mm_set1_epi8('.'); + else + lfCompare = _mm_set1_epi8('\n'); + } + } + else if(isRaw) { // no \r_. found + if(_USING_FAST_MATCH) + minMask = _mm_set1_epi8('.'); + else + lfCompare = _mm_set1_epi8('\n'); + } + } +#undef SHIFT_DATA_A +#undef SHIFT_DATA_B + + if(!_USING_BLEND_ADD) + dataB = _mm_add_epi8(oDataB, _mm_set1_epi8(-42)); + + if(LIKELIHOOD(0.0001, (mask & ((maskEq << 1) + escFirst)) != 0)) { + // resolve invalid sequences of = to deal with cases like '====' + unsigned tmp = lookups->eqFix[(maskEq&0xff) & ~escFirst]; + uint32_t maskEq2 = tmp; + for(int j=8; j<32; j+=8) { + tmp = lookups->eqFix[((maskEq>>j)&0xff) & ~(tmp>>7)]; + maskEq2 |= tmp<> 31); + // next, eliminate anything following a `=` from the special char mask; this eliminates cases of `=\r` so that they aren't removed + maskEq <<= 1; + mask &= ~maskEq; + + if(_USING_BLEND_ADD) { + dataA = _mm_add_epi8(oDataA, yencOffset); + dataB = _mm_add_epi8(oDataB, _mm_set1_epi8(-42)); + } + + // unescape chars following `=` +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { + // GCC < 7 seems to generate rubbish assembly for this + dataA = _mm_mask_add_epi8( + dataA, + (__mmask16)maskEq, + dataA, + _mm_set1_epi8(-64) + ); + dataB = _mm_mask_add_epi8( + dataB, + (__mmask16)(maskEq>>16), + dataB, + _mm_set1_epi8(-64) + ); + } else +#endif + { + dataA = _mm_add_epi8( + dataA, + LOAD_HALVES( + lookups->eqAdd + (maskEq&0xff), + lookups->eqAdd + ((maskEq>>8)&0xff) + ) + ); + maskEq >>= 16; + dataB = _mm_add_epi8( + dataB, + LOAD_HALVES( + 
lookups->eqAdd + (maskEq&0xff), + lookups->eqAdd + ((maskEq>>8)&0xff) + ) + ); + + yencOffset = _mm_xor_si128(_mm_set1_epi8(-42), + _mm_slli_epi16(_mm_cvtsi32_si128((int)escFirst), 6) + ); + } + } else { + // no invalid = sequences found - we can cut out some things from above + // this code path is a shortened version of above; it's here because it's faster, and what we'll be dealing with most of the time + escFirst = (maskEq >> 31); + +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { + dataA = _mm_add_epi8( + oDataA, + _mm_ternarylogic_epi32( + _mm_slli_si128(cmpEqA, 1), yencOffset, _mm_set1_epi8(-42-64), 0xac + ) + ); + dataB = _mm_add_epi8( + oDataB, + _mm_ternarylogic_epi32( + _mm_alignr_epi8(cmpEqB, cmpEqA, 15), _mm_set1_epi8(-42), _mm_set1_epi8(-42-64), 0xac + ) + ); + } else +#endif +#if defined(__SSE4_1__) + if(_USING_BLEND_ADD) { + /* // the following strategy seems more ideal, however, both GCC and Clang go bonkers over it and spill more registers + cmpEqA = _mm_blendv_epi8(_mm_set1_epi8(-42), _mm_set1_epi8(-42-64), cmpEqA); + cmpEqB = _mm_blendv_epi8(_mm_set1_epi8(-42), _mm_set1_epi8(-42-64), cmpEqB); + dataB = _mm_add_epi8(oDataB, _mm_alignr_epi8(cmpEqB, cmpEqA, 15)); + dataA = _mm_add_epi8(oDataA, _mm_and_si128( + _mm_alignr_epi8(cmpEqA, _mm_set1_epi8(-42), 15), + yencOffset + )); + yencOffset = _mm_alignr_epi8(_mm_set1_epi8(-42), cmpEqB, 15); + */ + + dataA = _mm_add_epi8( + oDataA, + _mm_blendv_epi8( + yencOffset, _mm_set1_epi8(-42-64), _mm_slli_si128(cmpEqA, 1) + ) + ); + dataB = _mm_add_epi8( + oDataB, + _mm_blendv_epi8( + _mm_set1_epi8(-42), _mm_set1_epi8(-42-64), _mm_alignr_epi8(cmpEqB, cmpEqA, 15) + ) + ); + yencOffset = _mm_xor_si128(_mm_set1_epi8(-42), + _mm_slli_epi16(_mm_cvtsi32_si128((int)escFirst), 6) + ); + } else +#endif + { + cmpEqA = _mm_and_si128(cmpEqA, _mm_set1_epi8(-64)); + cmpEqB = _mm_and_si128(cmpEqB, _mm_set1_epi8(-64)); + yencOffset = _mm_add_epi8(_mm_set1_epi8(-42), _mm_srli_si128(cmpEqB, 
15)); +#if defined(__SSSE3__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) + cmpEqB = _mm_alignr_epi8(cmpEqB, cmpEqA, 15); + else +#endif + cmpEqB = _mm_or_si128( + _mm_slli_si128(cmpEqB, 1), + _mm_srli_si128(cmpEqA, 15) + ); + cmpEqA = _mm_slli_si128(cmpEqA, 1); + dataA = _mm_add_epi8(dataA, cmpEqA); + dataB = _mm_add_epi8(dataB, cmpEqB); + } + } + // subtract 64 from first element if escFirst == 1 +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { + yencOffset = _mm_mask_add_epi8(_mm_set1_epi8(-42), (__mmask16)escFirst, _mm_set1_epi8(-42), _mm_set1_epi8(-64)); + } +#endif + + // all that's left is to 'compress' the data (skip over masked chars) +#ifdef __SSSE3__ + if(use_isa >= ISA_LEVEL_SSSE3) { +# if defined(__AVX512VBMI2__) && defined(__AVX512VL__) && defined(__POPCNT__) + if(use_isa >= ISA_LEVEL_VBMI2) { + COMPRESS_STORE(p, KNOT16(mask), dataA); + p -= popcnt32(mask & 0xffff); + COMPRESS_STORE(p+XMM_SIZE, KNOT16(mask>>16), dataB); + p -= popcnt32(mask>>16); + p += XMM_SIZE*2; + } else +# endif + { + + dataA = _mm_shuffle_epi8(dataA, _mm_load_si128((__m128i*)(lookups->compact + (mask&0x7fff)))); + STOREU_XMM(p, dataA); + + dataB = _mm_shuffle_epi8(dataB, _mm_load_si128((__m128i*)((char*)lookups->compact + ((mask >> 12) & 0x7fff0)))); + +# if defined(__POPCNT__) && !defined(__tune_btver1__) + if(use_isa & ISA_FEATURE_POPCNT) { + p -= popcnt32(mask & 0xffff); + STOREU_XMM(p+XMM_SIZE, dataB); + p -= popcnt32(mask & 0xffff0000); + p += XMM_SIZE*2; + } else +# endif + { + p += lookups->BitsSetTable256inv[mask & 0xff] + lookups->BitsSetTable256inv[(mask >> 8) & 0xff]; + STOREU_XMM(p, dataB); + mask >>= 16; + p += lookups->BitsSetTable256inv[mask & 0xff] + lookups->BitsSetTable256inv[(mask >> 8) & 0xff]; + } + } + } else +#endif + { + dataA = sse2_compact_vect(mask & 0xffff, dataA); + STOREU_XMM(p, dataA); + p += lookups->BitsSetTable256inv[mask & 0xff] + lookups->BitsSetTable256inv[(mask >> 8) & 0xff]; + mask >>= 
16; + dataB = sse2_compact_vect(mask, dataB); + STOREU_XMM(p, dataB); + p += lookups->BitsSetTable256inv[mask & 0xff] + lookups->BitsSetTable256inv[(mask >> 8) & 0xff]; + } +#undef LOAD_HALVES + } else { + if(_USING_BLEND_ADD) + dataA = _mm_add_epi8(oDataA, yencOffset); + dataB = _mm_add_epi8(oDataB, _mm_set1_epi8(-42)); + + STOREU_XMM(p, dataA); + STOREU_XMM(p+XMM_SIZE, dataB); + p += XMM_SIZE*2; + escFirst = 0; + yencOffset = _mm_set1_epi8(-42); + } + } + _escFirst = (unsigned char)escFirst; + if(isRaw) { + if(len != 0) { // have to gone through at least one loop cycle + if(src[i-2] == '\r' && src[i-1] == '\n' && src[i] == '.') + _nextMask = 1; + else if(src[i-1] == '\r' && src[i] == '\n' && src[i+1] == '.') + _nextMask = 2; + else + _nextMask = 0; + } + } else + _nextMask = 0; +} +#endif diff --git a/rapidyenc/src/decoder_ssse3.cc b/rapidyenc/src/decoder_ssse3.cc new file mode 100644 index 0000000..f92c2a2 --- /dev/null +++ b/rapidyenc/src/decoder_ssse3.cc @@ -0,0 +1,19 @@ +#include "common.h" + +#ifdef __SSSE3__ +#include "decoder_common.h" +#include "decoder_sse_base.h" +void decoder_set_ssse3_funcs() { + decoder_sse_init(); + decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_SSSE3; +} +#else +void decoder_set_sse2_funcs(); +void decoder_set_ssse3_funcs() { + decoder_set_sse2_funcs(); +} +#endif diff --git a/rapidyenc/src/decoder_vbmi2.cc b/rapidyenc/src/decoder_vbmi2.cc new file mode 100644 index 0000000..d4ce562 --- /dev/null +++ b/rapidyenc/src/decoder_vbmi2.cc @@ -0,0 +1,32 @@ +#include "common.h" + +#if defined(__AVX512VL__) && defined(__AVX512VBMI2__) && defined(__AVX512BW__) +# include "decoder_common.h" +# ifndef YENC_DISABLE_AVX256 +# include "decoder_avx2_base.h" +void decoder_set_vbmi2_funcs() { + ALIGN_ALLOC(lookups, sizeof(*lookups), 16); + // TODO: consider removing compact LUT + 
decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_VBMI2; +} +# else +# include "decoder_sse_base.h" +void decoder_set_vbmi2_funcs() { + decoder_sse_init(); + decoder_init_lut(lookups->eqFix, lookups->compact); + _do_decode = &do_decode_simd >; + _do_decode_raw = &do_decode_simd >; + _do_decode_end_raw = &do_decode_simd >; + _decode_isa = ISA_LEVEL_VBMI2; +} +# endif +#else +void decoder_set_avx2_funcs(); +void decoder_set_vbmi2_funcs() { + decoder_set_avx2_funcs(); +} +#endif diff --git a/rapidyenc/src/encoder.cc b/rapidyenc/src/encoder.cc new file mode 100644 index 0000000..e0cd1ad --- /dev/null +++ b/rapidyenc/src/encoder.cc @@ -0,0 +1,181 @@ +#include "common.h" +#include "encoder_common.h" +#include "encoder.h" + +size_t do_encode_generic(int line_size, int* colOffset, const unsigned char* HEDLEY_RESTRICT src, unsigned char* HEDLEY_RESTRICT dest, size_t len, int doEnd) { + unsigned char* es = (unsigned char*)src + len; + unsigned char *p = dest; // destination pointer + long i = -(long)len; // input position + unsigned char c, escaped; // input character; escaped input character + int col = *colOffset; + + if (col == 0) { + c = es[i++]; + if (escapedLUT[c]) { + memcpy(p, &escapedLUT[c], sizeof(uint16_t)); + p += 2; + col = 2; + } else { + *(p++) = c + 42; + col = 1; + } + } + while(i < 0) { + // main line + unsigned char* sp = NULL; + while (i < -1-8 && line_size-col-1 > 8) { + // 8 cycle unrolled version + sp = p; + #define DO_THING(n) \ + c = es[i+n], escaped = escapeLUT[c]; \ + if (escaped) \ + *(p++) = escaped; \ + else { \ + memcpy(p, &escapedLUT[c], sizeof(uint16_t)); \ + p += 2; \ + } + DO_THING(0); + DO_THING(1); + DO_THING(2); + DO_THING(3); + DO_THING(4); + DO_THING(5); + DO_THING(6); + DO_THING(7); + + i += 8; + col += (int)(p - sp); + } + if(sp && col >= line_size-1) { + // TODO: consider revert optimisation 
from SIMD code + // we overflowed - need to revert and use slower method :( + col -= (int)(p - sp); + p = sp; + i -= 8; + } + // handle remaining chars + while(col < line_size-1) { + c = es[i++], escaped = escapeLUT[c]; + if (escaped) { + *(p++) = escaped; + col++; + } + else { + memcpy(p, &escapedLUT[c], sizeof(uint16_t)); + p += 2; + col += 2; + } + /* experimental branchless version + *p = '='; + c = (es[i++] + 42) & 0xFF; + int cond = (c=='\0' || c=='=' || c=='\r' || c=='\n'); + *(p+cond) = c + (cond << 6); + p += 1+cond; + col += 1+cond; + */ + if (i >= 0) goto end; + } + + // last line char + if(col < line_size) { // this can only be false if the last character was an escape sequence (or line_size is horribly small), in which case, we don't need to handle space/tab cases + c = es[i++]; + if (escapedLUT[c] && c != '.'-42) { + memcpy(p, &escapedLUT[c], sizeof(uint16_t)); + p += 2; + } else { + *(p++) = c + 42; + } + } + + if (i >= 0) break; + + c = es[i++]; + if (escapedLUT[c]) { + uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]); + memcpy(p, &w, sizeof(w)); + p += 4; + col = 2; + } else { + // another option may be to just write the EOL and let the first char be handled by the faster methods above, but it appears that writing the extra byte here is generally faster... 
+ uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); + memcpy(p, &w, sizeof(w)); + p += 3; + col = 1; + } + } + + end: + if(doEnd) { + // special case: if the last character is a space/tab, it needs to be escaped as it's the final character on the line + unsigned char lc = *(p-1); + if(lc == '\t' || lc == ' ') { + *(p-1) = '='; + *p = lc+64; + p++; + col++; + } + } + *colOffset = col; + return p - dest; +} + + +extern "C" { + size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int) = &do_encode_generic; + int _encode_isa = ISA_GENERIC; +} + +void encoder_sse2_init(); +void encoder_ssse3_init(); +void encoder_avx_init(); +void encoder_avx2_init(); +void encoder_vbmi2_init(); +void encoder_neon_init(); +void encoder_rvv_init(); + +#if defined(PLATFORM_X86) && defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 +# if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256) +# include "encoder_avx_base.h" +static inline void encoder_native_init() { + _do_encode = &do_encode_simd< do_encode_avx2 >; + encoder_avx2_lut(); + _encode_isa = ISA_NATIVE; +} +# else +# include "encoder_sse_base.h" +static inline void encoder_native_init() { + _do_encode = &do_encode_simd< do_encode_sse >; + encoder_sse_lut(); + _encode_isa = ISA_NATIVE; +} +# endif +#endif + + +void encoder_init() { +#ifdef PLATFORM_X86 +# if defined(YENC_BUILD_NATIVE) && YENC_BUILD_NATIVE!=0 + encoder_native_init(); +# else + int use_isa = cpu_supports_isa(); + if(use_isa >= ISA_LEVEL_VBMI2) + encoder_vbmi2_init(); + else if(use_isa >= ISA_LEVEL_AVX2) + encoder_avx2_init(); + else if(use_isa >= ISA_LEVEL_AVX) + encoder_avx_init(); + else if(use_isa >= ISA_LEVEL_SSSE3) + encoder_ssse3_init(); + else + encoder_sse2_init(); +# endif +#endif +#ifdef PLATFORM_ARM + if(cpu_supports_neon()) + encoder_neon_init(); +#endif +#ifdef __riscv + if(cpu_supports_rvv()) + encoder_rvv_init(); +#endif +} diff --git a/rapidyenc/src/encoder.h b/rapidyenc/src/encoder.h new 
file mode 100644 index 0000000..b904a97 --- /dev/null +++ b/rapidyenc/src/encoder.h @@ -0,0 +1,25 @@ +#ifndef __YENC_ENCODER_H +#define __YENC_ENCODER_H + +#ifdef __cplusplus +extern "C" { +#endif + + + +#include "hedley.h" + +extern size_t (*_do_encode)(int, int*, const unsigned char* HEDLEY_RESTRICT, unsigned char* HEDLEY_RESTRICT, size_t, int); +extern int _encode_isa; +#define do_encode (*_do_encode) +void encoder_init(); +static inline int encode_isa_level() { + return _encode_isa; +} + + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/rapidyenc/src/encoder_avx.cc b/rapidyenc/src/encoder_avx.cc new file mode 100644 index 0000000..4ef746d --- /dev/null +++ b/rapidyenc/src/encoder_avx.cc @@ -0,0 +1,17 @@ +#include "common.h" + +#if defined(__AVX__) && defined(__POPCNT__) +#include "encoder_sse_base.h" + +void encoder_avx_init() { + _do_encode = &do_encode_simd< do_encode_sse >; + encoder_sse_lut(); + _encode_isa = ISA_LEVEL_AVX; +} +#else +void encoder_ssse3_init(); +void encoder_avx_init() { + encoder_ssse3_init(); +} +#endif + diff --git a/rapidyenc/src/encoder_avx2.cc b/rapidyenc/src/encoder_avx2.cc new file mode 100644 index 0000000..ba6008f --- /dev/null +++ b/rapidyenc/src/encoder_avx2.cc @@ -0,0 +1,17 @@ +#include "common.h" + +#if defined(__AVX2__) && !defined(YENC_DISABLE_AVX256) +#include "encoder_avx_base.h" + +void encoder_avx2_init() { + _do_encode = &do_encode_simd< do_encode_avx2 >; + encoder_avx2_lut(); + _encode_isa = ISA_LEVEL_AVX2; +} +#else +void encoder_avx_init(); +void encoder_avx2_init() { + encoder_avx_init(); +} +#endif + diff --git a/rapidyenc/src/encoder_avx_base.h b/rapidyenc/src/encoder_avx_base.h new file mode 100644 index 0000000..ac88d2e --- /dev/null +++ b/rapidyenc/src/encoder_avx_base.h @@ -0,0 +1,564 @@ +// can't seem to make this worth it +#include "common.h" +#ifdef __AVX2__ + +#include "encoder.h" +#include "encoder_common.h" +#define YMM_SIZE 32 + +#if (defined(__GNUC__) && __GNUC__ >= 7) || (defined(_MSC_VER) && 
/*
 * Builds the 256-entry table used when writing the last character of a line
 * together with the CRLF that follows it.
 *
 * The index is the raw input byte. Each entry is a 32-bit word that callers
 * store with a single 4-byte write and then advance by 3 + (entry >> 27):
 *   - ordinary byte:  (byte+42) | '\r' << 8 | '\n' << 16        -> 3 bytes used
 *   - byte whose encoded form is NUL, TAB, LF, CR, SPACE or '=':
 *     '=' | (byte+42+64) << 8 | '\r' << 16 | '\n' << 24         -> 4 bytes used
 * '.' is deliberately not in the escaped set here.
 */
static inline void fill_eolLastChar(uint32_t* table) {
	int raw = 0;
	while(raw < 256) {
		// yEnc encoding of the byte: add 42 modulo 256
		const uint32_t encoded = (uint32_t)((raw + 42) & 0xff);
		uint32_t entry;
		switch(encoded) {
			case '\0':
			case '\t':
			case '\n':
			case '\r':
			case ' ':
			case '=':
				// "=X\r\n" where X is the encoded byte shifted by a further 64
				entry = 0x0a0d003dU + (((encoded + 64) & 0xff) << 8);
				break;
			default:
				// "X\r\n" followed by a padding zero byte
				entry = 0x0a0d00U + encoded;
				break;
		}
		table[raw] = entry;
		raw++;
	}
}
-1 : 0); + lookupsAVX2->expandMergemix[i*64 + j + 32] = ('='*(n==j) + 64*(n==j-1) + 42*(n!=j)); + } + } + } +} + +template +HEDLEY_ALWAYS_INLINE void do_encode_avx2(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) { + // offset position to enable simpler loop condition checking + const int INPUT_OFFSET = YMM_SIZE*4 + 1 -1; // -1 to change <= to < + if(len <= INPUT_OFFSET || line_size < 16) return; + + uint8_t *p = dest; // destination pointer + intptr_t i = -(intptr_t)len; // input position + intptr_t lineSizeOffset = -line_size +1; // -1 because we want to stop one char before the end to handle the last char differently + intptr_t col = *colOffset + lineSizeOffset; + + i += INPUT_OFFSET; + const uint8_t* es = srcEnd - INPUT_OFFSET; + +#if !defined(__tune_bdver4__) && !defined(__tune_znver1__) + // always process at least one byte to prevent underflow when doing a read with -1 offset + if(col < 0 && col != -line_size+1) { + // not the first/last character of a line + uint8_t c = es[i++]; + if(HEDLEY_UNLIKELY(c == 214 || c == '\n'+214 || c == '\r'+214 || c == '='-42)) { + *(uint16_t*)p = 0x6a3d + (((uint16_t)c) << 8); + p += 2; + col += 2; + } else { + *p++ = c+42; + col++; + } + } +#endif + + if(HEDLEY_UNLIKELY(col >= 0)) { + uint8_t c = es[i++]; + if(col == 0) { + // last char + uint32_t eolChar = (use_isa >= ISA_LEVEL_VBMI2 ? 
lookupsVBMI2->eolLastChar[c] : lookupsAVX2->eolLastChar[c]); + *(uint32_t*)p = eolChar; + p += 3 + (uintptr_t)(eolChar>>27); + col = -line_size+1; + } else { + // line overflowed, insert a newline + if (LIKELIHOOD(0.0273, escapedLUT[c]!=0)) { + *(uint32_t*)p = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]); + p += 4; + col = 2-line_size + 1; + } else { + *(uint32_t*)p = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); + p += 3; + col = 2-line_size; + } + } + } + if (HEDLEY_LIKELY(col == -line_size+1)) { + // first char of the line + uint8_t c = es[i++]; + if (LIKELIHOOD(0.0273, escapedLUT[c] != 0)) { + *(uint16_t*)p = escapedLUT[c]; + p += 2; + col += 2; + } else { + *(p++) = c + 42; + col += 1; + } + } + do { + __m256i dataA = _mm256_loadu_si256((__m256i *)(es + i)); + __m256i dataB = _mm256_loadu_si256((__m256i *)(es + i) + 1); + i += YMM_SIZE*2; + // search for special chars + __m256i cmpA = _mm256_cmpeq_epi8( + _mm256_shuffle_epi8(_mm256_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42, + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm256_abs_epi8(dataA)), + dataA + ); + __m256i cmpB = _mm256_cmpeq_epi8( + _mm256_shuffle_epi8(_mm256_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42, + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm256_abs_epi8(dataB)), + dataB + ); + +#if defined(__AVX512VL__) + if(use_isa >= ISA_LEVEL_AVX3) { + dataA = _mm256_add_epi8(dataA, _mm256_set1_epi8(42)); + dataA = _mm256_ternarylogic_epi32(dataA, cmpA, _mm256_set1_epi8(64), 0xf8); // data | (cmp & 64) + dataB = _mm256_add_epi8(dataB, _mm256_set1_epi8(42)); + dataB = _mm256_ternarylogic_epi32(dataB, cmpB, _mm256_set1_epi8(64), 0xf8); // data | (cmp & 64) + } +#endif + + uint32_t maskA = 
(uint32_t)_mm256_movemask_epi8(cmpA); + uint32_t maskB = (uint32_t)_mm256_movemask_epi8(cmpB); + unsigned int maskBitsA = popcnt32(maskA); + unsigned int maskBitsB = popcnt32(maskB); + unsigned int outputBytesA = maskBitsA + YMM_SIZE; + unsigned int bitIndexA, bitIndexB; + if (LIKELIHOOD(0.170, (maskBitsA|maskBitsB) > 1)) { + _encode_loop_branch_slow: + unsigned int m1 = maskA & 0xffff, m3 = maskB & 0xffff; + unsigned int m2, m4; + __m256i data1A, data2A; + __m256i data1B, data2B; + __m256i shuf1A, shuf1B; // not set in VBMI2 path + __m256i shuf2A, shuf2B; // not set in VBMI2 path + +#if defined(__AVX512VBMI2__) && defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_VBMI2) { + m2 = maskA >> 16; + m4 = maskB >> 16; + + /* alternative no-LUT strategy + uint64_t expandMaskA = ~_pdep_u64(~maskA, 0x5555555555555555); // expand bits, with bits set + expandMaskA = _pext_u64(expandMaskA^0x5555555555555555, expandMaskA); + */ + + data1A = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KLOAD32(lookupsVBMI2->expand, m1), dataA); + data2A = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KLOAD32(lookupsVBMI2->expand, m2), _mm256_castsi128_si256( + _mm256_extracti128_si256(dataA, 1) + )); + data1B = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KLOAD32(lookupsVBMI2->expand, m3), dataB); + data2B = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KLOAD32(lookupsVBMI2->expand, m4), _mm256_castsi128_si256( + _mm256_extracti128_si256(dataB, 1) + )); + } else +#endif + { + if(use_isa < ISA_LEVEL_AVX3) { + dataA = _mm256_add_epi8(dataA, _mm256_blendv_epi8(_mm256_set1_epi8(42), _mm256_set1_epi8(42+64), cmpA)); + dataB = _mm256_add_epi8(dataB, _mm256_blendv_epi8(_mm256_set1_epi8(42), _mm256_set1_epi8(42+64), cmpB)); + } + + m2 = (maskA >> 11) & 0x1fffe0; + m4 = (maskB >> 11) & 0x1fffe0; + + // duplicate halves + data1A = _mm256_inserti128_si256(dataA, _mm256_castsi256_si128(dataA), 1); + data1B = _mm256_inserti128_si256(dataB, _mm256_castsi256_si128(dataB), 1); +#if 
defined(__tune_znver2__) || defined(__tune_znver3__) || defined(__tune_znver4__) + data2A = _mm256_permute2x128_si256(dataA, dataA, 0x11); + data2B = _mm256_permute2x128_si256(dataB, dataB, 0x11); +#else + data2A = _mm256_permute4x64_epi64(dataA, 0xee); + data2B = _mm256_permute4x64_epi64(dataB, 0xee); +#endif + + shuf1A = _mm256_load_si256(lookupsAVX2->shufExpand + m1); + shuf2A = _mm256_load_si256((__m256i*)((char*)(lookupsAVX2->shufExpand) + m2)); + shuf1B = _mm256_load_si256(lookupsAVX2->shufExpand + m3); + shuf2B = _mm256_load_si256((__m256i*)((char*)(lookupsAVX2->shufExpand) + m4)); + + // expand + data1A = _mm256_shuffle_epi8(data1A, shuf1A); + data2A = _mm256_shuffle_epi8(data2A, shuf2A); + data1B = _mm256_shuffle_epi8(data1B, shuf1B); + data2B = _mm256_shuffle_epi8(data2B, shuf2B); + // add in '=' + data1A = _mm256_blendv_epi8(data1A, _mm256_set1_epi8('='), shuf1A); + data2A = _mm256_blendv_epi8(data2A, _mm256_set1_epi8('='), shuf2A); + data1B = _mm256_blendv_epi8(data1B, _mm256_set1_epi8('='), shuf1B); + data2B = _mm256_blendv_epi8(data2B, _mm256_set1_epi8('='), shuf2B); + } + + unsigned int shuf1Len = popcnt32(m1) + 16; + unsigned int shuf3Len = popcnt32(m3) + 16; + _mm256_storeu_si256((__m256i*)p, data1A); + _mm256_storeu_si256((__m256i*)(p + shuf1Len), data2A); + _mm256_storeu_si256((__m256i*)(p + outputBytesA), data1B); + _mm256_storeu_si256((__m256i*)(p + outputBytesA + shuf3Len), data2B); + unsigned int outputBytes = YMM_SIZE + outputBytesA + maskBitsB; + p += outputBytes; + col += outputBytes; + + if(col >= 0) { + // we overflowed - find correct position to revert back to + // this is perhaps sub-optimal on 32-bit, but who still uses that with AVX2? 
+ uint64_t eqMask; + int shiftAmt = (int)(maskBitsB + YMM_SIZE -1 - col); + if(HEDLEY_UNLIKELY(shiftAmt < 0)) { + uint32_t eqMask1, eqMask2; +#if defined(__AVX512VBMI2__) && defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_VBMI2) { + eqMask1 = lookupsVBMI2->expand[m1]; + eqMask2 = lookupsVBMI2->expand[m2]; + } else +#endif + { + eqMask1 = (uint32_t)_mm256_movemask_epi8(shuf1A); + eqMask2 = (uint32_t)_mm256_movemask_epi8(shuf2A); + } + eqMask = eqMask1 | ((uint64_t)eqMask2 << shuf1Len); + if(use_isa < ISA_LEVEL_VBMI2) + i += (uintptr_t)maskBitsB; + else + i -= YMM_SIZE; + shiftAmt += outputBytesA; + } else { + uint32_t eqMask3, eqMask4; +#if defined(__AVX512VBMI2__) && defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_VBMI2) { + eqMask3 = lookupsVBMI2->expand[m3]; + eqMask4 = lookupsVBMI2->expand[m4]; + } else +#endif + { + eqMask3 = (uint32_t)_mm256_movemask_epi8(shuf1B); + eqMask4 = (uint32_t)_mm256_movemask_epi8(shuf2B); + } + eqMask = eqMask3 | ((uint64_t)eqMask4 << shuf3Len); + } + +#if defined(__GNUC__) && defined(PLATFORM_AMD64) + if(use_isa >= ISA_LEVEL_VBMI2) { + __asm__( + "shrq $1, %[eqMask] \n" + "shrq %%cl, %[eqMask] \n" + "adcq %q[col], %q[p] \n" + : [eqMask]"+r"(eqMask), [p]"+r"(p) + : "c"(shiftAmt), [col]"r"(~col) + ); + i -= _mm_popcnt_u64(eqMask); + } else +#endif + { + eqMask >>= shiftAmt; + unsigned int bitCount; +#ifdef PLATFORM_AMD64 + bitCount = (unsigned int)_mm_popcnt_u64(eqMask); +#else + bitCount = popcnt32(eqMask & 0xffffffff) + popcnt32(eqMask >> 32); +#endif +#if defined(__AVX512VBMI2__) && defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_VBMI2) { + i -= bitCount; + p -= col; + if(LIKELIHOOD(0.98, (eqMask & 1) != 1)) + p--; + else + i++; + } else +#endif + { + i += bitCount; + unsigned int revert = (unsigned int)(col + (eqMask & 1)); + p -= revert; + i -= revert; + } + } + goto _encode_eol_handle_pre; + } + } else { + //_encode_loop_branch_fast: + maskBitsB += 
YMM_SIZE; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { +# if defined(__AVX512VBMI2__) + if(use_isa >= ISA_LEVEL_VBMI2) { + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataA); + dataA = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KNOT32(maskA), dataA); + _mm256_storeu_si256((__m256i*)p, dataA); + p += outputBytesA; + + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataB); + dataB = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KNOT32(maskB), dataB); + _mm256_storeu_si256((__m256i*)p, dataB); + p += maskBitsB; + } else +# endif + { + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataA); + dataA = _mm256_mask_alignr_epi8(dataA, (uint32_t)(-(int32_t)maskA), dataA, _mm256_permute4x64_epi64(dataA, _MM_SHUFFLE(1,0,3,2)), 15); + dataA = _mm256_ternarylogic_epi32(dataA, cmpA, _mm256_set1_epi8('='), 0xb8); // (data & ~cmp) | (cmp & '=') + _mm256_storeu_si256((__m256i*)p, dataA); + p += outputBytesA; + + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataB); + dataB = _mm256_mask_alignr_epi8(dataB, (uint32_t)(-(int32_t)maskB), dataB, _mm256_permute4x64_epi64(dataB, _MM_SHUFFLE(1,0,3,2)), 15); + dataB = _mm256_ternarylogic_epi32(dataB, cmpB, _mm256_set1_epi8('='), 0xb8); + _mm256_storeu_si256((__m256i*)p, dataB); + p += maskBitsB; + } + } else +#endif + { + bitIndexA = _lzcnt_u32(maskA); + bitIndexB = _lzcnt_u32(maskB); + __m256i mergeMaskA = _mm256_load_si256((const __m256i*)(lookupsAVX2->expandMergemix + bitIndexA*2*YMM_SIZE)); + __m256i mergeMaskB = _mm256_load_si256((const __m256i*)(lookupsAVX2->expandMergemix + bitIndexB*2*YMM_SIZE)); + +#if defined(__tune_bdver4__) || defined(__tune_znver1__) + // avoid slower 32-byte crossing loads on Zen1 + __m256i dataAShifted = _mm256_alignr_epi8( + dataA, + _mm256_inserti128_si256(dataA, _mm256_castsi256_si128(dataA), 1), + 15 + ); + __m256i dataBShifted = _mm256_alignr_epi8( + dataB, + _mm256_inserti128_si256(dataB, _mm256_castsi256_si128(dataB), 1), + 15 + ); +#else + __m256i dataAShifted = 
_mm256_loadu_si256((__m256i *)(es + i - YMM_SIZE*2 - 1)); + __m256i dataBShifted = _mm256_loadu_si256((__m256i *)(es + i - YMM_SIZE - 1)); +#endif + dataA = _mm256_andnot_si256(cmpA, dataA); // clear space for '=' char + dataA = _mm256_blendv_epi8(dataAShifted, dataA, mergeMaskA); + dataA = _mm256_add_epi8(dataA, _mm256_load_si256((const __m256i*)(lookupsAVX2->expandMergemix + bitIndexA*2*YMM_SIZE) + 1)); + _mm256_storeu_si256((__m256i*)p, dataA); + p[YMM_SIZE] = es[i-1-YMM_SIZE] + 42 + (64 & (maskA>>(YMM_SIZE-1-6))); + p += outputBytesA; + + dataB = _mm256_andnot_si256(cmpB, dataB); + dataB = _mm256_blendv_epi8(dataBShifted, dataB, mergeMaskB); + dataB = _mm256_add_epi8(dataB, _mm256_load_si256((const __m256i*)(lookupsAVX2->expandMergemix + bitIndexB*2*YMM_SIZE) + 1)); + _mm256_storeu_si256((__m256i*)p, dataB); + p[YMM_SIZE] = es[i-1] + 42 + (64 & (maskB>>(YMM_SIZE-1-6))); + p += maskBitsB; + } + col += outputBytesA + maskBitsB; + + if(col >= 0) { + _encode_loop_branch_fast_eol: + if(HEDLEY_UNLIKELY(col > (intptr_t)maskBitsB)) { + if(use_isa >= ISA_LEVEL_AVX3) + bitIndexA = _lzcnt_u32(maskA); + bitIndexA += 1 + maskBitsB; + + i += maskBitsB - YMM_SIZE; + if(HEDLEY_UNLIKELY(col == (intptr_t)bitIndexA)) { + // this is an escape character, so line will need to overflow + p--; + } else { + i += (col > (intptr_t)bitIndexA); + } + } else { + if(use_isa >= ISA_LEVEL_AVX3) + bitIndexB = _lzcnt_u32(maskB); + bitIndexB++; + + if(HEDLEY_UNLIKELY(col == (intptr_t)bitIndexB)) { + p--; + } else { + i += (col > (intptr_t)bitIndexB); + } + } + i -= col; + p -= col; + + _encode_eol_handle_pre: + uint32_t eolChar = (use_isa >= ISA_LEVEL_VBMI2 ? 
lookupsVBMI2->eolLastChar[es[i]] : lookupsAVX2->eolLastChar[es[i]]); + *(uint32_t*)p = eolChar; + p += 3 + (uintptr_t)(eolChar>>27); + col = lineSizeOffset; + + if(HEDLEY_UNLIKELY(i >= 0)) { // this isn't really a proper check - it's only needed to support short lines; basically, if the line is too short, `i` never gets checked, so we need one somewhere + i++; + break; + } + + dataA = _mm256_loadu_si256((__m256i *)(es + i + 1)); + dataB = _mm256_loadu_si256((__m256i *)(es + i + 1) + 1); + i += YMM_SIZE*2 + 1; + // search for special chars + cmpA = _mm256_cmpeq_epi8( + _mm256_shuffle_epi8(_mm256_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42, + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm256_adds_epi8( + _mm256_abs_epi8(dataA), _mm256_set_epi64x(0, 0, 0, 88) + )), + dataA + ); + cmpB = _mm256_cmpeq_epi8( + _mm256_shuffle_epi8(_mm256_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42, + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm256_abs_epi8(dataB)), + dataB + ); + + // duplicate some code from above to reduce jumping a little +#if defined(__AVX512VL__) + if(use_isa >= ISA_LEVEL_AVX3) { + dataA = _mm256_add_epi8(dataA, _mm256_set1_epi8(42)); + dataA = _mm256_ternarylogic_epi32(dataA, cmpA, _mm256_set1_epi8(64), 0xf8); // data | (cmp & 64) + dataB = _mm256_add_epi8(dataB, _mm256_set1_epi8(42)); + dataB = _mm256_ternarylogic_epi32(dataB, cmpB, _mm256_set1_epi8(64), 0xf8); // data | (cmp & 64) + } +#endif + + maskA = (uint32_t)_mm256_movemask_epi8(cmpA); + maskB = (uint32_t)_mm256_movemask_epi8(cmpB); + maskBitsA = popcnt32(maskA); + maskBitsB = popcnt32(maskB); + outputBytesA = maskBitsA + YMM_SIZE; + if (LIKELIHOOD(0.170, (maskBitsA|maskBitsB) > 1)) + goto _encode_loop_branch_slow; + + + //goto 
_encode_loop_branch_fast; + // duplicating the code, instead of using the goto above, seems to fix a performance regression in GCC + maskBitsB += YMM_SIZE; +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { +# if defined(__AVX512VBMI2__) + if(use_isa >= ISA_LEVEL_VBMI2) { + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataA); + dataA = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KNOT32(maskA), dataA); + _mm256_storeu_si256((__m256i*)p, dataA); + p += outputBytesA; + + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataB); + dataB = _mm256_mask_expand_epi8(_mm256_set1_epi8('='), KNOT32(maskB), dataB); + _mm256_storeu_si256((__m256i*)p, dataB); + p += maskBitsB; + } else +# endif + { + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataA); + dataA = _mm256_mask_alignr_epi8(dataA, (uint32_t)(-(int32_t)maskA), dataA, _mm256_permute4x64_epi64(dataA, _MM_SHUFFLE(1,0,3,2)), 15); + dataA = _mm256_ternarylogic_epi32(dataA, cmpA, _mm256_set1_epi8('='), 0xb8); // (data & ~cmp) | (cmp & '=') + _mm256_storeu_si256((__m256i*)p, dataA); + p += outputBytesA; + + _mm256_mask_storeu_epi8(p+1, 1UL<<31, dataB); + dataB = _mm256_mask_alignr_epi8(dataB, (uint32_t)(-(int32_t)maskB), dataB, _mm256_permute4x64_epi64(dataB, _MM_SHUFFLE(1,0,3,2)), 15); + dataB = _mm256_ternarylogic_epi32(dataB, cmpB, _mm256_set1_epi8('='), 0xb8); + _mm256_storeu_si256((__m256i*)p, dataB); + p += maskBitsB; + } + } else +#endif + { + bitIndexA = _lzcnt_u32(maskA); + bitIndexB = _lzcnt_u32(maskB); + __m256i mergeMaskA = _mm256_load_si256((const __m256i*)(lookupsAVX2->expandMergemix + bitIndexA*2*YMM_SIZE)); + __m256i mergeMaskB = _mm256_load_si256((const __m256i*)(lookupsAVX2->expandMergemix + bitIndexB*2*YMM_SIZE)); + +#if defined(__tune_bdver4__) || defined(__tune_znver1__) + // avoid slower 32-byte crossing loads on Zen1 + __m256i dataAShifted = _mm256_alignr_epi8( + dataA, + _mm256_inserti128_si256(dataA, _mm256_castsi256_si128(dataA), 1), + 15 + ); + __m256i dataBShifted = _mm256_alignr_epi8( 
// lookup tables for scalar processing
// Both tables are indexed by the raw (un-encoded) input byte. The _B1.._BX
// helpers expand a per-index expression _B(n) into all 256 initialisers;
// _B itself is redefined for each table below.
#define _B1(n) _B(n), _B(n+1), _B(n+2), _B(n+3)
#define _B2(n) _B1(n), _B1(n+4), _B1(n+8), _B1(n+12)
#define _B3(n) _B2(n), _B2(n+16), _B2(n+32), _B2(n+48)
#define _BX _B3(0), _B3(64), _B3(128), _B3(192)

// escapeLUT[n] is the encoded byte (n+42) mod 256, or 0 when that encoded
// byte is NUL, CR, LF or '=' and therefore must always be escaped. Callers
// test the entry for zero and fall back to escapedLUT in that case.
static const unsigned char escapeLUT[256] = { // whether or not the character is critical
#define _B(n) ((n == 214 || n == '\r'+214 || n == '\n'+214 || n == '='-42) ? 0 : (n+42) & 0xff)
	_BX
#undef _B
};
// escapedLUT[n] is the packed two-byte escape sequence ('=' followed by
// (n+42+64) mod 256) for every byte that may need escaping somewhere on a
// line, or 0 otherwise. Besides the always-escaped set above this also
// covers TAB, SPACE and '.', which only need escaping at certain columns;
// callers decide by position whether to honour those entries.
// NOTE(review): byte order of the pair depends on UINT16_PACK, defined
// outside this view - presumably '=' is the first byte in memory.
static const uint16_t escapedLUT[256] = { // escaped sequences for characters that need escaping
#define _B(n) ((n == 214 || n == 214+'\r' || n == 214+'\n' || n == '='-42 || n == 214+'\t' || n == 214+' ' || n == '.'-42) ? UINT16_PACK('=', ((n+42+64)&0xff)) : 0)
	_BX
#undef _B
};

#undef _B1
#undef _B2
#undef _B3
#undef _BX
// ARM's CLZ instruction at native bit-width
// Returns the number of leading zero bits in v. The argument is 64 bits wide
// on AArch64 and 32 bits wide otherwise, so callers must not assume a fixed
// width. Where the explicit zero check is compiled in, v == 0 yields the full
// bit width (64 or 32).
#ifdef __aarch64__
static HEDLEY_ALWAYS_INLINE int clz_n(uint64_t v) {
# ifdef _MSC_VER
	long r;
	// does this work?
	// _BitScanReverse64 stores the index of the highest set bit and reports
	// whether any bit was set; index ^ 63 converts that to a leading-zero count
	if(_BitScanReverse64((unsigned long*)&r, v))
		r ^= 63;
	else
		r = 64;
	return r;
# else
#  if defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(11,0,0)
	// this pattern is only detected on GCC >= 11 (Clang 9 seems to as well, unsure about earlier versions)
	// - note: return type must be 'int'; GCC fails to optimise this if type is 'long'
	// GCC <= 10 doesn't optimize around the '0 = undefined behaviour', so not needed there
	if(v == 0) return 64;
#  endif
	// NOTE(review): on compilers where the check above is compiled out,
	// a zero argument reaches __builtin_clzll, whose result is formally
	// undefined for 0 - confirm this is still acceptable for those targets
	return __builtin_clzll(v);
# endif
}
#else
static HEDLEY_ALWAYS_INLINE int clz_n(uint32_t v) {
# ifdef __GNUC__
#  if defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(7,0,0)
	// as with AArch64 version above, only insert this check if compiler can optimise it away
	if(v == 0) return 32;
#  endif
	return __builtin_clz(v);
# elif defined(_MSC_VER)
	return _arm_clz(v);
# else
	return __clz(v); // ARM compiler?
# endif
}
#endif
dataA = vaddq_u8(dataA, vbslq_u8(cmpA, vdupq_n_u8(64+42), vdupq_n_u8(42))); + dataB = vorrq_u8(dataB, vandq_u8(cmpB, vdupq_n_u8(64))); +#else + uint8x16_t cmpA = vorrq_u8( + vorrq_u8( + vceqq_u8(oDataA, vdupq_n_u8(-42)), + vceqq_u8(oDataA, vdupq_n_u8('='-42)) + ), + vorrq_u8( + vceqq_u8(oDataA, vdupq_n_u8('\r'-42)), + vceqq_u8(oDataA, vdupq_n_u8('\n'-42)) + ) + ); + uint8x16_t cmpB = vorrq_u8( + vorrq_u8( + vceqq_u8(oDataB, vdupq_n_u8(-42)), + vceqq_u8(oDataB, vdupq_n_u8('='-42)) + ), + vorrq_u8( + vceqq_u8(oDataB, vdupq_n_u8('\r'-42)), + vceqq_u8(oDataB, vdupq_n_u8('\n'-42)) + ) + ); + + // dup low 2 bytes & compare + uint8x8_t firstTwoChars = vreinterpret_u8_u16(vdup_lane_u16(vreinterpret_u16_u8(vget_low_u8(oDataA)), 0)); + uint8x8_t cmpNl = vceq_u8(firstTwoChars, vmake_u8( + ' '+214,' '+214,'\t'+214,'\t'+214,'\r'+214,'.'-42,'='-42,'='-42 + )); + // use padd to merge comparisons + uint16x4_t cmpNl2 = vreinterpret_u16_u8(cmpNl); + cmpNl2 = vpadd_u16(cmpNl2, vdup_n_u16(0)); + cmpNl2 = vpadd_u16(cmpNl2, vdup_n_u16(0)); + cmpA = vcombine_u8( + vorr_u8(vget_low_u8(cmpA), vreinterpret_u8_u16(cmpNl2)), + vget_high_u8(cmpA) + ); + dataA = vsubq_u8(dataA, vbslq_u8(cmpA, vdupq_n_u8(-64-42), vdupq_n_u8(-42))); + dataB = vsubq_u8(dataB, vbslq_u8(cmpB, vdupq_n_u8(-64-42), vdupq_n_u8(-42))); +#endif + + + uint8x16_t cmpAMasked = vandq_u8(cmpA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x16_t cmpBMasked = vandq_u8(cmpB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); +#ifdef __aarch64__ + uint8x16_t cmpMerge = vpaddq_u8(cmpAMasked, cmpBMasked); + cmpMerge = vpaddq_u8(cmpMerge, cmpMerge); + uint64_t mask = vgetq_lane_u64(vreinterpretq_u64_u8(cmpMerge), 0); + + // write out first char + newline + uint32_t firstChar = vgetq_lane_u8(dataA, 0); + if(LIKELIHOOD(0.0234, mask & 1)) { + firstChar <<= 8; + firstChar |= 0x0a0d003d; + memcpy(p, &firstChar, sizeof(firstChar)); + p += 4; + mask ^= 1; + cmpMerge = vbicq_u8(cmpMerge, vmakeq_u8(1,0,0,0, 0,0,0,0, 
0,0,0,0, 0,0,0,0)); + } else { + firstChar |= 0x0a0d00; + memcpy(p, &firstChar, sizeof(firstChar)); + p += 3; + } + + if(LIKELIHOOD(0.09, (mask & (mask-1)) != 0)) { + mask |= mask >> 8; + uint8x8_t cmpPacked = vpadd_u8(vget_low_u8(cmpMerge), vget_low_u8(cmpMerge)); + uint8_t m1 = (mask & 0xff); + uint8_t m2 = ((mask >> 16) & 0xff); + uint8_t m3 = ((mask >> 32) & 0xff); + uint8_t m4 = ((mask >> 48) & 0xff); +#else + // no vpaddq_u8 in ARMv7, so need extra 64-bit VPADD + uint8x8_t cmpPacked = vpadd_u8( + vpadd_u8( + vget_low_u8(cmpAMasked), vget_high_u8(cmpAMasked) + ), + vpadd_u8( + vget_low_u8(cmpBMasked), vget_high_u8(cmpBMasked) + ) + ); + cmpPacked = vpadd_u8(cmpPacked, cmpPacked); + uint32_t mask = vget_lane_u32(vreinterpret_u32_u8(cmpPacked), 0); + + // write out first char + newline + uint32_t firstChar = vgetq_lane_u8(dataA, 0); + if(LIKELIHOOD(0.0234, mask & 1)) { + firstChar <<= 8; + firstChar |= 0x0a0d003d; + memcpy(p, &firstChar, sizeof(firstChar)); + p += 4; + mask ^= 1; + cmpPacked = vbic_u8(cmpPacked, vmake_u8(1,0,0,0, 0,0,0,0)); + } else { + firstChar |= 0x0a0d00; + memcpy(p, &firstChar, sizeof(firstChar)); + p += 3; + } + + if(LIKELIHOOD(0.09, (mask & (mask-1)) != 0)) { + uint8_t m1 = (mask & 0xff); + uint8_t m2 = ((mask >> 8) & 0xff); + uint8_t m3 = ((mask >> 16) & 0xff); + uint8_t m4 = ((mask >> 24) & 0xff); +#endif + + // perform lookup for shuffle mask + uint8x16_t shuf1 = vld1q_u8((uint8_t*)(shufLUT + m1)); + uint8x16_t shuf2 = vld1q_u8((uint8_t*)(shufLUT + m2)); + uint8x16_t shuf3 = vld1q_u8((uint8_t*)(shufLUT + m3)); + uint8x16_t shuf4 = vld1q_u8((uint8_t*)(shufLUT + m4)); +#ifdef __aarch64__ + uint8x16_t data1A = vqtbx1q_u8(shuf1, dataA, shuf1); + uint8x16_t data2A = vqtbx1q_u8(shuf2, vextq_u8(dataA, dataA, 8), shuf2); + uint8x16_t data1B = vqtbx1q_u8(shuf3, dataB, shuf3); + uint8x16_t data2B = vqtbx1q_u8(shuf4, vextq_u8(dataB, dataB, 8), shuf4); +#else + uint8x8_t shuf1l = vget_low_u8(shuf1); + uint8x8_t shuf1h = vget_high_u8(shuf1); + 
uint8x8_t shuf2l = vget_low_u8(shuf2); + uint8x8_t shuf2h = vget_high_u8(shuf2); + uint8x8_t shuf3l = vget_low_u8(shuf3); + uint8x8_t shuf3h = vget_high_u8(shuf3); + uint8x8_t shuf4l = vget_low_u8(shuf4); + uint8x8_t shuf4h = vget_high_u8(shuf4); + uint8x16_t data1A = vcombine_u8(vtbx1_u8(shuf1l, vget_low_u8(dataA), shuf1l), + vtbx1_u8(shuf1h, vget_low_u8(dataA), shuf1h)); + uint8x16_t data2A = vcombine_u8(vtbx1_u8(shuf2l, vget_high_u8(dataA), shuf2l), + vtbx1_u8(shuf2h, vget_high_u8(dataA), shuf2h)); + uint8x16_t data1B = vcombine_u8(vtbx1_u8(shuf3l, vget_low_u8(dataB), shuf3l), + vtbx1_u8(shuf3h, vget_low_u8(dataB), shuf3h)); + uint8x16_t data2B = vcombine_u8(vtbx1_u8(shuf4l, vget_high_u8(dataB), shuf4l), + vtbx1_u8(shuf4h, vget_high_u8(dataB), shuf4h)); +#endif + data1A = vextq_u8(data1A, data1A, 1); // shift out processed byte (last char of line) + + uint32_t counts = vget_lane_u32(vreinterpret_u32_u8(vcnt_u8(cmpPacked)), 0); + counts += 0x08080807; + + unsigned char shuf1Len = counts & 0xff; + unsigned char shuf2Len = (counts>>8) & 0xff; + unsigned char shuf3Len = (counts>>16) & 0xff; + unsigned char shuf4Len = (counts>>24) & 0xff; + uint32_t shufTotalLen = counts * 0x1010101; + shufTotalLen >>= 24; + + vst1q_u8(p, data1A); + p += shuf1Len; + vst1q_u8(p, data2A); + p += shuf2Len; + vst1q_u8(p, data1B); + p += shuf3Len; + vst1q_u8(p, data2B); + p += shuf4Len; + col = shufTotalLen+1 + lineSizeOffset-32; + } else { + // shuffle stuff up + long bitIndex = clz_n(mask); + uint8x16_t vClz = vdupq_n_u8(bitIndex & ~(sizeof(mask)*8)); +#ifdef __aarch64__ + uint8x16_t blendA = vcgtq_u8(vmakeq_u8(63,62,61,60,51,50,49,48,47,46,45,44,35,34,33,32), vClz); + uint8x16_t blendB = vcgtq_u8(vmakeq_u8(31,30,29,28,19,18,17,16,15,14,13,12, 3, 2, 1, 0), vClz); +#else + uint8x16_t blendA = vcgtq_u8(vmakeq_u8(31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16), vClz); + uint8x16_t blendB = vcgtq_u8(vmakeq_u8(15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), vClz); +#endif + uint8x16_t 
dataAShifted = vbslq_u8(cmpA, vdupq_n_u8('='), dataA); + uint8x16_t dataBShifted = vbslq_u8(cmpB, vdupq_n_u8('='), dataB); + dataAShifted = vextq_u8(dataAShifted, dataBShifted, 1); + dataBShifted = vextq_u8(dataBShifted, dataBShifted, 1); + dataA = vbslq_u8(blendA, dataAShifted, dataA); + dataB = vbslq_u8(blendB, dataBShifted, dataB); + + vst1q_u8_x2_unaligned(p, vcreate2_u8(dataA, dataB)); + p += sizeof(uint8x16_t)*2 - 1; + p += (mask != 0); + col = lineSizeOffset + (mask != 0); + } + + i += sizeof(uint8x16_t)*2; + // TODO: check col >= 0 if we want to support short lines +} + + +HEDLEY_ALWAYS_INLINE void do_encode_neon(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) { + // offset position to enable simpler loop condition checking + const int INPUT_OFFSET = sizeof(uint8x16_t)*4 -1; // extra chars for EOL handling, -1 to change <= to < + if(len <= INPUT_OFFSET || line_size < (int)sizeof(uint8x16_t)*4) return; + + uint8_t *p = dest; // destination pointer + long i = -(long)len; // input position + long lineSizeOffset = -line_size +32; // line size plus vector length + long col = *colOffset - line_size +1; + + i += INPUT_OFFSET; + const uint8_t* es = srcEnd - INPUT_OFFSET; + + if (HEDLEY_LIKELY(col == -line_size+1)) { + uint8_t c = es[i++]; + if (LIKELIHOOD(0.0273, escapedLUT[c] != 0)) { + memcpy(p, escapedLUT + c, 2); + p += 2; + col += 2; + } else { + *(p++) = c + 42; + col += 1; + } + } + if(HEDLEY_UNLIKELY(col >= 0)) { + if(col == 0) + encode_eol_handle_pre(es, i, p, col, lineSizeOffset); + else { + uint8_t c = es[i++]; + if (LIKELIHOOD(0.0273, escapedLUT[c]!=0)) { + uint32_t v = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]); + memcpy(p, &v, sizeof(v)); + p += 4; + col = 2-line_size + 1; + } else { + uint32_t v = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); + memcpy(p, &v, sizeof(v)); + p += 3; + col = 2-line_size; + } + } + } + while(i < 0) { + // for unaligned loads, separate 
loads seem to be faster than vld1q_u8_x2 on Cortex A53; unsure if this applies elsewhere + uint8x16_t dataA = vld1q_u8(es + i); + uint8x16_t dataB = vld1q_u8(es + i + sizeof(uint8x16_t)); + i += sizeof(uint8x16_t)*2; + // search for special chars +#ifdef __aarch64__ + uint8x16_t cmpEqA = vceqq_u8(dataA, vdupq_n_u8('='-42)); + uint8x16_t cmpEqB = vceqq_u8(dataB, vdupq_n_u8('='-42)); + dataA = vaddq_u8(dataA, vdupq_n_u8(42)); + dataB = vaddq_u8(dataB, vdupq_n_u8(42)); + uint8x16_t cmpA = vqtbx1q_u8( + cmpEqA, + // \0 \n \r + vmakeq_u8(255,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataA + ); + uint8x16_t cmpB = vqtbx1q_u8( + cmpEqB, + // \0 \n \r + vmakeq_u8(255,0,0,0,0,0,0,0,0,0,255,0,0,255,0,0), + dataB + ); + + dataA = vorrq_u8(dataA, vandq_u8(cmpA, vdupq_n_u8(64))); + dataB = vorrq_u8(dataB, vandq_u8(cmpB, vdupq_n_u8(64))); +#else + // the ARMv8 strategy may be worth it here with 2x vtbx2's, but both GCC-9 and Clang-9 generate poor assembly for it, so it performs worse than the following + uint8x16_t cmpA = vorrq_u8( + vorrq_u8( + vceqq_u8(dataA, vdupq_n_u8(-42)), + vceqq_u8(dataA, vdupq_n_u8('='-42)) + ), + vorrq_u8( + vceqq_u8(dataA, vdupq_n_u8('\r'-42)), + vceqq_u8(dataA, vdupq_n_u8('\n'-42)) + ) + ); + uint8x16_t cmpB = vorrq_u8( + vorrq_u8( + vceqq_u8(dataB, vdupq_n_u8(-42)), + vceqq_u8(dataB, vdupq_n_u8('='-42)) + ), + vorrq_u8( + vceqq_u8(dataB, vdupq_n_u8('\r'-42)), + vceqq_u8(dataB, vdupq_n_u8('\n'-42)) + ) + ); + + dataA = vsubq_u8(dataA, vbslq_u8(cmpA, vdupq_n_u8(-64-42), vdupq_n_u8(-42))); + dataB = vsubq_u8(dataB, vbslq_u8(cmpB, vdupq_n_u8(-64-42), vdupq_n_u8(-42))); +#endif + + + long bitIndex; // prevent compiler whining + uint8x16_t cmpAMasked = vandq_u8(cmpA, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); + uint8x16_t cmpBMasked = vandq_u8(cmpB, vmakeq_u8(1,2,4,8,16,32,64,128, 1,2,4,8,16,32,64,128)); +#ifdef __aarch64__ + uint8x16_t cmpMerge = vpaddq_u8(cmpAMasked, cmpBMasked); + cmpMerge = vpaddq_u8(cmpMerge, cmpMerge); + uint64_t mask = 
vgetq_lane_u64(vreinterpretq_u64_u8(cmpMerge), 0); + if(LIKELIHOOD(0.09, (mask & (mask-1)) != 0)) { + mask |= mask >> 8; + uint8x8_t cmpPacked = vpadd_u8(vget_low_u8(cmpMerge), vget_low_u8(cmpMerge)); + uint8_t m1 = (mask & 0xff); + uint8_t m2 = ((mask >> 16) & 0xff); + uint8_t m3 = ((mask >> 32) & 0xff); + uint8_t m4 = ((mask >> 48) & 0xff); +#else + // no vpaddq_u8 in ARMv7, so need extra 64-bit VPADD + uint8x8_t cmpPacked = vpadd_u8( + vpadd_u8( + vget_low_u8(cmpAMasked), vget_high_u8(cmpAMasked) + ), + vpadd_u8( + vget_low_u8(cmpBMasked), vget_high_u8(cmpBMasked) + ) + ); + cmpPacked = vpadd_u8(cmpPacked, cmpPacked); + uint32_t mask = vget_lane_u32(vreinterpret_u32_u8(cmpPacked), 0); + if(LIKELIHOOD(0.09, (mask & (mask-1)) != 0)) { + uint8_t m1 = (mask & 0xff); + uint8_t m2 = ((mask >> 8) & 0xff); + uint8_t m3 = ((mask >> 16) & 0xff); + uint8_t m4 = ((mask >> 24) & 0xff); +#endif + + // perform lookup for shuffle mask + uint8x16_t shuf1 = vld1q_u8((uint8_t*)(shufLUT + m1)); + uint8x16_t shuf2 = vld1q_u8((uint8_t*)(shufLUT + m2)); + uint8x16_t shuf3 = vld1q_u8((uint8_t*)(shufLUT + m3)); + uint8x16_t shuf4 = vld1q_u8((uint8_t*)(shufLUT + m4)); + + // expand halves +#ifdef __aarch64__ + uint8x16_t data1A = vqtbx1q_u8(shuf1, dataA, shuf1); + uint8x16_t data2A = vqtbx1q_u8(shuf2, vextq_u8(dataA, dataA, 8), shuf2); + uint8x16_t data1B = vqtbx1q_u8(shuf3, dataB, shuf3); + uint8x16_t data2B = vqtbx1q_u8(shuf4, vextq_u8(dataB, dataB, 8), shuf4); +#else + uint8x8_t shuf1l = vget_low_u8(shuf1); + uint8x8_t shuf1h = vget_high_u8(shuf1); + uint8x8_t shuf2l = vget_low_u8(shuf2); + uint8x8_t shuf2h = vget_high_u8(shuf2); + uint8x8_t shuf3l = vget_low_u8(shuf3); + uint8x8_t shuf3h = vget_high_u8(shuf3); + uint8x8_t shuf4l = vget_low_u8(shuf4); + uint8x8_t shuf4h = vget_high_u8(shuf4); + uint8x16_t data1A = vcombine_u8(vtbx1_u8(shuf1l, vget_low_u8(dataA), shuf1l), + vtbx1_u8(shuf1h, vget_low_u8(dataA), shuf1h)); + uint8x16_t data2A = vcombine_u8(vtbx1_u8(shuf2l, 
vget_high_u8(dataA), shuf2l), + vtbx1_u8(shuf2h, vget_high_u8(dataA), shuf2h)); + uint8x16_t data1B = vcombine_u8(vtbx1_u8(shuf3l, vget_low_u8(dataB), shuf3l), + vtbx1_u8(shuf3h, vget_low_u8(dataB), shuf3h)); + uint8x16_t data2B = vcombine_u8(vtbx1_u8(shuf4l, vget_high_u8(dataB), shuf4l), + vtbx1_u8(shuf4h, vget_high_u8(dataB), shuf4h)); +#endif + + // store out + uint32_t counts = vget_lane_u32(vreinterpret_u32_u8(vcnt_u8(cmpPacked)), 0); + counts += 0x08080808; + + unsigned char shuf1Len = counts & 0xff; + unsigned char shuf2Len = (counts>>8) & 0xff; + unsigned char shuf3Len = (counts>>16) & 0xff; + unsigned char shuf4Len = (counts>>24) & 0xff; + uint32_t shufTotalLen = counts * 0x1010101; + shufTotalLen >>= 24; + + vst1q_u8(p, data1A); + p += shuf1Len; + vst1q_u8(p, data2A); + p += shuf2Len; + vst1q_u8(p, data1B); + p += shuf3Len; + vst1q_u8(p, data2B); + p += shuf4Len; + col += shufTotalLen; + + if(LIKELIHOOD(0.3, col >= 0)) { + // we overflowed - find correct position to revert back to + long revert = col; + long len2ndHalf = shuf3Len+shuf4Len; + long shiftAmt = len2ndHalf - col -1; + uint32_t eqMaskHalf; + if(HEDLEY_UNLIKELY(shiftAmt < 0)) { + eqMaskHalf = (expandLUT[m2] << shuf1Len) | expandLUT[m1]; + eqMaskHalf >>= shufTotalLen - col -1; + i += len2ndHalf - 16; + } else { + eqMaskHalf = (expandLUT[m4] << shuf3Len) | expandLUT[m3]; + eqMaskHalf >>= shiftAmt; + } + revert += eqMaskHalf & 1; + + // count bits in eqMask + uint8x8_t vCnt = vcnt_u8(vreinterpret_u8_u32(vmov_n_u32(eqMaskHalf))); + uint32_t cnt = vget_lane_u32(vreinterpret_u32_u8(vCnt), 0); + cnt *= 0x1010101; + i += cnt >> 24; + + p -= revert; + i -= revert; + goto _encode_eol_handle_pre; + } + } else { + { + bitIndex = clz_n(mask); + uint8x16_t vClz = vdupq_n_u8(bitIndex & ~(sizeof(mask)*8)); +#ifdef __aarch64__ + uint8x16_t blendA = vcgeq_u8(vmakeq_u8(63,62,61,60,51,50,49,48,47,46,45,44,35,34,33,32), vClz); + uint8x16_t blendB = vcgeq_u8(vmakeq_u8(31,30,29,28,19,18,17,16,15,14,13,12, 3, 2, 1, 0), 
vClz); +#else + uint8x16_t blendA = vcgeq_u8(vmakeq_u8(31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16), vClz); + uint8x16_t blendB = vcgeq_u8(vmakeq_u8(15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), vClz); +#endif + uint8x16_t dataAShifted = vextq_u8(dataA, dataA, 15); + uint8x16_t dataBShifted = vextq_u8(dataA, dataB, 15); + dataA = vbslq_u8(cmpA, vdupq_n_u8('='), dataA); + uint8x16_t outDataB = vbslq_u8(cmpB, vdupq_n_u8('='), dataB); + dataA = vbslq_u8(blendA, dataA, dataAShifted); + outDataB = vbslq_u8(blendB, outDataB, dataBShifted); + + vst1q_u8_x2_unaligned(p, vcreate2_u8(dataA, outDataB)); + p += sizeof(uint8x16_t)*2; + // write last byte + *p = vgetq_lane_u8(dataB, 15); + p += (mask != 0); + col += (mask != 0) + sizeof(uint8x16_t)*2; + } + + if(HEDLEY_UNLIKELY(col >= 0)) { +#ifdef __aarch64__ + // fixup bitIndex + bitIndex -= ((bitIndex+4)>>4)<<3; +#endif + bitIndex = bitIndex +1; + if(HEDLEY_UNLIKELY(col == bitIndex)) { + // this is an escape character, so line will need to overflow + p--; + } else { + i += (col > bitIndex); + } + p -= col; + i -= col; + + _encode_eol_handle_pre: + encode_eol_handle_pre(es, i, p, col, lineSizeOffset); + } + } + } + + *colOffset = col + line_size -1; + dest = p; + len = -(i - INPUT_OFFSET); +} + +void encoder_neon_init() { + _do_encode = &do_encode_simd; + _encode_isa = ISA_LEVEL_NEON; + // generate shuf LUT + for(int i=0; i<256; i++) { + int k = i; + uint16_t expand = 0; + uint8_t* res = (uint8_t*)(shufLUT + i); + int p = 0; + for(int j=0; j<8; j++) { + if(k & 1) { + res[j+p] = '='; + expand |= 1<<(j+p); + p++; + } + res[j+p] = j; + k >>= 1; + } + for(; p<8; p++) + res[8+p] = 8+p +0x80; // +0x80 => 0 discarded entries; has no effect other than to ease debugging + + expandLUT[i] = expand; + } +} +#else +void encoder_neon_init() {} +#endif /* defined(__ARM_NEON) */ diff --git a/rapidyenc/src/encoder_rvv.cc b/rapidyenc/src/encoder_rvv.cc new file mode 100644 index 0000000..298f70c --- /dev/null +++ 
b/rapidyenc/src/encoder_rvv.cc
@@ -0,0 +1,220 @@
+#include "common.h"
+
+#ifdef __riscv_vector
+#include "encoder.h"
+#include "encoder_common.h"
+
+# include <riscv_vector.h>
+# if defined(__clang__) && __clang_major__ < 16 // Clang before 16: intrinsic names are used without the __riscv_ prefix
+# define RV(f) f
+# else
+# define RV(f) __riscv_##f
+# endif
+
+
+static HEDLEY_ALWAYS_INLINE void encode_eol_handle_pre(const uint8_t* HEDLEY_RESTRICT _src, long& inpos, uint8_t*& outp, long& col, long lineSizeOffset) { // line-boundary step: writes the byte at _src[inpos], then CRLF and the following byte; advances inpos by 2 and outp by the bytes written, and overwrites col
+	// TODO: vectorize
+	uint8_t c = _src[inpos++];
+	if(HEDLEY_UNLIKELY(escapedLUT[c] && c != '.'-42)) { // bytes with an escapedLUT entry are written in their two-byte form, except '.'-42 which is written plainly here
+		memcpy(outp, &escapedLUT[c], sizeof(uint16_t));
+		outp += 2;
+	} else {
+		*(outp++) = c + 42;
+	}
+	
+	c = _src[inpos++];
+	if(LIKELIHOOD(0.0273, escapedLUT[c]!=0)) {
+		uint32_t w = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]); // presumably CRLF followed by the two-byte escaped form; the PACK macros are defined outside this chunk
+		memcpy(outp, &w, sizeof(w));
+		outp += 4;
+		col = lineSizeOffset + 2;
+	} else {
+		uint32_t w = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
+		memcpy(outp, &w, sizeof(w)); // copies 4 bytes but only 3 are kept (outp += 3)
+		outp += 3;
+		col = lineSizeOffset + 1;
+	}
+}
+
+
+HEDLEY_ALWAYS_INLINE void do_encode_rvv(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) { // RVV kernel: encodes the len bytes ending at srcEnd into dest, one LMUL=2 vector per iteration; on return dest, len (bytes left unprocessed) and *colOffset are updated; returns without touching anything when len or line_size is below the minimum checked here
+	size_t vl2 = RV(vsetvlmax_e8m2)(); // TODO: limit to line length
+	// TODO: have a LMUL=1 variant if line_size < vl
+	
+	// offset position to enable simpler loop condition checking
+	const int INPUT_OFFSET = vl2*2 -1; // extra chars for EOL handling, -1 to change <= to <
+	if((intptr_t)len <= INPUT_OFFSET || line_size < (int)vl2*2) return;
+	
+	uint8_t *outp = dest; // destination pointer
+	long inpos = -(long)len; // input position, negative and counting up towards 0 (indexed relative to _src)
+	long lineSizeOffset = -line_size +1;
+	long col = *colOffset - line_size +1; // column biased by -(line_size-1); a value >= 0 triggers end-of-line handling
+	
+	inpos += INPUT_OFFSET;
+	const uint8_t* _src = srcEnd - INPUT_OFFSET;
+	
+	if (HEDLEY_LIKELY(col == -line_size+1)) { // *colOffset == 0: encode the first byte of the line with the scalar path
+		uint8_t c = _src[inpos++];
+		if (LIKELIHOOD(0.0273, escapedLUT[c] != 0)) {
+			memcpy(outp, escapedLUT + c, 2);
+			outp += 2;
+			col += 2;
+		} else {
+			*(outp++) = c + 42;
+			col += 1;
+		}
+	}
+	if(HEDLEY_UNLIKELY(col >= 0)) { // already at or past the line boundary on entry
+		if(col == 0)
+			encode_eol_handle_pre(_src, inpos, outp, col, lineSizeOffset);
+		else {
+			uint8_t c = _src[inpos++];
+			if(LIKELIHOOD(0.0273, escapedLUT[c]!=0)) {
+				uint32_t v = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]);
+				memcpy(outp, &v, sizeof(v));
+				outp += 4;
+				col = 2-line_size + 1;
+			} else {
+				uint32_t v = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0);
+				memcpy(outp, &v, sizeof(v));
+				outp += 3;
+				col = 2-line_size;
+			}
+		}
+	}
+	
+	// vector constants
+	const vuint8mf2_t ALT_SHIFT = RV(vreinterpret_v_u16mf2_u8mf2)(RV(vmv_v_x_u16mf2)(4, vl2)); // every 16-bit lane holds 4, so viewed as bytes the shift amounts alternate 4, 0 (little-endian)
+	const uint8_t _MASK_EXPAND[] = {0xAA, 0xAB, 0xAE, 0xAF, 0xBA, 0xBB, 0xBE, 0xBF, 0xEA, 0xEB, 0xEE, 0xEF, 0xFA, 0xFB, 0xFE, 0xFF}; // nibble -> byte lookup: 0000abcd -> 1a1b1c1d
+	const vuint8m1_t MASK_EXPAND = RV(vle8_v_u8m1)(_MASK_EXPAND, 16);
+	
+	
+	// TODO: consider exploiting partial vector capability
+	while(inpos < 0) {
+		vuint8m2_t data = RV(vle8_v_u8m2)(_src + inpos, vl2);
+		inpos += vl2;
+		
+		// search for special chars
+		// TODO: vrgather strat
+		
+		vuint8m2_t tmpData = RV(vsub_vx_u8m2)(data, -42, vl2); // data + 42
+		vbool4_t cmp = RV(vmor_mm_b4)( // mask of bytes whose encoded value is NUL, '=', CR or LF
+			RV(vmor_mm_b4)(
+				RV(vmseq_vx_u8m2_b4)(data, -42, vl2),
+				RV(vmseq_vx_u8m2_b4)(tmpData, '=', vl2),
+				vl2
+			),
+			RV(vmor_mm_b4)(
+				RV(vmseq_vx_u8m2_b4)(data, '\r'-42, vl2),
+				RV(vmseq_vx_u8m2_b4)(data, '\n'-42, vl2),
+				vl2
+			),
+			vl2
+		);
+		
+#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 13000
+		data = RV(vor_vx_u8m2_mu)(cmp, tmpData, tmpData, 64, vl2); // masked OR: flagged bytes become (data+42)|64, the rest stay data+42
+#else
+		data = RV(vor_vx_u8m2_m)(cmp, tmpData, tmpData, 64, vl2);
+#endif
+		
+		int idx;
+		size_t count = RV(vcpop_m_b4)(cmp, vl2); // number of bytes needing an escape
+		if(count > 1) {
+			// widen mask: 4b->8b
+#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 13000
+			vuint8mf4_t vcmp = RV(vlmul_trunc_v_u8m1_u8mf4)(RV(vreinterpret_v_b4_u8m1)(cmp));
+#else
+			vuint8mf4_t vcmp = *(vuint8mf4_t*)(&cmp);
+#endif
+			// TODO: use vwsll instead if available
+			// - is clmul useful here?
+			vuint8mf2_t xcmp = RV(vreinterpret_v_u16mf2_u8mf2)(RV(vwmulu_vx_u16mf2)(vcmp, 16, vl2)); // b*16 == b<<4 spreads each mask byte across a 16-bit lane
+			xcmp = RV(vsrl_vv_u8mf2)(xcmp, ALT_SHIFT, vl2); // alternating shift leaves one 4-bit half of the original mask byte in each byte
+			
+			// expand mask by inserting '1' between each bit (0000abcd -> 1a1b1c1d)
+			vuint8m1_t xcmpTmp = RV(vrgather_vv_u8m1)(MASK_EXPAND, RV(vlmul_ext_v_u8mf2_u8m1)(xcmp), vl2);
+#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 13000
+			vbool2_t cmpmask = RV(vreinterpret_b2)(xcmpTmp);
+#else
+			vbool2_t cmpmask = *(vbool2_t*)(&xcmpTmp);
+#endif
+			
+			// expand data and insert =
+			// TODO: use vwsll instead if available
+			vuint16m4_t data2 = RV(vzext_vf2_u16m4)(data, vl2);
+			data2 = RV(vsll_vx_u16m4)(data2, 8, vl2);
+			data2 = RV(vor_vx_u16m4)(data2, '=', vl2); // each 16-bit lane is now (byte<<8)|'=', i.e. the byte pair '=', byte in memory order (little-endian)
+			
+			// prune unneeded =
+			vuint8m4_t dataTmp = RV(vreinterpret_v_u16m4_u8m4)(data2);
+			vuint8m4_t final_data = RV(vcompress_vm_u8m4)(
+#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 13000
+				dataTmp, cmpmask, vl2*2
+#else
+				cmpmask, dataTmp, dataTmp, vl2*2
+#endif
+			);
+			
+			RV(vse8_v_u8m4)(outp, final_data, vl2*2); // stores vl2*2 bytes, of which only the first vl2+count are kept
+			outp += vl2 + count;
+			col += vl2 + count;
+			
+			if(col >= 0) {
+				// we overflowed - find correct position to revert back to
+				// TODO: stick with u8 type for vlmax <= 2048 (need to check if ok if vlmax == 2048)
+				// - considering that it's rare for colWidth > 128, maybe just don't support vectors that long
+				vuint16m8_t xidx = RV(viota_m_u16m8)(cmpmask, vl2*2); // for each expanded slot, the number of kept bytes before it (its output index)
+				vbool2_t discardmask = RV(vmsgeu_vx_u16m8_b2)(xidx, vl2 + count - col, vl2*2); // expanded slots whose output index is at or past the line boundary
+				long idx_revert = RV(vcpop_m_b2)(discardmask, vl2*2);
+				
+				outp -= col + (idx_revert & 1);
+				inpos -= ((idx_revert+1) >> 1); // two expanded slots per input byte, so halve (rounding up) to get the input bytes to re-read
+				
+				goto _encode_eol_handle_pre;
+			}
+		} else {
+			// 0 or 1 special characters
+			{
+				vbool4_t mask = RV(vmsbf_m_b4)(cmp, vl2); // set-before-first: selects the bytes ahead of the special one (all bytes when there is none)
+				// TODO: is it better to shuffle this into two stores, instead of three?
+				RV(vse8_v_u8m2_m)(mask, outp, data, vl2);
+				idx = RV(vcpop_m_b4)(mask, vl2); // index of the special byte, or vl2 when there is none
+				outp[idx] = '=';
+				RV(vse8_v_u8m2_m)(RV(vmnot_m_b4)(mask, vl2), outp+1, data, vl2); // remaining bytes are stored one position later to leave room for the '='
+				
+				outp += vl2 + count;
+				col += vl2 + count;
+			}
+			
+			if(col >= 0) {
+				if(count > 0) {
+					idx = vl2 - idx; // distance of the special byte from the end of the vector
+					if(HEDLEY_UNLIKELY(col == idx)) {
+						// this is an escape character, so line will need to overflow
+						outp--;
+					} else {
+						inpos += (col > idx);
+					}
+				}
+				outp -= col;
+				inpos -= col;
+				
+				_encode_eol_handle_pre:
+				encode_eol_handle_pre(_src, inpos, outp, col, lineSizeOffset);
+			}
+		}
+	}
+	
+	*colOffset = col + line_size -1; // undo the bias applied on entry
+	dest = outp;
+	len = -(inpos - INPUT_OFFSET); // bytes left unprocessed
+}
+
+void encoder_rvv_init() { // selects the RVV kernel as the active encoder
+	_do_encode = &do_encode_simd<do_encode_rvv>;
+	_encode_isa = ISA_LEVEL_RVV;
+}
+#else
+void encoder_rvv_init() {}
+#endif /* defined(__riscv_vector) */
diff --git a/rapidyenc/src/encoder_sse2.cc b/rapidyenc/src/encoder_sse2.cc
new file mode 100644
index 0000000..6f5c04c
--- /dev/null
+++ b/rapidyenc/src/encoder_sse2.cc
@@ -0,0 +1,14 @@
+#include "common.h"
+
+#ifdef __SSE2__
+#include "encoder_sse_base.h"
+
+void encoder_sse2_init() { // selects the SSE2 kernel as the active encoder and builds its lookup tables
+	_do_encode = &do_encode_simd< do_encode_sse<ISA_LEVEL_SSE2> >;
+	encoder_sse_lut<ISA_LEVEL_SSE2>();
+	_encode_isa = ISA_LEVEL_SSE2;
+}
+#else
+void encoder_sse2_init() {}
+#endif
+
diff --git a/rapidyenc/src/encoder_sse_base.h b/rapidyenc/src/encoder_sse_base.h
new file mode 100644
index 0000000..6336225
--- /dev/null
+++ b/rapidyenc/src/encoder_sse_base.h
@@ -0,0 +1,723 @@
+#include "common.h"
+
+#include "encoder.h"
+#include "encoder_common.h"
+
+#if defined(__clang__) && __clang_major__ == 6 && __clang_minor__ == 0
+// VBMI2 introduced in clang 6.0, but 128-bit functions misnamed there; fixed in clang 7.0, but we'll handle those on 6.0
+# define _mm_mask_expand_epi8 _mm128_mask_expand_epi8
+#endif
+
+#if (defined(__GNUC__) && __GNUC__ >= 7) || (defined(_MSC_VER) && _MSC_VER >= 1924) // KLOAD16(a, offs): fetch the offs-th 16-bit mask from table a, via _load_mask16 on these compilers
+# define KLOAD16(a, offs) _load_mask16((__mmask16*)(a) + (offs))
+#else
+# define KLOAD16(a, offs) (((uint16_t*)(a))[(offs)])
+#endif + +#pragma pack(16) +static struct { + /*align16*/ struct { __m128i shuf, mix; } shufMix[256]; + unsigned char BitsSetTable256plus8[256]; + uint32_t eolLastChar[256]; + uint16_t eolFirstMask[256]; + uint16_t expandMask[256]; + /*align16*/ int8_t expandMaskmix[33*2*32]; + /*align16*/ int8_t expandShufmaskmix[33*2*32]; +} * HEDLEY_RESTRICT lookups; +#pragma pack() + +template +static void encoder_sse_lut() { + ALIGN_ALLOC(lookups, sizeof(*lookups), 16); + for(int i=0; i<256; i++) { + int k = i; + uint8_t* res = (uint8_t*)(&(lookups->shufMix[i].shuf)); + uint16_t expand = 0; + int p = 0; + for(int j=0; j<8; j++) { + if(k & 1) { + res[j+p] = 0xf0 + j; + p++; + } + expand |= 1<<(j+p); + res[j+p] = j; + k >>= 1; + } + for(; p<8; p++) + res[8+p] = 8+p +0x40; // +0x40 is an arbitrary value to make debugging slightly easier? the top bit cannot be set + + lookups->expandMask[i] = expand; + + // calculate add mask for mixing escape chars in + __m128i shuf = _mm_load_si128((__m128i*)res); + __m128i maskEsc = _mm_cmpeq_epi8(_mm_and_si128(shuf, _mm_set1_epi8(-16)), _mm_set1_epi8(-16)); // -16 == 0xf0 + __m128i addMask = _mm_and_si128(_mm_slli_si128(maskEsc, 1), _mm_set1_epi8(64)); + addMask = _mm_or_si128(addMask, _mm_and_si128(maskEsc, _mm_set1_epi8('='-42))); + addMask = _mm_add_epi8(addMask, _mm_set1_epi8(42)); + + _mm_store_si128(&(lookups->shufMix[i].mix), addMask); + + + lookups->eolLastChar[i] = ((i == 214+'\t' || i == 214+' ' || i == 214+'\0' || i == 214+'\n' || i == 214+'\r' || i == '='-42) ? (((i+42+64)&0xff)<<8)+0x0a0d003d : ((i+42)&0xff)+0x0a0d00); + lookups->eolFirstMask[i] = ((i == 214+'\t' || i == 214+' ' || i == '.'-42) ? 1 : 0); + + lookups->BitsSetTable256plus8[i] = 8 + ( + (i & 1) + ((i>>1) & 1) + ((i>>2) & 1) + ((i>>3) & 1) + ((i>>4) & 1) + ((i>>5) & 1) + ((i>>6) & 1) + ((i>>7) & 1) + ); + } + for(int i=0; i<33; i++) { + int n = (use_isa & ISA_FEATURE_LZCNT) ? (i == 32 ? 32 : 31-i) : (i == 0 ? 
32 : i-1); + for(int j=0; j<32; j++) { + lookups->expandMaskmix[i*64 + j] = (n>j ? -1 : 0); + if(j > 15) // mask part + lookups->expandShufmaskmix[i*64 + j] = (n>=j ? -1 : 0); + else // shuffle part + lookups->expandShufmaskmix[i*64 + j] = (n==j ? -1 : (j-(nexpandMaskmix[i*64 + j + 32] = (n==j ? '=' : 42+64*(n==j-1)); + lookups->expandShufmaskmix[i*64 + j + 32] = (n==j ? '=' : 42+64*(n==j-1)); + } + } +} + + +// for LZCNT/BSF +#ifdef _MSC_VER +# include +# include +static HEDLEY_ALWAYS_INLINE unsigned BSR32(unsigned src) { + unsigned long result; + _BitScanReverse((unsigned long*)&result, src); + return result; +} +#elif defined(__GNUC__) +// have seen Clang not like _bit_scan_reverse +# include // for lzcnt +# define BSR32(src) (31^__builtin_clz(src)) +#else +# include +# define BSR32 _bit_scan_reverse +#endif + +template +static HEDLEY_ALWAYS_INLINE __m128i sse2_expand_bytes(unsigned mask, __m128i data) { + while(mask) { + // get highest bit + unsigned bitIndex; + __m128i mergeMask; +#if defined(__LZCNT__) + if(use_isa & ISA_FEATURE_LZCNT) { + bitIndex = _lzcnt_u32(mask); + mergeMask = _mm_load_si128((const __m128i*)lookups->expandMaskmix + bitIndex*4); + mask &= 0x7fffffffU>>bitIndex; + } else +#endif + { + // TODO: consider LUT for when BSR is slow + bitIndex = BSR32(mask); + mergeMask = _mm_load_si128((const __m128i*)lookups->expandMaskmix + (bitIndex+1)*4); + mask ^= 1< +static HEDLEY_ALWAYS_INLINE uintptr_t sse2_expand_store_vector(__m128i data, unsigned int mask, unsigned maskP1, unsigned maskP2, uint8_t* p, unsigned int& shufLenP1, unsigned int& shufLenP2) { + // TODO: consider 1 bit shortcut (slightly tricky with needing bit counts though) + if(mask) { + __m128i dataA = sse2_expand_bytes(maskP1, data); + __m128i dataB = sse2_expand_bytes(maskP2, _mm_srli_si128(data, 8)); + dataA = _mm_add_epi8(dataA, _mm_load_si128(&(lookups->shufMix[maskP1].mix))); + dataB = _mm_add_epi8(dataB, _mm_load_si128(&(lookups->shufMix[maskP2].mix))); + shufLenP1 = 
lookups->BitsSetTable256plus8[maskP1]; + shufLenP2 = shufLenP1 + lookups->BitsSetTable256plus8[maskP2]; + STOREU_XMM(p, dataA); + STOREU_XMM(p+shufLenP1, dataB); + return shufLenP2; + } else { + STOREU_XMM(p, _mm_sub_epi8(data, _mm_set1_epi8(-42))); + shufLenP1 = 8; + shufLenP2 = 16; + return XMM_SIZE; + } +} + + +template +HEDLEY_ALWAYS_INLINE void do_encode_sse(int line_size, int* colOffset, const uint8_t* HEDLEY_RESTRICT srcEnd, uint8_t* HEDLEY_RESTRICT& dest, size_t& len) { + // offset position to enable simpler loop condition checking + const int INPUT_OFFSET = XMM_SIZE*4+1 -1; // EOL handling reads an additional byte, -1 to change <= to < + if(len <= INPUT_OFFSET || line_size < XMM_SIZE) return; + + // slower CPUs prefer to branch as mispredict penalty is probably small relative to general execution +#if defined(__tune_atom__) || defined(__tune_slm__) || defined(__tune_btver1__) || defined(__tune_btver2__) + const bool _PREFER_BRANCHING = true; +#else + const bool _PREFER_BRANCHING = (use_isa < ISA_LEVEL_SSSE3); +#endif + + uint8_t *p = dest; // destination pointer + intptr_t i = -(intptr_t)len; // input position + intptr_t lineSizeOffset = -line_size +1; + //intptr_t col = *colOffset - line_size +1; // for some reason, this causes GCC-8 to spill an extra register, causing the main loop to run ~5% slower, so use the alternative version below + intptr_t col = *colOffset + lineSizeOffset; + + i += INPUT_OFFSET; + const uint8_t* es = srcEnd - INPUT_OFFSET; + + if(HEDLEY_UNLIKELY(col >= 0)) { + uint8_t c = es[i++]; + if(col == 0) { + uint32_t eolChar = lookups->eolLastChar[c]; + *(uint32_t*)p = eolChar; + p += 3 + (eolChar>>27); + col = -line_size+1; + } else { + if (LIKELIHOOD(0.0273, escapedLUT[c]!=0)) { + *(uint32_t*)p = UINT32_16_PACK(UINT16_PACK('\r', '\n'), (uint32_t)escapedLUT[c]); + p += 4; + col = 2-line_size + 1; + } else { + *(uint32_t*)p = UINT32_PACK('\r', '\n', (uint32_t)(c+42), 0); + p += 3; + col = 2-line_size; + } + } + } + if (HEDLEY_LIKELY(col 
== -line_size+1)) { + uint8_t c = es[i++]; + if (LIKELIHOOD(0.0273, escapedLUT[c] != 0)) { + *(uint16_t*)p = escapedLUT[c]; + p += 2; + col += 2; + } else { + *(p++) = c + 42; + col += 1; + } + } + do { + __m128i dataA = _mm_loadu_si128((__m128i *)(es + i)); // probably not worth the effort to align + __m128i dataB = _mm_loadu_si128((__m128i *)(es + i) +1); + + i += XMM_SIZE*2; + // search for special chars + __m128i cmpA, cmpB; +#if defined(__SSSE3__) && !defined(__tune_atom__) && !defined(__tune_slm__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) { + cmpA = _mm_cmpeq_epi8( + _mm_shuffle_epi8(_mm_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm_abs_epi8(dataA)), + dataA + ); + } else +#endif + { + cmpA = _mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(dataA, _mm_set1_epi8(-42)), + _mm_cmpeq_epi8(dataA, _mm_set1_epi8('\n'-42)) + ), + _mm_or_si128( + _mm_cmpeq_epi8(dataA, _mm_set1_epi8('='-42)), + _mm_cmpeq_epi8(dataA, _mm_set1_epi8('\r'-42)) + ) + ); + } + + _encode_loop_branchA: + unsigned int maskA = _mm_movemask_epi8(cmpA); + _encode_loop_branchB: + +#if defined(__SSSE3__) && !defined(__tune_atom__) && !defined(__tune_slm__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) { + cmpB = _mm_cmpeq_epi8( + _mm_shuffle_epi8(_mm_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm_abs_epi8(dataB)), + dataB + ); + } else +#endif + { + cmpB = _mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(dataB, _mm_set1_epi8(-42)), + _mm_cmpeq_epi8(dataB, _mm_set1_epi8('\n'-42)) + ), + _mm_or_si128( + _mm_cmpeq_epi8(dataB, _mm_set1_epi8('='-42)), + _mm_cmpeq_epi8(dataB, _mm_set1_epi8('\r'-42)) + ) + ); + } + unsigned int maskB = _mm_movemask_epi8(cmpB); + + uint32_t mask = (maskB<<16) | maskA; + intptr_t bitIndex; // because you can't goto past variable declarations... 
+ intptr_t maskBits, outputBytes; + + bool manyBitsSet; +#if defined(__POPCNT__) && !defined(__tune_btver1__) + if(use_isa & ISA_FEATURE_POPCNT) { + maskBits = popcnt32(mask); + outputBytes = maskBits + XMM_SIZE*2; + manyBitsSet = maskBits > 1; + } else +#endif + { + manyBitsSet = (mask & (mask-1)) != 0; + } + + if (LIKELIHOOD(0.089, manyBitsSet)) { + _encode_loop_branch_slow: + unsigned m1 = maskA & 0xFF; + unsigned m2 = maskA >> 8; + unsigned m3 = maskB & 0xFF; + unsigned m4 = maskB >> 8; + unsigned int shuf1Len, shuf2Len, shuf3Len; + __m128i shuf1A, shuf1B, shuf2A, shuf2B; // only used for SSSE3 path + __m128i data1A, data1B, data2A, data2B; + +#if defined(__AVX512VBMI2__) && defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_VBMI2) { + // do +42 and +64 to data + dataA = _mm_sub_epi8(dataA, _mm_set1_epi8(-42)); + dataA = _mm_ternarylogic_epi32(dataA, cmpA, _mm_set1_epi8(64), 0xf8); // data | (cmp & 64) + dataB = _mm_sub_epi8(dataB, _mm_set1_epi8(-42)); + dataB = _mm_ternarylogic_epi32(dataB, cmpB, _mm_set1_epi8(64), 0xf8); + + /* alternative no-LUT 64-bit only version + * LUT generally seems to be faster though + //uint64_t expandMask = _pdep_u64(mask, 0x5555555555555555); // expand bits + //expandMask = ~_pext_u64(expandMask, expandMask|~0x5555555555555555); + uint64_t expandMask = ~_pdep_u64(~mask, 0x5555555555555555); // expand bits, with bits set + expandMask = _pext_u64(expandMask^0x5555555555555555, expandMask); + data2A = _mm_mask_expand_epi8(_mm_set1_epi8('='), expandMask>>16, _mm_srli_si128(dataA, 8)); + data1A = _mm_mask_expand_epi8(_mm_set1_epi8('='), expandMask , dataA); + data2B = _mm_mask_expand_epi8(_mm_set1_epi8('='), expandMask>>48, _mm_srli_si128(dataB, 8)); + data1B = _mm_mask_expand_epi8(_mm_set1_epi8('='), expandMask>>32, dataB); + */ + data2A = _mm_mask_expand_epi8(_mm_set1_epi8('='), KLOAD16(lookups->expandMask, m2), _mm_srli_si128(dataA, 8)); + data1A = _mm_mask_expand_epi8(_mm_set1_epi8('='), 
KLOAD16(lookups->expandMask, m1), dataA); + data2B = _mm_mask_expand_epi8(_mm_set1_epi8('='), KLOAD16(lookups->expandMask, m4), _mm_srli_si128(dataB, 8)); + data1B = _mm_mask_expand_epi8(_mm_set1_epi8('='), KLOAD16(lookups->expandMask, m3), dataB); + } else +#endif +#ifdef __SSSE3__ + if(use_isa >= ISA_LEVEL_SSSE3) { + // perform lookup for shuffle mask + shuf1A = _mm_load_si128(&(lookups->shufMix[m1].shuf)); + shuf2A = _mm_load_si128(&(lookups->shufMix[m2].shuf)); + shuf1B = _mm_load_si128(&(lookups->shufMix[m3].shuf)); + shuf2B = _mm_load_si128(&(lookups->shufMix[m4].shuf)); + + // second mask processes on second half, so add to the offsets + shuf2A = _mm_or_si128(shuf2A, _mm_set1_epi8(8)); + shuf2B = _mm_or_si128(shuf2B, _mm_set1_epi8(8)); + + // expand halves + data2A = _mm_shuffle_epi8(dataA, shuf2A); + data1A = _mm_shuffle_epi8(dataA, shuf1A); + data2B = _mm_shuffle_epi8(dataB, shuf2B); + data1B = _mm_shuffle_epi8(dataB, shuf1B); + + // add in escaped chars + data1A = _mm_add_epi8(data1A, _mm_load_si128(&(lookups->shufMix[m1].mix))); + data2A = _mm_add_epi8(data2A, _mm_load_si128(&(lookups->shufMix[m2].mix))); + data1B = _mm_add_epi8(data1B, _mm_load_si128(&(lookups->shufMix[m3].mix))); + data2B = _mm_add_epi8(data2B, _mm_load_si128(&(lookups->shufMix[m4].mix))); + } else +#endif + { + p += sse2_expand_store_vector(dataA, maskA, m1, m2, p, shuf1Len, shuf2Len); + unsigned int shuf4Len; + p += sse2_expand_store_vector(dataB, maskB, m3, m4, p, shuf3Len, shuf4Len); + shuf3Len += shuf2Len; +#if !defined(__tune_btver1__) + if(!(use_isa & ISA_FEATURE_POPCNT)) +#endif + outputBytes = shuf2Len + shuf4Len; + } + + if(use_isa >= ISA_LEVEL_SSSE3) { + // store out +#if defined(__POPCNT__) && !defined(__tune_btver1__) + if(use_isa & ISA_FEATURE_POPCNT) { + shuf2Len = popcnt32(maskA) + 16; +# if defined(__tune_znver4__) || defined(__tune_znver3__) || defined(__tune_znver2__) || defined(__tune_znver1__) || defined(__tune_btver2__) + shuf1Len = popcnt32(m1) + 8; + shuf3Len = 
popcnt32(m3) + shuf2Len + 8; +# else + shuf1Len = lookups->BitsSetTable256plus8[m1]; + shuf3Len = lookups->BitsSetTable256plus8[m3] + shuf2Len; +# endif + } else +#endif + { + shuf1Len = lookups->BitsSetTable256plus8[m1]; + shuf2Len = shuf1Len + lookups->BitsSetTable256plus8[m2]; + shuf3Len = shuf2Len + lookups->BitsSetTable256plus8[m3]; + outputBytes = shuf3Len + lookups->BitsSetTable256plus8[m4]; + } + STOREU_XMM(p, data1A); + STOREU_XMM(p+shuf1Len, data2A); + STOREU_XMM(p+shuf2Len, data1B); + STOREU_XMM(p+shuf3Len, data2B); + p += outputBytes; + } + col += outputBytes; + + if(LIKELIHOOD(0.3 /*guess, using 128b lines*/, col >= 0)) { + uintptr_t bitCount; + intptr_t shiftAmt = (outputBytes - shuf2Len) - col -1; + uint32_t eqMask; + if(HEDLEY_UNLIKELY(shiftAmt < 0)) { + shiftAmt += shuf2Len; + i -= 16; + if(use_isa >= ISA_LEVEL_VBMI2 || use_isa < ISA_LEVEL_SSSE3) { + eqMask = + ((uint32_t)lookups->expandMask[m2] << shuf1Len) + | (uint32_t)lookups->expandMask[m1]; + } else { + eqMask = + ((uint32_t)_mm_movemask_epi8(shuf2A) << shuf1Len) + | (uint32_t)_mm_movemask_epi8(shuf1A); + i += outputBytes - shuf2Len; + } + } else { + if(use_isa >= ISA_LEVEL_VBMI2 || use_isa < ISA_LEVEL_SSSE3) { + eqMask = + ((uint32_t)lookups->expandMask[m4] << (shuf3Len-shuf2Len)) + | (uint32_t)lookups->expandMask[m3]; + } else { + eqMask = + ((uint32_t)_mm_movemask_epi8(shuf2B) << (shuf3Len-shuf2Len)) + | (uint32_t)_mm_movemask_epi8(shuf1B); + } + } + + if(use_isa >= ISA_LEVEL_VBMI2 || use_isa < ISA_LEVEL_SSSE3) { +#if defined(__GNUC__) + // be careful to avoid partial flag stalls on Intel P6 CPUs (SHR+ADC will likely stall) +# if !(defined(__tune_amdfam10__) || defined(__tune_k8__)) + if(use_isa >= ISA_LEVEL_VBMI2) +# endif + { + __asm__( + "shrl $1, %[eqMask] \n" + "shrl %%cl, %[eqMask] \n" // TODO: can use shrq to avoid above shift? 
+# if defined(PLATFORM_AMD64) && !defined(__ILP32__) + "adcq %q[col], %q[p] \n" +# else + "adcl %[col], %[p] \n" +# endif + : [eqMask]"+r"(eqMask), [p]"+r"(p) + : "c"(shiftAmt), [col]"r"(~col) + ); + } +# if !(defined(__tune_amdfam10__) || defined(__tune_k8__)) + else +# else + if(0) +# endif +#endif + { + eqMask >>= shiftAmt; + p -= col; + if(LIKELIHOOD(0.98, (eqMask & 1) != 1)) + p--; + else + i++; + } + } else { + eqMask >>= shiftAmt; + col += eqMask & 1; // revert if escape char + } + +#if defined(__POPCNT__) + if(use_isa & ISA_FEATURE_POPCNT) { + bitCount = popcnt32(eqMask); + } else +#endif + { + unsigned char cnt = lookups->BitsSetTable256plus8[eqMask & 0xff]; + cnt += lookups->BitsSetTable256plus8[(eqMask>>8) & 0xff]; + cnt += lookups->BitsSetTable256plus8[(eqMask>>16) & 0xff]; + cnt += lookups->BitsSetTable256plus8[(eqMask>>24) & 0xff]; + bitCount = (uintptr_t)cnt - 32; + } + + if(use_isa >= ISA_LEVEL_VBMI2 || use_isa < ISA_LEVEL_SSSE3) { + i -= bitCount; + goto _encode_eol_handle_pre; + } else { + i += bitCount; + goto _encode_eol_handle_pre_adjust; + } + } + } else { + if(_PREFER_BRANCHING && LIKELIHOOD(0.663, !mask)) { + _encode_loop_branch_fast_noesc: + dataA = _mm_sub_epi8(dataA, _mm_set1_epi8(-42)); + dataB = _mm_sub_epi8(dataB, _mm_set1_epi8(-42)); + STOREU_XMM(p, dataA); + STOREU_XMM(p+XMM_SIZE, dataB); + p += XMM_SIZE*2; + col += XMM_SIZE*2; + if(LIKELIHOOD(0.15, col >= 0)) + goto _encode_eol_handle_pre_adjust; + continue; + } + // shortcut for common case of only 1 bit set + _encode_loop_branch_fast_1ch: +#if defined(__AVX512VL__) && defined(__AVX512BW__) + if(use_isa >= ISA_LEVEL_AVX3) { + dataA = _mm_sub_epi8(dataA, _mm_set1_epi8(-42)); + dataA = _mm_ternarylogic_epi32(dataA, cmpA, _mm_set1_epi8(64), 0xf8); // data | (cmp & 64) + dataB = _mm_sub_epi8(dataB, _mm_set1_epi8(-42)); + dataB = _mm_ternarylogic_epi32(dataB, cmpB, _mm_set1_epi8(64), 0xf8); + + // store last char + _mm_mask_storeu_epi8(p+XMM_SIZE+1, 1<<15, dataB); + + uint32_t blendMask 
= (uint32_t)(-(int32_t)mask); + dataB = _mm_mask_alignr_epi8(dataB, blendMask>>16, dataB, dataA, 15); + dataB = _mm_ternarylogic_epi32(dataB, cmpB, _mm_set1_epi8('='), 0xb8); // (data & ~cmp) | (cmp & '=') + +# if defined(__AVX512VBMI2__) + if(use_isa >= ISA_LEVEL_VBMI2) + dataA = _mm_mask_expand_epi8(_mm_set1_epi8('='), ~mask, dataA); + else +# endif + { + dataA = _mm_mask_alignr_epi8(dataA, blendMask, dataA, dataA, 15); // there's no masked shift, so use ALIGNR instead + dataA = _mm_ternarylogic_epi32(dataA, cmpA, _mm_set1_epi8('='), 0xb8); + } + } else +#endif + { + +#if !defined(__tune_btver1__) + if(!(use_isa & ISA_FEATURE_POPCNT)) +#endif + maskBits = (mask != 0); + if(_PREFER_BRANCHING) maskBits = 1; +#if !defined(__tune_btver1__) + if(!(use_isa & ISA_FEATURE_POPCNT)) +#endif + outputBytes = XMM_SIZE*2 + maskBits; + +#if defined(__LZCNT__) + if(use_isa & ISA_FEATURE_LZCNT) + bitIndex = _lzcnt_u32(mask); + else +#endif + { + bitIndex = BSR32(mask); + bitIndex |= maskBits-1; // if(mask == 0) bitIndex = -1; + } + const __m128i* entries; + +#if defined(__SSSE3__) && !defined(__tune_atom__) && !defined(__tune_slm__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) { + entries = (const __m128i*)lookups->expandShufmaskmix; + if(!(use_isa & ISA_FEATURE_LZCNT)) + entries += 4; + entries += bitIndex*4; + + __m128i shufMaskA = _mm_load_si128(entries+0); + __m128i mergeMaskB = _mm_load_si128(entries+1); + __m128i dataBShifted = _mm_alignr_epi8(dataB, dataA, 15); + dataB = _mm_andnot_si128(cmpB, dataB); + + dataA = _mm_shuffle_epi8(dataA, shufMaskA); + +# if defined(__SSE4_1__) && !defined(__tune_slm__) && !defined(__tune_goldmont__) && !defined(__tune_goldmont_plus__) && !defined(__tune_tremont__) + if(use_isa >= ISA_LEVEL_SSE41) { + dataB = _mm_blendv_epi8(dataBShifted, dataB, mergeMaskB); + } else +# endif + { + dataB = _mm_or_si128( + _mm_and_si128(mergeMaskB, dataB), + _mm_andnot_si128(mergeMaskB, dataBShifted) + ); + } + } else +#endif + { + + entries 
= (const __m128i*)lookups->expandMaskmix; + if(!(use_isa & ISA_FEATURE_LZCNT)) + entries += 4; + entries += bitIndex*4; + + __m128i mergeMaskA = _mm_load_si128(entries+0); + __m128i mergeMaskB = _mm_load_si128(entries+1); + // TODO: consider deferring mask operation? (does require an extra ANDN but may help with L1 latency) + __m128i dataAMasked = _mm_andnot_si128(mergeMaskA, dataA); + __m128i dataBMasked = _mm_andnot_si128(mergeMaskB, dataB); + __m128i dataAShifted = _mm_slli_si128(dataAMasked, 1); + __m128i dataBShifted; + +#if defined(__SSSE3__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) + dataBShifted = _mm_alignr_epi8(dataBMasked, dataAMasked, 15); + else +#endif + dataBShifted = _mm_or_si128( + _mm_slli_si128(dataBMasked, 1), + _mm_srli_si128(dataAMasked, 15) + ); + + // alternatively `_mm_xor_si128(dataAMasked, dataA)` if compiler wants to load mergeMask* again + dataB = _mm_or_si128( + _mm_and_si128(mergeMaskB, dataB), dataBShifted + ); + dataA = _mm_or_si128( + _mm_and_si128(mergeMaskA, dataA), dataAShifted + ); + } + // add escape chars + dataA = _mm_add_epi8(dataA, _mm_load_si128(entries+2)); + dataB = _mm_add_epi8(dataB, _mm_load_si128(entries+3)); + + // store final char + p[XMM_SIZE*2] = es[i-1] + 42 + (64 & (mask>>(XMM_SIZE*2-1-6))); + } + + // store main part + STOREU_XMM(p, dataA); + STOREU_XMM(p+XMM_SIZE, dataB); + + p += outputBytes; + col += outputBytes; + + if(LIKELIHOOD(0.3, col >= 0)) { +#if defined(__AVX512VL__) + if(use_isa >= ISA_LEVEL_AVX3) + bitIndex = _lzcnt_u32(mask) +1; + else +#endif + if(use_isa & ISA_FEATURE_LZCNT) + bitIndex = bitIndex +1; + else + bitIndex = 31-bitIndex +1; + if(HEDLEY_UNLIKELY(col == bitIndex)) { + // this is an escape character, so line will need to overflow + p--; + } else { + i += (col > bitIndex); + } + _encode_eol_handle_pre_adjust: + p -= col; + i -= col; + + _encode_eol_handle_pre: + uint32_t eolChar = lookups->eolLastChar[es[i]]; + *(uint32_t*)p = eolChar; + p += 3 + (eolChar>>27); + 
col = lineSizeOffset; + + if(HEDLEY_UNLIKELY(i >= 0)) { // this isn't really a proper check - it's only needed to support short lines; basically, if the line is too short, `i` never gets checked, so we need one somewhere + i++; + break; + } + + dataA = _mm_loadu_si128((__m128i *)(es + i + 1)); + dataB = _mm_loadu_si128((__m128i *)(es + i + 1) + 1); + // search for special chars (EOL) +#if defined(__SSSE3__) && !defined(__tune_atom__) && !defined(__tune_slm__) && !defined(__tune_btver1__) + if(use_isa >= ISA_LEVEL_SSSE3) { + cmpA = _mm_cmpeq_epi8( + _mm_shuffle_epi8(_mm_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm_adds_epi8( + _mm_abs_epi8(dataA), _mm_cvtsi32_si128(88) + )), + dataA + ); + i += XMM_SIZE*2 + 1; +# if defined(__GNUC__) && !defined(__clang__) + // GCC seems to have trouble keeping track of variable usage and spills many of them if we goto after declarations; Clang9 seems to be fine, or if _PREFER_BRANCHING is used + if(!_PREFER_BRANCHING) + goto _encode_loop_branchA; +# endif + maskA = _mm_movemask_epi8(cmpA); + cmpB = _mm_cmpeq_epi8( + _mm_shuffle_epi8(_mm_set_epi8( + '\0'-42,-42,'\r'-42,'.'-42,'='-42,'\0'-42,'\t'-42,'\n'-42,-42,-42,'\r'-42,-42,'='-42,' '-42,-42,'\n'-42 + ), _mm_abs_epi8(dataB)), + dataB + ); + } else +#endif + { + cmpA = _mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(dataA, _mm_set1_epi8(-42)), + _mm_cmpeq_epi8(dataA, _mm_set1_epi8('\n'-42)) + ), + _mm_or_si128( + _mm_cmpeq_epi8(dataA, _mm_set1_epi8('='-42)), + _mm_cmpeq_epi8(dataA, _mm_set1_epi8('\r'-42)) + ) + ); + maskA = _mm_movemask_epi8(cmpA); + maskA |= lookups->eolFirstMask[es[i+1]]; + i += XMM_SIZE*2 + 1; +#if defined(__GNUC__) && !defined(__clang__) + if(!_PREFER_BRANCHING) + goto _encode_loop_branchB; +#endif + cmpB = _mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(dataB, _mm_set1_epi8(-42)), + _mm_cmpeq_epi8(dataB, _mm_set1_epi8('\n'-42)) + ), + _mm_or_si128( + _mm_cmpeq_epi8(dataB, 
_mm_set1_epi8('='-42)), + _mm_cmpeq_epi8(dataB, _mm_set1_epi8('\r'-42)) + ) + ); + } + maskB = _mm_movemask_epi8(cmpB); + + mask = (maskB<<16) | maskA; + bool manyBitsSet; // don't retain this across loop cycles +#if defined(__POPCNT__) && !defined(__tune_btver1__) + if(use_isa & ISA_FEATURE_POPCNT) { + maskBits = popcnt32(mask); + outputBytes = maskBits + XMM_SIZE*2; + manyBitsSet = maskBits > 1; + } else +#endif + { + manyBitsSet = (mask & (mask-1)) != 0; + } + + if (LIKELIHOOD(0.089, manyBitsSet)) + goto _encode_loop_branch_slow; + if(_PREFER_BRANCHING && LIKELIHOOD(0.663, !mask)) + goto _encode_loop_branch_fast_noesc; + goto _encode_loop_branch_fast_1ch; + if(0) { // silence unused label warnings + goto _encode_loop_branchA; + goto _encode_loop_branchB; + } + } + + } + } while(i < 0); + + *colOffset = (int)(col + line_size -1); + dest = p; + len = -(i - INPUT_OFFSET); +} + diff --git a/rapidyenc/src/encoder_ssse3.cc b/rapidyenc/src/encoder_ssse3.cc new file mode 100644 index 0000000..1a235da --- /dev/null +++ b/rapidyenc/src/encoder_ssse3.cc @@ -0,0 +1,19 @@ +#include "common.h" + +// slightly faster version which improves the worst case scenario significantly; since worst case doesn't happen often, overall speedup is relatively minor +// requires PSHUFB (SSSE3) instruction, but will use POPCNT (SSE4.2 (or AMD's ABM, but Phenom doesn't support SSSE3 so doesn't matter)) if available (these only seem to give minor speedups, so considered optional) +#ifdef __SSSE3__ +#include "encoder_sse_base.h" + +void encoder_ssse3_init() { + _do_encode = &do_encode_simd< do_encode_sse >; + encoder_sse_lut(); + _encode_isa = ISA_LEVEL_SSSE3; +} +#else +void encoder_sse2_init(); +void encoder_ssse3_init() { + encoder_sse2_init(); +} +#endif + diff --git a/rapidyenc/src/encoder_vbmi2.cc b/rapidyenc/src/encoder_vbmi2.cc new file mode 100644 index 0000000..d9ee62d --- /dev/null +++ b/rapidyenc/src/encoder_vbmi2.cc @@ -0,0 +1,25 @@ +#include "common.h" + +#if defined(__AVX512VL__) 
&& defined(__AVX512VBMI2__) && defined(__AVX512BW__) +# ifndef YENC_DISABLE_AVX256 +# include "encoder_avx_base.h" + +void encoder_vbmi2_init() { + _do_encode = &do_encode_simd< do_encode_avx2 >; + encoder_avx2_lut(); + _encode_isa = ISA_LEVEL_VBMI2; +} +# else +# include "encoder_sse_base.h" +void encoder_vbmi2_init() { + _do_encode = &do_encode_simd< do_encode_sse >; + encoder_sse_lut(); + _encode_isa = ISA_LEVEL_VBMI2; +} +# endif +#else +void encoder_avx2_init(); +void encoder_vbmi2_init() { + encoder_avx2_init(); +} +#endif diff --git a/rapidyenc/src/hedley.h b/rapidyenc/src/hedley.h new file mode 100644 index 0000000..8a713e6 --- /dev/null +++ b/rapidyenc/src/hedley.h @@ -0,0 +1,2042 @@ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 15) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 15 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define 
HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if 
defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define 
HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= 
HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + 
+#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) +# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) 
+#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if 
defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION +#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + 
!defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || 
HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define 
HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) 
__has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) 
HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP 
_Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST 
+#endif +#if defined(__cplusplus) /* HEDLEY_CONST_CAST(T, expr): casts expr to T. C++ uses const_cast<T>; C uses a plain cast, placed between diagnostic push/pop with the cast-qual diagnostic disabled when one of the checks below passes. */ +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) /* HEDLEY_REINTERPRET_CAST(T, expr): reinterpret_cast<T> in C++, C-style cast in C. */ +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) /* HEDLEY_STATIC_CAST(T, expr): static_cast<T> in C++, C-style cast in C. */ +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) /* HEDLEY_CPP_CAST(T, expr): C-style cast to T when compiled as C++ (wrapped in diagnostic push/pop plus a suppression pragma in the clang and IAR branches); in C it expands to expr unchanged. */ +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) /* the cast must be part of the expansion; with only the pragmas the macro would yield no expression */ +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# 
define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) /* HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS: any earlier definition is dropped, then the first matching compiler check below selects a diagnostic-suppression pragma (-Wunknown-pragmas on clang and GCC); expands to nothing when no check matches. */ +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) /* NOTE(review): unreachable, the preceding #elif already tests HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) and defines the same pragma */ +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) /* HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES: same scheme as above; the clang branch ignores -Wunknown-attributes. */ +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES 
_Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) /* NOTE(review): this branch ignores -Wdeprecated-declarations rather than an attribute-related warning; confirm this is intended */ +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) /* HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL: pragma that ignores -Wcast-qual on clang and GCC (numeric warning ids on the Intel branch); expands to nothing when no check matches. */ +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif 
HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) 
+# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) /* C++14 and later: standard [[deprecated]] attribute carrying the stringized since/replacement message */ +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) /* these compilers get the attribute without a message; the since/replacement arguments are discarded */ +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) /* NOTE(review): the INTEL_CL check can never be true here, the first branch of this chain already accepts HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) */ +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else /* no matching compiler check: both macros expand to nothing */ +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif +
+#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# 
define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ 
+ HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR 
HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 
0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && 
defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if 
defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + 
defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + 
HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE 
__attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + 
HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. 
API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? 
(void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) /* same (expr) * 0 operand as the __INTPTR_TYPE__ variant */ +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + 
HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define 
HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. 
*/ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ diff --git a/rapidyenc/src/platform.cc b/rapidyenc/src/platform.cc new file mode 100644 index 0000000..62eba15 --- /dev/null +++ b/rapidyenc/src/platform.cc @@ -0,0 +1,197 @@ +#include "common.h" +#ifdef PLATFORM_ARM +# ifdef __ANDROID__ +# include +# elif defined(_WIN32) +# define WIN32_LEAN_AND_MEAN +# define NOMINMAX +# include +# elif defined(__APPLE__) +# include +# include +# elif defined(__has_include) +# 
if __has_include() +# include +# if __has_include() +# include +# endif +# endif +# endif +bool cpu_supports_neon() { +# if defined(AT_HWCAP) +# ifdef __FreeBSD__ + unsigned long supported = 0; /* defined value (reports no NEON) even if elf_aux_info() fails */ + elf_aux_info(AT_HWCAP, &supported, sizeof(supported)); +# ifdef __aarch64__ + return supported & HWCAP_ASIMD; +# else + return supported & HWCAP_NEON; +# endif +# else +# ifdef __aarch64__ + return getauxval(AT_HWCAP) & HWCAP_ASIMD; +# else + return getauxval(AT_HWCAP) & HWCAP_NEON; +# endif +# endif +# elif defined(ANDROID_CPU_FAMILY_ARM) +# ifdef __aarch64__ + return android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_ASIMD; +# else + return android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON; +# endif +# elif defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE); +# elif defined(__APPLE__) + int supported = 0; + size_t len = sizeof(supported); + if(sysctlbyname("hw.optional.neon", &supported, &len, NULL, 0)) + return false; + return (bool)supported; +# endif +# ifdef __aarch64__ + return true; // assume NEON support on AArch64 +# else + return false; +# endif +} +#endif + + +#ifdef PLATFORM_X86 +#ifdef _MSC_VER +# define _cpuid1(ar) __cpuid(ar, 1) +# define _cpuid1x(ar) __cpuid(ar, 0x80000001) +# if _MSC_VER >= 1600 +# define _cpuidX __cpuidex +# include +# define _GET_XCR() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) +# else +// not supported +# define _cpuidX(ar, eax, ecx) ar[0]=0, ar[1]=0, ar[2]=0, ar[3]=0 +# define _GET_XCR() 0 +# endif +#else +# include +# define _cpuid1(ar) __cpuid(1, ar[0], ar[1], ar[2], ar[3]) +# define _cpuid1x(ar) __cpuid(0x80000001, ar[0], ar[1], ar[2], ar[3]) +# define _cpuidX(ar, eax, ecx) __cpuid_count(eax, ecx, ar[0], ar[1], ar[2], ar[3]) +static inline int _GET_XCR() { + int xcr0; + __asm__ __volatile__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); + return xcr0; +} +#endif +// checks if CPU has 128-bit AVX units; currently not used as AVX2 is beneficial even on Zen1 +// static bool cpu_has_slow_avx(cpuid1flag0) { 
+ // int family = ((cpuid1flag0>>8) & 0xf) + ((cpuid1flag0>>16) & 0xff0), + // model = ((cpuid1flag0>>4) & 0xf) + ((cpuid1flag0>>12) & 0xf0); + // return ( + // family == 0x6f // AMD Bulldozer family + // || family == 0x7f // AMD Jaguar/Puma family + // || (family == 0x8f && (model == 0 /*Summit Ridge ES*/ || model == 1 /*Zen*/ || model == 8 /*Zen+*/ || model == 0x11 /*Zen APU*/ || model == 0x18 /*Zen+ APU*/ || model == 0x50 /*Subor Z+*/)) // AMD Zen1 family + // || (family == 6 && model == 0xf) // Centaur/Zhaoxin; overlaps with Intel Core 2, but they don't support AVX + // ); +// } + + +int cpu_supports_isa() { + int flags[4]; + _cpuid1(flags); + int ret = 0; + + if(flags[2] & 0x800000) + ret |= ISA_FEATURE_POPCNT; + int flags2[4]; + _cpuid1x(flags2); + if(flags2[2] & 0x20) // ABM + ret |= ISA_FEATURE_LZCNT | ISA_FEATURE_POPCNT; + + int family = ((flags[0]>>8) & 0xf) + ((flags[0]>>16) & 0xff0); + int model = ((flags[0]>>4) & 0xf) + ((flags[0]>>12) & 0xf0); + + if(family == 6 && ( + model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || model == 0x37 || model == 0x4A || model == 0x4C || model == 0x4D || model == 0x5A || model == 0x5D + )) + // Intel Bonnell/Silvermont CPU with very slow PSHUFB and PBLENDVB - pretend SSSE3 doesn't exist + return ret | ISA_LEVEL_SSE2; + + if(family == 0x5f && (model == 0 || model == 1 || model == 2)) + // AMD Bobcat with slow SSSE3 instructions - pretend it doesn't exist + return ret | ISA_LEVEL_SSE2; + + if((flags[2] & 0x200) == 0x200) { // SSSE3 + if(family == 6 && (model == 0x5c || model == 0x5f || model == 0x7a || model == 0x9c)) + // Intel Goldmont/plus / Tremont with slow PBLENDVB + return ret | ISA_LEVEL_SSSE3; + + if(flags[2] & 0x80000) { // SSE4.1 + if((flags[2] & 0x1C800000) == 0x1C800000) { // POPCNT + OSXSAVE + XSAVE + AVX + int xcr = _GET_XCR() & 0xff; // ignore unused bits + if((xcr & 6) == 6) { // AVX enabled + int cpuInfo[4]; + _cpuidX(cpuInfo, 7, 0); + if((cpuInfo[1] & 0x128) == 0x128 && 
(ret & ISA_FEATURE_LZCNT)) { // BMI2 + AVX2 + BMI1 + if(((xcr & 0xE0) == 0xE0) && (cpuInfo[1] & 0xC0010000) == 0xC0010000) { // AVX512BW + AVX512VL + AVX512F + if(cpuInfo[2] & 0x40) + return ret | ISA_LEVEL_VBMI2; + return ret | ISA_LEVEL_AVX3; + } + // AVX2 is beneficial even on Zen1 + return ret | ISA_LEVEL_AVX2; + } + return ret | ISA_LEVEL_AVX; + } + } + return ret | ISA_LEVEL_SSE41; + } + return ret | ISA_LEVEL_SSSE3; + } + return ret | ISA_LEVEL_SSE2; +} + +int cpu_supports_crc_isa() { + int flags[4]; + _cpuid1(flags); + + if((flags[2] & 0x80202) == 0x80202) { // SSE4.1 + SSSE3 + CLMUL + if((flags[2] & 0x1C000000) == 0x1C000000) { // AVX + OSXSAVE + XSAVE + int xcr = _GET_XCR() & 0xff; // ignore unused bits + if((xcr & 6) == 6) { // AVX enabled + int cpuInfo[4]; + _cpuidX(cpuInfo, 7, 0); + if((cpuInfo[1] & 0x20) == 0x20 && (cpuInfo[2] & 0x400) == 0x400) { // AVX2 + VPCLMULQDQ + return 2; + } + } + } + return 1; + } + return 0; +} + +#endif // PLATFORM_X86 + +#ifdef __riscv +# if defined(__has_include) +# if __has_include() +# include +# if __has_include() +# include +# endif +# endif +# endif +bool cpu_supports_rvv() { +# if defined(AT_HWCAP) + unsigned long ret = 0; /* defined value (reports no RVV) even if elf_aux_info() fails */ +# ifdef __FreeBSD__ + elf_aux_info(AT_HWCAP, &ret, sizeof(ret)); +# else + ret = getauxval(AT_HWCAP); +# endif + return (ret & 0x20112D) == 0x20112D; // IMAFDCV; TODO: how to detect Z* features of 'G'? 
+# endif + return false; +} +#endif + diff --git a/rapidyenc/tool/bench.cc b/rapidyenc/tool/bench.cc new file mode 100644 index 0000000..ff7db3f --- /dev/null +++ b/rapidyenc/tool/bench.cc @@ -0,0 +1,73 @@ +#include +#include +#include + +#include "../rapidyenc.h" + +static const char* kernel_to_str(int k) { + if(k == RYKERN_GENERIC) return "generic"; + if(k == RYKERN_SSE2) return "SSE2"; + if(k == RYKERN_SSSE3) return "SSSE3"; + if(k == RYKERN_AVX) return "AVX"; + if(k == RYKERN_AVX2) return "AVX2"; + if(k == RYKERN_VBMI2) return "VBMI2"; + if(k == RYKERN_NEON) return "NEON"; + if(k == RYKERN_PCLMUL) return "PCLMUL"; + if(k == RYKERN_VPCLMUL) return "VPCLMUL"; + if(k == RYKERN_ARMCRC) return "ARM-CRC"; + return "unknown"; +} +#define ARTICLE_SIZE 768000ULL +#define REPETITIONS 1000 + +int main(int, char**) { + std::vector data(ARTICLE_SIZE); + std::vector article(rapidyenc_encode_max_length(ARTICLE_SIZE, 128)); + size_t article_length, decoded_length; + + // fill with random data + for(auto& c : data) + c = rand() & 0xff; + + + // encode benchmark + rapidyenc_encode_init(); + auto kernel = rapidyenc_encode_kernel(); + auto start = std::chrono::high_resolution_clock::now(); + for(int i=0; i(stop - start).count(); + double speed = ARTICLE_SIZE * REPETITIONS; + speed = speed / us / 1.048576; + std::cerr << "Encode (" << kernel_to_str(kernel) << "): " << speed << " MB/s" << std::endl; + + // decode + rapidyenc_decode_init(); + kernel = rapidyenc_decode_kernel(); + start = std::chrono::high_resolution_clock::now(); + for(int i=0; i(stop - start).count(); + speed = article_length * REPETITIONS; + speed = speed / us / 1.048576; + std::cerr << "Decode (" << kernel_to_str(kernel) << "): " << speed << " MB/s" << std::endl; + + // CRC + rapidyenc_crc_init(); + kernel = rapidyenc_crc_kernel(); + start = std::chrono::high_resolution_clock::now(); + for(int i=0; i(stop - start).count(); + speed = decoded_length * REPETITIONS; + speed = speed / us / 1.048576; + std::cerr << 
"CRC32 (" << kernel_to_str(kernel) << "): " << speed << " MB/s" << std::endl; + + return 0; +} diff --git a/rapidyenc/tool/cli.c b/rapidyenc/tool/cli.c new file mode 100644 index 0000000..72c8c34 --- /dev/null +++ b/rapidyenc/tool/cli.c @@ -0,0 +1,131 @@ +#include +#include +#include +#include + +#include "../rapidyenc.h" + +static int print_usage(const char *app) { + fprintf(stderr, "Sample rapidyenc application\n"); + fprintf(stderr, "Usage: %s {e|d}\n", app); + fprintf(stderr, " (e)ncodes or (d)ecodes stdin to stdout\n"); + return 1; +} + +#define BUFFER_SIZE 65536 +#define LINE_SIZE 128 + +int main(int argc, char **argv) { + if(argc < 2) + return print_usage(argv[0]); + if(argv[1][0] != 'e' && argv[1][0] != 'd') + return print_usage(argv[0]); + + FILE* infile = stdin; // fopen("", "rb"); + if(!infile) { + fprintf(stderr, "error opening input: %s\n", strerror(errno)); + return 1; + } + FILE* outfile = stdout; // fopen("", "rb"); + if(!outfile) { + fprintf(stderr, "error opening output: %s\n", strerror(errno)); + fclose(infile); + return 1; + } + + void* data = malloc(BUFFER_SIZE); + if(!data) { + fprintf(stderr, "error allocating input buffer\n"); + fclose(infile); + fclose(outfile); + return 1; + } + + rapidyenc_crc_init(); + uint32_t crc = 0; + int has_error = 0; + + if(argv[1][0] == 'e') { + void* output = malloc(rapidyenc_encode_max_length(BUFFER_SIZE, LINE_SIZE)); + if(!output) { + fprintf(stderr, "error allocating output buffer\n"); + fclose(infile); + fclose(outfile); + free(data); + return 1; + } + rapidyenc_encode_init(); + + int column = 0; + while(1) { + size_t read = fread(data, 1, BUFFER_SIZE, infile); + int eof = feof(infile); + if(read < BUFFER_SIZE && !eof) { + if(ferror(infile)) { + fprintf(stderr, "error reading input\n"); + } else { + fprintf(stderr, "error: got zero bytes when reading input\n"); + } + has_error = 1; + break; + } + size_t out_len = rapidyenc_encode_ex(LINE_SIZE, &column, data, output, read, eof); + crc = rapidyenc_crc(data, 
read, crc); + if(fwrite(output, 1, out_len, outfile) != out_len) { + fprintf(stderr, "error writing output\n"); + has_error = 1; + break; + } + if(eof) break; + } + free(output); + } else { + rapidyenc_decode_init(); + + RapidYencDecoderState state = RYDEC_STATE_CRLF; + while(1) { + size_t read = fread(data, 1, BUFFER_SIZE, infile); + RapidYencDecoderEnd ended; + void* in_ptr = data; + void* out_ptr = data; + int eof = feof(infile); + if(read < BUFFER_SIZE && !eof) { + if(ferror(infile)) { + fprintf(stderr, "error reading input\n"); + } else { + fprintf(stderr, "error: got zero bytes when reading input\n"); + } + has_error = 1; + break; + } + ended = rapidyenc_decode_incremental((const void**)&in_ptr, &out_ptr, read, &state); + size_t out_len = (uintptr_t)out_ptr - (uintptr_t)data; + crc = rapidyenc_crc(data, out_len, crc); + if(fwrite(data, 1, out_len, outfile) != out_len) { + fprintf(stderr, "error writing output\n"); + has_error = 1; + break; + } + + if(ended != RYDEC_END_NONE || eof) { + if(ended == RYDEC_END_CONTROL) + fprintf(stderr, "yEnc control line found\n"); + else if(ended == RYDEC_END_ARTICLE) + fprintf(stderr, "End-of-article marker found\n"); + else + fprintf(stderr, "End of input reached\n"); + break; + } + } + } + + fclose(infile); + fclose(outfile); + free(data); + + if(!has_error) { + fprintf(stderr, "Computed CRC32: %08x\n", crc); + } + + return 0; +} diff --git a/rapidyenc_darwin_arm64.go b/rapidyenc_darwin_arm64.go deleted file mode 100644 index 969299f..0000000 --- a/rapidyenc_darwin_arm64.go +++ /dev/null @@ -1,6 +0,0 @@ -package rapidyenc - -/* -#cgo LDFLAGS: ${SRCDIR}/lib/librapidyenc_darwin_arm64.a -lstdc++ -*/ -import "C" diff --git a/windows.bat b/windows.bat index 1ad1074..4bb79ed 100644 --- a/windows.bat +++ b/windows.bat @@ -1,4 +1,3 @@ -cd rapidyenc -cmake . -cmake --build . 
--config Release --target rapidyenc_shared -copy "Release\rapidyenc.dll" "..\lib" \ No newline at end of file +cmake -S rapidyenc -B rapidyenc/build -DCMAKE_CXX_FLAGS=-msse4 -G "MinGW Makefiles" +cmake --build rapidyenc/build --config Release +copy "rapidyenc\build\rapidyenc_static\librapidyenc.a" "lib\librapidyenc_windows_amd64.a" \ No newline at end of file