diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 50b5427..14159b7 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,229 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: - workflow_call: - inputs: - extra-packages: - type: string - default: "" - required: false - cache-version: - type: string - default: "2" - required: false - pandoc-version: - type: string - default: "3.x" - required: false - extra-check-args: - type: string - # default: '"--run-donttest", "--no-manual", "--as-cran"' - default: "NULL" - required: false - macOS: - type: string - default: "macOS-latest" - required: false - windows: - type: string - default: "windows-latest" - required: false - ubuntu: - type: string - required: false - # To test more versions, they must be separated by a space. Ex: `"ubuntu-18.04 ubuntu-20.04` - default: "ubuntu-20.04" - minimum-r-version: - type: string - required: false - default: "" - force-windows-src: - type: boolean - required: false - default: false - rtools-35: - type: boolean - default: true - required: false - rtools-40: - type: boolean - default: true - required: false + push: + branches: [main, master] + pull_request: + branches: [main, master] name: R-CMD-check jobs: - setup: - name: setup - runs-on: ubuntu-latest - outputs: - config: ${{ steps.config.outputs.config }} - steps: - # - name: devel - # id: devel - # uses: r-lib/actions/setup-r@v2 - # with: - # r-version: devel - # install-r: false - - - name: release - id: release - uses: r-lib/actions/setup-r@v2 - with: - r-version: release - install-r: false - - - name: oldrel-1 - id: oldrel-1 - uses: r-lib/actions/setup-r@v2 - with: - r-version: oldrel-1 - install-r: false - - - name: oldrel-2 - id: oldrel-2 - uses: r-lib/actions/setup-r@v2 - with: - r-version: oldrel-2 - install-r: 
false - - - name: oldrel-3 - id: oldrel-3 - uses: r-lib/actions/setup-r@v2 - with: - r-version: oldrel-3 - install-r: false - - - name: oldrel-4 - id: oldrel-4 - uses: r-lib/actions/setup-r@v2 - with: - r-version: oldrel-4 - install-r: false - - - name: Checkout GitHub repo - uses: rstudio/shiny-workflows/.github/internal/checkout@v1 - - # R is a pre-installed software - - name: Config - id: config - shell: Rscript {0} - run: | - mac <- "${{ inputs.macOS }}" - windows <- "${{ inputs.windows }}" - ubuntu <- strsplit("${{ inputs.ubuntu }}", "[[:space:],]+")[[1]] - has_src <- dir.exists("src") - min_r_ver <- "${{ inputs.minimum-r-version }}" - test_on_rtools35 <- identical("${{ inputs.rtools-35 }}", "true") - test_on_rtools40 <- identical("${{ inputs.rtools-40 }}", "true") - force_windows_src <- identical("${{ inputs.force-windows-src }}", "true") - if (force_windows_src) has_src <- TRUE - - rver <- list( - # devel = "${{ steps.devel.outputs.installed-r-version }}", - # When R 4.3 was released, R version 4.4.0 was not recognized. - # However, `"devel"` is recognized within `r-lib/actions/setup-r`. - devel = "devel", - release = "${{ steps.release.outputs.installed-r-version }}", - oldrel1 = "${{ steps.oldrel-1.outputs.installed-r-version }}", - oldrel2 = "${{ steps.oldrel-2.outputs.installed-r-version }}", - oldrel3 = "${{ steps.oldrel-3.outputs.installed-r-version }}", - oldrel4 = "${{ steps.oldrel-4.outputs.installed-r-version }}" - ) - job <- function(os, r, ...) { - list(os = os, r = r, ...) 
- } - is_valid_os <- function(os, r_ver = "") { - if (identical(os, "false")) return(FALSE) - if (identical(os, "")) return(FALSE) - if (any(nchar(c(r_ver, min_r_ver)) == 0)) return(TRUE) - if (identical(r_ver, "devel")) return(TRUE) - r_ver >= min_r_ver - } - config <- c( - list( - if (is_valid_os(mac, rver$release)) job(mac, rver$release), - if (has_src && is_valid_os(windows, "devel")) job(windows, "devel", "rtools-version" = "44"), - if (is_valid_os(windows, rver$release)) job(windows, rver$release, "rtools-version" = "43"), - if (has_src && is_valid_os(windows, "4.2")) job(windows, "4.2", "rtools-version" = "42"), - if (has_src && test_on_rtools40 && is_valid_os(windows, "4.1")) job(windows, "4.1"), - if (has_src && test_on_rtools35 && is_valid_os(windows, "3.6")) job(windows, "3.6"), - if (is_valid_os(ubuntu, rver$devel)) job(ubuntu[[1]], rver$devel, "http-user-agent" = "release") - ), - if (is_valid_os(ubuntu)) - unlist(recursive = FALSE, lapply(ubuntu, function(ubuntu_) { - list( - if (is_valid_os(ubuntu_, rver$release)) job(ubuntu_, rver$release), - if (is_valid_os(ubuntu_, rver$oldrel1)) job(ubuntu_, rver$oldrel1), - if (is_valid_os(ubuntu_, rver$oldrel2)) job(ubuntu_, rver$oldrel2), - if (is_valid_os(ubuntu_, rver$oldrel3)) job(ubuntu_, rver$oldrel3), - if (is_valid_os(ubuntu_, rver$oldrel4)) job(ubuntu_, rver$oldrel4) - ) - })) - ) - ## Drop NULLs - config <- config[!vapply(config, is.null, logical(1))] - ## Convert to JSON manually to save 10s installing `jsonlite` - join_and_wrap <- function(x, start, end, sep = ",") { - paste0(start, paste0(x, collapse = sep), end) - } - entries_json <- vapply(config, character(1), FUN = function(entry) { - join_and_wrap( - paste0("\"", names(entry), "\":\"", unname(entry), "\""), - "{", "}" - ) - }) - config_json <- join_and_wrap(entries_json, "[", "]") - cat("Config:\n", config_json, "\n", sep = "") - cat("config=", config_json, "\n", file = Sys.getenv("GITHUB_OUTPUT"), sep = "", append = TRUE) - R-CMD-check: 
runs-on: ${{ matrix.config.os }} name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - needs: [setup] strategy: fail-fast: false matrix: - config: ${{ fromJSON(needs.setup.outputs.config) }} + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - - name: Checkout GitHub repo - uses: rstudio/shiny-workflows/.github/internal/checkout@v1 + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 - - name: Install R, system dependencies, and package dependencies - uses: rstudio/shiny-workflows/setup-r-package@v1 + - uses: r-lib/actions/setup-r@v2 with: - rtools-version: ${{ matrix.config.rtools-version }} - pandoc-version: ${{ inputs.pandoc-version }} r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} - cache-version: ${{ inputs.cache-version }} - needs: check - extra-packages: | - any::rcmdcheck - ${{ inputs.extra-packages }} + use-public-rspm: true - - name: Check package - uses: r-lib/actions/check-r-package@v2 - timeout-minutes: 30 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - check-dir: '"check"' # matches directory below - args: 'c(${{ inputs.extra-check-args }}, "--no-manual", "--as-cran")' + extra-packages: any::rcmdcheck + needs: check - - name: "Show `testthat` output" - if: always() - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true - shell: bash - - name: "Show install logs" - if: always() - run: find check -name '00install.out' -exec cat '{}' \; || true - shell: bash - - name: "Upload 'Check package' results" - if: failure() - uses: actions/upload-artifact@main + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ matrix.config.os }}-r${{ matrix.config.r }}-results - path: "check" + upload-snapshots: true + build_args: 
'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/DESCRIPTION b/DESCRIPTION index dfb01c6..c9b038e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,13 @@ -Package: lzstringr +Package: lzstring Type: Package Title: An R wrapper of lzstring C++ library Version: 0.1.0 Author: Sam Parmar Maintainer: Sam Parmar -Description: An R wrapper of LZ-based compression algorithm implemented in C++ +Description: This package provides an R interface to the lz-string C++ library, + enabling LZ-based compression and decompression of strings directly within + R. This can be particularly useful for reducing the memory footprint of + large strings or for transmitting compressed data. License: MIT + file LICENSE Encoding: UTF-8 LazyData: true diff --git a/LICENSE b/LICENSE index 49454dc..36856d0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ YEAR: 2024 -COPYRIGHT HOLDER: lzstringr authors +COPYRIGHT HOLDER: lzstring authors diff --git a/LICENSE.md b/LICENSE.md index 473f3f3..894fd37 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2024 lzstringr authors +Copyright (c) 2024 lzstring authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NAMESPACE b/NAMESPACE index a44d5a3..61241da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,4 +4,4 @@ export(compressToBase64) export(compressToEncodedURIComponent) export(decompressFromBase64) export(decompressFromEncodedURIComponent) -useDynLib(lzstringr, .registration = TRUE) +useDynLib(lzstring, .registration = TRUE) diff --git a/R/cpp11.R b/R/cpp11.R index 4acfc7d..62031a7 100644 --- a/R/cpp11.R +++ b/R/cpp11.R @@ -1,17 +1,17 @@ # Generated by cpp11: do not edit by hand compressToEncodedURIComponent_ <- function(bytes) { - .Call(`_lzstringr_compressToEncodedURIComponent_`, bytes) + .Call(`_lzstring_compressToEncodedURIComponent_`, bytes) } decompressFromEncodedURIComponent_ 
<- function(bytes) {
-  .Call(`_lzstringr_decompressFromEncodedURIComponent_`, bytes)
+  .Call(`_lzstring_decompressFromEncodedURIComponent_`, bytes)
 }
 
 compressToBase64_ <- function(bytes) {
-  .Call(`_lzstringr_compressToBase64_`, bytes)
+  .Call(`_lzstring_compressToBase64_`, bytes)
 }
 
 decompressFromBase64_ <- function(bytes) {
-  .Call(`_lzstringr_decompressFromBase64_`, bytes)
+  .Call(`_lzstring_decompressFromBase64_`, bytes)
 }
diff --git a/R/lzstringr-package.R b/R/lzstringr-package.R
index 9dd4d8e..2d5d0d4 100644
--- a/R/lzstringr-package.R
+++ b/R/lzstringr-package.R
@@ -1,13 +1,41 @@
 ## usethis namespace: start
-#' @useDynLib lzstringr, .registration = TRUE
+#' @useDynLib lzstring, .registration = TRUE
 ## usethis namespace: end
 NULL
 
+decode_utf16_surrogate <- function(values) {
+  # Decode a numeric vector of UTF-16 code units into one UTF-8 string.
+  # Valid high/low surrogate pairs are combined into a single code point;
+  # unpaired surrogates (e.g. a truncated trailing high surrogate) become
+  # U+FFFD instead of yielding NA or swallowing the next code unit.
+  n <- length(values)
+  if (n == 0L) {
+    return("")
+  }
+  is_high <- values >= 0xD800 & values <= 0xDBFF
+  is_low <- values >= 0xDC00 & values <= 0xDFFF
+  # A pair starts at a high surrogate immediately followed by a low one
+  pair_start <- is_high & c(is_low[-1L], FALSE)
+  pair_end <- c(FALSE, pair_start[-n])
+  code_points <- values
+  # Code point = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
+  code_points[pair_start] <- 0x10000 +
+    (values[pair_start] - 0xD800) * 0x400 +
+    (values[pair_end] - 0xDC00)
+  # Replace surrogates that are not part of a valid pair
+  unpaired <- (is_high | is_low) & !(pair_start | pair_end)
+  code_points[unpaired] <- 0xFFFD
+  # Drop the consumed low halves, then convert all code points at once
+  code_points <- code_points[!pair_end]
+  paste(intToUtf8(code_points, multiple = TRUE), collapse = "")
+}
+
 safe_compress <- function(string, f) {
   string <- enc2utf8(string)
   string <- iconv(string, from="UTF-8", to="UTF-16", toRaw=TRUE)[[1]]
   result <- f(string)
-  chr_result <- rawToChar(as.raw(result))
+  chr_result <- decode_utf16_surrogate(result)
+  Encoding(chr_result) <- "UTF-8"
   chr_result
 }
 
@@ 
-15,7 +43,8 @@ safe_decompress <- function(string, f) { string <- enc2utf8(string) string <- iconv(string, from="UTF-8", to="UTF-16", toRaw=TRUE)[[1]] result <- f(string) - chr_result <- intToUtf8(result) + chr_result <- decode_utf16_surrogate(result) + Encoding(chr_result) <- "UTF-8" chr_result } diff --git a/README.Rmd b/README.Rmd index 45d650c..55fbdf4 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,14 +15,14 @@ knitr::opts_chunk$set( ) ``` -# lzstringr +# lzstring [![R-CMD-check](https://github.com/parmsam/lzstring-r/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/parmsam/lzstring-r/actions/workflows/R-CMD-check.yaml) -The goal of lzstringr is to provide an R wrapper for the [lzstring C++ library](https://github.com/andykras/lz-string-cpp). [lzstring](https://github.com/pieroxy/lz-string) is originally a JavaScript library that provides fast and efficient string compression and decompression using a [LZ-based algorithm](https://en.wikipedia.org/wiki/Lempel–Ziv–Welch). Credit goes to [Winston Chang](https://github.com/wch) for spotting this missing R package and guiding me over at the R Shinylive repo—check out his awesome contributions which this repo is based on [here](https://github.com/posit-dev/r-shinylive/issues/70) and [here](https://github.com/posit-dev/r-shinylive/pull/71). Also, shoutout to Andy Kras for his implementation in C++ of lzstring, which you can find right [here](https://github.com/andykras/lz-string-cpp), and [pieroxy](https://github.com/pieroxy), the original brain behind lzstring in JavaScript—peek at his work over [here](https://github.com/pieroxy/lz-string). +The goal of lzstring-r is to provide an R wrapper for the [lzstring C++ library](https://github.com/andykras/lz-string-cpp). [lzstring](https://github.com/pieroxy/lz-string) is originally a JavaScript library that provides fast and efficient string compression and decompression using a [LZ-based algorithm](https://en.wikipedia.org/wiki/Lempel–Ziv–Welch). 
Credit goes to [Winston Chang](https://github.com/wch) for spotting this missing R package and guiding me over at the R Shinylive repo—check out his awesome contributions which this repo is based on [here](https://github.com/posit-dev/r-shinylive/issues/70) and [here](https://github.com/posit-dev/r-shinylive/pull/71). Also, shoutout to Andy Kras for his implementation in C++ of lzstring, which you can find right [here](https://github.com/andykras/lz-string-cpp), and [pieroxy](https://github.com/pieroxy), the original brain behind lzstring in JavaScript—peek at his work over [here](https://github.com/pieroxy/lz-string). ## Installation @@ -38,21 +38,21 @@ devtools::install_github("parmsam/lzstring-r") This is a basic example which shows you how to solve a common problem: ```{r example} -library(lzstringr) +library(lzstring) data = "The quick brown fox jumps over the lazy dog!"; -compressed = lzstringr::compressToBase64(data) +compressed = lzstring::compressToBase64(data) compressed -decompressed = lzstringr::decompressFromBase64(compressed) +decompressed = lzstring::decompressFromBase64(compressed) decompressed ``` ### Shinylive for R ```{r example2-r-shiny} -x <- 
lzstringr::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6ASmANGAYwHsIAXOMpMAGwEsAjAJykYE8AKAZwAtaJWAlAB0IdJiw71OY4RBEBiAAQAROADM+cRQFUAkorVFGitKkWluUUooAmzAO6cTi3p1JEA5sxiKAtP98RAFdaRQAeX0VUKA84AH1OWhs4ehZ2ERFFRSUAQXRzWlJqLQDAiCzSQuLFAF5FITAACThqaiJFAGVefgBCBtwM8uzOpJSWKKgIFoMjRT5UINInUszFROTU4zr1scZ0uSGspV0IBdJETrpk40NjCy0IIJh6OGMiNUV6PmWA1azpUaME5nfZZMFzU6LXQ2Wr1MBfCCcfp-MHUKAvaiwhoAOSeLzeHwRnEQyMOYJgfFhAEYBmSsjAoAAPWEAVgADLTwVkAG5QahBLR1ADMbJRsiyAlpqyUAHlFmcLo1aG5PN4-L8hqg2qQ5aQQUR5VCYXUGjZlaQAArahqyWQKFTqTRrV7c16KNoeWgERSMOAARxCvph7lsDmcrncXlg6v8Ik4LrdEQMQQgBEqJHY80WuEUBr1iwEihAgyOiiVKqjPne5m4Whl1BhADEoIVuGogpiAOJwVjx4zKKxQGNlUv2Vs+-0CtxwGGPZ5u6tE6WKAAqrkUcEZqF9nESJBrVkUsSmzHITiHEdV0eVinszHQM4hzgIfOoy5Dvog1ytRGsIb4ZovuQB7nNKy5Uhgii6NYN4NL6UBprQroNCYX41q86hGFoPAGg2nxaFAixEAylQvq0rDLmCvq+JucAEIsj72LW5RZksiiZpCpAACREoWBCWBAsTLgATJB0FOHmZzmKwqBaDeQ5ar+qySYsXFmm4P7WEmn7ftq7DFmSzJJmoLYWO21BcfYpl8B4KJEuEkTxn67AUhA7CMpKigMoy7mecUgkWBgeawqxPHfIoADUihUnaZIRqCXKMpyXJMHAUAANZOHURLJeCxCYiaYDyAA7CyOQ5MoABCpJcp8RjXFiYBMYUcA1YlaL0I1ADq1mCQU8DmO0UyMtYrxBKg6blBxXnfMIeAovSLblIV5aRmq1ZWYUNn9XASJgGKqwAL6yIdgxKAAwvBwFdHwrAmPkKyIt0rB5Kg7AhLCIQ5n2rpbM6jC-bIYCHQAukAA") +x <- 
lzstring::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6ASmANGAYwHsIAXOMpMAGwEsAjAJykYE8AKAZwAtaJWAlAB0IdJiw71OY4RBEBiAAQAROADM+cRQFUAkorVFGitKkWluUUooAmzAO6cTi3p1JEA5sxiKAtP98RAFdaRQAeX0VUKA84AH1OWhs4ehZ2ERFFRSUAQXRzWlJqLQDAiCzSQuLFAF5FITAACThqaiJFAGVefgBCBtwM8uzOpJSWKKgIFoMjRT5UINInUszFROTU4zr1scZ0uSGspV0IBdJETrpk40NjCy0IIJh6OGMiNUV6PmWA1azpUaME5nfZZMFzU6LXQ2Wr1MBfCCcfp-MHUKAvaiwhoAOSeLzeHwRnEQyMOYJgfFhAEYBmSsjAoAAPWEAVgADLTwVkAG5QahBLR1ADMbJRsiyAlpqyUAHlFmcLo1aG5PN4-L8hqg2qQ5aQQUR5VCYXUGjZlaQAArahqyWQKFTqTRrV7c16KNoeWgERSMOAARxCvph7lsDmcrncXlg6v8Ik4LrdEQMQQgBEqJHY80WuEUBr1iwEihAgyOiiVKqjPne5m4Whl1BhADEoIVuGogpiAOJwVjx4zKKxQGNlUv2Vs+-0CtxwGGPZ5u6tE6WKAAqrkUcEZqF9nESJBrVkUsSmzHITiHEdV0eVinszHQM4hzgIfOoy5Dvog1ytRGsIb4ZovuQB7nNKy5Uhgii6NYN4NL6UBprQroNCYX41q86hGFoPAGg2nxaFAixEAylQvq0rDLmCvq+JucAEIsj72LW5RZksiiZpCpAACREoWBCWBAsTLgATJB0FOHmZzmKwqBaDeQ5ar+qySYsXFmm4P7WEmn7ftq7DFmSzJJmoLYWO21BcfYpl8B4KJEuEkTxn67AUhA7CMpKigMoy7mecUgkWBgeawqxPHfIoADUihUnaZIRqCXKMpyXJMHAUAANZOHURLJeCxCYiaYDyAA7CyOQ5MoABCpJcp8RjXFiYBMYUcA1YlaL0I1ADq1mCQU8DmO0UyMtYrxBKg6blBxXnfMIeAovSLblIV5aRmq1ZWYUNn9XASJgGKqwAL6yIdgxKAAwvBwFdHwrAmPkKyIt0rB5Kg7AhLCIQ5n2rpbM6jC-bIYCHQAukAA") y <- jsonlite::fromJSON(x) cat(y$name) cat(y$content) @@ -61,7 +61,7 @@ cat(y$content) ### Shinylive for Python ```{r example3-py-shiny} -x <- lzstringr::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6VBPMAaMAYwHsIAXOcpMAMwCdiYACAZwAsBLCbDOAD1R04LFkw4xUxOmTERUAVzJ4mQiABM4dZfI4AdCPp0YuCsgH0WAGw4a6ACl2RHyxwDlnTAAzKAjJ+9MAEyeAJT64RAAAqq2GBR8ZPoaNExkCXYhiPpMOSpwZPJ0EEw0jhAAVIFioiAmihgQGUzlQQC+jvpgrQC6QA") +x <- lzstring::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6VBPMAaMAYwHsIAXOcpMAMwCdiYACAZwAsBLCbDOAD1R04LFkw4xUxOmTERUAVzJ4mQiABM4dZfI4AdCPp0YuCsgH0WAGw4a6ACl2RHyxwDlnTAAzKAjJ+9MAEyeAJT64RAAAqq2GBR8ZPoaNExkCXYhiPpMOSpwZPJ0EEw0jhAAVIFioiAmihgQGUzlQQC+jvpgrQC6QA") y <- jsonlite::fromJSON(x) cat(y$name) cat(y$content) diff --git a/README.md b/README.md index c640462..ed75c9d 100644 --- 
a/README.md +++ b/README.md @@ -1,14 +1,14 @@ -# lzstringr +# lzstring [![R-CMD-check](https://github.com/parmsam/lzstring-r/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/parmsam/lzstring-r/actions/workflows/R-CMD-check.yaml) -The goal of lzstringr is to provide an R wrapper for the [lzstring C++ +The goal of lzstring-r is to provide an R wrapper for the [lzstring C++ library](https://github.com/andykras/lz-string-cpp). [lzstring](https://github.com/pieroxy/lz-string) is originally a JavaScript library that provides fast and efficient string compression @@ -40,15 +40,15 @@ devtools::install_github("parmsam/lzstring-r") This is a basic example which shows you how to solve a common problem: ``` r -library(lzstringr) +library(lzstring) data = "The quick brown fox jumps over the lazy dog!"; -compressed = lzstringr::compressToBase64(data) +compressed = lzstring::compressToBase64(data) compressed #> [1] "CoCwpgBAjgrglgYwNYQEYCcD2B3AdhAM0wA8IArGAWwAcBnCTANzHQgBdwIAbAQwC8AnhAAmmAOYBCIA" -decompressed = lzstringr::decompressFromBase64(compressed) +decompressed = lzstring::decompressFromBase64(compressed) decompressed #> [1] "The quick brown fox jumps over the lazy dog!" 
``` @@ -56,7 +56,7 @@ decompressed ### Shinylive for R ``` r -x <- lzstringr::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6ASmANGAYwHsIAXOMpMAGwEsAjAJykYE8AKAZwAtaJWAlAB0IdJiw71OY4RBEBiAAQAROADM+cRQFUAkorVFGitKkWluUUooAmzAO6cTi3p1JEA5sxiKAtP98RAFdaRQAeX0VUKA84AH1OWhs4ehZ2ERFFRSUAQXRzWlJqLQDAiCzSQuLFAF5FITAACThqaiJFAGVefgBCBtwM8uzOpJSWKKgIFoMjRT5UINInUszFROTU4zr1scZ0uSGspV0IBdJETrpk40NjCy0IIJh6OGMiNUV6PmWA1azpUaME5nfZZMFzU6LXQ2Wr1MBfCCcfp-MHUKAvaiwhoAOSeLzeHwRnEQyMOYJgfFhAEYBmSsjAoAAPWEAVgADLTwVkAG5QahBLR1ADMbJRsiyAlpqyUAHlFmcLo1aG5PN4-L8hqg2qQ5aQQUR5VCYXUGjZlaQAArahqyWQKFTqTRrV7c16KNoeWgERSMOAARxCvph7lsDmcrncXlg6v8Ik4LrdEQMQQgBEqJHY80WuEUBr1iwEihAgyOiiVKqjPne5m4Whl1BhADEoIVuGogpiAOJwVjx4zKKxQGNlUv2Vs+-0CtxwGGPZ5u6tE6WKAAqrkUcEZqF9nESJBrVkUsSmzHITiHEdV0eVinszHQM4hzgIfOoy5Dvog1ytRGsIb4ZovuQB7nNKy5Uhgii6NYN4NL6UBprQroNCYX41q86hGFoPAGg2nxaFAixEAylQvq0rDLmCvq+JucAEIsj72LW5RZksiiZpCpAACREoWBCWBAsTLgATJB0FOHmZzmKwqBaDeQ5ar+qySYsXFmm4P7WEmn7ftq7DFmSzJJmoLYWO21BcfYpl8B4KJEuEkTxn67AUhA7CMpKigMoy7mecUgkWBgeawqxPHfIoADUihUnaZIRqCXKMpyXJMHAUAANZOHURLJeCxCYiaYDyAA7CyOQ5MoABCpJcp8RjXFiYBMYUcA1YlaL0I1ADq1mCQU8DmO0UyMtYrxBKg6blBxXnfMIeAovSLblIV5aRmq1ZWYUNn9XASJgGKqwAL6yIdgxKAAwvBwFdHwrAmPkKyIt0rB5Kg7AhLCIQ5n2rpbM6jC-bIYCHQAukAA") +x <- 
lzstring::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6ASmANGAYwHsIAXOMpMAGwEsAjAJykYE8AKAZwAtaJWAlAB0IdJiw71OY4RBEBiAAQAROADM+cRQFUAkorVFGitKkWluUUooAmzAO6cTi3p1JEA5sxiKAtP98RAFdaRQAeX0VUKA84AH1OWhs4ehZ2ERFFRSUAQXRzWlJqLQDAiCzSQuLFAF5FITAACThqaiJFAGVefgBCBtwM8uzOpJSWKKgIFoMjRT5UINInUszFROTU4zr1scZ0uSGspV0IBdJETrpk40NjCy0IIJh6OGMiNUV6PmWA1azpUaME5nfZZMFzU6LXQ2Wr1MBfCCcfp-MHUKAvaiwhoAOSeLzeHwRnEQyMOYJgfFhAEYBmSsjAoAAPWEAVgADLTwVkAG5QahBLR1ADMbJRsiyAlpqyUAHlFmcLo1aG5PN4-L8hqg2qQ5aQQUR5VCYXUGjZlaQAArahqyWQKFTqTRrV7c16KNoeWgERSMOAARxCvph7lsDmcrncXlg6v8Ik4LrdEQMQQgBEqJHY80WuEUBr1iwEihAgyOiiVKqjPne5m4Whl1BhADEoIVuGogpiAOJwVjx4zKKxQGNlUv2Vs+-0CtxwGGPZ5u6tE6WKAAqrkUcEZqF9nESJBrVkUsSmzHITiHEdV0eVinszHQM4hzgIfOoy5Dvog1ytRGsIb4ZovuQB7nNKy5Uhgii6NYN4NL6UBprQroNCYX41q86hGFoPAGg2nxaFAixEAylQvq0rDLmCvq+JucAEIsj72LW5RZksiiZpCpAACREoWBCWBAsTLgATJB0FOHmZzmKwqBaDeQ5ar+qySYsXFmm4P7WEmn7ftq7DFmSzJJmoLYWO21BcfYpl8B4KJEuEkTxn67AUhA7CMpKigMoy7mecUgkWBgeawqxPHfIoADUihUnaZIRqCXKMpyXJMHAUAANZOHURLJeCxCYiaYDyAA7CyOQ5MoABCpJcp8RjXFiYBMYUcA1YlaL0I1ADq1mCQU8DmO0UyMtYrxBKg6blBxXnfMIeAovSLblIV5aRmq1ZWYUNn9XASJgGKqwAL6yIdgxKAAwvBwFdHwrAmPkKyIt0rB5Kg7AhLCIQ5n2rpbM6jC-bIYCHQAukAA") y <- jsonlite::fromJSON(x) cat(y$name) #> app.R @@ -120,7 +120,7 @@ cat(y$content) ### Shinylive for Python ``` r -x <- lzstringr::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6VBPMAaMAYwHsIAXOcpMAMwCdiYACAZwAsBLCbDOAD1R04LFkw4xUxOmTERUAVzJ4mQiABM4dZfI4AdCPp0YuCsgH0WAGw4a6ACl2RHyxwDlnTAAzKAjJ+9MAEyeAJT64RAAAqq2GBR8ZPoaNExkCXYhiPpMOSpwZPJ0EEw0jhAAVIFioiAmihgQGUzlQQC+jvpgrQC6QA") +x <- lzstring::decompressFromEncodedURIComponent("NobwRAdghgtgpmAXGKAHVA6VBPMAaMAYwHsIAXOcpMAMwCdiYACAZwAsBLCbDOAD1R04LFkw4xUxOmTERUAVzJ4mQiABM4dZfI4AdCPp0YuCsgH0WAGw4a6ACl2RHyxwDlnTAAzKAjJ+9MAEyeAJT64RAAAqq2GBR8ZPoaNExkCXYhiPpMOSpwZPJ0EEw0jhAAVIFioiAmihgQGUzlQQC+jvpgrQC6QA") y <- jsonlite::fromJSON(x) cat(y$name) #> app.py diff --git a/src/cpp11.cpp b/src/cpp11.cpp index a7e1bdb..0ff8982 100644 --- a/src/cpp11.cpp +++ 
b/src/cpp11.cpp @@ -7,28 +7,28 @@ // code.cpp std::u16string compressToEncodedURIComponent_(std::vector bytes); -extern "C" SEXP _lzstringr_compressToEncodedURIComponent_(SEXP bytes) { +extern "C" SEXP _lzstring_compressToEncodedURIComponent_(SEXP bytes) { BEGIN_CPP11 return cpp11::as_sexp(compressToEncodedURIComponent_(cpp11::as_cpp>>(bytes))); END_CPP11 } // code.cpp std::u16string decompressFromEncodedURIComponent_(std::vector bytes); -extern "C" SEXP _lzstringr_decompressFromEncodedURIComponent_(SEXP bytes) { +extern "C" SEXP _lzstring_decompressFromEncodedURIComponent_(SEXP bytes) { BEGIN_CPP11 return cpp11::as_sexp(decompressFromEncodedURIComponent_(cpp11::as_cpp>>(bytes))); END_CPP11 } // code.cpp std::u16string compressToBase64_(std::vector bytes); -extern "C" SEXP _lzstringr_compressToBase64_(SEXP bytes) { +extern "C" SEXP _lzstring_compressToBase64_(SEXP bytes) { BEGIN_CPP11 return cpp11::as_sexp(compressToBase64_(cpp11::as_cpp>>(bytes))); END_CPP11 } // code.cpp std::u16string decompressFromBase64_(std::vector bytes); -extern "C" SEXP _lzstringr_decompressFromBase64_(SEXP bytes) { +extern "C" SEXP _lzstring_decompressFromBase64_(SEXP bytes) { BEGIN_CPP11 return cpp11::as_sexp(decompressFromBase64_(cpp11::as_cpp>>(bytes))); END_CPP11 @@ -36,15 +36,15 @@ extern "C" SEXP _lzstringr_decompressFromBase64_(SEXP bytes) { extern "C" { static const R_CallMethodDef CallEntries[] = { - {"_lzstringr_compressToBase64_", (DL_FUNC) &_lzstringr_compressToBase64_, 1}, - {"_lzstringr_compressToEncodedURIComponent_", (DL_FUNC) &_lzstringr_compressToEncodedURIComponent_, 1}, - {"_lzstringr_decompressFromBase64_", (DL_FUNC) &_lzstringr_decompressFromBase64_, 1}, - {"_lzstringr_decompressFromEncodedURIComponent_", (DL_FUNC) &_lzstringr_decompressFromEncodedURIComponent_, 1}, + {"_lzstring_compressToBase64_", (DL_FUNC) &_lzstring_compressToBase64_, 1}, + {"_lzstring_compressToEncodedURIComponent_", (DL_FUNC) &_lzstring_compressToEncodedURIComponent_, 1}, + 
{"_lzstring_decompressFromBase64_", (DL_FUNC) &_lzstring_decompressFromBase64_, 1}, + {"_lzstring_decompressFromEncodedURIComponent_", (DL_FUNC) &_lzstring_decompressFromEncodedURIComponent_, 1}, {NULL, NULL, 0} }; } -extern "C" attribute_visible void R_init_lzstringr(DllInfo* dll){ +extern "C" attribute_visible void R_init_lzstring(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); diff --git a/tests/testthat.R b/tests/testthat.R index 40705f5..ea5a613 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -7,6 +7,6 @@ # * https://testthat.r-lib.org/reference/test_package.html#special-files library(testthat) -library(lzstringr) +library(lzstring) -test_check("lzstringr") +test_check("lzstring") diff --git a/tests/testthat/test-lzstringr.R b/tests/testthat/test-lzstringr.R index 6bd9e45..3062920 100644 --- a/tests/testthat/test-lzstringr.R +++ b/tests/testthat/test-lzstringr.R @@ -88,6 +88,7 @@ test_that("Decompression from Base64 matches JavaScript implementation output", ## Test cases to ensure the string is UTF-8 encoded ---- test_that("Ensure strings are UTF-8 encoded", { + x <- "fa\xE7ile" Encoding(x) <- "latin1" @@ -106,3 +107,74 @@ test_that("Ensure strings are UTF-8 encoded", { y <- decompressFromBase64(compressToBase64(x)) expect_equal(Encoding(y), "UTF-8") }) + +## Compress and decompress for comparison ---- +compare_compress_decompress <- function(x) { + compressed <- compressToBase64(x) + decompressed <- decompressFromBase64(compressed) + expect_equal(decompressed, x) +} + +## Test cases for difference encodings ---- +test_that("Compress and Decompress for different encodings", { + emoji_pat <- "😑😑 😑" + compare_compress_decompress(emoji_pat) + + pat <- rawToChar(as.raw(c(0xce, 0x94, 0xe2, 0x98, 0x85, 0xf0, 0x9f, 0x98, 0x8e))) + Encoding(pat) <- "UTF-8" + compare_compress_decompress(pat) + + x <- rawToChar(as.raw(c(0xe5, 0x8d, 0x88))) + Encoding(x) <- "UTF-8" + 
compare_compress_decompress(x) + + latin1_str <- rawToChar(as.raw(0xFF)) + Encoding(latin1_str) <- "latin1" + compare_compress_decompress(latin1_str) + + japanese_text <- "こんにちは" # Hello in Japanese + encoded_text <- iconv(japanese_text, from = "UTF-8", to = "Shift-JIS") + decoded_text <- iconv(encoded_text, from = "Shift-JIS", to = "UTF-8") + compressed <- compressToBase64(decoded_text) + decompressed <- decompressFromBase64(compressed) + expect_equal(decompressed, japanese_text) +}) + +test_that("Compression handles special characters and symbols", { + text <- "𐐷𐑌 – Mathematical symbols: ∑ ∫, Emoji: 😊, Arabic: العربية, Hebrew: עברית" + expect_no_error(compressToBase64(text)) + compare_compress_decompress(text) + text <- "漢字 – Kanji, Cyrillic: Цирилица, Thai: ภาษาไทย" + expect_no_error(compressToBase64(text)) + compare_compress_decompress(text) +}) + +test_that("Decompression handles malformed input gracefully", { + malformed_base64 <- "This isn't base64 at all!" + # Decompression should handle this without crashing + expect_no_error(decompressFromBase64(malformed_base64)) + expect_equal(decompressFromBase64(malformed_base64), "") +}) + + +## Test cases for specific operating systems ---- +test_that("Compression handles OS-specific encodings", { + skip() + input_windows <- iconv("This is a test – with a dash", from = "UTF-8", to = "Windows-1252") + input_mac <- iconv("This is a test – with a dash", from = "UTF-8", to = "macintosh") + compressToBase64(input_windows) + compare_compress_decompress(input_windows) + compare_compress_decompress(input_mac) +}) + +# Test cases for to and from URI component encoding ---- +test_that("Compress and Decompress for URI encoding", { + text <- "[{\"name\":\"app.py\",\"content\":\"from shiny.express import input, render, ui\\n\\nui.input_slider(\\\"n\\\", \\\"N\\\", 0, 100, 20)\\n\\n\\n@render.text\\ndef txt():\\n return f\\\"n*2 is {input.n() * 2}\\\"\\n\"}]" + hash <- 
"NobwRAdghgtgpmAXGKAHVA6VBPMAaMAYwHsIAXOcpMAMwCdiYACAZwAsBLCbDOAD1R04LFkw4xUxOmTERUAVzJ4mQiABM4dZfI4AdCPp0YuCsgH0WAGw4a6ACl2RHyxwDlnTAAzKAjJ+9MAEyeAJT64RAAAqq2GBR8ZPoaNExkCXYhiPpMOSpwZPJ0EEw0jhAAVIFioiAmihgQGUzlQQC+jvpgrQC6QA" + + compressed <- compressToEncodedURIComponent(text) + expect_equal(compressed, hash) + + decompressed <- decompressFromEncodedURIComponent(compressed) + expect_equal(decompressed, text) +})