Skip to content

Commit

Permalink
ensure all cpp code is cpp11 compatible (#866)
Browse files Browse the repository at this point in the history
* ensure cpp is cpp11 compatible

* move scan to cpp

* use portable cpp headers
  • Loading branch information
DyfanJones authored Jan 2, 2025
1 parent a47f534 commit d2f9d8a
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 104 deletions.
8 changes: 2 additions & 6 deletions paws.common/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@ paws_url_encoder <- function(urls, safe = "") {
.Call('_paws_common_paws_url_encoder', PACKAGE = 'paws.common', urls, safe)
}

# Thin R-side wrapper: forwards `vec` to the compiled routine
# `_paws_common_rtrim_whitespace` registered by the paws.common package.
rtrim_whitespace <- function(vec) {
  .Call("_paws_common_rtrim_whitespace", vec, PACKAGE = "paws.common")
}

identify_comments <- function(vec) {
.Call('_paws_common_identify_comments', PACKAGE = 'paws.common', vec)
scan_ini_file <- function(filename) {
.Call('_paws_common_scan_ini_file', PACKAGE = 'paws.common', filename)
}

process_profile_name <- function(vec) {
Expand Down
12 changes: 1 addition & 11 deletions paws.common/R/iniutil.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,18 @@ extract_ini_parameter <- function(items) {
}

read_ini <- function(file_name) {
if (!file.exists(file_name)) {
stopf("Unable to find file: %s", file_name)
}

if (!is.null(profiles <- ini_cache[[file_name]])) {
return(profiles)
}

content <- rtrim_whitespace(
scan(file_name, what = "", sep = "\n", quiet = TRUE)
)
content <- scan_ini_file(file_name)

# Return empty list for empty files
if (length(content) == 0) {
ini_cache[[file_name]] <- list()
return(ini_cache[[file_name]])
}

# Remove ini comments
comments <- which(identify_comments(content))
if (length(comments) > 0) content <- content[-comments]

# Get Profile names
found <- which(startsWith(content, "[") + endsWith(content, "]") == 2)
profile_nms <- process_profile_name(content[found])
Expand Down
24 changes: 6 additions & 18 deletions paws.common/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,14 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// rtrim_whitespace
Rcpp::CharacterVector rtrim_whitespace(Rcpp::CharacterVector vec);
RcppExport SEXP _paws_common_rtrim_whitespace(SEXP vecSEXP) {
// scan_ini_file
std::vector<std::string> scan_ini_file(const std::string& filename);
RcppExport SEXP _paws_common_scan_ini_file(SEXP filenameSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type vec(vecSEXP);
rcpp_result_gen = Rcpp::wrap(rtrim_whitespace(vec));
return rcpp_result_gen;
END_RCPP
}
// identify_comments
Rcpp::LogicalVector identify_comments(Rcpp::CharacterVector vec);
RcppExport SEXP _paws_common_identify_comments(SEXP vecSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type vec(vecSEXP);
rcpp_result_gen = Rcpp::wrap(identify_comments(vec));
Rcpp::traits::input_parameter< const std::string& >::type filename(filenameSEXP);
rcpp_result_gen = Rcpp::wrap(scan_ini_file(filename));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -126,8 +115,7 @@ END_RCPP

static const R_CallMethodDef CallEntries[] = {
{"_paws_common_paws_url_encoder", (DL_FUNC) &_paws_common_paws_url_encoder, 2},
{"_paws_common_rtrim_whitespace", (DL_FUNC) &_paws_common_rtrim_whitespace, 1},
{"_paws_common_identify_comments", (DL_FUNC) &_paws_common_identify_comments, 1},
{"_paws_common_scan_ini_file", (DL_FUNC) &_paws_common_scan_ini_file, 1},
{"_paws_common_process_profile_name", (DL_FUNC) &_paws_common_process_profile_name, 1},
{"_paws_common_json_convert_string", (DL_FUNC) &_paws_common_json_convert_string, 1},
{"_paws_common_check_global", (DL_FUNC) &_paws_common_check_global, 1},
Expand Down
4 changes: 2 additions & 2 deletions paws.common/src/encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

#include <Rcpp.h>
#include <unordered_set>
#include <string_view>
#include <vector>
#include <bitset>
#include <string>

using namespace Rcpp;

Expand All @@ -29,7 +29,7 @@ inline std::string to_hex(char x) {
return output;
}

std::string internal_url_encode(std::string_view url, std::string_view safe) {
std::string internal_url_encode(const std::string& url, const std::string& safe) {
// Create a bitset for additional safe characters
std::bitset<256> safe_chars_map = unreserved_chars_map;
for (char ch : safe) {
Expand Down
148 changes: 91 additions & 57 deletions paws.common/src/ini_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,95 +1,129 @@
#include <Rcpp.h>
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <Rcpp.h>
#include <cctype>
#include <algorithm> // for std::find_if_not (std::isspace comes from <cctype>)

using namespace Rcpp;

// Return a copy of `s` with trailing whitespace removed.
// Whitespace here means: space, tab, newline, carriage return, form feed
// and vertical tab. A string consisting only of those characters (or an
// empty string) yields an empty result.
std::string rtrim(const std::string& s) {
  const char* const whitespace = " \t\n\r\f\v";
  const std::string::size_type last_kept = s.find_last_not_of(whitespace);

  // Nothing but whitespace (or nothing at all): the result is empty.
  if (last_kept == std::string::npos) {
    return std::string();
  }

  // Keep everything up to and including the last non-whitespace character.
  return std::string(s, 0, last_kept + 1);
}

// [[Rcpp::export]]
Rcpp::CharacterVector rtrim_whitespace(Rcpp::CharacterVector vec) {
Rcpp::CharacterVector result(vec.size());

for (size_t i = 0; i < vec.size(); ++i) {
std::string line = Rcpp::as<std::string>(vec[i]);
result[i] = rtrim(line);
// Helper function: trims trailing whitespace from `line` in place and returns true when the line
// should be kept, i.e. it is not blank and its first non-whitespace character is not ';' or '#'
bool processLine(std::string &line)
{
// Trim trailing whitespace
line.erase(std::find_if_not(line.rbegin(), line.rend(), [](unsigned char ch)
{ return std::isspace(ch); })
.base(),
line.end());

// Locate the first non-whitespace character (leading whitespace is skipped, not removed)
auto start = std::find_if_not(line.begin(), line.end(), [](unsigned char ch)
{ return std::isspace(ch); });

// Line is empty or only whitespace
if (start == line.end())
{
return false;
}

return result;
// Check for ';' or '#'
return !(*start == ';' || *start == '#');
}

// Function to check if a line starts with specified patterns
bool startsWithPattern(const std::string& line) {
size_t i = 0;
// Skip leading whitespace
while (i < line.size() && std::isspace(line[i])) {
i++;
// Function to read an ini file using standard file I/O and return its contents as a vector of strings
// [[Rcpp::export]]
std::vector<std::string> scan_ini_file(const std::string &filename)
{
std::vector<std::string> fileLines;

// Open the file
std::ifstream file(filename);
if (!file.is_open())
{
Rcpp::stop("Unable to find file: " + filename);
}
// Check for specific patterns
return (i < line.size() && (line[i] == ';' || line[i] == '#'));
}


// [[Rcpp::export]]
Rcpp::LogicalVector identify_comments(Rcpp::CharacterVector vec) {
Rcpp::LogicalVector result(vec.size());
// Reserve capacity for 100 lines up front to minimize reallocations while reading
fileLines.reserve(100); // Adjust this value based on the expected number of lines

for (size_t i = 0; i < vec.size(); ++i) {
std::string line = Rcpp::as<std::string>(vec[i]);
result[i] = startsWithPattern(line);
// Read the file line by line
std::string line;
while (std::getline(file, line))
{
if (processLine(line))
{
fileLines.push_back(line);
}
}

return result;
// Close the file
file.close();

return fileLines;
}

// Function to trim leading and trailing whitespace characters
std::string trim(const std::string& str) {
// Helper function to trim leading and trailing whitespace characters from a string
std::string trim(const std::string &str)
{
size_t start = 0;
size_t end = str.size();

// Find the first non-whitespace character
while (start < str.size() && std::isspace(str[start])) {
while (start < end && std::isspace(str[start]))
{
++start;
}

// If the string is entirely whitespace, return an empty string
if (start == str.size()) {
return "";
}

size_t end = str.size() - 1;
// Find the last non-whitespace character
while (end > start && std::isspace(str[end])) {
while (end > start && std::isspace(str[end - 1]))
{
--end;
}

// Return the substring that excludes leading and trailing whitespace
return str.substr(start, end - start + 1);
return str.substr(start, end - start);
}

// Function to remove square brackets and all outer whitespaces
std::string removeBracketsAndTrim(const std::string& str) {
std::string result;
result.reserve(str.size()); // Reserve space to avoid multiple allocations

for (char ch : str) {
// Check if the character is not a square bracket
if (ch != '[' && ch != ']') {
result += ch;
}
std::string removeBracketsAndTrim(const std::string &str)
{
size_t start = 0;
size_t end = str.size();

// Skip leading whitespace
while (start < end && std::isspace(str[start]))
{
++start;
}

// Skip trailing whitespace
while (end > start && std::isspace(str[end - 1]))
{
--end;
}

// Remove square brackets if present
if (start < end && str[start] == '[')
{
++start;
}
if (end > start && str[end - 1] == ']')
{
--end;
}

// Trim leading and trailing whitespace from the result
return trim(result);
// Trim the internal whitespace again
return trim(str.substr(start, end - start));
}

// [[Rcpp::export]]
std::vector<std::string> process_profile_name(const std::vector<std::string>& vec) {
std::vector<std::string> process_profile_name(const std::vector<std::string> &vec)
{
std::vector<std::string> modifiedVec;
modifiedVec.reserve(vec.size());

for (const auto& str : vec) {
for (const auto &str : vec)
{
std::string modified = removeBracketsAndTrim(str);
modifiedVec.push_back(modified);
}
Expand Down
22 changes: 12 additions & 10 deletions paws.common/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,22 @@

using namespace Rcpp;


// Sort character vector
// param str A character vector to be sorted
//' @useDynLib paws.common _paws_common_char_sort
//' @importFrom Rcpp evalCpp
// [[Rcpp::export]]
CharacterVector char_sort(CharacterVector str) {
IntegerVector idx = seq_along(str) - 1;
std::sort(idx.begin(), idx.end(), [&](int i, int j){return str[i] < str[j];});
return str[idx];
CharacterVector char_sort(CharacterVector str)
{
IntegerVector idx = seq_along(str) - 1;
std::sort(idx.begin(), idx.end(), [&](int i, int j)
{ return str[i] < str[j]; });
return str[idx];
}

// Optimized UUID v4 generator function
std::string generate_uuid_v4() {
std::string generate_uuid_v4()
{
// Singleton random engine and distribution to avoid reinitialization overhead
static thread_local std::random_device rd; // Seed for random number generator
static thread_local std::mt19937 gen(rd()); // Mersenne twister engine
Expand All @@ -44,18 +46,18 @@ std::string generate_uuid_v4() {
}

// Creates uuid v4
// Developed from: https://github.com/rkg82/uuid-v4
// param n A integer, number of uuid v4 to generate.
//' @useDynLib paws.common _paws_common_uuid_v4
//' @importFrom Rcpp evalCpp
// [[Rcpp::export]]
CharacterVector uuid_v4(size_t n = 1) {
CharacterVector uuid_v4(size_t n = 1)
{
CharacterVector uuids(n);

for (size_t i = 0; i < n; ++i) {
for (size_t i = 0; i < n; ++i)
{
uuids[i] = generate_uuid_v4();
}

return uuids;
}

Empty file.
6 changes: 6 additions & 0 deletions paws.common/tests/testthat/test_iniutil.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,9 @@ test_that("Check cache", {
content2 <- read_ini("data_ini")
expect_equal(content1, content2)
})

# Reading the "empty_ini" fixture (presumably a file with no content --
# confirm against the test data) must produce an empty list.
test_that("Check empty ini file", {
  # Reset the cache first; presumably this keeps a previously cached
  # result from masking the read.
  paws_reset_cache()

  parsed <- read_ini("empty_ini")
  expect_equal(parsed, list())
})

0 comments on commit d2f9d8a

Please sign in to comment.