Merge pull request #3 from devkitPro/rplwrap

wut 1.1.0: rplwrap, garbage collection
devkitPro · May 7, 2019 · fcf3b7a · fcf3b7a
2 parents 6a54839 + 98f6581
commit fcf3b7a
Show file tree

Hide file tree

Showing 5 changed files with 150 additions and 36 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,7 @@
+#### wut-tools 1.1.0
+- elf2rpl: Added a new feature, `__rplwrap`. This will rename any symbol named `__rplwrap_<name>` (where `<name>` is any string) to just `<name>`. If a `<name>` already exists that would conflict with the new symbol, it is renamed to `__rplwrap_<name>`.
+- rplimportgen: Add support for `:TEXT_WRAP` and `:DATA_WRAP` sections. Every symbol in these sections will be prefixed with `__rplwrap_`. This is useful for cases where Cafe functions conflict with libc functions, and should not be used outside of libc or wut internals.
+- No known loader, including decaf, readrpl and the Cafe system loader.elf, actually uses or checks the crc32 and count parameters on an import section. To allow import garbage-collection, these have been hardcoded to dummy values.
+- rplimportgen now places each imported function into a dedicated section, -ffunction-sections style. This allows ld to garbage-collect unused imports, but also requires an updated linker script that only ships with wut 1.0.0-beta9 and later.
+
+#### wut-tools 1.0.0
diff --git a/configure.ac b/configure.ac
@@ -2,7 +2,7 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ(2.61)
-AC_INIT([wut-tools],[1.0.0],[https://github.com/devkitPro/wut-tools/issues])
+AC_INIT([wut-tools],[1.1.0],[https://github.com/devkitPro/wut-tools/issues])
 AC_CONFIG_SRCDIR([src/elf2rpl/main.cpp])
 AC_CONFIG_MACRO_DIR([m4])
 

diff --git a/src/common/rplwrap.h b/src/common/rplwrap.h
@@ -0,0 +1,3 @@
+#pragma once
+
+#define RPLWRAP_PREFIX "__rplwrap_"
diff --git a/src/elf2rpl/main.cpp b/src/elf2rpl/main.cpp
@@ -1,5 +1,6 @@
 #include "elf.h"
 #include "utils.h"
+#include "rplwrap.h"
 
 #include <algorithm>
 #include <excmd.h>
@@ -463,6 +464,99 @@ relocateSection(ElfFile &file,
    return true;
 }
 
+const static std::string rplwrap_prefix(RPLWRAP_PREFIX);
+
+/**
+ * Rename __rplwrap_<name> to <name>, and if <name> already exists rename it to
+ * __rplwrap_<name>.
+ *
+ * This is useful in situations where the imported libraries have names that
+ * conflict with existing ones.
+ *
+ * Only STT_OBJECT and STT_FUNC symbols found in SHT_SYMTAB sections are
+ * considered. No string data is rewritten: a conflicting pair simply has its
+ * two name offsets swapped, and a non-conflicting __rplwrap_<name> symbol has
+ * its name offset advanced past the prefix so that it points at the <name>
+ * tail of the same string.
+ *
+ * Every symbol table is processed independently, in three passes: collect the
+ * prefixed symbols, swap names with any conflicting symbol, then strip the
+ * prefix from whatever is left.
+ *
+ * Returns false when getSectionIndex() reports a negative index for
+ * ".strtab"; returns true in every other case, including when nothing was
+ * renamed.
+ *
+ * NOTE(review): names are always resolved through the section called
+ * ".strtab", for every SHT_SYMTAB section visited - confirm that no symbol
+ * table in the input uses a different string table.
+ * NOTE(review): name offsets are used to index the string data without a
+ * bounds check - presumably the input is trusted linker output; verify.
+ */
+static bool
+renameRplWrap(ElfFile &file)
+{
+   auto strtabIndex = getSectionIndex(file, ".strtab");
+   if (strtabIndex < 0) return false;
+   auto& strtab = file.sections[strtabIndex];
+   auto strtabd = reinterpret_cast<char *>(strtab->data.data()); // raw string table bytes, indexed by symbol name offset
+
+   for (auto &symSection : file.sections) {
+      if (symSection->header.type != elf::SectionType::SHT_SYMTAB) {
+         continue;
+      }
+
+      auto symbols = reinterpret_cast<elf::Symbol *>(symSection->data.data());
+      auto numSymbols = symSection->data.size() / sizeof(elf::Symbol);
+
+      // First pass - find all the symbols prefixed with __rplwrap_, don't do
+      // anything yet
+      std::vector<elf::Symbol*> foundRplWraps;
+
+      for (auto i = 0u; i < numSymbols; ++i) {
+         auto type = symbols[i].info & 0xf; // low 4 bits of info hold the symbol type
+
+         // Only rename functions and data objects
+         if (type != elf::STT_OBJECT &&
+             type != elf::STT_FUNC) {
+            continue;
+         }
+
+         std::string name = &strtabd[symbols[i].name];
+         if (!name.compare(0, rplwrap_prefix.size(), rplwrap_prefix)) { // compare() == 0 means name starts with the prefix
+            foundRplWraps.push_back(&symbols[i]);
+         }
+      }
+
+      // Second pass - Find any symbols that would conflict if __rplwrap_<name>
+      // got renamed to <name>, and if so, swap the names
+
+      for (auto i = 0u; i < numSymbols; ++i) {
+         auto type = symbols[i].info & 0xf;
+         std::string symName = &strtabd[symbols[i].name];
+
+         // Only rename functions and data objects
+         if (type != elf::STT_OBJECT &&
+             type != elf::STT_FUNC) {
+            continue;
+         }
+
+         auto rplWrap = foundRplWraps.begin();
+         while (rplWrap != foundRplWraps.end()) {
+            std::string wrapName = &strtabd[(*rplWrap)->name];
+            // Get the <name> part of __rplwrap_<name>
+            std::string wrapNameBase = wrapName.substr(rplwrap_prefix.size());
+
+            if (wrapNameBase == symName) {
+               // both __rplwrap_<name> and <name> exist, we can just swap their
+               // name pointers
+#ifdef DEBUG
+               fmt::print("DEBUG: renameRplWrap: {} <-> {}\n",
+                  wrapName, symName);
+#endif //DEBUG
+               std::swap((*rplWrap)->name, symbols[i].name);
+               // We're done, remove symbol from foundRplWraps; erase() hands
+               // back the iterator to the next element, so no increment here
+               rplWrap = foundRplWraps.erase(rplWrap);
+            // Otherwise, increment the iterator and go around again
+            } else ++rplWrap;
+         }
+      }
+
+      // Final pass: rename any remaining (non-conflicting) __rplwrap_<name>
+      // symbols to <name>
+
+      for (auto symbol : foundRplWraps) {
+#ifdef DEBUG
+         fmt::print("DEBUG: renameRplWrap: {} -> {}\n",
+            std::string(&strtabd[symbol->name]),
+            std::string(&strtabd[symbol->name] + rplwrap_prefix.length()));
+#endif //DEBUG
+         symbol->name += rplwrap_prefix.length(); // point at the <name> tail of the existing string
+      }
+   }
+
+   return true;
+}
 
 /**
  * Fix the loader virtual addresses.
@@ -795,6 +889,11 @@ int main(int argc, char **argv)
       return -1;
    }
 
+   if (!renameRplWrap(elf)) {
+       fmt::print("ERROR: renameRplWrap failed.\n");
+       return -1;
+   }
+
    if (!fixLoaderVirtualAddresses(elf)) {
       fmt::print("ERROR: fixLoaderVirtualAddresses failed.\n");
       return -1;

diff --git a/src/rplimportgen/rplimportgen.cpp b/src/rplimportgen/rplimportgen.cpp
@@ -1,4 +1,5 @@
 #include "utils.h"
+#include "rplwrap.h"
 
 #include <array>
 #include <algorithm>
@@ -17,7 +18,9 @@ enum class ReadMode
 {
    INVALID,
    TEXT,
-   DATA
+   TEXT_WRAP,
+   DATA,
+   DATA_WRAP,
 };
 
 void
@@ -26,34 +29,6 @@ writeExports(std::ofstream &out,
              bool isData,
              const std::vector<std::string> &exports)
 {
-   // Align module name up to 8 bytes
-   auto moduleNameSize = (moduleName.length() + 1 + 7) & ~7;
-
-   // Calculate the data block size
-   auto exportSecSize = exports.size() * 8;
-
-   if (exportSecSize < moduleNameSize) {
-      exportSecSize = moduleNameSize;
-   }
-
-   // Calculate export hash
-   uint32_t exportsHash = crc32(0, Z_NULL, 0);
-
-   for (auto &exp : exports) {
-      exportsHash = crc32(exportsHash, reinterpret_cast<const Bytef *>(exp.data()), exp.size() + 1);
-   }
-
-   std::array<Bytef, 0xE> extraHashBytes;
-   extraHashBytes.fill(0);
-   exportsHash = crc32(exportsHash, extraHashBytes.data(), extraHashBytes.size());
-
-   // Setup section data
-   std::vector<uint32_t> secData;
-   secData.resize(exportSecSize / 4, 0);
-   memcpy(secData.data(), moduleName.c_str(), moduleName.length());
-
-   out << std::endl;
-
    if (isData) {
       out << ".section .dimport_" << moduleName << ", \"a\", @0x80000002" << std::endl;
    } else {
@@ -63,21 +38,43 @@ writeExports(std::ofstream &out,
    out << ".align 4" << std::endl;
    out << std::endl;
 
-   out << ".long " << exports.size() << std::endl;
-   out << ".long 0x" << std::hex << exportsHash << std::endl;
+   // Usually the symbol count, but isn't checked on hardware.
+   // Spoofed to allow ld to garbage-collect later.
+   out << ".long 1" << std::endl;
+   // Supposed to be a crc32 of the imports. Again, not actually checked.
+   out << ".long 0x00000000" << std::endl;
+   out << std::endl;
+
+   // Align module name up to 8 bytes
+   auto moduleNameSize = (moduleName.length() + 1 + 7) & ~7;
+
+   // Setup name data
+   std::vector<uint32_t> secData;
+   secData.resize(moduleNameSize / 4, 0);
+   memcpy(secData.data(), moduleName.c_str(), moduleName.length());
+
+   // Add name data
+   for (uint32_t data : secData) {
+      out << ".long 0x" << std::hex << byte_swap(data) << std::endl;
+   }
    out << std::endl;
 
    const char *type = isData ? "@object" : "@function";
 
-   for (auto i = 0; i < exportSecSize / 8; ++i) {
+   for (auto i = 0; i < exports.size(); ++i) {
       if (i < exports.size()) {
+         // Basically do -ffunction-sections
+         if (isData) {
+            out << ".section .dimport_" << moduleName << "." << exports[i] << ", \"a\", @0x80000002" << std::endl;
+         } else {
+            out << ".section .fimport_" << moduleName << "." << exports[i] << ", \"ax\", @0x80000002" << std::endl;
+         }
          out << ".global " << exports[i] << std::endl;
          out << ".type " << exports[i] << ", " << type << std::endl;
          out << exports[i] << ":" << std::endl;
       }
-
-      out << ".long 0x" << std::hex << byte_swap(secData[i * 2 + 0]) << std::endl;
-      out << ".long 0x" << std::hex << byte_swap(secData[i * 2 + 1]) << std::endl;
+      out << ".long 0x0" << std::endl;
+      out << ".long 0x0" << std::endl;
       out << std::endl;
    }
 }
@@ -122,8 +119,12 @@ int main(int argc, char **argv)
          if (line[0] == ':') {
             if (line.substr(1) == "TEXT") {
                readMode = ReadMode::TEXT;
+            } else if (line.substr(1) == "TEXT_WRAP") {
+               readMode = ReadMode::TEXT_WRAP;
             } else if (line.substr(1) == "DATA") {
                readMode = ReadMode::DATA;
+            } else if (line.substr(1) == "DATA_WRAP") {
+               readMode = ReadMode::DATA_WRAP;
             } else if (line.substr(1, 4) == "NAME") {
                moduleName = line.substr(6);
             } else {
@@ -135,8 +136,12 @@ int main(int argc, char **argv)
 
          if (readMode == ReadMode::TEXT) {
             funcExports.push_back(line);
+         } else if (readMode == ReadMode::TEXT_WRAP) {
+            funcExports.push_back(std::string(RPLWRAP_PREFIX) + line);
          } else if (readMode == ReadMode::DATA) {
             dataExports.push_back(line);
+         } else if (readMode == ReadMode::DATA_WRAP) {
+            dataExports.push_back(std::string(RPLWRAP_PREFIX) + line);
          } else {
             std::cout << "Unexpected section data" << std::endl;
             return -1;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#pragma once

		#define RPLWRAP_PREFIX "__rplwrap_"