Skip to content

Commit

Permalink
(Binary Analysis) New simple memory image format
Browse files Browse the repository at this point in the history
* Created a new memory image format by extending ROSE's "vxcore"
  format to version 2. The new format uses a simple yet extensible,
  all binary, naturally aligned, fixed-endian, message header that
  contains only what's needed to restore memory to its original
  contents. It is intended to be easily produced on small devices.

Issue #216
  • Loading branch information
matzke1 authored and rosecompiler committed Aug 1, 2024
1 parent a69e1a9 commit f40926f
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 54 deletions.
10 changes: 5 additions & 5 deletions src/Rose/BinaryAnalysis/Partitioner2/EngineBinary.C
Original file line number Diff line number Diff line change
Expand Up @@ -949,11 +949,11 @@ EngineBinary::specimenNameDocumentation() {

"@bullet{If the name begins with the string \"vxcore:\" then it is treated as a special VxWorks core dump "
"in a format defined by ROSE. The complete specification has the syntax \"vxcore:[@v{memory_attributes}]"
":[@v{file_attributes}]:@v{file_name}\". The parts in square brackets are optional. The only memory attribute "
"recognized at this time is an equal sign (\"=\") followed by zero of more of the letters \"r\" (read), "
"\"w\" (write), and \"x\" (execute) to specify the mapping permissions. The default mapping permission if "
"no equal sign is specified is read, write, and execute. The only file attribute recognized at this time is "
"\"version=@v{v}\" where @v{v} is a version number, and ROSE currently supports only version 1.}"
":[@v{file_attributes}]:@v{file_name}\". The parts in square brackets are optional. The only file attribute "
"recognized at this time is \"version=@v{v}\" where @v{v} is a version number which must be 1 or 2, defaulting "
"to 1. For version 1, the only memory attribute is an equal sign (\"=\") followed by zero of more of the letters "
"\"r\" (read), \"w\" (write), and \"x\" (execute) to specify the mapping permissions, defaulting to read, write, and "
"execute. Version 2 has no memory attributes.}"

"@bullet{If the name begins with the string \"meta:\" then it adjusts meta information about the memory "
"map, such as permissions. " + MemoryMap::adjustMapDocumentation() + "}"
Expand Down
127 changes: 99 additions & 28 deletions src/frontend/BinaryFormats/BinaryVxcoreParser.C
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,13 @@ VxcoreParser::parseUrl(const std::string &spec) {
throw Exception("URL", 0, "invalid file attribute for vxcore URL: \"" + StringUtility::cEscape(parts[1]) + "\"");
}
}
if (settings_.version != 1)
if (settings_.version != 1 && settings_.version != 2)
throw Exception("URL", 0, "vxcore version " + boost::lexical_cast<std::string>(settings_.version) + " is not supported");

// Error checking
if (2 == settings_.version && settings_.protOverride)
throw Exception("URL", 0, "vxcore version 2 does not support memory protection override (\"=\" attribute)");

return parts[2];
}

Expand All @@ -97,18 +101,64 @@ VxcoreParser::parse(const boost::filesystem::path &fileName, const MemoryMap::Pt
void
VxcoreParser::parse(std::istream &input, const MemoryMap::Ptr &memory, const BaseSemantics::RegisterState::Ptr &registers,
const BaseSemantics::RiscOperators::Ptr &ops, const std::string &inputName) {
for (size_t segmentIdx = 0; input; ++segmentIdx) {
size_t headerOffset = input.tellg();
std::string header = rose_getline(input);
if (header.empty())
break; // EOF

std::string name = inputName + " segment #" + boost::lexical_cast<std::string>(segmentIdx);
if (!parseMemory(header, input, memory, name, headerOffset) &&
!parseRegisters(header, input, registers, ops, name, headerOffset)) {
throw Exception(inputName, input.tellg(), "invalid header: \"" + StringUtility::cEscape(header.substr(0, 30)) + "\"" +
(header.size() > 30 ? "..." : ""));
Sawyer::Message::Stream debug(mlog[DEBUG]);
if (1 == settings_.version) {
for (size_t segmentIdx = 0; input; ++segmentIdx) {
size_t headerOffset = input.tellg();
std::string header = rose_getline(input);
if (header.empty())
break; // EOF

std::string name = inputName + " segment #" + boost::lexical_cast<std::string>(segmentIdx);
if (!parseMemory(header, input, memory, name, headerOffset) &&
!parseRegisters(header, input, registers, ops, name, headerOffset)) {
throw Exception(inputName, input.tellg(), "invalid header: \"" + StringUtility::cEscape(header.substr(0, 30)) + "\"" +
(header.size() > 30 ? "..." : ""));
}
}
} else if (2 == settings_.version) {
while (true) {
// Read the message header
HeaderVersion2 header;
const size_t headerOffset = input.tellg();
input.read((char*)&header, sizeof header);
const size_t nHeader = input.gcount();
header.payloadSize = BitOps::fromLittleEndian(header.payloadSize);
header.addr = BitOps::fromLittleEndian(header.addr);

if (0 == nHeader) {
break;
} else if (nHeader != sizeof header) {
throw Exception(inputName, headerOffset,
(boost::format("short read (expected %1%, got only %2%) at %3%")
% sizeof(header) % nHeader % headerOffset).str());
} else if (2 != header.version) {
throw Exception(inputName, headerOffset,
(boost::format("invalid message version (expected %1%, got %2%) at %3%")
% settings_.version % header.version % headerOffset).str());
} else if (header.unused0 || header.unused1) {
throw Exception(inputName, headerOffset, (boost::format("unused fields must be zero at %1%") % headerOffset).str());
} else if (header.mapFlags & ~MemoryMap::READ_WRITE_EXECUTE) {
throw Exception(inputName, headerOffset, (boost::format("invalid map flags at %1%") % headerOffset).str());
} else if (header.payloadSize > 0) {
std::vector<uint8_t> buf(header.payloadSize);
input.read((char*)buf.data(), header.payloadSize);
const size_t nPayload = input.gcount();
if (nPayload != header.payloadSize) {
throw Exception(inputName, headerOffset,
(boost::format("short payload read (expected %1%, got only %2%) at %3%)")
% header.payloadSize % nPayload % (headerOffset + sizeof header)).str());
} else if (memory) {
const auto where = AddressInterval::baseSize(header.addr, header.payloadSize);
SAWYER_MESG(debug) <<"vxcore: addresses " <<StringUtility::addrToString(where) <<" at " <<headerOffset <<"\n";
memory->insert(where, MemoryMap::Segment::anonymousInstance(header.payloadSize, header.mapFlags, inputName));
const size_t nCopied = memory->at(header.addr).limit(header.payloadSize).write(buf.data()).size();
ASSERT_always_require(nCopied == header.payloadSize);
}
}
}
} else {
ASSERT_not_implemented("vxcore version " + boost::lexical_cast<std::string>(settings_.version));
}
}

Expand Down Expand Up @@ -222,19 +272,34 @@ VxcoreParser::unparse(std::ostream &out, const MemoryMap::Ptr &memory, const Add
const std::string &outputName) {
if (memory && !memoryLimit.isEmpty()) {
rose_addr_t va = memoryLimit.least();
while (const AddressInterval selected = memory->atOrAfter(va).singleSegment().available() & memoryLimit) {
const size_t maxPayload = 0xffffffff;
while (const AddressInterval selected = memory->atOrAfter(va).limit(maxPayload).singleSegment().available() & memoryLimit) {
MemoryMap::ConstNodeIterator inode = memory->at(selected.least()).nodes().begin();
ASSERT_forbid(inode == memory->nodes().end()); // because of the while loop's condition
ASSERT_require(inode->key().contains(selected));
const MemoryMap::Segment &segment = inode->value();

// Header
out <<StringUtility::addrToString(selected.least()).substr(2)
<<" " <<StringUtility::addrToString(selected.size()).substr(2)
<<" =" <<(0 != (segment.accessibility() & MemoryMap::READABLE) ? "R" : "-")
<<(0 != (segment.accessibility() & MemoryMap::WRITABLE) ? "W" : "-")
<<(0 != (segment.accessibility() & MemoryMap::EXECUTABLE) ? "X" : "-")
<<"\n";
if (1 == settings_.version) {
out <<StringUtility::addrToString(selected.least()).substr(2)
<<" " <<StringUtility::addrToString(selected.size()).substr(2)
<<" =" <<(0 != (segment.accessibility() & MemoryMap::READABLE) ? "R" : "-")
<<(0 != (segment.accessibility() & MemoryMap::WRITABLE) ? "W" : "-")
<<(0 != (segment.accessibility() & MemoryMap::EXECUTABLE) ? "X" : "-")
<<"\n";
} else if (2 == settings_.version) {
HeaderVersion2 header;
memset(&header, 0, sizeof header);
header.version = settings_.version;
header.mapFlags = segment.accessibility() & MemoryMap::READ_WRITE_EXECUTE;
header.payloadSize = BitOps::toLittleEndian(boost::numeric_cast<uint32_t>(selected.size()));
header.addr = BitOps::toLittleEndian(boost::numeric_cast<uint64_t>(selected.least()));
out.write((const char*)&header, sizeof header);
if (!out.good())
throw Exception(outputName, out.tellp(), "write failed");
} else {
ASSERT_not_implemented("vxcore version " + boost::lexical_cast<std::string>(settings_.version));
}

// Data output one buffer-full at a time since the memory map's underlying buffer might not be storing the bytes
// contiguously, but we need contiguous bytes for std::ostream::write.
Expand Down Expand Up @@ -265,16 +330,22 @@ VxcoreParser::unparse(std::ostream &out, const MemoryMap::Ptr &memory, const Add
}

if (registers) {
ASSERT_not_null(ops);
out <<"registers " <<registers->registerDictionary()->name() <<"\n";
RegisterDictionary::RegisterDescriptors regs = registers->registerDictionary()->getLargestRegisters();
RegisterNames registerName(registers->registerDictionary());
BOOST_FOREACH (RegisterDescriptor reg, regs) {
BaseSemantics::SValue::Ptr val = registers->peekRegister(reg, ops->undefined_(reg.nBits()), ops.get());
if (auto number = val->toUnsigned())
out <<(boost::format("%s 0x%x\n") % registerName(reg) % *number);
if (1 == settings_.version) {
ASSERT_not_null(ops);
out <<"registers " <<registers->registerDictionary()->name() <<"\n";
RegisterDictionary::RegisterDescriptors regs = registers->registerDictionary()->getLargestRegisters();
RegisterNames registerName(registers->registerDictionary());
BOOST_FOREACH (RegisterDescriptor reg, regs) {
BaseSemantics::SValue::Ptr val = registers->peekRegister(reg, ops->undefined_(reg.nBits()), ops.get());
if (auto number = val->toUnsigned())
out <<(boost::format("%s 0x%x\n") % registerName(reg) % *number);
}
out <<"end\n";
} else if (2 == settings_.version) {
// Registers are not stored for version 2
} else {
ASSERT_not_implemented("vxcore version " + boost::lexical_cast<std::string>(settings_.version));
}
out <<"end\n";
}
}

Expand Down
22 changes: 21 additions & 1 deletion src/frontend/BinaryFormats/BinaryVxcoreParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,16 @@ namespace BinaryAnalysis {
*
* A register record (at most one per file) begins with the word "registers", a space, and the instruction set architecture name
* recognized by ROSE. Following the header is one line per register, each line being a register name recognized by ROSE, a colon,
* optional horizontal white space, and a hexadecimal value, this time with a leading "0x". */
* optional horizontal white space, and a hexadecimal value, this time with a leading "0x".
*
* This format version was designed by Jim Leek.
*
* @section vxcore_v2 Version 2
*
* Version 2 of this format is a sequence of messages, each consisting of a binary header followed by a binary payload. Each header
* contains naturally aligned fields: a one-byte version number having the value 2; two bytes not currently used for any purpose
* (they must be zero); one byte containing the memory access permission bits (see @ref MemoryMap); a four-byte little-endian
* payload size in bytes; and an eight-byte little-endian starting memory address. */
class VxcoreParser {
public:
/** Settings that control the parser and unparser. */
Expand Down Expand Up @@ -77,6 +86,17 @@ class VxcoreParser {
}
};

private:
// Message header for version 2 of the vxcore format. Each message is one of these fixed-size headers followed immediately by
// `payloadSize` bytes of memory contents that belong at address `addr`. The parser and unparser transfer this struct as raw
// bytes, so its in-memory layout is the file layout: fields are naturally aligned and there must be no padding. The
// multi-byte fields are stored little-endian in the file and are converted to/from host order by the parser and unparser.
struct HeaderVersion2 {
    uint8_t version;                                    // must be 2
    uint8_t unused0;                                    // not currently used; must be zero
    uint8_t unused1;                                    // not currently used; must be zero
    uint8_t mapFlags;                                   // MemoryMap::{READABLE,WRITABLE,EXECUTABLE}
    uint32_t payloadSize;                               // little-endian; number of payload bytes following this header
    uint64_t addr;                                      // little-endian; starting memory address of the payload
};

// Because the header is read and written as raw bytes, any change to its size (e.g., compiler-inserted padding or a
// reordered/added field) would silently change the file format. Catch that at compile time instead.
static_assert(sizeof(HeaderVersion2) == 16, "vxcore version 2 message header must be exactly 16 bytes with no padding");

private:
Settings settings_;
std::string isaName_; // Parsed instruction set architecture name
Expand Down
Loading

0 comments on commit f40926f

Please sign in to comment.