Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for AIX big archive format. #3

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 255 additions & 6 deletions arpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
random access through seek and tell functions is supported on the archived files
"""

import struct

HEADER_BSD = 1
HEADER_GNU = 2
HEADER_GNU_TABLE = 3
Expand All @@ -76,8 +78,6 @@ class ArchiveFileHeader(object):

def __init__(self, header, offset):
""" Creates a new header from binary data starting at a specified offset """
import struct

name, timestamp, uid, gid, mode, size, magic = struct.unpack(
"16s 12s 6s 6s 8s 10s 2s", header)
if magic != b"\x60\x0a":
Expand Down Expand Up @@ -214,7 +214,7 @@ def _seek(self, offset):
# reached EOF before target offset
return

def __read_file_header(self, offset):
def _read_file_header(self, offset):
""" Reads and returns a single new file header """
self._seek(offset)

Expand All @@ -234,7 +234,7 @@ def __read_file_header(self, offset):

if offset == self.next_header_offset:
new_offset = file_header.file_offset + file_header.size
self.next_header_offset = Archive.__pad2(new_offset)
self.next_header_offset = Archive._pad2(new_offset)

return file_header

Expand Down Expand Up @@ -285,7 +285,7 @@ def __fix_name(self, header):
return 0

@staticmethod
def __pad2(num):
def _pad2(num):
""" Returns a 2-aligned offset """
if num % 2 == 0:
return num
Expand All @@ -302,7 +302,7 @@ def read_next_header(self):
"""
Reads a single new header, returning a its representation, or None at the end of file
"""
header = self.__read_file_header(self.next_header_offset)
header = self._read_file_header(self.next_header_offset)
if header is not None:
self.headers.append(header)
if header.type in (HEADER_BSD, HEADER_NORMAL, HEADER_GNU):
Expand Down Expand Up @@ -330,3 +330,252 @@ def read_all_headers(self):
def close(self):
    """
    Closes the archive file descriptor.

    Delegates to ``self.file.close()``; nothing else is released here.
    """
    self.file.close()


class AIXBigArchive(Archive):
    """
    Reader for archives stored in the AIX "big" ar format.

    This is the default ar library archive format on the AIX operating
    system; a single archive may hold both 32-bit and 64-bit object files.

    Unlike the formats handled by the base class, members are not laid out
    back to back: the global header points at the first member and every
    member header carries the offset of the next one.
    """

    def __init__(self, filename=None, fileobj=None):
        # Bookkeeping containers shared with the base-class machinery.
        self.archived_files = {}
        self.headers = []

        # An already opened file object wins over a file name.
        self.file = fileobj or open(filename, "rb")
        self._detect_seekable()
        self.position = 0

        # The fixed-length global header sits at the start of the archive
        # and tells us where the member chain begins.
        raw_global_header = self._read(AIXBigGlobalHeader.LENGTH)
        self.global_header = AIXBigGlobalHeader(raw_global_header)
        self.next_header_offset = self.global_header.first_member

    def _read_file_header(self, offset):
        """
        Read the member header located at `offset` and return it.

        Returns None when `offset` is 0 (end of the member chain) or when
        nothing could be read at that position.

        When `offset` is the member the iteration is currently pointing at,
        the iteration pointer is advanced to the following member (or to 0
        once the last member has been read).
        """
        # Offset 0 is the end-of-chain marker: no more members.
        if offset == 0:
            return None

        self._seek(offset)
        fixed_part = self._read(AIXBigFileHeader.MINIMUM_LENGTH)
        if not len(fixed_part):
            return None

        member = AIXBigFileHeader(fixed_part, offset)

        # The variable-length tail (name, padding, trailer) can only be
        # sized once the fixed part has been parsed.
        variable_part = self._read(member.remaining_header_length)
        member.updateRemainingHeader(variable_part)

        # Only move the iteration pointer when this read is part of the
        # sequential walk over the members.
        if offset == self.next_header_offset:
            is_last = offset == self.global_header.last_member
            self.next_header_offset = 0 if is_last else member.next_member

        return member


class AIXBigGlobalHeader(object):
    """
    Fixed-length header found at the start of every AIX big archive.

    It contains the magic number, which identifies the archive file, and
    the offsets to special archive file members.

    The fixed-length header has the following format:

    #define __AR_BIG__
    #define AIAMAGBIG "<bigaf>\\n"  /* Magic string */
    #define SAIAMAG 8               /* Length of magic string */
    struct fl_hdr                   /* Fixed-length header */
    {
        char fl_magic[SAIAMAG]; /* Archive magic string */
        /* Offset to member table -> members */
        char fl_memoff[20];
        /* Offset to global symbol table -> global_symbol */
        char fl_gstoff[20];
        /* Offset global symbol table for 64-bit objects -> global_symbol_64 */
        char fl_gst64off[20];
        /* Offset to first archive member -> first_member */
        char fl_fstmoff[20];
        /* Offset to last archive member -> last_member */
        char fl_lstmoff[20];
        /* Offset to first mem on free list -> first_free_member */
        char fl_freeoff[20];
    }

    The `content` passed to the constructor is the complete header,
    *including* the leading magic string, exactly `LENGTH` bytes long.
    All offsets are exposed as integers under the attribute names listed
    after the arrows above.

    Raises ArchiveFormatError when the content is too short, does not start
    with the magic string, has the wrong size or holds non-numeric offsets.
    """

    # Total size of the header: 8 bytes of magic + 6 fields of 20 bytes.
    LENGTH = 128
    # Must be bytes: the header is read from a file opened in binary mode,
    # and on Python 3 a bytes/str comparison is never equal.
    AIAMAGBIG = b'<bigaf>\n'
    SAIAMAG = 8

    def __init__(self, content):
        self._content = content
        self._checkValidType()

        try:
            header = struct.unpack("8s 20s 20s 20s 20s 20s 20s", content)
        except struct.error:
            raise ArchiveFormatError("bad format for global header")

        # header[0] is the magic string, already validated above; the
        # remaining fields are space padded decimal offsets.
        try:
            (
                self.members,
                self.global_symbol,
                self.global_symbol_64,
                self.first_member,
                self.last_member,
                self.first_free_member,
            ) = [int(field) for field in header[1:]]
        except ValueError:
            # Report corrupt numeric fields like any other malformed header
            # instead of leaking a bare ValueError.
            raise ArchiveFormatError("bad format for global header")

    def _checkValidType(self):
        """Raise an error if archive had bad type."""
        if len(self._content) < self.LENGTH:
            raise ArchiveFormatError("file to short for AIX big format")

        if self._content[:self.SAIAMAG] != self.AIAMAGBIG:
            raise ArchiveFormatError("this is not an AIX big format archive")


class AIXBigFileHeader(object):
    """
    Each archive file member is preceded by a file member header,
    which contains the following information about the file member:

    #define AIAFMAG "`\\n"          /* Header trailer string */
    struct ar_hdr                   /* File member header */
    {
        /* File member size - decimal -> size */
        char ar_size[20];
        /* Next member offset-decimal -> next_member */
        char ar_nxtmem[20];
        /* Previous member offset-dec -> previous_member */
        char ar_prvmem[20];
        /* File member date-decimal -> timestamp */
        char ar_date[12];
        /* File member userid-decimal -> uid */
        char ar_uid[12];
        /* File member group id-decimal -> gid */
        char ar_gid[12];
        /* File member mode-octal -> mode */
        char ar_mode[12];
        /* File member name length-dec -> filename_length */
        char ar_namlen[4];
        union
        {
            char ar_name[2];    /* Start of member name */
            char ar_fmag[2];    /* AIAFMAG - string to end */
        };
        _ar_name;               /* Header and member name */
    };

    The member header provides support for member names up to 255 characters
    long. The ar_namlen field contains the length of the member name.
    The character string containing the member name begins at the _ar_name
    field. The AIAFMAG string is cosmetic only.

    Each archive member header begins on an even-byte boundary. The total
    length of a member header is:

        sizeof (struct ar_hdr) + ar_namlen

    The actual data for a file member begins at the first even-byte boundary
    beyond the member header and continues for the number of bytes specified
    by the ar_size field. The ar command inserts null bytes for padding
    where necessary.

    All information in the fixed-length header and archive members is in
    printable ASCII format. Numeric information, with the exception of
    the ar_mode field, is stored as decimal numbers;
    the ar_mode field is stored in octal format. Thus, if the archive file
    contains only printable files, you can print the archive.

    Parsing happens in two steps: the constructor consumes the first
    `MINIMUM_LENGTH` bytes (everything up to and including ar_namlen), and
    `updateRemainingHeader` consumes the variable-length tail whose size is
    given by `remaining_header_length`.
    """
    # Must be bytes: header data comes from a file opened in binary mode,
    # and bytes.endswith() rejects a str argument on Python 3.
    AIAFMAG = b'`\n'

    # Size of the fixed part of the header, up to and including ar_namlen.
    MINIMUM_LENGTH = 112
    type = HEADER_NORMAL

    def __init__(self, content, offset):
        """
        Parse the fixed part of a member header.

        `content` is the first MINIMUM_LENGTH bytes of the header and
        `offset` is the absolute position of the header inside the archive.

        Raises ArchiveFormatError when the content is too short, has the
        wrong size or holds non-numeric fields.
        """
        if len(content) < AIXBigFileHeader.MINIMUM_LENGTH:
            raise ArchiveFormatError('file header too short')

        try:
            header = struct.unpack("20s 20s 20s 12s 12s 12s 12s 4s", content)
        except struct.error:
            raise ArchiveFormatError("bad format for file header")

        (
            size,
            next_member,
            previous_member,
            timestamp,
            uid,
            gid,
            mode,
            filename_length,
        ) = header

        try:
            self.size = int(size)
            self.next_member = int(next_member)
            self.previous_member = int(previous_member)
            self.timestamp = int(timestamp)
            self.uid = int(uid)
            self.gid = int(gid)
            # The mode is the only field stored in octal.
            self.mode = int(mode, 8)
            self.filename_length = int(filename_length)
        except ValueError:
            # Report corrupt numeric fields like any other malformed header
            # instead of leaking a bare ValueError.
            raise ArchiveFormatError("bad format for file header")

        self._header_offset = offset
        # Filled in by updateRemainingHeader(); defined here so that the
        # attribute always exists.
        self.name = None

    @property
    def remaining_header_length(self):
        """Length of filename content raw data."""
        # actual_filename + ALIGN_PAD + HEADER_TRAILING_STRING
        return Archive._pad2(self.filename_length + len(self.AIAFMAG))

    @property
    def relative_file_offset(self):
        """Offset to file content start, relative to header."""
        return self.MINIMUM_LENGTH + self.remaining_header_length

    @property
    def file_offset(self):
        """Offset to file content start, absolute to file."""
        return self._header_offset + self.relative_file_offset

    def updateRemainingHeader(self, content):
        """
        Update header with the variable length content.

        `content` holds the member name, the optional alignment padding and
        the AIAFMAG trailer. Sets `name` to the first `filename_length`
        bytes of `content`.

        Raises ArchiveFormatError when the content is shorter than
        `remaining_header_length` or does not end with the trailer.
        """
        if len(content) < self.remaining_header_length:
            raise ArchiveFormatError('file header end too short')

        if not content.endswith(self.AIAFMAG):
            raise ArchiveFormatError("bad ending for file header")

        self.name = content[:self.filename_length]
Loading