Skip to content

Commit

Permalink
Add support for PPMd compression.
Browse files Browse the repository at this point in the history
  • Loading branch information
fancycode committed Apr 3, 2019
1 parent c453db0 commit ccb0e7c
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 0 deletions.
13 changes: 13 additions & 0 deletions py7zlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def bytes(s, encoding):
COMPRESSION_METHOD_BCJ_ARMT = unhexlify('03030701') # '\x03\x03\x07\x01'
COMPRESSION_METHOD_BCJ_SPARC = unhexlify('03030805') # '\x03\x03\x08\x05'
COMPRESSION_METHOD_BCJ2 = unhexlify('0303011B') # '\x03\x03\x01\x1B'
COMPRESSION_METHOD_PPMD = unhexlify('030401') # '\x03\x04\x01'

FILE_ATTRIBUTE_DIRECTORY = 0x10
FILE_ATTRIBUTE_READONLY = 0x01
Expand Down Expand Up @@ -619,6 +620,7 @@ def __init__(self, info, start, src_start, folder, archive, maxsize=None):
COMPRESSION_METHOD_BCJ_ARMT: '_read_bcj_armt',
COMPRESSION_METHOD_BCJ_SPARC: '_read_bcj_sparc',
COMPRESSION_METHOD_BCJ2: '_read_bcj2',
COMPRESSION_METHOD_PPMD: '_read_ppmd',
}

def _is_encrypted(self):
Expand Down Expand Up @@ -861,6 +863,15 @@ def _read_bcj_sparc(self, coder, input, level, num_coders):
data = pylzma.bcj_sparc_convert(input)
return data[self._start:self._start+size]

def _read_ppmd(self, coder, input, level, num_coders):
# Decode data handled by the PPMd coder and return the slice
# [self._start, self._start + size) of the decompressed output, where
# size is self._uncompressed[level].
# coder      -- coder description; its 'properties' entry is passed to
#               pylzma.ppmd_decompress unchanged.
# input      -- data to decode; when empty/None the packed bytes are read
#               from self._file instead (presumably this is the case for the
#               first coder of a chain -- confirm against the caller).
# num_coders -- not used by this method.
size = self._uncompressed[level]
if not input:
# Nothing was handed in: fetch self.compressed packed bytes from the
# archive file, starting at self._src_start.
self._file.seek(self._src_start)
input = self._file.read(self.compressed)
# The decoder is asked for sum(self._unpacksizes) output bytes in one call;
# only the part belonging to this entry is returned below.
total_out = sum(self._unpacksizes)
data = pylzma.ppmd_decompress(input, coder['properties'], total_out)
return data[self._start:self._start+size]

def checkcrc(self):
if self.digest is None:
return True
Expand Down Expand Up @@ -998,11 +1009,13 @@ def __init__(self, file, password=None):
for coder in folder.coders:
numinstreams = max(numinstreams, coder.get('numinstreams', 1))
info['_packsizes'] = packinfo.packsizes[instreamindex:instreamindex+numinstreams]
info['_unpacksizes'] = unpacksizes
streamidx += 1
else:
info['compressed'] = 0
info['_uncompressed'] = [0]
info['_packsizes'] = [0]
info['_unpacksizes'] = [0]
folder = None
maxsize = 0
numinstreams = 1
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def build_extension(self, ext):
'src/sdk/C/Lzma2Dec.c',
'src/sdk/C/Lzma2Enc.c',
'src/sdk/C/Sha256.c',
'src/sdk/C/Ppmd7.c',
'src/sdk/C/Ppmd7Dec.c',
)
if ENABLE_COMPATIBILITY:
c_files += (
Expand Down
135 changes: 135 additions & 0 deletions src/pylzma/pylzma.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "../sdk/C/Bra.h"
#include "../sdk/C/Bcj2.h"
#include "../sdk/C/Delta.h"
#include "../sdk/C/Ppmd7.h"

#include "pylzma.h"
#include "pylzma_compress.h"
Expand All @@ -45,6 +46,7 @@
#include "pylzma_decompress_compat.h"
#include "pylzma_decompressobj_compat.h"
#endif
#include "pylzma_streams.h"

#if defined(WITH_THREAD) && !defined(PYLZMA_USE_GILSTATE)
PyInterpreterState* _pylzma_interpreterState = NULL;
Expand Down Expand Up @@ -350,6 +352,137 @@ pylzma_delta_encode(PyObject *self, PyObject *args)
return result;
}

const char
doc_ppmd_decompress[] =
"ppmd_decompress(data, properties, outsize) -- Decompress PPMd stream.";

/*
 * Adapter that exposes an ILookInStream as a byte-at-a-time IByteIn.
 * ReadByte() serves bytes from the window [begin, end) obtained through
 * ILookInStream_Look and fetches a new window once the current one is used up.
 */
typedef struct
{
IByteIn vt; /* byte-reader interface; its Read callback is set to ReadByte and it is installed as the range decoder's Stream */
const Byte *cur; /* next byte to hand out from the current window */
const Byte *end; /* one past the last byte of the current window */
const Byte *begin; /* start of the current window as returned by Look */
UInt64 processed; /* number of bytes consumed from windows that were already skipped */
BoolInt extra; /* set to True when a byte was requested but none could be delivered */
SRes res; /* result of the most recent Skip/Look call on inStream */
const ILookInStream *inStream; /* underlying look-ahead stream the bytes come from */
} CByteInToLook;

/*
 * IByteIn.Read callback: return the next input byte for the range decoder.
 *
 * Bytes are served from the current look-ahead window [begin, end). When the
 * window is exhausted, the consumed bytes are skipped on the underlying
 * ILookInStream and a new window of up to 32 MiB is requested.
 *
 * If no byte can be delivered (end of input or a stream error), p->extra is
 * set to True and 0 is returned; p->res holds the stream status in that case.
 */
static Byte
ReadByte(const IByteIn *pp) {
    CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
    if (p->cur != p->end) {
        return *p->cur++;
    }

    if (p->res == SZ_OK) {
        /* Account for the window that was just used up and release it. */
        size_t size = (size_t) (p->cur - p->begin);
        p->processed += size;
        p->res = ILookInStream_Skip(p->inStream, size);
        /* Only look for more data if the skip succeeded, so that a skip
           error is not silently replaced by the result of Look. */
        if (p->res == SZ_OK) {
            size = (1 << 25);
            p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size);
            if (p->res != SZ_OK) {
                /* Do not trust the window reported by a failed Look. */
                size = 0;
            }
            p->cur = p->begin;
            p->end = p->begin + size;
            if (size != 0) {
                return *p->cur++;
            }
        }
    }
    /* Out of data (or stream error): flag it for the caller. */
    p->extra = True;
    return 0;
}

/*
 * ppmd_decompress(data, properties, outsize) -> bytes
 *
 * Decompress a PPMd (variant handled by the SDK's Ppmd7 decoder) stream.
 *
 *   data       -- compressed input bytes
 *   properties -- exactly 5 bytes: model order followed by the memory size
 *                 as a 32-bit value (read with GetUi32)
 *   outsize    -- number of bytes to produce
 *
 * Returns a new bytes object of length outsize (an empty bytes object when
 * outsize is 0). Raises TypeError for malformed properties or when the
 * stream cannot be decoded to exactly outsize bytes while consuming exactly
 * all of data; raises MemoryError when the model or the result buffer cannot
 * be allocated.
 */
static PyObject *
pylzma_ppmd_decompress(PyObject *self, PyObject *args)
{
    char *data;
    PARSE_LENGTH_TYPE length;
    char *props;
    PARSE_LENGTH_TYPE propssize;
    unsigned int outsize;
    PyObject *result;
    Byte *tmp;
    unsigned order;
    UInt32 memSize;
    CPpmd7 ppmd;
    CPpmd7z_RangeDec rc;
    CByteInToLook s;
    SRes res = SZ_OK;
    CMemoryLookInStream stream;

    if (!PyArg_ParseTuple(args, "s#s#I", &data, &length, &props, &propssize, &outsize)) {
        return NULL;
    }

    if (propssize != 5) {
        /* Cast so the argument matches %ld whatever PARSE_LENGTH_TYPE is. */
        PyErr_Format(PyExc_TypeError, "properties must be exactly 5 bytes, got %ld", (long) propssize);
        return NULL;
    }

    /* Read the order as an unsigned byte; plain char may be signed. */
    order = (Byte) props[0];
    memSize = GetUi32(props + 1);
    if (order < PPMD7_MIN_ORDER ||
        order > PPMD7_MAX_ORDER ||
        memSize < PPMD7_MIN_MEM_SIZE ||
        memSize > PPMD7_MAX_MEM_SIZE) {
        PyErr_SetString(PyExc_TypeError, "unsupported compression properties");
        return NULL;
    }

    if (!outsize) {
        return PyBytes_FromString("");
    }

    Ppmd7_Construct(&ppmd);
    if (!Ppmd7_Alloc(&ppmd, memSize, &allocator)) {
        return PyErr_NoMemory();
    }
    Ppmd7_Init(&ppmd, order);

    result = PyBytes_FromStringAndSize(NULL, outsize);
    if (!result) {
        /* Release the model allocated above; it would leak otherwise. */
        Ppmd7_Free(&ppmd, &allocator);
        return NULL;
    }

    CreateMemoryLookInStream(&stream, (Byte*) data, length);
    tmp = (Byte *) PyBytes_AS_STRING(result);
    /* Decoding only touches C buffers, so the GIL can be released. */
    Py_BEGIN_ALLOW_THREADS
    Ppmd7z_RangeDec_CreateVTable(&rc);
    s.vt.Read = ReadByte;
    s.inStream = &stream.s;
    s.begin = s.end = s.cur = NULL;
    s.extra = False;
    s.res = SZ_OK;
    s.processed = 0;
    rc.Stream = &s.vt;
    if (!Ppmd7z_RangeDec_Init(&rc)) {
        res = SZ_ERROR_DATA;
    } else if (s.extra) {
        /* Input ran out while reading the range coder header. */
        res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
    } else {
        SizeT i;
        for (i = 0; i < outsize; i++) {
            int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
            if (s.extra || sym < 0) {
                /* Ran past the end of the input or hit an invalid symbol. */
                break;
            }
            tmp[i] = (Byte)sym;
        }
        if (i != outsize) {
            res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
        } else if (s.processed + (UInt64) (s.cur - s.begin) != (UInt64) length ||
                   !Ppmd7z_RangeDec_IsFinishedOK(&rc)) {
            /* All input must be consumed and the coder must end cleanly. */
            res = SZ_ERROR_DATA;
        }
    }
    Py_END_ALLOW_THREADS
    Ppmd7_Free(&ppmd, &allocator);
    if (res != SZ_OK) {
        Py_DECREF(result);
        PyErr_SetString(PyExc_TypeError, "error during decompression");
        result = NULL;
    }
    return result;
}

PyMethodDef
methods[] = {
// exported functions
Expand All @@ -372,6 +505,8 @@ methods[] = {
// Delta
{"delta_decode", (PyCFunction)pylzma_delta_decode, METH_VARARGS, (char *)&doc_delta_decode},
{"delta_encode", (PyCFunction)pylzma_delta_encode, METH_VARARGS, (char *)&doc_delta_encode},
// PPMd
{"ppmd_decompress", (PyCFunction)pylzma_ppmd_decompress, METH_VARARGS, (char *)&doc_ppmd_decompress},
{NULL, NULL},
};

Expand Down
60 changes: 60 additions & 0 deletions src/pylzma/pylzma_streams.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,63 @@ MemoryOutStreamDiscard(CMemoryOutStream *stream, size_t size)
stream->size -= size;
}
}

/*
 * ILookInStream.Read callback: copy up to *size bytes from the in-memory
 * buffer into buf and advance the stream by the number of bytes copied.
 * On return *size holds the number of bytes actually copied, which is
 * smaller than requested when fewer bytes are available. Always SZ_OK.
 */
static SRes
MemoryLookInStream_Read(const ILookInStream *p, void *buf, size_t *size)
{
    CMemoryLookInStream *stream = (CMemoryLookInStream *) p;
    const size_t count = (*size < stream->avail) ? *size : stream->avail;

    memcpy(buf, stream->data, count);
    stream->avail -= count;
    stream->data += count;
    *size = count;
    return SZ_OK;
}

/*
 * ILookInStream.Look callback: expose the unread part of the in-memory
 * buffer without consuming it. *buf is set to the current read position
 * and *size is clamped to the number of bytes still available.
 * Always SZ_OK.
 */
static SRes
MemoryLookInStream_Look(const ILookInStream *p, const void **buf, size_t *size)
{
    CMemoryLookInStream *stream = (CMemoryLookInStream *) p;

    if (*size > stream->avail) {
        *size = stream->avail;
    }
    *buf = stream->data;
    return SZ_OK;
}

/*
 * ILookInStream.Skip callback: advance the read position by offset bytes,
 * or to the end of the buffer when fewer bytes are left. Always SZ_OK.
 */
static SRes
MemoryLookInStream_Skip(const ILookInStream *p, size_t offset)
{
    CMemoryLookInStream *stream = (CMemoryLookInStream *) p;
    const size_t step = (offset <= stream->avail) ? offset : stream->avail;

    stream->avail -= step;
    stream->data += step;
    return SZ_OK;
}

/*
 * ILookInStream.Seek callback: seeking is not implemented for the
 * in-memory look stream, so every call reports SZ_ERROR_UNSUPPORTED
 * and leaves the stream untouched.
 */
static SRes
MemoryLookInStream_Seek(const ILookInStream *p, Int64 *pos, ESzSeek origin)
{
    /* Parameters are intentionally unused. */
    (void) p;
    (void) pos;
    (void) origin;
    return SZ_ERROR_UNSUPPORTED;
}

/*
 * Initialize stream so that it serves size bytes starting at data through
 * the ILookInStream interface. The buffer is referenced, not copied, so it
 * must stay valid for as long as the stream is in use.
 */
void
CreateMemoryLookInStream(CMemoryLookInStream *stream, Byte *data, size_t size)
{
    stream->s.Read = MemoryLookInStream_Read;
    stream->s.Look = MemoryLookInStream_Look;
    stream->s.Skip = MemoryLookInStream_Skip;
    stream->s.Seek = MemoryLookInStream_Seek;
    stream->data = data;
    /* Record the total size too, so no struct field is left uninitialized. */
    stream->size = size;
    stream->avail = size;
}
10 changes: 10 additions & 0 deletions src/pylzma/pylzma_streams.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,14 @@ typedef struct
void CreateMemoryOutStream(CMemoryOutStream *stream);
void MemoryOutStreamDiscard(CMemoryOutStream *stream, size_t size);

/* ILookInStream implementation that reads from a caller-supplied memory buffer. */
typedef struct
{
ILookInStream s; /* stream interface; kept as first member because the callbacks cast the ILookInStream pointer back to this struct */
Byte *data; /* current read position inside the buffer; advanced by Read and Skip */
size_t size; /* NOTE(review): presumably the total buffer size; the Read/Look/Skip callbacks never consult it */
size_t avail; /* number of bytes left between data and the end of the buffer */
} CMemoryLookInStream;

/* Set up stream to read size bytes from data; the buffer is referenced, not copied. */
void CreateMemoryLookInStream(CMemoryLookInStream *stream, Byte *data, size_t size);

#endif
Binary file added tests/data/ppmd.7z
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/test_7zfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,10 @@ def test_lzma_bcj_sparc(self):
# test loading of lzma compressed, filtered through bcj / SPARC
self._test_archive('lzma_bcj_sparc.7z')

def test_ppmd(self):
# test loading of an archive compressed with PPMd
self._test_archive('ppmd.7z')

def test_regress_1(self):
# prevent regression bug #1 reported by mail
fp = self._open_file(os.path.join(ROOT, 'data', 'regress_1.7z'), 'rb')
Expand Down

0 comments on commit ccb0e7c

Please sign in to comment.