diff --git a/py7zlib.py b/py7zlib.py
index 3dbbc10..00e84cf 100644
--- a/py7zlib.py
+++ b/py7zlib.py
@@ -149,6 +149,7 @@ def bytes(s, encoding):
 COMPRESSION_METHOD_BCJ_ARMT = unhexlify('03030701') # '\x03\x03\x07\x01'
 COMPRESSION_METHOD_BCJ_SPARC = unhexlify('03030805') # '\x03\x03\x08\x05'
 COMPRESSION_METHOD_BCJ2 = unhexlify('0303011B') # '\x03\x03\x01\x1B'
+COMPRESSION_METHOD_PPMD = unhexlify('030401') # '\x03\x04\x01'
 
 FILE_ATTRIBUTE_DIRECTORY = 0x10
 FILE_ATTRIBUTE_READONLY = 0x01
@@ -619,6 +620,7 @@ def __init__(self, info, start, src_start, folder, archive, maxsize=None):
             COMPRESSION_METHOD_BCJ_ARMT: '_read_bcj_armt',
             COMPRESSION_METHOD_BCJ_SPARC: '_read_bcj_sparc',
             COMPRESSION_METHOD_BCJ2: '_read_bcj2',
+            COMPRESSION_METHOD_PPMD: '_read_ppmd',
         }
 
     def _is_encrypted(self):
@@ -861,6 +863,16 @@ def _read_bcj_sparc(self, coder, input, level, num_coders):
         data = pylzma.bcj_sparc_convert(input)
         return data[self._start:self._start+size]
 
+    def _read_ppmd(self, coder, input, level, num_coders):
+        # decompress the complete PPMd stream, then return the requested slice
+        size = self._uncompressed[level]
+        if not input:
+            self._file.seek(self._src_start)
+            input = self._file.read(self.compressed)
+        total_out = sum(self._unpacksizes)
+        data = pylzma.ppmd_decompress(input, coder['properties'], total_out)
+        return data[self._start:self._start+size]
+
     def checkcrc(self):
         if self.digest is None:
             return True
@@ -998,11 +1010,13 @@ def __init__(self, file, password=None):
                 for coder in folder.coders:
                     numinstreams = max(numinstreams, coder.get('numinstreams', 1))
                 info['_packsizes'] = packinfo.packsizes[instreamindex:instreamindex+numinstreams]
+                info['_unpacksizes'] = unpacksizes
                 streamidx += 1
             else:
                 info['compressed'] = 0
                 info['_uncompressed'] = [0]
                 info['_packsizes'] = [0]
+                info['_unpacksizes'] = [0]
                 folder = None
                 maxsize = 0
                 numinstreams = 1
diff --git a/setup.py b/setup.py
index 6d99dc1..01014d0 100644
--- a/setup.py
+++ b/setup.py
@@ -164,6 +164,8 @@ def build_extension(self, ext):
     'src/sdk/C/Lzma2Dec.c',
     'src/sdk/C/Lzma2Enc.c',
     'src/sdk/C/Sha256.c',
+    'src/sdk/C/Ppmd7.c',
+    'src/sdk/C/Ppmd7Dec.c',
 )
 if ENABLE_COMPATIBILITY:
     c_files += (
diff --git a/src/pylzma/pylzma.c b/src/pylzma/pylzma.c
index cc754a8..a3acae2 100644
--- a/src/pylzma/pylzma.c
+++ b/src/pylzma/pylzma.c
@@ -31,6 +31,7 @@
 #include "../sdk/C/Bra.h"
 #include "../sdk/C/Bcj2.h"
 #include "../sdk/C/Delta.h"
+#include "../sdk/C/Ppmd7.h"
 
 #include "pylzma.h"
 #include "pylzma_compress.h"
@@ -45,6 +46,7 @@
 #include "pylzma_decompress_compat.h"
 #include "pylzma_decompressobj_compat.h"
 #endif
+#include "pylzma_streams.h"
 
 #if defined(WITH_THREAD) && !defined(PYLZMA_USE_GILSTATE)
 PyInterpreterState* _pylzma_interpreterState = NULL;
@@ -350,6 +352,144 @@ pylzma_delta_encode(PyObject *self, PyObject *args)
     return result;
 }
 
+const char
+doc_ppmd_decompress[] =
+    "ppmd_decompress(data, properties, outsize) -- Decompress PPMd stream.";
+
+/* Adapts an ILookInStream to the IByteIn interface read by the range decoder. */
+typedef struct
+{
+    IByteIn vt;
+    const Byte *cur;
+    const Byte *end;
+    const Byte *begin;
+    UInt64 processed;
+    BoolInt extra;
+    SRes res;
+    const ILookInStream *inStream;
+} CByteInToLook;
+
+/* Return the next input byte. Once the input is exhausted (or the stream
+ * reported an error), set "extra" and return 0. */
+static Byte
+ReadByte(const IByteIn *pp) {
+    CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
+    if (p->cur != p->end) {
+        return *p->cur++;
+    }
+
+    if (p->res == SZ_OK) {
+        size_t size = p->cur - p->begin;
+        p->processed += size;
+        p->res = ILookInStream_Skip(p->inStream, size);
+        size = (1 << 25);
+        p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size);
+        p->cur = p->begin;
+        p->end = p->begin + size;
+        if (size != 0) {
+            return *p->cur++;
+        }
+    }
+    p->extra = True;
+    return 0;
+}
+
+/* Decompress "outsize" bytes of PPMd data using the 5-byte properties
+ * (order byte followed by a 32-bit memory size). */
+static PyObject *
+pylzma_ppmd_decompress(PyObject *self, PyObject *args)
+{
+    char *data;
+    PARSE_LENGTH_TYPE length;
+    char *props;
+    PARSE_LENGTH_TYPE propssize;
+    unsigned int outsize;
+    PyObject *result;
+    Byte *tmp;
+    unsigned order;
+    UInt32 memSize;
+    CPpmd7 ppmd;
+    CPpmd7z_RangeDec rc;
+    CByteInToLook s;
+    SRes res = SZ_OK;
+    CMemoryLookInStream stream;
+
+    if (!PyArg_ParseTuple(args, "s#s#I", &data, &length, &props, &propssize, &outsize)) {
+        return NULL;
+    }
+
+    if (propssize != 5) {
+        PyErr_Format(PyExc_TypeError, "properties must be exactly 5 bytes, got %ld", (long) propssize);
+        return NULL;
+    }
+
+    order = props[0];
+    memSize = GetUi32(props + 1);
+    if (order < PPMD7_MIN_ORDER ||
+        order > PPMD7_MAX_ORDER ||
+        memSize < PPMD7_MIN_MEM_SIZE ||
+        memSize > PPMD7_MAX_MEM_SIZE) {
+        PyErr_SetString(PyExc_TypeError, "unsupported compression properties");
+        return NULL;
+    }
+
+    if (!outsize) {
+        return PyBytes_FromString("");
+    }
+
+    Ppmd7_Construct(&ppmd);
+    if (!Ppmd7_Alloc(&ppmd, memSize, &allocator)) {
+        return PyErr_NoMemory();
+    }
+    Ppmd7_Init(&ppmd, order);
+
+    result = PyBytes_FromStringAndSize(NULL, outsize);
+    if (!result) {
+        Ppmd7_Free(&ppmd, &allocator);
+        return NULL;
+    }
+
+    CreateMemoryLookInStream(&stream, (Byte*) data, length);
+    tmp = (Byte *) PyBytes_AS_STRING(result);
+    Py_BEGIN_ALLOW_THREADS
+    Ppmd7z_RangeDec_CreateVTable(&rc);
+    s.vt.Read = ReadByte;
+    s.inStream = &stream.s;
+    s.begin = s.end = s.cur = NULL;
+    s.extra = False;
+    s.res = SZ_OK;
+    s.processed = 0;
+    rc.Stream = &s.vt;
+    if (!Ppmd7z_RangeDec_Init(&rc)) {
+        res = SZ_ERROR_DATA;
+    } else if (s.extra) {
+        res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+    } else {
+        SizeT i;
+        for (i = 0; i < outsize; i++) {
+            int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
+            if (s.extra || sym < 0) {
+                break;
+            }
+            tmp[i] = (Byte)sym;
+        }
+        /* short output, unconsumed input or an unclean decoder state is an error */
+        if (i != outsize) {
+            res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
+        } else if (s.processed + (s.cur - s.begin) != length || !Ppmd7z_RangeDec_IsFinishedOK(&rc)) {
+            res = SZ_ERROR_DATA;
+        }
+    }
+    Py_END_ALLOW_THREADS
+    Ppmd7_Free(&ppmd, &allocator);
+    if (res != SZ_OK) {
+        Py_DECREF(result);
+        PyErr_SetString(PyExc_TypeError, "error during decompression");
+        result = NULL;
+    }
+    return result;
+}
+
 PyMethodDef
 methods[] = {
     // exported functions
@@ -372,6 +512,8 @@ methods[] = {
     // Delta
     {"delta_decode", (PyCFunction)pylzma_delta_decode, METH_VARARGS, (char *)&doc_delta_decode},
     {"delta_encode", (PyCFunction)pylzma_delta_encode, METH_VARARGS, (char *)&doc_delta_encode},
+    // PPMd
+    {"ppmd_decompress", (PyCFunction)pylzma_ppmd_decompress, METH_VARARGS, (char *)&doc_ppmd_decompress},
     {NULL, NULL},
 };
 
diff --git a/src/pylzma/pylzma_streams.c b/src/pylzma/pylzma_streams.c
index 73bf322..6586c4a 100644
--- a/src/pylzma/pylzma_streams.c
+++ b/src/pylzma/pylzma_streams.c
@@ -177,3 +177,67 @@ MemoryOutStreamDiscard(CMemoryOutStream *stream, size_t size)
         stream->size -= size;
     }
 }
+
+/* Copy up to *size bytes into buf and consume them; *size receives the count. */
+static SRes
+MemoryLookInStream_Read(const ILookInStream *p, void *buf, size_t *size)
+{
+    CMemoryLookInStream *self = (CMemoryLookInStream *) p;
+    size_t toread = *size;
+    if (toread > self->avail) {
+        toread = self->avail;
+    }
+    memcpy(buf, self->data, toread);
+    self->data += toread;
+    self->avail -= toread;
+    *size = toread;
+    return SZ_OK;
+}
+
+/* Expose up to *size pending bytes through *buf without consuming them. */
+static SRes
+MemoryLookInStream_Look(const ILookInStream *p, const void **buf, size_t *size)
+{
+    CMemoryLookInStream *self = (CMemoryLookInStream *) p;
+    size_t toread = *size;
+    if (toread > self->avail) {
+        toread = self->avail;
+    }
+    *buf = self->data;
+    *size = toread;
+    return SZ_OK;
+}
+
+/* Consume up to "offset" bytes (clamped to what is still available). */
+static SRes
+MemoryLookInStream_Skip(const ILookInStream *p, size_t offset)
+{
+    CMemoryLookInStream *self = (CMemoryLookInStream *) p;
+    size_t toread = offset;
+    if (toread > self->avail) {
+        toread = self->avail;
+    }
+    self->data += toread;
+    self->avail -= toread;
+    return SZ_OK;
+}
+
+/* Seeking is not supported by the in-memory stream. */
+static SRes
+MemoryLookInStream_Seek(const ILookInStream *p, Int64 *pos, ESzSeek origin)
+{
+    return SZ_ERROR_UNSUPPORTED;
+}
+
+/* Initialize "stream" to read "size" bytes starting at "data". */
+void
+CreateMemoryLookInStream(CMemoryLookInStream *stream, Byte *data, size_t size)
+{
+    stream->s.Read = MemoryLookInStream_Read;
+    stream->s.Look = MemoryLookInStream_Look;
+    stream->s.Skip = MemoryLookInStream_Skip;
+    stream->s.Seek = MemoryLookInStream_Seek;
+    stream->data = data;
+    stream->size = size;
+    stream->avail = size;
+}
diff --git a/src/pylzma/pylzma_streams.h b/src/pylzma/pylzma_streams.h
index 2e8b35d..e1de85f 100644
--- a/src/pylzma/pylzma_streams.h
+++ b/src/pylzma/pylzma_streams.h
@@ -64,4 +64,15 @@ typedef struct
 void CreateMemoryOutStream(CMemoryOutStream *stream);
 void MemoryOutStreamDiscard(CMemoryOutStream *stream, size_t size);
 
+/* ILookInStream implementation that reads from a caller-provided memory buffer. */
+typedef struct
+{
+    ILookInStream s;
+    Byte *data;      /* next unread byte */
+    size_t size;     /* size passed to CreateMemoryLookInStream */
+    size_t avail;    /* bytes not yet consumed */
+} CMemoryLookInStream;
+
+void CreateMemoryLookInStream(CMemoryLookInStream *stream, Byte *data, size_t size);
+
 #endif
diff --git a/tests/data/ppmd.7z b/tests/data/ppmd.7z
new file mode 100644
index 0000000..54664a4
Binary files /dev/null and b/tests/data/ppmd.7z differ
diff --git a/tests/test_7zfiles.py b/tests/test_7zfiles.py
index 45177ed..2c1e99b 100644
--- a/tests/test_7zfiles.py
+++ b/tests/test_7zfiles.py
@@ -225,6 +225,10 @@ def test_lzma_bcj_sparc(self):
         # test loading of lzma compressed, filtered through bcj / SPARC
         self._test_archive('lzma_bcj_sparc.7z')
 
+    def test_ppmd(self):
+        # test loading of ppmd compressed archives
+        self._test_archive('ppmd.7z')
+
     def test_regress_1(self):
         # prevent regression bug #1 reported by mail
         fp = self._open_file(os.path.join(ROOT, 'data', 'regress_1.7z'), 'rb')