Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix error while reading directory with shift_jis encoded name #124

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 49 additions & 33 deletions pycdlib/pycdlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,8 +705,8 @@ def _seek_to_extent(self, extent):
self._cdfp.seek(extent * self.logical_block_size)

@functools.lru_cache(maxsize=256)
def _find_iso_record(self, iso_path):
# type: (bytes) -> dr.DirectoryRecord
def _find_iso_record(self, iso_path, encoding='utf-8'):
# type: (bytes, str) -> dr.DirectoryRecord
"""
An internal method to find a directory record on the ISO given an ISO
path. If the entry is found, it returns the directory record object
Expand All @@ -718,11 +718,11 @@ def _find_iso_record(self, iso_path):
Returns:
The directory record entry representing the entry on the ISO.
"""
return _find_dr_record_by_name(self.pvd, iso_path, 'utf-8')
return _find_dr_record_by_name(self.pvd, iso_path, encoding)

@functools.lru_cache(maxsize=256)
def _find_rr_record(self, rr_path):
# type: (bytes) -> dr.DirectoryRecord
def _find_rr_record(self, rr_path, encoding='utf-8'):
# type: (bytes, str) -> dr.DirectoryRecord
"""
An internal method to find a directory record on the ISO given a Rock
Ridge path. If the entry is found, it returns the directory record
Expand All @@ -742,7 +742,7 @@ def _find_rr_record(self, rr_path):

splitpath = utils.split_path(rr_path)

currpath = splitpath.pop(0).decode('utf-8').encode('utf-8')
currpath = splitpath.pop(0).decode('utf-8').encode(encoding)

entry = root_dir_record

Expand Down Expand Up @@ -793,13 +793,13 @@ def _find_rr_record(self, rr_path):
if not child.is_dir():
break
entry = child
currpath = splitpath.pop(0).decode('utf-8').encode('utf-8')
currpath = splitpath.pop(0).decode('utf-8').encode(encoding)

raise pycdlibexception.PyCdlibInvalidInput('Could not find path')

@functools.lru_cache(maxsize=256)
def _find_joliet_record(self, joliet_path):
# type: (bytes) -> dr.DirectoryRecord
def _find_joliet_record(self, joliet_path, encoding='utf-16_be'):
# type: (bytes, str) -> dr.DirectoryRecord
"""
An internal method to find a directory record on the ISO given a Joliet
path. If the entry is found, it returns the directory record object
Expand All @@ -813,7 +813,7 @@ def _find_joliet_record(self, joliet_path):
"""
if self.joliet_vd is None:
raise pycdlibexception.PyCdlibInternalError('Joliet path requested on non-Joliet ISO')
return _find_dr_record_by_name(self.joliet_vd, joliet_path, 'utf-16_be')
return _find_dr_record_by_name(self.joliet_vd, joliet_path, encoding)

@functools.lru_cache(maxsize=256)
def _find_udf_record(self, udf_path):
Expand Down Expand Up @@ -2412,8 +2412,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
utils.copy_data(data_len, blocksize, data_fp, outfp)

def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
joliet_path):
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
joliet_path, encoding=None):
# type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], Optional[str]) -> None
"""
An internal method to fetch a single file from the ISO and write it out
to the file object.
Expand All @@ -2433,13 +2433,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
if joliet_path is not None:
if self.joliet_vd is None:
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a joliet_path from a non-Joliet ISO')
found_record = self._find_joliet_record(joliet_path)
encoding = encoding or 'utf-16_be'
found_record = self._find_joliet_record(joliet_path, encoding)
elif rr_path is not None:
if not self.rock_ridge:
raise pycdlibexception.PyCdlibInvalidInput('Cannot fetch a rr_path from a non-Rock Ridge ISO')
found_record = self._find_rr_record(rr_path)
encoding = encoding or 'utf-8'
found_record = self._find_rr_record(rr_path, encoding)
elif iso_path is not None:
found_record = self._find_iso_record(iso_path)
encoding = encoding or 'utf-8'
found_record = self._find_iso_record(iso_path, encoding)
else:
raise pycdlibexception.PyCdlibInternalError('Invalid path passed to get_file_from_iso_fp')

Expand Down Expand Up @@ -3471,8 +3474,8 @@ def _rm_joliet_dir(self, joliet_path):

return num_bytes_to_remove

def _get_iso_entry(self, iso_path):
# type: (bytes) -> dr.DirectoryRecord
def _get_iso_entry(self, iso_path, encoding='utf-8'):
# type: (bytes, str) -> dr.DirectoryRecord
"""
Internal method to get the directory record for an ISO path.

Expand All @@ -3484,10 +3487,10 @@ def _get_iso_entry(self, iso_path):
if self._needs_reshuffle:
self._reshuffle_extents()

return self._find_iso_record(iso_path)
return self._find_iso_record(iso_path, encoding)

def _get_rr_entry(self, rr_path):
# type: (bytes) -> dr.DirectoryRecord
def _get_rr_entry(self, rr_path, encoding='utf-8'):
# type: (bytes, str) -> dr.DirectoryRecord
"""
Internal method to get the directory record for a Rock Ridge path.

Expand All @@ -3500,10 +3503,10 @@ def _get_rr_entry(self, rr_path):
if self._needs_reshuffle:
self._reshuffle_extents()

return self._find_rr_record(rr_path)
return self._find_rr_record(rr_path, encoding)

def _get_joliet_entry(self, joliet_path):
# type: (bytes) -> dr.DirectoryRecord
def _get_joliet_entry(self, joliet_path, encoding='utf-16_be'):
# type: (bytes, str) -> dr.DirectoryRecord
"""
Internal method to get the directory record for a Joliet path.

Expand All @@ -3516,7 +3519,7 @@ def _get_joliet_entry(self, joliet_path):
if self._needs_reshuffle:
self._reshuffle_extents()

return self._find_joliet_record(joliet_path)
return self._find_joliet_record(joliet_path, encoding)

def _get_udf_entry(self, udf_path):
# type: (str) -> udfmod.UDFFileEntry
Expand Down Expand Up @@ -4172,6 +4175,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
with iso_path, rr_path, and udf_path).
udf_path - The absolute UDF path to lookup on the ISO (exclusive with
iso_path, rr_path, and joliet_path).
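encoding - The encoding to use when looking up iso_path, rr_path, or
joliet_path (defaults to utf-16_be for joliet_path and utf-8
otherwise); it is not used for udf_path.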
Returns:
Nothing.
"""
Expand All @@ -4183,6 +4187,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
iso_path = None
rr_path = None
udf_path = None
encoding = None
num_paths = 0
for key, value in kwargs.items():
if key == 'blocksize':
Expand Down Expand Up @@ -4213,6 +4218,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
num_paths += 1
elif value is not None:
raise pycdlibexception.PyCdlibInvalidInput('udf_path must be a string')
elif key == 'encoding':
encoding = value
else:
raise pycdlibexception.PyCdlibInvalidInput('Unknown keyword %s' % (key))

Expand All @@ -4223,7 +4230,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
self._udf_get_file_from_iso_fp(outfp, blocksize, udf_path)
else:
self._get_file_from_iso_fp(outfp, blocksize, iso_path, rr_path,
joliet_path)
joliet_path, encoding)

def get_and_write(self, iso_path, local_path, blocksize=8192):
# type: (str, str, int) -> None
Expand Down Expand Up @@ -5459,6 +5466,8 @@ def list_children(self, **kwargs):
if key in ('joliet_path', 'rr_path', 'iso_path', 'udf_path'):
if value is not None:
num_paths += 1
elif key in ('encoding'):
continue
else:
raise pycdlibexception.PyCdlibInvalidInput("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'")

Expand All @@ -5476,12 +5485,15 @@ def list_children(self, **kwargs):
else:
use_rr = False
if 'joliet_path' in kwargs:
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']))
kwargs['encoding'] = kwargs.get('encoding', None) or 'utf-16_be'
rec = self._get_joliet_entry(self._normalize_joliet_path(kwargs['joliet_path']), kwargs['encoding'])
elif 'rr_path' in kwargs:
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']))
kwargs['encoding'] = kwargs.get('encoding', None) or 'utf-8'
rec = self._get_rr_entry(utils.normpath(kwargs['rr_path']), kwargs['encoding'])
use_rr = True
else:
rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']))
kwargs['encoding'] = kwargs.get('encoding', None) or 'utf-8'
rec = self._get_iso_entry(utils.normpath(kwargs['iso_path']), kwargs['encoding'])

for c in _yield_children(rec, use_rr):
yield c
Expand Down Expand Up @@ -5626,8 +5638,8 @@ def rm_isohybrid(self):

self.isohybrid_mbr = None

def full_path_from_dirrecord(self, rec, rockridge=False):
# type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
def full_path_from_dirrecord(self, rec, rockridge=False, user_encoding=None):
# type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, Optional[str]) -> str
"""
Get the absolute path of a directory record.

Expand All @@ -5646,6 +5658,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
if self.joliet_vd is not None and id(rec.vd) == id(self.joliet_vd):
encoding = 'utf-16_be'

if user_encoding:
encoding = user_encoding
# A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
# Always return / here.
if rec.is_root:
Expand Down Expand Up @@ -5685,6 +5699,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
encoding = rec.file_ident.encoding
else:
encoding = 'utf-8'
if user_encoding:
encoding = user_encoding
udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
while udf_rec is not None:
ident = udf_rec.file_identifier()
Expand Down Expand Up @@ -5893,13 +5909,13 @@ def walk(self, **kwargs):
while dirs:
dir_record = dirs.popleft()

relpath = self.full_path_from_dirrecord(dir_record,
rockridge=path_type == 'rr_path')
relpath = self.full_path_from_dirrecord(dir_record, rockridge=path_type == 'rr_path',
user_encoding=user_encoding)
dirlist = []
filelist = []
dirdict = {}

for child in reversed(list(self.list_children(**{path_type: relpath}))):
for child in reversed(list(self.list_children(**{path_type: relpath, 'encoding': kwargs.get('encoding', None)}))):
if child is None or child.is_dot() or child.is_dotdot():
continue

Expand Down