From 03e33a15a6e6d3dafedd77b507955bd217a3ffaa Mon Sep 17 00:00:00 2001 From: Viktor Kopp Date: Sat, 16 May 2020 16:19:44 +0200 Subject: [PATCH] Remove extension symbols count limit (#171) - For files with no extension basename is returned as extension - In file types table extensions sorted by files count in group --- .travis.yml | 2 +- README.md | 3 --- analysis/gitrevision.py | 6 ++---- tools/__init__.py | 18 ++++++++++++------ tools/configuration.py | 1 - tools/tests/__init__.py | 0 tools/tests/test_tools.py | 13 +++++++++++++ 7 files changed, 28 insertions(+), 15 deletions(-) create mode 100644 tools/tests/__init__.py create mode 100644 tools/tests/test_tools.py diff --git a/.travis.yml b/.travis.yml index ded23c5..1680a1b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,4 +8,4 @@ install: virtualenv: system_site_packages: true script: - - python3 -m unittest \ No newline at end of file + - python3 -m unittest -v \ No newline at end of file diff --git a/README.md b/README.md index 004d9ec..3683755 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,6 @@ Configuration file might contain following fields (all are optional): ```json { "max_domains": 10, - "max_ext_length": 10, "max_authors": 7, "max_plot_authors_count": 10, "max_authors_of_months": 6, @@ -113,8 +112,6 @@ of contributors and activity levels, to avoid showing too much or too little information. * `max_domains`: number of e-mail domains to show in author stats -* `max_ext_length`: max symbols count after `.` in a filename to -consider substring as a file extension * `max_authors`: number of authors in the "top authors" table (other authors are listed without detailed stats) * `max_plot_authors_count`: number of authors to include in plots diff --git a/analysis/gitrevision.py b/analysis/gitrevision.py index 7f6a8b2..e4ebf74 100644 --- a/analysis/gitrevision.py +++ b/analysis/gitrevision.py @@ -1,5 +1,3 @@ -import functools - from tools import get_file_extension from .gitdata import RevisionData, FilesData @@ -26,8 +24,8 @@ def size(self): @property def files_extensions_summary(self): df = self.files_df[["size_bytes", "lines_count"]] - df["extension"] = self.files_df['file'].apply(functools.partial(get_file_extension, max_ext_length=6)) + df["extension"] = self.files_df['file'].apply(get_file_extension) df = df.groupby(by="extension").agg({"size_bytes": ["sum"], "lines_count": ["sum", "count"]}) df.columns = ["size_bytes", "lines_count", "files_count"] df.reset_index() - return df + return df.sort_values(by="files_count", ascending=False) diff --git a/tools/__init__.py b/tools/__init__.py index 99302eb..f0a8329 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -12,10 +12,16 @@ def split_email_address(email_address): return parts[0], parts[1] -def get_file_extension(git_file_path, max_ext_length=5): - filename = os.path.basename(git_file_path) +def get_file_extension(filepath: str): + assert filepath + filename = os.path.basename(filepath) basename_parts = filename.split('.') - ext = basename_parts[1] if len(basename_parts) == 2 and basename_parts[0] else '' - if len(ext) > max_ext_length: - ext = '' - return ext + if len(basename_parts) == 1: + # 'folder/filename'-case + return filename + elif len(basename_parts) == 2 and not basename_parts[0]: + # 'folder/.filename'-case + return filename + else: + # "normal" case + return basename_parts[-1] diff --git a/tools/configuration.py b/tools/configuration.py index ecb6d2f..36c5062 100644 --- a/tools/configuration.py +++ b/tools/configuration.py @@ -92,7 +92,6 @@ def __init__(self, args_orig, **kwargs): def _set_default_configuration(self): self.update({ "max_domains": 10, - "max_ext_length": 10, "max_authors": 20, "max_plot_authors_count": 10, "max_authors_of_months": 6, diff --git a/tools/tests/__init__.py b/tools/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tools/tests/test_tools.py b/tools/tests/test_tools.py new file mode 100644 index 0000000..3756f92 --- /dev/null +++ b/tools/tests/test_tools.py @@ -0,0 +1,13 @@ +import unittest + +import tools + + +class TestTools(unittest.TestCase): + + def test_get_file_extension(self): + self.assertEqual('extension', tools.get_file_extension("folder/filename.extension")) + self.assertEqual('.extension', tools.get_file_extension("folder/.extension")) + self.assertEqual('extension', tools.get_file_extension("folder/filename.suffix.extension")) + self.assertEqual('FILENAME', tools.get_file_extension("folder/FILENAME")) + self.assertEqual('extension', tools.get_file_extension("folder/.filename.suffix.extension"))