BUG skip zero length truth files more robustly (#17)

* BUG skip zero length truth files more robustly
* TST fix buggy test
* wrong column
* try again
des-science · Aug 4, 2023 · 45fb783 · 45fb783
1 parent 507f0a4
commit 45fb783
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 9 deletions.
diff --git a/montara/eastlake_step.py b/montara/eastlake_step.py
@@ -14,6 +14,39 @@
 from eastlake.des_files import read_pizza_cutter_yaml
 
 
def read_galsim_truth_file(fname):
    """Read a galsim truth file into a structured numpy array.

    The file is expected to consist of one header line of the form
    ``# name1 name2 ...`` as its first line, followed by
    whitespace-delimited data rows.

    Parameters
    ----------
    fname : str
        Path to the truth file.

    Returns
    -------
    numpy.recarray or None
        The data rows with fields named after the header columns, or None
        if the file is zero length or holds no data rows (only comment
        and/or blank lines).

    Raises
    ------
    RuntimeError
        If the file has data rows but its first line is not a ``#`` header.
    """
    import pandas as pd

    # Zero-length files are produced for empty outputs; nothing to read.
    if not os.path.getsize(fname):
        return None

    # Read once; the same lines serve both the scan and the header parse.
    with open(fname, "r") as fp:
        lines = [line.strip() for line in fp]

    has_data = any(line and not line.startswith("#") for line in lines)
    if not has_data:
        return None

    # The parsing below unconditionally treats line 1 as the header
    # (skiprows=[0]), so line 1 itself must be a comment line.  Merely
    # finding a comment or blank line somewhere else in the file would
    # silently drop the first data row and mislabel the columns.
    header_line = lines[0]
    if not header_line.startswith("#"):
        raise RuntimeError("No header line found for truth file %r!" % fname)

    df = pd.read_csv(fname, skiprows=[0], sep=r"\s+", index_col=False, header=None)
    # Drop the leading "#" token; the remaining tokens are the column names.
    df.columns = header_line.split()[1:]
    # String-valued columns are stored as single-character unicode fields.
    stringcols = df.select_dtypes(include='object').columns
    return df.to_records(index=False, column_dtypes={c: "U1" for c in stringcols})
+
+
 class MontaraGalSimRunner(Step):
     """
     Pipeline step which runs galsim
@@ -292,18 +325,11 @@ def update_stash(self, config, stash):
             stash["tilenames"] = tilenames
 
     def _write_truth(self, fnames, tilename, base_dir, stash, bands):
-        import pandas as pd
-
         dtype = None
         data = []
         for fname in fnames:
-            if os.path.getsize(fname):
-                df = pd.read_csv(fname, skiprows=[0], sep=r"\s+", index_col=False, header=None)
-                with open(fname, "r") as fp:
-                    h = fp.readline().strip().split()[1:]
-                df.columns = h
-                stringcols = df.select_dtypes(include='object').columns
-                _d = df.to_records(index=False, column_dtypes={c: "U1" for c in stringcols})
+            _d = read_galsim_truth_file(fname)
+            if _d is not None:
                 self.logger.info("read truth file with dtype: %r", _d.dtype.descr)
                 data.append(_d)
                 if dtype is None:

diff --git a/montara/tests/test_truth_file.py b/montara/tests/test_truth_file.py
@@ -0,0 +1,32 @@
+import tempfile
+import os
+
+from montara.eastlake_step import read_galsim_truth_file
+
+import pytest
+
+
@pytest.mark.parametrize("head", [True, False])
@pytest.mark.parametrize("ndata", [0, 1, 2])
def test_read_file(head, ndata):
    """Check read_galsim_truth_file with and without a header and 0-2 rows.

    Data without a header must raise; no data rows must yield None;
    otherwise the record dtype and row count are verified.
    """
    band_letters = "abc"
    rows = [
        "%d %s %f\n" % (idx, band_letters[idx], idx*3.14159)
        for idx in range(ndata)
    ]

    with tempfile.TemporaryDirectory() as workdir:
        path = os.path.join(workdir, "blah.dat")
        with open(path, "w") as out:
            if head:
                out.write("# a band c\n")
            for row in rows:
                out.write(row)

        # Data rows without any header line are an error.
        if ndata > 0 and not head:
            with pytest.raises(RuntimeError) as excinfo:
                read_galsim_truth_file(path)

            assert "No header line found for truth file" in str(excinfo.value)
            return

        result = read_galsim_truth_file(path)
        if ndata == 0:
            # Empty or header-only files produce no array at all.
            assert result is None
            return

        assert result.dtype.descr == [("a", "<i8"), ("band", "<U1"), ("c", "<f8")]
        assert result.shape[0] == ndata