Test to/from_string

Closes: #10
APN-Pucky · Oct 13, 2024 · 485d384 · 485d384
1 parent aff863d
commit 485d384
Show file tree

Hide file tree

Showing 11 changed files with 251 additions and 86 deletions.
diff --git a/src/babyyoda/grogu/histo1d_v2.py b/src/babyyoda/grogu/histo1d_v2.py
@@ -17,6 +17,11 @@ class Bin:
         d_sumwx2: float = 0.0
         d_numentries: float = 0.0
 
+        def __post_init__(self):
+            assert (
+                self.d_xmin is None or self.d_xmax is None or self.d_xmin < self.d_xmax
+            )
+
         ########################################################
         # YODA compatibility code
         ########################################################
@@ -129,17 +134,17 @@ def xVariance(self):
         def numEntries(self):
             return self.d_numentries
 
-        def __eq__(self, other):
-            return (
-                isinstance(other, GROGU_HISTO1D_V2.Bin)
-                and self.d_xmin == other.d_xmin
-                and self.d_xmax == other.d_xmax
-                and self.d_sumw == other.d_sumw
-                and self.d_sumw2 == other.d_sumw2
-                and self.d_sumwx == other.d_sumwx
-                and self.d_sumwx2 == other.d_sumwx2
-                and self.d_numentries == other.d_numentries
-            )
+        # def __eq__(self, other):
+        #    return (
+        #        isinstance(other, GROGU_HISTO1D_V2.Bin)
+        #        and self.d_xmin == other.d_xmin
+        #        and self.d_xmax == other.d_xmax
+        #        and self.d_sumw == other.d_sumw
+        #        and self.d_sumw2 == other.d_sumw2
+        #        and self.d_sumwx == other.d_sumwx
+        #        and self.d_sumwx2 == other.d_sumwx2
+        #        and self.d_numentries == other.d_numentries
+        #    )
 
         def __add__(self, other):
             assert isinstance(other, GROGU_HISTO1D_V2.Bin)
@@ -302,8 +307,11 @@ def to_string(histo) -> str:
         return f"{header}{stats}{underflow}\n{overflow}\n{legend}{bin_data}\n{footer}"
 
     @classmethod
-    def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO1D_V2":
+    def from_string(cls, file_content: str) -> "GROGU_HISTO1D_V2":
         lines = file_content.strip().splitlines()
+        key = ""
+        if find := re.search(r"BEGIN YODA_HISTO1D_V2 (\S+)", lines[0]):
+            key = find.group(1)
 
         # Extract metadata (path, title)
         path = ""
@@ -322,7 +330,11 @@ def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO1D_V2":
         data_section_started = False
 
         for line in lines:
-            if line.startswith("#"):
+            if line.startswith("BEGIN YODA_HISTO1D_V2"):
+                continue
+            if line.startswith("END YODA_HISTO1D_V2"):
+                break
+            if line.startswith("#") or line.isspace():
                 continue
             if line.startswith("---"):
                 data_section_started = True
@@ -332,18 +344,18 @@ def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO1D_V2":
 
             values = re.split(r"\s+", line.strip())
             if values[0] == "Underflow":
-                underflow = GROGU_HISTO1D_V2.Bin.from_string(line)
+                underflow = cls.Bin.from_string(line)
             elif values[0] == "Overflow":
-                overflow = GROGU_HISTO1D_V2.Bin.from_string(line)
+                overflow = cls.Bin.from_string(line)
             elif values[0] == "Total":
                 # ignore for now
                 pass
             else:
                 # Regular bin
-                bins.append(GROGU_HISTO1D_V2.Bin.from_string(line))
+                bins.append(cls.Bin.from_string(line))
 
         # Create and return the YODA_HISTO1D_V2 object
-        return GROGU_HISTO1D_V2(
+        return cls(
             d_key=key,
             d_path=path,
             d_title=title,

diff --git a/src/babyyoda/grogu/histo1d_v3.py b/src/babyyoda/grogu/histo1d_v3.py
@@ -155,7 +155,9 @@ def from_string(cls, string: str) -> "GROGU_HISTO1D_V3.Bin":
     def __post_init__(self):
         self.d_type = "Histo1D"
         # one more edge than bins, subtract 2 for underflow and overflow
-        assert len(self.d_edges) == len(self.d_bins) + 1 - 2
+        assert (
+            len(self.d_edges) == len(self.d_bins) + 1 - 2
+        ), f"{len(self.d_edges)} != {len(self.d_bins)} + 1 - 2"
 
     ############################################
     # YODA compatibility code
@@ -238,8 +240,11 @@ def rebinXTo(self, edges: List[float]):
         assert len(self.d_bins) == len(self.xEdges()) - 1 + 2
 
     @classmethod
-    def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO1D_V3":
+    def from_string(cls, file_content: str) -> "GROGU_HISTO1D_V3":
         lines = file_content.strip().splitlines()
+        key = ""
+        if find := re.search(r"BEGIN YODA_HISTO1D_V3 (\S+)", lines[0]):
+            key = find.group(1)
 
         # Extract metadata (path, title)
         path = ""
@@ -258,7 +263,11 @@ def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO1D_V3":
         data_section_started = False
 
         for line in lines:
-            if line.startswith("#"):
+            if line.startswith("BEGIN YODA_HISTO1D_V3"):
+                continue
+            if line.startswith("END YODA_HISTO1D_V3"):
+                break
+            if line.startswith("#") or line.isspace():
                 continue
             if line.startswith("---"):
                 data_section_started = True
@@ -267,17 +276,15 @@ def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO1D_V3":
                 continue
 
             if line.startswith("Edges"):
-                content = re.findall(r"\[(.*?)\]", line)
-                numbers_as_strings = re.findall(
-                    r"[-+]?\d*\.\d+e[+-]?\d+|\d+", content[0]
-                )
-                edges = [float(i) for i in numbers_as_strings]
+                content = re.findall(r"\[(.*?)\]", line)[0]
+                values = re.split(r"\s+", content.replace(",", ""))
+                edges = [float(i) for i in values]
                 continue
 
-            bins.append(GROGU_HISTO1D_V3.Bin.from_string(line))
+            bins.append(cls.Bin.from_string(line))
 
        # Create and return the GROGU_HISTO1D_V3 object
-        return GROGU_HISTO1D_V3(
+        return cls(
             d_key=key,
             d_path=path,
             d_title=title,
@@ -303,8 +310,8 @@ def to_string(self):
 
         edges = f"Edges(A1): [{', '.join(str(e) for e in self.d_edges)}]\n"
         # Add the bin data
-        bin_data = "\n".join(GROGU_HISTO1D_V3.Bin.to_string(b) for b in self.bins())
+        bin_data = "\n".join(GROGU_HISTO1D_V3.Bin.to_string(b) for b in self.bins(True))
 
         footer = "END YODA_HISTO1D_V3\n"
 
-        return f"{header}{stats}{edges}\n\n# sumW\t sumW2\t sumW(A1)\t sumW2(A1)\t numEntries\n{bin_data}\n{footer}"
+        return f"{header}{stats}{edges}# sumW\t sumW2\t sumW(A1)\t sumW2(A1)\t numEntries\n{bin_data}\n{footer}"
diff --git a/src/babyyoda/grogu/histo2d_v2.py b/src/babyyoda/grogu/histo2d_v2.py
@@ -205,14 +205,18 @@ def to_string(self) -> str:
 
         legend = "# xlow\t xhigh\t ylow\t yhigh\t sumw\t sumw2\t sumwx\t sumwx2\t sumwy\t sumwy2\t sumwxy\t numEntries\n"
         bin_data = "\n".join(b.to_string() for b in self.d_bins)
-        footer = "END YODA_HISTO2D_V2\n"
+        footer = "\nEND YODA_HISTO2D_V2\n"
 
         return f"{header}{stats}{legend}{bin_data}{footer}"
 
     @classmethod
-    def from_string(cls, file_content: str, name: str = "") -> "GROGU_HISTO2D_V2":
+    def from_string(cls, file_content: str) -> "GROGU_HISTO2D_V2":
         lines = file_content.strip().splitlines()
 
+        key = ""
+        if find := re.search(r"BEGIN YODA_HISTO2D_V2 (\S+)", lines[0]):
+            key = find.group(1)
+
         # Extract metadata (path, title)
         path = ""
         title = ""
@@ -229,7 +233,11 @@ def from_string(cls, file_content: str, name: str = "") -> "GROGU_HISTO2D_V2":
         data_section_started = False
 
         for line in lines:
-            if line.startswith("#"):
+            if line.startswith("BEGIN YODA_HISTO2D_V2"):
+                continue
+            if line.startswith("END YODA_HISTO2D_V2"):
+                break
+            if line.startswith("#") or line.isspace():
                 continue
             if line.startswith("---"):
                 data_section_started = True
@@ -239,7 +247,7 @@ def from_string(cls, file_content: str, name: str = "") -> "GROGU_HISTO2D_V2":
 
             values = re.split(r"\s+", line.strip())
             if values[0] == "Underflow":
-                underflow = GROGU_HISTO2D_V2.Bin(
+                underflow = cls.Bin(
                     None,
                     None,
                     None,
@@ -254,7 +262,7 @@ def from_string(cls, file_content: str, name: str = "") -> "GROGU_HISTO2D_V2":
                     float(values[9]),
                 )
             elif values[0] == "Overflow":
-                overflow = GROGU_HISTO2D_V2.Bin(
+                overflow = cls.Bin(
                     None,
                     None,
                     None,
@@ -286,7 +294,7 @@ def from_string(cls, file_content: str, name: str = "") -> "GROGU_HISTO2D_V2":
                     numEntries,
                 ) = map(float, values)
                 bins.append(
-                    GROGU_HISTO2D_V2.Bin(
+                    cls.Bin(
                         xlow,
                         xhigh,
                         ylow,
@@ -302,8 +310,8 @@ def from_string(cls, file_content: str, name: str = "") -> "GROGU_HISTO2D_V2":
                     )
                 )
 
-        return GROGU_HISTO2D_V2(
-            d_key=name,
+        return cls(
+            d_key=key,
             d_path=path,
             d_title=title,
             d_bins=bins,

diff --git a/src/babyyoda/grogu/histo2d_v3.py b/src/babyyoda/grogu/histo2d_v3.py
@@ -177,19 +177,24 @@ def to_string(self) -> str:
         #    f"# Mean: {self.mean()}\n"
         #    f"# Area: {self.area()}\n"
         # )
+        edges = ""
+        for i, edg in enumerate(self.d_edges):
+            edges += f"Edges(A{i+1}): [{', '.join(str(e) for e in edg )}]\n"
 
         legend = (
             "# sumw\t sumw2\t sumwx\t sumwx2\t sumwy\t sumwy2\t sumwxy\t numEntries\n"
         )
         bin_data = "\n".join(b.to_string() for b in self.d_bins)
-        footer = "END YODA_HISTO2D_V3\n"
+        footer = "\nEND YODA_HISTO2D_V3\n"
 
-        return f"{header}{stats}{legend}{bin_data}{footer}"
+        return f"{header}{stats}{edges}{legend}{bin_data}{footer}"
 
     @classmethod
-    def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO2D_V3":
+    def from_string(cls, file_content: str) -> "GROGU_HISTO2D_V3":
         lines = file_content.strip().splitlines()
-
+        key = ""
+        if find := re.search(r"BEGIN YODA_HISTO2D_V3 (\S+)", lines[0]):
+            key = find.group(1)
         # Extract metadata (path, title)
         path = ""
         title = ""
@@ -207,7 +212,11 @@ def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO2D_V3":
         data_section_started = False
 
         for line in lines:
-            if line.startswith("#"):
+            if line.startswith("BEGIN YODA_HISTO2D_V3"):
+                continue
+            if line.startswith("END YODA_HISTO2D_V3"):
+                break
+            if line.startswith("#") or line.isspace():
                 continue
             if line.startswith("---"):
                 data_section_started = True
@@ -216,17 +225,15 @@ def from_string(cls, file_content: str, key: str = "") -> "GROGU_HISTO2D_V3":
                 continue
 
             if line.startswith("Edges"):
-                content = re.findall(r"\[(.*?)\]", line)
-                numbers_as_strings = re.findall(
-                    r"[-+]?\d*\.\d+e[+-]?\d+|\d+", content[0]
-                )
-                edges += [[float(i) for i in numbers_as_strings]]
+                content = re.findall(r"\[(.*?)\]", line)[0]
+                values = re.split(r"\s+", content.replace(",", ""))
+                edges += [[float(i) for i in values]]
                 continue
 
-            bins.append(GROGU_HISTO2D_V3.Bin.from_string(line))
+            bins.append(cls.Bin.from_string(line))
 
        # Create and return the GROGU_HISTO2D_V3 object
-        return GROGU_HISTO2D_V3(
+        return cls(
             d_key=key,
             d_path=path,
             d_title=title,

diff --git a/src/babyyoda/grogu/read.py b/src/babyyoda/grogu/read.py
@@ -10,23 +10,25 @@ def read(file_path: str):
     with open(file_path) as f:
         content = f.read()
 
-    pattern = re.compile(r"BEGIN (YODA_[A-Z0-9_]+) ([^\n]+)\n(.*?)\nEND \1", re.DOTALL)
+    pattern = re.compile(
+        r"(BEGIN (YODA_[A-Z0-9_]+) ([^\n]+)\n(.*?)\nEND \2)", re.DOTALL
+    )
     matches = pattern.findall(content)
 
     histograms = {}
 
-    for hist_type, name, body in matches:
+    for full_match, hist_type, name, body in matches:
         if hist_type == "YODA_HISTO1D_V2":
-            hist = GROGU_HISTO1D_V2.from_string(body, name)
+            hist = GROGU_HISTO1D_V2.from_string(full_match)
             histograms[name] = hist
         elif hist_type == "YODA_HISTO1D_V3":
-            hist = GROGU_HISTO1D_V3.from_string(body, name)
+            hist = GROGU_HISTO1D_V3.from_string(full_match)
             histograms[name] = hist
         elif hist_type == "YODA_HISTO2D_V2":
-            hist = GROGU_HISTO2D_V2.from_string(body, name)
+            hist = GROGU_HISTO2D_V2.from_string(full_match)
             histograms[name] = hist
         elif hist_type == "YODA_HISTO2D_V3":
-            hist = GROGU_HISTO2D_V3.from_string(body, name)
+            hist = GROGU_HISTO2D_V3.from_string(full_match)
             histograms[name] = hist
         else:
             # Add other parsing logic for different types if necessary

diff --git a/src/babyyoda/test.py b/src/babyyoda/test.py
@@ -59,7 +59,7 @@ def assert_value1d(gb, yb):
     assert gb.numEntries() == yb.numEntries()
 
 
-def assert_equal_histo1d(gh1, yh1):
+def assert_histo1d(gh1, yh1):
     assert_ao(gh1, yh1)
 
     assert len(gh1.bins()) == len(yh1.bins()), f"{len(gh1.bins())} != {len(yh1.bins())}"