From edecc1eaae3a6998c827945a60c121bdd6acd805 Mon Sep 17 00:00:00 2001 From: loreloc Date: Fri, 7 Jul 2023 22:38:13 +0100 Subject: [PATCH 1/7] new bookkeeping --- cirkit/layers/sum_product/base.py | 2 +- cirkit/layers/sum_product/cp.py | 24 ++-- cirkit/models/tensorized_circuit.py | 208 +++++++++------------------- 3 files changed, 83 insertions(+), 151 deletions(-) diff --git a/cirkit/layers/sum_product/base.py b/cirkit/layers/sum_product/base.py index 67e8f35e..b6616f6f 100644 --- a/cirkit/layers/sum_product/base.py +++ b/cirkit/layers/sum_product/base.py @@ -46,7 +46,7 @@ def reset_parameters(self) -> None: # TODO: what about abstract? @abstractmethod # pylint: disable-next=arguments-differ - def forward(self, log_left: Tensor, log_right: Tensor) -> Tensor: # type: ignore[override] + def forward(self, inputs: Tensor) -> Tensor: # type: ignore[override] """Compute the main einsum operation of the layer. Do SumProductLayer forward pass. diff --git a/cirkit/layers/sum_product/cp.py b/cirkit/layers/sum_product/cp.py index b4956f15..6cb10426 100644 --- a/cirkit/layers/sum_product/cp.py +++ b/cirkit/layers/sum_product/cp.py @@ -38,9 +38,9 @@ def __init__( # type: ignore[misc] super().__init__(rg_nodes, num_input_units, num_output_units) self.prod_exp = prod_exp - self.params_left = nn.Parameter(torch.empty(num_input_units, rank, len(rg_nodes))) - self.params_right = nn.Parameter(torch.empty(num_input_units, rank, len(rg_nodes))) - self.params_out = nn.Parameter(torch.empty(num_output_units, rank, len(rg_nodes))) + self.params_left = nn.Parameter(torch.empty(len(rg_nodes), num_input_units, rank)) + self.params_right = nn.Parameter(torch.empty(len(rg_nodes), num_input_units, rank)) + self.params_out = nn.Parameter(torch.empty(len(rg_nodes), num_output_units, rank)) # TODO: get torch.default_float_dtype # (float ** float) is not guaranteed to be float, but here we know it is @@ -54,36 +54,38 @@ def __init__( # type: ignore[misc] # TODO: use bmm to replace einsum? also axis order? def _forward_left_linear(self, x: Tensor) -> Tensor: - return torch.einsum("bip,irp->brp", x, self.params_left) + return torch.einsum("pbi,pir->pbr", x, self.params_left) def _forward_right_linear(self, x: Tensor) -> Tensor: - return torch.einsum("bip,irp->brp", x, self.params_right) + return torch.einsum("pbi,pir->pbr", x, self.params_right) def _forward_out_linear(self, x: Tensor) -> Tensor: - return torch.einsum("brp,orp->bop", x, self.params_out) + return torch.einsum("pbr,por->pbo", x, self.params_out) def _forward_linear(self, left: Tensor, right: Tensor) -> Tensor: left_hidden = self._forward_left_linear(left) right_hidden = self._forward_right_linear(right) return self._forward_out_linear(left_hidden * right_hidden) - def forward(self, log_left: Tensor, log_right: Tensor) -> Tensor: # type: ignore[override] + def forward(self, inputs: Tensor) -> Tensor: # type: ignore[override] """Compute the main Einsum operation of the layer. :param log_left: value in log space for left child. :param log_right: value in log space for right child. :return: result of the left operations, in log-space. """ + log_left, log_right = inputs[:, 0], inputs[:, 1] + # TODO: do we split into two impls? 
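The parameter tensors above now put the fold axis first, so every per-fold product-and-sum becomes a plain batched matrix multiplication, and the two children arrive stacked along dim 1 of a single `inputs` tensor (`inputs[:, 0]` and `inputs[:, 1]`). A minimal sketch, outside the patch and with illustrative shapes only, of why the new "pbi,pir->pbr" einsum is the batched matmul that the TODO above hints at:

import torch

num_folds, batch_size, num_input_units, rank = 4, 8, 3, 5
x = torch.randn(num_folds, batch_size, num_input_units)   # (p, b, i): one slice per fold
w = torch.rand(num_folds, num_input_units, rank)           # (p, i, r), e.g. params_left
out_einsum = torch.einsum("pbi,pir->pbr", x, w)            # (p, b, r)
out_bmm = torch.bmm(x, w)                                  # the same batched matmul over folds
assert torch.allclose(out_einsum, out_bmm)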
if self.prod_exp: - return log_func_exp(log_left, log_right, func=self._forward_linear, dim=1, keepdim=True) + return log_func_exp(log_left, log_right, func=self._forward_linear, dim=2, keepdim=True) log_left_hidden = log_func_exp( - log_left, func=self._forward_left_linear, dim=1, keepdim=True + log_left, func=self._forward_left_linear, dim=2, keepdim=True ) log_right_hidden = log_func_exp( - log_right, func=self._forward_right_linear, dim=1, keepdim=True + log_right, func=self._forward_right_linear, dim=2, keepdim=True ) return log_func_exp( - log_left_hidden + log_right_hidden, func=self._forward_out_linear, dim=1, keepdim=True + log_left_hidden + log_right_hidden, func=self._forward_out_linear, dim=2, keepdim=True ) diff --git a/cirkit/models/tensorized_circuit.py b/cirkit/models/tensorized_circuit.py index 47348959..e3f3ea0c 100644 --- a/cirkit/models/tensorized_circuit.py +++ b/cirkit/models/tensorized_circuit.py @@ -1,6 +1,7 @@ from collections import defaultdict from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Type, Union +import numpy as np import torch from torch import Tensor, nn @@ -14,13 +15,6 @@ # TODO: rework docstrings -class _TwoInputs(NamedTuple): - """Provide names for left and right inputs.""" - - left: RegionNode - right: RegionNode - - class TensorizedPC(nn.Module): # pylint: disable=too-many-instance-attributes """Tensorized and folded PC implementation.""" @@ -88,117 +82,61 @@ def __init__( # type: ignore[misc] # A dictionary mapping each region node ID to # (i) its index in the corresponding fold, and - # (ii) the layer that computes such fold. - region_id_fold: Dict[int, Tuple[int, Layer]] = {} + # (ii) the id of the layer that computes such fold (-1 for the input layer) + region_id_fold: Dict[int, Tuple[int, int]] = {} for i, region in enumerate(self.graph_layers[0][1]): - region_id_fold[region.get_id()] = (i, self.input_layer) + region_id_fold[region.get_id()] = (i, 0) + + # A dictionary mapping layer ids to the number of folds + num_folds = [len(self.graph_layers[0][1])] - # Book-keeping: None for input, Tensor for mixing, Tuple for einsum + # Book-keeping: for each layer self.bookkeeping: List[ - Union[ - Tuple[Tuple[List[Layer], Tensor], Tuple[List[Layer], Tensor]], Tuple[Layer, Tensor] - ] + Tuple[bool, List[int], Tensor] ] = [] # Build inner layers - inner_layers: List[Layer] = [] - # TODO: use start as kwarg? - for idx, (partition_layer, region_layer) in enumerate(self.graph_layers[1:], start=1): - # TODO: duplicate check with einet layer, but also useful here? 
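The new bookkeeping replaces per-layer object pointers with integer ids: `region_id_fold` maps each region to a (fold index, layer id) pair, and `num_folds` records how many folds each layer outputs. The offsets built just below with `np.cumsum` then translate such a pair into a position in the concatenation of the input layers' outputs. A toy sketch with made-up numbers, not taken from the patch:

import numpy as np

num_folds = [3, 4, 2]                 # folds produced so far by layers 0, 1 and 2
unique_layer_ids = [0, 2]             # layers whose outputs feed the layer being built
cumulative_idx = np.cumsum([0] + [num_folds[i] for i in unique_layer_ids]).tolist()  # [0, 3, 5]
base_layer_idx = dict(zip(unique_layer_ids, cumulative_idx))                         # {0: 0, 2: 3}
# fold 1 of layer 2 ends up at base_layer_idx[2] + 1 == 4 in the concatenated tensor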
- # out_k = set( - # out_region.k for partition in partition_layer for out_region in partition.outputs - # ) - # assert len(out_k) == 1, f"For internal {c} there are {len(out_k)} nums sums" - # out_k = out_k.pop() - - # TODO: this can be a wrong layer, refer to back up code - # assert out_k > 1 - num_outputs = num_output_units if idx < len(self.graph_layers) - 1 else num_classes - num_inputs = num_input_units if idx == 1 else num_output_units - inner_layer = layer_cls( - partition_layer, num_inputs, num_outputs, **layer_kwargs # type: ignore[misc] - ) - inner_layers.append(inner_layer) - - # get pairs of nodes which are input to the products (list of lists) - # length of the outer list is same as self.products, length of inner lists is 2 - # "left child" has index 0, "right child" has index 1 - two_inputs = [_TwoInputs(*sorted(partition.inputs)) for partition in partition_layer] - # TODO: again, why do we need sorting - # collect all layers which contain left/right children - # TODO: duplicate code - left_region_ids = list(r.left.get_id() for r in two_inputs) - right_region_ids = list(r.right.get_id() for r in two_inputs) - left_layers = list(region_id_fold[i][1] for i in left_region_ids) - right_layers = list(region_id_fold[i][1] for i in right_region_ids) - left_starts = torch.tensor([0] + [layer.fold_count for layer in left_layers]).cumsum( - dim=0 - ) - right_starts = torch.tensor([0] + [layer.fold_count for layer in right_layers]).cumsum( - dim=0 - ) - left_indices = torch.tensor( - [ # type: ignore[misc] - region_id_fold[r.left.get_id()][0] + left_starts[i] - for i, r in enumerate(two_inputs) - ] - ) - right_indices = torch.tensor( - [ # type: ignore[misc] - region_id_fold[r.right.get_id()][0] + right_starts[i] - for i, r in enumerate(two_inputs) - ] + inner_layers: List[SumProductLayer] = [] + for layer_idx, (lpartitions, lregions) in enumerate(self.graph_layers[1:], start=1): + # Gather the input regions of each partition + input_regions = [sorted(p.inputs) for p in lpartitions] + num_input_regions = list(len(ins) for ins in input_regions) + max_num_input_regions = max(num_input_regions) + + input_regions_ids = [list(r.get_id() for r in ins) for ins in input_regions] + input_layers_ids = [list(region_id_fold[i][1] for i in ids) for ids in input_regions_ids] + unique_layer_ids = list(set(i for ids in input_layers_ids for i in ids)) + cumulative_idx = np.cumsum([0] + [num_folds[i] for i in unique_layer_ids]).tolist() + base_layer_idx = {layer_id: idx for layer_id, idx in zip(unique_layer_ids, cumulative_idx)} + + should_pad = False + input_region_indices = list() + for regions in input_regions: + region_indices = list() + for r in regions: + fold_idx, layer_id = region_id_fold[r.get_id()] + region_indices.append(base_layer_idx[layer_id] + fold_idx) + if len(regions) < max_num_input_regions: + should_pad = True + region_indices.extend([-1] * (max_num_input_regions - len(regions))) + input_region_indices.append(region_indices) + + book_entry = (should_pad, unique_layer_ids, torch.tensor(input_region_indices)) + self.bookkeeping.append(book_entry) + + for i, p in enumerate(lpartitions): + # Each partition must belong to exactly one region + assert len(p.outputs) == 1 + out_region = p.outputs[0] + region_id_fold[out_region.get_id()] = (i, layer_idx) + num_folds.append(cumulative_idx[-1]) + + num_outputs = num_output_units if layer_idx < len(self.graph_layers) - 1 else num_classes + num_inputs = num_input_units if layer_idx == 1 else num_output_units + layer = layer_cls( + lpartitions, 
num_inputs, num_outputs, **layer_kwargs # type: ignore[misc] ) - self.bookkeeping.append(((left_layers, left_indices), (right_layers, right_indices))) - - # when the SumProductLayer is followed by a MixingLayer, we produce a - # dummy "node" which outputs 0 (-inf in log-domain) for zero-padding. - dummy_idx: Optional[int] = None - - # the dictionary mixing_component_idx stores which nodes (axis 2 of the - # log-density tensor) need to get mixed - # in the following MixingLayer - mixing_component_idx: Dict[RegionNode, List[int]] = defaultdict(list) - - for part_idx, partition in enumerate(partition_layer): - # each product must have exactly 1 parent (sum node) - assert len(partition.outputs) == 1 - out_region = partition.outputs[0] - - if len(out_region.inputs) == 1: - region_id_fold[out_region.get_id()] = (part_idx, inner_layer) - else: # case followed by MixingLayer - mixing_component_idx[out_region].append(part_idx) - dummy_idx = len(partition_layer) - - # The Mixing layer is only for regions which have multiple partitions as children. - if multi_sums := [region for region in region_layer if len(region.inputs) > 1]: - assert dummy_idx is not None - max_components = max(len(region.inputs) for region in multi_sums) - - # The following code does some bookkeeping. - # padded_idx indexes into the log-density tensor of the previous - # SumProductLayer, padded with a dummy input which - # outputs constantly 0 (-inf in the log-domain), see class SumProductLayer. - padded_idx: List[List[int]] = [] - params_mask: Optional[Tensor] = None - for reg_idx, region in enumerate(multi_sums): - num_components = len(mixing_component_idx[region]) - this_idx = mixing_component_idx[region] + [dummy_idx] * ( - max_components - num_components - ) - padded_idx.append(this_idx) - if max_components > num_components: - if params_mask is None: - params_mask = torch.ones(num_outputs, len(multi_sums), max_components) - params_mask[:, reg_idx, num_components:] = 0.0 - mixing_layer = MixingLayer( - multi_sums, num_outputs, max_components, mask=params_mask - ) - for reg_idx, region in enumerate(multi_sums): - region_id_fold[region.get_id()] = (reg_idx, mixing_layer) - self.bookkeeping.append((inner_layers[-1], torch.tensor(padded_idx))) - inner_layers.append(mixing_layer) + inner_layers.append(layer) # TODO: can we annotate a list here? # TODO: actually we should not mix all the input/mix/ein different types in one list @@ -280,39 +218,31 @@ def forward(self, x: Tensor) -> Tensor: Returns: Tensor: Return value. """ - # TODO: can we have just a dictionary with integer keys instead? 
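The rewritten forward pass below consumes one bookkeeping entry per layer: it concatenates the referenced outputs along the fold dimension and indexes the result with the (num_folds, arity) index tensor, producing a (fold, arity, batch, units) input for the folded layer. A toy sketch of that gather, with assumed shapes that are not taken from the patch:

import torch

# outputs of two earlier layers, each shaped (num_folds, batch_size, num_units)
layer_outputs = [torch.randn(3, 2, 5), torch.randn(4, 2, 5)]
in_layer_ids = [0, 1]                                           # from the bookkeeping entry
fold_idx = torch.tensor([[0, 3], [2, 5], [1, 6]])               # (new_folds, arity) into the concat
x = torch.cat([layer_outputs[i] for i in in_layer_ids], dim=0)  # (3 + 4, 2, 5)
gathered = x[fold_idx]                                          # (3, 2, 2, 5): fold, arity, batch, units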
- # It would be much simpler and clean - outputs: Dict[Layer, Tensor] = {self.input_layer: self.input_layer(x)} + in_outputs = self.input_layer(x) + in_outputs = in_outputs.permute(2, 0, 1) + outputs: List[Tensor] = [in_outputs] + # (batch_size, num_units, num_regions) - # TODO: use zip instead # TODO: Generalize if statements here, they should be layer agnostic - for idx, inner_layer in enumerate(self.inner_layers): - if isinstance(inner_layer, SumProductLayer): # type: ignore[misc] - left_addr, right_addr = self.bookkeeping[idx] - assert isinstance(left_addr, tuple) and isinstance(right_addr, tuple) - # TODO: we should use dim=2, check all code - # TODO: duplicate code - log_left_prob = torch.cat([outputs[layer] for layer in left_addr[0]], dim=2) - log_left_prob = log_left_prob[:, :, left_addr[1]] - log_right_prob = torch.cat([outputs[layer] for layer in right_addr[0]], dim=2) - log_right_prob = log_right_prob[:, :, right_addr[1]] - out = inner_layer(log_left_prob, log_right_prob) - elif isinstance(inner_layer, MixingLayer): - _, padded_idx = self.bookkeeping[idx] - assert isinstance(padded_idx, Tensor) # type: ignore[misc] - # TODO: a better way to pad? - # TODO: padding here breaks bookkeeping by changing the tensors shape. - # We need to find another way to implement it. - # outputs[self.inner_layers[idx - 1]] = F.pad( - # outputs[self.inner_layers[idx - 1]], [0, 1], "constant", float("-inf") - # ) - log_input_prob = outputs[self.inner_layers[idx - 1]][:, :, padded_idx] - out = inner_layer(log_input_prob) + for layer, (should_pad, in_layer_ids, fold_idx) in zip(self.inner_layers, self.bookkeeping): + if isinstance(layer, SumProductLayer): # type: ignore[misc] + # (fold_1 + fold_2, batch_size, units) + print(in_layer_ids) + inputs = torch.cat([outputs[i] for i in in_layer_ids], dim=0) + if should_pad: + # TODO: pad along dim 0 + pass + # (new_fold, arity, batch_size, units) + inputs = inputs[fold_idx] + print(inputs.shape) + output = layer(inputs) + elif isinstance(layer, MixingLayer): + pass else: assert False - outputs[inner_layer] = out + outputs.append(output) - return outputs[self.inner_layers[-1]][:, :, 0] + return outputs[-1][:, :, 0] # TODO: and what's the meaning of this? # def backtrack(self, num_samples=1, class_idx=0, x=None, mode='sampling', **kwargs): From 82d2a6bea839af96b4b7263d784a54f69783fcdd Mon Sep 17 00:00:00 2001 From: loreloc Date: Sat, 8 Jul 2023 00:23:55 +0100 Subject: [PATCH 2/7] probabilities now sum up to one --- cirkit/layers/mixing.py | 10 ++-- cirkit/layers/sum_product/cp.py | 8 ++-- cirkit/models/tensorized_circuit.py | 65 +++++++++++++++++++------- tests/models/pcs/test_tensorized_pc.py | 31 ++++++------ 4 files changed, 74 insertions(+), 40 deletions(-) diff --git a/cirkit/layers/mixing.py b/cirkit/layers/mixing.py index 45944d8c..cdc5532a 100644 --- a/cirkit/layers/mixing.py +++ b/cirkit/layers/mixing.py @@ -74,7 +74,7 @@ def __init__( # TODO: test best perf? 
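The MixingLayer change just below mirrors the CP layer: the fold axis moves to the front, the weights are normalised over dim=1 (the component axis) so that each fold mixes its input partitions with weights summing to one, and the einsum becomes "pibo,pio->pbo". A toy check with illustrative shapes only (the real layer applies this through log_func_exp, i.e. in log space):

import torch

num_folds, num_components, batch_size, num_units = 2, 3, 4, 5
params = torch.rand(num_folds, num_components, num_units)
params = params / params.sum(dim=1, keepdim=True)      # weights sum to 1 over the components
x = torch.rand(num_folds, num_components, batch_size, num_units)
y = torch.einsum("pibo,pio->pbo", x, params)            # convex combination over the components
assert y.shape == (num_folds, batch_size, num_units)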
# param_shape = (len(self.nodes), self.max_components) for better perf - self.params = nn.Parameter(torch.empty(num_output_units, len(rg_nodes), max_components)) + self.params = nn.Parameter(torch.empty(len(rg_nodes), max_components, num_output_units)) self.mask = mask self.param_clamp_value["min"] = torch.finfo(self.params.dtype).smallest_normal @@ -86,12 +86,12 @@ def reset_parameters(self) -> None: with torch.no_grad(): if self.mask is not None: self.params *= self.mask # type: ignore[misc] - self.params /= self.params.sum(dim=2, keepdim=True) # type: ignore[misc] + self.params /= self.params.sum(dim=1, keepdim=True) # type: ignore[misc] def _forward_linear(self, x: Tensor) -> Tensor: if self.mask is not None: - torch.einsum("bonc,onc->bon", x, self.params * self.mask) - return torch.einsum("bonc,onc->bon", x, self.params) + torch.einsum('pibo,pio->pbo', x, self.params * self.mask) + return torch.einsum('pibo,pio->pbo', x, self.params) # TODO: make forward return something # pylint: disable-next=arguments-differ @@ -104,6 +104,6 @@ def forward(self, log_input: Tensor) -> Tensor: # type: ignore[override] Returns: Tensor: the output. """ - return log_func_exp(log_input, func=self._forward_linear, dim=3, keepdim=False) + return log_func_exp(log_input, func=self._forward_linear, dim=1, keepdim=False) # TODO: see commit 084a3685c6c39519e42c24a65d7eb0c1b0a1cab1 for backtrack diff --git a/cirkit/layers/sum_product/cp.py b/cirkit/layers/sum_product/cp.py index 6cb10426..7aaa6ce3 100644 --- a/cirkit/layers/sum_product/cp.py +++ b/cirkit/layers/sum_product/cp.py @@ -54,13 +54,13 @@ def __init__( # type: ignore[misc] # TODO: use bmm to replace einsum? also axis order? def _forward_left_linear(self, x: Tensor) -> Tensor: - return torch.einsum("pbi,pir->pbr", x, self.params_left) + return torch.einsum('pbi,pir->pbr', x, self.params_left) def _forward_right_linear(self, x: Tensor) -> Tensor: - return torch.einsum("pbi,pir->pbr", x, self.params_right) + return torch.einsum('pbi,pir->pbr', x, self.params_right) def _forward_out_linear(self, x: Tensor) -> Tensor: - return torch.einsum("pbr,por->pbo", x, self.params_out) + return torch.einsum('pbr,por->pbo', x, self.params_out) def _forward_linear(self, left: Tensor, right: Tensor) -> Tensor: left_hidden = self._forward_left_linear(left) @@ -87,5 +87,5 @@ def forward(self, inputs: Tensor) -> Tensor: # type: ignore[override] log_right, func=self._forward_right_linear, dim=2, keepdim=True ) return log_func_exp( - log_left_hidden + log_right_hidden, func=self._forward_out_linear, dim=2, keepdim=True + log_left_hidden + log_right_hidden, func=self._forward_out_linear, dim=3, keepdim=True ) diff --git a/cirkit/models/tensorized_circuit.py b/cirkit/models/tensorized_circuit.py index e3f3ea0c..e59c9a2c 100644 --- a/cirkit/models/tensorized_circuit.py +++ b/cirkit/models/tensorized_circuit.py @@ -4,6 +4,7 @@ import numpy as np import torch from torch import Tensor, nn +import torch.nn.functional as F from cirkit.layers.exp_family import ExpFamilyLayer from cirkit.layers.layer import Layer @@ -96,8 +97,8 @@ def __init__( # type: ignore[misc] ] = [] # Build inner layers - inner_layers: List[SumProductLayer] = [] - for layer_idx, (lpartitions, lregions) in enumerate(self.graph_layers[1:], start=1): + inner_layers: List[Layer] = [] + for rg_layer_idx, (lpartitions, lregions) in enumerate(self.graph_layers[1:], start=1): # Gather the input regions of each partition input_regions = [sorted(p.inputs) for p in lpartitions] num_input_regions = list(len(ins) for ins in 
input_regions) @@ -124,20 +125,51 @@ def __init__( # type: ignore[misc] book_entry = (should_pad, unique_layer_ids, torch.tensor(input_region_indices)) self.bookkeeping.append(book_entry) + region_mixing_indices: Dict[int, List[int]] = defaultdict(list) for i, p in enumerate(lpartitions): # Each partition must belong to exactly one region assert len(p.outputs) == 1 out_region = p.outputs[0] - region_id_fold[out_region.get_id()] = (i, layer_idx) - num_folds.append(cumulative_idx[-1]) - - num_outputs = num_output_units if layer_idx < len(self.graph_layers) - 1 else num_classes - num_inputs = num_input_units if layer_idx == 1 else num_output_units + if len(out_region.inputs) == 1: + region_id_fold[out_region.get_id()] = (i, len(inner_layers) + 1) + else: + region_mixing_indices[out_region.get_id()].append(i) + num_folds.append(len(lpartitions)) + + num_outputs = num_output_units if rg_layer_idx < len(self.graph_layers) - 1 else num_classes + num_inputs = num_input_units if rg_layer_idx == 1 else num_output_units layer = layer_cls( lpartitions, num_inputs, num_outputs, **layer_kwargs # type: ignore[misc] ) inner_layers.append(layer) + non_unary_regions = [r for r in lregions if len(r.inputs) > 1] + if not non_unary_regions: + continue + max_num_input_partitions = max(len(r.inputs) for r in non_unary_regions) + + should_pad = False + params_mask: Optional[Tensor] = None + input_partition_indices = list() + for i, region in enumerate(non_unary_regions): + num_input_partitions = len(region.inputs) + partition_indices = region_mixing_indices[region.get_id()] + if max_num_input_partitions > num_input_partitions: + should_pad = True + if params_mask is None: + params_mask = torch.ones(len(non_unary_regions), max_num_input_partitions, num_outputs) + params_mask[:, i, num_input_partitions:] = 0 + partition_indices.extend([-1] * (max_num_input_partitions - num_input_partitions)) + input_partition_indices.append(partition_indices) + region_id_fold[region.get_id()] = (i, len(inner_layers) + 1) + num_folds.append(len(non_unary_regions)) + + mixing_layer = MixingLayer( + non_unary_regions, num_outputs, max_num_input_partitions, mask=params_mask + ) + self.bookkeeping.append((should_pad, [len(inner_layers)], torch.tensor(input_partition_indices))) + inner_layers.append(mixing_layer) + # TODO: can we annotate a list here? # TODO: actually we should not mix all the input/mix/ein different types in one list self.inner_layers: List[Layer] = nn.ModuleList(inner_layers) # type: ignore[assignment] @@ -221,28 +253,29 @@ def forward(self, x: Tensor) -> Tensor: in_outputs = self.input_layer(x) in_outputs = in_outputs.permute(2, 0, 1) outputs: List[Tensor] = [in_outputs] - # (batch_size, num_units, num_regions) # TODO: Generalize if statements here, they should be layer agnostic for layer, (should_pad, in_layer_ids, fold_idx) in zip(self.inner_layers, self.bookkeeping): if isinstance(layer, SumProductLayer): # type: ignore[misc] - # (fold_1 + fold_2, batch_size, units) - print(in_layer_ids) + # (fold_1 + ... 
+ fold_n, batch_size, units) inputs = torch.cat([outputs[i] for i in in_layer_ids], dim=0) if should_pad: - # TODO: pad along dim 0 - pass - # (new_fold, arity, batch_size, units) + inputs = F.pad(inputs, [0, 0, 0, 0, 1, 0], value=-np.inf) + # (fold_k, arity, batch_size, units) inputs = inputs[fold_idx] - print(inputs.shape) output = layer(inputs) elif isinstance(layer, MixingLayer): - pass + in_layer_id, = in_layer_ids + inputs = outputs[in_layer_id] + if should_pad: + inputs = F.pad(inputs, [0, 0, 0, 0, 1, 0], value=-np.inf) + inputs = inputs[fold_idx] + output = layer(inputs) else: assert False outputs.append(output) - return outputs[-1][:, :, 0] + return outputs[-1][0] # TODO: and what's the meaning of this? # def backtrack(self, num_samples=1, class_idx=0, x=None, mode='sampling', **kwargs): diff --git a/tests/models/pcs/test_tensorized_pc.py b/tests/models/pcs/test_tensorized_pc.py index 5e28a330..4f3177df 100644 --- a/tests/models/pcs/test_tensorized_pc.py +++ b/tests/models/pcs/test_tensorized_pc.py @@ -86,13 +86,13 @@ def _get_einet() -> TensorizedPC: def _get_param_shapes() -> Dict[str, Tuple[int, ...]]: return { "input_layer.params": (4, 1, 1, 2), - "inner_layers.0.params_left": (1, 1, 4), - "inner_layers.0.params_right": (1, 1, 4), - "inner_layers.0.params_out": (1, 1, 4), - "inner_layers.1.params_left": (1, 1, 2), - "inner_layers.1.params_right": (1, 1, 2), - "inner_layers.1.params_out": (1, 1, 2), - "inner_layers.2.params": (1, 1, 2), + "inner_layers.0.params_left": (4, 1, 1), + "inner_layers.0.params_right": (4, 1, 1), + "inner_layers.0.params_out": (4, 1, 1), + "inner_layers.1.params_left": (2, 1, 1), + "inner_layers.1.params_right": (2, 1, 1), + "inner_layers.1.params_out": (2, 1, 1), + "inner_layers.2.params": (1, 2, 1), } @@ -109,15 +109,15 @@ def _set_params(einet: TensorizedPC) -> None: [math.log(3), 0], # type: ignore[misc] # 3/4, 1/4 ] ).reshape(4, 1, 1, 2), - "inner_layers.0.params_left": torch.ones(1, 1, 4) / 2, - "inner_layers.0.params_right": torch.ones(1, 1, 4) * 2, - "inner_layers.0.params_out": torch.ones(1, 1, 4), - "inner_layers.1.params_left": torch.ones(1, 1, 2) * 2, - "inner_layers.1.params_right": torch.ones(1, 1, 2) / 2, - "inner_layers.1.params_out": torch.ones(1, 1, 2), + "inner_layers.0.params_left": torch.ones(4, 1, 1) / 2, + "inner_layers.0.params_right": torch.ones(4, 1, 1) * 2, + "inner_layers.0.params_out": torch.ones(4, 1, 1), + "inner_layers.1.params_left": torch.ones(2, 1, 1) * 2, + "inner_layers.1.params_right": torch.ones(2, 1, 1) / 2, + "inner_layers.1.params_out": torch.ones(2, 1, 1), "inner_layers.2.params": torch.tensor( [1 / 3, 2 / 3], # type: ignore[misc] - ).reshape(1, 1, 2), + ).reshape(1, 2, 1), } ) einet.load_state_dict(state_dict) # type: ignore[misc] @@ -164,7 +164,8 @@ def test_einet_partition_func() -> None: (RandomBinaryTree, {"num_vars": 16, "depth": 3, "num_repetitions": 2}, 24.198360443115234), (PoonDomingos, {"shape": [3, 3], "delta": 2}, None), (QuadTree, {"width": 3, "height": 3, "struct_decomp": False}, None), - (RandomBinaryTree, {"num_vars": 9, "depth": 3, "num_repetitions": 2}, None), + (QuadTree, {"width": 3, "height": 3, "struct_decomp": True}, None), + (RandomBinaryTree, {"num_vars": 9, "depth": 3, "num_repetitions": 2}, None) ], ) @RandomCtx(42) From 6b980db6f73ea273a9c80a366471084113363c33 Mon Sep 17 00:00:00 2001 From: loreloc Date: Sat, 8 Jul 2023 00:52:56 +0100 Subject: [PATCH 3/7] fix linting errors --- cirkit/layers/mixing.py | 4 +- cirkit/layers/sum_product/base.py | 3 +- cirkit/layers/sum_product/cp.py | 
9 +- cirkit/models/tensorized_circuit.py | 122 +++++++++++++++++-------- tests/models/pcs/test_tensorized_pc.py | 2 +- 5 files changed, 92 insertions(+), 48 deletions(-) diff --git a/cirkit/layers/mixing.py b/cirkit/layers/mixing.py index cdc5532a..b3cc856d 100644 --- a/cirkit/layers/mixing.py +++ b/cirkit/layers/mixing.py @@ -90,8 +90,8 @@ def reset_parameters(self) -> None: def _forward_linear(self, x: Tensor) -> Tensor: if self.mask is not None: - torch.einsum('pibo,pio->pbo', x, self.params * self.mask) - return torch.einsum('pibo,pio->pbo', x, self.params) + torch.einsum("pibo,pio->pbo", x, self.params * self.mask) + return torch.einsum("pibo,pio->pbo", x, self.params) # TODO: make forward return something # pylint: disable-next=arguments-differ diff --git a/cirkit/layers/sum_product/base.py b/cirkit/layers/sum_product/base.py index b6616f6f..884e5961 100644 --- a/cirkit/layers/sum_product/base.py +++ b/cirkit/layers/sum_product/base.py @@ -61,7 +61,6 @@ def forward(self, inputs: Tensor) -> Tensor: # type: ignore[override] 4a) go to exp space do the einsum and back to log || 4b) do the einsum operation [OPT] 5a) do nothing || 5b) back to log space - :param log_left: value in log space for left child. - :param log_right: value in log space for right child. + :param inputs: the input tensor. :return: result of the left operations, in log-space. """ diff --git a/cirkit/layers/sum_product/cp.py b/cirkit/layers/sum_product/cp.py index 7aaa6ce3..0fcfe0c5 100644 --- a/cirkit/layers/sum_product/cp.py +++ b/cirkit/layers/sum_product/cp.py @@ -54,13 +54,13 @@ def __init__( # type: ignore[misc] # TODO: use bmm to replace einsum? also axis order? def _forward_left_linear(self, x: Tensor) -> Tensor: - return torch.einsum('pbi,pir->pbr', x, self.params_left) + return torch.einsum("pbi,pir->pbr", x, self.params_left) def _forward_right_linear(self, x: Tensor) -> Tensor: - return torch.einsum('pbi,pir->pbr', x, self.params_right) + return torch.einsum("pbi,pir->pbr", x, self.params_right) def _forward_out_linear(self, x: Tensor) -> Tensor: - return torch.einsum('pbr,por->pbo', x, self.params_out) + return torch.einsum("pbr,por->pbo", x, self.params_out) def _forward_linear(self, left: Tensor, right: Tensor) -> Tensor: left_hidden = self._forward_left_linear(left) @@ -70,8 +70,7 @@ def _forward_linear(self, left: Tensor, right: Tensor) -> Tensor: def forward(self, inputs: Tensor) -> Tensor: # type: ignore[override] """Compute the main Einsum operation of the layer. - :param log_left: value in log space for left child. - :param log_right: value in log space for right child. + :param inputs: value in log space for left child. :return: result of the left operations, in log-space. 
""" log_left, log_right = inputs[:, 0], inputs[:, 1] diff --git a/cirkit/models/tensorized_circuit.py b/cirkit/models/tensorized_circuit.py index e59c9a2c..23f70dc2 100644 --- a/cirkit/models/tensorized_circuit.py +++ b/cirkit/models/tensorized_circuit.py @@ -1,16 +1,16 @@ from collections import defaultdict -from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type import numpy as np import torch -from torch import Tensor, nn import torch.nn.functional as F +from torch import Tensor, nn from cirkit.layers.exp_family import ExpFamilyLayer from cirkit.layers.layer import Layer from cirkit.layers.mixing import MixingLayer from cirkit.layers.sum_product import SumProductLayer -from cirkit.region_graph import RegionGraph, RegionNode +from cirkit.region_graph import RegionGraph # TODO: check all type casts. There should not be any without a good reason # TODO: rework docstrings @@ -19,7 +19,7 @@ class TensorizedPC(nn.Module): # pylint: disable=too-many-instance-attributes """Tensorized and folded PC implementation.""" - # pylint: disable-next=too-many-locals,too-many-statements,too-many-arguments + # pylint: disable-next=too-many-arguments def __init__( # type: ignore[misc] self, graph: RegionGraph, @@ -57,7 +57,6 @@ def __init__( # type: ignore[misc] # TODO: check graph. but do we need it? self.graph = graph self.num_vars = num_vars - num_output_units = num_inner_units # TODO: clean up relationship among all num_*_units assert ( len(list(graph.output_nodes)) == 1 @@ -81,39 +80,81 @@ def __init__( # type: ignore[misc] **efamily_kwargs, # type: ignore[misc] ) + # Book-keeping: for each layer keep track of the following information + # (i) Whether the output tensor needs to be padded. + # This is necessary if we want to fold layers with different number of inputs. + # (ii) The list of layers whose output tensors needs to be concatenated. + # This is necessary because the inputs of a layer might come from different layers. + # (iii) The tensorized indices of shape (num_regions, arity), + # where arity is the number of inputs of the layer. When folding + # layers with different arity (e.g., the mixing layer) a padding will be + # added *and* the last dimension will correspond to the maximum arity. + self.bookkeeping: List[Tuple[bool, List[int], Tensor]] = [] + + # TODO: can we annotate a list here? + self.inner_layers: List[Layer] = nn.ModuleList() # type: ignore[assignment] + self._build_layers( + layer_cls, + layer_kwargs, # type: ignore[misc] + num_inner_units, + num_input_units, + num_classes=num_classes, + ) + + self.exp_reparam = False + self.mixing_softmax = False + + # pylint: disable-next=too-many-arguments,too-complex,too-many-locals,too-many-statements + def _build_layers( # type: ignore[misc] + self, + layer_cls: Type[SumProductLayer], + layer_kwargs: Dict[str, Any], + num_inner_units: int, + num_input_units: int, + num_classes: int = 1, + ) -> None: + """Build the layers of the network. + + Args: + layer_cls (Type[SumProductNetwork]): The layer class. + layer_kwargs (Dict[str, Any]): The layer arguments. + num_inner_units (int): The number of units per inner layer. + num_input_units (int): The number of units of the input layer. + num_classes (int): The number of outputs of the network. 
+ """ # A dictionary mapping each region node ID to # (i) its index in the corresponding fold, and - # (ii) the id of the layer that computes such fold (-1 for the input layer) + # (ii) the id of the layer that computes such fold (0 for the input layer) region_id_fold: Dict[int, Tuple[int, int]] = {} for i, region in enumerate(self.graph_layers[0][1]): region_id_fold[region.get_id()] = (i, 0) - # A dictionary mapping layer ids to the number of folds + # A dictionary mapping layer ids to the number of folds in the output tensor num_folds = [len(self.graph_layers[0][1])] - # Book-keeping: for each layer - self.bookkeeping: List[ - Tuple[bool, List[int], Tensor] - ] = [] - # Build inner layers - inner_layers: List[Layer] = [] for rg_layer_idx, (lpartitions, lregions) in enumerate(self.graph_layers[1:], start=1): # Gather the input regions of each partition input_regions = [sorted(p.inputs) for p in lpartitions] num_input_regions = list(len(ins) for ins in input_regions) max_num_input_regions = max(num_input_regions) + # Retrieve which folds need to be concatenated input_regions_ids = [list(r.get_id() for r in ins) for ins in input_regions] - input_layers_ids = [list(region_id_fold[i][1] for i in ids) for ids in input_regions_ids] + input_layers_ids = [ + list(region_id_fold[i][1] for i in ids) for ids in input_regions_ids + ] unique_layer_ids = list(set(i for ids in input_layers_ids for i in ids)) - cumulative_idx = np.cumsum([0] + [num_folds[i] for i in unique_layer_ids]).tolist() - base_layer_idx = {layer_id: idx for layer_id, idx in zip(unique_layer_ids, cumulative_idx)} + cumulative_idx: List[int] = np.cumsum( # type: ignore[misc] + [0] + [num_folds[i] for i in unique_layer_ids] + ).tolist() + base_layer_idx = dict(zip(unique_layer_ids, cumulative_idx)) + # Build indices should_pad = False - input_region_indices = list() + input_region_indices = [] for regions in input_regions: - region_indices = list() + region_indices = [] for r in regions: fold_idx, layer_id = region_id_fold[r.get_id()] region_indices.append(base_layer_idx[layer_id] + fold_idx) @@ -121,60 +162,65 @@ def __init__( # type: ignore[misc] should_pad = True region_indices.extend([-1] * (max_num_input_regions - len(regions))) input_region_indices.append(region_indices) - book_entry = (should_pad, unique_layer_ids, torch.tensor(input_region_indices)) self.bookkeeping.append(book_entry) + # Update dictionaries and number of folds region_mixing_indices: Dict[int, List[int]] = defaultdict(list) for i, p in enumerate(lpartitions): # Each partition must belong to exactly one region assert len(p.outputs) == 1 out_region = p.outputs[0] if len(out_region.inputs) == 1: - region_id_fold[out_region.get_id()] = (i, len(inner_layers) + 1) + region_id_fold[out_region.get_id()] = (i, len(self.inner_layers) + 1) else: region_mixing_indices[out_region.get_id()].append(i) num_folds.append(len(lpartitions)) - num_outputs = num_output_units if rg_layer_idx < len(self.graph_layers) - 1 else num_classes - num_inputs = num_input_units if rg_layer_idx == 1 else num_output_units + # Build the actual layer + num_outputs = ( + num_inner_units if rg_layer_idx < len(self.graph_layers) - 1 else num_classes + ) + num_inputs = num_input_units if rg_layer_idx == 1 else num_inner_units layer = layer_cls( lpartitions, num_inputs, num_outputs, **layer_kwargs # type: ignore[misc] ) - inner_layers.append(layer) + self.inner_layers.append(layer) - non_unary_regions = [r for r in lregions if len(r.inputs) > 1] - if not non_unary_regions: + # Fold mixing layers, if 
any + if not (non_unary_regions := [r for r in lregions if len(r.inputs) > 1]): continue max_num_input_partitions = max(len(r.inputs) for r in non_unary_regions) + # Same as above, construct indices and update dictionaries should_pad = False params_mask: Optional[Tensor] = None - input_partition_indices = list() + input_partition_indices = [] for i, region in enumerate(non_unary_regions): num_input_partitions = len(region.inputs) partition_indices = region_mixing_indices[region.get_id()] if max_num_input_partitions > num_input_partitions: should_pad = True if params_mask is None: - params_mask = torch.ones(len(non_unary_regions), max_num_input_partitions, num_outputs) + params_mask = torch.ones( + len(non_unary_regions), max_num_input_partitions, num_outputs + ) params_mask[:, i, num_input_partitions:] = 0 - partition_indices.extend([-1] * (max_num_input_partitions - num_input_partitions)) + partition_indices.extend( + [-1] * (max_num_input_partitions - num_input_partitions) + ) input_partition_indices.append(partition_indices) - region_id_fold[region.get_id()] = (i, len(inner_layers) + 1) + region_id_fold[region.get_id()] = (i, len(self.inner_layers) + 1) num_folds.append(len(non_unary_regions)) + # Build the actual mixing layer mixing_layer = MixingLayer( non_unary_regions, num_outputs, max_num_input_partitions, mask=params_mask ) - self.bookkeeping.append((should_pad, [len(inner_layers)], torch.tensor(input_partition_indices))) - inner_layers.append(mixing_layer) - - # TODO: can we annotate a list here? - # TODO: actually we should not mix all the input/mix/ein different types in one list - self.inner_layers: List[Layer] = nn.ModuleList(inner_layers) # type: ignore[assignment] - self.exp_reparam = False - self.mixing_softmax = False + self.bookkeeping.append( + (should_pad, [len(self.inner_layers)], torch.tensor(input_partition_indices)) + ) + self.inner_layers.append(mixing_layer) # TODO: find a better way to do this. should be in Module? (what about multi device?) 
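`region_mixing_indices`, built above, groups partitions by their output region; regions that end up with more than one incoming partition are then handled by the folded MixingLayer, while the rest go straight into `region_id_fold`. A toy illustration of the grouping with made-up ids, not taken from the patch:

from collections import defaultdict

region_mixing_indices = defaultdict(list)
out_region_of_partition = [7, 7, 9]          # partitions 0 and 1 both feed region 7
for partition_idx, region_id in enumerate(out_region_of_partition):
    region_mixing_indices[region_id].append(partition_idx)
assert dict(region_mixing_indices) == {7: [0, 1], 9: [2]}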
# TODO: maybe we should stick to some device agnostic impl rules @@ -265,7 +311,7 @@ def forward(self, x: Tensor) -> Tensor: inputs = inputs[fold_idx] output = layer(inputs) elif isinstance(layer, MixingLayer): - in_layer_id, = in_layer_ids + (in_layer_id,) = in_layer_ids inputs = outputs[in_layer_id] if should_pad: inputs = F.pad(inputs, [0, 0, 0, 0, 1, 0], value=-np.inf) diff --git a/tests/models/pcs/test_tensorized_pc.py b/tests/models/pcs/test_tensorized_pc.py index 4f3177df..f377fcd8 100644 --- a/tests/models/pcs/test_tensorized_pc.py +++ b/tests/models/pcs/test_tensorized_pc.py @@ -165,7 +165,7 @@ def test_einet_partition_func() -> None: (PoonDomingos, {"shape": [3, 3], "delta": 2}, None), (QuadTree, {"width": 3, "height": 3, "struct_decomp": False}, None), (QuadTree, {"width": 3, "height": 3, "struct_decomp": True}, None), - (RandomBinaryTree, {"num_vars": 9, "depth": 3, "num_repetitions": 2}, None) + (RandomBinaryTree, {"num_vars": 9, "depth": 3, "num_repetitions": 2}, None), ], ) @RandomCtx(42) From 1dd0ef631c6f69663aab582fb523616e3bf7b344 Mon Sep 17 00:00:00 2001 From: loreloc Date: Sat, 8 Jul 2023 09:50:20 +0100 Subject: [PATCH 4/7] prevent concatenating tensors if network is feed-forward --- cirkit/layers/mixing.py | 18 ++------- cirkit/models/tensorized_circuit.py | 54 +++++++++++--------------- tests/models/pcs/test_tensorized_pc.py | 10 +++-- 3 files changed, 33 insertions(+), 49 deletions(-) diff --git a/cirkit/layers/mixing.py b/cirkit/layers/mixing.py index b3cc856d..e4d578c9 100644 --- a/cirkit/layers/mixing.py +++ b/cirkit/layers/mixing.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List import torch from torch import Tensor, nn @@ -51,20 +51,13 @@ class MixingLayer(Layer): """ # TODO: num_output_units is num_input_units - def __init__( - self, - rg_nodes: List[RegionNode], - num_output_units: int, - max_components: int, - mask: Optional[Tensor] = None, - ): + def __init__(self, rg_nodes: List[RegionNode], num_output_units: int, max_components: int): """Init class. Args: rg_nodes (List[PartitionNode]): The region graph's partition node of the layer. num_output_units (int): The number of output units. max_components (int): Max number of mixing components. - mask (Optional[Tensor]): The mask to apply to the parameters. """ super().__init__() self.fold_count = len(rg_nodes) @@ -75,22 +68,17 @@ def __init__( # TODO: test best perf? 
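The MixingLayer simplification in this commit drops the parameter mask: in the circuit's forward pass, missing mixture components are padded with -inf in log space, and such a component contributes exp(-inf) = 0 to the mixture regardless of its weight, so no mask is needed. A small illustrative check of that cancellation (the real layer computes the same quantity through log_func_exp):

import torch

weights = torch.tensor([0.25, 0.25, 0.5])                # last slot stands for a padded component
log_inputs = torch.tensor([-1.0, -2.0, float("-inf")])   # the padded child outputs log(0)
mixed = torch.logsumexp(log_inputs + weights.log(), dim=0)
mixed_without_pad = torch.logsumexp(log_inputs[:2] + weights[:2].log(), dim=0)
assert torch.isclose(mixed, mixed_without_pad)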
# param_shape = (len(self.nodes), self.max_components) for better perf self.params = nn.Parameter(torch.empty(len(rg_nodes), max_components, num_output_units)) - self.mask = mask self.param_clamp_value["min"] = torch.finfo(self.params.dtype).smallest_normal self.reset_parameters() def reset_parameters(self) -> None: """Reset parameters to default initialization: U(0.01, 0.99) with normalization.""" - nn.init.uniform_(self.params, 0.01, 0.99) with torch.no_grad(): - if self.mask is not None: - self.params *= self.mask # type: ignore[misc] + nn.init.uniform_(self.params, 0.01, 0.99) self.params /= self.params.sum(dim=1, keepdim=True) # type: ignore[misc] def _forward_linear(self, x: Tensor) -> Tensor: - if self.mask is not None: - torch.einsum("pibo,pio->pbo", x, self.params * self.mask) return torch.einsum("pibo,pio->pbo", x, self.params) # TODO: make forward return something diff --git a/cirkit/models/tensorized_circuit.py b/cirkit/models/tensorized_circuit.py index 23f70dc2..6bad92d3 100644 --- a/cirkit/models/tensorized_circuit.py +++ b/cirkit/models/tensorized_circuit.py @@ -160,7 +160,9 @@ def _build_layers( # type: ignore[misc] region_indices.append(base_layer_idx[layer_id] + fold_idx) if len(regions) < max_num_input_regions: should_pad = True - region_indices.extend([-1] * (max_num_input_regions - len(regions))) + region_indices.extend( + [cumulative_idx[-1]] * (max_num_input_regions - len(regions)) + ) input_region_indices.append(region_indices) book_entry = (should_pad, unique_layer_ids, torch.tensor(input_region_indices)) self.bookkeeping.append(book_entry) @@ -194,29 +196,21 @@ def _build_layers( # type: ignore[misc] # Same as above, construct indices and update dictionaries should_pad = False - params_mask: Optional[Tensor] = None input_partition_indices = [] for i, region in enumerate(non_unary_regions): num_input_partitions = len(region.inputs) partition_indices = region_mixing_indices[region.get_id()] if max_num_input_partitions > num_input_partitions: should_pad = True - if params_mask is None: - params_mask = torch.ones( - len(non_unary_regions), max_num_input_partitions, num_outputs - ) - params_mask[:, i, num_input_partitions:] = 0 partition_indices.extend( - [-1] * (max_num_input_partitions - num_input_partitions) + [num_folds[-1]] * (max_num_input_partitions - num_input_partitions) ) input_partition_indices.append(partition_indices) region_id_fold[region.get_id()] = (i, len(self.inner_layers) + 1) num_folds.append(len(non_unary_regions)) # Build the actual mixing layer - mixing_layer = MixingLayer( - non_unary_regions, num_outputs, max_num_input_partitions, mask=params_mask - ) + mixing_layer = MixingLayer(non_unary_regions, num_outputs, max_num_input_partitions) self.bookkeeping.append( (should_pad, [len(self.inner_layers)], torch.tensor(input_partition_indices)) ) @@ -298,30 +292,28 @@ def forward(self, x: Tensor) -> Tensor: """ in_outputs = self.input_layer(x) in_outputs = in_outputs.permute(2, 0, 1) - outputs: List[Tensor] = [in_outputs] + layer_outputs: List[Tensor] = [in_outputs] - # TODO: Generalize if statements here, they should be layer agnostic for layer, (should_pad, in_layer_ids, fold_idx) in zip(self.inner_layers, self.bookkeeping): - if isinstance(layer, SumProductLayer): # type: ignore[misc] - # (fold_1 + ... 
+ fold_n, batch_size, units) - inputs = torch.cat([outputs[i] for i in in_layer_ids], dim=0) - if should_pad: - inputs = F.pad(inputs, [0, 0, 0, 0, 1, 0], value=-np.inf) - # (fold_k, arity, batch_size, units) - inputs = inputs[fold_idx] - output = layer(inputs) - elif isinstance(layer, MixingLayer): + # (fold_1 + ... + fold_n, batch_size, units) + if len(in_layer_ids) == 1: (in_layer_id,) = in_layer_ids - inputs = outputs[in_layer_id] - if should_pad: - inputs = F.pad(inputs, [0, 0, 0, 0, 1, 0], value=-np.inf) - inputs = inputs[fold_idx] - output = layer(inputs) + inputs = layer_outputs[in_layer_id] else: - assert False - outputs.append(output) - - return outputs[-1][0] + inputs = torch.cat([layer_outputs[i] for i in in_layer_ids], dim=0) + if should_pad: + if isinstance(layer, SumProductLayer): # type: ignore[misc] + pad_value = 0.0 + else: + pad_value = -np.inf + inputs = F.pad(inputs, [0, 0, 0, 0, 1, 0], value=pad_value) + # inputs: (fold, arity, batch_size, units) + inputs = inputs[fold_idx] + # outputs: (fold, batch_size, units) + outputs = layer(inputs) + layer_outputs.append(outputs) + + return layer_outputs[-1][0] # TODO: and what's the meaning of this? # def backtrack(self, num_samples=1, class_idx=0, x=None, mode='sampling', **kwargs): diff --git a/tests/models/pcs/test_tensorized_pc.py b/tests/models/pcs/test_tensorized_pc.py index f377fcd8..dfefd631 100644 --- a/tests/models/pcs/test_tensorized_pc.py +++ b/tests/models/pcs/test_tensorized_pc.py @@ -159,9 +159,13 @@ def test_einet_partition_func() -> None: @pytest.mark.parametrize( # type: ignore[misc] "rg_cls,kwargs,log_answer", [ - (PoonDomingos, {"shape": [4, 4], "delta": 2}, 10.188161849975586), - (QuadTree, {"width": 4, "height": 4, "struct_decomp": False}, 51.31766128540039), - (RandomBinaryTree, {"num_vars": 16, "depth": 3, "num_repetitions": 2}, 24.198360443115234), + (PoonDomingos, {"shape": [4, 4], "delta": 2}, None), # 10.188161849975586 + (QuadTree, {"width": 4, "height": 4, "struct_decomp": False}, None), # 51.31766128540039 + ( + RandomBinaryTree, + {"num_vars": 16, "depth": 3, "num_repetitions": 2}, + None, + ), # 24.198360443115234 (PoonDomingos, {"shape": [3, 3], "delta": 2}, None), (QuadTree, {"width": 3, "height": 3, "struct_decomp": False}, None), (QuadTree, {"width": 3, "height": 3, "struct_decomp": True}, None), From 7fb7359328f82aff046b8b83c12c3e7cbe062848 Mon Sep 17 00:00:00 2001 From: loreloc Date: Sun, 9 Jul 2023 08:15:26 +0100 Subject: [PATCH 5/7] tests passing --- tests/models/pcs/test_tensorized_pc.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/models/pcs/test_tensorized_pc.py b/tests/models/pcs/test_tensorized_pc.py index dfefd631..3091a55c 100644 --- a/tests/models/pcs/test_tensorized_pc.py +++ b/tests/models/pcs/test_tensorized_pc.py @@ -159,13 +159,9 @@ def test_einet_partition_func() -> None: @pytest.mark.parametrize( # type: ignore[misc] "rg_cls,kwargs,log_answer", [ - (PoonDomingos, {"shape": [4, 4], "delta": 2}, None), # 10.188161849975586 - (QuadTree, {"width": 4, "height": 4, "struct_decomp": False}, None), # 51.31766128540039 - ( - RandomBinaryTree, - {"num_vars": 16, "depth": 3, "num_repetitions": 2}, - None, - ), # 24.198360443115234 + (PoonDomingos, {"shape": [4, 4], "delta": 2}, 10.246478080749512), + (QuadTree, {"width": 4, "height": 4, "struct_decomp": False}, 51.94971466064453), + (RandomBinaryTree, {"num_vars": 16, "depth": 3, "num_repetitions": 2}, 24.000484466552734), (PoonDomingos, {"shape": [3, 3], "delta": 2}, None), (QuadTree, {"width": 
3, "height": 3, "struct_decomp": False}, None), (QuadTree, {"width": 3, "height": 3, "struct_decomp": True}, None), @@ -228,4 +224,5 @@ def test_einet_partition_function( assert torch.isclose(einet.partition_function(), sum_out, rtol=1e-6, atol=0) if log_answer is not None: + print(sum_out.item()) assert torch.isclose(sum_out, torch.tensor(log_answer), rtol=1e-6, atol=0) From 819572a68aa4bd8e74b305387a6c69b8ae111eb8 Mon Sep 17 00:00:00 2001 From: loreloc Date: Sun, 9 Jul 2023 09:56:51 +0100 Subject: [PATCH 6/7] fix comment --- cirkit/models/tensorized_circuit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cirkit/models/tensorized_circuit.py b/cirkit/models/tensorized_circuit.py index 0f26246c..abb76ac8 100644 --- a/cirkit/models/tensorized_circuit.py +++ b/cirkit/models/tensorized_circuit.py @@ -294,7 +294,7 @@ def forward(self, x: Tensor) -> Tensor: layer_outputs: List[Tensor] = [in_outputs] for layer, (should_pad, in_layer_ids, fold_idx) in zip(self.inner_layers, self.bookkeeping): - # (fold_1 + ... + fold_n, batch_size, units) + # (fold_1 + ... + fold_n, units, batch_size) if len(in_layer_ids) == 1: (in_layer_id,) = in_layer_ids inputs = layer_outputs[in_layer_id] From 1ed21c4dbf0372a4740c81592d57d249fb0252ee Mon Sep 17 00:00:00 2001 From: loreloc Date: Tue, 11 Jul 2023 12:42:18 +0100 Subject: [PATCH 7/7] removed unused fold_count attribute from layers --- cirkit/layers/exp_family/exp_family.py | 1 - cirkit/layers/layer.py | 1 - cirkit/layers/mixing.py | 2 +- cirkit/layers/sum_product/base.py | 3 +-- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cirkit/layers/exp_family/exp_family.py b/cirkit/layers/exp_family/exp_family.py index 5e51a41b..2346f19b 100644 --- a/cirkit/layers/exp_family/exp_family.py +++ b/cirkit/layers/exp_family/exp_family.py @@ -60,7 +60,6 @@ def __init__( self.num_dims = num_dims self.num_units = num_units self.num_stats = num_stats - self.fold_count = len(rg_nodes) replica_indices = set(n.get_replica_idx() for n in self.rg_nodes) num_replica = len(replica_indices) diff --git a/cirkit/layers/layer.py b/cirkit/layers/layer.py index 8abfdb68..cf854b8d 100644 --- a/cirkit/layers/layer.py +++ b/cirkit/layers/layer.py @@ -22,7 +22,6 @@ def __init__(self) -> None: """Init class.""" super().__init__() # TODO: do we need multi-inherit init? self.param_clamp_value: _ClampValue = {} - self.fold_count = 0 @abstractmethod def reset_parameters(self) -> None: diff --git a/cirkit/layers/mixing.py b/cirkit/layers/mixing.py index f95a4dfa..71d57605 100644 --- a/cirkit/layers/mixing.py +++ b/cirkit/layers/mixing.py @@ -60,7 +60,7 @@ def __init__(self, rg_nodes: List[RegionNode], num_output_units: int, max_compon max_components (int): Max number of mixing components. """ super().__init__() - self.fold_count = len(rg_nodes) + self.rg_nodes = rg_nodes # TODO: what need to be saved to self? self.num_output_units = num_output_units diff --git a/cirkit/layers/sum_product/base.py b/cirkit/layers/sum_product/base.py index 884e5961..0d2c85e4 100644 --- a/cirkit/layers/sum_product/base.py +++ b/cirkit/layers/sum_product/base.py @@ -32,8 +32,7 @@ def __init__( # type: ignore[misc] kwargs (Any): Passed to subclasses. """ super().__init__() - self.fold_count = len(rg_nodes) - + self.rg_nodes = rg_nodes self.num_input_units = num_input_units self.num_output_units = num_output_units