Skip to content

Commit

Permalink
Deploying to gh-pages from @ 6309445 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
asogaard committed Sep 30, 2024
1 parent ed397af commit f84a83a
Show file tree
Hide file tree
Showing 60 changed files with 4,696 additions and 187 deletions.
34 changes: 33 additions & 1 deletion _modules/graphnet/data/datamodule.html
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,39 @@ <h1 id="modules-graphnet-data-datamodule--page-root">Source code for graphnet.da
<span class="s2">"Unknown dataset encountered during dataloader creation."</span>
<span class="p">)</span>

<span class="k">if</span> <span class="s2">"sampler"</span> <span class="ow">in</span> <span class="n">dataloader_args</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="c1"># If there were no kwargs provided, set it to empty dict</span>
<span class="k">if</span> <span class="s2">"sampler_kwargs"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">dataloader_args</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"sampler_kwargs"</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"sampler"</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"sampler"</span><span class="p">](</span>
<span class="n">dataset</span><span class="p">,</span> <span class="o">**</span><span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"sampler_kwargs"</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">del</span> <span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"sampler_kwargs"</span><span class="p">]</span>

<span class="k">if</span> <span class="s2">"batch_sampler"</span> <span class="ow">in</span> <span class="n">dataloader_args</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="k">if</span> <span class="s2">"sampler"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">dataloader_args</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
<span class="s2">"When specifying a `batch_sampler`,"</span>
<span class="s2">"you must also provide `sampler`."</span>
<span class="p">)</span>
<span class="c1"># If there were no kwargs provided, set it to empty dict</span>
<span class="k">if</span> <span class="s2">"batch_sampler_kwargs"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">dataloader_args</span><span class="o">.</span><span class="n">keys</span><span class="p">():</span>
<span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"batch_sampler_kwargs"</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>

<span class="n">batch_sampler</span> <span class="o">=</span> <span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"batch_sampler"</span><span class="p">](</span>
<span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"sampler"</span><span class="p">],</span>
<span class="o">**</span><span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"batch_sampler_kwargs"</span><span class="p">],</span>
<span class="p">)</span>
<span class="n">dataloader_args</span><span class="p">[</span><span class="s2">"batch_sampler"</span><span class="p">]</span> <span class="o">=</span> <span class="n">batch_sampler</span>
<span class="c1"># Remove extra keys</span>
<span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">[</span>
<span class="s2">"batch_sampler_kwargs"</span><span class="p">,</span>
<span class="s2">"drop_last"</span><span class="p">,</span>
<span class="s2">"sampler"</span><span class="p">,</span>
<span class="s2">"shuffle"</span><span class="p">,</span>
<span class="p">]:</span>
<span class="n">dataloader_args</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>

<span class="k">if</span> <span class="n">dataloader_args</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">"Dataloader arguments not provided."</span><span class="p">)</span>

Expand Down Expand Up @@ -848,7 +881,6 @@ <h1 id="modules-graphnet-data-datamodule--page-root">Source code for graphnet.da
<span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">frac</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_rng</span><span class="p">)</span>
<span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span>
<span class="p">)</span> <span class="c1"># shuffled list</span>

<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_split_selection</span><span class="p">(</span><span class="n">all_events</span><span class="p">)</span>

<span class="k">def</span> <span class="nf">_construct_dataset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tmp_args</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Dataset</span><span class="p">:</span>
Expand Down
27 changes: 16 additions & 11 deletions _modules/graphnet/data/dataset/parquet/parquet_dataset.html
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ <h1 id="modules-graphnet-data-dataset-parquet-parquet-dataset--page-root">Source
<span class="sd"> `"10000 random events ~ event_no % 5 &gt; 0"` or `"20% random</span>
<span class="sd"> events ~ event_no % 5 &gt; 0"`).</span>
<span class="sd"> graph_definition: Method that defines the graph representation.</span>
<span class="sd"> cache_size: Number of batches to cache in memory.</span>
<span class="sd"> cache_size: Number of files to cache in memory.</span>
<span class="sd"> Must be at least 1. Defaults to 1.</span>
<span class="sd"> labels: Dictionary of labels to be added to the dataset.</span>
<span class="sd"> """</span>
Expand Down Expand Up @@ -484,8 +484,8 @@ <h1 id="modules-graphnet-data-dataset-parquet-parquet-dataset--page-root">Source
<span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_path</span>
<span class="c1"># Member Variables</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_cache_size</span> <span class="o">=</span> <span class="n">cache_size</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_batch_sizes</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_calculate_sizes</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_batch_cumsum</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_batch_sizes</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_chunk_sizes</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_calculate_sizes</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_chunk_cumsum</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_chunk_sizes</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_file_cache</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_initialize_file_cache</span><span class="p">(</span>
<span class="n">truth_table</span><span class="o">=</span><span class="n">truth_table</span><span class="p">,</span>
<span class="n">node_truth_table</span><span class="o">=</span><span class="n">node_truth_table</span><span class="p">,</span>
Expand Down Expand Up @@ -540,32 +540,37 @@ <h1 id="modules-graphnet-data-dataset-parquet-parquet-dataset--page-root">Source
<span class="p">)</span>
<span class="k">return</span> <span class="n">event_index</span>

<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">chunk_sizes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">"""Return a list of the chunk sizes."""</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_chunk_sizes</span>

<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">"""Return length of dataset, i.e. number of training examples."""</span>
<span class="k">return</span> <span class="nb">sum</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_batch_sizes</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">sum</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_chunk_sizes</span><span class="p">)</span>

<span class="k">def</span> <span class="nf">_get_all_indices</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">"""Return a list of all unique values in `self._index_column`."""</span>
<span class="n">files</span> <span class="o">=</span> <span class="n">glob</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_truth_table</span><span class="p">,</span> <span class="s2">"*.parquet"</span><span class="p">))</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">files</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span>

<span class="k">def</span> <span class="nf">_calculate_sizes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">"""Calculate the number of events in each batch."""</span>
<span class="w"> </span><span class="sd">"""Calculate the number of events in each chunk."""</span>
<span class="n">sizes</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">batch_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_indices</span><span class="p">:</span>
<span class="k">for</span> <span class="n">chunk_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_indices</span><span class="p">:</span>
<span class="n">path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_path</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_truth_table</span><span class="p">,</span>
<span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">truth_table</span><span class="si">}</span><span class="s2">_</span><span class="si">{</span><span class="n">batch_id</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">,</span>
<span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">truth_table</span><span class="si">}</span><span class="s2">_</span><span class="si">{</span><span class="n">chunk_id</span><span class="si">}</span><span class="s2">.parquet"</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">sizes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">pol</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="n">path</span><span class="p">)))</span>
<span class="k">return</span> <span class="n">sizes</span>

<span class="k">def</span> <span class="nf">_get_row_idx</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sequential_index</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">"""Return the row index corresponding to a `sequential_index`."""</span>
<span class="n">file_idx</span> <span class="o">=</span> <span class="n">bisect_right</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_batch_cumsum</span><span class="p">,</span> <span class="n">sequential_index</span><span class="p">)</span>
<span class="n">file_idx</span> <span class="o">=</span> <span class="n">bisect_right</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_chunk_cumsum</span><span class="p">,</span> <span class="n">sequential_index</span><span class="p">)</span>
<span class="k">if</span> <span class="n">file_idx</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">idx</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">sequential_index</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_batch_cumsum</span><span class="p">[</span><span class="n">file_idx</span> <span class="o">-</span> <span class="mi">1</span><span class="p">])</span>
<span class="n">idx</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">sequential_index</span> <span class="o">-</span> <span class="bp">self</span><span class="o">.</span><span class="n">_chunk_cumsum</span><span class="p">[</span><span class="n">file_idx</span> <span class="o">-</span> <span class="mi">1</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">idx</span> <span class="o">=</span> <span class="n">sequential_index</span>
<span class="k">return</span> <span class="n">idx</span>
Expand Down Expand Up @@ -604,9 +609,9 @@ <h1 id="modules-graphnet-data-dataset-parquet-parquet-dataset--page-root">Source
<span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="n">columns</span><span class="p">]</span>

<span class="k">if</span> <span class="n">sequential_index</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">file_idx</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_batch_cumsum</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">file_idx</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_chunk_cumsum</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">file_idx</span> <span class="o">=</span> <span class="p">[</span><span class="n">bisect_right</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_batch_cumsum</span><span class="p">,</span> <span class="n">sequential_index</span><span class="p">)]</span>
<span class="n">file_idx</span> <span class="o">=</span> <span class="p">[</span><span class="n">bisect_right</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_chunk_cumsum</span><span class="p">,</span> <span class="n">sequential_index</span><span class="p">)]</span>

<span class="n">file_indices</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_indices</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span> <span class="k">for</span> <span class="n">idx</span> <span class="ow">in</span> <span class="n">file_idx</span><span class="p">]</span>

Expand Down
Loading

0 comments on commit f84a83a

Please sign in to comment.