Skip to content

Commit

Permalink
version 2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
Anwar-Said committed Nov 8, 2023
1 parent 2dc52bb commit ecb8703
Show file tree
Hide file tree
Showing 33 changed files with 597 additions and 427 deletions.
101 changes: 63 additions & 38 deletions NeuroGraph/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@
from typing import Callable, List, Optional

import torch
import zipfile

from torch_geometric.data import (
Data,
InMemoryDataset,
download_url,
extract_zip,
extract_zip
)

class NeuroGraphStatic(InMemoryDataset):
class NeuroGraphDataset(InMemoryDataset):
r"""The NeuroGraph benchmark datasets from the
`"NeuroGraph: Benchmarks for Graph Machine Learning in Brain Connectomics"
<https://arxiv.org/abs/2306.06202>`_ paper.
:class:`NeuroGraphStatic` holds a collection of five neuroimaging graph
:class:`NeuroGraphDataset` holds a collection of five neuroimaging graph
learning datasets that span multiple categories of demographics, mental
states, and cognitive traits.
See the `documentation
Expand All @@ -26,22 +26,22 @@ class NeuroGraphStatic(InMemoryDataset):
+--------------------+---------+----------------------+
| Dataset | #Graphs | Task |
+====================+=========+======================+
| :obj:`HCP-State` | 7,443 | Graph Classification |
| :obj:`HCPActivity` | 7,443 | Graph Classification |
+--------------------+---------+----------------------+
| :obj:`HCP-Gender` | 1,078 | Graph Classification |
| :obj:`HCPGender` | 1,078 | Graph Classification |
+--------------------+---------+----------------------+
| :obj:`HCP-Age` | 1,065 | Graph Classification |
| :obj:`HCPAge` | 1,065 | Graph Classification |
+--------------------+---------+----------------------+
| :obj:`HCP-FI` | 1,071 | Graph Regression |
| :obj:`HCPFI` | 1,071 | Graph Regression |
+--------------------+---------+----------------------+
| :obj:`HCP-WM` | 1,078 | Graph Regression |
| :obj:`HCPWM` | 1,078 | Graph Regression |
+--------------------+---------+----------------------+
Args:
root (str): Root directory where the dataset should be saved.
name (str): The name of the dataset (one of :obj:`"HCPGender"`,
:obj:`"HCP-State"`, :obj:`"HCP-Age"`, :obj:`"HCP-FI"`,
:obj:`"HCP-WM"`).
:obj:`"HCPActivity"`, :obj:`"HCPAge"`, :obj:`"HCPFI"`,
:obj:`"HCPWM"`).
transform (callable, optional): A function/transform that takes in an
:obj:`torch_geometric.data.Data` object and returns a transformed
version. The data object will be transformed before every access.
Expand All @@ -57,11 +57,11 @@ class NeuroGraphStatic(InMemoryDataset):
"""
url = 'https://vanderbilt.box.com/shared/static'
filenames = {
'HCP-Gender': 'r6hlz2arm7yiy6v6981cv2nzq3b0meax.zip',
'HCP-State': 'b4g59ibn8itegr0rpcd16m9ajb2qyddf.zip',
'HCP-Age': 'static/lzzks4472czy9f9vc8aikp7pdbknmtfe.zip',
'HCP-WM': 'xtmpa6712fidi94x6kevpsddf9skuoxy.zip',
'HCP-FI': 'g2md9h9snh7jh6eeay02k1kr9m4ido9f.zip',
'HCPGender': 'r6hlz2arm7yiy6v6981cv2nzq3b0meax.zip',
'HCPActivity': 'b4g59ibn8itegr0rpcd16m9ajb2qyddf.zip',
'HCPAge': 'static/lzzks4472czy9f9vc8aikp7pdbknmtfe.zip',
'HCPWM': 'xtmpa6712fidi94x6kevpsddf9skuoxy.zip',
'HCPFI': 'g2md9h9snh7jh6eeay02k1kr9m4ido9f.zip',
}

def __init__(
Expand All @@ -76,7 +76,7 @@ def __init__(
self.name = name

super().__init__(root, transform, pre_transform, pre_filter)
self.load(self.processed_paths[0])
self.data, self.slices = torch.load(self.processed_paths[0])

@property
def raw_dir(self) -> str:
Expand Down Expand Up @@ -124,8 +124,8 @@ def process(self):
sample = self.pre_transform(sample)

data_list.append(sample)

self.save(data_list, self.processed_paths[0])
data, slices = self.collate(data_list)
torch.save((data,slices), self.processed_paths[0])

class NeuroGraphDynamic():
r"""Graph-based neuroimaging benchmark datasets, e.g.,
Expand All @@ -139,31 +139,56 @@ class NeuroGraphDynamic():
Returns:
list: A list of graphs in PyTorch Geometric (pyg) format. Each graph contains a list of dynamic graphs batched in pyg batch.
"""
url = 'https://vanderbilt.box.com/shared/static'
filenames = {
'DynHCPGender': 'mj0z6unea34lfz1hkdwsinj7g22yohxn.zip',
'DynHCPActivity': '2so3fnfqakeu6hktz322o3nm2c8ocus7.zip',
'DynHCPAge': '195f9teg4t4apn6kl6hbc4ib4g9addtq.zip',
'DynHCPWM': 'mxy8fq3ghm60q6h7uhnu80pgvfxs6xo2.zip',
'DynHCPFI': 'un7w3ohb2mmyjqt1ou2wm3g87y1lfuuo.zip',
}
def __init__(self,root, name):
self.root = root
self.name = name
self.urls = {"DynHCP-Gender":'https://vanderbilt.box.com/shared/static/mj0z6unea34lfz1hkdwsinj7g22yohxn.zip',
"DynHCP-State":'https://vanderbilt.box.com/shared/static/2so3fnfqakeu6hktz322o3nm2c8ocus7.zip',
"DynHCP-Age":'https://vanderbilt.box.com/shared/static/195f9teg4t4apn6kl6hbc4ib4g9addtq.zip',
"DynHCP-WM":'https://vanderbilt.box.com/shared/static/mxy8fq3ghm60q6h7uhnu80pgvfxs6xo2.zip',
"DynHCP-FI":'https://vanderbilt.box.com/shared/static/un7w3ohb2mmyjqt1ou2wm3g87y1lfuuo.zip'
}
if self.urls.get(name):
self.download(self.urls.get(name))
else:
print('dataset not found! The name of the datasets are: "DynHCPGender","DynHCPActivity","DynHCPAge","DynHCPWM","DynHCPFI"')


# self.urls = {"DynHCP-Gender":'https://vanderbilt.box.com/shared/static/mj0z6unea34lfz1hkdwsinj7g22yohxn.zip',
# "DynHCP-State":'https://vanderbilt.box.com/shared/static/2so3fnfqakeu6hktz322o3nm2c8ocus7.zip',
# "DynHCP-Age":'https://vanderbilt.box.com/shared/static/195f9teg4t4apn6kl6hbc4ib4g9addtq.zip',
# "DynHCP-WM":'https://vanderbilt.box.com/shared/static/mxy8fq3ghm60q6h7uhnu80pgvfxs6xo2.zip',
# "DynHCP-FI":'https://vanderbilt.box.com/shared/static/un7w3ohb2mmyjqt1ou2wm3g87y1lfuuo.zip'
# }
assert name in self.filenames.keys()
self.name = name
file_path = os.path.join(self.root,self.name,'processed', self.name+".pt")
if not os.path.exists(file_path):
self.download()
# else:
# print('dataset not found! The name of the datasets are: "DynHCP-Gender","DynHCP-Activity","DynHCP-Age","DynHCP-WM","DynHCP-FI"')
self.dataset, self.labels = self.load_data()

def download(self,url):

download_url(url, os.path.join(self.root, self.name))
basename = os.path.basename(url)
with zipfile.ZipFile(os.path.join(self.root,self.name,basename), 'r') as file:
file.extractall(os.path.join(self.root,self.name,os.path.dirname(basename)))
# def download(self,url):
# download_url(url, os.path.join(self.root, self.name))
# basename = os.path.basename(url)
# with zipfile.ZipFile(os.path.join(self.root,self.name,basename), 'r') as file:
# file.extractall(os.path.join(self.root,self.name,os.path.dirname(basename)))
# self.remove(os.path.join(self.raw_dir,basename))


def download(self):
    """Fetch and unpack the archive for the dataset named ``self.name``.

    The download link is the class-level ``url`` prefix joined with the
    ``filenames`` entry for ``self.name``. The zip is saved under
    ``<root>/<name>``, extracted into ``self.root``, and then deleted.
    """
    archive_name = self.filenames[self.name]
    link = f'{self.url}/{archive_name}'
    download_dir = os.path.join(self.root, self.name)
    # download_url hands back the location of the saved archive.
    archive_path = download_url(link, download_dir)
    extract_zip(archive_path, self.root)
    # The zip is no longer needed once its contents are extracted.
    os.unlink(archive_path)
# os.rename(
# osp.join(self.raw_dir, self.name, 'processed', f'{self.name}.pt'),
# osp.join(self.raw_dir, 'data.pt'))
# shutil.rmtree(osp.join(self.raw_dir, self.name))


def load_data(self):
if self.name=='DynHCP-State':
dataset_raw = torch.load(os.path.join(self.root,self.name,self.name,"processed", self.name+".pt"))
if self.name=='DynHCPActivity':
dataset_raw = torch.load(os.path.join(self.root,self.name,'processed', self.name+".pt"))
dataset,labels = [],[]
for v in dataset_raw:
batches = v.get('batches')
Expand All @@ -173,7 +198,7 @@ def load_data(self):
dataset.append(b)
labels.append(y)
else:
dataset = torch.load(os.path.join(self.root,self.name,self.name,"processed", self.name+".pt"))
dataset = torch.load(os.path.join(self.root,self.name,'processed', self.name+".pt"))
labels = dataset['labels']
dataset = dataset['batches']
return dataset,labels
Expand Down
Binary file modified doc/_build/doctrees/NeuroGraph.doctree
Binary file not shown.
Binary file modified doc/_build/doctrees/datasets.doctree
Binary file not shown.
Binary file modified doc/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified doc/_build/doctrees/get_started.doctree
Binary file not shown.
Binary file modified doc/_build/doctrees/index.doctree
Binary file not shown.
Binary file modified doc/_build/doctrees/install.doctree
Binary file not shown.
Binary file modified doc/_build/doctrees/preprocess.doctree
Binary file not shown.
Binary file modified doc/_build/doctrees/utils.doctree
Binary file not shown.
2 changes: 1 addition & 1 deletion doc/_build/html/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 7313becfcf9696e10c17e4e0753ff7df
config: c0e52ab7b160affdaa25d3a6a797e2b5
tags: 645f666f9bcd5a90fca523b33c5a78b7
14 changes: 6 additions & 8 deletions doc/_build/html/NeuroGraph.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->

<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/jquery.js"></script>
<script src="_static/underscore.js"></script>
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/documentation_options.js?v=20623aea"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
Expand Down Expand Up @@ -97,16 +95,16 @@

<p>NeuroGraph is a collection of graph-based neuroimaging datasets that span multiple categories of demographics, mental states and cognitive traits. The following provides an overview of these categories and their associated datasets.</p>
<section id="demographics">
<h1>Demographics<a class="headerlink" href="#demographics" title="Permalink to this heading"></a></h1>
<h1>Demographics<a class="headerlink" href="#demographics" title="Link to this heading"></a></h1>
<p>The demographics category includes gender and age estimation. The gender attribute facilitates binary classification, with the categories being male and female. Age is categorized into three distinct groups: 22-25, 26-30, and 31-35 years. We introduce four datasets under this category: HCP-Gender, HCP-Age, DynHCP-Gender, and DynHCP-Age. The first two are
static graph datasets while the last two are the corresponding dynamic graph datasets.</p>
</section>
<section id="mental-states">
<h1>Mental States<a class="headerlink" href="#mental-states" title="Permalink to this heading"></a></h1>
<h1>Mental States<a class="headerlink" href="#mental-states" title="Link to this heading"></a></h1>
<p>Mental state decoding involves seven tasks: Emotion Processing, Gambling, Language, Motor, Relational Processing, Social Cognition, and Working Memory. Each task is designed to help delineate a core set of functions relevant to different facets of the relation between the human brain, cognition, and behavior. Under this category, we present two datasets: HCP-Activity, a static representation, and DynHCP-Activity, its dynamic counterpart.</p>
</section>
<section id="cognitive-traits">
<h1>Cognitive Traits<a class="headerlink" href="#cognitive-traits" title="Permalink to this heading"></a></h1>
<h1>Cognitive Traits<a class="headerlink" href="#cognitive-traits" title="Link to this heading"></a></h1>
<p>The cognitive traits category of our dataset comprises two significant traits: working memory (List Sorting) and fluid intelligence evaluation with PMAT24. Working memory refers to an individual’s capacity to temporarily hold and manipulate information, a crucial aspect that influences higher cognitive functions such as reasoning, comprehension, and learning. Fluid intelligence represents the ability to solve novel problems, independent of any knowledge from the past. It demonstrates the capacity to analyze complex relationships, identify patterns, and derive solutions in dynamic situations. The prediction of both these traits, quantified as continuous variables in our dataset, is treated as a regression problem. We aim to predict
the performance or scores related to these cognitive traits based on the functional connectome graphs. We generate four datasets under cognitive traits: HCP Fluid Intelligence (HCP-FI), HCP Working Memory (HCP-WM), DynHCP-FI and DynHCP-WM.</p>
</section>
Expand Down
Loading

0 comments on commit ecb8703

Please sign in to comment.