-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatasetManagement.js
172 lines (137 loc) · 5.88 KB
/
datasetManagement.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import * as path from 'path';
import DDFCsvReader from "@vizabi/reader-ddfcsv";
import {resultTransformer} from "./resultTransformer.js";
import {getRepoBranchCommitMapping} from "./getRepoBranchCommitMapping.js";
import {
getLocalBranchCommitMapping,
checkFilesOnDisk,
updateFilesOnDisk,
cleanupAllDirectories
} from "./updateFilesOnDisk.js";
import { updateAllowedDatasets, allowedDatasets } from "./allowedDatasets.js";
import Log from "./logger.js"
// Root directory that holds the dataset checkouts, laid out as <dataset id>/<branch>.
const rootPath = path.resolve("./datasets/");

// Placeholder reader used to seed the reader-instance map below.
// The reader is constructed first and initialised in a separate statement, the
// same way loadReaderInstances does it: keeping the return value of init()
// would only yield a usable reader if init() happens to return the reader
// itself (NOTE(review): confirm against @vizabi/reader-ddfcsv).
const dummyReaderInstance = new DDFCsvReader.getDDFCsvReaderObject();
dummyReaderInstance.init({
  path: path.join(rootPath, 'ddf--gapminder--fasttrack', 'master'),
  resultTransformer,
});
/**
 * Reader instances used to serve the data, keyed by dataset slug and then by
 * branch name. Starts out with a single placeholder entry; loadReaderInstances
 * replaces a slug's entry with freshly created readers on every load or sync.
 * @type {Object<string, Object<string, Object>>}
 */
export const datasetVersionReaderInstances = {
  "slug-dummy": { "branch-dummy": dummyReaderInstance },
};

/**
 * Branch -> commit mapping per dataset slug.
 * Written by syncDataset (from the remote repository) and by loadDataset
 * (from what is found on disk).
 * @type {Object<string, Object<string, string>>}
 */
export const datasetBranchCommitMapping = {};

/**
 * Shared sync state handed back to callers of syncDatasetsIfNotAlreadySyncing.
 * `ongoing` is true while a sync is running; `events` holds the
 * `{timestamp, comment}` entries maintained by updateSyncStatus.
 * @type {{ongoing: boolean, events: Array<{timestamp: number, comment: string}>}}
 */
export const syncStatus = { ongoing: false, events: [] };
/**
 * Finds the branch of a dataset whose mapped commit matches the given commit.
 *
 * A commit matches either as the full mapped value or as its 7-character
 * abbreviation.
 *
 * @param {string} datasetSlug - Slug used as key in datasetBranchCommitMapping.
 * @param {string} commit - Full commit hash or its first 7 characters.
 * @returns {string|undefined} The matching branch name, or undefined when the
 *   dataset has no mapping or no branch points at that commit.
 */
export function getBranchFromCommit(datasetSlug, commit) {
  const branchCommitMapping = datasetBranchCommitMapping[datasetSlug];
  // Unknown slug or dataset not loaded yet: there is nothing to search, and
  // Object.entries(undefined) would throw.
  if (!branchCommitMapping) return undefined;

  for (const [branch, mappedCommit] of Object.entries(branchCommitMapping)) {
    if (mappedCommit === commit) return branch;
    // Only string hashes can be abbreviated; skip anything else (e.g. a branch
    // without a resolved commit) instead of throwing on it.
    if (typeof mappedCommit === "string" && mappedCommit.slice(0, 7) === commit) {
      return branch;
    }
  }
  return undefined;
}
/**
 * Looks up the allowed-datasets entry for a slug.
 *
 * @param {string} datasetSlug - Slug to search for.
 * @returns {Object|false} The first entry of allowedDatasets whose `slug`
 *   equals datasetSlug, or false when there is none.
 */
export function getAllowedDatasetEntryFromSlug(datasetSlug) {
  for (const entry of allowedDatasets) {
    if (entry.slug === datasetSlug) {
      return entry || false;
    }
  }
  return false;
}
/**
 * Resolves the commit that the default branch of a dataset maps to.
 *
 * @param {string} datasetSlug - Dataset slug.
 * @returns {string|false|undefined} The mapped commit; false when the dataset
 *   has no default branch or no branch/commit mapping; undefined when the
 *   mapping exists but has no entry for the default branch.
 */
export function getDefaultCommit(datasetSlug) {
  const branch = getDefaultBranch(datasetSlug);
  const commitsByBranch = datasetBranchCommitMapping[datasetSlug];
  if (!branch || !commitsByBranch) {
    return false;
  }
  return commitsByBranch[branch];
}
/**
 * Determines the default branch of an allowed dataset.
 *
 * @param {string} datasetSlug - Dataset slug.
 * @returns {string|false} The entry's `default_branch` when set, otherwise the
 *   first item of its `branches` list; false when the dataset is not allowed.
 */
export function getDefaultBranch(datasetSlug) {
  const entry = getAllowedDatasetEntryFromSlug(datasetSlug);
  if (!entry) {
    return false;
  }
  return entry.default_branch || entry.branches[0];
}
/**
 * Records a sync progress message and logs it.
 *
 * By default the most recent event is overwritten, so that progress updates
 * do not pile up; a new event is appended when `addnew` is truthy or when
 * there is no event yet.
 *
 * @param {string} comment - Message to record and log.
 * @param {boolean} [addnew] - Append a new event instead of replacing the last one.
 */
export function updateSyncStatus(comment, addnew) {
  const event = { timestamp: Date.now(), comment };
  const hasEvents = syncStatus.events.length > 0;

  if (addnew || !hasEvents) {
    syncStatus.events.push(event);
  } else {
    syncStatus.events[syncStatus.events.length - 1] = event;
  }

  Log.info(comment);
}
/**
 * Starts a sync in the background unless one is already running.
 *
 * With a slug only that dataset is synced, otherwise all datasets are. The
 * function does not wait for the sync to finish; callers observe progress
 * through the returned status object.
 *
 * @param {string} [datasetSlug] - Slug of the single dataset to sync.
 * @returns {{ongoing: boolean, events: Array}} The shared syncStatus object.
 */
export function syncDatasetsIfNotAlreadySyncing(datasetSlug) {
  if (syncStatus.ongoing) return syncStatus;

  syncStatus.ongoing = true;
  syncStatus.events = [];

  const syncFunction = datasetSlug ? syncDataset : syncAllDatasets;
  syncFunction(datasetSlug)
    .catch((err) => {
      // The promise is not awaited by anyone, so a rejection would otherwise
      // surface as an unhandled rejection and never reach the status events.
      updateSyncStatus(`Error during sync: ${err}`, true);
    })
    .finally(() => {
      syncStatus.ongoing = false;
    });

  return syncStatus;
}
/**
 * Syncs every allowed dataset, one after another.
 *
 * Refreshes the list of allowed datasets, calls cleanupAllDirectories for the
 * dataset root with that list, and then runs syncDataset for each entry.
 * Progress is reported through updateSyncStatus.
 *
 * @returns {Promise<void>}
 */
async function syncAllDatasets() {
  updateSyncStatus("π Received a request to sync ALL datasets", true);
  await updateAllowedDatasets();

  // Joining an empty list already yields "", so no special case is needed.
  const slugList = allowedDatasets.map((entry) => entry.slug).join(", ");
  updateSyncStatus(`Got info about ${allowedDatasets.length} datasets: ${slugList}`);

  cleanupAllDirectories(rootPath, allowedDatasets);

  for (const entry of allowedDatasets) {
    await syncDataset(entry.slug);
  }

  updateSyncStatus(`π’ Sync complete for ${allowedDatasets.length} datasets: ${slugList}`);
}
/**
 * Syncs a single dataset: fetches its branch/commit mapping from the
 * repository, updates the files on disk and recreates its reader instances.
 *
 * Failures are not propagated; they are reported through updateSyncStatus.
 *
 * @param {string} datasetSlug - Slug of the dataset to sync.
 * @returns {Promise<string|undefined>} "Success" when every step completed,
 *   undefined when an error was caught and reported.
 */
async function syncDataset(datasetSlug) {
  let outcome;
  try {
    updateSyncStatus(`π Syncing dataset with slug ${datasetSlug}`);

    const entry = getAllowedDatasetEntryFromSlug(datasetSlug);
    if (!entry) {
      throw `dataset not allowed`;
    }

    const commitsByBranch = await getRepoBranchCommitMapping(entry.id, entry.branches);
    datasetBranchCommitMapping[entry.slug] = commitsByBranch;

    await updateFilesOnDisk(rootPath, entry.id, commitsByBranch, updateSyncStatus);
    updateSyncStatus('Files on disk updated successfully.');

    await loadReaderInstances(entry, commitsByBranch);
    updateSyncStatus(`π’ Sync successful for dataset ${datasetSlug}`);

    outcome = "Success";
  } catch (err) {
    updateSyncStatus(`π΄ Error syncing dataset ${datasetSlug}: ${err}`);
  }
  return outcome;
}
/**
 * Loads every allowed dataset, one after another, through loadDataset.
 *
 * The list of allowed datasets is refreshed first. As the log message says,
 * this is not the complete sync.
 *
 * @returns {Promise<string>} Summary message listing the loaded dataset slugs.
 */
export async function loadAllDatasets() {
  await updateAllowedDatasets();

  // Joining an empty list already yields "", so no special case is needed.
  const slugList = allowedDatasets.map((entry) => entry.slug).join(", ");
  Log.info(`Got info about ${allowedDatasets.length} datasets: ${slugList}`);

  for (const entry of allowedDatasets) {
    await loadDataset(entry.slug);
  }

  Log.info("\nπ’ Load complete! This is not the complete sync. Run /sync to do that.\n");
  return `π’ Load complete for ${allowedDatasets.length} datasets: ${slugList}`;
}
/**
 * Loads a single dataset from what is already on disk: reads the local
 * branch/commit mapping, checks the files and recreates the reader instances.
 *
 * A failing file check is logged and does not abort the load.
 *
 * @param {string} datasetSlug - Slug of the dataset to load.
 * @returns {Promise<string>} Confirmation message for the dataset.
 * @throws {Error} When the slug is not among the allowed datasets.
 */
export async function loadDataset(datasetSlug) {
  Log.info(`\n=== Loading dataset with slug ${datasetSlug} ===\n`);

  const entry = getAllowedDatasetEntryFromSlug(datasetSlug);
  if (!entry) {
    throw new Error(`Syncing error: Dataset not allowed: ${datasetSlug}`);
  }

  const localCommits = await getLocalBranchCommitMapping(rootPath, entry.id, entry.branches);
  Log.info(localCommits);
  datasetBranchCommitMapping[entry.slug] = localCommits;

  try {
    await checkFilesOnDisk(rootPath, entry.id, localCommits);
    Log.info('Files on disk checked successfully.');
  } catch (err) {
    // Best effort: report the problem and still create the readers.
    Log.error('Error checking files on disk:', err);
  }

  await loadReaderInstances(entry, localCommits);

  const confirmation = `Sync successful for ${datasetSlug}`;
  Log.info(`Sync successful for dataset ${datasetSlug}`);
  return confirmation;
}
/**
 * Recreates the reader instances of a dataset, one per branch in the mapping.
 *
 * The dataset's entry in datasetVersionReaderInstances is reset first, then a
 * reader pointing at <rootPath>/<dataset id>/<branch> is created for every
 * branch. Only the branch names of the mapping are used here.
 *
 * @param {{slug: string, id: string}} dataset - Allowed-datasets entry.
 * @param {Object<string, string>} branchCommitMapping - Branch -> commit mapping.
 * @returns {Promise<void>}
 */
async function loadReaderInstances(dataset, branchCommitMapping) {
  const readersByBranch = {};
  datasetVersionReaderInstances[dataset.slug] = readersByBranch;

  for (const branchName of Object.keys(branchCommitMapping)) {
    const readerOptions = {
      path: path.join(rootPath, dataset.id, branchName),
      resultTransformer,
    };
    const reader = new DDFCsvReader.getDDFCsvReaderObject();
    reader.init(readerOptions);
    Log.info(`Created a reader instance for ${dataset.slug}/${branchName}`);
    readersByBranch[branchName] = reader;
  }
}