Skip to content

Commit

Permalink
feat: added generate-nano-reps script (mistakia#84)
Browse files Browse the repository at this point in the history
  • Loading branch information
mistakia authored Mar 25, 2024
1 parent c0887b9 commit 08f4523
Show file tree
Hide file tree
Showing 7 changed files with 397 additions and 59 deletions.
1 change: 1 addition & 0 deletions common/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import request from './request.mjs'
export * as cloudflare from './cloudflare.mjs'
export { request }
export { default as convertToCSV } from './convert-to-csv.mjs'
export { default as read_csv } from './read-csv.mjs'

const POST = (data) => ({
method: 'POST',
Expand Down
14 changes: 14 additions & 0 deletions common/read-csv.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import fs from 'fs'
import csv from 'csv-parser'

/**
 * Read a CSV file and collect every parsed row into an array.
 *
 * @param {string} filepath - path of the CSV file to read
 * @param {object} [options={}] - options passed straight through to `csv-parser`
 * @returns {Promise<object[]>} resolves with all parsed rows once the stream ends
 * @throws rejects with the underlying error when the file cannot be read or
 *   the parser emits an error
 */
const read_csv = (filepath, options = {}) =>
  new Promise((resolve, reject) => {
    const results = []
    const source = fs.createReadStream(filepath)

    // Release the file handle before rejecting; destroy() is a no-op when the
    // source stream is the one that already failed
    const fail = (error) => {
      source.destroy()
      reject(error)
    }

    source
      // `.pipe()` does not forward errors, so a missing or unreadable file has
      // to be handled on the read stream itself
      .on('error', fail)
      .pipe(csv(options))
      .on('data', (data) => results.push(data))
      .on('error', fail)
      .on('end', () => resolve(results))
  })

export default read_csv
198 changes: 139 additions & 59 deletions data/nano-reps.csv

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions db/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ CREATE TABLE `representatives_meta` (
`twitter` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`discord` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`github` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`website` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`email` varchar(255) CHARACTER SET utf8 DEFAULT NULL,

`timestamp` int(11) NOT NULL,
UNIQUE KEY `account` (`account`, `timestamp`)
Expand Down Expand Up @@ -230,6 +232,8 @@ CREATE TABLE `representatives_meta_index` (
`twitter` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`discord` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`github` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`website` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`email` varchar(255) CHARACTER SET utf8 DEFAULT NULL,

`timestamp` int(11) NOT NULL,
UNIQUE KEY `account` (`account`)
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,11 @@
"compression": "^1.7.4",
"connected-react-router": "^6.9.3",
"cors": "^2.8.5",
"csv-parser": "^3.0.0",
"csv-writer": "^1.6.0",
"dayjs": "^1.11.9",
"debug": "^4.3.4",
"deep-diff": "^1.0.2",
"deep-extend": "^0.6.0",
"esm": "^3.2.25",
"express": "^4.18.2",
Expand Down
217 changes: 217 additions & 0 deletions scripts/generate-nano-reps.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import debug from 'debug'
// import yargs from 'yargs'
// import { hideBin } from 'yargs/helpers'
import diff from 'deep-diff'
import fetch from 'node-fetch'
import { pipeline } from 'stream'
import { promisify } from 'util'
import os from 'os'
import fs from 'fs'
import { fileURLToPath } from 'url'
import path, { dirname } from 'path'

/* eslint-disable no-unused-vars */
import db from '#db'
import { isMain, read_csv, convertToCSV } from '#common'
import { REPRESENTATIVE_TRACKING_MINIMUM_VOTING_WEIGHT } from '#constants'
/* eslint-enable no-unused-vars */

// const argv = yargs(hideBin(process.argv)).argv
// Logger for this script, created under the `generate-nano-reps` debug namespace
const log = debug('generate-nano-reps')
// Enable that namespace in code so the script's log output is shown without
// the caller having to set it up
debug.enable('generate-nano-reps')

/**
 * Regenerate `data/nano-reps.csv` by merging the published CSV with the
 * representative metadata stored in the database.
 *
 * Steps:
 *  1. Download the current `nano-reps.csv` from GitHub into the OS temp
 *     directory and parse it.
 *  2. Load representative metadata (plus account alias and weight) from the
 *     database.
 *  3. For every account in the CSV, merge CSV and database values, preferring
 *     the CSV value. Accounts where both sources hold different truthy values
 *     for the same field are logged and skipped in this step.
 *  4. Add representatives not yet in the result (including ones skipped in
 *     step 3) whose weight exceeds
 *     REPRESENTATIVE_TRACKING_MINIMUM_VOTING_WEIGHT and that have at least one
 *     non-null metadata field, using the database values.
 *  5. Sort by alias and overwrite `../data/nano-reps.csv` (relative to this
 *     script).
 *
 * @returns {Promise<void>}
 * @throws {Error} when the download fails or there are no representatives to
 *   write; database and filesystem errors propagate unchanged
 */
const generate_nano_reps = async () => {
  // Fetch nano representatives data from GitHub and save to temp file
  const current_date = new Date().toISOString().split('T')[0]
  const filename = `nano-reps-${current_date}.csv`
  const csv_download_path = path.join(os.tmpdir(), filename)
  const stream_pipeline = promisify(pipeline)
  const nano_reps_resp = await fetch(
    'https://raw.githubusercontent.com/mistakia/nano-community/main/data/nano-reps.csv'
  )
  // Without this check an HTTP error page would be saved and parsed as CSV
  if (!nano_reps_resp.ok) {
    throw new Error(
      `failed to download nano-reps.csv: ${nano_reps_resp.status} ${nano_reps_resp.statusText}`
    )
  }
  await stream_pipeline(
    nano_reps_resp.body,
    fs.createWriteStream(csv_download_path)
  )

  // Read the downloaded CSV data
  const csv_reps = await read_csv(csv_download_path, {
    mapValues: ({ header, value }) => {
      // Empty cells become null so they compare equal to NULL database columns
      if (value === '') {
        return null
      }

      // Strip the quotes that are added around aliases containing commas
      if (header === 'alias') {
        return value.replace(/"/g, '')
      }

      return value
    }
  })

  // Fetch representatives data from the database
  const db_reps = await db('representatives_meta_index')
    .leftJoin(
      'accounts',
      'representatives_meta_index.account',
      'accounts.account'
    )
    .leftJoin(
      'accounts_meta_index',
      'accounts.account',
      'accounts_meta_index.account'
    )
    .whereNot(
      'representatives_meta_index.account',
      'nano_1111111111111111111111111111111111111111111111111111hifc8npp'
    )
    .select(
      'accounts.alias',
      'accounts_meta_index.weight',
      'representatives_meta_index.account',
      'reddit',
      'discord',
      'twitter',
      'github',
      'website',
      'email'
    )

  // Index a list of representatives by account (later duplicates win)
  const index_by_account = (reps) =>
    reps.reduce((acc, cur) => {
      acc[cur.account] = cur
      return acc
    }, {})

  const results_index = {}
  const db_reps_index = index_by_account(db_reps)
  const csv_reps_index = index_by_account(csv_reps)

  // Merge csv and database data, skip accounts with conflicts
  for (const account in csv_reps_index) {
    const nano_rep = csv_reps_index[account]
    // db_rep is undefined for accounts that only exist in the CSV; spreading
    // undefined yields an empty object
    const db_rep = db_reps_index[account]
    const db_rep_without_weight_field = { ...db_rep }
    delete db_rep_without_weight_field.weight

    // deep-diff returns undefined (not an empty array) when both sides match
    const differences = diff(nano_rep, db_rep_without_weight_field) || []

    // A conflict is an edit where both sides hold a (different) truthy value
    const conflicting_edits = differences.filter(
      (difference) =>
        difference.kind === 'E' &&
        Boolean(difference.lhs) &&
        Boolean(difference.rhs)
    )

    if (conflicting_edits.length) {
      log(`conflicting edits for account: ${account}`)
      log(conflicting_edits)
      continue
    }

    // Merge values, preferring the CSV value and falling back to the database
    results_index[account] = {
      alias: nano_rep.alias || db_rep_without_weight_field.alias,
      account: nano_rep.account || db_rep_without_weight_field.account,
      discord: nano_rep.discord || db_rep_without_weight_field.discord || '',
      reddit: nano_rep.reddit || db_rep_without_weight_field.reddit || '',
      twitter: nano_rep.twitter || db_rep_without_weight_field.twitter || '',
      website: nano_rep.website || db_rep_without_weight_field.website || '',
      email: nano_rep.email || db_rep_without_weight_field.email || '',
      github: nano_rep.github || db_rep_without_weight_field.github || ''
    }
  }

  // Add missing representatives with sufficient voting weight and at least one non-null field
  for (const account in db_reps_index) {
    if (
      !results_index[account] &&
      db_reps_index[account].weight >
        REPRESENTATIVE_TRACKING_MINIMUM_VOTING_WEIGHT
    ) {
      const {
        weight,
        account: db_rep_account,
        ...db_rep_without_weight_field
      } = db_reps_index[account]
      const has_non_null_field = Object.values(
        db_rep_without_weight_field
      ).some((value) => value !== null)
      if (has_non_null_field) {
        results_index[account] = {
          alias: db_rep_without_weight_field.alias,
          account: db_rep_account,
          discord: db_rep_without_weight_field.discord || '',
          reddit: db_rep_without_weight_field.reddit || '',
          twitter: db_rep_without_weight_field.twitter || '',
          website: db_rep_without_weight_field.website || '',
          email: db_rep_without_weight_field.email || '',
          github: db_rep_without_weight_field.github || ''
        }
      }
    }
  }

  // Sort results by alias
  const results = Object.values(results_index).sort((a, b) => {
    const alias_a = a.alias || ''
    const alias_b = b.alias || ''
    return alias_a.localeCompare(alias_b)
  })

  // Refuse to overwrite the data file with nothing
  if (!results.length) {
    throw new Error('no representatives to write to nano-reps.csv')
  }

  // escape any commas in alias
  for (const rep of results) {
    // alias can be null or undefined when neither source provides one
    if (typeof rep.alias === 'string' && rep.alias.includes(',')) {
      rep.alias = `"${rep.alias}"`
    }
  }

  // Convert results to CSV and save
  const csv_headers = {}
  for (const field of Object.keys(results[0])) {
    csv_headers[field] = field
  }
  const result_csv_data = [csv_headers, ...results]
  const result_csv_string = JSON.stringify(result_csv_data)
  const result_csv = convertToCSV(result_csv_string)
  const __dirname = dirname(fileURLToPath(import.meta.url))
  const data_path = path.join(__dirname, '../data')
  const csv_path = path.join(data_path, 'nano-reps.csv')
  await fs.promises.writeFile(csv_path, result_csv)

  log(`wrote ${results.length} reps to ${csv_path}`)
}

/**
 * CLI entry point: run generate_nano_reps, log any failure, then terminate the
 * process.
 *
 * The exit status is 1 when generation failed and 0 otherwise, so shells and
 * schedulers can detect a failed run.
 *
 * @returns {Promise<void>} never settles in practice because the process exits
 */
const main = async () => {
  let error
  try {
    await generate_nano_reps()
  } catch (err) {
    error = err
    log(error)
  }

  // await db('jobs').insert({
  //   type: constants.jobs.EXAMPLE,
  //   succ: error ? 0 : 1,
  //   reason: error ? error.message : null,
  //   timestamp: Math.round(Date.now() / 1000)
  // })

  // Exit explicitly, reporting failure through the exit status
  process.exit(error ? 1 : 0)
}

// Run main() only when isMain() accepts this module's URL — presumably true
// when the file is executed directly rather than imported; confirm against the
// isMain helper in #common
if (isMain(import.meta.url)) {
main()
}

export default generate_nano_reps
20 changes: 20 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6997,6 +6997,17 @@ __metadata:
languageName: node
linkType: hard

"csv-parser@npm:^3.0.0":
version: 3.0.0
resolution: "csv-parser@npm:3.0.0"
dependencies:
minimist: ^1.2.0
bin:
csv-parser: bin/csv-parser
checksum: adc9d67d9f185249825570778c24d13004625301655330f6b735a052b9fdfbe1a239a014afb1f89939e0626ee573718f71f9f14164db7c17e4bcb2f38d6a162b
languageName: node
linkType: hard

"csv-writer@npm:^1.6.0":
version: 1.6.0
resolution: "csv-writer@npm:1.6.0"
Expand Down Expand Up @@ -7243,6 +7254,13 @@ __metadata:
languageName: node
linkType: hard

"deep-diff@npm:^1.0.2":
version: 1.0.2
resolution: "deep-diff@npm:1.0.2"
checksum: 9de8b5eedc1957116e1b47e4c3c4e3dbe23cb741abefc5ec8829a12e77958c689ac46888a3c35320f976cf42fb6de2b016e158facdb24d894ab5b5fdabad9b34
languageName: node
linkType: hard

"deep-extend@npm:^0.6.0":
version: 0.6.0
resolution: "deep-extend@npm:0.6.0"
Expand Down Expand Up @@ -15687,9 +15705,11 @@ __metadata:
cors: ^2.8.5
cross-env: ^7.0.3
css-loader: 6.8.1
csv-parser: ^3.0.0
csv-writer: ^1.6.0
dayjs: ^1.11.9
debug: ^4.3.4
deep-diff: ^1.0.2
deep-extend: ^0.6.0
deepmerge: 4.3.1
echarts: ^5.4.3
Expand Down

0 comments on commit 08f4523

Please sign in to comment.