Skip to content

Commit

Permalink
Add functions for downloading all images and hashing filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvNC committed Jan 30, 2024
1 parent 35a0870 commit 84950d7
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
*.txt
csvs
/images/*.*

# Created by https://www.toptal.com/developers/gitignore/api/node
# Edit at https://www.toptal.com/developers/gitignore?templates=node
Expand Down
35 changes: 35 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
},
"dependencies": {
"@gerhobbelt/xregexp": "^4.4.0-32",
"axios": "^1.6.7",
"csv-parser": "^3.0.0",
"jsdom": "^23.0.1",
"yomichan-dict-builder": "^2.2.0"
Expand All @@ -16,4 +17,4 @@
"ava": "^6.0.1"
},
"version": "1.0.0"
}
}
4 changes: 3 additions & 1 deletion src/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,6 @@ const languages = {
},
};

export { languages };
const IMAGE_FOLDER = 'images';

export { languages, IMAGE_FOLDER };
3 changes: 3 additions & 0 deletions src/convertToTermDictionary.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { convertEntryToYomitanTerms } from './util/yomitan/convertEntryToYomitan
import { findLabelValues } from './util/entryParse/parseLabels.js';
import { addYomitanTags } from './util/addYomitanTags.js';
import { getAllImageURLs } from './util/entryParse/findImages.js';
import { downloadImages } from './util/imageHandler/downloadImages.js';

const dataFolder = './csvs';
const exportDirectory = './dist';
Expand All @@ -22,6 +23,8 @@ const exportDirectory = './dist';

const imageURLs = getAllImageURLs(dictionaryEntries);

await downloadImages(imageURLs);

const dictionary = new Dictionary({
fileName: `Words.hk ${dateString}.zip`,
});
Expand Down
75 changes: 75 additions & 0 deletions src/util/imageHandler/downloadImages.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import fs from 'fs';
import path from 'path';
import axios from 'axios';

import { getImageFileName } from './getImageFileName.js';
import { IMAGE_FOLDER } from '../../constants.js';

// Pause (in milliseconds) inserted after each download attempt.
// NOTE(review): presumably a politeness delay for the image host — confirm.
const DELAY_MS = 1000;

/**
 * Downloads all the images in the given set into IMAGE_FOLDER, one at a time.
 *
 * Images whose target file already exists are skipped without any delay.
 * Any error thrown while naming or downloading an image is logged and counted
 * as a failure; it never aborts the remaining downloads.
 * @param {Set} imageURLs - The set of image URLs to download.
 * @returns {Promise<void>}
 */
async function downloadImages(imageURLs) {
  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(IMAGE_FOLDER, { recursive: true });

  let successful = 0;
  let skipped = 0;
  let failed = 0;
  for (const imageURL of imageURLs) {
    // Only pause after we actually tried to download something.
    let requestAttempted = false;
    try {
      const fileName = getImageFileName(imageURL);
      if (fs.existsSync(path.join(IMAGE_FOLDER, fileName))) {
        console.log(`File already exists: ${fileName}`);
        skipped++;
        continue;
      }
      requestAttempted = true;
      await downloadImage(imageURL, IMAGE_FOLDER, fileName);
      successful++;
    } catch (error) {
      console.error(error);
      failed++;
    }
    // Runs after failures too, so repeated errors do not hit the host
    // back-to-back.
    if (requestAttempted) {
      await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
    }
  }
  console.log(`Successfully downloaded ${successful} images.`);
  console.log(`Skipped ${skipped} images that already exist.`);
  console.log(`Failed to download ${failed} images.`);
}

/**
 * Downloads the image at the given URL and saves it to the given path.
 * Does nothing (besides logging) when the target file already exists.
 * @param {string} imageURL - Absolute URL of the image to fetch.
 * @param {string} savePath - Existing directory the file is written into.
 * @param {string} fileName - Name of the file to create inside savePath.
 * @returns {Promise<void>}
 * @throws {Error} If savePath does not exist, imageURL is not a valid URL,
 * or the request / file write fails.
 */
async function downloadImage(imageURL, savePath, fileName) {
  // Abort a stalled request instead of blocking the whole run forever.
  const REQUEST_TIMEOUT_MS = 30000;

  // Check if path valid
  if (!fs.existsSync(savePath)) {
    throw new Error(`Invalid path: ${savePath}`);
  }
  // Check if valid URL. Throw rather than return so the caller does not
  // mistake a bad URL for a completed download.
  try {
    new URL(imageURL);
  } catch (error) {
    throw new Error(`Invalid URL: ${imageURL}`, { cause: error });
  }

  const filePath = path.join(savePath, fileName);

  // Check if file already exists
  if (fs.existsSync(filePath)) {
    console.log(`File already exists: ${fileName}`);
    return;
  }

  // Download image
  console.log(`Downloading ${fileName}...`);
  const response = await axios.get(imageURL, {
    responseType: 'arraybuffer',
    timeout: REQUEST_TIMEOUT_MS,
  });
  // No encoding argument: it only applies when converting from a string.
  const buffer = Buffer.from(response.data);

  // Save image
  fs.writeFileSync(filePath, buffer);
  console.log(`Saved ${fileName}`);
}

export { downloadImages };
19 changes: 19 additions & 0 deletions src/util/imageHandler/getImageFileName.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { createHash } from 'crypto';

/**
 * Hashes the image URL to get the image file name, preserving the file extension.
 *
 * The name is the SHA-256 hex digest of the full URL string (query and
 * fragment included) followed by the lower-cased extension of the URL's last
 * path segment. Query strings and fragments are ignored when looking for the
 * extension, so `photo.PNG?size=large` yields a `.png` file name.
 * @param {string} imageURL
 * @returns {string} File name of the form `<sha256 hex>.<extension>`.
 * @throws {Error} If the extension is missing or not an allowed image type.
 */
function getImageFileName(imageURL) {
  const allowedExtensions = ['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp'];

  const hashed = createHash('sha256').update(imageURL).digest('hex');

  // Drop any query string / fragment, then look only at the last path
  // segment so dots in the host name or in directories are not mistaken for
  // an extension separator.
  const withoutQuery = imageURL.split(/[?#]/, 1)[0];
  const lastSegment = withoutQuery.split('/').pop() ?? '';
  const dotIndex = lastSegment.lastIndexOf('.');
  const extension =
    dotIndex === -1 ? '' : lastSegment.slice(dotIndex + 1).toLowerCase();

  if (!allowedExtensions.includes(extension)) {
    throw new Error(`Invalid extension: ${extension}`);
  }
  return `${hashed}.${extension}`;
}

export { getImageFileName };

0 comments on commit 84950d7

Please sign in to comment.