-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_markdown.js
68 lines (60 loc) · 2.56 KB
/
convert_markdown.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
const fs = require('fs-extra');
const path = require('path');
const MarkdownIt = require('markdown-it');
const md = new MarkdownIt();
/**
 * Reduces rendered HTML to plain text.
 *
 * Tags are stripped first; afterwards the character entities that
 * markdown-it's HTML escaping produces (&amp;amp; &amp;lt; &amp;gt; &amp;quot;) are decoded in
 * a single pass so that e.g. "&amp;amp;lt;" becomes "&amp;lt;" rather than "<".
 * @param {string} html - HTML produced by the markdown renderer.
 * @returns {string} The text content with tags removed and entities decoded.
 */
function htmlToPlainText(html) {
  const entities = { amp: '&', lt: '<', gt: '>', quot: '"' };
  return html
    .replace(/<\/?[^>]+(>|$)/g, '')
    .replace(/&(amp|lt|gt|quot);/g, (_match, name) => entities[name]);
}

/**
 * Reads a markdown file and converts each section to plain text.
 *
 * The file is split in front of every level-1 or level-2 heading; deeper
 * headings stay inside their parent section. The section's header is the text
 * of the first heading (any level) found in it, or 'no_header' when the
 * section contains no heading at all (e.g. text before the first heading).
 * @param {string} filePath - Path to the markdown file.
 * @returns {Promise<Array<{ header: string, plainText: string }>>} Resolves to
 *   one object per section. Resolves to an empty array when the file cannot be
 *   read or converted; the error is logged rather than thrown.
 */
async function convertMarkdownToText(filePath) {
  try {
    const markdownContent = await fs.readFile(filePath, 'utf-8');
    // Lookahead split keeps each heading line at the start of its own section.
    const sections = markdownContent.split(/(?=^#{1,2}\s)/gm);
    return sections.map((section) => {
      const headerMatch = section.match(/^#{1,6}\s(.*)$/m);
      const header = headerMatch ? headerMatch[1] : 'no_header';
      const plainText = htmlToPlainText(md.render(section));
      return { header, plainText };
    });
  } catch (error) {
    console.error('Error reading file:', error);
    // Always hand the caller something iterable so one bad file cannot
    // crash the loop that consumes the result.
    return [];
  }
}
/**
 * Main function that converts all markdown files in the input directory to plain text files in the output directory.
 *
 * Each section of each '.md' file is written to
 * `<file name without .md>_section_<sanitized header>.txt`. Sections whose
 * sanitized headers are identical map to the same file name, so the later
 * section overwrites the earlier one.
 *
 * Writes are awaited one at a time, so the returned promise settles only
 * after every file has been written and write failures are reported by the
 * error handler below instead of surfacing as unhandled rejections.
 * @param {string} inputDir - Path to the input directory containing markdown files.
 * @param {string} outputDir - Path to the output directory where plain text files will be written.
 * @returns {Promise<void>} Resolves when processing has finished. Rejects only
 *   if the output directory cannot be created; later errors are logged.
 */
async function main(inputDir, outputDir) {
  // Ensure output directory exists
  await fs.ensureDir(outputDir);
  try {
    // Get list of markdown files in input directory
    const files = await fs.readdir(inputDir);
    const mdFiles = files.filter((file) => path.extname(file) === '.md');
    for (const mdFile of mdFiles) {
      const inputFilePath = path.join(inputDir, mdFile);
      const sectionData = await convertMarkdownToText(inputFilePath);
      // A failed conversion may yield no usable result; skip that file and
      // keep going rather than aborting the whole batch.
      if (!Array.isArray(sectionData)) {
        continue;
      }
      // Write plain text sections to output directory
      for (const { header, plainText } of sectionData) {
        // Replace anything that is not alphanumeric, '-' or '_' so the header
        // is safe to embed in a file name.
        const sanitizedHeader = header
          ? header.replace(/[^a-zA-Z0-9-_]/g, '-')
          : 'no_header';
        const baseName = path.basename(mdFile, '.md');
        const outputFileName = `${baseName}_section_${sanitizedHeader}.txt`;
        const outputFilePath = path.join(outputDir, outputFileName);
        await fs.writeFile(outputFilePath, plainText);
        console.log(`Converted ${mdFile} to ${outputFileName}`);
      }
    }
  } catch (error) {
    console.error('Error processing files:', error);
  }
}
// Run main function with default input and output directories.
// main() creates the output directory before entering its own try/catch, so a
// failure there rejects the returned promise; report it and signal failure
// through the exit code instead of leaving the rejection unhandled.
main('copilot/data/data/', 'copilot/data/doc-sections').catch((error) => {
  console.error('Markdown conversion failed:', error);
  process.exitCode = 1;
});