Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: updater-scripts #733

Draft
wants to merge 29 commits into
base: gql
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
90145f5
base section script
adit-bala Oct 27, 2024
101301a
modularize data fetching
adit-bala Oct 28, 2024
f523109
new `Course` & `Class` Schema
adit-bala Oct 28, 2024
7099aac
`courseParser` & `classParser`
adit-bala Oct 30, 2024
7f4e9c9
Merge branch 'gql' into feat-updater-scripts
mathhulk Nov 2, 2024
537af49
don't use terms for `course` api
adit-bala Nov 3, 2024
7b9b141
add batching from @mathhulk
adit-bala Nov 3, 2024
e8a0f2b
insert results into the database
adit-bala Nov 3, 2024
3f81826
only delete relevant objects
adit-bala Nov 4, 2024
d43c524
add mongodb loader
adit-bala Nov 4, 2024
edc75d1
update optional fields for `course`
adit-bala Nov 5, 2024
a0a6122
don't include invalid data
adit-bala Nov 5, 2024
1d6f78f
simplify `course` and `class`
adit-bala Nov 7, 2024
ff86b7c
add required fields for section
adit-bala Nov 11, 2024
e7838e9
add testing script
adit-bala Nov 11, 2024
c2e46aa
init infra changes
adit-bala Nov 17, 2024
469e691
denote required fields in typescript
adit-bala Nov 17, 2024
676a797
decouple `datapuller` infra
adit-bala Nov 21, 2024
57e1931
feat: log total errors for datapuller
adit-bala Nov 21, 2024
9867932
rm dependecy
adit-bala Nov 21, 2024
a0e165c
fix: keep `datapuller` infra in `app`
adit-bala Nov 25, 2024
85d99ba
chore: migrate logic to one file
adit-bala Nov 25, 2024
489859d
chore: add encrypted env vars
adit-bala Nov 26, 2024
d8d099d
Merge branch 'gql' into feat-updater-scripts
adit-bala Nov 26, 2024
ad36e05
feat: logging + cleanup of logs
adit-bala Nov 26, 2024
c4b729a
fix: error types
adit-bala Nov 26, 2024
e4d8e4a
chore: more detailed logging
adit-bala Nov 26, 2024
93adeab
fix: proper define
adit-bala Nov 26, 2024
5a2f5ee
fix: remove `dev` check
adit-bala Nov 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ COPY --from=datapuller-builder /datapuller/out/package-lock.json ./package-lock.
RUN ["npm", "install"]

COPY --from=datapuller-builder /datapuller/out/full/ .
ENTRYPOINT ["turbo", "run", "course", "--filter=datapuller"]
ENTRYPOINT ["turbo", "run", "runDatapuller", "--filter=datapuller"]

FROM datapuller-dev AS datapuller-prod
ENTRYPOINT ["turbo", "run", "course", "--filter=datapuller", "--env-mode=loose"]
ENTRYPOINT ["turbo", "run", "runDatapuller", "--filter=datapuller", "--env-mode=loose"]

# backend
FROM base AS backend-builder
Expand Down
6 changes: 5 additions & 1 deletion apps/datapuller/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
"private": true,
"scripts": {
"build": "tsc --noEmit",
"course": "tsx src/course.ts"
"update:courses": "tsx src/course.ts",
"update:sections": "tsx src/section.ts",
"update:classes": "tsx src/class.ts",
"cleanup:logs": "tsx src/cleanupLogs.ts",
"runDatapuller": "tsx src/runDatapuller.ts"
},
"devDependencies": {
"@types/node": "^22.9.1",
Expand Down
34 changes: 34 additions & 0 deletions apps/datapuller/src/bootstrap/loaders/mongoose.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import mongoose from "mongoose";

import { Config } from "../../config";

// Close the default Mongoose connection in the event of application
// termination (SIGINT), then exit with a success status.
const closeConnectionAndExit = async (): Promise<void> => {
  await mongoose.connection.close();
  process.exit(0);
};

process.on("SIGINT", closeConnectionAndExit);

// Replaces the `user:password@` portion of a connection string so the URI can
// be logged without exposing credentials. URIs without credentials are
// returned unchanged.
const redactCredentials = (uri: string): string =>
  uri.replace(/\/\/[^/@]*@/, "//<redacted>@");

/**
 * Connects the default Mongoose connection to the database named in
 * `config.mongoDB.uri` and logs connection lifecycle events through
 * `config.log`.
 *
 * @param config - Application configuration providing the logger and the
 *   MongoDB URI.
 * @returns The promise returned by `mongoose.connect`.
 */
export default async (config: Config): Promise<mongoose.Mongoose> => {
  const { log } = config;

  // Attach the listeners before connecting so that no lifecycle event can be
  // emitted before a handler is registered.

  // Log when the connection is established
  mongoose.connection.on("connected", () => {
    log.info("MongoDB connection established successfully");
  });

  // Log any errors during the connection
  mongoose.connection.on("error", (err) => {
    log.error("MongoDB connection error:", err);
  });

  // Log when the connection is disconnected
  mongoose.connection.on("disconnected", () => {
    log.info("MongoDB connection disconnected");
  });

  // Connect to MongoDB
  log.info("Connecting to MongoDB...");
  // Never write the raw URI to the logs: it may embed a username and password.
  log.info("MongoDB URI:", redactCredentials(config.mongoDB.uri));

  return mongoose.connect(config.mongoDB.uri);
};
69 changes: 69 additions & 0 deletions apps/datapuller/src/class.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { IClassItem, NewClassModel } from "@repo/common";
import { ClassesAPI } from "@repo/sis-api/classes";

import { Config } from "./config";
import setup from "./shared";
import mapClassToNewClass, { CombinedClass } from "./shared/classParser";
import { fetchActiveTerms, fetchPaginatedData } from "./shared/utils";

/**
 * Replaces the stored class documents for every active term.
 *
 * Steps: fetch the active terms, fetch the classes for those terms through
 * `fetchPaginatedData`, delete the existing `NewClassModel` documents whose
 * `session.term.id` is one of the active terms, then insert the freshly
 * fetched classes in batches.
 *
 * If no classes were fetched, the existing documents are left untouched so a
 * failed or empty fetch cannot wipe the collection.
 *
 * @param config - Application configuration providing the logger and the SIS
 *   API credentials.
 */
export async function updateClasses(config: Config) {
  const log = config.log;
  const classesAPI = new ClassesAPI();

  log.info("Fetching Active Terms");
  const activeTerms = await fetchActiveTerms(log, {
    app_id: config.sis.TERM_APP_ID,
    app_key: config.sis.TERM_APP_KEY,
  });

  const classes = await fetchPaginatedData<IClassItem, CombinedClass>(
    log,
    classesAPI.v1,
    activeTerms,
    "getClassesUsingGet",
    {
      app_id: config.sis.CLASS_APP_ID,
      app_key: config.sis.CLASS_APP_KEY,
    },
    (data) => data.apiResponse.response.classes || [],
    mapClassToNewClass,
    "classes"
  );
  log.info(activeTerms);

  log.info("Example Class:", classes[0]);

  // Deleting first and inserting nothing would leave the active terms without
  // any class data, so bail out before touching the database.
  if (classes.length === 0) {
    log.warn("No classes were fetched; existing class data was left unchanged.");
    return;
  }

  await NewClassModel.deleteMany({
    "session.term.id": { $in: activeTerms },
  });

  // Insert classes in batches of 5000
  // NOTE(review): we should probably do some testing on hozer to determine the
  // final batch size. It all depends on amount of memory, etc.
  const insertBatchSize = 5000;

  for (let i = 0; i < classes.length; i += insertBatchSize) {
    const batch = classes.slice(i, i + insertBatchSize);

    log.info(`Inserting batch ${i / insertBatchSize + 1}...`);

    // `ordered: false` is passed through to insertMany unchanged.
    await NewClassModel.insertMany(batch, { ordered: false });
  }

  log.info(`Completed updating database with new class data.`);

  log.info(`Updated ${classes.length} classes for active terms`);
}

/**
 * Stand-alone entry point: runs setup, updates the classes, and terminates the
 * process with exit code 0 on success or 1 on failure.
 */
const initialize = async () => {
  const { config } = await setup();
  try {
    config.log.info("\n=== UPDATE CLASSES ===");
    await updateClasses(config);
  } catch (error) {
    config.log.error(error);
    process.exit(1);
  }

  process.exit(0);
};

// Only start the stand-alone run when this file is the script being executed.
// Without this guard, importing `updateClasses` from another module would kick
// off a second, competing update that ends in `process.exit(0)`.
// NOTE(review): relies on CommonJS globals (`require`/`module`), like the
// `__dirname` usage elsewhere in this package — confirm the module format.
if (require.main === module) {
  initialize().catch((error: unknown) => {
    // `setup()` failed before a logger was available.
    console.error(error);
    process.exit(1);
  });
}
43 changes: 43 additions & 0 deletions apps/datapuller/src/cleanupLogs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import fs from "fs/promises";
import path from "path";
import { Logger } from "tslog";

/**
 * Deletes `error_*.log` files from the `logs` directory next to this script
 * when their modification time is older than the retention period (7 days).
 *
 * A missing log directory is treated as "nothing to clean up". Any other
 * failure is logged and terminates the process with exit code 1.
 */
async function cleanupLogs() {
  const logger = new Logger({ name: "LogCleanup" });
  const logDir = path.join(__dirname, "logs"); // Adjust the path if necessary
  const retentionDays = 7; // Number of days to retain logs
  const millisecondsPerDay = 24 * 60 * 60 * 1000;

  try {
    logger.info(
      `Starting log cleanup. Retaining logs from the last ${retentionDays} days.`
    );

    // Files last modified before this instant are eligible for deletion
    const cutoffTime = Date.now() - retentionDays * millisecondsPerDay;

    // Read the contents of the log directory
    let files: string[];
    try {
      files = await fs.readdir(logDir);
    } catch (error: unknown) {
      const isMissingDirectory =
        typeof error === "object" &&
        error !== null &&
        "code" in error &&
        error.code === "ENOENT";

      // No directory means no logs have been written yet; that is not a failure.
      if (isMissingDirectory) {
        logger.info(`Log directory ${logDir} does not exist. Nothing to clean up.`);
        return;
      }

      throw error;
    }

    for (const file of files) {
      // Only process log files that match the naming pattern
      if (!file.startsWith("error_") || !file.endsWith(".log")) {
        continue;
      }

      const filePath = path.join(logDir, file);
      const stats = await fs.stat(filePath);

      // Check if the file is older than the retention period
      if (stats.mtime.getTime() < cutoffTime) {
        await fs.unlink(filePath);
        logger.info(`Deleted old log file: ${file}`);
      }
    }

    logger.info("Log cleanup completed successfully.");
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`Log cleanup failed: ${message}`);
    process.exit(1);
  }
}

cleanupLogs();
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import dotenv from "dotenv";
import { Logger } from "tslog";

// Safely get the environment variable in the process
const env = (name: string): string => {
Expand All @@ -12,6 +13,7 @@ const env = (name: string): string => {
};

export interface Config {
log: Logger<unknown>;
isDev: boolean;
mongoDB: {
uri: string;
Expand All @@ -21,13 +23,21 @@ export interface Config {
CLASS_APP_KEY: string;
COURSE_APP_ID: string;
COURSE_APP_KEY: string;
TERM_APP_ID: string;
TERM_APP_KEY: string;
};
}

export function loadConfig(): Config {
dotenv.config();

const log = new Logger({
type: "pretty",
prettyLogTimeZone: "local",
});

return {
log,
isDev: env("NODE_ENV") === "development",
mongoDB: {
uri: env("MONGODB_URI"),
Expand All @@ -37,6 +47,8 @@ export function loadConfig(): Config {
CLASS_APP_KEY: env("SIS_CLASS_APP_KEY"),
COURSE_APP_ID: env("SIS_COURSE_APP_ID"),
COURSE_APP_KEY: env("SIS_COURSE_APP_KEY"),
TERM_APP_ID: env("SIS_TERM_APP_ID"),
TERM_APP_KEY: env("SIS_TERM_APP_KEY"),
},
};
}
73 changes: 45 additions & 28 deletions apps/datapuller/src/course.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,60 @@
import { TermModel } from "@repo/common";
import { ClassesAPI } from "@repo/sis-api/classes";
import { ICourseItem, NewCourseModel } from "@repo/common";
import { CoursesAPI } from "@repo/sis-api/courses";
import { TermsAPI } from "@repo/sis-api/terms";

import { Config } from "./config";
import setup from "./shared";
import mapCourseToNewCourse, { CombinedCourse } from "./shared/courseParser";
import { fetchPaginatedData } from "./shared/utils";

// NOTE(review): this span appears to be a rendered diff without +/- markers:
// lines of the previous `main()` example seem to be interleaved with the lines
// of the new `updateCourses` function — confirm against the actual file.
async function main() {
const { log } = setup();

// Terms API example
const termsAPI = new TermsAPI();
// Fetches courses through `fetchPaginatedData`, deletes every existing
// `NewCourseModel` document, then inserts the fetched courses in batches.
export async function updateCourses(config: Config) {
const log = config.log;
const coursesAPI = new CoursesAPI();

await termsAPI.v2.getByTermsUsingGet(
const courses = await fetchPaginatedData<ICourseItem, CombinedCourse>(
log,
coursesAPI.v4,
// No term list is passed for courses (the classes updater passes its active terms here)
null,
"findCourseCollectionUsingGet",
{
"temporal-position": "Current",
app_id: config.sis.COURSE_APP_ID,
app_key: config.sis.COURSE_APP_KEY,
},
{
headers: {
app_id: "123",
app_key: "abc",
},
}
// Falls back to an empty list when the response carries no `courses`
(data) => data.apiResponse.response.courses || [],
mapCourseToNewCourse,
"courses"
);

// Courses API example
const coursesAPI = new CoursesAPI();
log.info("Example Course:", courses[0]);

// An empty filter removes every document in the collection
await NewCourseModel.deleteMany({});

await coursesAPI.v4.findCourseCollectionUsingGet({
"last-updated-since": "2021-01-01",
});
// Insert courses in batches of 5000
const insertBatchSize = 5000;

// Classes API example
const classesAPI = new ClassesAPI();
for (let i = 0; i < courses.length; i += insertBatchSize) {
const batch = courses.slice(i, i + insertBatchSize);

await classesAPI.v1.getClassesUsingGet({
"term-id": "123",
});
// Batch numbers in the message are 1-based
console.log(`Inserting batch ${i / insertBatchSize + 1}...`);

log.info(TermModel);
await NewCourseModel.insertMany(batch, { ordered: false });
}

console.log(`Completed updating database with new course data.`);

// NOTE(review): the message says "for active terms", but no term list is
// passed to the fetch above — confirm the intended wording.
log.info(`Updated ${courses.length} courses for active terms`);
}

main();
/**
 * Stand-alone entry point: runs setup, updates the courses, and terminates the
 * process with exit code 0 on success or 1 on failure.
 */
const initialize = async () => {
  const { config } = await setup();
  try {
    config.log.info("\n=== UPDATE COURSES ===");
    await updateCourses(config);
  } catch (error) {
    config.log.error(error);
    process.exit(1);
  }

  process.exit(0);
};

// Only start the stand-alone run when this file is the script being executed.
// Without this guard, importing `updateCourses` from another module would kick
// off a second, competing update that ends in `process.exit(0)`.
// NOTE(review): relies on CommonJS globals (`require`/`module`), like the
// `__dirname` usage elsewhere in this package — confirm the module format.
if (require.main === module) {
  initialize().catch((error: unknown) => {
    // `setup()` failed before a logger was available.
    console.error(error);
    process.exit(1);
  });
}
62 changes: 62 additions & 0 deletions apps/datapuller/src/runDatapuller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import mongoose from "mongoose";

import { updateClasses } from "./class";
import { Config } from "./config";
import { updateCourses } from "./course";
import { updateSections } from "./section";
import setup from "./shared";

/**
 * Checks that the current Mongoose connection accepts writes by saving a probe
 * document to the `Test` model and then deleting it again.
 *
 * @param config - Application configuration providing the logger.
 * @returns `true` when the probe document was written and removed, `false`
 *   when any database operation failed (the error is logged).
 */
const testDatabaseWrite = async (config: Config) => {
  const TestSchema = new mongoose.Schema({
    testField: String,
    timestamp: Date,
  });

  const TestModel = mongoose.model("Test", TestSchema);

  try {
    const testDocument = new TestModel({
      testField: "Test write from runDatapuller",
      timestamp: new Date(),
    });

    const result = await testDocument.save();
    config.log.info("Test document written successfully:", result);

    // Remove the probe so repeated runs do not pile up documents. The updaters
    // delete data as well, so a failure here is reported like a failed write.
    await TestModel.deleteOne({ _id: result._id });

    return true;
  } catch (error) {
    config.log.error("Error writing to database:", error);
    return false;
  }
};

/**
 * Entry point of the datapuller: verifies that the database is writable, then
 * runs the course, section and class updaters one after another. Any failure
 * is logged and ends the process with exit code 1; otherwise it exits with 0.
 */
const runDatapuller = async () => {
  const { config } = await setup();
  const { log } = config;

  // Updaters in execution order, each paired with the banner logged before it
  const stages: ReadonlyArray<
    readonly [banner: string, run: (config: Config) => unknown]
  > = [
    ["\n=== UPDATE COURSES ===", updateCourses],
    ["\n=== UPDATE SECTIONS ===", updateSections],
    ["\n=== UPDATE CLASSES ===", updateClasses],
  ];

  try {
    log.info("\n=== TESTING DATABASE WRITE ===");
    const canWrite = await testDatabaseWrite(config);
    if (canWrite === false) {
      throw new Error(
        "Failed to write to the database. Please check your connection and permissions."
      );
    }

    for (const [banner, run] of stages) {
      log.info(banner);
      await run(config);
    }

    log.info("\n=== DATA PULLING COMPLETED ===");
  } catch (failure) {
    log.error(failure);
    process.exit(1);
  }

  process.exit(0);
};

runDatapuller();
Loading