feat: updater-scripts #733

Draft · wants to merge 29 commits into gql from feat-updater-scripts

Changes from 15 of 29 commits

Commits
90145f5
base section script
adit-bala Oct 27, 2024
101301a
modularize data fetching
adit-bala Oct 28, 2024
f523109
new `Course` & `Class` Schema
adit-bala Oct 28, 2024
7099aac
`courseParser` & `classParser`
adit-bala Oct 30, 2024
7f4e9c9
Merge branch 'gql' into feat-updater-scripts
mathhulk Nov 2, 2024
537af49
don't use terms for `course` api
adit-bala Nov 3, 2024
7b9b141
add batching from @mathhulk
adit-bala Nov 3, 2024
e8a0f2b
insert results into the database
adit-bala Nov 3, 2024
3f81826
only delete relevant objects
adit-bala Nov 4, 2024
d43c524
add mongodb loader
adit-bala Nov 4, 2024
edc75d1
update optional fields for `course`
adit-bala Nov 5, 2024
a0a6122
don't include invalid data
adit-bala Nov 5, 2024
1d6f78f
simplify `course` and `class`
adit-bala Nov 7, 2024
ff86b7c
add required fields for section
adit-bala Nov 11, 2024
e7838e9
add testing script
adit-bala Nov 11, 2024
c2e46aa
init infra changes
adit-bala Nov 17, 2024
469e691
denote required fields in typescript
adit-bala Nov 17, 2024
676a797
decouple `datapuller` infra
adit-bala Nov 21, 2024
57e1931
feat: log total errors for datapuller
adit-bala Nov 21, 2024
9867932
rm dependecy
adit-bala Nov 21, 2024
a0e165c
fix: keep `datapuller` infra in `app`
adit-bala Nov 25, 2024
85d99ba
chore: migrate logic to one file
adit-bala Nov 25, 2024
489859d
chore: add encrypted env vars
adit-bala Nov 26, 2024
d8d099d
Merge branch 'gql' into feat-updater-scripts
adit-bala Nov 26, 2024
ad36e05
feat: logging + cleanup of logs
adit-bala Nov 26, 2024
c4b729a
fix: error types
adit-bala Nov 26, 2024
e4d8e4a
chore: more detailed logging
adit-bala Nov 26, 2024
93adeab
fix: proper define
adit-bala Nov 26, 2024
5a2f5ee
fix: remove `dev` check
adit-bala Nov 26, 2024
4 changes: 2 additions & 2 deletions Dockerfile
@@ -15,10 +15,10 @@ COPY --from=datapuller-builder /datapuller/out/package-lock.json ./package-lock.
RUN ["npm", "install"]

COPY --from=datapuller-builder /datapuller/out/full/ .
ENTRYPOINT ["turbo", "run", "course", "--filter=datapuller"]
ENTRYPOINT ["turbo", "run", "runDatapuller", "--filter=datapuller"]

FROM datapuller-dev AS datapuller-prod
ENTRYPOINT ["turbo", "run", "course", "--filter=datapuller", "--env-mode=loose"]
ENTRYPOINT ["turbo", "run", "runDatapuller", "--filter=datapuller", "--env-mode=loose"]

# backend
FROM base AS backend-builder
5 changes: 4 additions & 1 deletion apps/datapuller/package.json
@@ -3,7 +3,10 @@
"private": true,
"scripts": {
"build": "tsc --noEmit",
"course": "tsx src/course.ts"
"section": "tsx src/section.ts",
"class": "tsx src/class.ts",
"course": "tsx src/course.ts",
"runDatapuller": "tsx src/runDatapuller.ts"
},
"devDependencies": {
"@types/node": "^20.14.12",
33 changes: 33 additions & 0 deletions apps/datapuller/src/bootstrap/loaders/mongoose.ts
@@ -0,0 +1,33 @@
import mongoose from "mongoose";

import { Config } from "../../config";

// Close the Mongoose default connection in the event of application termination
process.on("SIGINT", async () => {
await mongoose.connection.close();
process.exit(0);
});

// Connect to MongoDB with the configured URI and register connection lifecycle logging
export default async (config: Config): Promise<mongoose.Mongoose> => {
// Connect to MongoDB
config.log.info("Connecting to MongoDB...");
const connection = await mongoose.connect(config.mongoDB.uri);

// Log when the connection is established
mongoose.connection.on("connected", () => {
config.log.info("MongoDB connection established successfully");
});

// Log any errors during the connection
mongoose.connection.on("error", (err) => {
config.log.error("MongoDB connection error:", err);
});

// Log when the connection is disconnected
mongoose.connection.on("disconnected", () => {
config.log.info("MongoDB connection disconnected");
});

return connection;
};
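For context on how this loader is consumed: the puller scripts below call `const { config } = await setup()` from `./shared`, which is not part of this diff. A minimal sketch of what that helper might look like, assuming it simply composes `loadConfig()` with this Mongoose loader (the file path and exact shape are assumptions):

// Hypothetical apps/datapuller/src/shared/index.ts -- not included in this diff.
import mongooseLoader from "../bootstrap/loaders/mongoose";
import { Config, loadConfig } from "../config";

export default async function setup(): Promise<{ config: Config }> {
  // Load env-backed config (logger, MongoDB URI, SIS credentials).
  const config = loadConfig();

  // Open the MongoDB connection before any puller touches the models.
  await mongooseLoader(config);

  return { config };
}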
69 changes: 69 additions & 0 deletions apps/datapuller/src/class.ts
@@ -0,0 +1,69 @@
import { IClassItem, NewClassModel } from "@repo/common";
import { ClassesAPI } from "@repo/sis-api/classes";

import { Config } from "./config";
import setup from "./shared";
import mapClassToNewClass, { CombinedClass } from "./shared/classParser";
import { fetchActiveTerms, fetchPaginatedData } from "./shared/utils";

export async function updateClasses(config: Config) {
const log = config.log;
const classesAPI = new ClassesAPI();

log.info("Fetching Active Terms");
const activeTerms = await fetchActiveTerms(log, {
app_id: config.sis.TERM_APP_ID,
app_key: config.sis.TERM_APP_KEY,
});

log.info(activeTerms);

const classes = await fetchPaginatedData<IClassItem, CombinedClass>(
log,
classesAPI.v1,
activeTerms,
"getClassesUsingGet",
{
app_id: config.sis.CLASS_APP_ID,
app_key: config.sis.CLASS_APP_KEY,
},
(data) => data.apiResponse.response.classes || [],
mapClassToNewClass
);

log.info("Example Class:", classes[0]);

await NewClassModel.deleteMany({
"session.term.id": { $in: activeTerms },
});

// Insert classes in batches of 5000
const insertBatchSize = 5000;
A contributor commented on the batch size here:
Just leaving this here, but we should probably do some testing on hozer to determine the final batch size. It all depends on amount of memory, etc.
for (let i = 0; i < classes.length; i += insertBatchSize) {
const batch = classes.slice(i, i + insertBatchSize);

console.log(`Inserting batch ${i / insertBatchSize + 1}...`);

await NewClassModel.insertMany(batch, { ordered: false });
}

console.log(`Completed updating database with new class data.`);

log.info(`Updated ${classes.length} classes for active terms`);
}

const initialize = async () => {
const { config } = await setup();
try {
config.log.info("\n=== UPDATE CLASSES ===");
await updateClasses(config);
} catch (error) {
config.log.error(error);
process.exit(1);
}

process.exit(0);
};

initialize();
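A note for reviewers: `fetchPaginatedData` and `fetchActiveTerms` come from `src/shared/utils.ts`, which is not shown in this diff. The declarations below are only a sketch of their shape as inferred from the call sites in class.ts, course.ts, and section.ts; parameter names, types, and bodies are assumptions, not the actual implementation.

import { Logger } from "tslog";

// Inferred shape only -- the real implementation lives in apps/datapuller/src/shared/utils.ts.
export async function fetchActiveTerms(
  log: Logger<unknown>,
  credentials: { app_id: string; app_key: string }
): Promise<string[]> {
  // Presumably queries the SIS Terms API for the currently active term ids.
  log.info("Fetching active terms");
  return [];
}

// Inferred shape only: pages through a SIS endpoint, extracts the item array from
// each page, and maps every raw item into the datapuller's schema type.
export async function fetchPaginatedData<Item, Raw>(
  log: Logger<unknown>,
  api: unknown, // e.g. classesAPI.v1 or coursesAPI.v4
  terms: string[] | null, // active term ids, or null for term-independent endpoints
  method: string, // e.g. "getClassesUsingGet"
  credentials: { app_id: string; app_key: string },
  extract: (page: any) => Raw[], // e.g. (data) => data.apiResponse.response.classes || []
  map: (raw: Raw) => Item // e.g. mapClassToNewClass
): Promise<Item[]> {
  log.info(`Paging through ${method} for ${terms?.length ?? "all"} terms`);
  return []; // placeholder body -- see shared/utils.ts for the real logic
}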
apps/datapuller/src/config.ts
@@ -1,4 +1,5 @@
import dotenv from "dotenv";
import { Logger } from "tslog";

// Safely get the environment variable in the process
const env = (name: string): string => {
@@ -12,6 +13,7 @@ const env = (name: string): string => {
};

export interface Config {
log: Logger<unknown>;
isDev: boolean;
mongoDB: {
uri: string;
@@ -21,13 +23,21 @@ export interface Config {
CLASS_APP_KEY: string;
COURSE_APP_ID: string;
COURSE_APP_KEY: string;
TERM_APP_ID: string;
TERM_APP_KEY: string;
};
}

export function loadConfig(): Config {
dotenv.config();

const log = new Logger({
type: "pretty",
prettyLogTimeZone: "local",
});

return {
log,
isDev: env("NODE_ENV") === "development",
mongoDB: {
uri: env("MONGODB_URI"),
@@ -37,6 +47,8 @@
CLASS_APP_KEY: env("SIS_CLASS_APP_KEY"),
COURSE_APP_ID: env("SIS_COURSE_APP_ID"),
COURSE_APP_KEY: env("SIS_COURSE_APP_KEY"),
TERM_APP_ID: env("SIS_TERM_APP_ID"),
TERM_APP_KEY: env("SIS_TERM_APP_KEY"),
},
};
}
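A quick usage sketch (not part of the diff): with the two new `env()` lookups above, the puller environment now also needs SIS_TERM_APP_ID and SIS_TERM_APP_KEY set, and every script reads its logger and credentials from the returned config instead of touching process.env directly.

// Minimal sketch, assuming the environment variables read by env() above are set.
import { loadConfig } from "./config";

const config = loadConfig();
config.log.info("Datapuller config loaded", {
  dev: config.isDev,
  hasTermCredentials: Boolean(config.sis.TERM_APP_ID && config.sis.TERM_APP_KEY),
});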
72 changes: 44 additions & 28 deletions apps/datapuller/src/course.ts
@@ -1,43 +1,59 @@
import { TermModel } from "@repo/common";
import { ClassesAPI } from "@repo/sis-api/classes";
import { ICourseItem, NewCourseModel } from "@repo/common";
import { CoursesAPI } from "@repo/sis-api/courses";
import { TermsAPI } from "@repo/sis-api/terms";

import { Config } from "./config";
import setup from "./shared";
import mapCourseToNewCourse, { CombinedCourse } from "./shared/courseParser";
import { fetchPaginatedData } from "./shared/utils";

async function main() {
const { log } = setup();

// Terms API example
const termsAPI = new TermsAPI();
export async function updateCourses(config: Config) {
const log = config.log;
const coursesAPI = new CoursesAPI();

await termsAPI.v2.getByTermsUsingGet(
const courses = await fetchPaginatedData<ICourseItem, CombinedCourse>(
log,
coursesAPI.v4,
null,
"findCourseCollectionUsingGet",
{
"temporal-position": "Current",
app_id: config.sis.COURSE_APP_ID,
app_key: config.sis.COURSE_APP_KEY,
},
{
headers: {
app_id: "123",
app_key: "abc",
},
}
(data) => data.apiResponse.response.courses || [],
mapCourseToNewCourse
);

// Courses API example
const coursesAPI = new CoursesAPI();
log.info("Example Course:", courses[0]);

await NewCourseModel.deleteMany({});

await coursesAPI.v4.findCourseCollectionUsingGet({
"last-updated-since": "2021-01-01",
});
// Insert courses in batches of 5000
const insertBatchSize = 5000;

// Classes API example
const classesAPI = new ClassesAPI();
for (let i = 0; i < courses.length; i += insertBatchSize) {
const batch = courses.slice(i, i + insertBatchSize);

await classesAPI.v1.getClassesUsingGet({
"term-id": "123",
});
console.log(`Inserting batch ${i / insertBatchSize + 1}...`);

log.info(TermModel);
await NewCourseModel.insertMany(batch, { ordered: false });
}

console.log(`Completed updating database with new course data.`);

log.info(`Updated ${courses.length} courses for active terms`);
}

main();
const initialize = async () => {
const { config } = await setup();
try {
config.log.info("\n=== UPDATE COURSES ===");
await updateCourses(config);
} catch (error) {
config.log.error(error);
process.exit(1);
}

process.exit(0);
};

initialize();
27 changes: 27 additions & 0 deletions apps/datapuller/src/runDatapuller.ts
@@ -0,0 +1,27 @@
import { updateClasses } from "./class";
import { updateCourses } from "./course";
import { updateSections } from "./section";
import setup from "./shared";

const runDatapuller = async () => {
const { config } = await setup();
try {
config.log.info("\n=== UPDATE COURSES ===");
await updateCourses(config);

config.log.info("\n=== UPDATE SECTIONS ===");
await updateSections(config);

config.log.info("\n=== UPDATE CLASSES ===");
await updateClasses(config);

config.log.info("\n=== DATA PULLING COMPLETED ===");
} catch (error) {
config.log.error(error);
process.exit(1);
}

process.exit(0);
};

runDatapuller();
67 changes: 67 additions & 0 deletions apps/datapuller/src/section.ts
@@ -0,0 +1,67 @@
import { ISectionItem, NewSectionModel } from "@repo/common";
import { ClassSection, ClassesAPI } from "@repo/sis-api/classes";

import { Config } from "./config";
import setup from "./shared";
import mapSectionToNewSection from "./shared/sectionParser";
import { fetchActiveTerms, fetchPaginatedData } from "./shared/utils";

export async function updateSections(config: Config) {
const log = config.log;
const classesAPI = new ClassesAPI();

log.info("Fetching Active Terms");
const activeTerms = await fetchActiveTerms(log, {
app_id: config.sis.TERM_APP_ID,
app_key: config.sis.TERM_APP_KEY,
});

log.info(activeTerms);

const sections = await fetchPaginatedData<ISectionItem, ClassSection>(
log,
classesAPI.v1,
activeTerms,
"getClassSectionsUsingGet",
{
app_id: config.sis.CLASS_APP_ID,
app_key: config.sis.CLASS_APP_KEY,
},
(data) => data.apiResponse.response.classSections || [],
mapSectionToNewSection
);

log.info("Example Section:", sections[0]);

await NewSectionModel.deleteMany({});
adit-bala marked this conversation as resolved.

// Insert sections in batches of 5000
const insertBatchSize = 5000;

for (let i = 0; i < sections.length; i += insertBatchSize) {
const batch = sections.slice(i, i + insertBatchSize);

console.log(`Inserting batch ${i / insertBatchSize + 1}...`);

await NewSectionModel.insertMany(batch, { ordered: false });
}

console.log(`Completed updating database with new section data.`);

log.info(`Updated ${sections.length} sections for active terms`);
}

const initialize = async () => {
const { config } = await setup();
try {
config.log.info("\n=== UPDATE SECTIONS ===");
await updateSections(config);
} catch (error) {
config.log.error(error);
process.exit(1);
}

process.exit(0);
};

initialize();