diff --git a/devdays-2025-ai/.gitignore b/devdays-2025-ai/.gitignore index 4ba46e1..9b1ee42 100644 --- a/devdays-2025-ai/.gitignore +++ b/devdays-2025-ai/.gitignore @@ -173,6 +173,3 @@ dist # Finder (MacOS) folder config .DS_Store - -# Database -local.db diff --git a/devdays-2025-ai/README.md b/devdays-2025-ai/README.md index 4ca98f2..5847720 100644 --- a/devdays-2025-ai/README.md +++ b/devdays-2025-ai/README.md @@ -54,3 +54,8 @@ bun run src/index.ts "country code" - Llama Index - Vercel AI SDK - [ ] Add more examples in db + +## Database + +You can connect to the database locally using a new SQLite connection. +Choose the path to your local.db file as the "Database path". diff --git a/devdays-2025-ai/bun.lockb b/devdays-2025-ai/bun.lockb index cc40acc..9cfcce1 100755 Binary files a/devdays-2025-ai/bun.lockb and b/devdays-2025-ai/bun.lockb differ diff --git a/devdays-2025-ai/local.db b/devdays-2025-ai/local.db new file mode 100644 index 0000000..fd8b5e0 Binary files /dev/null and b/devdays-2025-ai/local.db differ diff --git a/devdays-2025-ai/package.json b/devdays-2025-ai/package.json index e8ffde9..5e1a66a 100644 --- a/devdays-2025-ai/package.json +++ b/devdays-2025-ai/package.json @@ -10,6 +10,7 @@ }, "dependencies": { "@libsql/client": "^0.14.0", + "@slack/web-api": "^7.8.0", "openai": "^4.73.1" } } diff --git a/devdays-2025-ai/src/database.ts b/devdays-2025-ai/src/database.ts deleted file mode 100644 index b707605..0000000 --- a/devdays-2025-ai/src/database.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { createClient, type Client } from "@libsql/client"; -import { embed } from "./openai"; - -export const database = createClient({ - url: "file:local.db", -}); - -// In this sample codebase we rely on an libsql (sqlite with other stuff) database to store the errors and comments data and vectors that result from embeddings -// If you want to explore vector datases, you can have a look at: -// - Chroma: https://www.trychroma.com/ -// - Pinecone: https://www.pinecone.io/ 
- -export const reset = async (database: Client) => { - const baseErrorsData = await Promise.all( - [ - { - created_at: "2024-11-26 17:58:00", - message: "Triggered: Invalid country code in RGP rostering", - }, - { - created_at: "2024-11-26 18:36:00", - message: - "Triggered: Job Failed: demo-districts-pipeline-28876500 on us-east-1-staging-purple-cluster", - }, - { - created_at: "2024-11-22 17:54:00", - message: "Triggered: Invalid country code in RGP rostering", - }, - { - created_at: "2024-11-21 18:02:00", - message: - "Triggered: Job Failed: set-onboarding-notification-6 on us-east-1-production-purple-cluster", - }, - ].map(async (item) => ({ ...item, embedding: await embed(item.message) })) - ); - - const insertErrors = `INSERT INTO errors (created_at, message, embedding) VALUES ${baseErrorsData - .map( - (item) => - `('${item.created_at}', '${item.message}', vector32('[${item.embedding}]'))` - ) - .join(", ")}`; - - const baseMessagesData = await Promise.all( - [ - { - errors_id: 1, - created_at: "2024-11-26 19:31:00", - message: `I see we already made a request in #gatekeepers-ext for tenants: 7835607 & 8731389 now the failing tenant is 8619140 @Anaïs and @Marion you are talking about an email thread with Danita here; is there any infos to share before I make a new request to #gatekeepers-ext?`, - sender: "Grégoire", - }, - { - errors_id: 1, - created_at: "2024-11-26 19:59:00", - message: - "Not much info I think. Pascal advised that the next step was to set up a meeting with Danita to try to understand what happens when she provisions a new account, I was waiting to see if we had more errors like this but I guess we could do this now.", - sender: "Marion", - }, - { - errors_id: 1, - created_at: "2024-11-27 15:33:00", - message: "Should I do it ? 
Or do you prefer to lead the topic ?", - sender: "Grégoire", - }, - ].map(async (item) => ({ ...item, embedding: await embed(item.message) })) - ); - const insertComments = `INSERT INTO comments (created_at, message, sender, embedding, errors_id) VALUES ${baseMessagesData - .map( - (item) => - `('${item.created_at}', '${item.message}', '${item.sender}', vector32('[${item.embedding}]'), ${item.errors_id})` - ) - .join(", ")};`; - - await database.batch( - [ - "DROP TABLE IF EXISTS comments", - "DROP TABLE IF EXISTS errors", - `CREATE TABLE IF NOT EXISTS errors (id INTEGER PRIMARY KEY AUTOINCREMENT, created_at DATETIME NOT NULL, message TEXT NOT NULL, embedding F32_BLOB(${process.env.EMBEDDING_DIMENSION}) NOT NULL)`, - "CREATE INDEX errors_idx ON errors (libsql_vector_idx(embedding))", - `CREATE TABLE IF NOT EXISTS comments (id INTEGER PRIMARY KEY AUTOINCREMENT, created_at DATETIME NOT NULL, message TEXT NOT NULL, sender TEXT NOT NULL, embedding F32_BLOB(${process.env.EMBEDDING_DIMENSION}) NOT NULL, errors_id INTEGER NOT NULL REFERENCES errors(id))`, - "CREATE INDEX comments_idx ON comments (libsql_vector_idx(embedding))", - insertErrors, - insertComments, - ], - "write" - ); -}; diff --git a/devdays-2025-ai/src/database/data.json b/devdays-2025-ai/src/database/data.json new file mode 100644 index 0000000..43d9e70 --- /dev/null +++ b/devdays-2025-ai/src/database/data.json @@ -0,0 +1,1407 @@ +[ + { + "created_at": "2024-12-12T16:38:52.130Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-12T16:14:51.940Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": 
"Datadog", + "threads": [ + { + "created_at": "2024-12-12T16:39:20.437Z", + "message": "password change in snowflake, <@U02KV67BRAS> is handling it", + "sender": "U044SE2JWRF" + } + ] + }, + { + "created_at": "2024-12-09T16:56:21.957Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-08T16:52:22.810Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-07T16:50:23.421Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-06T16:57:22.129Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-05T16:57:23.578Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-05T16:29:22.375Z", + "title": 
"Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-04T16:52:22.410Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-04T16:24:22.072Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-03T16:57:22.370Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-03T16:25:22.056Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-03T05:39:21.355Z", + "title": "Triggered: Job Failed: demo-districts-pipeline-28886580 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by 
{kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 15.8", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-12-03T08:44:52.087Z", + "message": "Same problem as past two weeks, we’re working on a solve (cf. )", + "sender": "U044SE2JWRF" + } + ] + }, + { + "created_at": "2024-12-02T16:54:27.229Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-12-02T16:27:22.375Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-12-02T16:30:18.307Z", + "message": "New tenant 8602077", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-12-03T09:16:02.446Z", + "message": "Email sent to Danita", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-12-01T16:55:23.183Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-12-02T09:05:52.771Z", + "message": "We have sent an email to danita !", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-11-30T16:54:23.122Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + 
"sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-29T16:56:23.525Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-29T13:02:20.683Z", + "title": "Triggered: Job Failed: sync-star-scores-28881420 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 10.2", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-12-02T10:21:10.383Z", + "message": "Thanksgiving down time", + "sender": "U044SE2JWRF" + } + ] + }, + { + "created_at": "2024-11-28T16:50:22.657Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-28T13:02:25.827Z", + "title": "Triggered: Job Failed: sync-star-scores-28879980 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 15.0", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-28T13:16:05.810Z", + "message": "Thanksgiving :slightly_smiling_face:", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-11-27T16:51:22.030Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in 
us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-26T16:58:22.486Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-26T18:31:39.818Z", + "message": "I see we already made in <#CH5TG3W66|> for tenants:\n`7835607 & 8731389` now the failing tenant is `8619140`\n<@U018918FE93> and <@U02SU67AFLG> you are talking about an email thread with Danita ; is there any infos to share before I make a new request to <#CH5TG3W66|>?", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-11-26T18:59:09.973Z", + "message": "Not much info I think. Pascal advised that the next step was to set up a meeting with Danita to try to understand what happens when she provisions a new account, I was waiting to see if we had more errors like this but I guess we could do this now.", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-11-27T14:33:07.809Z", + "message": "Should I do it ? Or do you prefer to lead the topic ?", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-11-28T08:29:47.177Z", + "message": "Feel free to do it, but I can be there as well if you’d like. 
I’ll forward the email to you", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-11-26T05:36:20.517Z", + "title": "Triggered: Job Failed: demo-districts-pipeline-28876500 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 3.733", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-26T09:18:41.751Z", + "message": "", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-11-26T14:04:24.177Z", + "message": "After a thorough analysis, we discovered with Marin that demo jobs was not working properly since July.\ndemo-districts-pipeline is creating classes that are supposed to be kept in the demo-account-pipeline. However, since July those are deleted (goals in July was to delete manually created classes to avoid those account to be crowded with real classes) but the consequence is that admin report are not working as expected in those demo accounts. 
A post mortem will be run!", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-11-22T16:54:23.627Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-21T17:02:23.106Z", + "title": "Triggered: Job Failed: set-onboarding-notification-6 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 4.533", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-21T16:56:27.752Z", + "title": "Triggered: Job Failed: set-onboarding-notification-5 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 15.733", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-21T16:32:21.717Z", + "title": "Triggered: Job Failed: set-onboarding-notification-4 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 8.533", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-21T16:26:20.123Z", + "title": "Triggered: Job Failed: set-onboarding-notification-3 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by 
{kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 12.533", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-21T16:21:21.539Z", + "title": "Triggered: Job Failed: set-onboarding-notification-new on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 8.533", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-21T15:24:22.127Z", + "title": "Triggered: Job Failed: set-onboarding-notification-2 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 12.0", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-21T15:08:21.388Z", + "title": "Triggered: Job Failed: set-onboarding-notification on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 6.8", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-21T15:09:02.316Z", + "message": "<@U044SE2JWRF> I think that it’s for you", + "sender": "U018918FE93" + } + ] + }, + { + "created_at": "2024-11-19T05:35:21.335Z", + "title": "Triggered: Job Failed: demo-districts-pipeline-28866420 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 9.2", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-19T08:29:47.876Z", + "message": "Looking at the 
code it seems weird to have a student not found error :thinking_face:\nThe job didn’t fail in production. I can see that there’s a difference of concurrency between prod and staging, that could explain the staging failure.\nI’m not going to investigate further for the moment, I’ll relaunch the job and see if it succeeds", + "sender": "U018918FE93" + } + ] + }, + { + "created_at": "2024-11-18T13:10:21.069Z", + "title": "Triggered: Job Failed: sync-star-scores-202411181308 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 7.667", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-19T13:31:41.017Z", + "message": "503 from renaissance identity endpoint in staging, works again now, not going to investigate further for now", + "sender": "U044SE2JWRF" + } + ] + }, + { + "created_at": "2024-11-18T13:02:22.310Z", + "title": "Triggered: Job Failed: sync-star-scores-28865580 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 15.0", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-18T11:14:20.399Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-202411180939 on eu-west-3-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 6.8", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-18T10:04:21.942Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-202411180931 on us-east-1-production-purple-cluster", 
+ "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 5.467", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-18T10:09:42.379Z", + "message": "I am relaunching failing jobs from . I'll wait for EU to finish before relaunching this one (to keep an eye on it)", + "sender": "U05RRFC629Z" + }, + { + "created_at": "2024-11-18T12:26:31.920Z", + "message": "<@U05RRFC629Z> If you want I can take over these issues as part of the weekly-g work, let me know if you’re interested", + "sender": "U018918FE93" + }, + { + "created_at": "2024-11-18T13:16:28.352Z", + "message": "Thanks <@U018918FE93>. EU job has failed too : exponential backoff limit reached. I've relaunched it. We have no info about what append in the pod or in datadog (at least nothing I can found).\nI think I'll try to improve login for us to have more info if it fails again", + "sender": "U05RRFC629Z" + } + ] + }, + { + "created_at": "2024-11-16T08:18:20.482Z", + "title": "Triggered: Job Failed: send-weekly-email-28862400 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 1.467", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-22T10:44:36.095Z", + "message": "I had no clue about what happened, we didn’t have any error log to check.\nTo investigate further, I took the actions detailed in this thread: \nConclusion: it was a memory issue.", + "sender": "U018918FE93" + } + ] + }, + { + "created_at": "2024-11-09T08:19:21.120Z", + "title": "Triggered: Job Failed: send-weekly-email-28852320 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n 
Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 1.133", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-07T01:49:52.537Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-07T10:28:10.427Z", + "message": "Again an occurrence of .\nIt happened recently for the tenants `3020907` and `8744107`", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-11-05T01:55:51.614Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: ", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-06T11:18:55.957Z", + "message": "Marketo rate limiting issue, need to dig deeper", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-11-06T11:31:33.251Z", + "message": "Nevermind, the marketo rate limit is an issue but not the cause for the error here (the rate limiting does not stop the job, the form is just not sent to marketo)", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-11-06T11:35:00.732Z", + "message": "The issue is the same one as here ", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-11-06T15:49:48.586Z", + "message": "", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-11-04T21:16:31.243Z", + "title": "Triggered: Rostering error logs volume is too high", + "message": "Main_Account:@slack-mtg-jobs\n\nMore than 10 log events matched in the last 24h against the monitored query: ", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-06T11:27:29.027Z", + "message": "Since the 
issue has not happened with further rosterings of this tenant it is likely the data we received “corrupted” and has been fixed since (eg. this issue ). <@U05RRFC629Z> I don’t remember for sure, but I think we investigated and found no issues in the code right? If that is correct I would advise not to spend time on this investigation again at this time", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-11-06T13:18:33.427Z", + "message": "Yes, I didn't find anything in our code. But I didn't understand what happened. If I recall correctly the tenant was quite large and had several updates in a few times, so I suspected some race condition, but I am not 100% positive it was that", + "sender": "U05RRFC629Z" + } + ] + }, + { + "created_at": "2024-11-04T07:27:20.285Z", + "title": "Triggered: Job Failed: clean-inactive-entities-28845000 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 5.067", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-11-04T06:41:20.525Z", + "title": "Triggered: Job Failed: clean-inactive-entities-28845000 on eu-west-3-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 15.467", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-11-04T10:25:38.358Z", + "message": "Some socket hang up / timed out errors with Sendgrid. Maybe we should implement / improve the retry mechanism?\n<@U05RRFC629Z> are you on this since you put the emoji? Or should I investigate the solution a bit more and create a bug ticket?", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-11-04T10:34:12.517Z", + "message": "Yes, I've looked at it. 
But, I haven't gone further than you (only downloaded the log files to have time to look at them after if necessary)", + "sender": "U05RRFC629Z" + }, + { + "created_at": "2024-11-04T10:35:31.412Z", + "message": "I think we should create a ticket : we had the same issue last month, but since we had lots of deletions with lilo demo, I thought it was transient", + "sender": "U05RRFC629Z" + }, + { + "created_at": "2024-11-04T11:35:29.369Z", + "message": "", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-10-29T05:09:20.963Z", + "title": "Triggered: Job Failed: demo-accounts-pipeline-28836300 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 15.867", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-29T05:03:21.882Z", + "title": "Triggered: Job Failed: demo-accounts-pipeline-28836300 on eu-west-3-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 14.667", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-29T03:02:20.359Z", + "title": "Triggered: Job Failed: demo-districts-pipeline-28836180 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 4.533", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-29T10:56:51.994Z", + "message": "Pedagogy staging down, won’t restart since not vital on staging", + "sender": "U044SE2JWRF" + } + ] + }, + { + "created_at": "2024-10-28T07:30:35.214Z", + "title": ":lileo: slackbot 
error", + "message": "Error when picking next weekly-G :cry:\nError: Kim is not a member of the team.", + "sender": "U05DEHC8M7F", + "threads": [ + { + "created_at": "2024-10-28T07:30:35.214Z", + "message": "", + "sender": "U05DEHC8M7F" + }, + { + "created_at": "2024-10-28T08:16:45.437Z", + "message": "It was a typo on Kim notion page (Kim@)", + "sender": "U05RRFC629Z" + } + ] + }, + { + "created_at": "2024-10-17T14:25:42.355Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-202410171203 on eu-west-3-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 12.267", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-17T09:49:22.564Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-202410170927 on eu-west-3-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 8.267", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-15T16:30:22.591Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-15T16:42:19.306Z", + "message": "tenantId: 8728558", + "sender": "U018918FE93" + } + ] + }, + { + "created_at": "2024-10-14T09:47:51.997Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n 
Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:rgp-rostering-* \\@level:error env:production Rostering failure`", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-14T13:48:15.921Z", + "message": "• The error message is : `SQL compilation error:`\n`syntax error line 4 at position 30 unexpected ')'.`\n• Tenant id: 8754787\nWhen I download the files, I can see that there is no class and enrollment for this tenant. I haven’t found any other weird character or row in the files.", + "sender": "U018918FE93" + }, + { + "created_at": "2024-11-06T15:49:37.409Z", + "message": "", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-10-14T07:30:27.702Z", + "title": ":lileo: slackbot error", + "message": "Error when picking next weekly-G :cry:\nError: Baptiste is not a member of the team.", + "sender": "U05DEHC8M7F", + "threads": [ + { + "created_at": "2024-10-14T07:30:27.702Z", + "message": "", + "sender": "U05DEHC8M7F" + }, + { + "created_at": "2024-10-14T08:06:55.897Z", + "message": "It was a space in his name on Notion", + "sender": "U05RRFC629Z" + } + ] + }, + { + "created_at": "2024-10-13T17:18:23.052Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-12T17:16:22.093Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error 
env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-11T17:18:22.696Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-10T17:19:26.850Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-10T08:35:20.621Z", + "title": "Triggered: Job Failed: sync-star-scores-28809060 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 9.0", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-10T12:32:06.767Z", + "message": "I've used /trello command to create a card without leaving slack, works like a charm", + "sender": "U02KV67BRAS" + 
} + ] + }, + { + "created_at": "2024-10-09T17:18:27.134Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-10T06:49:02.808Z", + "message": "Still tenant `8731389`\nInvestigation in progress: ", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-10-08T17:19:23.921Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-09T07:47:58.335Z", + "message": "Tenant id: `8731389`", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-10-09T07:48:39.477Z", + "message": "In the orgs file, there is one school “Lycée Français International de Palma”", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-10-09T07:48:55.801Z", + "message": "I will contact Stephane in case he knows about this", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-10-09T08:31:54.339Z", + "message": "FYI we had a similar occurence on a school in 
Swiss (CH) a few weeks ago. ", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-10-15T16:49:44.864Z", + "message": "Another tenant for which this happened: \nI asked Stéphane to know if someone enabled rostering (I guess that we didn’t, but I just want to make sure before escalating to RGP people)", + "sender": "U018918FE93" + }, + { + "created_at": "2024-10-16T07:12:37.642Z", + "message": "<@U018918FE93> I will add you to an email thread with Danita to ask again if she has any idea what is happening", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-10-16T08:14:49.736Z", + "message": "Thanks!\nAnother piece of information is that there is only a school in the CSV files of yesterday’s failed tenant (no user, no class). And it looks like RGP puts the RGP subscriptions at the class level (with `apptags`), so that could be a cause of inconsistencies", + "sender": "U018918FE93" + }, + { + "created_at": "2024-10-16T10:00:58.690Z", + "message": "Yes I think RGP was activated unintentionally, so no one added classes in the RGP files", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-10-07T07:34:21.522Z", + "title": ":lileo: slackbot error", + "message": "Error when picking next weekly-G :cry:\nError: Baptiste is not a member of the team.", + "sender": "U05DEHC8M7F", + "threads": [ + { + "created_at": "2024-10-07T07:34:21.522Z", + "message": "", + "sender": "U05DEHC8M7F" + }, + { + "created_at": "2024-10-07T07:56:43.664Z", + "message": "<@U05RRFC629Z> I'm having a look at logs and it looks like we also need to increase timeouts", + "sender": "U026A7810BY" + } + ] + }, + { + "created_at": "2024-10-05T01:19:24.714Z", + "title": "Some Marketo users batch(es) could not have their versionType updated [production - us-east-1]", + "message": "", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-10-05T01:19:24.714Z", + "message": "Some Marketo users batch(es) could not have their versionType updated [production - us-east-1]", + 
"sender": "U03DQT150TU" + }, + { + "created_at": "2024-10-07T06:26:47.415Z", + "message": "```email,laliloVersionType,laliloSubscriptionType,Import Warning Reason\fakegreg@fakegregemail.com,CLASSIC_FULL_VERSION,,Invalid email address\nfake@fakegregemail.com,CLASSIC_FULL_VERSION,,Invalid email address\nfakegreg@fakegregemail.com,CLASSIC_FULL_VERSION,", + "sender": "U02SU67AFLG" + } + ] + }, + { + "created_at": "2024-10-05T01:03:40.673Z", + "title": "Some Marketo users batch(es) could not have their versionType updated [production - eu-west-3]", + "message": "", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-10-05T01:03:40.673Z", + "message": "Some Marketo users batch(es) could not have their versionType updated [production - eu-west-3]", + "sender": "U03DQT150TU" + } + ] + }, + { + "created_at": "2024-10-01T14:33:21.425Z", + "title": "Triggered: Job Failed: clean-inactive-entities-28796040 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 3.4", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-10-01T09:35:22.166Z", + "title": "Triggered: Job Failed: clean-inactive-entities-28796040 on eu-west-3-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 12.0", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-01T09:37:54.343Z", + "message": "Could be an issue of performance like with the mixpanel job <@U05RRFC629Z>, I’ll take a look at memory and cpu usage", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-10-01T09:39:50.847Z", + "message": "Yes, and we had huge deletions today (eu and us) because old lilo demos have been 
totally deleted, so it might have put some pressure to the pod", + "sender": "U05RRFC629Z" + }, + { + "created_at": "2024-10-01T09:41:36.018Z", + "message": "Cpu usage has clearly increased, memory not so much, I don’t know if it is an issue though. And the increase might be linked to the changes in the job (deleting lilo demo) rather than the changes in the pod ressources", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-10-01T09:42:34.855Z", + "message": "Yes, and I don't think it is really a problem if the teachers are not notified for next month deletion because it is very old accounts", + "sender": "U05RRFC629Z" + } + ] + }, + { + "created_at": "2024-09-28T08:03:26.676Z", + "title": "Triggered: Job Failed: send-weekly-email-28791840 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 11.267", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-09-30T09:32:34.964Z", + "message": "I don't know what to do with this : no logs in datadog, no remaining container in argos :confused:", + "sender": "U05RRFC629Z" + } + ] + }, + { + "created_at": "2024-09-26T02:36:30.881Z", + "title": "Triggered: Rostering error logs volume is too high", + "message": "Main_Account:@slack-mtg-jobs\n\nMore than 10 log events matched in the last 24h against the monitored query: [`service:rgp-rostering-* \\@level:error env:production \\@errorType:*`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+%40errorType%3A%2A&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-09-26T07:12:19.681Z", + "message": "All errors are for tenant `3262995`.\nSome SequelizeUniqueConstraintError about class’ renaissance id, and some TEACHER_ALREADY_IN_CLASS errors.\nI wonder if it could be some race conditions with 
several jobs running at the same time, one job checks the class does not exist yes, the other job creates it, then the first job fails when trying to create it again.", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-09-26T07:13:13.765Z", + "message": "Hmm there is an example of class which was created at the same second as the error log, and then was archived a few minutes later :woman-shrugging:", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-09-26T07:13:21.853Z", + "message": "I will try restarting a job for this tenant", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-09-26T07:50:49.580Z", + "message": "Not too sure but does this look like the issue you had <@U05RRFC629Z> not too long ago? I remember something about a class being created twice", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-09-26T07:51:58.688Z", + "message": "This is definitely a tenant we had issue with recently: ", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-09-26T07:52:03.305Z", + "message": "The rostering succeeded for the tenant", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-09-26T07:53:23.710Z", + "message": "During the night two rostering started almost at the same time, so I think the race condition is a good hypothesis", + "sender": "U02SU67AFLG" + }, + { + "created_at": "2024-09-26T07:54:33.800Z", + "message": "FYI Marion this is a thread i started about this tenant: ", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-09-21T08:02:02.203Z", + "title": "Weekly email error report", + "message": "NotFoundError: CLASS_NOT_FOUND", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-09-21T08:02:02.203Z", + "message": "Weekly email error report", + "sender": "U03DQT150TU" + }, + { + "created_at": "2024-09-23T08:10:26.830Z", + "message": "I'm wondering if we should remove those errors :thinking_face:\nI see no adding value having those", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": 
"2024-09-20T07:48:21.424Z", + "title": "Triggered: Job Failed: sync-star-scores-28780260 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 7.0", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-09-20T08:39:57.138Z", + "message": "• Many “An error occurred while fetching student star results” logs but it shouldn’t make the job fail.\n• I can’t see any obvious cause, I’m going to relaunch the job manually and see if it succeeds", + "sender": "U018918FE93" + }, + { + "created_at": "2024-09-20T12:11:07.226Z", + "message": "The manual job ran successfully", + "sender": "U018918FE93" + } + ] + }, + { + "created_at": "2024-09-19T09:27:21.306Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-202409190914 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 2.067", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-10-17T14:58:58.028Z", + "message": "<@U044SE2JWRF> Do you remember what you did to investigate this failure/fix it? I have relaunched the job 2 times and it keeps failing. 
Unfortunately, no error log shows up", + "sender": "U018918FE93" + }, + { + "created_at": "2024-10-17T15:06:52.965Z", + "message": "I thought it was linked to the change of ressources allocated for the job, so I upped them a little", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-10-17T15:07:45.057Z", + "message": "We can investigate together if you want (not sure I’ll find more but I can show you what I usualy do)", + "sender": "U044SE2JWRF" + }, + { + "created_at": "2024-10-17T15:08:11.296Z", + "message": "That would be great :)", + "sender": "U018918FE93" + } + ] + }, + { + "created_at": "2024-09-19T09:15:21.338Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-202409190912 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 11.067", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-18T05:22:27.064Z", + "title": "Triggered: Job Failed: demo-accounts-pipeline-28777260 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 12.0", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-18T05:16:31.001Z", + "title": "Triggered: Rostering error logs volume is too high", + "message": "Main_Account:@slack-mtg-jobs\n\nMore than 10 log events matched in the last 24h against the monitored query: `service:rgp-rostering-* \\@level:error env:production \\@errorType:*`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-18T05:08:21.708Z", + "title": "Triggered: Job Failed: demo-accounts-pipeline-28777260 on eu-west-3-staging-purple-cluster", + "message": "Check debugging run book: \n 
Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 6.867", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-18T05:08:20.731Z", + "title": "Triggered: Job Failed: demo-accounts-pipeline-28777260 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 11.067", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-18T05:06:21.712Z", + "title": "Triggered: Job Failed: demo-accounts-pipeline-28777260 on eu-west-3-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 9.067", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-16T08:17:20.811Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-202409160746 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 10.733", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-15T06:28:21.547Z", + "title": "Triggered: Job Failed: clean-mixpanel-teacher-profiles-28773000 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 10.733", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-14T02:20:52.087Z", + "title": 
"Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:rgp-rostering-* \\@level:error env:production Rostering failure`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-09T17:06:22.844Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-09T08:05:21.473Z", + "title": "Triggered: Job Failed: sync-star-scores-28764420 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 10.0", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-09-09T09:16:04.246Z", + "message": "Relaunching job", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-09-09T10:49:56.392Z", + "message": "Job ran successfully", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-09-09T10:50:01.884Z", + "message": ":man-shrugging:", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-09-09T08:02:21.240Z", + "title": "Triggered: Job Failed: speech-anonymization-28708260 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 5.683", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": 
"2024-09-08T16:46:22.165Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-07T17:08:23.993Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-07T08:05:35.136Z", + "title": "Weekly email error report", + "message": "NotFoundError: CLASS_NOT_FOUND", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-09-07T08:05:35.136Z", + "message": "Weekly email error report", + "sender": "U03DQT150TU" + } + ] + }, + { + "created_at": "2024-09-07T08:01:17.722Z", + "title": "Weekly email error report", + "message": "NotFoundError: CLASS_NOT_FOUND", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-09-07T08:01:17.722Z", + "message": "Weekly email error report", + "sender": "U03DQT150TU" + } + ] + }, + { + "created_at": "2024-09-06T17:14:22.758Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + 
}, + { + "created_at": "2024-09-05T17:42:22.789Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-05T15:06:20.059Z", + "title": "Triggered: Job Failed: speech-anonymization-28708260 on eu-west-3-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 2.833", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-09-05T15:17:15.532Z", + "message": "<@U018918FE93> I don't understand this failure, not sure if this could be impactful for you ?", + "sender": "U02KV67BRAS" + }, + { + "created_at": "2024-09-05T16:02:00.879Z", + "message": "I don’t understand it either... I relaunched the job manually and it succeeded.\nI can’t find any logs for the failed job, so unfortunately I’m not able to understand why it failed in the first place. It seems that it happened a month ago as well but I don’t have any context on it", + "sender": "U018918FE93" + }, + { + "created_at": "2024-09-16T11:10:25.765Z", + "message": "I spent some time investigating why we receive an alert one month after the job was supposed to run.\nI didn’t find the root cause, but I’m going to write here what I noticed in case if it helps the next time it happens:\nContext:\n• The job is programmed to run in all environments on August the first. 
If there is an error in the job, we should be able to find logs in Datadog (cf `executeJob` function).\nThings I noticed:\n• We have a behaviour between environments: we received alerts in August (when the job is supposed to run) for EU prod and US staging but we received alerts in September for EU staging and US prod! It’s not obvious what the difference is between these two sets of environments.\n ◦ The `lastTransitionTime` and `lastProbeTime` of EU staging and US prod k8s jobs is set in September. It’s in August for the other two environments. Not sure what that means, but it’s probably linked to the issue of the alert ringing late.\n• We can’t find logs in Datadog due to the fact that they are kept during 2 weeks only.\n• There are some `audio_file_student` rows with a `created_at` in 2022 which confirms that the job didn’t complete.", + "sender": "U018918FE93" + } + ] + }, + { + "created_at": "2024-09-04T17:14:22.944Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-04T02:26:30.787Z", + "title": "Triggered: Rostering error logs volume is too high", + "message": "Main_Account:@slack-mtg-jobs\n\nMore than 10 log events matched in the last 24h against the monitored query: `service:rgp-rostering-* \\@level:error env:production \\@errorType:*`", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-09-04T13:53:52.432Z", + "message": "Cannot understand how this is possible. 
The rostering job ran twice at a short interval, maybe some king of strange race condition.\nWill see tomorrow how it goes", + "sender": "U05RRFC629Z" + } + ] + }, + { + "created_at": "2024-09-03T17:14:23.911Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-02T17:18:22.326Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-02T13:02:20.897Z", + "title": "Triggered: Job Failed: sync-star-scores-28754700 on us-east-1-staging-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 13.033", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-09-02T13:04:41.207Z", + "message": "Us holidays", + "sender": "U044SE2JWRF" + } + ] + }, + { + 
"created_at": "2024-09-02T10:00:50.317Z", + "title": ":lileo: Lalibottine error", + "message": "Error when sending birthday message :cry:\nError: Request failed with status code 401", + "sender": "U05DEHC8M7F", + "threads": [ + { + "created_at": "2024-09-02T10:00:50.317Z", + "message": "", + "sender": "U05DEHC8M7F" + } + ] + }, + { + "created_at": "2024-09-02T09:12:21.044Z", + "title": "Triggered: Job Failed: clean-inactive-entities-28754280 on us-east-1-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 4.933", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-02T07:34:20.919Z", + "title": "Triggered: Job Failed: clean-inactive-entities-28754280 on eu-west-3-production-purple-cluster", + "message": "Check debugging run book: \n Main_Account:@slack-mtg-jobs\n\n`sum(last_1m):max:kubernetes_state.job.failed{!kube_job:rgp-rostering-*} by {kube_job,kube_cluster_name}.as_count() >= 1`\n\nMetric value: 1.2", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-09-01T17:18:22.924Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-31T17:20:22.787Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in 
us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-31T08:04:05.078Z", + "title": "Weekly email error report", + "message": "NotFoundError: CLASS_NOT_FOUND", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-08-31T08:04:05.078Z", + "message": "Weekly email error report", + "sender": "U03DQT150TU" + }, + { + "created_at": "2024-09-02T10:11:45.992Z", + "message": "Indeed the class no longer exists, no big deal. If this happens again afew times, let's handle this error better", + "sender": "U026A7810BY" + } + ] + }, + { + "created_at": "2024-08-31T08:02:46.301Z", + "title": "Weekly email error report", + "message": "NotFoundError: CLASS_NOT_FOUND", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-08-31T08:02:46.301Z", + "message": "Weekly email error report", + "sender": "U03DQT150TU" + } + ] + }, + { + "created_at": "2024-08-30T17:14:22.315Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-30T02:20:52.458Z", + 
"title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:rgp-rostering-* \\@level:error env:production Rostering failure`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+Rostering+failure&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-29T17:58:51.525Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:rgp-rostering-* \\@level:error env:production Rostering failure`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+Rostering+failure&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-29T17:18:23.046Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-29T02:22:51.205Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:rgp-rostering-* \\@level:error env:production Rostering 
failure`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+Rostering+failure&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-08-29T08:15:38.579Z", + "message": "Still same tenant 3262995", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-08-28T17:16:22.630Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-28T02:24:51.569Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:rgp-rostering-* \\@level:error env:production Rostering failure`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+Rostering+failure&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-08-28T08:03:11.082Z", + "message": "Still same failing tenant 3262995", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-08-27T17:22:24.403Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP 
rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-27T02:18:51.595Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:rgp-rostering-* \\@level:error env:production Rostering failure`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+Rostering+failure&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-26T17:18:23.177Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-26T02:22:51.541Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:rgp-rostering-* \\@level:error env:production Rostering failure`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-25T17:20:22.925Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n 
Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-25T02:20:52.514Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:rgp-rostering-* \\@level:error env:production Rostering failure`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-24T17:16:22.325Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-24T10:00:40.817Z", + "title": ":lileo: Lalibottine error", + "message": "Error when sending birthday message :cry:\nError: Request failed with status code 401", + "sender": "U05DEHC8M7F", + "threads": [ + { + "created_at": "2024-08-24T10:00:40.817Z", + "message": "", + "sender": "U05DEHC8M7F" + } + ] + }, + { + "created_at": "2024-08-24T08:08:30.222Z", + "title": "Weekly email error report", + "message": "NotFoundError: CLASS_NOT_FOUND", + "sender": "U03DQT150TU", + "threads": [ + { + "created_at": "2024-08-24T08:08:30.222Z", + "message": "Weekly email error report", + "sender": "U03DQT150TU" + } + ] + }, + { + "created_at": "2024-08-24T02:16:51.663Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m 
against the monitored query: `service:rgp-rostering-* \\@level:error env:production Rostering failure`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-23T17:22:23.080Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-23T10:00:40.708Z", + "title": ":lileo: Lalibottine error", + "message": "Error when sending birthday message :cry:\nError: Request failed with status code 401", + "sender": "U05DEHC8M7F", + "threads": [ + { + "created_at": "2024-08-23T10:00:40.708Z", + "message": "", + "sender": "U05DEHC8M7F" + } + ] + }, + { + "created_at": "2024-08-23T02:26:51.776Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:rgp-rostering-* \\@level:error env:production Rostering failure`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+Rostering+failure&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-22T18:32:52.025Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:rgp-rostering-* \\@level:error env:production Rostering failure`](/logs?query=service%3Argp-rostering-%2A+%40level%3Aerror+env%3Aproduction+Rostering+failure&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": 
"2024-08-22T17:20:21.941Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: [`service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`](/logs?query=service%3A%28rgp-rostering-%2A+OR+lalilo-api%29+%40level%3Aerror+env%3Aproduction+We+do+not+support+RGP+rostering+for+other+regions&agg_m=count&agg_t=count&index=%2A)", + "sender": "Datadog", + "threads": [] + }, + { + "created_at": "2024-08-22T02:14:50.934Z", + "title": "Triggered: Rostering failed", + "message": "The rostering failed after multiple retries\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:rgp-rostering-* \\@level:error env:production Rostering failure`", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-08-22T09:23:07.916Z", + "message": "Investigation ongoing: ", + "sender": "U02KV67BRAS" + } + ] + }, + { + "created_at": "2024-08-21T17:20:22.643Z", + "title": "Triggered: Invalid country code in RGP rostering", + "message": "A tenant had an invalid country code (one not in us-east-1 region)\n Main_Account:@slack-mtg-jobs\n\nMore than 0 log events matched in the last 15m against the monitored query: `service:(rgp-rostering-* OR lalilo-api) \\@level:error env:production We do not support RGP rostering for other regions`", + "sender": "Datadog", + "threads": [ + { + "created_at": "2024-08-22T09:27:25.248Z", + "message": "", + "sender": "U02KV67BRAS" + } + ] + } +] diff --git a/devdays-2025-ai/src/database/database.ts b/devdays-2025-ai/src/database/database.ts new file mode 100644 index 0000000..5fd31f1 --- /dev/null +++ b/devdays-2025-ai/src/database/database.ts @@ -0,0 +1,77 @@ +import { createClient, type Client } from "@libsql/client"; +import { embed } 
from "../openai";
+import data from "./data.json";
+
+export const database = createClient({
+  url: "file:local.db",
+});
+
+// In this sample codebase we rely on a libsql (sqlite with other stuff) database to store the errors and comments data and vectors that result from embeddings
+// If you want to explore vector databases, you can have a look at:
+// - Chroma: https://www.trychroma.com/
+// - Pinecone: https://www.pinecone.io/
+
+// Serializes an embedding into the bracketed, comma-separated text passed to vector32().
+const toVectorLiteral = (embedding: readonly number[]): string =>
+  `[${embedding.join(",")}]`;
+
+/**
+ * Drops and recreates the `errors` and `comments` tables, then seeds them from
+ * data.json, storing one embedding per error message and per thread message.
+ *
+ * Values are bound as statement arguments instead of being spliced into the
+ * SQL text, so quotes in the messages are stored verbatim.
+ *
+ * @param database libsql client whose tables are reset
+ * @throws when EMBEDDING_DIMENSION is not a positive integer, or when an
+ *   embedding call or a statement fails (the seed transaction is rolled back
+ *   before the error is rethrown)
+ */
+export const reset = async (database: Client) => {
+  // Validate up front: an unset variable would otherwise yield F32_BLOB(undefined)
+  const dimension = Number(process.env.EMBEDDING_DIMENSION);
+  if (!Number.isInteger(dimension) || dimension <= 0) {
+    throw new Error("EMBEDDING_DIMENSION must be a positive integer");
+  }
+
+  const dataWithEmbedding = await Promise.all(
+    data.map(async (item) => {
+      const threadsWithEmbeddings = await Promise.all(
+        item.threads.map(async (thread) => ({
+          ...thread,
+          embedding: await embed(thread.message),
+        }))
+      );
+
+      return {
+        ...item,
+        embedding: await embed(item.message),
+        threads: threadsWithEmbeddings,
+      };
+    })
+  );
+
+  await database.batch(
+    [
+      "DROP TABLE IF EXISTS comments",
+      "DROP TABLE IF EXISTS errors",
+      `CREATE TABLE IF NOT EXISTS errors (id INTEGER PRIMARY KEY AUTOINCREMENT, created_at DATETIME NOT NULL, title TEXT NOT NULL, message TEXT NOT NULL, embedding F32_BLOB(${dimension}) NOT NULL)`,
+      `CREATE TABLE IF NOT EXISTS comments (id INTEGER PRIMARY KEY AUTOINCREMENT, created_at DATETIME NOT NULL, message TEXT NOT NULL, sender TEXT NOT NULL, embedding F32_BLOB(${dimension}) NOT NULL, errors_id INTEGER NOT NULL REFERENCES errors(id))`,
+      "CREATE INDEX errors_idx ON errors (libsql_vector_idx(embedding))",
+      "CREATE INDEX comments_idx ON comments (libsql_vector_idx(embedding))",
+    ],
+    "write"
+  );
+
+  // Seed inside one write transaction: either every row is inserted or none is
+  const transaction = await database.transaction("write");
+
+  try {
+    for (const item of dataWithEmbedding) {
+      const result = await transaction.execute({
+        sql: "INSERT INTO errors (created_at, title, message, embedding) VALUES (?, ?, ?, vector32(?))",
+        args: [
+          item.created_at,
+          item.title,
+          item.message,
+          toVectorLiteral(item.embedding),
+        ],
+      });
+      const errorId = result.lastInsertRowid;
+      if (errorId === undefined) {
+        throw new Error("INSERT INTO errors did not return a row id");
+      }
+
+      for (const thread of item.threads) {
+        await transaction.execute({
+          sql: "INSERT INTO comments (errors_id, created_at, message, sender, embedding) VALUES (?, ?, ?, ?, vector32(?))",
+          args: [
+            errorId,
+            thread.created_at,
+            thread.message,
+            thread.sender,
+            toVectorLiteral(thread.embedding),
+          ],
+        });
+      }
+    }
+
+    await transaction.commit();
+  } catch (error) {
+    await transaction.rollback();
+    throw error;
+  } finally {
+    transaction.close();
+  }
+};
diff --git a/devdays-2025-ai/src/reset.ts b/devdays-2025-ai/src/database/reset.ts
similarity index 100%
rename from devdays-2025-ai/src/reset.ts
rename to devdays-2025-ai/src/database/reset.ts
diff --git a/devdays-2025-ai/src/database/slack.ts b/devdays-2025-ai/src/database/slack.ts
new file mode 100644
index 0000000..ee57be2
--- /dev/null
+++ b/devdays-2025-ai/src/database/slack.ts
@@ -0,0 +1,72 @@
+import { LogLevel, WebClient } from "@slack/web-api";
+
+type ErrorMessage = {
+  created_at: Date;
+  title: string;
+  message: string;
+  sender: string;
+  // Thread replies carry the same fields as a report, minus title and nested threads
+  threads: Omit<ErrorMessage, "title" | "threads">[];
+};
+
+// Attachment color that identifies a message as an error report
+const ERROR_COLOR = "a30200";
+const client = new WebClient(process.env.SLACK_TOKEN, {
+  logLevel: LogLevel.ERROR,
+});
+
+//mtg-jobs
+const channelId = "C02FW8DGG2F";
+
+// Prints the channel's recent error reports and their non-bot replies to stdout as a JSON array
+try {
+  const result = await client.conversations.history({
+    channel: channelId,
+    limit: 200,
+  });
+
+  const messages = result.messages?.filter((message) =>
+    message.attachments?.some((attachment) => attachment.color === ERROR_COLOR)
+  );
+
+  const data: ErrorMessage[] = [];
+  for (const message of messages ?? []) {
+    const { ts } = message;
+    if (!ts) {
+      continue;
+    }
+    const replies = await client.conversations.replies({
+      channel: channelId,
+      ts,
+    });
+
+    // conversations.replies returns the parent message along with its replies:
+    // drop it (same ts) so a report is not stored as a comment on itself
+    const userReplies =
+      replies.messages?.filter(
+        (reply: { ts?: string; subtype?: string }) =>
+          reply.ts !== ts && reply.subtype !== "bot_message"
+      ) ?? [];
+    const firstAttachment = message.attachments?.[0];
+
+    data.push({
+      created_at: new Date(parseFloat(ts) * 1000),
+      title:
+        (message.app_id === "A03DJ82GYP8" //LaliloBot
+          ? message.text
+          : firstAttachment?.title) ?? "",
+      message: firstAttachment?.text ?? "",
+      sender: message.username ?? message.user ?? "",
+      threads: userReplies.map((reply) => ({
+        created_at: new Date(parseFloat(reply.ts ?? "0") * 1000),
+        message: reply.text ?? "",
+        sender: reply.user ?? "",
+      })),
+    });
+  }
+  console.log(JSON.stringify(data));
+} catch (error) {
+  console.error(error);
+  // Exit non-zero so callers can tell the export failed
+  process.exitCode = 1;
+}
diff --git a/devdays-2025-ai/src/index.ts b/devdays-2025-ai/src/index.ts
index 6739b59..9d73aef 100644
--- a/devdays-2025-ai/src/index.ts
+++ b/devdays-2025-ai/src/index.ts
@@ -1,4 +1,4 @@
-import { database } from "./database";
+import { database } from "./database/database";
 import { embed, generateMessage } from "./openai";
 
 const args = process.argv.slice(2);
@@ -11,7 +11,7 @@ const searchQuery = args.join(" ");
 const queryEmbed = await embed(searchQuery);
 // https://docs.turso.tech/features/ai-and-embeddings
 const distanceComputation = `vector_distance_cos(errors.embedding, vector32('[${queryEmbed.join(
-  ", ",
+  ", "
 )}]'))`;
 
 const result = await database.execute(`SELECT errors.id as id, errors.message as errorMessage, errors.created_at as createdAt, comments.message as comment, comments.created_at as commentCreatedAt, comments.sender as commentSender, ${distanceComputation} as distance
@@ -39,6 +39,6 @@ const data = Object.values(orderedGroups).map((group) => ({
 console.log(
   await generateMessage(
     "You are a specialized AI that reads Lalilo error reports and the comments written about them by the team. From a given JSON containing an error message and its comments, create a clear, brief summary sentence that captures the essence of the issue and its resolution. Focus on what happened and how it was resolved if that information is present. Also highlight any team member that could have context about it and the dates it happened.",
-    JSON.stringify(data, null, 2),
-  ),
+    JSON.stringify(data, null, 2)
+  )
 );