Skip to content

Commit

Permalink
Merge pull request #250 from cisagov/DJ_extract_cyhy_data_WIP
Browse files Browse the repository at this point in the history
Save hosts and tickets to datalake
  • Loading branch information
schmelz21 authored May 15, 2024
2 parents dec8c36 + 10f0889 commit 4f19718
Show file tree
Hide file tree
Showing 10 changed files with 347 additions and 16 deletions.
55 changes: 53 additions & 2 deletions backend/src/models/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,59 @@ let connection: Connection | null = null;

let dl_connection: Connection | null = null;

let dl2_connection: Connection | null = null;

/**
 * Opens a new Postgres connection registered under the connection name
 * 'default2', with the mini-data-lake entity set listed below.
 *
 * Connection settings are read from the environment: DB_HOST, DB_PORT,
 * MDL_USERNAME, MDL_PASSWORD and MDL_NAME. Schema synchronisation and schema
 * dropping are both disabled, and query caching is enabled.
 *
 * @param logging - Enables query logging when true; defaults to false.
 * @returns A promise for the connection produced by `createConnection`.
 */
const connectDl2 = async (logging?: boolean) => {
  // Return the promise directly instead of keeping it in a local named
  // `dl2_connection`: that name shadowed the module-level cache variable and
  // held a Promise rather than a Connection.
  return createConnection({
    type: 'postgres',
    host: process.env.DB_HOST,
    // Explicit radix so the port is always parsed as decimal.
    port: parseInt(process.env.DB_PORT ?? '', 10),
    username: process.env.MDL_USERNAME,
    password: process.env.MDL_PASSWORD,
    database: process.env.MDL_NAME,
    entities: [
      CertScan,
      Cidr,
      Contact,
      DL_Cpe,
      DL_Cve,
      DL_Domain,
      HostScan,
      Host,
      Ip,
      Kev,
      Location,
      DL_Organization,
      PortScan,
      PrecertScan,
      Report,
      Sector,
      Snapshot,
      SslyzeScan,
      Tag,
      Tally,
      TicketEvent,
      Ticket,
      TrustymailScan,
      VulnScan
    ],
    synchronize: false,
    name: 'default2',
    dropSchema: false,
    logging: logging ?? false,
    cache: true
  });
};

/**
 * Returns the module-level `dl2_connection`, creating it through
 * `connectDl2` when it is unset or reports that it is not connected.
 *
 * @param logging - Forwarded to `connectDl2` when a new connection is opened.
 * @returns The cached (or newly created) connection.
 */
export const connectToDatalake2 = async (logging?: boolean) => {
  if (!dl2_connection?.isConnected) {
    dl2_connection = await connectDl2(logging);
    // Log only after the attempt has resolved; logging beforehand reported a
    // successful connection even when `connectDl2` went on to reject.
    console.log('Connected to datalake');
  }
  return dl2_connection;
};

const connectDl = async (logging?: boolean) => {
// process.env.DB_HOST = 'db';
// process.env.MDL_USERNAME = 'mdl';
Expand Down Expand Up @@ -108,8 +161,6 @@ export const connectToDatalake = async (logging?: boolean) => {
if (!dl_connection?.isConnected) {
console.log('Connected to datalake');
dl_connection = await connectDl(logging);
} else {
console.log("didn't connect");
}
return dl_connection;
};
Expand Down
7 changes: 4 additions & 3 deletions backend/src/models/mini_data_lake/ticket_events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,22 @@ import {
Column,
PrimaryGeneratedColumn,
BaseEntity,
ManyToOne
ManyToOne,
Unique
} from 'typeorm';

import { Ticket } from './tickets';
import { VulnScan } from './vuln_scans';

@Entity()
@Unique(['eventTimestamp', 'ticket', 'action'])
export class TicketEvent extends BaseEntity {
@PrimaryGeneratedColumn('uuid')
id: string;

@Column({
nullable: true,
type: 'varchar',
unique: true
type: 'varchar'
})
reference: string | null;

Expand Down
28 changes: 28 additions & 0 deletions backend/src/tasks/helpers/saveHost.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { plainToClass } from 'class-transformer';
import { Host, connectToDatalake } from '../../models';

/**
 * Inserts the given host through the `Host` query builder and, when a row
 * with the same `id` already exists, overwrites the columns computed below.
 *
 * @param host - The host entity to insert or update.
 * @returns The `id` of the first identifier reported by the insert result.
 */
export default async (host: Host): Promise<string> => {
  console.log(`Starting to save host ${host.ipString} to datalake`);
  await connectToDatalake();

  // Work out which columns may be overwritten on conflict:
  //  - `id` is the conflict target and is never overwritten,
  //  - the `organization` / `ip` keys are listed as `organizationId` / `ipId`,
  //  - any other key is listed only when its value is not null.
  const overwriteColumns: string[] = [];
  for (const key of Object.keys(host)) {
    if (key === 'id') {
      continue;
    }
    if (key === 'organization') {
      overwriteColumns.push('organizationId');
      continue;
    }
    if (key === 'ip') {
      overwriteColumns.push('ipId');
      continue;
    }
    if (host[key] !== null && key !== '') {
      overwriteColumns.push(key);
    }
  }

  const insertResult = await Host.createQueryBuilder()
    .insert()
    .values(host)
    .orUpdate({
      conflict_target: ['id'],
      overwrite: overwriteColumns
    })
    .returning('id')
    .execute();

  const savedId: string = insertResult.identifiers[0].id;
  return savedId;
};
2 changes: 1 addition & 1 deletion backend/src/tasks/helpers/saveIpToMdl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { plainToClass } from 'class-transformer';
import { Ip, connectToDatalake } from '../../models';

export default async (ipObj: Ip): Promise<string | null> => {
console.log('Starting to save IP to datalake');
console.log(`Starting to save IP to datalake: ${ipObj.ip}`);
await connectToDatalake();
const ipUpdatedValues = Object.keys(ipObj)
.map((key) => {
Expand Down
2 changes: 1 addition & 1 deletion backend/src/tasks/helpers/saveOrganizationToMdl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export default async (
cidrs: Cidr[],
location: Location | null
): Promise<string> => {
console.log('Starting to save Org to datalake');
console.log(`Saving org ${organization.acronym} to datalake`);
await connectToDatalake();

const cidr_entities: Cidr[] = [];
Expand Down
35 changes: 35 additions & 0 deletions backend/src/tasks/helpers/saveTicket.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { plainToClass } from 'class-transformer';
import {
Ticket,
DL_Organization,
Cidr,
Location,
connectToDatalake
} from '../../models';

/**
 * Inserts the given ticket through the `Ticket` query builder and, when a row
 * with the same `id` already exists, overwrites the columns computed below.
 *
 * @param ticket - The ticket entity to insert or update.
 * @returns The `id` of the first identifier reported by the insert result.
 */
export default async (ticket: Ticket): Promise<string> => {
  console.log(`Starting to save Ticket to datalake`);
  await connectToDatalake();

  // Relation keys are listed under a different column name on conflict.
  const relationColumns: Record<string, string> = {
    organization: 'organizationId',
    ip: 'ipId',
    cve: 'cveId'
  };

  // `id` is the conflict target and is never overwritten; relation keys are
  // always listed; any other key is listed only when its value is not null.
  const overwriteColumns: string[] = [];
  for (const key of Object.keys(ticket)) {
    if (key === 'id') {
      continue;
    }
    if (Object.prototype.hasOwnProperty.call(relationColumns, key)) {
      overwriteColumns.push(relationColumns[key]);
      continue;
    }
    if (ticket[key] !== null && key !== '') {
      overwriteColumns.push(key);
    }
  }

  const insertResult = await Ticket.createQueryBuilder()
    .insert()
    .values(ticket)
    .orUpdate({
      conflict_target: ['id'],
      overwrite: overwriteColumns
    })
    .returning('id')
    .execute();

  const savedId: string = insertResult.identifiers[0].id;
  return savedId;
};
32 changes: 32 additions & 0 deletions backend/src/tasks/helpers/saveTicketEvent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { plainToClass } from 'class-transformer';
import {
TicketEvent,
DL_Organization,
Cidr,
Location,
connectToDatalake
} from '../../models';

/**
 * Inserts the given ticket event through the `TicketEvent` query builder.
 * When a row with the same (`eventTimestamp`, `action`, `ticketId`) already
 * exists, the columns computed below are overwritten instead.
 *
 * @param ticket_event - The ticket event entity to insert or update.
 * @returns The `id` of the first identifier reported by the insert result.
 */
export default async (ticket_event: TicketEvent): Promise<string> => {
  await connectToDatalake();

  // Keys that make up the conflict target are never overwritten.
  const conflictKeys = ['eventTimestamp', 'action', 'ticket'];

  // `vulnScan` is listed as `vulnScanId`; any other key is listed only when
  // its value is not null.
  const overwriteColumns: string[] = [];
  for (const key of Object.keys(ticket_event)) {
    if (conflictKeys.indexOf(key) !== -1) {
      continue;
    }
    if (key === 'vulnScan') {
      overwriteColumns.push('vulnScanId');
      continue;
    }
    if (ticket_event[key] !== null && key !== '') {
      overwriteColumns.push(key);
    }
  }

  const insertResult = await TicketEvent.createQueryBuilder()
    .insert()
    .values(ticket_event)
    .orUpdate({
      conflict_target: ['eventTimestamp', 'action', 'ticketId'],
      overwrite: overwriteColumns
    })
    .returning('id')
    .execute();

  const savedId: string = insertResult.identifiers[0].id;
  return savedId;
};
4 changes: 2 additions & 2 deletions backend/src/tasks/syncmdl.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Handler } from 'aws-lambda';
import { connectToDatalake, connectToDatabase } from '../models';
import { connectToDatalake2, connectToDatabase } from '../models';

export const handler: Handler = async (event) => {
const connection = await connectToDatabase();
Expand Down Expand Up @@ -42,7 +42,7 @@ export const handler: Handler = async (event) => {
);
}

const mdl_connection = await connectToDatalake(true);
const mdl_connection = await connectToDatalake2(true);
const type = event?.type || event;
const dangerouslyforce = type === 'dangerouslyforce';

Expand Down
Loading

0 comments on commit 4f19718

Please sign in to comment.