diff --git a/backend/src/xfd_django/xfd_api/management/commands/syncdb.py b/backend/src/xfd_django/xfd_api/management/commands/syncdb.py
index 33c6e58f..5249bf01 100644
--- a/backend/src/xfd_django/xfd_api/management/commands/syncdb.py
+++ b/backend/src/xfd_django/xfd_api/management/commands/syncdb.py
@@ -7,9 +7,8 @@
 from django.conf import settings
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
-from django.db import transaction
-from xfd_api.models import Domain, Organization, OrganizationTag, Service, Vulnerability
 from xfd_api.tasks.es_client import ESClient
+from xfd_api.tasks.syndb_helpers import manage_elasticsearch_indices, populate_sample_data
 
 # Sample data and helper data for random generation
 SAMPLE_TAG_NAME = "Sample Data"
@@ -65,91 +64,10 @@ def handle(self, *args, **options):
             call_command("migrate")
 
         # Step 2: Elasticsearch Index Management
-        self.manage_elasticsearch_indices(dangerouslyforce)
+        manage_elasticsearch_indices(dangerouslyforce)
 
         # Step 3: Populate Sample Data
         if populate:
             self.stdout.write("Populating the database with sample data...")
-            self.populate_sample_data()
+            populate_sample_data()
             self.stdout.write("Sample data population complete.")
-
-    def manage_elasticsearch_indices(self, dangerouslyforce):
-        """Handle Elasticsearch index setup and teardown."""
-        try:
-            if dangerouslyforce:
-                es_client.delete_all()
-            es_client.sync_organizations_index()
-            es_client.sync_domains_index()
-            self.stdout.write("Elasticsearch indices synchronized.")
-        except Exception as e:
-            self.stdout.write(f"Error managing Elasticsearch indices: {e}")
-
-    def populate_sample_data(self):
-        """Populate sample data into the database."""
-        with transaction.atomic():
-            tag, _ = OrganizationTag.objects.get_or_create(name=SAMPLE_TAG_NAME)
-            for _ in range(NUM_SAMPLE_ORGS):
-                org = Organization.objects.create(
-                    acronym="".join(random.choices("ABCDEFGHIJKLMNOPQRSTUVWXYZ", k=5)),
-                    name=self.generate_random_name(),
-                    rootDomains=["crossfeed.local"],
-                    ipBlocks=[],
-                    isPassive=False,
-                    state=random.choice(SAMPLE_STATES),
-                    regionId=random.choice(SAMPLE_REGION_IDS),
-                )
-                org.tags.add(tag)
-
-                for _ in range(NUM_SAMPLE_DOMAINS):
-                    domain = self.create_sample_domain(org)
-                    self.create_sample_services_and_vulnerabilities(domain)
-
-    def generate_random_name(self):
-        """Generate a random organization name using an adjective and entity noun."""
-        adjective = random.choice(adjectives)
-        noun = random.choice(nouns)
-        entity = random.choice(["City", "County", "Agency", "Department"])
-        return f"{adjective.capitalize()} {entity} {noun.capitalize()}"
-
-    def create_sample_domain(self, organization):
-        """Create a sample domain linked to an organization."""
-        domain_name = f"{random.choice(adjectives)}-{random.choice(nouns)}.crossfeed.local".lower()
-        ip = ".".join(map(str, (random.randint(0, 255) for _ in range(4))))
-        return Domain.objects.create(
-            name=domain_name,
-            ip=ip,
-            fromRootDomain="crossfeed.local",
-            isFceb=True,
-            subdomainSource="findomain",
-            organization=organization,
-        )
-
-    def create_sample_services_and_vulnerabilities(self, domain):
-        """Create sample services and vulnerabilities for a domain."""
-        # Add random services
-        if random.random() < PROB_SAMPLE_SERVICES:
-            Service.objects.create(
-                domain=domain,
-                port=random.choice([80, 443]),
-                service="http",
-                serviceSource="shodan",
-                wappalyzerResults=[
-                    {"technology": {"cpe": random.choice(cpes)}, "version": ""}
-                ],
-            )
-
-        # Add random vulnerabilities
-        if random.random() < PROB_SAMPLE_VULNERABILITIES:
-            Vulnerability.objects.create(
-                title="Sample Vulnerability",
-                domain=domain,
-                service=None,
-                description="Sample description",
-                severity=random.choice(["Low", "Medium", "High"]),
-                needsPopulation=True,  # Ensuring required fields are populated
-                state="open",
-                substate="unconfirmed",
-                source="sample_source",
-                actions=[],
-                structuredData={},
-            )
diff --git a/backend/src/xfd_django/xfd_api/tasks/run_syncdb.py b/backend/src/xfd_django/xfd_api/tasks/run_syncdb.py
index c224ba44..c806300e 100644
--- a/backend/src/xfd_django/xfd_api/tasks/run_syncdb.py
+++ b/backend/src/xfd_django/xfd_api/tasks/run_syncdb.py
@@ -12,10 +12,7 @@
 django.setup()
 
 from django.core.management import call_command
-from xfd_api.management.commands.syncdb import (
-    populate_sample_data,
-    manage_elasticsearch_indices,
-)
+from xfd_api.tasks.syndb_helpers import manage_elasticsearch_indices, populate_sample_data
 
 
 def handler(event, context):
diff --git a/backend/src/xfd_django/xfd_api/tasks/syndb_helpers.py b/backend/src/xfd_django/xfd_api/tasks/syndb_helpers.py
new file mode 100644
index 00000000..c93d8346
--- /dev/null
+++ b/backend/src/xfd_django/xfd_api/tasks/syndb_helpers.py
@@ -0,0 +1,151 @@
+"""Shared syncdb helpers (xfd_api/tasks/syndb_helpers.py).
+
+Provides the Elasticsearch index management and sample-data population
+routines used by both the ``syncdb`` management command and the
+``run_syncdb`` task handler.
+"""
+import json
+import os
+import random
+from django.conf import settings
+from django.db import transaction
+from xfd_api.models import Domain, Organization, OrganizationTag, Service, Vulnerability
+from xfd_api.tasks.es_client import ESClient
+
+# Constants for sample data generation
+SAMPLE_TAG_NAME = "Sample Data"
+NUM_SAMPLE_ORGS = 10
+NUM_SAMPLE_DOMAINS = 10
+PROB_SAMPLE_SERVICES = 0.5
+PROB_SAMPLE_VULNERABILITIES = 0.5
+SAMPLE_STATES = ["VA", "CA", "CO"]
+SAMPLE_REGION_IDS = ["1", "2", "3"]
+
+# Load sample data files
+SAMPLE_DATA_DIR = os.path.join(settings.BASE_DIR, "xfd_api", "tasks", "sample_data")
+
+
+def _load_sample_json(filename):
+    """Return the parsed contents of one JSON file in SAMPLE_DATA_DIR."""
+    # A context manager closes the handle as soon as parsing finishes; a bare
+    # json.load(open(...)) leaves it open until the file object is collected.
+    with open(os.path.join(SAMPLE_DATA_DIR, filename), encoding="utf-8") as sample_file:
+        return json.load(sample_file)
+
+
+services = _load_sample_json("services.json")
+cpes = _load_sample_json("cpes.json")
+vulnerabilities = _load_sample_json("vulnerabilities.json")
+cves = _load_sample_json("cves.json")
+nouns = _load_sample_json("nouns.json")
+adjectives = _load_sample_json("adjectives.json")
+
+# Elasticsearch client
+es_client = ESClient()
+
+
+def manage_elasticsearch_indices(dangerouslyforce):
+    """Synchronize the Elasticsearch indices, optionally deleting them first.
+
+    Args:
+        dangerouslyforce: When truthy, ``es_client.delete_all()`` runs before
+            the organization and domain indices are synchronized.
+
+    Failures are reported on stdout instead of being raised, so the caller
+    keeps running even when Elasticsearch cannot be updated.
+    """
+    try:
+        if dangerouslyforce:
+            es_client.delete_all()
+        es_client.sync_organizations_index()
+        es_client.sync_domains_index()
+        print("Elasticsearch indices synchronized.")
+    except Exception as e:
+        print(f"Error managing Elasticsearch indices: {e}")
+
+
+def populate_sample_data():
+    """Create sample organizations, domains, services and vulnerabilities.
+
+    Everything is written inside one ``transaction.atomic()`` block: a tag
+    named SAMPLE_TAG_NAME is fetched or created, then NUM_SAMPLE_ORGS
+    organizations are created with NUM_SAMPLE_DOMAINS domains each.
+    """
+    with transaction.atomic():
+        tag, _ = OrganizationTag.objects.get_or_create(name=SAMPLE_TAG_NAME)
+        for _ in range(NUM_SAMPLE_ORGS):
+            org = Organization.objects.create(
+                acronym="".join(random.choices("ABCDEFGHIJKLMNOPQRSTUVWXYZ", k=5)),
+                name=generate_random_name(),
+                rootDomains=["crossfeed.local"],
+                ipBlocks=[],
+                isPassive=False,
+                state=random.choice(SAMPLE_STATES),
+                regionId=random.choice(SAMPLE_REGION_IDS),
+            )
+            org.tags.add(tag)
+
+            for _ in range(NUM_SAMPLE_DOMAINS):
+                domain = create_sample_domain(org)
+                create_sample_services_and_vulnerabilities(domain)
+
+
+def generate_random_name():
+    """Return a random organization name of the form 'Adjective Entity Noun'."""
+    adjective = random.choice(adjectives)
+    noun = random.choice(nouns)
+    entity = random.choice(["City", "County", "Agency", "Department"])
+    return f"{adjective.capitalize()} {entity} {noun.capitalize()}"
+
+
+def create_sample_domain(organization):
+    """Create and return a sample Domain object linked to ``organization``.
+
+    The name is a random, lower-cased 'adjective-noun.crossfeed.local' and
+    the IP is four random octets, each in the range 0-255.
+    """
+    domain_name = f"{random.choice(adjectives)}-{random.choice(nouns)}.crossfeed.local".lower()
+    ip = ".".join(map(str, (random.randint(0, 255) for _ in range(4))))
+    return Domain.objects.create(
+        name=domain_name,
+        ip=ip,
+        fromRootDomain="crossfeed.local",
+        isFceb=True,
+        subdomainSource="findomain",
+        organization=organization,
+    )
+
+
+def create_sample_services_and_vulnerabilities(domain):
+    """Randomly attach a sample service and/or vulnerability to ``domain``.
+
+    Each record is created independently, with probability
+    PROB_SAMPLE_SERVICES and PROB_SAMPLE_VULNERABILITIES respectively.
+    """
+    # Add random services
+    if random.random() < PROB_SAMPLE_SERVICES:
+        Service.objects.create(
+            domain=domain,
+            port=random.choice([80, 443]),
+            service="http",
+            serviceSource="shodan",
+            wappalyzerResults=[
+                {"technology": {"cpe": random.choice(cpes)}, "version": ""}
+            ],
+        )
+
+    # Add random vulnerabilities
+    if random.random() < PROB_SAMPLE_VULNERABILITIES:
+        Vulnerability.objects.create(
+            title="Sample Vulnerability",
+            domain=domain,
+            service=None,
+            description="Sample description",
+            severity=random.choice(["Low", "Medium", "High"]),
+            needsPopulation=True,
+            state="open",
+            substate="unconfirmed",
+            source="sample_source",
+            actions=[],
+            structuredData={},
+        )