Skip to content

Commit

Permalink
Merge pull request #20 from salesforce/fix/GH-19-fix-markdown-duplicates
Browse files Browse the repository at this point in the history
Removes Table output duplicates
  • Loading branch information
kmcquade authored Mar 13, 2021
2 parents c62849e + 4595284 commit a31263a
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 66 deletions.
109 changes: 54 additions & 55 deletions azure_guardrails/scrapers/compliance_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,78 +223,77 @@ def _matching_metadata(self) -> dict:
results[display_name] = benchmark_data
return results

def markdown_table(self) -> str:
    """Render the compliance summary as a GitHub-format Markdown table.

    The column headers are fixed: the service and policy definition columns
    followed by one column per benchmark. The rows are whatever
    ``self.table_summary()`` returns.
    """
    column_names = [
        "Service",
        "Policy Definition",
        "Azure Security Benchmark",
        "CIS",
        "CCMC L3",
        "ISO 27001",
        "NIST SP 800-53 R4",
        "NIST SP 800-171 R2",
        "HIPAA HITRUST 9.2",
        "New Zealand ISM",
    ]
    return tabulate(self.table_summary(), headers=column_names, tablefmt="github")

def csv_table(self, path: str, verbosity: int):
    """Write the compliance summary to a CSV file at ``path``.

    Rows come from ``self.table_summary(hyperlink_format=False)`` and are
    written with ``csv.DictWriter``, so each row is expected to be a dict
    keyed by the column names listed in ``headers``. A file that already
    exists at ``path`` is removed before the new one is written.

    :param path: Destination of the CSV file.
    :param verbosity: When 1 or higher, report the removal of a previous file.
    """
    headers = [
        "Service",
        "Policy Definition",
        "Azure Security Benchmark",
        "CIS",
        "CCMC L3",
        "ISO 27001",
        "NIST SP 800-53 R4",
        "NIST SP 800-171 R2",
        "HIPAA HITRUST 9.2",
        "New Zealand ISM",
        "Link",
    ]
    rows = self.table_summary(hyperlink_format=False)

    if os.path.exists(path):
        # Report the removal only when a previous file is actually removed,
        # and before it happens rather than after the new file is written.
        if verbosity >= 1:
            utils.print_grey(f"Removing the previous file: {path}")
        os.remove(path)

    # newline='' leaves line-ending handling to the csv module.
    with open(path, 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
        writer.writerows(rows)

def markdown_table(self) -> str:
    """Render the compliance summary as a GitHub-format Markdown table.

    Column headers are taken from the keys of the rows returned by
    ``self.table_summary()`` (``headers="keys"``).
    """
    return tabulate(self.table_summary(), headers="keys", tablefmt="github")

def table_summary(self, hyperlink_format: bool = True) -> list:
# Builds one summary row per policy definition found in self.matching_metadata
# and returns the rows sorted.
# NOTE(review): this span looks like a rendered diff in which the pre-change
# (list-based rows) and post-change (dict-based rows) lines are interleaved
# without +/- markers or indentation -- confirm against the repository before
# relying on the exact statement order or nesting shown here.
results = []

# Returns the requirement ID recorded for benchmark_name with its label
# prefixes stripped, or "" when the policy has no entry for that benchmark.
def get_benchmark_id(benchmark_name: str, this_policy_metadata: dict) -> str:
if this_policy_metadata.benchmarks.get(benchmark_name, None):
# if benchmark_name in this_policy_metadata["benchmarks"].keys():
# this_policy_metadata.benchmarks['Azure Security Benchmark'].requirement_id
benchmark_id = this_policy_metadata.benchmarks[benchmark_name].requirement_id
if this_policy_metadata.get(benchmark_name, None):
benchmark_id = this_policy_metadata[benchmark_name][benchmark_name]
# Strip the "<benchmark name>: " and "ID : " label prefixes from the stored value.
benchmark_id = benchmark_id.replace(f"{benchmark_name}: ", "")
benchmark_id = benchmark_id.replace(f"ID : ", "")
else:
benchmark_id = ""
return benchmark_id

for policy_definition_name in self.matching_metadata:
# Loop through the matching metadata only, then look within the policy_compliance_data that holds the master details
for policy_definition_name, policy_definition_details in self.matching_metadata.items():
# policy_definition_metadata is looked up by the name without the "[Preview]: " prefix.
name = policy_definition_name.replace("[Preview]: ", "")

for policy in self.matching_metadata[policy_definition_name]:
service_name = self.policy_compliance_data.policy_definition_metadata[name][policy].service_name
github_link = self.policy_compliance_data.policy_definition_metadata[name][policy].github_link
if hyperlink_format:
policy_definition_string = f"[{policy_definition_name}]({github_link})"
else:
policy_definition_string = policy_definition_name

policy_metadata = self.policy_compliance_data.policy_definition_metadata[name][policy]
azure_security_benchmark_id = get_benchmark_id("Azure Security Benchmark", policy_metadata)
cis_id = get_benchmark_id("CIS", policy_metadata)
ccmc_id = get_benchmark_id("CCMC L3", policy_metadata)
iso_id = get_benchmark_id("ISO 27001", policy_metadata)
nist_800_171_id = get_benchmark_id("NIST SP 800-171 R2", policy_metadata)
nist_800_53_id = get_benchmark_id("NIST SP 800-53 R4", policy_metadata)
hipaa_id = get_benchmark_id("HIPAA HITRUST 9.2", policy_metadata)
# The "New Zealand ISM" column is looked up under the "NZISM Security Benchmark" name.
new_zealand_id = get_benchmark_id("NZISM Security Benchmark", policy_metadata)
result = [
service_name,
policy_definition_string,
azure_security_benchmark_id,
cis_id,
ccmc_id,
iso_id,
nist_800_53_id,
nist_800_171_id,
hipaa_id,
new_zealand_id,
]
# If hyperlink format is not specified, that means it is not markdown and we want to include the github link in a separate column
if not hyperlink_format:
result.append(github_link)
results.append(result)
# List-based rows: sort on the first ten columns, in column order.
results = sorted(results, key=itemgetter(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))
# for policy in self.matching_metadata[policy_definition_name]:
benchmarks = []
github_link = ""
service_name = ""
# service_name and github_link are overwritten on every iteration, so the
# values from the last benchmark entry are the ones used for the row.
for benchmark, benchmark_details in self.policy_compliance_data.policy_definition_metadata[name].items():
benchmarks.append(benchmark)
service_name = benchmark_details.service_name
github_link = benchmark_details.github_link
if hyperlink_format:
policy_definition_string = f"[{policy_definition_name}]({github_link})"
else:
policy_definition_string = policy_definition_name

azure_security_benchmark_id = get_benchmark_id("Azure Security Benchmark", policy_definition_details)
cis_id = get_benchmark_id("CIS", policy_definition_details)
ccmc_id = get_benchmark_id("CCMC L3", policy_definition_details)
iso_id = get_benchmark_id("ISO 27001", policy_definition_details)
nist_800_171_id = get_benchmark_id("NIST SP 800-171 R2", policy_definition_details)
nist_800_53_id = get_benchmark_id("NIST SP 800-53 R4", policy_definition_details)
hipaa_id = get_benchmark_id("HIPAA HITRUST 9.2", policy_definition_details)
# The "New Zealand ISM" column is looked up under the "NZISM Security Benchmark" name.
new_zealand_id = get_benchmark_id("NZISM Security Benchmark", policy_definition_details)

# Dict-based row: keys are the column names.
result = {
"Service": service_name,
"Policy Definition": policy_definition_string,
# "Name": policy_definition_name,
"Azure Security Benchmark": azure_security_benchmark_id,
"CIS": cis_id,
"CCMC L3": ccmc_id,
"ISO 27001": iso_id,
"NIST SP 800-171 R2": nist_800_171_id,
"NIST SP 800-53 R4": nist_800_53_id,
"HIPAA HITRUST 9.2": hipaa_id,
"New Zealand ISM": new_zealand_id,
# The link is stored on every dict row, whatever hyperlink_format is.
"Link": github_link,
}
results.append(result)
# Dict-based rows: sort by service name, then by policy definition.
results = sorted(results, key=itemgetter("Service", "Policy Definition"))
return results
24 changes: 13 additions & 11 deletions azure_guardrails/scrapers/standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,22 @@
from azure_guardrails.shared.utils import chomp_keep_single_spaces


def get_requirement_id(input_text: str, replacement_string: str) -> str:
    """Return the requirement ID contained in the text that precedes a table.

    Pass in ``table.previous_sibling.previous_sibling.text``. The text is
    normalized with ``chomp_keep_single_spaces``; the
    ``ID : <replacement_string>`` label and the ownership notes are then
    removed, and what is left is returned.
    """
    requirement_id = chomp_keep_single_spaces(input_text)
    # Order matters: the label with a trailing space is removed before the
    # bare label, so the separating space does not remain in the result.
    noise_fragments = (
        f"ID : {replacement_string} ",
        f"ID : {replacement_string}",
        " Ownership : Customer",
        " Ownership : Shared",
    )
    for fragment in noise_fragments:
        requirement_id = requirement_id.replace(fragment, "")
    return requirement_id


def scrape_standard(html_file_path: str, benchmark_name: str, replacement_string: str):
with open(html_file_path, "r") as f:
soup = BeautifulSoup(f.read(), "html.parser")
tables = soup.find_all("table")

def get_iso_id(input_text: str) -> str:
"""Pass in table.previous_sibling.previous_sibling.text and get the Azure Benchmark ID"""
id_ownership_string = chomp_keep_single_spaces(input_text)
this_id = id_ownership_string
this_id = this_id.replace(f"ID : {replacement_string} ", "")
this_id = this_id.replace(" Ownership : Customer", "")
this_id = this_id.replace(" Ownership : Shared", "")
return this_id

def get_service_name(github_link: str) -> str:
"""Pass in the github link and get the name of the service based on folder name"""
elements = github_link.split("/")
Expand All @@ -27,8 +29,8 @@ def get_service_name(github_link: str) -> str:
categories = []
for table in tables:
table_identifier_sibling = table.previous_sibling.previous_sibling
# Azure Security Benchmark ID
requirement_id = get_iso_id(table_identifier_sibling.text)
# Get requirement ID
requirement_id = get_requirement_id(table_identifier_sibling.text, replacement_string)

if replacement_string in table_identifier_sibling.text:
# Requirement Name
Expand Down

0 comments on commit a31263a

Please sign in to comment.