From 8634b1a1c245c92925c4c2e89a711992e899cc44 Mon Sep 17 00:00:00 2001 From: Abdalla Date: Sun, 3 Nov 2024 22:39:11 +0100 Subject: [PATCH] clean code and fix naming issues --- src/analyzer.py | 7 +-- src/inliner.py | 101 ++++++------------------------ src/smt_solver/modify_bitcode.cpp | 2 +- src/smt_solver/smt.py | 12 ++-- src/unroller.py | 64 +++---------------- 5 files changed, 35 insertions(+), 151 deletions(-) diff --git a/src/analyzer.py b/src/analyzer.py index c848076..d507b11 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -178,8 +178,6 @@ def _preprocess(self): # Preprocessing pass: unroll loops. if self.project_config.UNROLL_LOOPS: processing = self._run_loop_unroller(compiled_file=processing) - print("PROCESSING", processing) - print("LOCATION", self.project_config.location_temp_dir) self.dag_path: str = clang_helper.generate_dot_file(processing, self.project_config.location_temp_dir) self.preprocessed_path: str = processing # We are done with the preprocessing. @@ -205,7 +203,7 @@ def _run_loop_unroller(self, compiled_file: str) -> str: # Infer the name of the file that results from the CIL preprocessing. unrolled_file: str = unroller.unroll(compiled_file, self.project_config.location_temp_dir, - f"{self.project_config.name_orig_no_extension}gt-unrolled") + f"{self.project_config.name_orig_no_extension}") logger.info("Preprocessing the file: unrolling loops in the code...") @@ -245,8 +243,7 @@ def _run_inliner(self, input_file: str, additional_files: str): input_files = [input_file] + additional_files inlined_file = inliner.inline_functions(input_files, self.project_config.location_temp_dir, - f"{self.project_config.name_orig_no_extension}gt-inlined", self.project_config.func) - + f"{self.project_config.name_orig_no_extension}", self.project_config.func) if not inlined_file: err_msg = "Error running the inliner." diff --git a/src/inliner.py b/src/inliner.py index 8275552..4e023df 100644 --- a/src/inliner.py +++ b/src/inliner.py @@ -12,14 +12,6 @@ def run_command(command): sys.exit(1) return result.stdout -def compile_to_bitcode(files): - bitcode_files = [] - for file in files: - bc_file = file.replace('.c', '.bc') - run_command(f"clang -emit-llvm -O0 -c {file} -o {bc_file}") - bitcode_files.append(bc_file) - return bitcode_files - def link_bitcode(bitcode_files, output_file): run_command(f"llvm-link {' '.join(bitcode_files)} -o {output_file}") @@ -53,9 +45,6 @@ def modify_llvm_ir(input_file, output_file, skip_function): # Replace 'noinline' with 'alwaysinline' if not skipping if 'noinline' in lines[i-1]: # Check previous line for noinline modified_lines[-1] = modified_lines[-1].replace('noinline', 'alwaysinline') - - # Remove "optnone" from the current line - line = line.replace('optnone', '') # Add the processed line to the list of modified lines modified_lines.append(line) @@ -84,95 +73,43 @@ def assemble_bitcode(input_file, output_file): def inline_bitcode(input_file, output_file): run_command(f"opt -always-inline -inline -inline-threshold=10000000 {input_file} -o {output_file}") - run_command(f"opt -always-inline -inline -inline-threshold=10000000 {input_file} -o inlined.bc") def generate_cfg(input_file): run_command(f"opt -dot-cfg {input_file}") -if __name__ == "__main__": - if len(sys.argv) < 2: - print("Usage: python process_files.py ...") - sys.exit(1) - - c_files = sys.argv[1:] - combined_bc = "combined.bc" - combined_ll = "combined.ll" - combined_mod_ll = "combined_mod.ll" - combined_mod_bc = "combined_mod.bc" - inlined_bc = "inlined.bc" - - # Step 1: Compile all C files to bitcode - bitcode_files = compile_to_bitcode(c_files) - - # Step 2: Link all bitcode files into a single combined bitcode file - link_bitcode(bitcode_files, combined_bc) - - # Step 3: Disassemble the combined bitcode file to LLVM IR - disassemble_bitcode(combined_bc, combined_ll) - - # Step 4: Modify the LLVM IR file - modify_llvm_ir(combined_ll, combined_mod_ll, "test") - - # Step 5: Assemble the modified LLVM IR back to bitcode - assemble_bitcode(combined_mod_ll, combined_mod_bc) - - # Step 6: Inline the functions in the modified bitcode - inline_bitcode(combined_mod_bc, inlined_bc) - - # Step 7: Generate the CFG for the inlined bitcode - generate_cfg(inlined_bc) - - print("Processing complete. Output files:") - print(f" Combined bitcode: {combined_bc}") - print(f" Combined LLVM IR: {combined_ll}") - print(f" Modified LLVM IR: {combined_mod_ll}") - print(f" Modified bitcode: {combined_mod_bc}") - print(f" Inlined bitcode: {inlined_bc}") - print("CFG files generated in the current directory.") def inline_functions(bc_filepaths: list[str], output_file_folder: str, output_name: str, analyzed_function: str) -> str: output_file: str = os.path.join(output_file_folder, f"{output_name}.bc") - - combined_bc = "combined.bc" - combined_ll = "combined.ll" - combined_mod_ll = "combined_mod.ll" - combined_mod_bc = "combined_mod.bc" + file_to_analyze = bc_filepaths[0] + combined_bc = f"{file_to_analyze[:-3]}_linked.bc" + combined_ll = f"{file_to_analyze[:-3]}_linked.ll" + combined_mod_ll = f"{file_to_analyze[:-3]}_linked_mod.ll" + combined_mod_bc = f"{file_to_analyze[:-3]}_linked_mod.bc" + combined_inlined_mod_bc = f"{file_to_analyze[:-3]}_linked_inlined_mod.bc" + combined_inlined_mod_ll = f"{file_to_analyze[:-3]}_linked_inlined_mod.ll" + + if len(bc_filepaths) > 1: - # Step 2: Link all bitcode files into a single combined bitcode file + # Step 1: Link all bitcode files into a single combined bitcode file link_bitcode(bc_filepaths, combined_bc) else: combined_bc = bc_filepaths[0] - # Step 3: Disassemble the combined bitcode file to LLVM IR + # Step 2: Disassemble the combined bitcode file to LLVM IR disassemble_bitcode(combined_bc, combined_ll) - # Step 4: Modify the LLVM IR file + # Step 3: Modify the LLVM IR file modify_llvm_ir(combined_ll, combined_mod_ll, analyzed_function) - # Step 5: Assemble the modified LLVM IR back to bitcode + # Step 4: Assemble the modified LLVM IR back to bitcode assemble_bitcode(combined_mod_ll, combined_mod_bc) - # Step 6: Inline the functions in the modified bitcode - inline_bitcode(combined_mod_bc, output_file) - - disassemble_bitcode("inlined.bc", "inlined.ll") - - return output_file - + # Step 5: Inline the functions in the modified bitcode + inline_bitcode(combined_mod_bc, combined_inlined_mod_bc) -# python3 inline.py ext_func.c helper.c - -# inlined_file = clang_helper.inline_functions(input_file, self.project_config.location_temp_dir, -# f"{self.project_config.name_orig_no_extension}gt-inlined") - -# output_file: str = os.path.join(output_file_folder, f"{output_name}.bc") - -# commands: List[str] = ["opt", -# "-always-inline", -# "-inline", "-inline-threshold=10000000", -# "-S", bc_filepath, -# "-o", output_file] + # Step 6: Disassemble the combined bitcode file to LLVM IR for debugging + disassemble_bitcode(combined_inlined_mod_bc, combined_inlined_mod_ll) + + return combined_inlined_mod_bc -# logger.info(subprocess.run(commands, check=True)) -# return output_file \ No newline at end of file diff --git a/src/smt_solver/modify_bitcode.cpp b/src/smt_solver/modify_bitcode.cpp index 0826ba3..7075775 100644 --- a/src/smt_solver/modify_bitcode.cpp +++ b/src/smt_solver/modify_bitcode.cpp @@ -166,7 +166,7 @@ int main(int argc, char **argv) { string labelsFilename(argv[2]); string allLabelsFilename(argv[3]); string funcName(argv[4]); - string outputFilenameMod = inputFilename.substr(0, inputFilename.size() - 3) + "_mod"; + string outputFilenameMod = inputFilename.substr(0, inputFilename.size() - 3) + "_gvMod"; string outputFilename = inputFilename.substr(0, inputFilename.size() - 3); LLVMContext context; SMDiagnostic error; diff --git a/src/smt_solver/smt.py b/src/smt_solver/smt.py index 5a6092a..4abc1dd 100644 --- a/src/smt_solver/smt.py +++ b/src/smt_solver/smt.py @@ -47,18 +47,19 @@ def compile_and_run_cplusplus(modify_bit_code_cpp_file, modify_bit_code_exec_fil if additional_files: compiled_additional_files = clang_helper.compile_list_to_llvm_for_analysis(additional_files, output_dir) compiled_files = [compiled_file] + compiled_additional_files - input_bc_file = inliner.inline_functions(compiled_files, output_dir, f"{c_filename}_inlined", func_name) + input_bc_file = inliner.inline_functions(compiled_files, output_dir, c_filename, func_name) else: input_bc_file = compiled_file # input_bc_file = clang_helper.unroll_loops(inlined_file, output_dir, # f"{c_filename}-unrolled", project_config) if project_config.UNROLL_LOOPS : - input_bc_file = unroller.unroll(compiled_file, output_dir, c_filename) + input_bc_file = unroller.unroll(input_bc_file, output_dir, c_filename) # Run the compiled program # TODO: change modify bc to take in bc file, not c file run_command = ['./' + modify_bit_code_exec_file, input_bc_file, labels_file, all_labels_file, func_name] subprocess.run(run_command, check=True) + return f"{input_bc_file[:-3]}_gvMod.bc" def run_klee(klee_file): """ @@ -130,12 +131,7 @@ def run_smt(project_config: ProjectConfiguration, labels_file: str, output_dir: # TODO: Find a way to not hard code path modify_bit_code_cpp_file = '../../src/smt_solver/modify_bitcode.cpp' modify_bit_code_exec_file = '../../src/smt_solver/modify_bitcode' - compile_and_run_cplusplus(modify_bit_code_cpp_file, modify_bit_code_exec_file, klee_file_path, additional_files_path, c_file + "_klee_format", labels_file, os.path.join(project_config.location_temp_dir, "labels_0.txt"), project_config.func, output_dir, project_config) - inline_str = "" - if project_config.inlined: - inline_str = "_inlined" - - modified_klee_file_bc = klee_file_path[:-2] + inline_str + "_mod.bc" + modified_klee_file_bc = compile_and_run_cplusplus(modify_bit_code_cpp_file, modify_bit_code_exec_file, klee_file_path, additional_files_path, c_file + "_klee_format", labels_file, os.path.join(project_config.location_temp_dir, "labels_0.txt"), project_config.func, output_dir, project_config) # run klee run_klee(modified_klee_file_bc) diff --git a/src/unroller.py b/src/unroller.py index 615402d..8636140 100644 --- a/src/unroller.py +++ b/src/unroller.py @@ -39,9 +39,8 @@ def unroll_llvm_ir(input_ir, output_ir): def generate_llvm_dag(output_bc): """ - Generate LLVM DAG (.dot file) using llc and opt tools. + Generate LLVM DAG (.dot file) using opt. """ - # Use llc to create DAG command = f"opt -dot-cfg -S -enable-new-pm=0 -disable-output {output_bc}" run_command(command, f"Generating LLVM DAG from bitcode {output_bc}") @@ -104,65 +103,20 @@ def modify_loop_branches_to_next_block(input_file_path, output_file_path): def assemble_bitcode(input_file, output_file): run_command(f"llvm-as {input_file} -o {output_file}", "Assemble LLVM IR after unrolling") -# Usage example -# modify_loop_branches_to_next_block("input.ll", "output.ll") +def unroll(bc_filepath: str, output_file_folder: str, output_name: str): -# Usage example -# modify_loop_branches_to_next_block("input.ll", "output.ll") + output_ir = f"{bc_filepath[:-3]}.ll" + unrolled_output_ir = f"{bc_filepath[:-3]}_unrolled.ll" + unrolled_mod_output_ir = f"{bc_filepath[:-3]}_unrolled_mod.ll" + unrolled_mod_output_bc = f"{bc_filepath[:-3]}_unrolled_mod.bc" - -def main(): - if len(sys.argv) != 2: - print("Usage: python convert_to_llvm.py ") - sys.exit(1) - - c_file = sys.argv[1] - - if not os.path.isfile(c_file): - print(f"Error: {c_file} does not exist.") - sys.exit(1) - - base_filename = os.path.splitext(c_file)[0] - output_bc = f"{base_filename}.bc" - output_ir = f"{base_filename}.ll" - unrolled_output_ir = f"{base_filename}_unrolled.ll" - unrolled_mod_output_ir = f"{base_filename}_unrolled_mod.ll" - output_dag = f"{base_filename}.dot" - - # 1. Compile C file to LLVM Bitcode - compile_to_bitcode(c_file, output_bc) - - # 2. Generate LLVM IR from Bitcode - generate_llvm_ir(output_bc, output_ir) + generate_llvm_ir(bc_filepath, output_ir) unroll_llvm_ir(output_ir, unrolled_output_ir) modify_loop_branches_to_next_block(unrolled_output_ir, unrolled_mod_output_ir) - - # 3. Generate LLVM DAG from Bitcode - generate_llvm_dag(unrolled_mod_output_ir) - - print(f"Files generated:\n- Bitcode: {output_bc}\n- LLVM IR: {output_ir}\n- LLVM IR: {unrolled_output_ir}\n- LLVM DAG: {output_dag}") - -if __name__ == "__main__": - main() - -def unroll(bc_filepaths: list[str], output_file_folder: str, output_name: str): - - output_ir = f"{output_name}.ll" - unrolled_output_ir = f"{output_name}_unrolled.ll" - unrolled_mod_output_ir = f"{output_name}_unrolled_mod.ll" - unrolled_mod_output_bc = os.path.join(output_file_folder, f"{output_name}_unrolled_mod.bc") - - generate_llvm_ir(bc_filepaths, output_ir) - - unroll_llvm_ir(output_ir, unrolled_output_ir) - - modify_loop_branches_to_next_block(unrolled_output_ir, unrolled_mod_output_ir) - - generate_llvm_dag(unrolled_mod_output_ir) - assemble_bitcode(unrolled_mod_output_ir, bc_filepaths) + assemble_bitcode(unrolled_mod_output_ir, unrolled_mod_output_bc) - return bc_filepaths \ No newline at end of file + return unrolled_mod_output_bc \ No newline at end of file