From d4c1f9f87b7747aad057295a8d4999e8f97e1e93 Mon Sep 17 00:00:00 2001 From: Dibyendu Majumdar Date: Mon, 2 Nov 2020 00:06:41 +0000 Subject: [PATCH] issue #40 Optimizer linear IR output when step is known positive --- src/linearizer.c | 156 +++++++++++++++++++++++++- tests/expected/results.expected | 190 +++++++++++++------------------- 2 files changed, 229 insertions(+), 117 deletions(-) diff --git a/src/linearizer.c b/src/linearizer.c index c85206c..a9b980e 100644 --- a/src/linearizer.c +++ b/src/linearizer.c @@ -1838,28 +1838,172 @@ temporaries. Lend: +Above is the general case + +When we know the increment to be negative or positive we can simplify. +Example for positive case + + var = var - step + goto L1 +L1: + var = var + step; + goto L2 +L2: + stop = var > limit + if stop goto Lend + else goto Lbody +Lbody: + set local symbol in for loop to var + do body + goto L1; +Lend: + +Negative case + + var = var - step + goto L1 +L1: + var = var + step; + goto L3; +L3: + stop = var < limit + if stop goto Lend + else goto Lbody +Lbody: + set local symbol in for loop to var + do body + goto L1; +Lend: + */ //clang-format on + +static void linearize_for_num_statement_positivestep(struct proc *proc, struct ast_node *node) +{ + start_scope(proc->linearizer, proc, node->for_stmt.for_scope); + + struct ast_node *index_var_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 0); + struct ast_node *limit_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 1); + struct ast_node *step_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2); + struct lua_symbol *var_sym = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.symbols, 0); + + if (index_var_expr == NULL || limit_expr == NULL) { + handle_error(proc->linearizer->ast_container, "A least index and limit must be supplied"); + } + struct pseudo *t = linearize_expression(proc, index_var_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + struct pseudo *index_var_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, index_var_pseudo, t); + + t = linearize_expression(proc, limit_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + struct pseudo *limit_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, limit_pseudo, t); + + if (step_expr == NULL) + t = allocate_constant_pseudo(proc, allocate_integer_constant(proc, 1)); + else { + t = linearize_expression(proc, step_expr); + if (t->type == PSEUDO_RANGE) { + convert_range_to_temp(t); // Only accept one result + } + } + struct pseudo *step_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + instruct_move(proc, op_mov, step_pseudo, t); + + struct pseudo *stop_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT); + create_binary_instruction(proc, op_subii, index_var_pseudo, step_pseudo, index_var_pseudo); + + struct basic_block *L1 = create_block(proc); + struct basic_block *L2 = create_block(proc); + struct basic_block *Lbody = create_block(proc); + struct basic_block *Lend = create_block(proc); + struct basic_block *previous_break_target = proc->current_break_target; + struct block_scope *previous_break_scope = proc->current_break_scope; + proc->current_break_target = Lend; + proc->current_break_scope = proc->current_scope; + + start_block(proc, L1); + create_binary_instruction(proc, op_addii, index_var_pseudo, step_pseudo, index_var_pseudo); + instruct_br(proc, allocate_block_pseudo(proc, L2)); + + start_block(proc, L2); + create_binary_instruction(proc, op_ltii, limit_pseudo, index_var_pseudo, stop_pseudo); + instruct_cbr(proc, stop_pseudo, Lend, Lbody); + + start_block(proc, Lbody); + instruct_move(proc, op_mov, var_sym->variable.pseudo, index_var_pseudo); + + start_scope(proc->linearizer, proc, node->for_stmt.for_body); + linearize_statement_list(proc, node->for_stmt.for_statement_list); + end_scope(proc->linearizer, proc); + + /* If the fornum block has escaped local vars then we need to close */ + if (proc->current_break_scope->need_close) { + /* Note we put close instruction in current basic block */ + instruct_close(proc, proc->current_bb, proc->current_break_scope); + } + instruct_br(proc, allocate_block_pseudo(proc, L1)); + + end_scope(proc->linearizer, proc); + + free_temp_pseudo(proc, stop_pseudo, false); + free_temp_pseudo(proc, step_pseudo, false); + free_temp_pseudo(proc, limit_pseudo, false); + free_temp_pseudo(proc, index_var_pseudo, false); + + start_block(proc, Lend); + + proc->current_break_target = previous_break_target; + proc->current_break_scope = previous_break_scope; +} + static void linearize_for_num_statement(struct proc *proc, struct ast_node *node) { assert(node->type == STMT_FOR_NUM); - start_scope(proc->linearizer, proc, node->for_stmt.for_scope); /* For now we only allow integer expressions */ struct ast_node *expr; FOR_EACH_PTR(node->for_stmt.expr_list, expr) + { + if (expr->common_expr.type.type_code != RAVI_TNUMINT) { + handle_error(proc->linearizer->ast_container, + "Only for loops with integer expressions currently supported"); + } + } + END_FOR_EACH_PTR(expr) + + /* Check if we can optimize */ + struct ast_node *step_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2); { - if (expr->common_expr.type.type_code != RAVI_TNUMINT) { - handle_error(proc->linearizer->ast_container, - "Only for loops with integer expressions currently supported"); + bool step_known_positive = false; +// bool step_known_negative = false; + if (step_expr == NULL) { + step_known_positive = true; + } else if (step_expr->type == EXPR_LITERAL) { + if (step_expr->literal_expr.type.type_code == RAVI_TNUMINT) { + if (step_expr->literal_expr.u.i > 0) + step_known_positive = true; +// else if (step_expr->literal_expr.u.i < 0) +// step_known_negative = true; + } + } + if (step_known_positive) { + linearize_for_num_statement_positivestep(proc, node); + return; } } - END_FOR_EACH_PTR(expr) + + /* Default case where we do not know if step is negative or positive */ + start_scope(proc->linearizer, proc, node->for_stmt.for_scope); struct ast_node *index_var_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 0); struct ast_node *limit_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 1); - struct ast_node *step_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2); struct lua_symbol *var_sym = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.symbols, 0); if (index_var_expr == NULL || limit_expr == NULL) { diff --git a/tests/expected/results.expected b/tests/expected/results.expected index 46a2f12..e4e66a3 100644 --- a/tests/expected/results.expected +++ b/tests/expected/results.expected @@ -2644,25 +2644,21 @@ L0 (entry) MOV {1 Kint(0)} {Tint(1)} MOV {10 Kint(1)} {Tint(2)} MOV {1 Kint(0)} {Tint(3)} - LIii {0 Kint(2), Tint(3)} {Tint(4)} SUBii {Tint(1), Tint(3)} {Tint(1)} BR {L2} L1 (exit) L2 ADDii {Tint(1), Tint(3)} {Tint(1)} - CBR {Tint(4)} {L3, L4} + BR {L3} L3 - LIii {Tint(2), Tint(1)} {Tint(5)} - CBR {Tint(5)} {L6, L5} + LIii {Tint(2), Tint(1)} {Tint(4)} + CBR {Tint(4)} {L5, L4} L4 - LIii {Tint(1), Tint(2)} {Tint(5)} - CBR {Tint(5)} {L6, L5} -L5 MOV {Tint(1)} {Tint(0)} - LOADGLOBAL {Upval(_ENV), 'print' Ks(3)} {T(0)} + LOADGLOBAL {Upval(_ENV), 'print' Ks(2)} {T(0)} CALL {T(0), Tint(0)} {T(0..), 1 Kint(0)} BR {L2} -L6 +L5 RET {L1} function x() local a=1; function y() return function() return a end end; end function() @@ -7076,47 +7072,39 @@ L0 (entry) MOV {1 Kint(0)} {Tint(1)} MOV {500 Kint(1)} {Tint(2)} MOV {1 Kint(0)} {Tint(3)} - LIii {0 Kint(2), Tint(3)} {Tint(4)} SUBii {Tint(1), Tint(3)} {Tint(1)} BR {L2} L1 (exit) L2 ADDii {Tint(1), Tint(3)} {Tint(1)} - CBR {Tint(4)} {L3, L4} + BR {L3} L3 - LIii {Tint(2), Tint(1)} {Tint(5)} - CBR {Tint(5)} {L6, L5} + LIii {Tint(2), Tint(1)} {Tint(4)} + CBR {Tint(4)} {L5, L4} L4 - LIii {Tint(1), Tint(2)} {Tint(5)} - CBR {Tint(5)} {L6, L5} -L5 MOV {Tint(1)} {Tint(0)} - MOV {0.000000000000 Kflt(3)} {local(sum, 0)} - MOV {1 Kint(0)} {Tint(7)} - MOV {10000 Kint(4)} {Tint(8)} - MOV {1 Kint(0)} {Tint(9)} - LIii {0 Kint(2), Tint(9)} {Tint(10)} - SUBii {Tint(7), Tint(9)} {Tint(7)} - BR {L7} -L6 + MOV {0.000000000000 Kflt(2)} {local(sum, 0)} + MOV {1 Kint(0)} {Tint(6)} + MOV {10000 Kint(3)} {Tint(7)} + MOV {1 Kint(0)} {Tint(8)} + SUBii {Tint(6), Tint(8)} {Tint(6)} + BR {L6} +L5 RET {local(sum, 0)} {L1} +L6 + ADDii {Tint(6), Tint(8)} {Tint(6)} + BR {L7} L7 - ADDii {Tint(7), Tint(9)} {Tint(7)} - CBR {Tint(10)} {L8, L9} + LIii {Tint(7), Tint(6)} {Tint(9)} + CBR {Tint(9)} {L9, L8} L8 - LIii {Tint(8), Tint(7)} {Tint(11)} - CBR {Tint(11)} {L11, L10} -L9 - LIii {Tint(7), Tint(8)} {Tint(11)} - CBR {Tint(11)} {L11, L10} -L10 - MOV {Tint(7)} {Tint(6)} - MULii {Tint(6), Tint(6)} {Tint(12)} - DIVfi {1.000000000000 Kflt(5), Tint(12)} {Tflt(0)} + MOV {Tint(6)} {Tint(5)} + MULii {Tint(5), Tint(5)} {Tint(10)} + DIVfi {1.000000000000 Kflt(4), Tint(10)} {Tflt(0)} ADD {local(sum, 0), Tflt(0)} {T(0)} MOV {T(0)} {local(sum, 0)} - BR {L7} -L11 + BR {L6} +L9 BR {L2} function matmul(a: table, b: table) assert(@integer(#a[1]) == #b); @@ -7928,81 +7916,69 @@ L0 (entry) MOV {1 Kint(1)} {Tint(4)} MOV {Tint(0)} {Tint(5)} MOV {1 Kint(1)} {Tint(6)} - LIii {0 Kint(4), Tint(6)} {Tint(7)} SUBii {Tint(4), Tint(6)} {Tint(4)} BR {L2} L1 (exit) L2 ADDii {Tint(4), Tint(6)} {Tint(4)} - CBR {Tint(7)} {L3, L4} + BR {L3} L3 - LIii {Tint(5), Tint(4)} {Tint(8)} - CBR {Tint(8)} {L6, L5} + LIii {Tint(5), Tint(4)} {Tint(7)} + CBR {Tint(7)} {L5, L4} L4 - LIii {Tint(4), Tint(5)} {Tint(8)} - CBR {Tint(8)} {L6, L5} -L5 MOV {Tint(4)} {Tint(3)} - LOADGLOBAL {Upval(_ENV), 'table' Ks(5)} {T(6)} - GETsk {T(6), 'numarray' Ks(6)} {T(7)} - CALL {T(7), Tint(2), 0.000000000000 Kflt(7)} {T(7..), 1 Kint(1)} + LOADGLOBAL {Upval(_ENV), 'table' Ks(4)} {T(6)} + GETsk {T(6), 'numarray' Ks(5)} {T(7)} + CALL {T(7), Tint(2), 0.000000000000 Kflt(6)} {T(7..), 1 Kint(1)} TOFARRAY {T(7[7..])} MOV {T(7[7..])} {local(xi, 4)} TPUTik {local(xi, 4)} {local(x, 2), Tint(3)} - MOV {1 Kint(1)} {Tint(10)} - MOV {Tint(2)} {Tint(11)} - MOV {1 Kint(1)} {Tint(12)} - LIii {0 Kint(4), Tint(12)} {Tint(13)} - SUBii {Tint(10), Tint(12)} {Tint(10)} - BR {L7} -L6 + MOV {1 Kint(1)} {Tint(9)} + MOV {Tint(2)} {Tint(10)} + MOV {1 Kint(1)} {Tint(11)} + SUBii {Tint(9), Tint(11)} {Tint(9)} + BR {L6} +L5 RET {local(x, 2)} {L1} +L6 + ADDii {Tint(9), Tint(11)} {Tint(9)} + BR {L7} L7 - ADDii {Tint(10), Tint(12)} {Tint(10)} - CBR {Tint(13)} {L8, L9} + LIii {Tint(10), Tint(9)} {Tint(12)} + CBR {Tint(12)} {L9, L8} L8 - LIii {Tint(11), Tint(10)} {Tint(14)} - CBR {Tint(14)} {L11, L10} -L9 - LIii {Tint(10), Tint(11)} {Tint(14)} - CBR {Tint(14)} {L11, L10} -L10 - MOV {Tint(10)} {Tint(9)} + MOV {Tint(9)} {Tint(8)} TGETik {local(a, 0), Tint(3)} {T(8)} TOFARRAY {T(8)} - TGETik {local(c, 3), Tint(9)} {T(9)} + TGETik {local(c, 3), Tint(8)} {T(9)} TOFARRAY {T(9)} MOV {T(9)} {local(cj, 6)} MOV {T(8)} {local(ai, 5)} - MOVf {0.000000000000 Kflt(7)} {Tflt(0)} + MOVf {0.000000000000 Kflt(6)} {Tflt(0)} + MOV {1 Kint(1)} {Tint(14)} + MOV {Tint(1)} {Tint(15)} MOV {1 Kint(1)} {Tint(16)} - MOV {Tint(1)} {Tint(17)} - MOV {1 Kint(1)} {Tint(18)} - LIii {0 Kint(4), Tint(18)} {Tint(19)} - SUBii {Tint(16), Tint(18)} {Tint(16)} - BR {L12} -L11 + SUBii {Tint(14), Tint(16)} {Tint(14)} + BR {L10} +L9 BR {L2} +L10 + ADDii {Tint(14), Tint(16)} {Tint(14)} + BR {L11} +L11 + LIii {Tint(15), Tint(14)} {Tint(17)} + CBR {Tint(17)} {L13, L12} L12 - ADDii {Tint(16), Tint(18)} {Tint(16)} - CBR {Tint(19)} {L13, L14} -L13 - LIii {Tint(17), Tint(16)} {Tint(20)} - CBR {Tint(20)} {L16, L15} -L14 - LIii {Tint(16), Tint(17)} {Tint(20)} - CBR {Tint(20)} {L16, L15} -L15 - MOV {Tint(16)} {Tint(15)} - FAGETik {local(ai, 5), Tint(15)} {Tflt(1)} - FAGETik {local(cj, 6), Tint(15)} {Tflt(2)} + MOV {Tint(14)} {Tint(13)} + FAGETik {local(ai, 5), Tint(13)} {Tflt(1)} + FAGETik {local(cj, 6), Tint(13)} {Tflt(2)} MULff {Tflt(1), Tflt(2)} {Tflt(3)} ADDff {Tflt(0), Tflt(3)} {Tflt(2)} MOVf {Tflt(2)} {Tflt(0)} - BR {L12} -L16 - FAPUTfv {Tflt(0)} {local(xi, 4), Tint(9)} - BR {L7} + BR {L10} +L13 + FAPUTfv {Tflt(0)} {local(xi, 4), Tint(8)} + BR {L6} return -3+4*5//2^3^2//9+4%10/3 == (-3)+(((4*5)//(2^(3^2)))//9)+((4%10)/3) function() --upvalues _ENV* @@ -8838,25 +8814,21 @@ L0 (entry) MOV {1 Kint(0)} {Tint(1)} MOV {10 Kint(1)} {Tint(2)} MOV {1 Kint(0)} {Tint(3)} - LIii {0 Kint(2), Tint(3)} {Tint(4)} SUBii {Tint(1), Tint(3)} {Tint(1)} BR {L2} L1 (exit) L2 ADDii {Tint(1), Tint(3)} {Tint(1)} - CBR {Tint(4)} {L3, L4} + BR {L3} L3 - LIii {Tint(2), Tint(1)} {Tint(5)} - CBR {Tint(5)} {L6, L5} + LIii {Tint(2), Tint(1)} {Tint(4)} + CBR {Tint(4)} {L5, L4} L4 - LIii {Tint(1), Tint(2)} {Tint(5)} - CBR {Tint(5)} {L6, L5} -L5 MOV {Tint(1)} {Tint(0)} - LOADGLOBAL {Upval(_ENV), 'print' Ks(3)} {T(0)} + LOADGLOBAL {Upval(_ENV), 'print' Ks(2)} {T(0)} CALL {T(0), Tint(0)} {T(0..), 1 Kint(0)} BR {L2} -L6 +L5 RET {L1} for i=10,1,-1 do print(i) end function() @@ -9219,33 +9191,29 @@ L0 (entry) MOV {1 Kint(0)} {Tint(1)} MOV {10 Kint(1)} {Tint(2)} MOV {1 Kint(0)} {Tint(3)} - LIii {0 Kint(2), Tint(3)} {Tint(4)} SUBii {Tint(1), Tint(3)} {Tint(1)} BR {L2} L1 (exit) L2 ADDii {Tint(1), Tint(3)} {Tint(1)} - CBR {Tint(4)} {L3, L4} + BR {L3} L3 - LIii {Tint(2), Tint(1)} {Tint(5)} - CBR {Tint(5)} {L6, L5} + LIii {Tint(2), Tint(1)} {Tint(4)} + CBR {Tint(4)} {L5, L4} L4 - LIii {Tint(1), Tint(2)} {Tint(5)} - CBR {Tint(5)} {L6, L5} -L5 MOV {Tint(1)} {Tint(0)} - BR {L7} -L6 + BR {L6} +L5 RET {L1} +L6 + EQii {Tint(0), 2 Kint(2)} {T(0)} + CBR {T(0)} {L7, L8} L7 - EQii {Tint(0), 2 Kint(3)} {T(0)} - CBR {T(0)} {L8, L9} + BR {L5} L8 - BR {L6} -L9 BR {L2} -L10 - BR {L9} +L9 + BR {L8} local msgs = {} function Message (m) if not _nomsg then