From d4c1f9f87b7747aad057295a8d4999e8f97e1e93 Mon Sep 17 00:00:00 2001
From: Dibyendu Majumdar <mobile@majumdar.org.uk>
Date: Mon, 2 Nov 2020 00:06:41 +0000
Subject: [PATCH] issue #40 Optimize linear IR output when step is known
 positive

---
 src/linearizer.c                | 156 +++++++++++++++++++++++++-
 tests/expected/results.expected | 190 +++++++++++++-------------------
 2 files changed, 229 insertions(+), 117 deletions(-)

diff --git a/src/linearizer.c b/src/linearizer.c
index c85206c..a9b980e 100644
--- a/src/linearizer.c
+++ b/src/linearizer.c
@@ -1838,28 +1838,172 @@ temporaries.
 
 Lend:
 
+Above is the general case, used when the sign of the step is not known.
+
+When the step is known to be positive or negative we can drop the sign test.
+Example for the positive case:
+
+	var = var - step
+	goto L1
+L1:
+	var = var + step;
+ 	goto L2
+L2:
+	stop = var > limit
+	if stop goto Lend
+		else goto Lbody
+Lbody:
+	set local symbol in for loop to var
+	do body
+	goto L1;
+Lend:
+
+Example for the negative case (not yet emitted by the linearizer):
+
+	var = var - step
+	goto L1
+L1:
+	var = var + step;
+	goto L3;
+L3:
+	stop = var < limit
+	if stop goto Lend
+		else goto Lbody
+Lbody:
+	set local symbol in for loop to var
+	do body
+	goto L1;
+Lend:
+
 
 */
 //clang-format on
+/* Linearize a numeric for loop whose step is known to be positive, so only the `limit < index` exit test is emitted. */
+static void linearize_for_num_statement_positivestep(struct proc *proc, struct ast_node *node)
+{
+	start_scope(proc->linearizer, proc, node->for_stmt.for_scope);
+	/* expr_list holds the initial value, the limit and the optional step, in that order. */
+	struct ast_node *index_var_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 0);
+	struct ast_node *limit_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 1);
+	struct ast_node *step_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2);
+	struct lua_symbol *var_sym = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.symbols, 0);
+
+	if (index_var_expr == NULL || limit_expr == NULL) {
+		handle_error(proc->linearizer->ast_container, "A least index and limit must be supplied");
+	}
+	struct pseudo *t = linearize_expression(proc, index_var_expr);
+	if (t->type == PSEUDO_RANGE) {
+		convert_range_to_temp(t); // Only accept one result
+	}
+	struct pseudo *index_var_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
+	instruct_move(proc, op_mov, index_var_pseudo, t);
+
+	t = linearize_expression(proc, limit_expr);
+	if (t->type == PSEUDO_RANGE) {
+		convert_range_to_temp(t); // Only accept one result
+	}
+	struct pseudo *limit_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
+	instruct_move(proc, op_mov, limit_pseudo, t);
+	/* A missing step expression defaults to the integer constant 1. */
+	if (step_expr == NULL)
+		t = allocate_constant_pseudo(proc, allocate_integer_constant(proc, 1));
+	else {
+		t = linearize_expression(proc, step_expr);
+		if (t->type == PSEUDO_RANGE) {
+			convert_range_to_temp(t); // Only accept one result
+		}
+	}
+	struct pseudo *step_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
+	instruct_move(proc, op_mov, step_pseudo, t);
+	/* Pre-decrement the index so the unconditional add at the top of L1 restores the initial value on first entry. */
+	struct pseudo *stop_pseudo = allocate_temp_pseudo(proc, RAVI_TNUMINT);
+	create_binary_instruction(proc, op_subii, index_var_pseudo, step_pseudo, index_var_pseudo);
+
+	struct basic_block *L1 = create_block(proc);
+	struct basic_block *L2 = create_block(proc);
+	struct basic_block *Lbody = create_block(proc);
+	struct basic_block *Lend = create_block(proc);
+	struct basic_block *previous_break_target = proc->current_break_target;
+	struct block_scope *previous_break_scope = proc->current_break_scope;
+	proc->current_break_target = Lend;
+	proc->current_break_scope = proc->current_scope;
+	/* L1: advance the index by the step. */
+	start_block(proc, L1);
+	create_binary_instruction(proc, op_addii, index_var_pseudo, step_pseudo, index_var_pseudo);
+	instruct_br(proc, allocate_block_pseudo(proc, L2));
+	/* L2: stop = (limit < index); branch to Lend when set, otherwise to Lbody. */
+	start_block(proc, L2);
+	create_binary_instruction(proc, op_ltii, limit_pseudo, index_var_pseudo, stop_pseudo);
+	instruct_cbr(proc, stop_pseudo, Lend, Lbody);
+	/* Lbody: copy the index into the loop variable's pseudo, then linearize the body statements. */
+	start_block(proc, Lbody);
+	instruct_move(proc, op_mov, var_sym->variable.pseudo, index_var_pseudo);
+
+	start_scope(proc->linearizer, proc, node->for_stmt.for_body);
+	linearize_statement_list(proc, node->for_stmt.for_statement_list);
+	end_scope(proc->linearizer, proc);
+
+	/* If the fornum block has escaped local vars then we need to close */
+	if (proc->current_break_scope->need_close) {
+		/* Note we put close instruction in current basic block */
+		instruct_close(proc, proc->current_bb, proc->current_break_scope);
+	}
+	instruct_br(proc, allocate_block_pseudo(proc, L1));
+
+	end_scope(proc->linearizer, proc);
+
+	free_temp_pseudo(proc, stop_pseudo, false);
+	free_temp_pseudo(proc, step_pseudo, false);
+	free_temp_pseudo(proc, limit_pseudo, false);
+	free_temp_pseudo(proc, index_var_pseudo, false);
+	/* Start Lend, then restore the break target and scope that were saved before the loop. */
+	start_block(proc, Lend);
+
+	proc->current_break_target = previous_break_target;
+	proc->current_break_scope = previous_break_scope;
+}
+
 static void linearize_for_num_statement(struct proc *proc, struct ast_node *node)
 {
 	assert(node->type == STMT_FOR_NUM);
-	start_scope(proc->linearizer, proc, node->for_stmt.for_scope);
 
 	/* For now we only allow integer expressions */
 	struct ast_node *expr;
 	FOR_EACH_PTR(node->for_stmt.expr_list, expr)
+		{
+			if (expr->common_expr.type.type_code != RAVI_TNUMINT) {
+				handle_error(proc->linearizer->ast_container,
+					     "Only for loops with integer expressions currently supported");
+			}
+		}
+	END_FOR_EACH_PTR(expr)
+
+	/* Check if we can optimize */
+	struct ast_node *step_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2);
 	{
-		if (expr->common_expr.type.type_code != RAVI_TNUMINT) {
-			handle_error(proc->linearizer->ast_container,
-				     "Only for loops with integer expressions currently supported");
+		bool step_known_positive = false;
+//		bool step_known_negative = false;
+		if (step_expr == NULL) {
+			step_known_positive = true;
+		} else if (step_expr->type == EXPR_LITERAL) {
+			if (step_expr->literal_expr.type.type_code == RAVI_TNUMINT) {
+				if (step_expr->literal_expr.u.i > 0)
+					step_known_positive = true;
+//				else if (step_expr->literal_expr.u.i < 0)
+//					step_known_negative = true;
+			}
+		}
+		if (step_known_positive) {
+			linearize_for_num_statement_positivestep(proc, node);
+			return;
 		}
 	}
-	END_FOR_EACH_PTR(expr)
+
+	/* Default case where we do not know if step is negative or positive */
+	start_scope(proc->linearizer, proc, node->for_stmt.for_scope);
 
 	struct ast_node *index_var_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 0);
 	struct ast_node *limit_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 1);
-	struct ast_node *step_expr = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.expr_list, 2);
 	struct lua_symbol *var_sym = ptrlist_nth_entry((struct ptr_list *)node->for_stmt.symbols, 0);
 
 	if (index_var_expr == NULL || limit_expr == NULL) {
diff --git a/tests/expected/results.expected b/tests/expected/results.expected
index 46a2f12..e4e66a3 100644
--- a/tests/expected/results.expected
+++ b/tests/expected/results.expected
@@ -2644,25 +2644,21 @@ L0 (entry)
 	MOV {1 Kint(0)} {Tint(1)}
 	MOV {10 Kint(1)} {Tint(2)}
 	MOV {1 Kint(0)} {Tint(3)}
-	LIii {0 Kint(2), Tint(3)} {Tint(4)}
 	SUBii {Tint(1), Tint(3)} {Tint(1)}
 	BR {L2}
 L1 (exit)
 L2
 	ADDii {Tint(1), Tint(3)} {Tint(1)}
-	CBR {Tint(4)} {L3, L4}
+	BR {L3}
 L3
-	LIii {Tint(2), Tint(1)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
+	LIii {Tint(2), Tint(1)} {Tint(4)}
+	CBR {Tint(4)} {L5, L4}
 L4
-	LIii {Tint(1), Tint(2)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
-L5
 	MOV {Tint(1)} {Tint(0)}
-	LOADGLOBAL {Upval(_ENV), 'print' Ks(3)} {T(0)}
+	LOADGLOBAL {Upval(_ENV), 'print' Ks(2)} {T(0)}
 	CALL {T(0), Tint(0)} {T(0..), 1 Kint(0)}
 	BR {L2}
-L6
+L5
 	RET {L1}
 function x() local a=1; function y() return function() return a end end; end
 function()
@@ -7076,47 +7072,39 @@ L0 (entry)
 	MOV {1 Kint(0)} {Tint(1)}
 	MOV {500 Kint(1)} {Tint(2)}
 	MOV {1 Kint(0)} {Tint(3)}
-	LIii {0 Kint(2), Tint(3)} {Tint(4)}
 	SUBii {Tint(1), Tint(3)} {Tint(1)}
 	BR {L2}
 L1 (exit)
 L2
 	ADDii {Tint(1), Tint(3)} {Tint(1)}
-	CBR {Tint(4)} {L3, L4}
+	BR {L3}
 L3
-	LIii {Tint(2), Tint(1)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
+	LIii {Tint(2), Tint(1)} {Tint(4)}
+	CBR {Tint(4)} {L5, L4}
 L4
-	LIii {Tint(1), Tint(2)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
-L5
 	MOV {Tint(1)} {Tint(0)}
-	MOV {0.000000000000 Kflt(3)} {local(sum, 0)}
-	MOV {1 Kint(0)} {Tint(7)}
-	MOV {10000 Kint(4)} {Tint(8)}
-	MOV {1 Kint(0)} {Tint(9)}
-	LIii {0 Kint(2), Tint(9)} {Tint(10)}
-	SUBii {Tint(7), Tint(9)} {Tint(7)}
-	BR {L7}
-L6
+	MOV {0.000000000000 Kflt(2)} {local(sum, 0)}
+	MOV {1 Kint(0)} {Tint(6)}
+	MOV {10000 Kint(3)} {Tint(7)}
+	MOV {1 Kint(0)} {Tint(8)}
+	SUBii {Tint(6), Tint(8)} {Tint(6)}
+	BR {L6}
+L5
 	RET {local(sum, 0)} {L1}
+L6
+	ADDii {Tint(6), Tint(8)} {Tint(6)}
+	BR {L7}
 L7
-	ADDii {Tint(7), Tint(9)} {Tint(7)}
-	CBR {Tint(10)} {L8, L9}
+	LIii {Tint(7), Tint(6)} {Tint(9)}
+	CBR {Tint(9)} {L9, L8}
 L8
-	LIii {Tint(8), Tint(7)} {Tint(11)}
-	CBR {Tint(11)} {L11, L10}
-L9
-	LIii {Tint(7), Tint(8)} {Tint(11)}
-	CBR {Tint(11)} {L11, L10}
-L10
-	MOV {Tint(7)} {Tint(6)}
-	MULii {Tint(6), Tint(6)} {Tint(12)}
-	DIVfi {1.000000000000 Kflt(5), Tint(12)} {Tflt(0)}
+	MOV {Tint(6)} {Tint(5)}
+	MULii {Tint(5), Tint(5)} {Tint(10)}
+	DIVfi {1.000000000000 Kflt(4), Tint(10)} {Tflt(0)}
 	ADD {local(sum, 0), Tflt(0)} {T(0)}
 	MOV {T(0)} {local(sum, 0)}
-	BR {L7}
-L11
+	BR {L6}
+L9
 	BR {L2}
 function matmul(a: table, b: table)
   	assert(@integer(#a[1]) == #b);
@@ -7928,81 +7916,69 @@ L0 (entry)
 	MOV {1 Kint(1)} {Tint(4)}
 	MOV {Tint(0)} {Tint(5)}
 	MOV {1 Kint(1)} {Tint(6)}
-	LIii {0 Kint(4), Tint(6)} {Tint(7)}
 	SUBii {Tint(4), Tint(6)} {Tint(4)}
 	BR {L2}
 L1 (exit)
 L2
 	ADDii {Tint(4), Tint(6)} {Tint(4)}
-	CBR {Tint(7)} {L3, L4}
+	BR {L3}
 L3
-	LIii {Tint(5), Tint(4)} {Tint(8)}
-	CBR {Tint(8)} {L6, L5}
+	LIii {Tint(5), Tint(4)} {Tint(7)}
+	CBR {Tint(7)} {L5, L4}
 L4
-	LIii {Tint(4), Tint(5)} {Tint(8)}
-	CBR {Tint(8)} {L6, L5}
-L5
 	MOV {Tint(4)} {Tint(3)}
-	LOADGLOBAL {Upval(_ENV), 'table' Ks(5)} {T(6)}
-	GETsk {T(6), 'numarray' Ks(6)} {T(7)}
-	CALL {T(7), Tint(2), 0.000000000000 Kflt(7)} {T(7..), 1 Kint(1)}
+	LOADGLOBAL {Upval(_ENV), 'table' Ks(4)} {T(6)}
+	GETsk {T(6), 'numarray' Ks(5)} {T(7)}
+	CALL {T(7), Tint(2), 0.000000000000 Kflt(6)} {T(7..), 1 Kint(1)}
 	TOFARRAY {T(7[7..])}
 	MOV {T(7[7..])} {local(xi, 4)}
 	TPUTik {local(xi, 4)} {local(x, 2), Tint(3)}
-	MOV {1 Kint(1)} {Tint(10)}
-	MOV {Tint(2)} {Tint(11)}
-	MOV {1 Kint(1)} {Tint(12)}
-	LIii {0 Kint(4), Tint(12)} {Tint(13)}
-	SUBii {Tint(10), Tint(12)} {Tint(10)}
-	BR {L7}
-L6
+	MOV {1 Kint(1)} {Tint(9)}
+	MOV {Tint(2)} {Tint(10)}
+	MOV {1 Kint(1)} {Tint(11)}
+	SUBii {Tint(9), Tint(11)} {Tint(9)}
+	BR {L6}
+L5
 	RET {local(x, 2)} {L1}
+L6
+	ADDii {Tint(9), Tint(11)} {Tint(9)}
+	BR {L7}
 L7
-	ADDii {Tint(10), Tint(12)} {Tint(10)}
-	CBR {Tint(13)} {L8, L9}
+	LIii {Tint(10), Tint(9)} {Tint(12)}
+	CBR {Tint(12)} {L9, L8}
 L8
-	LIii {Tint(11), Tint(10)} {Tint(14)}
-	CBR {Tint(14)} {L11, L10}
-L9
-	LIii {Tint(10), Tint(11)} {Tint(14)}
-	CBR {Tint(14)} {L11, L10}
-L10
-	MOV {Tint(10)} {Tint(9)}
+	MOV {Tint(9)} {Tint(8)}
 	TGETik {local(a, 0), Tint(3)} {T(8)}
 	TOFARRAY {T(8)}
-	TGETik {local(c, 3), Tint(9)} {T(9)}
+	TGETik {local(c, 3), Tint(8)} {T(9)}
 	TOFARRAY {T(9)}
 	MOV {T(9)} {local(cj, 6)}
 	MOV {T(8)} {local(ai, 5)}
-	MOVf {0.000000000000 Kflt(7)} {Tflt(0)}
+	MOVf {0.000000000000 Kflt(6)} {Tflt(0)}
+	MOV {1 Kint(1)} {Tint(14)}
+	MOV {Tint(1)} {Tint(15)}
 	MOV {1 Kint(1)} {Tint(16)}
-	MOV {Tint(1)} {Tint(17)}
-	MOV {1 Kint(1)} {Tint(18)}
-	LIii {0 Kint(4), Tint(18)} {Tint(19)}
-	SUBii {Tint(16), Tint(18)} {Tint(16)}
-	BR {L12}
-L11
+	SUBii {Tint(14), Tint(16)} {Tint(14)}
+	BR {L10}
+L9
 	BR {L2}
+L10
+	ADDii {Tint(14), Tint(16)} {Tint(14)}
+	BR {L11}
+L11
+	LIii {Tint(15), Tint(14)} {Tint(17)}
+	CBR {Tint(17)} {L13, L12}
 L12
-	ADDii {Tint(16), Tint(18)} {Tint(16)}
-	CBR {Tint(19)} {L13, L14}
-L13
-	LIii {Tint(17), Tint(16)} {Tint(20)}
-	CBR {Tint(20)} {L16, L15}
-L14
-	LIii {Tint(16), Tint(17)} {Tint(20)}
-	CBR {Tint(20)} {L16, L15}
-L15
-	MOV {Tint(16)} {Tint(15)}
-	FAGETik {local(ai, 5), Tint(15)} {Tflt(1)}
-	FAGETik {local(cj, 6), Tint(15)} {Tflt(2)}
+	MOV {Tint(14)} {Tint(13)}
+	FAGETik {local(ai, 5), Tint(13)} {Tflt(1)}
+	FAGETik {local(cj, 6), Tint(13)} {Tflt(2)}
 	MULff {Tflt(1), Tflt(2)} {Tflt(3)}
 	ADDff {Tflt(0), Tflt(3)} {Tflt(2)}
 	MOVf {Tflt(2)} {Tflt(0)}
-	BR {L12}
-L16
-	FAPUTfv {Tflt(0)} {local(xi, 4), Tint(9)}
-	BR {L7}
+	BR {L10}
+L13
+	FAPUTfv {Tflt(0)} {local(xi, 4), Tint(8)}
+	BR {L6}
 return -3+4*5//2^3^2//9+4%10/3 == (-3)+(((4*5)//(2^(3^2)))//9)+((4%10)/3)
 function()
 --upvalues  _ENV*
@@ -8838,25 +8814,21 @@ L0 (entry)
 	MOV {1 Kint(0)} {Tint(1)}
 	MOV {10 Kint(1)} {Tint(2)}
 	MOV {1 Kint(0)} {Tint(3)}
-	LIii {0 Kint(2), Tint(3)} {Tint(4)}
 	SUBii {Tint(1), Tint(3)} {Tint(1)}
 	BR {L2}
 L1 (exit)
 L2
 	ADDii {Tint(1), Tint(3)} {Tint(1)}
-	CBR {Tint(4)} {L3, L4}
+	BR {L3}
 L3
-	LIii {Tint(2), Tint(1)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
+	LIii {Tint(2), Tint(1)} {Tint(4)}
+	CBR {Tint(4)} {L5, L4}
 L4
-	LIii {Tint(1), Tint(2)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
-L5
 	MOV {Tint(1)} {Tint(0)}
-	LOADGLOBAL {Upval(_ENV), 'print' Ks(3)} {T(0)}
+	LOADGLOBAL {Upval(_ENV), 'print' Ks(2)} {T(0)}
 	CALL {T(0), Tint(0)} {T(0..), 1 Kint(0)}
 	BR {L2}
-L6
+L5
 	RET {L1}
 for i=10,1,-1 do print(i) end
 function()
@@ -9219,33 +9191,29 @@ L0 (entry)
 	MOV {1 Kint(0)} {Tint(1)}
 	MOV {10 Kint(1)} {Tint(2)}
 	MOV {1 Kint(0)} {Tint(3)}
-	LIii {0 Kint(2), Tint(3)} {Tint(4)}
 	SUBii {Tint(1), Tint(3)} {Tint(1)}
 	BR {L2}
 L1 (exit)
 L2
 	ADDii {Tint(1), Tint(3)} {Tint(1)}
-	CBR {Tint(4)} {L3, L4}
+	BR {L3}
 L3
-	LIii {Tint(2), Tint(1)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
+	LIii {Tint(2), Tint(1)} {Tint(4)}
+	CBR {Tint(4)} {L5, L4}
 L4
-	LIii {Tint(1), Tint(2)} {Tint(5)}
-	CBR {Tint(5)} {L6, L5}
-L5
 	MOV {Tint(1)} {Tint(0)}
-	BR {L7}
-L6
+	BR {L6}
+L5
 	RET {L1}
+L6
+	EQii {Tint(0), 2 Kint(2)} {T(0)}
+	CBR {T(0)} {L7, L8}
 L7
-	EQii {Tint(0), 2 Kint(3)} {T(0)}
-	CBR {T(0)} {L8, L9}
+	BR {L5}
 L8
-	BR {L6}
-L9
 	BR {L2}
-L10
-	BR {L9}
+L9
+	BR {L8}
 local msgs = {}
 function Message (m)
   if not _nomsg then