diff --git a/.vscode/launch.json b/.vscode/launch.json index b4a91bf..395b5bd 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -33,7 +33,7 @@ // }, "program": "${workspaceFolder}/target/debug/cli", - "args":["--llvm", "-o", "./test_output.llvm", "./cli/test.fb"], + "args":["--llvm", "-o", "./test_output.llvm", "./sample.fb"], "cwd": "${workspaceFolder}", "env": {"RUST_BACKTRACE": "1"}, "console": "externalTerminal", diff --git a/compiler/src/ast.rs b/compiler/src/ast.rs index 88d0e1a..a05d459 100644 --- a/compiler/src/ast.rs +++ b/compiler/src/ast.rs @@ -136,7 +136,9 @@ impl TopLevelValue { } fn get_dependencies(&self) -> HashSet { - self.value.get_dependencies(vec![self.ident.clone()]) + let mut values = self.args.iter().flat_map(ArgDeclaration::get_idents).collect_vec(); + values.push(self.ident.clone()); + self.value.get_dependencies(values) } } #[derive(Debug, PartialEq, Clone)] diff --git a/compiler/src/inference.rs b/compiler/src/inference.rs index 62148ec..64d1800 100644 --- a/compiler/src/inference.rs +++ b/compiler/src/inference.rs @@ -1,4 +1,4 @@ -use std::{cmp::Ordering, collections::HashMap}; +use std::{cmp::Ordering, collections::{HashMap, HashSet}}; use itertools::Itertools; @@ -556,25 +556,9 @@ impl Context { name: _, decls, } = module; - let order = self.dependency_tree.iter().sorted_by(|(lhs_name,lhs_depends), (rhs_name,rhs_depends)| { - match (lhs_depends.contains(*rhs_name), rhs_depends.contains(*lhs_name)) { - (true,true) => { - if decls.iter().find(|decl| &decl.get_ident() == *lhs_name).map_or(false, ast::TopLevelDeclaration::has_ty) { - Ordering::Less//left has explict type so it can come first - } else if decls.iter().find(|decl| &decl.get_ident() == *rhs_name).map_or(false, ast::TopLevelDeclaration::has_ty) { - Ordering::Greater//right has explict type so it can come first - } else { - todo!("remove this case. both lhs and rhs. means they depend on each other.") - } - } - (false, true) => Ordering::Less,//right depends on left thus should appear after. - (true, false) => Ordering::Greater,//left depends on right thus should appear after. - (false,false) => Ordering::Equal,//neither depend on each other. - } - }) - .map(|(a,_)|a) - .cloned() - .collect_vec(); + let items = self.dependency_tree.keys().cloned().collect(); + let order = sort_on_tree(items, &self.dependency_tree); + let order = dbg!(order); decls.sort_by_key(|decl| order.iter().position(|name| name == &decl.get_ident())); for decl in decls { self.known_locals.clear(); @@ -1806,6 +1790,31 @@ impl Context { } } +fn sort_on_tree(src : Vec, dependencies : &HashMap>) -> Vec { + let mut sorted = Vec::with_capacity(src.len()); + let mut visited = HashSet::with_capacity(src.len()); + for item in src { + visit(item,&mut visited, &mut sorted,dependencies); + } + sorted +} + +fn visit(item:String, visited:&mut HashSet, sorted:&mut Vec, dependencies : &HashMap>) { + if !visited.contains(&item) { + visited.insert(item.clone()); + if let Some(deps) = dependencies.get(&item) { + for dep in deps { + visit(dep.clone(),visited,sorted,dependencies) + } + } + sorted.push(item) + } else { + if !sorted.contains(&item) { + panic!("cylic"); + } + } +} + #[cfg(test)] impl Context { fn reset(&mut self) { diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index e66aaf0..b6bfaea 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{cmp::Ordering, collections::HashMap}; use std::path::PathBuf; pub mod ast; @@ -12,9 +12,10 @@ pub mod types; mod util; use itertools::Itertools; +use regex::CaptureNames; use thiserror::Error; -use typed_ast::TypedModuleDeclaration; +use typed_ast::{TypedDeclaration, TypedExpr, TypedFnCall, TypedModuleDeclaration, TypedTopLevelValue, TypedValueType}; #[derive(Error, Debug)] #[error(transparent)] @@ -90,6 +91,75 @@ pub fn get_ast(input: &str, file_name: &str) -> typed_ast::TypedModuleDeclaratio let ast = infer_context.inference(module); TypedModuleDeclaration::from(ast, &HashMap::new(), &ops) } +fn unfold_global_curries(mut ast:TypedModuleDeclaration, external_globals : HashMap, dtree : HashMap>) -> TypedModuleDeclaration { + let decls = &mut ast.declarations; + let order = dtree.into_iter().sorted_by(|(lhs_name,lhs_depends), (rhs_name,rhs_depends)| { + match (lhs_depends.contains(rhs_name), rhs_depends.contains(lhs_name)) { + (true,true) => { + todo!("remove this case. both lhs and rhs. means they depend on each other.") + } + (false, true) => Ordering::Less,//right depends on left thus should appear after. + (true, false) => Ordering::Greater,//left depends on right thus should appear after. + (false,false) => Ordering::Equal,//neither depend on each other. + } + }) + .map(|(a,_)|a) + .collect_vec(); + let mut values = external_globals; + values.extend(decls.iter().filter(|decl|if let TypedDeclaration::Value(TypedTopLevelValue{args, ..}) = decl { + args.is_empty() + } else { + false + }).map(|decl| { + let TypedDeclaration::Value(decl) = decl else { unreachable!() }; + let TypedValueType::Expr(expr) = &decl.value else { unreachable!() }; + (decl.ident.clone(), expr.clone()) + })); + for decl in decls.iter_mut() + .filter(|decl| + if let TypedDeclaration::Value(TypedTopLevelValue{args, ..}) =decl { + args.is_empty() + } else { + false + } + ) + .sorted_by_cached_key(|decl| order.iter().position(|name| name == &decl.get_ident())) { + let TypedDeclaration::Value(decl) = decl else {unreachable!() }; + let TypedValueType::Expr(expr) = &mut decl.value else { unreachable!() }; + replace_values(expr, &values); + values.insert(decl.ident.clone(), expr.clone()); + } + ast +} + +fn replace_values(expr:&mut TypedExpr, values : &HashMap) { + match expr { + TypedExpr::FnCall(call) if call.is_extern => { + call.arg.as_mut().map(|arg| replace_values(arg.as_mut(), values)); + }, + TypedExpr::FnCall(call) => { + replace_values(call.value.as_mut(), values); + call.arg.as_mut().map(|expr| replace_values(expr.as_mut(), values)); + }, + TypedExpr::ValueRead(name, _, _) => { + if let Some(new_expr) = values.get(name) { + *expr = new_expr.clone(); + } + } + TypedExpr::TupleLiteral { contents,.. } + | TypedExpr::ListLiteral { contents } + | TypedExpr::ArrayLiteral { contents, .. } => { + for expr in contents { + replace_values(expr, values); + } + } + TypedExpr::BinaryOpCall(biop) => { + replace_values(&mut biop.lhs, values); + replace_values(&mut biop.rhs, values); + } + _ => () + } +} pub fn from_file<'ctx>( file: &PathBuf, @@ -134,24 +204,21 @@ pub fn from_file<'ctx>( let errors = errors.into_iter().map(Error::from).collect_vec(); ast.canonialize(vec![project_name]); let dependency_graph = ast.get_dependencies(); - let dependency_tree = dependency_graph + let dependency_tree : HashMap<_,_> = dependency_graph .into_iter() .map(|(key, value)| (key, value.into_iter().collect())) .collect(); let mut inference_context = inference::Context::new( - dependency_tree, + dbg!(dependency_tree.clone()), fwd_declarations.clone(), HashMap::new(), fwd_ops.clone(), HashMap::new(), ); let ast = inference_context.inference(ast); - let mut ast = TypedModuleDeclaration::from(ast, &fwd_declarations, &fwd_ops); //TODO: foward declare std lib - // #[cfg(debug_assertions)] - // println!("{:?}", ast.declarations); - use std::io::Write; ast.lower_generics(&HashMap::new()); + let ast = unfold_global_curries(ast,HashMap::new(),dependency_tree); ( if errors.is_empty() { Ok(ast) diff --git a/compiler/src/typed_ast.rs b/compiler/src/typed_ast.rs index 846eb16..3ca950f 100644 --- a/compiler/src/typed_ast.rs +++ b/compiler/src/typed_ast.rs @@ -137,7 +137,7 @@ pub enum TypedDeclaration { } impl TypedDeclaration { - fn get_ident(&self) -> String { + pub(crate) fn get_ident(&self) -> String { match self { Self::Value(v) => v.ident.clone(), Self::TypeDefinition(decl) => decl.get_ident(), diff --git a/llvm-codegen/src/code_gen.rs b/llvm-codegen/src/code_gen.rs index aa968c0..a4f20e4 100644 --- a/llvm-codegen/src/code_gen.rs +++ b/llvm-codegen/src/code_gen.rs @@ -12,7 +12,7 @@ use inkwell::debug_info::{ }; use inkwell::module::Module; use inkwell::targets::TargetData; -use inkwell::types::{AnyTypeEnum, BasicType, PointerType, StructType}; +use inkwell::types::{AnyTypeEnum, BasicType, BasicTypeEnum, PointerType, StructType}; use inkwell::values::{ AnyValue, AnyValueEnum, BasicValue, BasicValueEnum, FunctionValue, GlobalValue, IntValue, PhiValue, PointerValue }; @@ -1963,7 +1963,9 @@ impl<'ctx> CodeGen<'ctx> { self.known_functions .insert(decl.ident.clone(), fun.as_global_value()); } else { - todo!("externed globals?") + let ty = self.type_resolver.resolve_type_as_basic(decl.ty.clone()); + let gs = self.module.add_global(ty, None, &decl.ident); + self.known_values.insert(decl.ident.clone(),gs.as_basic_value_enum()); } } "intrinsic" => { @@ -1973,11 +1975,31 @@ impl<'ctx> CodeGen<'ctx> { println!("unknown abi {}", abi.identifier) } } - } else if decl.ty.is_function() { + } else if decl.ty.is_function() && !decl.args.is_empty() { let fun = self.create_curry_list(decl); self.known_functions.insert(decl.ident.clone(), fun); + } else if decl.ty.is_function() { + let TypedValueType::Expr(expr) = &decl.value else { unreachable!() }; + let ty = if let TypedExpr::ValueRead(name, _, _) = expr { + self.ctx.struct_type(&[self.ctx.i8_type().ptr_type(AddressSpace::default()).into()], false) + } else if let TypedExpr::FnCall(fun) = expr { + let fields = self.fold_arg_ty(fun); + let fields = [self.ctx.i8_type().ptr_type(AddressSpace::default()).into()] + .into_iter() + .chain(fields) + .collect_vec(); + self.ctx.struct_type(&fields,false) + } else { + todo!("const other expressions?"); + }; + let value = self.module.add_global(ty, None, &decl.ident); + value.set_initializer(&ty.const_zero()); + self.known_values.insert(decl.ident.clone(),value.as_basic_value_enum()); } else { - //TODO! global values. + let ty = self.type_resolver.resolve_type_as_basic(decl.ty.clone()); + let value = self.module.add_global(ty, None, &decl.ident); + self.known_values.insert(decl.ident.clone(),value.as_basic_value_enum()); + todo!("compile time values?") } } TypedDeclaration::TypeDefinition(def) => match def { @@ -2158,7 +2180,7 @@ impl<'ctx> CodeGen<'ctx> { pub fn compile_module( &mut self, mut ast: compiler::typed_ast::TypedModuleDeclaration, - ) -> Module<'ctx> { + ) -> (Module<'ctx>,Vec<(TypedTopLevelValue,GlobalValue<'ctx>)>) { if self.dibuilder.is_some() { let debug_metadata_version = self.ctx.i32_type().const_int(3, false); self.module.add_basic_value_flag( @@ -2167,6 +2189,7 @@ impl<'ctx> CodeGen<'ctx> { debug_metadata_version, ) } + ast.declarations.sort_by(|a, b| match (a, b) { (TypedDeclaration::Value(_), TypedDeclaration::Value(_)) => std::cmp::Ordering::Equal, @@ -2180,7 +2203,23 @@ impl<'ctx> CodeGen<'ctx> { } #[cfg(debug_assertions)] let _ = self.module.print_to_file("./debug.ll"); - for decl in ast.declarations.into_iter().filter(|it| match it { + + let (global_curries, declarations) = { + let mut split = ast.declarations + .into_iter() + .into_group_map_by(|decl| { + match decl { + TypedDeclaration::Value(decl) => decl.args.is_empty(), + TypedDeclaration::TypeDefinition(_) => false, + } + }); + let global_curries = if let Some((_,curries)) = split.remove_entry(&true) { curries} else {Vec::new()}; + let others = if let Some((_,others)) = split.remove_entry(&false) { others } else { Vec::new() }; + (global_curries,others) + }; + + + for decl in declarations.into_iter().filter(|it| match it { TypedDeclaration::Value(TypedTopLevelValue { value, .. }) if value == &TypedValueType::External =>false, _ => true, @@ -2211,7 +2250,12 @@ impl<'ctx> CodeGen<'ctx> { if let Some(dibuilder) = &self.dibuilder { dibuilder.finalize() } - self.module.clone() + let globals = global_curries.into_iter().map(|value|{ + let TypedDeclaration::Value(value) = value else { unreachable!() }; + let gv = self.module.get_global(&value.ident).unwrap(); + (value,gv) + }).collect(); + (self.module.clone(),globals) } pub(crate) fn replace_module(&mut self, new_module: Module<'ctx>) -> Module<'ctx> { @@ -2356,7 +2400,7 @@ impl<'ctx> CodeGen<'ctx> { self.dibuilder = Some(dibulder); self.compile_unit = Some(compile_unit); } - + let mut globals_to_be_init = Vec::new(); for file in ast { self.current_module = file.name.clone() + ".fb"; if is_debug { @@ -2366,7 +2410,8 @@ impl<'ctx> CodeGen<'ctx> { let difile = dibuilder.create_file(&file.name, ""); self.difile = Some(difile); } - self.compile_module(file); + let (_,values) = self.compile_module(file); + globals_to_be_init.extend(values); self.difile = None } @@ -2381,42 +2426,25 @@ impl<'ctx> CodeGen<'ctx> { ); let bb = self.ctx.append_basic_block(entry, ""); self.builder.position_at_end(bb); + for (value,gv) in globals_to_be_init { + + let TypedValueType::Expr(expr) = value.value else { unreachable!() }; + let TypedExpr::FnCall(fun) = &expr else { unreachable!() }; + let fields = self.fold_arg_ty(fun); + let fields = [self.ctx.i8_type().ptr_type(AddressSpace::default()).into()] + .into_iter() + .chain(fields) + .collect_vec(); + let ty = self.ctx.struct_type(&fields,false); + let ptr_value =self.compile_expr(expr).into_pointer_value(); + let value = self.builder.build_load(ty, ptr_value, "").unwrap(); + self.builder.build_store(gv.as_pointer_value(), value); + self.builder.build_free(ptr_value); + } let gs = self.module.get_global(&main_name).unwrap(); let main = self .builder - .build_struct_gep(self.curry_ty, gs.as_pointer_value(), 0, "") - .unwrap(); - let main = self - .builder - .build_load(self.ctx.i8_type().ptr_type(AddressSpace::default()), main, "") - .unwrap() - .into_pointer_value(); - let main = self - .builder - .build_bitcast( - main, - self.ctx - .void_type() - .fn_type( - &[ - self.ctx - .struct_type( - &[self - .ctx - .i8_type() - .ptr_type(AddressSpace::default()) - .into()], - false, - ) - .ptr_type(AddressSpace::default()) - .into(), - self.type_resolver.resolve_arg_type(&types::UNIT).into(), - ], - false, - ) - .ptr_type(AddressSpace::default()), - "", - ) + .build_load(self.ctx.i8_type().ptr_type(AddressSpace::default()), gs.as_pointer_value(), "main") .unwrap() .into_pointer_value(); let main_t = self.type_resolver.resolve_type_as_function(&types::UNIT.fn_ty(&types::UNIT)); @@ -2833,8 +2861,18 @@ impl<'ctx> CodeGen<'ctx> { value } } + fn fold_arg_ty(&mut self, fun : &TypedFnCall) -> Vec> { + let TypedFnCall { value, arg, arg_t, .. } = fun; + let arg_t = self.type_resolver.resolve_type_as_basic(arg_t.clone()); + if let TypedExpr::FnCall(fun) = value.as_ref() { + let mut out = self.fold_arg_ty(fun); + out.push(arg_t); + out + } else { + vec![arg_t] + } + } } - fn convert_to_basic_value<'ctx>(value: AnyValueEnum<'ctx>) -> BasicValueEnum<'ctx> { match value { AnyValueEnum::ArrayValue(v) => BasicValueEnum::ArrayValue(v), diff --git a/llvm-codegen/src/lib.rs b/llvm-codegen/src/lib.rs index c0a85de..298c4f5 100644 --- a/llvm-codegen/src/lib.rs +++ b/llvm-codegen/src/lib.rs @@ -47,7 +47,7 @@ pub fn compile_file( MultiMap::new(), target_machine.get_target_data(), ); - let module = code_gen.compile_program(vec![ast], true, false); + let module = code_gen.compile_program(vec![ast], false, false); if let Some(path) = write_llvm_to { module.print_to_file(path).unwrap() } diff --git a/llvm-codegen/src/type_resolver.rs b/llvm-codegen/src/type_resolver.rs index 1c2baee..6d18d0a 100644 --- a/llvm-codegen/src/type_resolver.rs +++ b/llvm-codegen/src/type_resolver.rs @@ -93,7 +93,7 @@ impl<'ctx> TypeResolver<'ctx> { if self.has_type(&ty) { return; } - match &ty { + match dbg!(&ty) { ResolvedType::Array { underlining, size } => { let result = self.resolve_type_as_basic(underlining.as_ref().clone()).array_type(*size as u32); self.known.insert(ty,result.as_any_type_enum()); diff --git a/sample.fb b/sample.fb new file mode 100644 index 0000000..ccb3db2 --- /dev/null +++ b/sample.fb @@ -0,0 +1,9 @@ +let sum3 (a:int32) (b:int32) (c:int32) = a + b + c; +// should have a suggestion for point free. +let sum2and3 b c = sum3 3 b c; + +let sum2and3_pointfree = sum3 3; + +let sum1and3and4 = sum2and3_pointfree 4; + +let main () = (); \ No newline at end of file