From 126f21676e9b0aceea6138e1a817d36432c8ecc0 Mon Sep 17 00:00:00 2001 From: s-kybound Date: Thu, 3 Oct 2024 22:33:43 +0800 Subject: [PATCH 01/28] add prelimiary framework for macro system --- src/createContext.ts | 4 +++ src/cse-machine/interpreter.ts | 9 ++++++ src/cse-machine/scheme-macros.ts | 49 ++++++++++++++++++++++++++++++++ src/cse-machine/utils.ts | 10 +++++++ 4 files changed, 72 insertions(+) create mode 100644 src/cse-machine/scheme-macros.ts diff --git a/src/createContext.ts b/src/createContext.ts index 2f5ab7dca..374701227 100644 --- a/src/createContext.ts +++ b/src/createContext.ts @@ -38,6 +38,7 @@ import { makeWrapper } from './utils/makeWrapper' import * as operators from './utils/operators' import { stringify } from './utils/stringify' import { schemeVisualise } from './alt-langs/scheme/scheme-mapper' +import { csep_eval } from './cse-machine/scheme-macros' export class LazyBuiltIn { func: (...arg0: any) => any @@ -450,6 +451,9 @@ export const importBuiltins = (context: Context, externalBuiltIns: CustomBuiltIn if (context.chapter <= +Chapter.SCHEME_1 && context.chapter >= +Chapter.FULL_SCHEME) { switch (context.chapter) { case Chapter.FULL_SCHEME: + // eval metaprocedure + defineBuiltin(context, '$scheme_ZXZhbA$61$$61$(xs)', csep_eval) + case Chapter.SCHEME_4: // Introduction to call/cc defineBuiltin(context, 'call$47$cc(f)', call_with_current_continuation) diff --git a/src/cse-machine/interpreter.ts b/src/cse-machine/interpreter.ts index 4edcc39e4..7c655c114 100644 --- a/src/cse-machine/interpreter.ts +++ b/src/cse-machine/interpreter.ts @@ -77,6 +77,7 @@ import { setVariable, valueProducing } from './utils' +import { isEval } from './scheme-macros' type CmdEvaluator = ( command: ControlItem, @@ -926,6 +927,14 @@ const cmdEvaluators: { [type: string]: CmdEvaluator } = { handleRuntimeError(context, new errors.CallingNonFunctionValue(func, command.srcNode)) } + if (isEval(func)) { + // Check for number of arguments mismatch error + checkNumberOfArguments(context, func, args, command.srcNode) + + throw new Error('Eval is not implemented yet') + return + } + if (isCallWithCurrentContinuation(func)) { // Check for number of arguments mismatch error checkNumberOfArguments(context, func, args, command.srcNode) diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts new file mode 100644 index 000000000..19d1ad9ab --- /dev/null +++ b/src/cse-machine/scheme-macros.ts @@ -0,0 +1,49 @@ +import * as es from 'estree' + +/** + * A metaprocedure used to detect for the eval function object. + * If the interpreter sees this specific function, + */ +export class Eval extends Function { + private static instance: Eval = new Eval() + + private constructor() { + super() + } + + public static get(): Eval { + return Eval.instance + } + + public toString(): string { + return 'eval' + } +} + +export const csep_eval = Eval.get() + +export function isEval(value: any): boolean { + return value === csep_eval +} + +/** + * Provides an adequate representation of what calling + * eval looks like, to give to the + * APPLICATION instruction. + */ +export function makeDummyEvalExpression(callee: string, argument: string): es.CallExpression { + return { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: callee + }, + arguments: [ + { + type: 'Identifier', + name: argument + } + ] + } +} diff --git a/src/cse-machine/utils.ts b/src/cse-machine/utils.ts index 9930c8357..ba25349cb 100644 --- a/src/cse-machine/utils.ts +++ b/src/cse-machine/utils.ts @@ -12,6 +12,7 @@ import { Control } from './interpreter' import { AppInstr, EnvArray, ControlItem, Instr, InstrType } from './types' import Closure from './closure' import { Continuation, isCallWithCurrentContinuation } from './continuations' +import { isEval } from './scheme-macros' /** * Typeguard for Instr to distinguish between program statements and instructions. @@ -514,6 +515,15 @@ export const checkNumberOfArguments = ( ) } return undefined + } else if (isEval(callee)) { + // eval should have a single argument + if (args.length !== 1) { + return handleRuntimeError( + context, + new errors.InvalidNumberOfArguments(exp, 1, args.length, false) + ) + } + return undefined } else if (callee instanceof Continuation) { // Continuations have variadic arguments, // and so we can let it pass From 2c0ed1925c76bbf8f59e8ba63e19a062f4e2b7d8 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Mon, 7 Oct 2024 23:18:44 +0800 Subject: [PATCH 02/28] Allow Scheme data types as part of control --- src/cse-machine/scheme-macros.ts | 18 ++++++++++++++++++ src/cse-machine/types.ts | 3 ++- src/cse-machine/utils.ts | 14 ++++++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index 19d1ad9ab..7366bebea 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -1,4 +1,12 @@ import * as es from 'estree' +import { List } from '../stdlib/list' +import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' +import { SchemeNumber } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math' +import { Context } from '..' +import { Control, Stash } from './interpreter' + +// this needs to be better but for now it's fine +export type SchemeControlItems = List | _Symbol | SchemeNumber | boolean | string /** * A metaprocedure used to detect for the eval function object. @@ -26,6 +34,16 @@ export function isEval(value: any): boolean { return value === csep_eval } +export function schemeEval( + command: SchemeControlItems, + context: Context, + control: Control, + stash: Stash, + isPrelude: boolean +) { + // do absolutely nothing for now +} + /** * Provides an adequate representation of what calling * eval looks like, to give to the diff --git a/src/cse-machine/types.ts b/src/cse-machine/types.ts index 9c1dbae3c..0d8226c6b 100644 --- a/src/cse-machine/types.ts +++ b/src/cse-machine/types.ts @@ -2,6 +2,7 @@ import * as es from 'estree' import { Environment, Node } from '../types' import Closure from './closure' +import { SchemeControlItems } from './scheme-macros' export enum InstrType { RESET = 'Reset', @@ -83,7 +84,7 @@ export type Instr = | EnvInstr | ArrLitInstr -export type ControlItem = (Node | Instr) & { +export type ControlItem = (Node | Instr | SchemeControlItems) & { isEnvDependent?: boolean } diff --git a/src/cse-machine/utils.ts b/src/cse-machine/utils.ts index ba25349cb..bb57dd2d6 100644 --- a/src/cse-machine/utils.ts +++ b/src/cse-machine/utils.ts @@ -234,7 +234,7 @@ export const envChanging = (command: ControlItem): boolean => { type === 'ArrowFunctionExpression' || (type === 'ExpressionStatement' && command.expression.type === 'ArrowFunctionExpression') ) - } else { + } else if (isInstr(command)) { const type = command.instrType return ( type === InstrType.ENVIRONMENT || @@ -243,6 +243,12 @@ export const envChanging = (command: ControlItem): boolean => { type === InstrType.ARRAY_ASSIGNMENT || (type === InstrType.APPLICATION && (command as AppInstr).numOfArgs > 0) ) + } else { + // TODO deal with scheme control items + // for now, as per the CSE machine paper, + // we decide to ignore environment optimizations + // for scheme control items :P + return true } } @@ -710,7 +716,7 @@ export const isEnvDependent = (command: ControlItem): boolean => { type === InstrType.CONTINUE_MARKER || type === InstrType.BREAK_MARKER ) - } else { + } else if (isNode(command)) { const type = command.type switch (type) { case 'StatementSequence': @@ -734,6 +740,10 @@ export const isEnvDependent = (command: ControlItem): boolean => { break } } + // TODO deal with scheme control items + // for now, as per the CSE machine paper, + // we decide to ignore environment optimizations + // for scheme control items :P command.isEnvDependent = isDependent return isDependent } From 334a87ca75aeff04f2b314bc96b7499550394822 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Mon, 7 Oct 2024 23:19:16 +0800 Subject: [PATCH 03/28] add functionality to scheme eval metaprocedure --- src/cse-machine/interpreter.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/cse-machine/interpreter.ts b/src/cse-machine/interpreter.ts index 7c655c114..8a3ab9c4d 100644 --- a/src/cse-machine/interpreter.ts +++ b/src/cse-machine/interpreter.ts @@ -77,7 +77,7 @@ import { setVariable, valueProducing } from './utils' -import { isEval } from './scheme-macros' +import { isEval, schemeEval } from './scheme-macros' type CmdEvaluator = ( command: ControlItem, @@ -373,9 +373,12 @@ export function* generateCSEMachineStateStream( // With the new evaluator, we don't return a break // return new CSEBreak() } - } else { + } else if (isInstr(command)) { // Command is an instruction cmdEvaluators[command.instrType](command, context, control, stash, isPrelude) + } else { + // this is a scheme value + schemeEval(command, context, control, stash, isPrelude) } // Push undefined into the stack if both control and stash is empty @@ -781,7 +784,7 @@ const cmdEvaluators: { [type: string]: CmdEvaluator } = { [InstrType.RESET]: function (command: Instr, context: Context, control: Control, stash: Stash) { // Keep pushing reset instructions until marker is found. const cmdNext: ControlItem | undefined = control.pop() - if (cmdNext && (isNode(cmdNext) || cmdNext.instrType !== InstrType.MARKER)) { + if (cmdNext && (!isInstr(cmdNext) || cmdNext.instrType !== InstrType.MARKER)) { control.push(instr.resetInstr(command.srcNode)) } }, @@ -931,7 +934,11 @@ const cmdEvaluators: { [type: string]: CmdEvaluator } = { // Check for number of arguments mismatch error checkNumberOfArguments(context, func, args, command.srcNode) - throw new Error('Eval is not implemented yet') + // get the AST from the arguments + const AST = args[0] + + // move it to the control + control.push(AST) return } From fbe3a1f8e72ad2a714ca6dbd2c546ab0043e6c74 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Thu, 24 Oct 2024 09:54:41 +0800 Subject: [PATCH 04/28] update macro skeleton --- src/cse-machine/scheme-macros.ts | 145 ++++++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 1 deletion(-) diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index 7366bebea..6470fc2da 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -4,6 +4,7 @@ import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' import { SchemeNumber } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math' import { Context } from '..' import { Control, Stash } from './interpreter' +import { getVariable } from './utils' // this needs to be better but for now it's fine export type SchemeControlItems = List | _Symbol | SchemeNumber | boolean | string @@ -34,6 +35,22 @@ export function isEval(value: any): boolean { return value === csep_eval } +// helper function to check if a value is a list. +function isList(value: any): boolean { + if (value === null) { + return true + } + return Array.isArray(value) && value.length === 2 && isList(value[1]) +} + +// do a 1-level deep flattening of a list. +function flattenList(value: any): any[] { + if (value === null) { + return [] + } + return [value[0], ...flattenList(value[1])] +} + export function schemeEval( command: SchemeControlItems, context: Context, @@ -41,7 +58,133 @@ export function schemeEval( stash: Stash, isPrelude: boolean ) { - // do absolutely nothing for now + // scheme CSE machine will only ever encounter + // lists or primitives like symbols, booleans or strings. + // if its a list, we can parse the list and evaluate each item as necessary + // if its a symbol, we can look up the symbol in the environment. + // for either of these operations, if our list matches some pattern in + // the P component, then we can apply the corresponding rule. + + // if its a number, boolean, or string, we can just shift the value + // onto the stash. + + if (command === null) { + // TODO: error + return + } + + if (isList(command)) { + // do something + const parsedList = flattenList(command) + // do work based on the first element of the list. + // it should match some symbol "define", "set", "lambda", etc... + // or if it doesn't match any of these, then it is a function call. + if (parsedList[0] instanceof _Symbol) { + // we attempt to piggyback on the standard CSE machine to + // handle the basic special forms. + // however, for more advanced stuff like quotes or definitions, + // the logic will be handled here. + switch (parsedList[0].sym) { + case 'lambda': + // do something + case 'define': + // assume that define-function + // has been resolved to define-variable + // (P component will deal with this) + // at this point, parser enforces that variable + // is a symbol + const variable = parsedList[1] + const value = parsedList[2] + // estree VariableDeclaration + const definition = { + type: 'VariableDeclaration', + kind: 'let', + declarations: [ + { + type: 'VariableDeclarator', + id: makeDummyIdentifierNode(variable.sym), + init: value + } + ] + } + + control.push(definition as es.VariableDeclaration) + case 'set!': + const set_variable = parsedList[1] + const set_value = parsedList[2] + + // estree AssignmentExpression + const assignment = { + type: 'AssignmentExpression', + operator: '=', + left: makeDummyIdentifierNode(set_variable.sym), + right: set_value + } + + control.push(assignment as es.AssignmentExpression) + case 'if': + const condition = parsedList[1] + const consequent = parsedList[2] + // check if there is an alternate + const alternate = parsedList[3] ? parsedList[3] : null + + // estree ConditionalExpression + const conditional = { + type: 'ConditionalExpression', + test: condition, + consequent, + alternate + } + + control.push(conditional as es.ConditionalExpression) + case 'begin': + // begin is a sequence of expressions + // that are evaluated in order. + // we can just push the expressions to the control. + for (let i = 1; i < parsedList.length; i++) { + control.push(parsedList[i]) + } + + case "quote": + // TODO + case "quasiquote": + // hey, we can deal with unquote-splicing here! + // TODO + case "define-syntax": + // parse the pattern and template here, + // and add it to the Patterns component. + // TODO + } + return + } + // if we get to this point, then it is a function call. + // convert it into an es.CallExpression and push it to the control. + const procedure = parsedList[0] + const args = parsedList.slice(1) + const appln = { + type: 'CallExpression', + optional: false, + callee: procedure, + arguments: args + } + control.push(appln as es.CallExpression) + return + } else if (command instanceof _Symbol) { + // do something else + stash.push(getVariable(context, command.sym, makeDummyIdentifierNode(command.sym))) + return + } + // if we get to this point of execution, it is just some primitive value. + // just push it to the stash. + stash.push(command) + return +} + +export function makeDummyIdentifierNode(name: string): es.Identifier { + return { + type: 'Identifier', + name + } } /** From 19796ef55e544fcda29f877296e52aef00460c5d Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Tue, 29 Oct 2024 00:05:27 +0800 Subject: [PATCH 05/28] add pattern matching system prototype --- src/cse-machine/interpreter.ts | 34 +++- src/cse-machine/patterns.ts | 322 +++++++++++++++++++++++++++++++ src/cse-machine/scheme-macros.ts | 34 +++- src/types.ts | 3 +- 4 files changed, 384 insertions(+), 9 deletions(-) create mode 100644 src/cse-machine/patterns.ts diff --git a/src/cse-machine/interpreter.ts b/src/cse-machine/interpreter.ts index 8a3ab9c4d..54a8dc2d5 100644 --- a/src/cse-machine/interpreter.ts +++ b/src/cse-machine/interpreter.ts @@ -78,6 +78,7 @@ import { valueProducing } from './utils' import { isEval, schemeEval } from './scheme-macros' +import { Transformer } from './patterns' type CmdEvaluator = ( command: ControlItem, @@ -167,6 +168,29 @@ export class Stash extends Stack { } } +/** + * The P component is a dictionary of mappings from syntax names to + * their corresponding syntax rule transformers (patterns). + */ +export class Pattern { + private items: Map + public constructor() { + this.items = new Map() + } + + public get(name: string): Transformer[] | undefined { + return this.items.get(name) + } + + public hasPattern(name: string): boolean { + return this.items.has(name) + } + + public set(name: string, item: Transformer[]): void { + this.items.set(name, item) + } +} + /** * Function to be called when a program is to be interpreted using * the explicit control evaluator. @@ -188,10 +212,12 @@ export function evaluate(program: es.Program, context: Context, options: IOption context.runtime.isRunning = true context.runtime.control = new Control(program) context.runtime.stash = new Stash() + context.runtime.patterns = new Pattern() return runCSEMachine( context, context.runtime.control, context.runtime.stash, + context.runtime.patterns, options.envSteps, options.stepLimit, options.isPrelude @@ -214,7 +240,7 @@ export function evaluate(program: es.Program, context: Context, options: IOption export function resumeEvaluate(context: Context) { try { context.runtime.isRunning = true - return runCSEMachine(context, context.runtime.control!, context.runtime.stash!, -1, -1) + return runCSEMachine(context, context.runtime.control!, context.runtime.stash!, context.runtime.patterns, -1, -1) } catch (error) { return new CseError(error) } finally { @@ -283,6 +309,7 @@ export function CSEResultPromise(context: Context, value: Value): Promise | _Symbol, literals: string[]): boolean { + if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { + // this will match whatever the input list is unless it is + // a literal in the literals list. (ie syntax) + return !(input instanceof _Symbol && !literals.includes(input.sym)) + } + + if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { + // only match if the input is the same as the pattern + return input instanceof _Symbol && input.sym === pattern.sym + } + + // at this point, we know that the pattern is a list or improper list + // make sure that the input is one too. + if (!isList(input) || !isImproperList(input)) { + return false + } + + // make sure that both the pattern and input match each other. + // they should both be lists or improper lists, with no mix. + if (isImproperList(pattern) !== isImproperList(input)) { + return false + } + + // in the case that both the pattern and input are improper lists, + if (isImproperList(pattern) && isImproperList(input)) { + // match the first element of the list with the first element of the pattern + return match(input[0], (pattern as Pair)[0], literals) && match(input[1], pattern as Pair[1], literals) + } + + // now we know that both the pattern and list are lists. + // we can match the elements of the list against the pattern, + // but we also need to compare and check for the ... syntax. + if (input == pattern == null) { + return true + } + + // it's easier to reason about the lists as arrays for now. + const inputList = flattenList(input) + const patternList = flattenList(pattern) + + + // there can be a single ellepsis in the pattern, but it must be behind some element. + // scan the pattern for the ... symbol. + // we will need the position of the ... symbol to compare the front and back of the list. + const ellipsisIndex = patternList.findIndex((elem) => elem instanceof _Symbol && elem.sym === '...') + + // check if an ellipsis exists within the pattern. + if (ellipsisIndex !== -1) { + // if the input is shorter than the pattern (minus the ...), it can't match. + if (inputList.length < patternList.length - 1) { + return false + } + + const frontPatternLength = ellipsisIndex + const ellipsisPattern = patternList[ellipsisIndex - 1] + const backPatternLength = patternList.length - ellipsisIndex - 1 + + // compare the front of the list with the front of the pattern as per normal + for (let i = 0; i < frontPatternLength; i++) { + if (!match(inputList[i], patternList[i], literals)) { + return false + } + } + + // compare the items that should be captured by the ellipsis + for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { + if (!match(inputList[i], ellipsisPattern, literals)) { + return false + } + } + + // now we can compare the back of the list with the rest of the patterns + for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { + if (!match(inputList[i], patternList[i - (inputList.length - patternList.length)], literals)) { + return false + } + } + + // else all is good and return true + return true + } + + // we assume for now that ... cannot appear elsewhere in this level of the pattern, except at the end. + // so here, we have no ... syntax. + + // we can just compare the elements of the list with the pattern. + if (inputList.length !== patternList.length) { + return false + } + + for (let i = 0; i < inputList.length; i++) { + if (!match(inputList[i], patternList[i], literals)) { + return false + } + } + + return true +} + +// once a pattern is matched, we need to collect all of the matched variables. +// ONLY called on matching patterns. +function collect(input: any, pattern: List | Pair | _Symbol, literals: string[]): Map { + const collected = new Map() + if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { + // collect the matching input here + collected.set(pattern.sym, [input]) + return collected + } + + if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { + // pattern is a syntax literal, don't collect anything + return collected + } + + if (pattern instanceof _Symbol && pattern.sym === '_') { + // don't collect anything + return collected + } + + // if one is an improper list, the other should be as well. + if (isImproperList(pattern)) { + // collect the first element of the list + const collectedFirst = collect(input[0], (pattern as Pair)[0], literals) + for (const [key, value] of collectedFirst) { + collected.set(key, value) + } + + // collect the second element of the list + const collectedSecond = collect(input[1], (pattern as Pair)[1], literals) + for (const [key, value] of collectedSecond) { + collected.set(key, value) + } + + return collected + } + + // at this point, we know that the pattern is a list + // and the input should be too + if (!isList(input)) { + return collected + } + + if (input == pattern == null) { + // should be empty + return collected + } + + const inputList = flattenList(input) + const patternList = flattenList(pattern) + // there can be a single ellepsis in the pattern, but it must be behind some element. + // scan the pattern for the ... symbol. + // we will need the position of the ... symbol to compare the front and back of the list. + const ellipsisIndex = patternList.findIndex((elem) => elem instanceof _Symbol && elem.sym === '...') + + // check if an ellipsis exists within the pattern. + if (ellipsisIndex !== -1) { + const frontPatternLength = ellipsisIndex + const ellipsisPattern = patternList[ellipsisIndex - 1] + const backPatternLength = patternList.length - ellipsisIndex - 1 + + // collect items from the front of the list with the front of the pattern + for (let i = 0; i < frontPatternLength; i++) { + const collectedFront = collect(inputList[i], patternList[i], literals) + for (const [key, value] of collectedFront) { + if (collected.has(key)) { + // add the collected items to the back of the list + // (this preserves the order of the list) + collected.set(key, [...collected.get(key) as any[], ...value]) + } else { + collected.set(key, value) + } + } + } + + + // compare the items that should be captured by the ellipsis + for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { + const collectedEllipsis = collect(inputList[i], ellipsisPattern, literals) + for (const [key, value] of collectedEllipsis) { + if (collected.has(key)) { + collected.set(key, [...collected.get(key) as any[], ...value]) + } else { + collected.set(key, value) + } + } + } + + // collect the rest of the list with the back of the pattern + for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { + const collectedRest = collect(inputList[i], patternList[i - (inputList.length - patternList.length)], literals) + for (const [key, value] of collectedRest) { + if (collected.has(key)) { + collected.set(key, [...collected.get(key) as any[], ...value]) + } else { + collected.set(key, value) + } + } + } + + return collected + } + + // final case, where there is no ... syntax + for (let i = 0; i < inputList.length; i++) { + const collectedItems = collect(inputList[i], patternList[i], literals) + for (const [key, value] of collectedItems) { + if (collected.has(key)) { + collected.set(key, [...collected.get(key) as any[], ...value]) + } else { + collected.set(key, value) + } + } + } + + return collected +} + +// when matched against a pattern, we use the transform() function +// to transform the list into the template. +// returns a list, a pair, or any value, as determined by the template. +function transform(template: List | Pair | _Symbol, collected: Map): any { + function arrayToList(arr: any[]): List { + if (arr.length === 0) { + return null + } + return [arr[0], arrayToList(arr.slice(1))] + } + + if (template instanceof _Symbol) { + if (collected.has(template.sym)) { + // get the item from the collected list, + // remove it from the collected list, + // and return it. + const item = (collected.get(template.sym) as any[]).shift() + return item + } + return template + } + + if (isImproperList(template)) { + // assemble both parts of the template separately + const firstPart = transform((template as Pair)[0], collected) + const secondPart = transform((template as Pair)[1], collected) + return [firstPart, secondPart] + } + + // at this point, its a list. + const templateList = flattenList(template) + + // if the template is empty, return null + if (templateList.length === 0) { + return null + } + + // if the template begins with the ... syntax, (... ) + // it halts evaluation of the rest of the list. + // (evaluation resolves to ) + if (templateList[0] instanceof _Symbol && templateList[0].sym === '...') { + return templateList[1] + } + + // we need to deal with any ... syntax as well. + // there is only one at the 1D flattened list level, and we need to deal with it. + const ellipsisIndex = templateList.findIndex((elem) => elem instanceof _Symbol && elem.sym === '...') + + if (ellipsisIndex !== -1) { + const frontTemplateLength = ellipsisIndex + const ellipsisTemplate = templateList[ellipsisIndex + 1] + const backTemplateLength = templateList.length - ellipsisIndex - 1 + + const transformedList = [] + + // transform the front of the list + for (let i = 0; i < frontTemplateLength; i++) { + transformedList.push(transform(templateList[i], collected)) + } + + // add the values from the ellipsis template + // TODO + } + + // if there is no ... syntax, we can just evaluate the list as is. + // use iteration, as we are not sure that map evaluates left to right. + const transformedList = [] + + for (let i = 0; i < templateList.length; i++) { + transformedList.push(transform(templateList[i], collected)) + } + + return arrayToList(transformedList) +} \ No newline at end of file diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index 6470fc2da..fb279f142 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -3,7 +3,7 @@ import { List } from '../stdlib/list' import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' import { SchemeNumber } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math' import { Context } from '..' -import { Control, Stash } from './interpreter' +import { Control, Pattern, Stash } from './interpreter' import { getVariable } from './utils' // this needs to be better but for now it's fine @@ -36,7 +36,7 @@ export function isEval(value: any): boolean { } // helper function to check if a value is a list. -function isList(value: any): boolean { +export function isList(value: any): boolean { if (value === null) { return true } @@ -44,7 +44,7 @@ function isList(value: any): boolean { } // do a 1-level deep flattening of a list. -function flattenList(value: any): any[] { +export function flattenList(value: any): any[] { if (value === null) { return [] } @@ -56,6 +56,7 @@ export function schemeEval( context: Context, control: Control, stash: Stash, + patterns: Pattern, isPrelude: boolean ) { // scheme CSE machine will only ever encounter @@ -76,17 +77,33 @@ export function schemeEval( if (isList(command)) { // do something const parsedList = flattenList(command) + const elem = parsedList[0] // do work based on the first element of the list. // it should match some symbol "define", "set", "lambda", etc... // or if it doesn't match any of these, then it is a function call. - if (parsedList[0] instanceof _Symbol) { + if (elem instanceof _Symbol) { + // check if elem matches any defined syntax in the P component. + // if it does, then apply the corresponding rule. + if (patterns.hasPattern(elem.sym)) { + // apply the rule + // TODO + return + } + + // else, this is a standard special form. // we attempt to piggyback on the standard CSE machine to // handle the basic special forms. // however, for more advanced stuff like quotes or definitions, // the logic will be handled here. switch (parsedList[0].sym) { case 'lambda': - // do something + // return a lambda expression that takes + // in the arguments, and returns the body + // as an eval of the body. + const args = parsedList[1] + // convert the args to estree pattern + const body = parsedList[2] + // TODO case 'define': // assume that define-function // has been resolved to define-variable @@ -146,12 +163,17 @@ export function schemeEval( } case "quote": - // TODO + // quote is a special form that returns the expression + // as is, without evaluating it. + // we can just push the expression to the stash. + stash.push(parsedList[1]) + return case "quasiquote": // hey, we can deal with unquote-splicing here! // TODO case "define-syntax": // parse the pattern and template here, + // generate a list of transformers from it, // and add it to the Patterns component. // TODO } diff --git a/src/types.ts b/src/types.ts index e4911a649..6651c3d9e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -10,7 +10,7 @@ import * as es from 'estree' import { EnvTree } from './createContext' import Heap from './cse-machine/heap' -import { Control, Stash } from './cse-machine/interpreter' +import { Control, Pattern, Stash } from './cse-machine/interpreter' import type { ModuleFunctions } from './modules/moduleTypes' import { Representation } from './alt-langs/mapper' @@ -146,6 +146,7 @@ export interface Context { /** Runtime Specific state */ runtime: { + patterns: Pattern break: boolean debuggerOn: boolean isRunning: boolean From e0783fd30e50a2bc2b2298531a4400d7b5ad7331 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Tue, 29 Oct 2024 17:15:41 +0800 Subject: [PATCH 06/28] complete first iteration of pattern matching functions --- src/cse-machine/interpreter.ts | 13 +- src/cse-machine/patterns.ts | 596 ++++++++++++++++++------------- src/cse-machine/scheme-macros.ts | 24 +- 3 files changed, 368 insertions(+), 265 deletions(-) diff --git a/src/cse-machine/interpreter.ts b/src/cse-machine/interpreter.ts index 54a8dc2d5..53b44acde 100644 --- a/src/cse-machine/interpreter.ts +++ b/src/cse-machine/interpreter.ts @@ -169,7 +169,7 @@ export class Stash extends Stack { } /** - * The P component is a dictionary of mappings from syntax names to + * The P component is a dictionary of mappings from syntax names to * their corresponding syntax rule transformers (patterns). */ export class Pattern { @@ -185,7 +185,7 @@ export class Pattern { public hasPattern(name: string): boolean { return this.items.has(name) } - + public set(name: string, item: Transformer[]): void { this.items.set(name, item) } @@ -240,7 +240,14 @@ export function evaluate(program: es.Program, context: Context, options: IOption export function resumeEvaluate(context: Context) { try { context.runtime.isRunning = true - return runCSEMachine(context, context.runtime.control!, context.runtime.stash!, context.runtime.patterns, -1, -1) + return runCSEMachine( + context, + context.runtime.control!, + context.runtime.stash!, + context.runtime.patterns, + -1, + -1 + ) } catch (error) { return new CseError(error) } finally { diff --git a/src/cse-machine/patterns.ts b/src/cse-machine/patterns.ts index bc68135b5..24a234f77 100644 --- a/src/cse-machine/patterns.ts +++ b/src/cse-machine/patterns.ts @@ -1,322 +1,418 @@ // a single pattern stored within the patterns component // will be henceforth referred to as a "transformer". -// it consists of a set of literals used as additional syntax, +// it consists of a set of literals used as additional syntax, // a pattern (for a list to match against) // and a final template (for the list to be transformed into). -import { List, Pair } from "../stdlib/list"; +import { List, Pair } from '../stdlib/list' import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' -import { flattenList, isList } from "./scheme-macros"; +import { flattenList, isList } from './scheme-macros' export class Transformer { - literals: string[] - pattern: List - template: List - - constructor(literals: string[], pattern: List, template: List) { - this.literals = literals - this.pattern = pattern - this.template = template - } + literals: string[] + pattern: List + template: List + + constructor(literals: string[], pattern: List, template: List) { + this.literals = literals + this.pattern = pattern + this.template = template + } +} + +function arrayToList(arr: any[]): List { + if (arr.length === 0) { + return null + } + return [arr[0], arrayToList(arr.slice(1))] +} + +function arrayToImproperList(arr: any[], last: any): any { + if (arr.length === 0) { + return last + } + return [arr[0], arrayToImproperList(arr.slice(1), last)] } function isImproperList(value: any): boolean { - if (value === null) { - return false - } - return Array.isArray(value) && value.length === 2 && !isList(value[1]) + if (value === null) { + return false + } + return Array.isArray(value) && value.length === 2 && !isList(value[1]) +} + +function flattenImproperList(value: any): [any[], any] { + let items = [] + let working = value + while (working instanceof Array && working.length === 2) { + items.push(working[0]) + working = working[1] + } + return [items, working] } // we use the match() function to match a list against a pattern and literals // and verify if it is a match. function match(input: any, pattern: List | Pair | _Symbol, literals: string[]): boolean { - if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { - // this will match whatever the input list is unless it is - // a literal in the literals list. (ie syntax) - return !(input instanceof _Symbol && !literals.includes(input.sym)) + if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { + // this will match whatever the input list is unless it is + // a literal in the literals list. (ie syntax) + return !(input instanceof _Symbol && !literals.includes(input.sym)) + } + + if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { + // only match if the input is the same as the pattern + return input instanceof _Symbol && input.sym === pattern.sym + } + + // at this point, we know that the pattern is a list or improper list + // make sure that the input is one too. + if (!isList(input) || !isImproperList(input)) { + return false + } + + // make sure that both the pattern and input match each other. + // they should both be lists or improper lists, with no mix. + if (isImproperList(pattern) !== isImproperList(input)) { + return false + } + + // in the case that both the pattern and input are improper lists, + if (isImproperList(pattern) && isImproperList(input)) { + const [patternItems, patternLast] = flattenImproperList(pattern) + const [inputItems, inputLast] = flattenImproperList(input) + // match the first element of the list with the first element of the pattern + return ( + match(arrayToList(inputItems), arrayToList(patternItems), literals) && + match(inputLast, patternLast, literals) + ) + } + + // now we know that both the pattern and list are lists. + // we can match the elements of the list against the pattern, + // but we also need to compare and check for the ... syntax. + if ((input == pattern) == null) { + return true + } + + // it's easier to reason about the lists as arrays for now. + const inputList = flattenList(input) + const patternList = flattenList(pattern) + + // there can be a single ellepsis in the pattern, but it must be behind some element. + // scan the pattern for the ... symbol. + // we will need the position of the ... symbol to compare the front and back of the list. + const ellipsisIndex = patternList.findIndex(elem => elem instanceof _Symbol && elem.sym === '...') + + // check if an ellipsis exists within the pattern. + if (ellipsisIndex !== -1) { + // if the input is shorter than the pattern (minus the ...), it can't match. + if (inputList.length < patternList.length - 1) { + return false } - if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { - // only match if the input is the same as the pattern - return input instanceof _Symbol && input.sym === pattern.sym - } + const frontPatternLength = ellipsisIndex + const ellipsisPattern = patternList[ellipsisIndex - 1] + const backPatternLength = patternList.length - ellipsisIndex - 1 - // at this point, we know that the pattern is a list or improper list - // make sure that the input is one too. - if (!isList(input) || !isImproperList(input)) { + // compare the front of the list with the front of the pattern as per normal + for (let i = 0; i < frontPatternLength; i++) { + if (!match(inputList[i], patternList[i], literals)) { return false + } } - // make sure that both the pattern and input match each other. - // they should both be lists or improper lists, with no mix. - if (isImproperList(pattern) !== isImproperList(input)) { + // compare the items that should be captured by the ellipsis + for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { + if (!match(inputList[i], ellipsisPattern, literals)) { return false + } } - // in the case that both the pattern and input are improper lists, - if (isImproperList(pattern) && isImproperList(input)) { - // match the first element of the list with the first element of the pattern - return match(input[0], (pattern as Pair)[0], literals) && match(input[1], pattern as Pair[1], literals) - } - - // now we know that both the pattern and list are lists. - // we can match the elements of the list against the pattern, - // but we also need to compare and check for the ... syntax. - if (input == pattern == null) { - return true + // now we can compare the back of the list with the rest of the patterns + for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { + if ( + !match(inputList[i], patternList[i - (inputList.length - patternList.length)], literals) + ) { + return false + } } - // it's easier to reason about the lists as arrays for now. - const inputList = flattenList(input) - const patternList = flattenList(pattern) - - - // there can be a single ellepsis in the pattern, but it must be behind some element. - // scan the pattern for the ... symbol. - // we will need the position of the ... symbol to compare the front and back of the list. - const ellipsisIndex = patternList.findIndex((elem) => elem instanceof _Symbol && elem.sym === '...') - - // check if an ellipsis exists within the pattern. - if (ellipsisIndex !== -1) { - // if the input is shorter than the pattern (minus the ...), it can't match. - if (inputList.length < patternList.length - 1) { - return false - } - - const frontPatternLength = ellipsisIndex - const ellipsisPattern = patternList[ellipsisIndex - 1] - const backPatternLength = patternList.length - ellipsisIndex - 1 - - // compare the front of the list with the front of the pattern as per normal - for (let i = 0; i < frontPatternLength; i++) { - if (!match(inputList[i], patternList[i], literals)) { - return false - } - } - - // compare the items that should be captured by the ellipsis - for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { - if (!match(inputList[i], ellipsisPattern, literals)) { - return false - } - } - - // now we can compare the back of the list with the rest of the patterns - for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { - if (!match(inputList[i], patternList[i - (inputList.length - patternList.length)], literals)) { - return false - } - } + // else all is good and return true + return true + } - // else all is good and return true - return true - } - - // we assume for now that ... cannot appear elsewhere in this level of the pattern, except at the end. - // so here, we have no ... syntax. + // we assume for now that ... cannot appear elsewhere in this level of the pattern, except at the end. + // so here, we have no ... syntax. - // we can just compare the elements of the list with the pattern. - if (inputList.length !== patternList.length) { - return false - } + // we can just compare the elements of the list with the pattern. + if (inputList.length !== patternList.length) { + return false + } - for (let i = 0; i < inputList.length; i++) { - if (!match(inputList[i], patternList[i], literals)) { - return false - } + for (let i = 0; i < inputList.length; i++) { + if (!match(inputList[i], patternList[i], literals)) { + return false } + } - return true + return true } // once a pattern is matched, we need to collect all of the matched variables. // ONLY called on matching patterns. -function collect(input: any, pattern: List | Pair | _Symbol, literals: string[]): Map { - const collected = new Map() - if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { - // collect the matching input here - collected.set(pattern.sym, [input]) - return collected - } - - if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { - // pattern is a syntax literal, don't collect anything - return collected - } - - if (pattern instanceof _Symbol && pattern.sym === '_') { - // don't collect anything - return collected - } +function collect( + input: any, + pattern: List | Pair | _Symbol, + literals: string[] +): Map { + const collected = new Map() + if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { + // collect the matching input here + collected.set(pattern.sym, [input]) + return collected + } - // if one is an improper list, the other should be as well. - if (isImproperList(pattern)) { - // collect the first element of the list - const collectedFirst = collect(input[0], (pattern as Pair)[0], literals) - for (const [key, value] of collectedFirst) { - collected.set(key, value) - } + if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { + // pattern is a syntax literal, don't collect anything + return collected + } - // collect the second element of the list - const collectedSecond = collect(input[1], (pattern as Pair)[1], literals) - for (const [key, value] of collectedSecond) { - collected.set(key, value) - } + if (pattern instanceof _Symbol && pattern.sym === '_') { + // don't collect anything + return collected + } - return collected - } + // if one is an improper list, the other should be as well. + if (isImproperList(pattern)) { + const [patternItems, patternLast] = flattenImproperList(pattern) + const [inputItems, inputLast] = flattenImproperList(input) - // at this point, we know that the pattern is a list - // and the input should be too - if (!isList(input)) { - return collected + // collect the proper list items + const collectedFirst = collect(arrayToList(inputItems), arrayToList(patternItems), literals) + for (const [key, value] of collectedFirst) { + collected.set(key, value) } - if (input == pattern == null) { - // should be empty - return collected + // collect the improper list ending + const collectedSecond = collect(inputLast, patternLast, literals) + for (const [key, value] of collectedSecond) { + collected.set(key, value) } - const inputList = flattenList(input) - const patternList = flattenList(pattern) - // there can be a single ellepsis in the pattern, but it must be behind some element. - // scan the pattern for the ... symbol. - // we will need the position of the ... symbol to compare the front and back of the list. - const ellipsisIndex = patternList.findIndex((elem) => elem instanceof _Symbol && elem.sym === '...') - - // check if an ellipsis exists within the pattern. - if (ellipsisIndex !== -1) { - const frontPatternLength = ellipsisIndex - const ellipsisPattern = patternList[ellipsisIndex - 1] - const backPatternLength = patternList.length - ellipsisIndex - 1 - - // collect items from the front of the list with the front of the pattern - for (let i = 0; i < frontPatternLength; i++) { - const collectedFront = collect(inputList[i], patternList[i], literals) - for (const [key, value] of collectedFront) { - if (collected.has(key)) { - // add the collected items to the back of the list - // (this preserves the order of the list) - collected.set(key, [...collected.get(key) as any[], ...value]) - } else { - collected.set(key, value) - } - } - } + return collected + } + // at this point, we know that the pattern is a list + // and the input should be too + if (!isList(input)) { + return collected + } - // compare the items that should be captured by the ellipsis - for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { - const collectedEllipsis = collect(inputList[i], ellipsisPattern, literals) - for (const [key, value] of collectedEllipsis) { - if (collected.has(key)) { - collected.set(key, [...collected.get(key) as any[], ...value]) - } else { - collected.set(key, value) - } - } + if ((input == pattern) == null) { + // should be empty + return collected + } + + const inputList = flattenList(input) + const patternList = flattenList(pattern) + // there can be a single ellepsis in the pattern, but it must be behind some element. + // scan the pattern for the ... symbol. + // we will need the position of the ... symbol to compare the front and back of the list. + const ellipsisIndex = patternList.findIndex(elem => elem instanceof _Symbol && elem.sym === '...') + + // check if an ellipsis exists within the pattern. + if (ellipsisIndex !== -1) { + const frontPatternLength = ellipsisIndex + const ellipsisPattern = patternList[ellipsisIndex - 1] + const backPatternLength = patternList.length - ellipsisIndex - 1 + + // collect items from the front of the list with the front of the pattern + for (let i = 0; i < frontPatternLength; i++) { + const collectedFront = collect(inputList[i], patternList[i], literals) + for (const [key, value] of collectedFront) { + if (collected.has(key)) { + // add the collected items to the back of the list + // (this preserves the order of the list) + collected.set(key, [...(collected.get(key) as any[]), ...value]) + } else { + collected.set(key, value) } + } + } - // collect the rest of the list with the back of the pattern - for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { - const collectedRest = collect(inputList[i], patternList[i - (inputList.length - patternList.length)], literals) - for (const [key, value] of collectedRest) { - if (collected.has(key)) { - collected.set(key, [...collected.get(key) as any[], ...value]) - } else { - collected.set(key, value) - } - } + // compare the items that should be captured by the ellipsis + for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { + const collectedEllipsis = collect(inputList[i], ellipsisPattern, literals) + for (const [key, value] of collectedEllipsis) { + if (collected.has(key)) { + collected.set(key, [...(collected.get(key) as any[]), ...value]) + } else { + collected.set(key, value) } - - return collected + } } - // final case, where there is no ... syntax - for (let i = 0; i < inputList.length; i++) { - const collectedItems = collect(inputList[i], patternList[i], literals) - for (const [key, value] of collectedItems) { - if (collected.has(key)) { - collected.set(key, [...collected.get(key) as any[], ...value]) - } else { - collected.set(key, value) - } + // collect the rest of the list with the back of the pattern + for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { + const collectedRest = collect( + inputList[i], + patternList[i - (inputList.length - patternList.length)], + literals + ) + for (const [key, value] of collectedRest) { + if (collected.has(key)) { + collected.set(key, [...(collected.get(key) as any[]), ...value]) + } else { + collected.set(key, value) } + } } return collected + } + + // final case, where there is no ... syntax + for (let i = 0; i < inputList.length; i++) { + const collectedItems = collect(inputList[i], patternList[i], literals) + for (const [key, value] of collectedItems) { + if (collected.has(key)) { + collected.set(key, [...(collected.get(key) as any[]), ...value]) + } else { + collected.set(key, value) + } + } + } + + return collected } // when matched against a pattern, we use the transform() function // to transform the list into the template. // returns a list, a pair, or any value, as determined by the template. function transform(template: List | Pair | _Symbol, collected: Map): any { - function arrayToList(arr: any[]): List { - if (arr.length === 0) { - return null - } - return [arr[0], arrayToList(arr.slice(1))] + if (template instanceof _Symbol) { + if (collected.has(template.sym)) { + // get the item from the collected list, + // remove it from the collected list, + // and return it. + const item = (collected.get(template.sym) as any[]).shift() + return item } + return template + } + + if (isImproperList(template)) { + const [items, last] = flattenImproperList(template) + // assemble both parts of the template separately + const firstPart = flattenList(transform(arrayToList(items), collected)) + const secondPart = transform(last, collected) + return arrayToImproperList(firstPart, secondPart) + } + + // at this point, its a list. + const templateList = flattenList(template) + + // if the template is empty, return null + if (templateList.length === 0) { + return null + } + + // if the template begins with the ... syntax, (... ) + // it halts evaluation of the rest of the list. + // (evaluation resolves to ) + if (templateList[0] instanceof _Symbol && templateList[0].sym === '...') { + return templateList[1] + } + + // we need to deal with any ... syntax as well. + // there is only one at the 1D flattened list level, and we need to deal with it. + const ellipsisIndex = templateList.findIndex( + elem => elem instanceof _Symbol && elem.sym === '...' + ) + + if (ellipsisIndex !== -1) { + const frontTemplateLength = ellipsisIndex + const ellipsisTemplate = templateList[ellipsisIndex + 1] + const backTemplateLength = templateList.length - ellipsisIndex - 1 - if (template instanceof _Symbol) { - if (collected.has(template.sym)) { - // get the item from the collected list, - // remove it from the collected list, - // and return it. - const item = (collected.get(template.sym) as any[]).shift() - return item - } - return template - } + const transformedList = [] - if (isImproperList(template)) { - // assemble both parts of the template separately - const firstPart = transform((template as Pair)[0], collected) - const secondPart = transform((template as Pair)[1], collected) - return [firstPart, secondPart] + // transform the front of the list + for (let i = 0; i < frontTemplateLength; i++) { + transformedList.push(transform(templateList[i], collected)) } - // at this point, its a list. - const templateList = flattenList(template) - - // if the template is empty, return null - if (templateList.length === 0) { - return null + // add the values from the ellipsis template + // (repeat the ellipsis template until the relevant collected items are exhausted) + // (the tricky part is that the repeated ellipsis template may + // refer to a list as well...) + + // idea - track the relevant template items, and track them until they are exhausted. + // to my understanding, there should be no nested ellipsis templates, (as in repeats of ... in a template already repeated) + // as there would be no way to equally distribute the collected items. + + // deal with the ellipsis template based on 3 cases: symbol, list, or improper list. + if (ellipsisTemplate instanceof _Symbol) { + // if it is a symbol, we can just repeat it. + while ( + collected.has(ellipsisTemplate.sym) && + (collected.get(ellipsisTemplate.sym) as any[]).length > 0 + ) { + transformedList.push(transform(ellipsisTemplate, collected)) + } + } else if (isList(ellipsisTemplate) || isImproperList(ellipsisTemplate)) { + function deepFlatten(pair: Pair): any[] { + const items: any[] = [] + function flattenHelper(item: any) { + if (item instanceof _Symbol && item.sym !== '...') { + items.push(item) + } else if (item === null) { + return + } else if (item instanceof Array && item.length === 2) { + // based on the usage of (... ), + // and our previous discussion on the viability + // of ... within the ellipsis template + // we can assume that any ellipsis used is used to halt macro expansion of . + if (item[0] instanceof _Symbol && item[0].sym === '...') { + // do not collect any items here, this halts the collection + return + } + // if its a pair, traverse both car and cdr + flattenHelper(item[0]) + flattenHelper(item[1]) + } + } + flattenHelper(pair) + return items + } + + // collect all the items in the ellipsis template + const ellipsisTemplateList = deepFlatten(ellipsisTemplate as Pair) + + // all we need is to track some symbol in the ellipsis template, and make sure that it is exhausted. + while ( + collected.has(ellipsisTemplateList[0].sym) && + (collected.get(ellipsisTemplateList[0].sym) as any[]).length > 0 + ) { + transformedList.push(transform(ellipsisTemplate, collected)) + } } - // if the template begins with the ... syntax, (... ) - // it halts evaluation of the rest of the list. - // (evaluation resolves to ) - if (templateList[0] instanceof _Symbol && templateList[0].sym === '...') { - return templateList[1] + // transform the back of the list + for (let i = templateList.length - backTemplateLength; i < templateList.length; i++) { + transformedList.push(transform(templateList[i], collected)) } - // we need to deal with any ... syntax as well. - // there is only one at the 1D flattened list level, and we need to deal with it. - const ellipsisIndex = templateList.findIndex((elem) => elem instanceof _Symbol && elem.sym === '...') - - if (ellipsisIndex !== -1) { - const frontTemplateLength = ellipsisIndex - const ellipsisTemplate = templateList[ellipsisIndex + 1] - const backTemplateLength = templateList.length - ellipsisIndex - 1 - - const transformedList = [] + return arrayToList(transformedList) + } - // transform the front of the list - for (let i = 0; i < frontTemplateLength; i++) { - transformedList.push(transform(templateList[i], collected)) - } - - // add the values from the ellipsis template - // TODO - } + // if there is no ... syntax, we can just evaluate the list as is. + // use iteration, as we are not sure that map evaluates left to right. + const transformedList = [] - // if there is no ... syntax, we can just evaluate the list as is. - // use iteration, as we are not sure that map evaluates left to right. - const transformedList = [] - - for (let i = 0; i < templateList.length; i++) { - transformedList.push(transform(templateList[i], collected)) - } + for (let i = 0; i < templateList.length; i++) { + transformedList.push(transform(templateList[i], collected)) + } - return arrayToList(transformedList) -} \ No newline at end of file + return arrayToList(transformedList) +} diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index fb279f142..acd4b7f2f 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -73,7 +73,7 @@ export function schemeEval( // TODO: error return } - + if (isList(command)) { // do something const parsedList = flattenList(command) @@ -91,7 +91,7 @@ export function schemeEval( } // else, this is a standard special form. - // we attempt to piggyback on the standard CSE machine to + // we attempt to piggyback on the standard CSE machine to // handle the basic special forms. // however, for more advanced stuff like quotes or definitions, // the logic will be handled here. @@ -103,7 +103,7 @@ export function schemeEval( const args = parsedList[1] // convert the args to estree pattern const body = parsedList[2] - // TODO + // TODO case 'define': // assume that define-function // has been resolved to define-variable @@ -162,20 +162,20 @@ export function schemeEval( control.push(parsedList[i]) } - case "quote": + case 'quote': // quote is a special form that returns the expression // as is, without evaluating it. // we can just push the expression to the stash. stash.push(parsedList[1]) return - case "quasiquote": - // hey, we can deal with unquote-splicing here! - // TODO - case "define-syntax": - // parse the pattern and template here, - // generate a list of transformers from it, - // and add it to the Patterns component. - // TODO + case 'quasiquote': + // hey, we can deal with unquote-splicing here! + // TODO + case 'define-syntax': + // parse the pattern and template here, + // generate a list of transformers from it, + // and add it to the Patterns component. + // TODO } return } From 4798e078abcc1191bb1a25ec12442f2a25654234 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Thu, 31 Oct 2024 01:31:09 +0800 Subject: [PATCH 07/28] finish first implementation of pattern matcher --- src/cse-machine/closure.ts | 4 +- src/cse-machine/interpreter.ts | 9 ++- src/cse-machine/patterns.ts | 22 +++++- src/cse-machine/scheme-macros.ts | 125 ++++++++++++++++++++++++++++--- src/types.ts | 2 +- 5 files changed, 142 insertions(+), 20 deletions(-) diff --git a/src/cse-machine/closure.ts b/src/cse-machine/closure.ts index 42dc64784..f11d1a585 100644 --- a/src/cse-machine/closure.ts +++ b/src/cse-machine/closure.ts @@ -10,7 +10,7 @@ import { } from '../cse-machine/utils' import { Context, Environment, StatementSequence, Value } from '../types' import * as ast from '../utils/ast/astCreator' -import { Control, Stash, generateCSEMachineStateStream } from './interpreter' +import { Control, Pattern, Stash, generateCSEMachineStateStream } from './interpreter' import { envInstr } from './instrCreator' const closureToJS = (value: Closure, context: Context) => { @@ -39,10 +39,12 @@ const closureToJS = (value: Closure, context: Context) => { // The call expression won't create one as there is only one item in the control. newContext.runtime.control.push(envInstr(currentEnvironment(context), node), node) newContext.runtime.stash = new Stash() + newContext.runtime.patterns = context.runtime.patterns const gen = generateCSEMachineStateStream( newContext, newContext.runtime.control, newContext.runtime.stash, + newContext.runtime.patterns as Pattern, -1, -1 ) diff --git a/src/cse-machine/interpreter.ts b/src/cse-machine/interpreter.ts index 53b44acde..fcdabe7e2 100644 --- a/src/cse-machine/interpreter.ts +++ b/src/cse-machine/interpreter.ts @@ -178,15 +178,16 @@ export class Pattern { this.items = new Map() } - public get(name: string): Transformer[] | undefined { - return this.items.get(name) + // only call this if you are sure that the pattern exists. + public getPattern(name: string): Transformer[] { + return this.items.get(name) as Transformer[] } public hasPattern(name: string): boolean { return this.items.has(name) } - public set(name: string, item: Transformer[]): void { + public addPattern(name: string, item: Transformer[]): void { this.items.set(name, item) } } @@ -244,7 +245,7 @@ export function resumeEvaluate(context: Context) { context, context.runtime.control!, context.runtime.stash!, - context.runtime.patterns, + context.runtime.patterns as Pattern, -1, -1 ) diff --git a/src/cse-machine/patterns.ts b/src/cse-machine/patterns.ts index 24a234f77..25918f15e 100644 --- a/src/cse-machine/patterns.ts +++ b/src/cse-machine/patterns.ts @@ -7,6 +7,8 @@ import { List, Pair } from '../stdlib/list' import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' import { flattenList, isList } from './scheme-macros' +// a single pattern stored within the patterns component +// may have several transformers attributed to it. export class Transformer { literals: string[] pattern: List @@ -19,7 +21,15 @@ export class Transformer { } } -function arrayToList(arr: any[]): List { +// given a matching transformer, +// the macro_transform() function will transform a list +// into the template of the transformer. +export function macro_transform(input: List, transformer: Transformer): List { + const collected = collect(input, transformer.pattern, transformer.literals) + return transform(transformer.template, collected) +} + +export function arrayToList(arr: any[]): List { if (arr.length === 0) { return null } @@ -33,14 +43,14 @@ function arrayToImproperList(arr: any[], last: any): any { return [arr[0], arrayToImproperList(arr.slice(1), last)] } -function isImproperList(value: any): boolean { +export function isImproperList(value: any): boolean { if (value === null) { return false } return Array.isArray(value) && value.length === 2 && !isList(value[1]) } -function flattenImproperList(value: any): [any[], any] { +export function flattenImproperList(value: any): [any[], any] { let items = [] let working = value while (working instanceof Array && working.length === 2) { @@ -52,7 +62,11 @@ function flattenImproperList(value: any): [any[], any] { // we use the match() function to match a list against a pattern and literals // and verify if it is a match. -function match(input: any, pattern: List | Pair | _Symbol, literals: string[]): boolean { +export function match( + input: any, + pattern: List | Pair | _Symbol, + literals: string[] +): boolean { if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { // this will match whatever the input list is unless it is // a literal in the literals list. (ie syntax) diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index acd4b7f2f..d91fdb937 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -5,6 +5,15 @@ import { SchemeNumber } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math import { Context } from '..' import { Control, Pattern, Stash } from './interpreter' import { getVariable } from './utils' +import { + Transformer, + arrayToList, + flattenImproperList, + isImproperList, + macro_transform, + match +} from './patterns' +import { ControlItem } from './types' // this needs to be better but for now it's fine export type SchemeControlItems = List | _Symbol | SchemeNumber | boolean | string @@ -85,7 +94,21 @@ export function schemeEval( // check if elem matches any defined syntax in the P component. // if it does, then apply the corresponding rule. if (patterns.hasPattern(elem.sym)) { - // apply the rule + // get the relevant transformers + const transformers: Transformer[] = patterns.getPattern(elem.sym) + + // find the first matching transformer + for (const transformer of transformers) { + // check if the transformer matches the list + if (match(command, transformer.pattern, transformer.literals)) { + // if it does, apply the transformer + const transformedMacro = macro_transform(command as List, transformer) + control.push(transformedMacro as ControlItem) + return + } + } + + // there is an error if we get to here // TODO return } @@ -101,9 +124,54 @@ export function schemeEval( // in the arguments, and returns the body // as an eval of the body. const args = parsedList[1] + + let argsList: _Symbol[] = [] + let rest: _Symbol | null = null + if (args instanceof _Symbol) { + // if the args is a symbol, then it is a variadic function. + // we can just set the args to a list of the symbol. + rest = args + } else if (isImproperList(args)) { + [argsList, rest] = flattenImproperList(args) + } else { + argsList = flattenList(args) as _Symbol[] + } + // convert the args to estree pattern - const body = parsedList[2] - // TODO + const params: (es.Identifier | es.RestElement)[] = argsList.map(arg => + makeDummyIdentifierNode(arg.sym) + ) + + let body = parsedList[2] + + // if there is a rest argument, we need to wrap it in a rest element. + // we also need to add another element to the body, + // to convert the rest element into a list. + if (rest !== null) { + params.push({ + type: 'RestElement', + argument: makeDummyIdentifierNode(rest.sym) + }) + body = arrayToList([ + new _Symbol('begin'), + arrayToList([ + new _Symbol('set!'), + rest, + arrayToList([new _Symbol('vector->list'), rest]) + ]), + body + ]) + } + + // estree ArrowFunctionExpression + const lambda = { + type: 'ArrowFunctionExpression', + params: params, + body: body + } + + control.push(lambda as es.ArrowFunctionExpression) + case 'define': // assume that define-function // has been resolved to define-variable @@ -139,6 +207,7 @@ export function schemeEval( } control.push(assignment as es.AssignmentExpression) + case 'if': const condition = parsedList[1] const consequent = parsedList[2] @@ -154,6 +223,7 @@ export function schemeEval( } control.push(conditional as es.ConditionalExpression) + case 'begin': // begin is a sequence of expressions // that are evaluated in order. @@ -167,15 +237,50 @@ export function schemeEval( // as is, without evaluating it. // we can just push the expression to the stash. stash.push(parsedList[1]) - return + /* + quasiquote can be represented using + macros! + + (define-syntax quasiquote + (syntax-rules (unquote unquote-splicing) + ((_ (unquote x)) x) + ((_ (unquote-splicing x) . rest) + (append x (quasiquote rest))) + + ((_ (a . rest)) + (cons (quasiquote a) (quasiquote rest))) + + ((_ x) (quote x)))) + case 'quasiquote': - // hey, we can deal with unquote-splicing here! - // TODO + // hey, we can deal with unquote-splicing here! + // decompose the list into a call to a list of the elements, + // leaving quoted items alone, and unquoting the unquoted items. + */ + case 'define-syntax': - // parse the pattern and template here, - // generate a list of transformers from it, - // and add it to the Patterns component. - // TODO + // parse the pattern and template here, + // generate a list of transformers from it, + // and add it to the Patterns component. + const syntaxName = parsedList[1] + const syntaxRules = parsedList[2] + + // at this point, we assume that syntax-rules is verified + // and parsed correctly already. + const syntaxRulesList = flattenList(syntaxRules) + const literals: string[] = syntaxRulesList[1].map((literal: _Symbol) => literal.sym) + const rules = syntaxRulesList.slice(2) + // rules are set as a list of patterns and templates. + // we need to convert these into transformers. + const transformers: Transformer[] = rules.map(rule => { + const ruleList = flattenList(rule) + const pattern = ruleList[0] + const template = ruleList[1] + return new Transformer(literals, pattern, template) + }) + // now we can add the transformers to the patterns component. + patterns.addPattern(syntaxName.sym, transformers) + return } return } diff --git a/src/types.ts b/src/types.ts index 6651c3d9e..30b47817a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -146,7 +146,7 @@ export interface Context { /** Runtime Specific state */ runtime: { - patterns: Pattern + patterns?: Pattern break: boolean debuggerOn: boolean isRunning: boolean From 20c2b83a6b315743111f84488d129a614d5f591f Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Thu, 31 Oct 2024 22:53:06 +0800 Subject: [PATCH 08/28] update tests for call/cc --- src/cse-machine/__tests__/cse-machine-callcc.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cse-machine/__tests__/cse-machine-callcc.ts b/src/cse-machine/__tests__/cse-machine-callcc.ts index f26b52683..2249a1799 100644 --- a/src/cse-machine/__tests__/cse-machine-callcc.ts +++ b/src/cse-machine/__tests__/cse-machine-callcc.ts @@ -2,7 +2,7 @@ import { Chapter, Variant } from '../../types' import { expectParsedError, expectResult } from '../../utils/testing' // Continuation tests for Scheme -const optionECScm = { chapter: Chapter.FULL_SCHEME, variant: Variant.EXPLICIT_CONTROL } +const optionECScm = { chapter: Chapter.SCHEME_4, variant: Variant.EXPLICIT_CONTROL } test('basic call/cc works', () => { return expectResult( From f31bf8b471bbcdd433f66a61e002e65a71f68298 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Thu, 31 Oct 2024 22:53:56 +0800 Subject: [PATCH 09/28] complete CSEP machine behaviour for standard syntax forms --- src/cse-machine/scheme-macros.ts | 36 +++++++++++++++++++------------- src/cse-machine/utils.ts | 22 ++++++++++++++----- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index d91fdb937..aaf6e9ac9 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -14,6 +14,7 @@ import { match } from './patterns' import { ControlItem } from './types' +import { encode } from '../alt-langs/scheme/scm-slang/src' // this needs to be better but for now it's fine export type SchemeControlItems = List | _Symbol | SchemeNumber | boolean | string @@ -79,7 +80,7 @@ export function schemeEval( // onto the stash. if (command === null) { - // TODO: error + // error return } @@ -132,7 +133,7 @@ export function schemeEval( // we can just set the args to a list of the symbol. rest = args } else if (isImproperList(args)) { - [argsList, rest] = flattenImproperList(args) + ;[argsList, rest] = flattenImproperList(args) } else { argsList = flattenList(args) as _Symbol[] } @@ -142,7 +143,8 @@ export function schemeEval( makeDummyIdentifierNode(arg.sym) ) - let body = parsedList[2] + let body_elements = parsedList.slice(2) + let body: List = arrayToList([new _Symbol('begin'), ...body_elements]) // if there is a rest argument, we need to wrap it in a rest element. // we also need to add another element to the body, @@ -159,7 +161,7 @@ export function schemeEval( rest, arrayToList([new _Symbol('vector->list'), rest]) ]), - body + ...body_elements ]) } @@ -167,10 +169,11 @@ export function schemeEval( const lambda = { type: 'ArrowFunctionExpression', params: params, - body: body + body: body as any } control.push(lambda as es.ArrowFunctionExpression) + return case 'define': // assume that define-function @@ -187,13 +190,14 @@ export function schemeEval( declarations: [ { type: 'VariableDeclarator', - id: makeDummyIdentifierNode(variable.sym), + id: makeDummyIdentifierNode(encode(variable.sym)), init: value } ] } control.push(definition as es.VariableDeclaration) + return case 'set!': const set_variable = parsedList[1] const set_value = parsedList[2] @@ -202,12 +206,12 @@ export function schemeEval( const assignment = { type: 'AssignmentExpression', operator: '=', - left: makeDummyIdentifierNode(set_variable.sym), + left: makeDummyIdentifierNode(encode(set_variable.sym)), right: set_value } control.push(assignment as es.AssignmentExpression) - + return case 'if': const condition = parsedList[1] const consequent = parsedList[2] @@ -223,20 +227,21 @@ export function schemeEval( } control.push(conditional as es.ConditionalExpression) - + return case 'begin': // begin is a sequence of expressions // that are evaluated in order. - // we can just push the expressions to the control. - for (let i = 1; i < parsedList.length; i++) { + // push the expressions to the control in reverse + for (let i = parsedList.length - 1; i > 0; i--) { control.push(parsedList[i]) } - + return case 'quote': // quote is a special form that returns the expression // as is, without evaluating it. // we can just push the expression to the stash. stash.push(parsedList[1]) + return /* quasiquote can be represented using macros! @@ -282,7 +287,6 @@ export function schemeEval( patterns.addPattern(syntaxName.sym, transformers) return } - return } // if we get to this point, then it is a function call. // convert it into an es.CallExpression and push it to the control. @@ -297,8 +301,10 @@ export function schemeEval( control.push(appln as es.CallExpression) return } else if (command instanceof _Symbol) { - // do something else - stash.push(getVariable(context, command.sym, makeDummyIdentifierNode(command.sym))) + // get the value of the symbol from the environment + // associated with this symbol. + const encodedName = encode(command.sym) + stash.push(getVariable(context, encodedName, makeDummyIdentifierNode(command.sym))) return } // if we get to this point of execution, it is just some primitive value. diff --git a/src/cse-machine/utils.ts b/src/cse-machine/utils.ts index bb57dd2d6..3ba553519 100644 --- a/src/cse-machine/utils.ts +++ b/src/cse-machine/utils.ts @@ -4,7 +4,7 @@ import { isArray, isFunction } from 'lodash' import { Context } from '..' import * as errors from '../errors/errors' import { RuntimeSourceError } from '../errors/runtimeSourceError' -import type { Environment, Node, StatementSequence, Value } from '../types' +import { Chapter, type Environment, type Node, type StatementSequence, type Value } from '../types' import * as ast from '../utils/ast/astCreator' import Heap from './heap' import * as instr from './instrCreator' @@ -422,7 +422,12 @@ export function defineVariable( ) { const environment = currentEnvironment(context) - if (environment.head[name] !== UNASSIGNED_CONST && environment.head[name] !== UNASSIGNED_LET) { + // we disable this check for full scheme due to the inability to scan for variables before usage + if ( + environment.head[name] !== UNASSIGNED_CONST && + environment.head[name] !== UNASSIGNED_LET && + context.chapter !== Chapter.FULL_SCHEME + ) { return handleRuntimeError(context, new errors.VariableRedeclaration(node, name, !constant)) } @@ -740,10 +745,17 @@ export const isEnvDependent = (command: ControlItem): boolean => { break } } - // TODO deal with scheme control items - // for now, as per the CSE machine paper, + // as per the CSE machine paper, // we decide to ignore environment optimizations - // for scheme control items :P + // for scheme control items + + // if the command is a primitive, it is not environment dependent + // commands are allowed to be in the control now due to the + // scheme CSEP machine. + // avoid attempting to add attributes to it. + if (typeof command === 'string' || typeof command === 'boolean') { + return isDependent + } command.isEnvDependent = isDependent return isDependent } From eef4b12b04c3771893ca0b057000364b59808013 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Sat, 2 Nov 2024 22:17:45 +0800 Subject: [PATCH 10/28] add tags to modified estree nodes, and deparser --- src/cse-machine/scheme-macros.ts | 271 ++++++++++++++++++++++++++++--- 1 file changed, 244 insertions(+), 27 deletions(-) diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index aaf6e9ac9..a346c2674 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -1,12 +1,14 @@ import * as es from 'estree' +import * as errors from '../errors/errors' import { List } from '../stdlib/list' import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' -import { SchemeNumber } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math' +import { is_number, SchemeNumber } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math' import { Context } from '..' import { Control, Pattern, Stash } from './interpreter' -import { getVariable } from './utils' +import { getVariable, handleRuntimeError } from './utils' import { Transformer, + arrayToImproperList, arrayToList, flattenImproperList, isImproperList, @@ -15,6 +17,7 @@ import { } from './patterns' import { ControlItem } from './types' import { encode } from '../alt-langs/scheme/scm-slang/src' +import { popInstr } from './instrCreator' // this needs to be better but for now it's fine export type SchemeControlItems = List | _Symbol | SchemeNumber | boolean | string @@ -80,8 +83,7 @@ export function schemeEval( // onto the stash. if (command === null) { - // error - return + return handleRuntimeError(context, new errors.ExceptionError(new Error('Cannot evaluate null'))) } if (isList(command)) { @@ -110,8 +112,10 @@ export function schemeEval( } // there is an error if we get to here - // TODO - return + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('No matching transformer found for macro')) + ) } // else, this is a standard special form. @@ -169,19 +173,43 @@ export function schemeEval( const lambda = { type: 'ArrowFunctionExpression', params: params, - body: body as any + body: body as any, + modified: true } - control.push(lambda as es.ArrowFunctionExpression) + control.push(lambda as unknown as es.ArrowFunctionExpression) return case 'define': - // assume that define-function - // has been resolved to define-variable - // (P component will deal with this) - // at this point, parser enforces that variable - // is a symbol const variable = parsedList[1] + if (isList(variable)) { + // then this define is actually a function definition + const varList = flattenList(variable) + const name = varList[0] + const params = varList.slice(1) + const body = parsedList.slice(2) + + const define_function = arrayToList([ + new _Symbol('define'), + name, + arrayToList([new _Symbol('lambda'), arrayToList(params), ...body]) + ]) + control.push(define_function as any) + return + } else if (isImproperList(variable)) { + const [varList, rest] = flattenImproperList(variable) + const name = varList[0] + const params = varList.slice(1) + const body = parsedList.slice(2) + + const define_function = arrayToList([ + new _Symbol('define'), + name, + arrayToList([new _Symbol('lambda'), arrayToImproperList(params, rest), ...body]) + ]) + control.push(define_function as any) + return + } const value = parsedList[2] // estree VariableDeclaration const definition = { @@ -193,7 +221,8 @@ export function schemeEval( id: makeDummyIdentifierNode(encode(variable.sym)), init: value } - ] + ], + modified: true } control.push(definition as es.VariableDeclaration) @@ -207,7 +236,8 @@ export function schemeEval( type: 'AssignmentExpression', operator: '=', left: makeDummyIdentifierNode(encode(set_variable.sym)), - right: set_value + right: set_value, + modified: true } control.push(assignment as es.AssignmentExpression) @@ -216,14 +246,18 @@ export function schemeEval( const condition = parsedList[1] const consequent = parsedList[2] // check if there is an alternate - const alternate = parsedList[3] ? parsedList[3] : null + const alternate = parsedList.length > 3 ? parsedList[3] : undefined + + // evaluate the condition with truthy + const truthyCondition = arrayToList([new _Symbol('truthy'), condition]) // estree ConditionalExpression const conditional = { type: 'ConditionalExpression', - test: condition, + test: truthyCondition as any, consequent, - alternate + alternate, + modified: true } control.push(conditional as es.ConditionalExpression) @@ -232,7 +266,9 @@ export function schemeEval( // begin is a sequence of expressions // that are evaluated in order. // push the expressions to the control in reverse - for (let i = parsedList.length - 1; i > 0; i--) { + control.push(parsedList[parsedList.length - 1]) + for (let i = parsedList.length - 2; i > 0; i--) { + control.push(popInstr(makeDummyIdentifierNode('pop'))) control.push(parsedList[i]) } return @@ -249,18 +285,13 @@ export function schemeEval( (define-syntax quasiquote (syntax-rules (unquote unquote-splicing) ((_ (unquote x)) x) - ((_ (unquote-splicing x) . rest) + ((_ ((unquote-splicing x) . rest)) (append x (quasiquote rest))) ((_ (a . rest)) (cons (quasiquote a) (quasiquote rest))) - + ((_ x) (quote x)))) - - case 'quasiquote': - // hey, we can deal with unquote-splicing here! - // decompose the list into a call to a list of the elements, - // leaving quoted items alone, and unquoting the unquoted items. */ case 'define-syntax': @@ -273,7 +304,8 @@ export function schemeEval( // at this point, we assume that syntax-rules is verified // and parsed correctly already. const syntaxRulesList = flattenList(syntaxRules) - const literals: string[] = syntaxRulesList[1].map((literal: _Symbol) => literal.sym) + const literalList = flattenList(syntaxRulesList[1]) + const literals: string[] = literalList.map((literal: _Symbol) => literal.sym) const rules = syntaxRulesList.slice(2) // rules are set as a list of patterns and templates. // we need to convert these into transformers. @@ -301,6 +333,27 @@ export function schemeEval( control.push(appln as es.CallExpression) return } else if (command instanceof _Symbol) { + if (patterns.hasPattern(command.sym)) { + // get the relevant transformers + const transformers: Transformer[] = patterns.getPattern(command.sym) + + // find the first matching transformer + for (const transformer of transformers) { + // check if the transformer matches the list + if (match(command, transformer.pattern, transformer.literals)) { + // if it does, apply the transformer + const transformedMacro = macro_transform(command, transformer) + control.push(transformedMacro as ControlItem) + return + } + } + + // there is an error if we get to here + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('No matching transformer found for macro')) + ) + } // get the value of the symbol from the environment // associated with this symbol. const encodedName = encode(command.sym) @@ -341,3 +394,167 @@ export function makeDummyEvalExpression(callee: string, argument: string): es.Ca ] } } + +/** + * Because we have passed estree nodes with list elements + * to the control, if any future estree functions require + * the values within the nodes to be evaluated, we use this + * function to re-parse the modified estree nodes to avoid any errors. + */ +export function reparseEstreeNode(node: any): es.Node { + // if the node is an estree node, we recursively reparse it. + if (node.type) { + if (!node.modified) { + return node + } + switch (node.type) { + case 'ArrowFunctionExpression': + return { + type: 'ArrowFunctionExpression', + params: node.params.map((param: any) => reparseEstreeNode(param) as es.Identifier | es.RestElement), + body: reparseEstreeNode(node.body) as es.BlockStatement + } as es.Node + case 'VariableDeclaration': + return { + type: 'VariableDeclaration', + kind: node.kind, + declarations: node.declarations.map((decl: any) => reparseEstreeNode(decl) as es.VariableDeclarator) + } as es.Node + case 'VariableDeclarator': + return { + type: 'VariableDeclarator', + id: reparseEstreeNode(node.id) as es.Identifier, + init: reparseEstreeNode(node.init) + } as es.Node + case 'AssignmentExpression': + return { + type: 'AssignmentExpression', + operator: node.operator, + left: reparseEstreeNode(node.left) as es.Identifier, + right: reparseEstreeNode(node.right) + } as es.Node + case 'ConditionalExpression': + return { + type: 'ConditionalExpression', + test: reparseEstreeNode(node.test), + consequent: reparseEstreeNode(node.consequent), + alternate: reparseEstreeNode(node.alternate) + } as es.Node + case 'CallExpression': + return { + type: 'CallExpression', + optional: false, + callee: reparseEstreeNode(node.callee), + arguments: node.arguments.map((arg: any) => reparseEstreeNode(arg)) + } as es.Node + case 'Identifier': + return { + type: 'Identifier', + name: node.name + } as es.Node + case 'RestElement': + return { + type: 'RestElement', + argument: reparseEstreeNode(node.argument) as es.Identifier + } as es.Node + default: + // no other node was touched by schemeEval. + // return it as is. + return node + } + } + // if the node is not an estree node, there are several possibilities: + // 1. it is a list/improper list + // 2. it is a symbol + // 3. it is a number + // 4. it is a boolean + // 5. it is a string + // we need to handle each of these cases. + if (isList(node)) { + // if it is a list, we can be lazy and reparse the list as a + // CallExpression to the list. followed by a call to eval. + // this will ensure that the list is evaluated. + const items = flattenList(node) + const evalledItems = items.map((item: any) => reparseEstreeNode(item)) + const listCall = { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: 'list' + }, + arguments: evalledItems + } + return { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: 'eval' + }, + arguments: [listCall as es.CallExpression] + } + } else if (isImproperList(node)) { + // we can treat the improper list as a recursive CallExpression of cons + // followed by a call to eval. + const pairCall = { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: 'cons' + }, + arguments: [ + reparseEstreeNode(node[0]), + reparseEstreeNode(node[1]) + ] + } + return { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: 'eval' + }, + arguments: [pairCall as es.CallExpression] + } + } else if (node instanceof _Symbol) { + // if it is a symbol, we can just return an Identifier node. + return { + type: 'Identifier', + name: node.sym + } + } else if (is_number(node)) { + // if it is a number, we treat it as a call to + // the string->number function. + return { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: 'string->number' + }, + arguments: [ + { + type: 'Literal', + value: node.toString() + } + ] + } + } else if (typeof node === 'boolean') { + return { + type: 'Literal', + value: node + } + } else if (typeof node === 'string') { + return { + type: 'Literal', + value: node + } + } + // if we get to this point, just return undefined + return { + type: 'Literal', + value: "undefined" + } +} From 4449d649fc2eb14fe6a6eb693ef1a8599f77bf08 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Sat, 2 Nov 2024 22:18:39 +0800 Subject: [PATCH 11/28] fix pattern matching logic --- src/cse-machine/interpreter.ts | 6 +- src/cse-machine/patterns.ts | 258 +++++++++++++++++---------------- 2 files changed, 134 insertions(+), 130 deletions(-) diff --git a/src/cse-machine/interpreter.ts b/src/cse-machine/interpreter.ts index fcdabe7e2..aa3cbd49c 100644 --- a/src/cse-machine/interpreter.ts +++ b/src/cse-machine/interpreter.ts @@ -213,7 +213,7 @@ export function evaluate(program: es.Program, context: Context, options: IOption context.runtime.isRunning = true context.runtime.control = new Control(program) context.runtime.stash = new Stash() - context.runtime.patterns = new Pattern() + context.runtime.patterns = context.runtime.patterns ? context.runtime.patterns : new Pattern() return runCSEMachine( context, context.runtime.control, @@ -368,11 +368,11 @@ export function* generateCSEMachineStateStream( // Push first node to be evaluated into context. // The typeguard is there to guarantee that we are pushing a node (which should always be the case) - if (command && isNode(command)) { + if (command !== undefined && isNode(command)) { context.runtime.nodes.unshift(command) } - while (command) { + while (command !== undefined) { // Return to capture a snapshot of the control and stash after the target step count is reached if (!isPrelude && steps === envSteps) { yield { stash, control, steps } diff --git a/src/cse-machine/patterns.ts b/src/cse-machine/patterns.ts index 25918f15e..fe3ae8aab 100644 --- a/src/cse-machine/patterns.ts +++ b/src/cse-machine/patterns.ts @@ -6,6 +6,7 @@ import { List, Pair } from '../stdlib/list' import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' import { flattenList, isList } from './scheme-macros' +import { atomic_equals, is_number } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math' // a single pattern stored within the patterns component // may have several transformers attributed to it. @@ -24,7 +25,7 @@ export class Transformer { // given a matching transformer, // the macro_transform() function will transform a list // into the template of the transformer. -export function macro_transform(input: List, transformer: Transformer): List { +export function macro_transform(input: any, transformer: Transformer): any { const collected = collect(input, transformer.pattern, transformer.literals) return transform(transformer.template, collected) } @@ -33,14 +34,18 @@ export function arrayToList(arr: any[]): List { if (arr.length === 0) { return null } - return [arr[0], arrayToList(arr.slice(1))] + const pair: any[] = [arr[0], arrayToList(arr.slice(1))] as any[] + ;(pair as any).pair = true + return pair as List } -function arrayToImproperList(arr: any[], last: any): any { +export function arrayToImproperList(arr: any[], last: any): any { if (arr.length === 0) { return last } - return [arr[0], arrayToImproperList(arr.slice(1), last)] + const pair: any[] = [arr[0], arrayToImproperList(arr.slice(1), last)] as any[] + ;(pair as any).pair = true + return pair } export function isImproperList(value: any): boolean { @@ -62,15 +67,20 @@ export function flattenImproperList(value: any): [any[], any] { // we use the match() function to match a list against a pattern and literals // and verify if it is a match. -export function match( - input: any, - pattern: List | Pair | _Symbol, - literals: string[] -): boolean { +export function match(input: any, pattern: any, literals: string[]): boolean { + // deal with the cases where the pattern is a literal - a Scheme Number, string, or boolean + if (typeof pattern === 'string' || typeof pattern === 'boolean') { + return input === pattern + } + + if (is_number(pattern)) { + return atomic_equals(input, pattern) + } + if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { // this will match whatever the input list is unless it is // a literal in the literals list. (ie syntax) - return !(input instanceof _Symbol && !literals.includes(input.sym)) + return !(input instanceof _Symbol && literals.includes(input.sym)) } if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { @@ -80,31 +90,31 @@ export function match( // at this point, we know that the pattern is a list or improper list // make sure that the input is one too. - if (!isList(input) || !isImproperList(input)) { + if (!isList(input) && !isImproperList(input)) { return false } - // make sure that both the pattern and input match each other. - // they should both be lists or improper lists, with no mix. - if (isImproperList(pattern) !== isImproperList(input)) { - return false + // we know that both the pattern and inputs are at least pairs now. + // we can take the head and tails of both. + if (isImproperList(pattern)) { + if (input === null) { + return false + } + const [patternHead, patternTail] = pattern as [any, any] + const [inputHead, inputTail] = input as [any, any] + return match(inputHead, patternHead, literals) && match(inputTail, patternTail, literals) } - // in the case that both the pattern and input are improper lists, - if (isImproperList(pattern) && isImproperList(input)) { - const [patternItems, patternLast] = flattenImproperList(pattern) - const [inputItems, inputLast] = flattenImproperList(input) - // match the first element of the list with the first element of the pattern - return ( - match(arrayToList(inputItems), arrayToList(patternItems), literals) && - match(inputLast, patternLast, literals) - ) + // at this point, the pattern is a list. + // if the input is not a list, it can't match. + if (!isList(input)) { + return false } // now we know that both the pattern and list are lists. // we can match the elements of the list against the pattern, // but we also need to compare and check for the ... syntax. - if ((input == pattern) == null) { + if (input === null && pattern === null) { return true } @@ -155,8 +165,7 @@ export function match( return true } - // we assume for now that ... cannot appear elsewhere in this level of the pattern, except at the end. - // so here, we have no ... syntax. + // here, we have no ... syntax. // we can just compare the elements of the list with the pattern. if (inputList.length !== patternList.length) { @@ -174,12 +183,17 @@ export function match( // once a pattern is matched, we need to collect all of the matched variables. // ONLY called on matching patterns. -function collect( - input: any, - pattern: List | Pair | _Symbol, - literals: string[] -): Map { +function collect(input: any, pattern: any, literals: string[]): Map { const collected = new Map() + // deal with the cases where the pattern is a literal - a Scheme Number, string, or boolean + if (typeof pattern === 'string' || typeof pattern === 'boolean') { + return collected + } + + if (is_number(pattern)) { + return collected + } + if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { // collect the matching input here collected.set(pattern.sym, [input]) @@ -191,24 +205,24 @@ function collect( return collected } - if (pattern instanceof _Symbol && pattern.sym === '_') { + if (pattern instanceof _Symbol && (pattern.sym === '_' || pattern.sym === '...')) { // don't collect anything return collected } - // if one is an improper list, the other should be as well. + // match on an improper list pattern if (isImproperList(pattern)) { - const [patternItems, patternLast] = flattenImproperList(pattern) - const [inputItems, inputLast] = flattenImproperList(input) + const [patternHead, patternTail] = pattern as [any, any] + const [inputHead, inputTail] = input as [any, any] - // collect the proper list items - const collectedFirst = collect(arrayToList(inputItems), arrayToList(patternItems), literals) + // collect the head + const collectedFirst = collect(inputHead, patternHead, literals) for (const [key, value] of collectedFirst) { collected.set(key, value) } - // collect the improper list ending - const collectedSecond = collect(inputLast, patternLast, literals) + // collect the tail + const collectedSecond = collect(inputTail, patternTail, literals) for (const [key, value] of collectedSecond) { collected.set(key, value) } @@ -303,24 +317,36 @@ function collect( // when matched against a pattern, we use the transform() function // to transform the list into the template. // returns a list, a pair, or any value, as determined by the template. -function transform(template: List | Pair | _Symbol, collected: Map): any { +function transform(template: any, collected: Map, indexToCollect: number = 0): any { + // deal with the cases where the template is a literal - a Scheme Number, string, or boolean + if (typeof template === 'string' || typeof template === 'boolean') { + return template + } + + if (is_number(template)) { + return template + } + if (template instanceof _Symbol) { if (collected.has(template.sym)) { // get the item from the collected list, // remove it from the collected list, // and return it. - const item = (collected.get(template.sym) as any[]).shift() + const item = (collected.get(template.sym) as any[])[indexToCollect] return item } return template } if (isImproperList(template)) { - const [items, last] = flattenImproperList(template) + const [head, tail] = template as [any, any] // assemble both parts of the template separately - const firstPart = flattenList(transform(arrayToList(items), collected)) - const secondPart = transform(last, collected) - return arrayToImproperList(firstPart, secondPart) + const firstPart = flattenList(transform(head, collected)) + const secondPart = transform(tail, collected) + + const newPair = [firstPart, secondPart] as any[] + ;(newPair as any).pair = true + return newPair } // at this point, its a list. @@ -338,95 +364,73 @@ function transform(template: List | Pair | _Symbol, collected: Map elem instanceof _Symbol && elem.sym === '...' - ) - - if (ellipsisIndex !== -1) { - const frontTemplateLength = ellipsisIndex - const ellipsisTemplate = templateList[ellipsisIndex + 1] - const backTemplateLength = templateList.length - ellipsisIndex - 1 - - const transformedList = [] - - // transform the front of the list - for (let i = 0; i < frontTemplateLength; i++) { - transformedList.push(transform(templateList[i], collected)) - } - - // add the values from the ellipsis template - // (repeat the ellipsis template until the relevant collected items are exhausted) - // (the tricky part is that the repeated ellipsis template may - // refer to a list as well...) - - // idea - track the relevant template items, and track them until they are exhausted. - // to my understanding, there should be no nested ellipsis templates, (as in repeats of ... in a template already repeated) - // as there would be no way to equally distribute the collected items. - - // deal with the ellipsis template based on 3 cases: symbol, list, or improper list. - if (ellipsisTemplate instanceof _Symbol) { - // if it is a symbol, we can just repeat it. - while ( - collected.has(ellipsisTemplate.sym) && - (collected.get(ellipsisTemplate.sym) as any[]).length > 0 - ) { - transformedList.push(transform(ellipsisTemplate, collected)) - } - } else if (isList(ellipsisTemplate) || isImproperList(ellipsisTemplate)) { - function deepFlatten(pair: Pair): any[] { - const items: any[] = [] - function flattenHelper(item: any) { - if (item instanceof _Symbol && item.sym !== '...') { - items.push(item) - } else if (item === null) { - return - } else if (item instanceof Array && item.length === 2) { - // based on the usage of (... ), - // and our previous discussion on the viability - // of ... within the ellipsis template - // we can assume that any ellipsis used is used to halt macro expansion of . - if (item[0] instanceof _Symbol && item[0].sym === '...') { - // do not collect any items here, this halts the collection - return - } - // if its a pair, traverse both car and cdr - flattenHelper(item[0]) - flattenHelper(item[1]) - } + // collects all items in an ellipsis template to be used in the final list. + function deepFlatten(pair: Pair): _Symbol[] { + const items: _Symbol[] = [] + function flattenHelper(item: any) { + if (item instanceof _Symbol && item.sym !== '...') { + items.push(item) + } else if (item === null) { + return + } else if (item instanceof Array && item.length === 2) { + // based on the usage of (... ), + // and our previous discussion on the viability + // of ... within the ellipsis template + // we can assume that any ellipsis used is used to halt macro expansion of . + if (item[0] instanceof _Symbol && item[0].sym === '...') { + // do not collect any items here, this halts the collection + return } - flattenHelper(pair) - return items - } - - // collect all the items in the ellipsis template - const ellipsisTemplateList = deepFlatten(ellipsisTemplate as Pair) - - // all we need is to track some symbol in the ellipsis template, and make sure that it is exhausted. - while ( - collected.has(ellipsisTemplateList[0].sym) && - (collected.get(ellipsisTemplateList[0].sym) as any[]).length > 0 - ) { - transformedList.push(transform(ellipsisTemplate, collected)) + // if its a pair, traverse both car and cdr + flattenHelper(item[0]) + flattenHelper(item[1]) } } - - // transform the back of the list - for (let i = templateList.length - backTemplateLength; i < templateList.length; i++) { - transformedList.push(transform(templateList[i], collected)) - } - - return arrayToList(transformedList) + flattenHelper(pair) + return items } - // if there is no ... syntax, we can just evaluate the list as is. - // use iteration, as we are not sure that map evaluates left to right. - const transformedList = [] + const transformedList: any[] = [] + let lastEllipsisTemplate: any + // collect all items in the working list, + // using the ellipsis templates if we need to. for (let i = 0; i < templateList.length; i++) { + if (templateList[i] instanceof _Symbol && templateList[i].sym === '...') { + // if we have an ellipsis, collect all items as necessary. + // we track these items, and apply the last ellipsis template again + // until these items are exhausted. + const items = deepFlatten(lastEllipsisTemplate) + // start at 1, since the first item has already been collected once. + let collectingIndex = 1 + while (true) { + // check if all items are exhausted + let itemsAreExhausted = false + for (let i = 0; i < items.length; i++) { + if (!collected.has(items[i].sym)) { + itemsAreExhausted = true + break + } + if ( + collected.has(items[i].sym) && + (collected.get(items[i].sym) as any[]).length <= collectingIndex + ) { + itemsAreExhausted = true + break + } + } + if (itemsAreExhausted) { + break + } + // apply the last ellipsis template again + transformedList.push(transform(lastEllipsisTemplate, collected, collectingIndex)) + collectingIndex++ + } + continue + } + // store this template for any ellipsis. + lastEllipsisTemplate = templateList[i] transformedList.push(transform(templateList[i], collected)) } - return arrayToList(transformedList) } From 255cd0029114b7e3671319c84fe23bd1341cd367 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Sat, 2 Nov 2024 22:19:16 +0800 Subject: [PATCH 12/28] allow let, cond, etc... to be defined in the prelude --- src/createContext.ts | 2 ++ src/stdlib/scheme.prelude.ts | 30 +++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/createContext.ts b/src/createContext.ts index 374701227..3701e12c1 100644 --- a/src/createContext.ts +++ b/src/createContext.ts @@ -39,6 +39,7 @@ import * as operators from './utils/operators' import { stringify } from './utils/stringify' import { schemeVisualise } from './alt-langs/scheme/scheme-mapper' import { csep_eval } from './cse-machine/scheme-macros' +import { Pattern } from './cse-machine/interpreter' export class LazyBuiltIn { func: (...arg0: any) => any @@ -118,6 +119,7 @@ const createEmptyRuntime = () => ({ nodes: [], control: null, stash: null, + patterns: new Pattern(), objectCount: 0, envSteps: -1, envStepsTotal: 0, diff --git a/src/stdlib/scheme.prelude.ts b/src/stdlib/scheme.prelude.ts index c444049fb..1424bb852 100644 --- a/src/stdlib/scheme.prelude.ts +++ b/src/stdlib/scheme.prelude.ts @@ -92,9 +92,33 @@ export const scheme3Prelude = ` ` export const scheme4Prelude = ` -;; empty for now +(define call-with-current-continuation call/cc) ` export const schemeFullPrelude = ` -(define call-with-current-continuation call/cc) -` +(define-syntax let (syntax-rules () ((_ ((name val) ...) body ...) ((lambda (name ...) body ...) val ...)))) +(define-syntax quasiquote + (syntax-rules (unquote unquote-splicing) + ((_ (unquote x)) x) + ((_ ((unquote-splicing x) . rest)) + (append x (quasiquote rest))) + ((_ (a . rest)) + (cons (quasiquote a) (quasiquote rest))) + ((_ x) (quote x)))) +(define-syntax cond + (syntax-rules (else) + ((_) (if #f #f)) + + ((_ (else val ...)) + (begin val ...)) + + ((_ (test val ...)) + (if test + (begin val ...) + (cond))) + + ((_ (test val ...) next-clauses ...) + (if test + (begin val ...) + (cond next-clauses ...))))) +` \ No newline at end of file From 8e97aad71212e8102eae8b0beab090def39b32d2 Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Sat, 2 Nov 2024 22:39:46 +0800 Subject: [PATCH 13/28] allow runtime checks for basic forms --- src/cse-machine/scheme-macros.ts | 128 +++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 6 deletions(-) diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index a346c2674..63450b4c6 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -125,6 +125,12 @@ export function schemeEval( // the logic will be handled here. switch (parsedList[0].sym) { case 'lambda': + if (parsedList.length < 3) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('lambda requires at least 2 arguments!')) + ) + } // return a lambda expression that takes // in the arguments, and returns the body // as an eval of the body. @@ -138,8 +144,35 @@ export function schemeEval( rest = args } else if (isImproperList(args)) { ;[argsList, rest] = flattenImproperList(args) - } else { + argsList.forEach(arg => { + if (!(arg instanceof _Symbol)) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('Invalid arguments for lambda!')) + ) + } + }) + if (rest !== null && !(rest instanceof _Symbol)) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('Invalid arguments for lambda!')) + ) + } + } else if (isList(args)) { argsList = flattenList(args) as _Symbol[] + argsList.forEach(arg => { + if (!(arg instanceof _Symbol)) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('Invalid arguments for lambda!')) + ) + } + }) + } else { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('Invalid arguments for lambda!')) + ) } // convert the args to estree pattern @@ -181,6 +214,12 @@ export function schemeEval( return case 'define': + if (parsedList.length < 3) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('define requires at least 2 arguments!')) + ) + } const variable = parsedList[1] if (isList(variable)) { // then this define is actually a function definition @@ -209,6 +248,17 @@ export function schemeEval( ]) control.push(define_function as any) return + } else if (!(variable instanceof _Symbol)) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('Invalid variable for define!')) + ) + } + if (parsedList.length !== 3) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('define requires 2 arguments!')) + ) } const value = parsedList[2] // estree VariableDeclaration @@ -228,7 +278,19 @@ export function schemeEval( control.push(definition as es.VariableDeclaration) return case 'set!': + if (parsedList.length !== 3) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('set! requires 2 arguments!')) + ) + } const set_variable = parsedList[1] + if (!(set_variable instanceof _Symbol)) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('Invalid arguments for set!')) + ) + } const set_value = parsedList[2] // estree AssignmentExpression @@ -243,6 +305,18 @@ export function schemeEval( control.push(assignment as es.AssignmentExpression) return case 'if': + if (parsedList.length < 3) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('if requires at least 2 arguments!')) + ) + } + if (parsedList.length > 4) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('if requires at most 3 arguments!')) + ) + } const condition = parsedList[1] const consequent = parsedList[2] // check if there is an alternate @@ -266,6 +340,14 @@ export function schemeEval( // begin is a sequence of expressions // that are evaluated in order. // push the expressions to the control in reverse + // order. + if (parsedList.length < 2) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('begin requires at least 1 argument!')) + ) + } + control.push(parsedList[parsedList.length - 1]) for (let i = parsedList.length - 2; i > 0; i--) { control.push(popInstr(makeDummyIdentifierNode('pop'))) @@ -295,17 +377,49 @@ export function schemeEval( */ case 'define-syntax': + if (parsedList.length !== 3) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('define-syntax requires 2 arguments!')) + ) + } // parse the pattern and template here, // generate a list of transformers from it, // and add it to the Patterns component. const syntaxName = parsedList[1] + if (!(syntaxName instanceof _Symbol)) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('define-syntax requires a symbol!')) + ) + } const syntaxRules = parsedList[2] // at this point, we assume that syntax-rules is verified // and parsed correctly already. const syntaxRulesList = flattenList(syntaxRules) + if (!(syntaxRulesList[0] instanceof _Symbol) || syntaxRulesList[0].sym !== 'syntax-rules') { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('define-syntax requires a syntax-rules transformer!')) + ) + } + if (syntaxRulesList.length < 3) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('syntax-rules requires at least 2 arguments!')) + ) + } const literalList = flattenList(syntaxRulesList[1]) - const literals: string[] = literalList.map((literal: _Symbol) => literal.sym) + const literals: string[] = literalList.map((literal: _Symbol) => { + if (!(literal instanceof _Symbol)) { + return handleRuntimeError( + context, + new errors.ExceptionError(new Error('Invalid syntax-rules literals!')) + ) + } + return literal.sym + }) const rules = syntaxRulesList.slice(2) // rules are set as a list of patterns and templates. // we need to convert these into transformers. @@ -472,8 +586,10 @@ export function reparseEstreeNode(node: any): es.Node { // we need to handle each of these cases. if (isList(node)) { // if it is a list, we can be lazy and reparse the list as a - // CallExpression to the list. followed by a call to eval. + // CallExpression to the list procedure- followed by a call to eval. // this will ensure that the list is evaluated. + + // this also handles null. const items = flattenList(node) const evalledItems = items.map((item: any) => reparseEstreeNode(item)) const listCall = { @@ -490,7 +606,7 @@ export function reparseEstreeNode(node: any): es.Node { optional: false, callee: { type: 'Identifier', - name: 'eval' + name: encode('eval') }, arguments: [listCall as es.CallExpression] } @@ -514,7 +630,7 @@ export function reparseEstreeNode(node: any): es.Node { optional: false, callee: { type: 'Identifier', - name: 'eval' + name: encode('eval') }, arguments: [pairCall as es.CallExpression] } @@ -532,7 +648,7 @@ export function reparseEstreeNode(node: any): es.Node { optional: false, callee: { type: 'Identifier', - name: 'string->number' + name: encode('string->number') }, arguments: [ { From 8125486b493e3dea2484765f90a294e0d8667f2a Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Sat, 2 Nov 2024 23:19:19 +0800 Subject: [PATCH 14/28] remove hack of storing scheme lists in estree nodes --- src/alt-langs/scheme/scm-slang | 2 +- src/cse-machine/scheme-macros.ts | 285 ++++++++++++------------------- src/stdlib/scheme.prelude.ts | 2 +- 3 files changed, 113 insertions(+), 176 deletions(-) diff --git a/src/alt-langs/scheme/scm-slang b/src/alt-langs/scheme/scm-slang index 2823b8983..9c0f35dae 160000 --- a/src/alt-langs/scheme/scm-slang +++ b/src/alt-langs/scheme/scm-slang @@ -1 +1 @@ -Subproject commit 2823b89837caf08e39e1354917db04752b4dcd97 +Subproject commit 9c0f35daef83cd98e40993142b6580509a6927a8 diff --git a/src/cse-machine/scheme-macros.ts b/src/cse-machine/scheme-macros.ts index 63450b4c6..926110365 100644 --- a/src/cse-machine/scheme-macros.ts +++ b/src/cse-machine/scheme-macros.ts @@ -144,13 +144,14 @@ export function schemeEval( rest = args } else if (isImproperList(args)) { ;[argsList, rest] = flattenImproperList(args) - argsList.forEach(arg => { + argsList.forEach((arg: any) => { if (!(arg instanceof _Symbol)) { return handleRuntimeError( context, new errors.ExceptionError(new Error('Invalid arguments for lambda!')) ) } + return }) if (rest !== null && !(rest instanceof _Symbol)) { return handleRuntimeError( @@ -160,13 +161,14 @@ export function schemeEval( } } else if (isList(args)) { argsList = flattenList(args) as _Symbol[] - argsList.forEach(arg => { + argsList.forEach((arg: any) => { if (!(arg instanceof _Symbol)) { return handleRuntimeError( context, new errors.ExceptionError(new Error('Invalid arguments for lambda!')) ) } + return }) } else { return handleRuntimeError( @@ -177,7 +179,7 @@ export function schemeEval( // convert the args to estree pattern const params: (es.Identifier | es.RestElement)[] = argsList.map(arg => - makeDummyIdentifierNode(arg.sym) + makeDummyIdentifierNode(encode(arg.sym)) ) let body_elements = parsedList.slice(2) @@ -189,7 +191,7 @@ export function schemeEval( if (rest !== null) { params.push({ type: 'RestElement', - argument: makeDummyIdentifierNode(rest.sym) + argument: makeDummyIdentifierNode(encode(rest.sym)) }) body = arrayToList([ new _Symbol('begin'), @@ -206,11 +208,10 @@ export function schemeEval( const lambda = { type: 'ArrowFunctionExpression', params: params, - body: body as any, - modified: true + body: convertToEvalExpression(body) } - control.push(lambda as unknown as es.ArrowFunctionExpression) + control.push(lambda as es.ArrowFunctionExpression) return case 'define': @@ -269,10 +270,9 @@ export function schemeEval( { type: 'VariableDeclarator', id: makeDummyIdentifierNode(encode(variable.sym)), - init: value + init: convertToEvalExpression(value) } - ], - modified: true + ] } control.push(definition as es.VariableDeclaration) @@ -298,8 +298,7 @@ export function schemeEval( type: 'AssignmentExpression', operator: '=', left: makeDummyIdentifierNode(encode(set_variable.sym)), - right: set_value, - modified: true + right: convertToEvalExpression(set_value) } control.push(assignment as es.AssignmentExpression) @@ -328,10 +327,9 @@ export function schemeEval( // estree ConditionalExpression const conditional = { type: 'ConditionalExpression', - test: truthyCondition as any, - consequent, - alternate, - modified: true + test: convertToEvalExpression(truthyCondition), + consequent: convertToEvalExpression(consequent), + alternate: alternate ? convertToEvalExpression(alternate) : undefined } control.push(conditional as es.ConditionalExpression) @@ -398,10 +396,15 @@ export function schemeEval( // at this point, we assume that syntax-rules is verified // and parsed correctly already. const syntaxRulesList = flattenList(syntaxRules) - if (!(syntaxRulesList[0] instanceof _Symbol) || syntaxRulesList[0].sym !== 'syntax-rules') { + if ( + !(syntaxRulesList[0] instanceof _Symbol) || + syntaxRulesList[0].sym !== 'syntax-rules' + ) { return handleRuntimeError( context, - new errors.ExceptionError(new Error('define-syntax requires a syntax-rules transformer!')) + new errors.ExceptionError( + new Error('define-syntax requires a syntax-rules transformer!') + ) ) } if (syntaxRulesList.length < 3) { @@ -441,8 +444,8 @@ export function schemeEval( const appln = { type: 'CallExpression', optional: false, - callee: procedure, - arguments: args + callee: convertToEvalExpression(procedure) as es.Expression, + arguments: args.map(convertToEvalExpression) // unfortunately, each one needs to be converted. } control.push(appln as es.CallExpression) return @@ -510,167 +513,101 @@ export function makeDummyEvalExpression(callee: string, argument: string): es.Ca } /** - * Because we have passed estree nodes with list elements - * to the control, if any future estree functions require - * the values within the nodes to be evaluated, we use this - * function to re-parse the modified estree nodes to avoid any errors. + * Convert a scheme expression (that is meant to be evaluated) + * into an estree expression, using eval. + * this will let us avoid the "hack" of storing Scheme lists + * in estree nodes. + * @param expression + * @returns estree expression */ -export function reparseEstreeNode(node: any): es.Node { - // if the node is an estree node, we recursively reparse it. - if (node.type) { - if (!node.modified) { - return node - } - switch (node.type) { - case 'ArrowFunctionExpression': - return { - type: 'ArrowFunctionExpression', - params: node.params.map((param: any) => reparseEstreeNode(param) as es.Identifier | es.RestElement), - body: reparseEstreeNode(node.body) as es.BlockStatement - } as es.Node - case 'VariableDeclaration': - return { - type: 'VariableDeclaration', - kind: node.kind, - declarations: node.declarations.map((decl: any) => reparseEstreeNode(decl) as es.VariableDeclarator) - } as es.Node - case 'VariableDeclarator': - return { - type: 'VariableDeclarator', - id: reparseEstreeNode(node.id) as es.Identifier, - init: reparseEstreeNode(node.init) - } as es.Node - case 'AssignmentExpression': - return { - type: 'AssignmentExpression', - operator: node.operator, - left: reparseEstreeNode(node.left) as es.Identifier, - right: reparseEstreeNode(node.right) - } as es.Node - case 'ConditionalExpression': - return { - type: 'ConditionalExpression', - test: reparseEstreeNode(node.test), - consequent: reparseEstreeNode(node.consequent), - alternate: reparseEstreeNode(node.alternate) - } as es.Node - case 'CallExpression': - return { - type: 'CallExpression', - optional: false, - callee: reparseEstreeNode(node.callee), - arguments: node.arguments.map((arg: any) => reparseEstreeNode(arg)) - } as es.Node - case 'Identifier': - return { +export function convertToEvalExpression(expression: SchemeControlItems): es.CallExpression { + function convertToEstreeExpression(expression: SchemeControlItems): es.Expression { + /* + cases to consider: + - list + - pair/improper list + - symbol + - number + - boolean + - string + */ + if (isList(expression)) { + // make a call expression to list + // with the elements of the list as its arguments. + const args = flattenList(expression).map(convertToEstreeExpression) + return { + type: 'CallExpression', + optional: false, + callee: { type: 'Identifier', - name: node.name - } as es.Node - case 'RestElement': - return { - type: 'RestElement', - argument: reparseEstreeNode(node.argument) as es.Identifier - } as es.Node - default: - // no other node was touched by schemeEval. - // return it as is. - return node - } - } - // if the node is not an estree node, there are several possibilities: - // 1. it is a list/improper list - // 2. it is a symbol - // 3. it is a number - // 4. it is a boolean - // 5. it is a string - // we need to handle each of these cases. - if (isList(node)) { - // if it is a list, we can be lazy and reparse the list as a - // CallExpression to the list procedure- followed by a call to eval. - // this will ensure that the list is evaluated. - - // this also handles null. - const items = flattenList(node) - const evalledItems = items.map((item: any) => reparseEstreeNode(item)) - const listCall = { - type: 'CallExpression', - optional: false, - callee: { - type: 'Identifier', - name: 'list' - }, - arguments: evalledItems - } - return { - type: 'CallExpression', - optional: false, - callee: { - type: 'Identifier', - name: encode('eval') - }, - arguments: [listCall as es.CallExpression] - } - } else if (isImproperList(node)) { - // we can treat the improper list as a recursive CallExpression of cons - // followed by a call to eval. - const pairCall = { - type: 'CallExpression', - optional: false, - callee: { - type: 'Identifier', - name: 'cons' - }, - arguments: [ - reparseEstreeNode(node[0]), - reparseEstreeNode(node[1]) - ] - } - return { - type: 'CallExpression', - optional: false, - callee: { - type: 'Identifier', - name: encode('eval') - }, - arguments: [pairCall as es.CallExpression] - } - } else if (node instanceof _Symbol) { - // if it is a symbol, we can just return an Identifier node. - return { - type: 'Identifier', - name: node.sym - } - } else if (is_number(node)) { - // if it is a number, we treat it as a call to - // the string->number function. - return { - type: 'CallExpression', - optional: false, - callee: { - type: 'Identifier', - name: encode('string->number') - }, - arguments: [ - { - type: 'Literal', - value: node.toString() - } - ] - } - } else if (typeof node === 'boolean') { - return { - type: 'Literal', - value: node + name: 'list' + }, + arguments: args + } + } else if (isImproperList(expression)) { + // make a call to cons + // with the car and cdr as its arguments. + const [car, cdr] = expression as [SchemeControlItems, SchemeControlItems] + return { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: 'cons' + }, + arguments: [convertToEstreeExpression(car), convertToEstreeExpression(cdr)] + } + } else if (expression instanceof _Symbol) { + // make a call to string->symbol + // with the symbol name as its argument. + return { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: encode('string->symbol') + }, + arguments: [ + { + type: 'Literal', + value: expression.sym + } + ] + } + } else if (is_number(expression)) { + // make a call to string->number + // with the number toString() as its argument. + return { + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: encode('string->number') + }, + arguments: [ + { + type: 'Literal', + value: (expression as any).toString() + } + ] + } } - } else if (typeof node === 'string') { + // if we're here, then it is a boolean or string. + // just return the literal value. return { type: 'Literal', - value: node + value: expression as boolean | string } } - // if we get to this point, just return undefined + + // make a call expression to eval with the single expression as its component. return { - type: 'Literal', - value: "undefined" + type: 'CallExpression', + optional: false, + callee: { + type: 'Identifier', + name: encode('eval') + }, + arguments: [convertToEstreeExpression(expression) as es.Expression] } } diff --git a/src/stdlib/scheme.prelude.ts b/src/stdlib/scheme.prelude.ts index 1424bb852..6bc5d8278 100644 --- a/src/stdlib/scheme.prelude.ts +++ b/src/stdlib/scheme.prelude.ts @@ -121,4 +121,4 @@ export const schemeFullPrelude = ` (if test (begin val ...) (cond next-clauses ...))))) -` \ No newline at end of file +` From db1c7b5c51309cd358786c26a68b30467d87fb4a Mon Sep 17 00:00:00 2001 From: Kyriel Abad Date: Sun, 3 Nov 2024 03:46:18 +0800 Subject: [PATCH 15/28] fix behaviour of ... in macros --- src/cse-machine/patterns.ts | 618 ++++++++++++++++++++++------------- src/stdlib/scheme.prelude.ts | 28 +- 2 files changed, 404 insertions(+), 242 deletions(-) diff --git a/src/cse-machine/patterns.ts b/src/cse-machine/patterns.ts index fe3ae8aab..cbdd8697e 100644 --- a/src/cse-machine/patterns.ts +++ b/src/cse-machine/patterns.ts @@ -7,6 +7,7 @@ import { List, Pair } from '../stdlib/list' import { _Symbol } from '../alt-langs/scheme/scm-slang/src/stdlib/base' import { flattenList, isList } from './scheme-macros' import { atomic_equals, is_number } from '../alt-langs/scheme/scm-slang/src/stdlib/core-math' +import { schemeVisualise } from '../alt-langs/scheme/scheme-mapper' // a single pattern stored within the patterns component // may have several transformers attributed to it. @@ -55,6 +56,10 @@ export function isImproperList(value: any): boolean { return Array.isArray(value) && value.length === 2 && !isList(value[1]) } +function isPair(value: any): value is Pair { + return Array.isArray(value) && value.length === 2 +} + export function flattenImproperList(value: any): [any[], any] { let items = [] let working = value @@ -65,250 +70,334 @@ export function flattenImproperList(value: any): [any[], any] { return [items, working] } +function improperListLength(value: any): number { + let length = 0 + let working = value + while (isPair(working)) { + length++ + working = working[1] + } + return length +} + // we use the match() function to match a list against a pattern and literals // and verify if it is a match. export function match(input: any, pattern: any, literals: string[]): boolean { - // deal with the cases where the pattern is a literal - a Scheme Number, string, or boolean - if (typeof pattern === 'string' || typeof pattern === 'boolean') { - return input === pattern + // we should compare the input and pattern based on the possible forms of pattern: + // 1. an identifier + // 2. a literal such as null, a number, a string, or a boolean + // 3. (+) + // 4. (+ . ) + // 5. (+ ... +) + // 6. (+ ... + . ) + + // case 1 + if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { + return input instanceof _Symbol && input.sym === pattern.sym } - if (is_number(pattern)) { - return atomic_equals(input, pattern) + if (pattern instanceof _Symbol) { + return !(input instanceof _Symbol && literals.includes(input.sym)) } - if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { - // this will match whatever the input list is unless it is - // a literal in the literals list. (ie syntax) - return !(input instanceof _Symbol && literals.includes(input.sym)) + // case 2 + if (pattern === null) { + return input === null } - if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { - // only match if the input is the same as the pattern - return input instanceof _Symbol && input.sym === pattern.sym + if (is_number(pattern)) { + return is_number(input) && atomic_equals(input, pattern) } - // at this point, we know that the pattern is a list or improper list - // make sure that the input is one too. - if (!isList(input) && !isImproperList(input)) { - return false + if (typeof pattern === 'string' || typeof pattern === 'boolean') { + return input === pattern } - // we know that both the pattern and inputs are at least pairs now. - // we can take the head and tails of both. - if (isImproperList(pattern)) { - if (input === null) { + // case 3 and 5 + if (isList(pattern)) { + if (!isList(input)) { return false } - const [patternHead, patternTail] = pattern as [any, any] - const [inputHead, inputTail] = input as [any, any] - return match(inputHead, patternHead, literals) && match(inputTail, patternTail, literals) - } + const inputList = flattenList(input) + const patternList = flattenList(pattern) + // there can be a single ellepsis in the pattern, but it must be behind some element. + // scan the pattern for the ... symbol. + // we will need the position of the ... symbol to compare the front and back of the list. + const ellipsisIndex = patternList.findIndex( + elem => elem instanceof _Symbol && elem.sym === '...' + ) + + // case 5 + if (ellipsisIndex !== -1) { + // if the input is shorter than the pattern (minus the ... and matching pattern), it can't match. + if (inputList.length < patternList.length - 2) { + return false + } - // at this point, the pattern is a list. - // if the input is not a list, it can't match. - if (!isList(input)) { - return false - } + const frontPatternLength = ellipsisIndex - 1 + const ellipsisPattern = patternList[ellipsisIndex - 1] + const backPatternLength = patternList.length - ellipsisIndex - 1 - // now we know that both the pattern and list are lists. - // we can match the elements of the list against the pattern, - // but we also need to compare and check for the ... syntax. - if (input === null && pattern === null) { - return true - } + // compare the front of the list with the front of the pattern as per normal + for (let i = 0; i < frontPatternLength; i++) { + if (!match(inputList[i], patternList[i], literals)) { + return false + } + } - // it's easier to reason about the lists as arrays for now. - const inputList = flattenList(input) - const patternList = flattenList(pattern) + // compare the items that should be captured by the ellipsis + for (let i = frontPatternLength; i < inputList.length - backPatternLength; i++) { + if (!match(inputList[i], ellipsisPattern, literals)) { + return false + } + } - // there can be a single ellepsis in the pattern, but it must be behind some element. - // scan the pattern for the ... symbol. - // we will need the position of the ... symbol to compare the front and back of the list. - const ellipsisIndex = patternList.findIndex(elem => elem instanceof _Symbol && elem.sym === '...') + // now we can compare the back of the list with the rest of the patterns + for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { + if ( + !match(inputList[i], patternList[i - (inputList.length - patternList.length)], literals) + ) { + return false + } + } - // check if an ellipsis exists within the pattern. - if (ellipsisIndex !== -1) { - // if the input is shorter than the pattern (minus the ...), it can't match. - if (inputList.length < patternList.length - 1) { - return false + // else all is good and return true + return true } - const frontPatternLength = ellipsisIndex - const ellipsisPattern = patternList[ellipsisIndex - 1] - const backPatternLength = patternList.length - ellipsisIndex - 1 - - // compare the front of the list with the front of the pattern as per normal - for (let i = 0; i < frontPatternLength; i++) { + // case 3 + if (inputList.length !== patternList.length) { + return false + } + for (let i = 0; i < inputList.length; i++) { if (!match(inputList[i], patternList[i], literals)) { return false } } + return true + } - // compare the items that should be captured by the ellipsis - for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { - if (!match(inputList[i], ellipsisPattern, literals)) { + // case 4 and 6 + if (isImproperList(pattern)) { + // if the input is not a pair, it can't match. + if (!isPair(input)) { + return false + } + + let currEllipsisPattern + let currentPattern = pattern + let currentInput = input + let ellipsisFound = false + + // iterate through currentPattern while it is a pair + while (isPair(currentPattern)) { + if (!isPair(currentInput)) { return false } - } + const [headPattern, tailPattern] = currentPattern + const [headInput, tailInput] = currentInput - // now we can compare the back of the list with the rest of the patterns - for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { + // we can lookahead to see if the ellipsis symbol is the next pattern element. if ( - !match(inputList[i], patternList[i - (inputList.length - patternList.length)], literals) + isPair(tailPattern) && + tailPattern[0] instanceof _Symbol && + tailPattern[0].sym === '...' ) { - return false + ellipsisFound = true + currEllipsisPattern = headPattern + // skip ahead to the (cddr pattern) for the next iteration + // the cddr is what "remains" of the pattern after the ellipsis. + currentPattern = tailPattern[1] + continue } - } - // else all is good and return true - return true - } - - // here, we have no ... syntax. - - // we can just compare the elements of the list with the pattern. - if (inputList.length !== patternList.length) { - return false - } + // if the ellipis is found, continue to match the pattern until the ellipsis is exhausted. + // (this is done by comparing the length of the input to the length of the remaining pattern) + if (ellipsisFound && improperListLength(currentInput) > improperListLength(currentPattern)) { + // match the headInput with the currEllipsisPattern + if (!match(headInput, currEllipsisPattern, literals)) { + return false + } + currentInput = tailInput // move to the next input + continue + } - for (let i = 0; i < inputList.length; i++) { - if (!match(inputList[i], patternList[i], literals)) { - return false + // if the ellipsis symbol is not found, or we have already matched the ellipsis pattern, + // match the headInput with the headPattern + if (!match(headInput, headPattern, literals)) { + return false + } + currEllipsisPattern = headPattern + currentPattern = tailPattern + currentInput = tailInput } + // now we can compare the last item in the pattern with the rest of the input + return match(currentInput, currentPattern, literals) } - return true + return false } // once a pattern is matched, we need to collect all of the matched variables. // ONLY called on matching patterns. function collect(input: any, pattern: any, literals: string[]): Map { const collected = new Map() - // deal with the cases where the pattern is a literal - a Scheme Number, string, or boolean - if (typeof pattern === 'string' || typeof pattern === 'boolean') { + // we should compare the input and pattern based on the possible forms of pattern: + // 1. an identifier + // 2. a literal such as null, a number, a string, or a boolean + // 3. (+) + // 4. (+ . ) + // 5. (+ ... +) + // 6. (+ ... + . ) + + // case 1 + if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { + if (!collected.has(pattern.sym)) { + collected.set(pattern.sym, []) + } + collected.get(pattern.sym)?.push(input) return collected } - if (is_number(pattern)) { + // case 2 + if (pattern === null) { return collected } - if (pattern instanceof _Symbol && !literals.includes(pattern.sym)) { - // collect the matching input here - collected.set(pattern.sym, [input]) + if (is_number(pattern)) { return collected } - if (pattern instanceof _Symbol && literals.includes(pattern.sym)) { - // pattern is a syntax literal, don't collect anything + if (typeof pattern === 'string' || typeof pattern === 'boolean') { return collected } - if (pattern instanceof _Symbol && (pattern.sym === '_' || pattern.sym === '...')) { - // don't collect anything - return collected - } + // cases 3 and 5 + if (isList(pattern)) { + const inputList = flattenList(input) + const patternList = flattenList(pattern) + const ellipsisIndex = patternList.findIndex( + elem => elem instanceof _Symbol && elem.sym === '...' + ) + + // case 5 + if (ellipsisIndex !== -1) { + const frontPatternLength = ellipsisIndex - 1 + const ellipsisPattern = patternList[ellipsisIndex - 1] + const backPatternLength = patternList.length - ellipsisIndex - 1 + + for (let i = 0; i < frontPatternLength; i++) { + const val = collect(inputList[i], patternList[i], literals) + for (let [key, value] of val) { + if (!collected.has(key)) { + collected.set(key, []) + } + collected.get(key)?.push(...value) + } + } - // match on an improper list pattern - if (isImproperList(pattern)) { - const [patternHead, patternTail] = pattern as [any, any] - const [inputHead, inputTail] = input as [any, any] + for (let i = frontPatternLength; i < inputList.length - backPatternLength; i++) { + const val = collect(inputList[i], ellipsisPattern, literals) + for (let [key, value] of val) { + if (!collected.has(key)) { + collected.set(key, []) + } + collected.get(key)?.push(...value) + } + } - // collect the head - const collectedFirst = collect(inputHead, patternHead, literals) - for (const [key, value] of collectedFirst) { - collected.set(key, value) + for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { + const val = collect( + inputList[i], + patternList[i - (inputList.length - patternList.length)], + literals + ) + for (let [key, value] of val) { + if (!collected.has(key)) { + collected.set(key, []) + } + collected.get(key)?.push(...value) + } + } + return collected } - // collect the tail - const collectedSecond = collect(inputTail, patternTail, literals) - for (const [key, value] of collectedSecond) { - collected.set(key, value) + // case 3 + for (let i = 0; i < inputList.length; i++) { + const val = collect(inputList[i], patternList[i], literals) + for (let [key, value] of val) { + if (!collected.has(key)) { + collected.set(key, []) + } + collected.get(key)?.push(...value) + } } - return collected } - // at this point, we know that the pattern is a list - // and the input should be too - if (!isList(input)) { - return collected - } + // case 4 and 6 + if (isImproperList(pattern)) { + let currEllipsisPattern + let currentPattern = pattern + let currentInput = input + let ellipsisFound = false - if ((input == pattern) == null) { - // should be empty - return collected - } + // iterate through currentPattern while it is a pair + while (isPair(currentPattern)) { + const [headPattern, tailPattern] = currentPattern + const [headInput, tailInput] = currentInput - const inputList = flattenList(input) - const patternList = flattenList(pattern) - // there can be a single ellepsis in the pattern, but it must be behind some element. - // scan the pattern for the ... symbol. - // we will need the position of the ... symbol to compare the front and back of the list. - const ellipsisIndex = patternList.findIndex(elem => elem instanceof _Symbol && elem.sym === '...') - - // check if an ellipsis exists within the pattern. - if (ellipsisIndex !== -1) { - const frontPatternLength = ellipsisIndex - const ellipsisPattern = patternList[ellipsisIndex - 1] - const backPatternLength = patternList.length - ellipsisIndex - 1 - - // collect items from the front of the list with the front of the pattern - for (let i = 0; i < frontPatternLength; i++) { - const collectedFront = collect(inputList[i], patternList[i], literals) - for (const [key, value] of collectedFront) { - if (collected.has(key)) { - // add the collected items to the back of the list - // (this preserves the order of the list) - collected.set(key, [...(collected.get(key) as any[]), ...value]) - } else { - collected.set(key, value) - } + // we can lookahead to see if the ellipsis symbol is the next pattern element. + if ( + isPair(tailPattern) && + tailPattern[0] instanceof _Symbol && + tailPattern[0].sym === '...' + ) { + ellipsisFound = true + currEllipsisPattern = headPattern + // skip ahead to the (cddr pattern) for the next iteration + // the cddr is what "remains" of the pattern after the ellipsis. + currentPattern = tailPattern[1] + continue } - } - // compare the items that should be captured by the ellipsis - for (let i = ellipsisIndex; i < inputList.length - backPatternLength; i++) { - const collectedEllipsis = collect(inputList[i], ellipsisPattern, literals) - for (const [key, value] of collectedEllipsis) { - if (collected.has(key)) { - collected.set(key, [...(collected.get(key) as any[]), ...value]) - } else { - collected.set(key, value) + // if the ellipis is found, continue to match the pattern until the ellipsis is exhausted. + // (this is done by comparing the length of the input to the length of the remaining pattern) + // it may be the case that the ellipsis pattern is not matched at all. + if (ellipsisFound && improperListLength(currentInput) > improperListLength(currentPattern)) { + const val = collect(headInput, currEllipsisPattern, literals) + for (let [key, value] of val) { + if (!collected.has(key)) { + collected.set(key, []) + } + collected.get(key)?.push(...value) } + currentInput = tailInput // move to the next input + continue } - } - // collect the rest of the list with the back of the pattern - for (let i = inputList.length - backPatternLength; i < inputList.length; i++) { - const collectedRest = collect( - inputList[i], - patternList[i - (inputList.length - patternList.length)], - literals - ) - for (const [key, value] of collectedRest) { - if (collected.has(key)) { - collected.set(key, [...(collected.get(key) as any[]), ...value]) - } else { - collected.set(key, value) + // if the ellipsis symbol is not found, or we have already matched the ellipsis pattern, + // match the headInput with the headPattern + const val = collect(headInput, headPattern, literals) + for (let [key, value] of val) { + if (!collected.has(key)) { + collected.set(key, []) } + collected.get(key)?.push(...value) } + currEllipsisPattern = headPattern + currentPattern = tailPattern + currentInput = tailInput } - - return collected - } - - // final case, where there is no ... syntax - for (let i = 0; i < inputList.length; i++) { - const collectedItems = collect(inputList[i], patternList[i], literals) - for (const [key, value] of collectedItems) { - if (collected.has(key)) { - collected.set(key, [...(collected.get(key) as any[]), ...value]) - } else { - collected.set(key, value) + // now we can compare the last item in the pattern with the rest of the input + const val = collect(currentInput, currentPattern, literals) + for (let [key, value] of val) { + if (!collected.has(key)) { + collected.set(key, []) } + collected.get(key)?.push(...value) } + return collected } return collected @@ -318,15 +407,15 @@ function collect(input: any, pattern: any, literals: string[]): Map, indexToCollect: number = 0): any { - // deal with the cases where the template is a literal - a Scheme Number, string, or boolean - if (typeof template === 'string' || typeof template === 'boolean') { - return template - } - - if (is_number(template)) { - return template - } - + // there are 5 possible forms of the template: + // 1. an identifier + // 2. a literal such as null, a number, a string, or a boolean + // 3. (...