diff --git a/Makefile b/Makefile index cf22811..25230ac 100644 --- a/Makefile +++ b/Makefile @@ -52,7 +52,8 @@ EXTRA_TARGETS= \ $(EXTRA_OBJTYPES_H_TARGETS) \ $(EXTRA_DEBUG_H_TARGETS) \ $(EXTRA_DEBUG_C_TARGETS) \ - generated/UnicodeData.inc + generated/UnicodeData.inc \ + generated/UnicodeDigits.inc MAIN=src/main.c PREAMBLE=generated/preamble.c @@ -184,6 +185,9 @@ unicode/UnicodeData.txt: | unicode generated/UnicodeData.inc: unicode/UnicodeData.txt tools/analyzeCsv.py | generated $(PYTHON) ./tools/analyzeCsv.py > $@ +generated/UnicodeDigits.inc: unicode/UnicodeData.txt tools/makeUnicodeDigits.py | generated + $(PYTHON) ./tools/makeUnicodeDigits.py > $@ + realclean: clean rm -rf tags unicode diff --git a/docs/generated/anf.md b/docs/generated/anf.md index f056ff5..4dd04ca 100644 --- a/docs/generated/anf.md +++ b/docs/generated/anf.md @@ -48,7 +48,7 @@ CexpCond --cases--> CexpCondCases CexpIntCondCases --option--> MaybeBigInt CexpIntCondCases --body--> Exp CexpIntCondCases --next--> CexpIntCondCases -CexpCharCondCases --option--> char +CexpCharCondCases --option--> character CexpCharCondCases --body--> Exp CexpCharCondCases --next--> CexpCharCondCases CexpMatch --condition--> Aexp @@ -73,15 +73,12 @@ ExpLookup --annotatedVar--> AexpAnnotatedVar ExpLookup --body--> Exp CexpCondCases --charCases--> CexpCharCondCases CexpCondCases --intCases--> CexpIntCondCases -Aexp --t--> void_ptr -Aexp --f--> void_ptr -Aexp --v--> void_ptr Aexp --lam--> AexpLam Aexp --var--> HashSymbol Aexp --annotatedVar--> AexpAnnotatedVar Aexp --biginteger--> MaybeBigInt Aexp --littleinteger--> int -Aexp --character--> char +Aexp --character--> character Aexp --prim--> AexpPrimApp Aexp --unary--> AexpUnaryApp Aexp --makeVec--> AexpMakeVec diff --git a/docs/generated/ast.md b/docs/generated/ast.md index 1134312..a2474a6 100644 --- a/docs/generated/ast.md +++ b/docs/generated/ast.md @@ -106,7 +106,7 @@ AstArg --named--> AstNamedArg AstArg --unpack--> AstUnpack AstArg --unpackStruct--> AstUnpackStruct AstArg --number--> MaybeBigInt -AstArg --character--> char +AstArg --character--> character AstArg --tuple--> AstArgList AstExpression --back--> void_ptr AstExpression --wildcard--> void_ptr @@ -116,7 +116,7 @@ AstExpression --lookup--> AstLookup AstExpression --symbol--> HashSymbol AstExpression --gensym--> HashSymbol AstExpression --number--> MaybeBigInt -AstExpression --character--> char +AstExpression --character--> character AstExpression --fun--> AstCompositeFunction AstExpression --nest--> AstNest AstExpression --iff--> AstIff @@ -127,7 +127,6 @@ AstExpression --structure--> AstStruct AstExpression --assertion--> AstExpression AstExpression --error--> AstExpression AstPosition["enum AstPosition"] -AstCharArray["AstCharArray[]"] --entries--> char AstNamespaceArray["AstNamespaceArray[]"] --entries--> AstNamespaceImpl AstFileIdArray["AstFileIdArray[]"] --entries--> file_id AstStringArray["AstStringArray[]"] --entries--> string diff --git a/docs/generated/cekfs.md b/docs/generated/cekfs.md index b6f66b3..9fd5aba 100644 --- a/docs/generated/cekfs.md +++ b/docs/generated/cekfs.md @@ -35,7 +35,7 @@ Value --stdint_imag--> int Value --bigint_imag--> BigInt Value --irrational_imag--> double Value --complex--> Vec -Value --character--> char +Value --character--> character Value --clo--> Clo Value --pclo--> Clo Value --kont--> Kont @@ -46,7 +46,7 @@ Value --opaque--> opaque ByteCodeArray["ByteCodeArray[]"] --entries--> byte Stack["Stack[]"] --entries--> Value ByteCodes["enum ByteCodes"] -CharArray["CharArray[]"] --entries--> char +CharacterArray["CharacterArray[]"] --entries--> character ByteArray["ByteArray[]"] --entries--> byte Frame["Frame[]"] --entries--> Value ValueVal diff --git a/docs/generated/lambda.md b/docs/generated/lambda.md index 852f6ed..d180397 100644 --- a/docs/generated/lambda.md +++ b/docs/generated/lambda.md @@ -55,7 +55,7 @@ LamCond --cases--> LamCondCases LamIntCondCases --constant--> MaybeBigInt LamIntCondCases --body--> LamExp LamIntCondCases --next--> LamIntCondCases -LamCharCondCases --constant--> char +LamCharCondCases --constant--> character LamCharCondCases --body--> LamExp LamCharCondCases --next--> LamCharCondCases LamMatch --index--> LamExp @@ -141,7 +141,7 @@ LamExp --match--> LamMatch LamExp --cond--> LamCond LamExp --amb--> LamAmb LamExp --print--> LamPrint -LamExp --character--> char +LamExp --character--> character LamExp --back--> void_ptr LamExp --error--> void_ptr LamExp --cond_default--> void_ptr diff --git a/docs/generated/pratt.md b/docs/generated/pratt.md index cfbc285..e7b09ea 100644 --- a/docs/generated/pratt.md +++ b/docs/generated/pratt.md @@ -11,8 +11,8 @@ PrattTrie --character--> byte PrattTrie --terminal--> HashSymbol PrattTrie --siblings--> PrattTrie PrattTrie --children--> PrattTrie -PrattBuffer --data--> string -PrattBuffer --start--> string +PrattBuffer --data--> ustring +PrattBuffer --start--> ustring PrattBuffer --length--> int PrattBufList --lineno--> int PrattBufList --filename--> HashSymbol @@ -50,7 +50,7 @@ PrattNumberState["enum PrattNumberState"] PrattStringState["enum PrattStringState"] PrattFixity["enum PrattFixity"] PrattUTF8["PrattUTF8[]"] --entries--> uchar -PrattUnicode["PrattUnicode[]"] --entries--> char +PrattUnicode["PrattUnicode[]"] --entries--> character PrattValueVal PrattValueType ``` diff --git a/docs/generated/tpmc.md b/docs/generated/tpmc.md index 6ebe47a..712a968 100644 --- a/docs/generated/tpmc.md +++ b/docs/generated/tpmc.md @@ -39,7 +39,7 @@ TpmcPatternValue --var--> HashSymbol TpmcPatternValue --comparison--> TpmcComparisonPattern TpmcPatternValue --assignment--> TpmcAssignmentPattern TpmcPatternValue --wildcard--> void_ptr -TpmcPatternValue --character--> char +TpmcPatternValue --character--> character TpmcPatternValue --biginteger--> MaybeBigInt TpmcPatternValue --constructor--> TpmcConstructorPattern TpmcPatternValue --tuple--> TpmcPatternArray diff --git a/src/bytecode.h b/src/bytecode.h index 9a197b0..da53bda 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -24,7 +24,7 @@ # include "cekfs.h" // MUST remember to increment this if bytecodes change -# define CEKF_BYTECODE_VERSION 4 +# define CEKF_BYTECODE_VERSION 5 enum ReadByteCodeStatus { BYTECODES_OK, diff --git a/src/lambda_conversion.c b/src/lambda_conversion.c index 1e11012..77d26d5 100644 --- a/src/lambda_conversion.c +++ b/src/lambda_conversion.c @@ -36,42 +36,26 @@ char *lambda_conversion_function = NULL; // set by --lambda-conversion flag -static LamLetRecBindings *convertFuncDefs(AstDefinitions *definitions, - LamContext *env); -static LamList *convertExpressions(AstExpressions *expressions, - LamContext *env); -static LamSequence *convertSequence(AstExpressions *expressions, - LamContext *env); -static LamLetRecBindings *prependDefinition(AstDefinition *definition, - LamContext *env, - LamLetRecBindings *next); -static LamLetRecBindings *prependDefine(AstDefine *define, LamContext *env, LamLetRecBindings *next); -static LamLetRecBindings *prependGensymDefine(AstGensymDefine *define, LamContext *env, LamLetRecBindings *next); -static LamExp *convertExpression(AstExpression *expression, LamContext *env); -static bool typeHasFields(AstTypeBody *typeBody); -static LamTypeDefList *collectTypeDefs(AstDefinitions *definitions, - LamContext *env); -static void collectAliases(AstDefinitions *definitions, LamContext *env); -static void collectMacros(AstDefinitions *definitions, LamContext *env); -static LamTypeConstructor *collectTypeConstructor(AstTypeConstructor - *typeConstructor, - LamType *type, int size, - int index, bool needsVec, - LamContext *env); -static void collectTypeInfo(HashSymbol *symbol, AstTypeConstructorArgs *args, - LamTypeConstructor *type, - bool needsVec, int enumCount, int index, - int arity, LamContext *env); -static LamTypeConstructorArgs *convertAstTypeList(AstTypeList *typeList, LamContext *env); -static LamTypeConstructorArgs *convertAstTypeMap(AstTypeMap *typeMap, LamContext *env); -static LamTypeConstructorArgs *convertAstTypeConstructorArgs(AstTypeConstructorArgs *args, LamContext *env); -static HashSymbol *dollarSubstitute(HashSymbol *original); -static LamExp *convertNest(AstNest *nest, LamContext *env); -static LamExp *lamConvertDefsNsAndExprs(AstDefinitions *definitions, - AstNamespaceArray *nsArray, - AstExpressions *expressions, - LamContext *env); -static LamExp *convertSymbol(ParserInfo I, HashSymbol *symbol, LamContext *env); +static LamLetRecBindings *convertFuncDefs(AstDefinitions *, LamContext *); +static LamList *convertExpressions(AstExpressions *, LamContext *); +static LamSequence *convertSequence(AstExpressions *, LamContext *); +static LamLetRecBindings *prependDefinition(AstDefinition *, LamContext *, LamLetRecBindings *); +static LamLetRecBindings *prependDefine(AstDefine *, LamContext *, LamLetRecBindings *); +static LamLetRecBindings *prependGensymDefine(AstGensymDefine *, LamContext *, LamLetRecBindings *); +static LamExp *convertExpression(AstExpression *, LamContext *); +static bool typeHasFields(AstTypeBody *); +static LamTypeDefList *collectTypeDefs(AstDefinitions *, LamContext *); +static void collectAliases(AstDefinitions *, LamContext *); +static void collectMacros(AstDefinitions *, LamContext *); +static LamTypeConstructor *collectTypeConstructor(AstTypeConstructor *, LamType *, int, int, bool, LamContext *); +static void collectTypeInfo(HashSymbol *, AstTypeConstructorArgs *, LamTypeConstructor *, bool, int, int, int, LamContext *); +static LamTypeConstructorArgs *convertAstTypeList(AstTypeList *, LamContext *); +static LamTypeConstructorArgs *convertAstTypeMap(AstTypeMap *, LamContext *); +static LamTypeConstructorArgs *convertAstTypeConstructorArgs(AstTypeConstructorArgs *, LamContext *); +static HashSymbol *dollarSubstitute(HashSymbol *); +static LamExp *convertNest(AstNest *, LamContext *); +static LamExp *lamConvertDefsNsAndExprs(AstDefinitions *, AstNamespaceArray *, AstExpressions *, LamContext *); +static LamExp *convertSymbol(ParserInfo, HashSymbol *, LamContext *); #ifdef DEBUG_LAMBDA_CONVERT # include "debugging_on.h" @@ -91,6 +75,10 @@ static void conversionError(ParserInfo I, char *message, ...) { can_happen(" at +%d %s", I.lineno, I.filename); } +static LamExp *lamExpError(ParserInfo I) { + return newLamExp_Var(I, errorSymbol()); +} + static void addCurrentNamespaceToContext(LamContext *context, int id) { LamInfo *lamInfo = newLamInfo_Nsid(CPI(context), id); int save = PROTECT(lamInfo); @@ -786,13 +774,13 @@ static HashSymbol *dollarSubstitute(HashSymbol *symbol) { #define CHECK_ONE_ARG(name, args) do { \ int count = countLamList(args); \ if (count != 1) \ - cant_happen("expected 1 arg in " #name ", got %d", count); \ + conversionError(CPI(args), "expected 1 arg in " #name ", got %d", count); \ } while(0) #define CHECK_TWO_ARGS(name, args) do { \ int count = countLamList(args); \ if (count != 2) \ - cant_happen("expected 2 args in " #name ", got %d", count); \ + conversionError(CPI(args), "expected 2 args in " #name ", got %d", count); \ } while(0) static LamExp *makeCallCC(LamList *args) { @@ -851,7 +839,7 @@ static void bindMacroArgs(LamExpTable *table, LamVarList *fargs, LamList *aargs) static LamExp *expandMacro(HashSymbol *name, LamMacro *macro, LamList *args) { if (countLamList(args) != countLamVarList(macro->args)) { conversionError(CPI(args), "wrong number of arguments to macro %s", name->name); - return newLamExp_Var(CPI(args), name); + return newLamExp_Error(CPI(args)); } if (countLamList(args) == 0) { return macro->exp; @@ -1009,7 +997,8 @@ static void checkNoUnrecognisedTags(LamTypeTags *lamTags, AstTaggedExpressions * static void checkTagNotDuplicate(HashSymbol *tag, AstTaggedExpressions *tags) { if (tags == NULL) return; if (tag == tags->tag) { - cant_happen("duplicate tag %s", tag->name); + conversionError(CPI(tags), "duplicate tag %s", tag->name); + return; } checkTagNotDuplicate(tag, tags->next); } @@ -1078,7 +1067,8 @@ static LamExp *makeConstructorApplication(LamExp *constructor, LamList *args) { static LamExp *makeStructureApplication(LamExp *constructor, AstTaggedExpressions *tags, LamContext *env) { if (constructor->val.constructor->tags == NULL) { - cant_happen("non-struct constructor applied to struct"); + conversionError(CPI(constructor), "non-struct constructor applied to struct"); + return lamExpError(CPI(tags)); } checkAllTagsPresent(constructor->val.constructor->tags, tags); checkNoUnrecognisedTags(constructor->val.constructor->tags, tags); @@ -1086,7 +1076,8 @@ static LamExp *makeStructureApplication(LamExp *constructor, AstTaggedExpression int arity = findUnderlyingArity(constructor); int nargs = (int) countAstTaggedExpressions(tags); if (nargs != arity) { - cant_happen("wrong number of args in structure application"); + conversionError(CPI(constructor), "wrong number of args in structure application"); + return lamExpError(CPI(tags)); } LamList *args = convertTagsToArgs(constructor->val.constructor->tags, tags, env); int save = PROTECT(args); @@ -1117,7 +1108,8 @@ static LamTypeConstructorInfo *findConstructor(AstLookupOrSymbol *los, LamContex static LamExp *convertStructure(AstStruct *structure, LamContext *env) { LamTypeConstructorInfo *info = findConstructor(structure->symbol, env); if (info == NULL) { - cant_happen("cannot find constructor"); + conversionError(CPI(structure), "cannot find constructor"); + return lamExpError(CPI(structure)); } LamExp *constructor = newLamExp_Constructor(CPI(info), info); int save = PROTECT(constructor); @@ -1210,7 +1202,8 @@ static AstArgList *rewriteAstTaggedArgList(LamTypeTags *allTags, AstTaggedArgLis static AstArg *rewriteAstUnpackStruct(AstUnpackStruct *structure, LamContext *env) { LamTypeConstructorInfo *info = findConstructor(structure->symbol, env); if (info->tags == NULL) { - cant_happen("constructor not a struct"); + conversionError(CPI(structure), "constructor not a struct"); + return newAstArg_Wildcard(CPI(structure)); } AstArgList *args = rewriteAstTaggedArgList(info->tags, structure->argList, env); int save = PROTECT(args); @@ -1290,10 +1283,12 @@ static LamLam *convertCompositeBodies(int nargs, AstCompositeFunction *fun, return result; } -static LamExp *convertCompositeFun(AstCompositeFunction *fun, LamContext *env) { +static LamExp *convertCompositeFun(ParserInfo PI, AstCompositeFunction *fun, LamContext *env) { ENTER(convertCompositeFun); - if (fun == NULL) - cant_happen("composite function with no components"); + if (fun == NULL) { + conversionError(PI, "composite function with no components"); + return lamExpError(PI); + } int nargs = countAstArgList(fun->function->argList); LamLam *lambda = convertCompositeBodies(nargs, fun, env); DEBUG("convertCompositeBodies returned %p", lambda); @@ -1395,7 +1390,7 @@ static LamExp *convertExpression(AstExpression *expression, LamContext *env) { break; case AST_EXPRESSION_TYPE_FUN: DEBUG("fun"); - result = convertCompositeFun(expression->val.fun, env); + result = convertCompositeFun(CPI(expression), expression->val.fun, env); break; case AST_EXPRESSION_TYPE_NEST: DEBUG("nest"); diff --git a/src/pratt.yaml b/src/pratt.yaml index 8ec1e42..e6fae2d 100644 --- a/src/pratt.yaml +++ b/src/pratt.yaml @@ -36,8 +36,8 @@ structs: # Both a parse buffer and a parse token data type (like yytext) PrattBuffer: - data: string - start: string=NULL + data: ustring + start: ustring=NULL length: int=0 # Stack of buffers parsed in order diff --git a/src/pratt_parser.c b/src/pratt_parser.c index 3f97e7c..282cea2 100644 --- a/src/pratt_parser.c +++ b/src/pratt_parser.c @@ -51,93 +51,89 @@ AstStringArray *include_paths = NULL; -static AstExpression *expr_bp(PrattParser *parser, int min_bp); -static AstExpression *errorExpression(ParserInfo); -static PrattRecord *fetchRecord(PrattParser *parser, HashSymbol *symbol, bool fatal); -static PrattTrie *makePrattTrie(PrattParser *parser, PrattTrie *C); - -static AstExpression *grouping(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *list(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *doPrefix(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *tuple(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *unsafe(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *fn(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *macro(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *gensym(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *call(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *infixLeft(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *infixRight(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *lookup(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *iff(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *switchExp(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *print(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *nestexpr(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *error(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *back(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *passert(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *makeChar(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *makeAtom(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *makeNumber(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *makeString(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *wildcard(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *exprAlias(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *userPrefix(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *userInfixLeft(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *userInfixRight(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); -static AstExpression *userPostfix(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); - -static AstExpressions *expressions(PrattParser *parser); -static AstDefinitions *definitions(PrattParser *, HashSymbol *); -static AstExpressions *statements(PrattParser *, HashSymbol *); -static AstExpression *expression(PrattParser *); -static AstDefinition *definition(PrattParser *); -static AstDefinition *assignment(PrattParser *); -static AstDefinition *gensym_assignment(PrattParser *); -static AstDefinition *typedefinition(PrattParser *); -static AstDefinition *defun(PrattParser *, bool, bool); -static AstDefinition *defmacro(PrattParser *); -static AstDefinition *link(PrattParser *); -static AstDefinition *alias(PrattParser *); -static HashSymbol *symbol(PrattParser *); -static AstTypeSymbols *type_variables(PrattParser *); -static AstTypeBody *type_body(PrattParser *); +static AstAltArgs *alt_args(PrattParser *); +static AstAltFunction *alt_function(PrattParser *); +static AstArg *astCharacterToFarg(ParserInfo, Character); +static AstArg *astExpressionToFarg(PrattParser *parser, AstExpression *expr); +static AstArg *astFunCallToFarg(PrattParser *parser, AstFunCall *funCall); +static AstArg *astLookupToFarg(PrattParser *parser, AstLookup *lookup); +static AstArg *astNumberToFarg(ParserInfo, MaybeBigInt *); +static AstArg *astStructureToFarg(PrattParser *parser, AstStruct *structure); +static AstArg *astSymbolToFarg(ParserInfo, HashSymbol *); +static AstArg *astTupleToFarg(PrattParser *parser, AstExpressions *tuple); +static AstArgList *astExpressionsToArgList(PrattParser *parser, AstExpressions *exprs); +static AstArgList *fargs(PrattParser *); static AstCompositeFunction *composite_function(PrattParser *); static AstCompositeFunction *functions(PrattParser *); -static PrattUTF8 *rawString(PrattParser *); -static PrattUTF8 *str(PrattParser *); -static AstNamespace *parseLink(PrattParser *, unsigned char *, HashSymbol *); -static void storeNamespace(PrattParser *, AstNamespace *); -static AstType *type_type(PrattParser *); -static AstTypeClause *type_clause(PrattParser *); -static HashSymbol *type_variable(PrattParser *); -static AstTypeConstructor *type_constructor(PrattParser *); -static AstTypeList *type_list(PrattParser *); -static AstTypeMap *type_map(PrattParser *); -static AstAltFunction *alt_function(PrattParser *); -static AstTypeFunction *type_function(PrattParser *); -static AstTypeList *type_tuple(PrattParser *); -static AstAltArgs *alt_args(PrattParser *); -static AstNest *nest(PrattParser *); -static AstNest *nest_body(PrattParser *, HashSymbol *); -static AstLookupOrSymbol *scoped_symbol(PrattParser *); -static AstArgList *fargs(PrattParser *); -static AstFunCall *switchFC(PrattParser *parser); -static PrattUnicode *PrattUTF8ToUnicode(PrattUTF8 *); -static void synchronize(PrattParser *parser); - -static AstArg *astFunCallToFarg(PrattParser *parser, AstFunCall *funCall); -static AstArg *astLookupToFarg(PrattParser *parser, AstLookup *lookup); -static AstArg *astSymbolToFarg(ParserInfo, HashSymbol *); -static AstArg *astNumberToFarg(ParserInfo, MaybeBigInt *); -static AstArg *astCharacterToFarg(ParserInfo, Character); -static AstArg *astTupleToFarg(PrattParser *parser, AstExpressions *tuple); -static AstArg *astStructureToFarg(PrattParser *parser, AstStruct *structure); -static AstArg *astExpressionToFarg(PrattParser *parser, AstExpression *expr); -static AstArgList *astExpressionsToArgList(PrattParser *parser, AstExpressions *exprs); -static AstDefinitions *prattParseLink(PrattParser *, char *); -static AstNest *top(PrattParser *parser); - -static AstFileIdArray *fileIdStack = NULL; +static AstDefinition *alias(PrattParser *); +static AstDefinition *assignment(PrattParser *); +static AstDefinition *definition(PrattParser *); +static AstDefinition *defmacro(PrattParser *); +static AstDefinition *defun(PrattParser *, bool, bool); +static AstDefinition *gensym_assignment(PrattParser *); +static AstDefinition *link(PrattParser *); +static AstDefinition *typedefinition(PrattParser *); +static AstDefinitions *definitions(PrattParser *, HashSymbol *); +static AstDefinitions *prattParseLink(PrattParser *, char *); +static AstExpression *back(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *call(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *doPrefix(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *errorExpression(ParserInfo); +static AstExpression *error(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *exprAlias(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *expression(PrattParser *); +static AstExpression *expressionPrecedence(PrattParser *, int); +static AstExpression *fn(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *gensym(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *grouping(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *iff(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *infixLeft(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *infixRight(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *list(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *lookup(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *macro(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *makeAtom(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *makeChar(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *makeNumber(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *makeString(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *nestexpr(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *passert(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *print(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *switchExp(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *tuple(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *unsafe(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *userInfixLeft(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *userInfixRight(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *userPostfix(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *userPrefix(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpression *wildcard(PrattRecord *, PrattParser *, AstExpression *, PrattToken *); +static AstExpressions *expressions(PrattParser *); +static AstExpressions *statements(PrattParser *, HashSymbol *); +static AstFileIdArray *fileIdStack = NULL; +static AstFunCall *switchFC(PrattParser *parser); +static AstLookupOrSymbol *scoped_symbol(PrattParser *); +static AstNamespace *parseLink(PrattParser *, unsigned char *, HashSymbol *); +static AstNest *nest_body(PrattParser *, HashSymbol *); +static AstNest *nest(PrattParser *); +static AstNest *top(PrattParser *parser); +static AstTypeBody *type_body(PrattParser *); +static AstTypeClause *type_clause(PrattParser *); +static AstTypeConstructor *type_constructor(PrattParser *); +static AstTypeFunction *type_function(PrattParser *); +static AstTypeList *type_list(PrattParser *); +static AstTypeList *type_tuple(PrattParser *); +static AstTypeMap *type_map(PrattParser *); +static AstTypeSymbols *type_variables(PrattParser *); +static AstType *type_type(PrattParser *); +static HashSymbol *symbol(PrattParser *); +static HashSymbol *type_variable(PrattParser *); +static PrattRecord *fetchRecord(PrattParser *, HashSymbol *, bool); +static PrattTrie *makePrattTrie(PrattParser *, PrattTrie *); +static PrattUnicode *PrattUTF8ToUnicode(PrattUTF8 *); +static PrattUTF8 *rawString(PrattParser *); +static PrattUTF8 *str(PrattParser *); +static void storeNamespace(PrattParser *, AstNamespace *); +static void synchronize(PrattParser *parser); #ifdef DEBUG_PRATT_PARSER void disablePrattDebug(void) { @@ -377,7 +373,7 @@ static AstProg *prattParseThing(PrattLexer *thing) { AstExpressions *exprs = newAstExpressions(CPI(expression), expression, NULL); PROTECT(exprs); nest = newAstNest(CPI(expression), definitions, exprs); - AstProg *prog = astNestToProg(nest); + AstProg *prog = astNestToProg(nest); // has direct access to namespaces UNPROTECT(save); return prog; } @@ -586,7 +582,7 @@ static AstExpressions *statements(PrattParser *parser, HashSymbol *terminal) { } static AstExpression *expression(PrattParser *parser) { - AstExpression *res = expr_bp(parser, 0); + AstExpression *res = expressionPrecedence(parser, 0); int save = PROTECT(res); synchronize(parser); UNPROTECT(save); @@ -618,6 +614,10 @@ static void validateOperator(PrattParser *parser, PrattUTF8 *operator) { parserError(parser, "operator cannot be empty string"); } else if (isdigit(operator->entries[0])) { parserError(parser, "operator cannot start with a numeric digit"); + } else if (utf8_isopen(operator->entries)) { + parserError(parser, "operator cannot start with an opening bracket"); + } else if (utf8_isclose(operator->entries)) { + parserError(parser, "operator cannot start with a closing bracket"); } else { for (Index i = 0; i < operator->size; i++) { if (isspace(operator->entries[i])) { @@ -1658,7 +1658,7 @@ static PrattRecord *fetchRecord(PrattParser *parser, HashSymbol *symbol, bool fa static AstExpression *grouping(PrattRecord *record, PrattParser *parser, AstExpression *lhs __attribute__((unused)), PrattToken *tok __attribute__((unused))) { ENTER(grouping); - AstExpression *res = expr_bp(parser, record->prefixPrec); + AstExpression *res = expressionPrecedence(parser, record->prefixPrec); int save = PROTECT(res); consume(parser, TOK_CLOSE()); LEAVE(grouping); @@ -1712,7 +1712,7 @@ PrattToken *tok __attribute__((unused))) { static AstExpression *doPrefix(PrattRecord *record, PrattParser *parser, AstExpression *lhs __attribute__((unused)), PrattToken *tok __attribute__((unused))) { ENTER(doPrefix); - AstExpression *res = expr_bp(parser, record->prefixPrec + 1); + AstExpression *res = expressionPrecedence(parser, record->prefixPrec + 1); int save = PROTECT(res); res = makePrattUnary(CPI(res), record->symbol, res); LEAVE(doPrefix); @@ -1722,7 +1722,7 @@ PrattToken *tok __attribute__((unused))) { static AstExpressions *collectArguments(PrattParser *parser) { ENTER(collectArguments); - AstExpression *arg = expr_bp(parser, 0); + AstExpression *arg = expressionPrecedence(parser, 0); int save = PROTECT(arg); AstExpressions *next = NULL; if (match(parser, TOK_COMMA())) { @@ -1937,7 +1937,7 @@ static AstExpression *tuple(PrattRecord *record __attribute__((unused)), static AstExpression *infixLeft(PrattRecord *record, PrattParser *parser, AstExpression *lhs, PrattToken *tok __attribute__((unused))) { ENTER(infixLeft); - AstExpression *rhs = expr_bp(parser, record->infixPrec + 1); + AstExpression *rhs = expressionPrecedence(parser, record->infixPrec + 1); int save = PROTECT(rhs); rhs = makePrattBinary(CPI(lhs), record->symbol, lhs, rhs); LEAVE(infixLeft); @@ -1948,7 +1948,7 @@ PrattToken *tok __attribute__((unused))) { static AstExpression *lookup(PrattRecord *record, PrattParser *parser, AstExpression *lhs, PrattToken *tok __attribute__((unused))) { ENTER(lookup); - AstExpression *rhs = expr_bp(parser, record->infixPrec - 1); + AstExpression *rhs = expressionPrecedence(parser, record->infixPrec - 1); int save = PROTECT(rhs); if (lhs->type == AST_EXPRESSION_TYPE_SYMBOL) { int index = 0; @@ -1969,7 +1969,7 @@ PrattToken *tok __attribute__((unused))) { static AstExpression *infixRight(PrattRecord *record, PrattParser *parser, AstExpression *lhs, PrattToken *tok __attribute__((unused))) { ENTER(infixRight); - AstExpression *rhs = expr_bp(parser, record->infixPrec - 1); + AstExpression *rhs = expressionPrecedence(parser, record->infixPrec - 1); int save = PROTECT(rhs); rhs = makePrattBinary(CPI(rhs), record->symbol, lhs, rhs); LEAVE(infixRight); @@ -1982,7 +1982,7 @@ static AstExpression *exprAlias(PrattRecord *record, AstExpression *lhs, PrattToken *tok __attribute__((unused))) { ENTER(exprAlias); - AstExpression *rhs = expr_bp(parser, record->infixPrec - 1); + AstExpression *rhs = expressionPrecedence(parser, record->infixPrec - 1); int save = PROTECT(rhs); HashSymbol *alias = NULL; if (lhs->type == AST_EXPRESSION_TYPE_SYMBOL) { @@ -2004,7 +2004,7 @@ static AstExpression *userPrefix(PrattRecord *record, AstExpression *lhs __attribute__((unused)), PrattToken *tok) { ENTER(userPrefix); - AstExpression *rhs = expr_bp(parser, record->prefixPrec); + AstExpression *rhs = expressionPrecedence(parser, record->prefixPrec); int save = PROTECT(rhs); AstExpressions *arguments = newAstExpressions(CPI(rhs), rhs, NULL); PROTECT(arguments); @@ -2021,7 +2021,7 @@ static AstExpression *userInfix(PrattRecord *record, PrattToken *tok, int precShift) { ENTER(userInfix); - AstExpression *rhs = expr_bp(parser, record->infixPrec + precShift); + AstExpression *rhs = expressionPrecedence(parser, record->infixPrec + precShift); int save = PROTECT(rhs); AstExpressions *arguments = newAstExpressions(CPI(rhs), rhs, NULL); PROTECT(arguments); @@ -2184,8 +2184,8 @@ static AstExpression *makeString(PrattRecord *record __attribute__((unused)), return res; } -static AstExpression *expr_bp(PrattParser *parser, int min_bp) { - ENTER(expr_bp); +static AstExpression *expressionPrecedence(PrattParser *parser, int minimumPrecedence) { + ENTER(expressionPrecedence); AstExpression *lhs = NULL; PrattToken *tok = next(parser); int save = PROTECT(tok); @@ -2207,16 +2207,16 @@ static AstExpression *expr_bp(PrattParser *parser, int min_bp) { DEBUG("PEEKED OP %s", op->type->name); PrattRecord *record = fetchRecord(parser, op->type, true); if(record->postfixOp != NULL) { - DEBUG("postfix %d %d", record->postfixPrec, min_bp); - if (record->postfixPrec < min_bp) { + DEBUG("postfix %d %d", record->postfixPrec, minimumPrecedence); + if (record->postfixPrec < minimumPrecedence) { break; } next(parser); lhs = record->postfixOp(record, parser, lhs, op); REPLACE_PROTECT(save, lhs); } else if (record->infixOp != NULL) { - DEBUG("infix %d %d", record->infixPrec, min_bp); - if (record->infixPrec < min_bp) { + DEBUG("infix %d %d", record->infixPrec, minimumPrecedence); + if (record->infixPrec < minimumPrecedence) { break; } next(parser); @@ -2229,7 +2229,7 @@ static AstExpression *expr_bp(PrattParser *parser, int min_bp) { } } } - LEAVE(expr_bp); + LEAVE(expressionPrecedence); UNPROTECT(save); return lhs; } diff --git a/src/pratt_scanner.c b/src/pratt_scanner.c index 2354185..8cad7c2 100644 --- a/src/pratt_scanner.c +++ b/src/pratt_scanner.c @@ -319,10 +319,6 @@ HashSymbol *TOK_PRINT(void) { return s; } -static bool isLeadingUtf8(char c) { - return isTwoByteUtf8((Byte) c) || isThreeByteUtf8((Byte) c) || isFourByteUtf8((Byte) c); -} - static bool isALPHA(char c) { return isalpha(c) || c == '_'; } @@ -392,7 +388,6 @@ static HashSymbol *lookupTrieRecursive(PrattTrie *trie, PrattBuffer *buffer, int last, HashSymbol *found) { - // DEBUG("lookupTrieRecursive %p %c", trie, buffer->start[buffer->length]); if (trie == NULL || buffer->start[buffer->length] > trie->character) { buffer->length = last; return found; @@ -427,7 +422,7 @@ static PrattToken *tokenFromBigInt(PrattBufList *bufList, MaybeBigInt *bi, HashS } static HashSymbol *symbolFromBuffer(PrattBuffer *buffer) { - return newSymbolLength(buffer->start, buffer->length); + return newSymbolLength((char *)buffer->start, buffer->length); } static PrattToken *tokenFromString(PrattBufList *bufList, PrattUTF8 *string, HashSymbol *tokenType) { @@ -715,9 +710,9 @@ static PrattToken *parseNumeric(PrattLexer *lexer) { } MaybeBigInt *bi = NULL; if (floating) { - bi = makeIrrational(buffer->start, buffer->length); + bi = makeIrrational((char *)buffer->start, buffer->length); } else { - bi = makeMaybeBigInt(buffer->start, buffer->length); + bi = makeMaybeBigInt((char *)buffer->start, buffer->length); } int save = PROTECT(bi); PrattToken *token = tokenFromBigInt(lexer->bufList, bi, type); @@ -961,6 +956,12 @@ static PrattToken *parseString(PrattParser *parser, bool single, char sep) { return token; } +static Character nextCharacter(PrattBuffer *buffer) { + Character dest; + buffer->start = utf8Sgetc(buffer->start, &dest); + return dest; +} + PrattToken *next(PrattParser *parser) { PrattLexer *lexer = parser->lexer; PrattToken *lookahead = dequeueToken(lexer); @@ -973,11 +974,13 @@ PrattToken *next(PrattParser *parser) { buffer->start = buffer->data; } while (buffer->start[0]) { - if (isspace(buffer->start[0])) { + // whitespace + if (utf8_isspace(buffer->start)) { if (buffer->start[0] == '\n') { ++lexer->bufList->lineno; } - ++(buffer->start); + nextCharacter(buffer); + // comment } else if (buffer->start[0] == '/' && buffer->start[1] == '/') { while (buffer->start[0] && buffer->start[0] != '\n') { ++buffer->start; @@ -986,33 +989,41 @@ PrattToken *next(PrattParser *parser) { ++buffer->start; ++lexer->bufList->lineno; } - } else if (isALPHA(buffer->start[0]) || isLeadingUtf8(buffer->start[0])) { + // alpha + } else if (utf8_isalpha(buffer->start)) { PrattToken *token = lookupTrieSymbol(parser); if (token != NULL) { return token; } else { return parseIdentifier(parser); } + // digit (no unicode support yet) } else if (isdigit(buffer->start[0])) { return parseNumeric(lexer); + // string } else if (buffer->start[0] == '"') { return parseString(parser, false, '"'); + // char } else if (buffer->start[0] == '\'') { return parseString(parser, true, '\''); - } else if (ispunct(buffer->start[0])) { + // punctuation and symbols + } else if (utf8_ispunct(buffer->start) || utf8_issymbol(buffer->start)) { PrattToken *token = lookupTrieSymbol(parser); if (token != NULL) { return token; } parserError(parser, "unrecognised operator %c", buffer->start[0]); - ++buffer->start; + cant_happen("abort"); + nextCharacter(buffer); return tokenERROR(lexer); + // bad UTF8 } else if (isTrailingByteUtf8((Byte) (buffer->start[0]))) { parserError(parser, "malformed utf8"); ++buffer->start; return tokenERROR(lexer); + // unrecognised } else { - parserError(parser, "unexpected character '%c'", buffer->start[0]); + parserError(parser, "unexpected character 0x%02x", buffer->start[0]); ++buffer->start; return tokenERROR(lexer); } @@ -1079,12 +1090,12 @@ PrattTrie *insertPrattTrie(PrattTrie *current, HashSymbol *symbol) { } static PrattBuffer *prattBufferFromString(char *string) { - return newPrattBuffer(string); + return newPrattBuffer((unsigned char *)string); } static PrattBuffer *prattBufferFromFileName(char *path) { char *content = readFile(path); - return newPrattBuffer(content); + return newPrattBuffer((unsigned char *)content); } PrattToken *peek(PrattParser *parser) { diff --git a/src/preamble.fn b/src/preamble.fn index 8edf8a6..988e41e 100644 --- a/src/preamble.fn +++ b/src/preamble.fn @@ -24,9 +24,10 @@ namespace macro identity_macro(x) { x } -infix right 20 "then" then; +infix right 20 "then" amb; infix left 50 "==" equal_to; infix left 50 "!=" not_equal_to; +infix left 50 "≠" not_equal_to; infix left 50 ">" greater_than; infix left 50 "<" less_than; infix left 50 "<=" less_than_or_equal_to; @@ -37,10 +38,12 @@ infix left 90 "-" subtraction; prefix 100 "-" negation; prefix 100 "+" identity_macro; infix left 100 "*" multiplication; +infix left 100 "×" multiplication; infix left 100 "/" division; +infix left 100 "÷" division; infix left 100 "%" modulus; infix left 110 "**" exponential; -prefix 120 "here" here; +prefix 120 "here" callcc; typedef cmp { lt | eq | gt } typedef bool { false | true } diff --git a/src/primitives.yaml b/src/primitives.yaml index bf13c8e..075f52f 100644 --- a/src/primitives.yaml +++ b/src/primitives.yaml @@ -100,6 +100,11 @@ string: printf: "%s" valued: true +ustring: + cname: "unsigned char *" + printf: "%s" + valued: true + PrattOp: cname: PrattOp printf: "%p" diff --git a/src/symbols.c b/src/symbols.c index 50acfb7..0e94106 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -72,7 +72,7 @@ HashSymbol *putsSymbol() { HashSymbol *hereSymbol() { static HashSymbol *res = NULL; if (res == NULL) { - res = newSymbol("here"); + res = newSymbol("callcc"); } return res; } @@ -80,7 +80,7 @@ HashSymbol *hereSymbol() { HashSymbol *thenSymbol() { static HashSymbol *res = NULL; if (res == NULL) { - res = newSymbol("then"); + res = newSymbol("amb"); } return res; } diff --git a/src/tc_analyze.c b/src/tc_analyze.c index 7a5ab83..dd4941d 100644 --- a/src/tc_analyze.c +++ b/src/tc_analyze.c @@ -1770,7 +1770,7 @@ static void addIntBinOpToEnv(TcEnv *env, HashSymbol *symbol) { static void addThenToEnv(TcEnv *env) { // a -> a -> a - TcType *freshType = makeFreshVar("then"); + TcType *freshType = makeFreshVar(thenSymbol()->name); int save = PROTECT(freshType); addBinOpToEnv(env, thenSymbol(), freshType); UNPROTECT(save); diff --git a/src/unicode.c b/src/unicode.c index f8f3ba0..440578b 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -// This file contains a lookup table for the unicode general category +// This file includes a lookup table for the unicode general category // values of every unicode character, plus a set of unicode equivalents // to the functions defined in ctype.h. // Where unicode and ctype.h disagree (for example unicode considers TAB @@ -27,10 +27,39 @@ #include "unicode.h" #include "common.h" +struct UnicodeDigit { + int code; + int dec; +}; + +static struct UnicodeDigit digits[] = { +#include "UnicodeDigits.inc" +}; + static unsigned char category[] = { #include "UnicodeData.inc" }; +// untested brute-force binary search +int unicode_getdec(Character c) { + int start = 0; + int end = NUM_UNICODE_DIGITS - 1; + for (;;) { + eprintf("getUnicodeDec %d - %d\n", start, end); + if (start == end) { + cant_happen("failed to find decimal digit %lc", c); + } + int middle = start + (end - start) / 2; + if (digits[middle].code == c) { + return digits[middle].dec; + } else if (digits[middle].code < c) { + start = middle; + } else { + end = middle; + } + } +} + bool unicode_isvalid(Character c) { return c >= 0 && c <= UNICODE_MAX; } @@ -39,6 +68,18 @@ bool unicode_isascii(Character c) { return c >= 0 && c < 0x80; } +bool unicode_isopen(Character c) { + return unicode_isvalid(c) && (category[c] == GC_Ps); +} + +bool unicode_isclose(Character c) { + return unicode_isvalid(c) && (category[c] == GC_Pe); +} + +bool unicode_issymbol(Character c) { + return unicode_isvalid(c) && ((category[c] & GC_MASK) == GC_S); +} + bool unicode_isalnum(Character c) { return unicode_isalpha(c) || unicode_isnumber(c); } @@ -72,7 +113,7 @@ bool unicode_isprint(Character c) { } bool unicode_ispunct(Character c) { - return unicode_isvalid(c) && ((category[c] & GC_MASK) == GC_P && category[c] != GC_Pc); + return unicode_isvalid(c) && ((unicode_isascii(c) && ispunct(c)) || ((category[c] & GC_MASK) == GC_P && category[c] != GC_Pc)); } bool unicode_isspace(Character c) { diff --git a/src/unicode.h b/src/unicode.h index 2a38ca6..fedfd41 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -74,17 +74,21 @@ bool unicode_isalnum(Character c); bool unicode_isalpha(Character c); bool unicode_isascii(Character c); bool unicode_isblank(Character c); +bool unicode_isclose(Character c); bool unicode_iscntrl(Character c); bool unicode_isdigit(Character c); bool unicode_isgraph(Character c); bool unicode_islower(Character c); bool unicode_isnumber(Character c); // includes digits, roman numerals, vulgar fractions etc. +bool unicode_isopen(Character c); bool unicode_isprint(Character c); bool unicode_ispunct(Character c); bool unicode_isspace(Character c); +bool unicode_issymbol(Character c); bool unicode_isupper(Character c); bool unicode_isvalid(Character c); bool unicode_isxdigit(Character c); int unicode_category(Character c); +int unicode_getdec(Character c); #endif diff --git a/src/utf8.c b/src/utf8.c index 89e9166..82dcc59 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -401,6 +401,12 @@ bool utf8_isspace(unsigned char *s) { return unicode_isspace(c); } +bool utf8_issymbol(unsigned char *s) { + Character c = 0; + utf8Sgetc(s, &c); + return unicode_issymbol(c); +} + bool utf8_isupper(unsigned char *s) { Character c = 0; utf8Sgetc(s, &c); @@ -419,3 +425,15 @@ bool utf8_isxdigit(unsigned char *s) { return unicode_isxdigit(c); } +bool utf8_isopen(unsigned char *s) { + Character c = 0; + utf8Sgetc(s, &c); + return unicode_isopen(c); +} + +bool utf8_isclose(unsigned char *s) { + Character c = 0; + utf8Sgetc(s, &c); + return unicode_isclose(c); +} + diff --git a/src/utf8.h b/src/utf8.h index 45f4196..7f68c6a 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -69,14 +69,17 @@ bool utf8_isalnum(unsigned char *s); bool utf8_isalpha(unsigned char *s); bool utf8_isascii(unsigned char *s); bool utf8_isblank(unsigned char *s); +bool utf8_isclose(unsigned char *s); bool utf8_iscntrl(unsigned char *s); bool utf8_isdigit(unsigned char *s); bool utf8_isgraph(unsigned char *s); bool utf8_islower(unsigned char *s); bool utf8_isnumber(unsigned char *s); +bool utf8_isopen(unsigned char *s); bool utf8_isprint(unsigned char *s); bool utf8_ispunct(unsigned char *s); bool utf8_isspace(unsigned char *s); +bool utf8_issymbol(unsigned char *s); bool utf8_isupper(unsigned char *s); bool utf8_isvalid(unsigned char *s); bool utf8_isxdigit(unsigned char *s); diff --git a/tests/fn/test_arithmetic.fn b/tests/fn/test_arithmetic.fn index e63ffd5..cf62969 100644 --- a/tests/fn/test_arithmetic.fn +++ b/tests/fn/test_arithmetic.fn @@ -22,7 +22,7 @@ in assert(1/3 % 8 == 1 / 3); assert(((1/4) ** (1/2)) == 1/2); assert(1 + 4294967295 == 4294967296); - assert(1 / (5 + 7i) == 5/74 + (-7i/74)); + assert(1 ÷ (5 + 7i) == 5/74 + (-7i/74)); assert(16 % 12 == 4); assert(2 * 2 == 4); assert(2 ** 2 == 4); @@ -34,7 +34,7 @@ in assert((2 / 3) % (4 / 5) == 2/3); assert((2 / 3) * (4 / 5) == 8/15); assert(2 / 3 + 4 / 5 == 22/15); - assert(2/3 * (5 + 7i) == 10/3 + 14i/3); + assert(2/3 × (5 + 7i) == 10/3 + 14i/3); assert((2 / 3) / (6 / 5) == 5/9); assert(2 + 3i == 2 + 3i); assert(2 - 3i == 2 + -3i); diff --git a/tests/fn/test_sqlite.fn b/tests/fn/test_sqlite.fn index 87f5053..59bef20 100644 --- a/tests/fn/test_sqlite.fn +++ b/tests/fn/test_sqlite.fn @@ -4,13 +4,11 @@ let in sql.with_database("unicode/unicode.db", sql.with_statement("select * from unicode " - "where int_code between ? and ?", - sql.with_bindings([basic_number(0x13420), - basic_number(0x1342e)], + "where int_code = ?", + sql.with_bindings([basic_number(0x13420)], sql.with_results(fn (row) { - unsafe switch (dict.lookup("int_code", row)) { - (some(basic_number(n))) { putc(chr(n)) } - }; - sql.print_row(row); - putc('\n'); + let + name = dict.lookup("name", row); + in + assert(name == some(basic_string("EGYPTIAN HIEROGLYPH AA018"))); })))); diff --git a/tools/makeUnicodeDigits.py b/tools/makeUnicodeDigits.py new file mode 100755 index 0000000..abf650d --- /dev/null +++ b/tools/makeUnicodeDigits.py @@ -0,0 +1,55 @@ +import csv + +count = 0 +with open('unicode/UnicodeData.txt', newline='') as csvin: + reader = csv.reader(csvin, delimiter=';') + for row in reader: + if row[6]: + print(f"{{ .code = 0x{row[0]}, .dec = {row[6]} }},") + count += 1 +print(f"# define NUM_UNICODE_DIGITS {count}") + +# 2: {'Zp', 'Pf', 'Co', 'Zl', 'Lt', 'Nl', 'Pc', 'Pe', 'Pi', 'Pd', 'Nd', 'Mn', 'Cc', 'Lu', 'Cs', 'Sk', 'Me', 'Cf', 'Ps', 'So', 'Lm', 'Po', 'Sc', 'Mc', 'Sm', 'Ll', 'Zs', 'No', 'Lo'} +# +# 3: {'1', '103', '216', '17', '33', '6', '226', '240', '91', '18', '10', '118', '23', '130', '28', '228', '222', '36', '20', '218', '234', '9', '22', '11', '21', '84', '107', '202', '0', '16', '12', '129', '25', '14', '214', '230', '24', '19', '34', '35', '31', '27', '8', '26', '132', '224', '13', '220', '233', '15', '29', '7', '122', '32', '30', '232'} +# +# 4: {'ET', 'AN', 'PDF', 'L', 'B', 'BN', 'PDI', 'RLE', 'LRO', 'RLI', 'LRE', 'LRI', 'AL', 'WS', 'RLO', 'CS', 'ES', 'R', 'S', 'EN', 'ON', 'FSI', 'NSM'} +# +# 6: {'', '3', '7', '4', '5', '2', '9', '8', '1', '0', '6'} +# +# 7: {'', '0', '8', '5', '3', '6', '2', '9', '4', '7', '1'} +# +# 8: various pseudo-numeric values: 100000, 13/2 etc. +# +# 9: {'N', 'Y'} + +# GC_None +# GC_Cc +# GC_Cf +# GC_Co +# GC_Cs +# GC_Ll +# GC_Lm +# GC_Lo +# GC_Lt +# GC_Lu +# GC_Mc +# GC_Me +# GC_Mn +# GC_Nd +# GC_Nl +# GC_No +# GC_Pc +# GC_Pd +# GC_Pe +# GC_Pf +# GC_Pi +# GC_Po +# GC_Ps +# GC_Sc +# GC_Sk +# GC_Sm +# GC_So +# GC_Zl +# GC_Zp +# GC_Zs diff --git a/vim/syntax/fnatural.vim b/vim/syntax/fnatural.vim index 16c5947..a16869a 100644 --- a/vim/syntax/fnatural.vim +++ b/vim/syntax/fnatural.vim @@ -19,6 +19,8 @@ highlight link FnStatement Statement syntax match fnOperator "\v\*\*" syntax match fnOperator "\v\*" +syntax match fnOperator "\v×" +syntax match fnOperator "\v÷" syntax match fnOperator "\v/" syntax match fnOperator "\v\+" syntax match fnOperator "\v-"