Files
beginner-friendly-lang/tree-sitter/grammar.js
2025-09-21 22:54:00 +02:00

407 lines
9.1 KiB
JavaScript

/**
* @file Parser for the Crepuscular(ray) functional programming language
* @author Alexander Nutz <alexander.nutz@vxcc.dev>
* @license MIT
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
module.exports = grammar({
name: "crepuscular",
reserved: {
toplevel_kw: $ =>
['type', 'with', 'extensible', 'extend', 'union', 'def', 'await', 'let', 'if', 'then', 'else', 'in', 'match'],
},
extras: ($) => [
/\s/, // whitespace
$.comment,
$.section_comment,
],
word: $ => $._identifier_tok,
precedences: _ => [
[
"parentrized",
"function_call",
"expr_atom",
"ident",
"exponent",
"multiplication",
"negate",
"addition",
"concat",
"binary_boolean",
"equal",
"if",
"let",
"match_arm",
"new_match_arm",
"await",
"tag",
],
],
rules: {
source_file: $ => repeat($.definition),
_identifier_tok: $ => token(/[a-zA-Z_]+[a-zA-Z0-9_]*/),
identifier: $ => choice(
reserved('toplevel_kw', $._identifier_tok),
token(/`[^\n\`]+`/)
),
path: $ => prec.left(seq($.identifier, repeat(seq('.', $.identifier)))),
comment: $ =>
token(seq("# ", /.*/)),
section_comment: $ =>
token(seq("###", /.*/)),
doc_comment_value: $ =>
choice(
token.immediate(/\n/),
token.immediate(/[^\n]*\n?/)),
doc_comment: $ =>
repeat1(seq('##', token.immediate(/ */), $.doc_comment_value)),
definition: $ => seq(
optional(field('doc', $.doc_comment)),
field('body', choice(
$.full_partial_type_definition,
$.type_definition,
$.extensible_union,
$.extend_decl,
$.def,
)),
),
extensible_union: $ => seq(
'extensible', 'union', $.path),
extend_decl: $ => seq(
'extend',
field('what', $.path),
'with',
field('tag', $.tag),
field('ty', $.type)),
full_partial_type_definition: $ => seq(
"type",
"?", field('name', $.path),
"=",
field('type', $.type)
),
type_definition: $ => seq(
"type",
optional(choice(
field('arg', $.identifier),
seq(
'[',
repeat(seq(field('arg', $.identifier), ',')),
field('arg', $.identifier),
']'),
)),
field('name', $.path),
"=",
field('type', $.type)
),
type_atom: $ => choice(
$.just_type,
$.partial_type,
seq('(', $.type, ')'),
$.record_type,
),
_type_non_fn: $ => choice(
$.type_atom,
$.tagged_type,
$.union_type,
$.partial_union_type,
$.parametrized_type,
$.with_type,
$.recursive_type,
),
type: $ => choice(
$._type_non_fn,
$.fn_type,
),
union_type: $ => prec.left(1,
seq(
field('left', $.type),
'|',
field('right', $.type))),
partial_union_type: $ => prec.left(1,
seq(
field('left', $.type),
'|', '...',
field('partial', $.partial_type))),
tag: $ => new RustRegex("'(?:[a-zA-Z_][a-zA-Z0-9_]*(?:[.][a-zA-Z_0-9]+)*)"),
tagged_type: $ => prec.right(3,
seq(field('tag', $.tag), optional(
field('type', choice(
$.type_atom,
$.parametrized_type))))),
multi_type_parameters: $ => seq('[',
field('arg', $.type),
repeat(seq(',', field('arg', $.type))),
']'),
parametrized_type: $ => prec.left(4, seq(
field('nest', choice(
$.multi_type_parameters,
$.type_atom,
)),
repeat(field('nest', $.path)),
field('type', $.path),
)),
with_type: $ => seq('with',
field('arg', $.identifier),
repeat(seq(',', field('arg', $.identifier))),
':',
field('type', $.type)),
recursive_type: $ => seq('&',
field('name', $.identifier),
field('type', $.type)),
partial_type: $ => seq('?', $.identifier),
fn_type: $ => prec.left(-10,
seq(field('arg', $.type), '->', field('res', $.type))),
just_type: $ => prec(-1, $.path),
// TODO: doc comments
record_type_field: $ => seq(field('name', $.identifier), ':', field('type', $.type)),
record_type: $ => seq(
'{',
repeat(seq(field('field', $.record_type_field), ',')),
optional(choice(
field('field', $.record_type_field),
seq('...', field('partial', $.partial_type)),
)),
'}'),
escape_sequence: $ =>
token.immediate(
seq('\\', /[tbrnf0"'\\]/)),
char_middle: $ => /./,
string_middle: $ => /[^\\"]+/,
char_literal: $ =>
seq('\'', choice($.escape_sequence, $.char_middle), '\''),
// TODO: fstrings
string_literal: $ =>
seq('"', repeat(choice($.escape_sequence, $.string_middle)), '"'),
num_literal: $ =>
seq(
choice(
/[0-9]+/,
/\-[0-9]+/
),
optional(token.immediate(/[.][0-9]+/))
),
list_expression: $ =>
seq(
'[',
repeat(seq($.expression, ',')),
optional($.expression),
']'),
field_access: $ => prec.left(
seq(field('expr', $.atom), ':', field('field', $.identifier))),
function_call: $ => prec.left("function_call",
seq(
field('fn', $.atom),
'(',
repeat(seq(field('arg', $.expression), ',')),
optional(field('arg', $.expression)),
')')),
ident_expr: $ => prec("ident",
$.path),
record_expr_field: $ =>
seq(field('field', $.identifier), ':', field('value', $.expression)),
record_expr: $ => seq(
'{',
repeat(seq($.record_expr_field, ',')),
optional($.record_expr_field),
'}'),
atom: $ => choice(
prec("parentrized", seq('(', $.expression, ')')),
$.ident_expr,
$.char_literal,
$.string_literal,
$.num_literal,
$.list_expression,
$.field_access,
$.function_call,
$.record_expr,
),
let_binding: $ => prec("let", seq(
'let',
field('name', $.identifier),
'=',
field('value', $.expression),
optional('in'),
field('body', $.expression),
)),
await_binding: $ => prec("let", seq(
'await',
field('name', $.identifier),
'=',
field('value', $.expression),
optional('in'),
field('body', $.expression),
)),
type_downcast: $ => seq(
field('expr', $.atom),
'::',
field('as', $.type),
),
lambda: $ => prec.right(4, seq(
field('arg', $.identifier),
optional(seq(':', field('arg_type', $._type_non_fn))),
'->',
field('body', $.expression)
)),
if_expr: $ => prec("if",
seq(
'if',
field('condition', $.expression),
'then',
field('then', $.expression),
'else',
field('else', $.expression))),
_add_expr: $ => prec.left("addition",
seq(
field('left', $.expression),
choice('+', '-'),
field('right', $.expression)
)),
_multiply_expr: $ => prec.left("multiplication",
seq(
field('left', $.expression),
choice('*', '/'),
field('right', $.expression)
)),
_equal_expr: $ => prec.left("equal",
seq(
field('left', $.expression),
'=',
field('right', $.expression)
)),
_concat_expr: $ => prec.left("concat",
seq(
field('left', $.expression),
choice('++', '=>'),
field('right', $.expression)
)),
_exponent_expr: $ => prec.left("exponent",
seq(
field('left', $.expression),
'^',
field('right', $.atom)
)),
_bin_bool_expr: $ => prec.left("binary_boolean",
seq(
field('left', $.expression),
choice('and', 'or'),
field('right', $.atom)
)),
binary_expr: $ => choice(
$._exponent_expr,
$._concat_expr,
$._equal_expr,
$._multiply_expr,
$._add_expr,
$._bin_bool_expr,
),
match_arm: $ => prec("match_arm",
seq(
field('cases', seq($.atom, repeat(seq('|', $.atom)))),
'->', field('expr', $.atom))),
match_expr: $ =>
seq('match', field('on', $.expression), 'with',
field('arm', $.match_arm),
prec("new_match_arm", repeat(seq('|', field('arm', $.match_arm))))),
unary_expr: $ => prec.right("negate",
seq(
'-',
field('expr', $.expression))),
tag_expr: $ => prec.right("tag",
seq(
field('tag', $.tag),
field('expr', $.expression))),
await_expr: $ => prec.right("await",
seq('await', field('expr', $.expression))),
expression: $ => choice(
prec("expr_atom", $.atom),
$.let_binding,
$.await_binding,
$.await_expr,
$.type_downcast,
$.lambda,
$.if_expr,
$.tag_expr,
$.match_expr,
$.binary_expr,
$.unary_expr,
),
def: $ => seq(
'def',
field('name', $.path),
choice(
seq(':', field('signature', $.type)),
seq(
optional(seq(':', field('signature', $.type))),
seq('=', field('value', $.expression)),
)
),
),
}
});