From 39f34ee77bd0f34b5989afb335ff4f13b16c28d0 Mon Sep 17 00:00:00 2001
From: alex_s168 <63254202+alex-s168@users.noreply.github.com>
Date: Thu, 19 Sep 2024 19:48:27 +0000
Subject: [PATCH] fix parser combinator
---
src/main/kotlin/blitz/Either.kt | 8 +-
src/main/kotlin/blitz/Obj.kt | 2 +-
src/main/kotlin/blitz/parse/JSON.kt | 88 ++++++++--------
src/main/kotlin/blitz/parse/comb2/Parser.kt | 111 +++++++++++++++-----
src/main/kotlin/blitz/parse/comb2/Predef.kt | 67 ++++++++++++
src/main/kotlin/blitz/str/CharsToString.kt | 4 +
6 files changed, 205 insertions(+), 75 deletions(-)
create mode 100644 src/main/kotlin/blitz/parse/comb2/Predef.kt
create mode 100644 src/main/kotlin/blitz/str/CharsToString.kt
diff --git a/src/main/kotlin/blitz/Either.kt b/src/main/kotlin/blitz/Either.kt
index 42be82f..ce8dff4 100644
--- a/src/main/kotlin/blitz/Either.kt
+++ b/src/main/kotlin/blitz/Either.kt
@@ -62,11 +62,11 @@ class Either private constructor(
fun Either.flatten(): R where A: R, B: R =
getAOrNull() ?: getB()
-fun Either>.partiallyFlatten(): Either =
- mapA> { Either.ofA(it) }.flatten()
+// Wraps a plain A-side value with Either.ofA so that both sides hold an Either, then collapses the nesting via flatten().
+// NOTE(review): the generic type arguments are missing from this patch text; confirm the exact receiver and return types in the repository.
+fun Either>.partiallyFlattenB(): Either where A: A2 =
+ mapA> { Either.ofA(it) }.flatten()
-fun Either, B>.partiallyFlatten(): Either =
- mapB> { Either.ofB(it) }.flatten()
+// Wraps a plain B-side value with Either.ofB so that both sides hold an Either, then collapses the nesting via flatten().
+// NOTE(review): the generic type arguments are missing from this patch text; confirm the exact receiver and return types in the repository.
+fun Either, B>.partiallyFlattenA(): Either where B: B2 =
+ mapB> { Either.ofB(it) }.flatten()
fun Either>.mapBA(fn: (BA) -> BAN): Either> =
mapB { it.mapA(fn) }
diff --git a/src/main/kotlin/blitz/Obj.kt b/src/main/kotlin/blitz/Obj.kt
index 5321fb5..2671dcb 100644
--- a/src/main/kotlin/blitz/Obj.kt
+++ b/src/main/kotlin/blitz/Obj.kt
@@ -30,7 +30,7 @@ fun Obj.map(transform: (I) -> O): Obj =
interface MutObj {
var v: T
- inline fun modify(fn: (T) -> T) {
+ fun modify(fn: (T) -> T) {
v = fn(v)
}
diff --git a/src/main/kotlin/blitz/parse/JSON.kt b/src/main/kotlin/blitz/parse/JSON.kt
index e486d79..04d5787 100644
--- a/src/main/kotlin/blitz/parse/JSON.kt
+++ b/src/main/kotlin/blitz/parse/JSON.kt
@@ -1,55 +1,57 @@
package blitz.parse
-import blitz.parse.comb.*
+import blitz.parse.comb2.*
object JSON {
- lateinit var jsonElement: Parser
+ val jsonElement = futureRec { jsonElement: Parser ->
- val jsonNum = parser {
- it.map(NumParse.float)?.mapSecond { n ->
- Number(n)
- }
- }
+ val jsonNum: Parser = floatLit()
+ .mapValue(::Number)
- val jsonString = parser {
- it.stringWithEscape()
- ?.mapSecond { Str(it) }
- }
+ val jsonString: Parser = stringLit()
+ .mapValue(::Str)
- val jsonArray = parser {
- it.require("[")
- ?.array(",") { elem ->
- elem.whitespaces()
- .map(jsonElement)
- ?.whitespaces()
- }
- ?.require("]")
- ?.mapSecond { x -> Array(x) }
- }
+ val jsonArray: Parser = just('[')
+ .then(jsonElement
+ .delimitedBy(just(','))
+ .mapValue(::Array))
+ .thenIgnore(whitespaces())
+ .thenIgnore(just(']'))
+ .mapValue { it.second }
- val jsonBool = parser { it.require("true")?.to(Bool(true)) } or
- parser { it.require("false")?.to(Bool(false)) }
+ val jsonBool: Parser = choose(
+ seq("true".toList()).mapValue { Bool(true) },
+ seq("false".toList()).mapValue { Bool(false) },
+ )
- val jsonNull = parser { it.require("null")?.to(Nul()) }
+ val jsonNull: Parser = seq("null".toList())
+ .mapValue { Nul() }
- val jsonObj = parser {
- it.require("{")
- ?.array(",") { elem ->
- elem.whitespaces()
- .map(jsonString)
- ?.mapSecond { it.str }
- ?.whitespaces()
- ?.require(":")
- ?.whitespaces()
- ?.map(jsonElement)
- ?.whitespaces()
- }
- ?.require("}")
- ?.mapSecond { x -> Obj(x.toMap()) }
- }
+ val jsonObj: Parser = just('{')
+ .then(
+ whitespaces()
+ .then(stringLit())
+ .mapValue { it.second }
+ .thenIgnore(whitespaces())
+ .thenIgnore(just(':'))
+ .then(jsonElement)
+ .delimitedBy(just(',')))
+ .thenIgnore(whitespaces())
+ .thenIgnore(just('}'))
+ .mapValue { Obj(it.second.toMap()) }
+
+ whitespaces()
+ .then(choose(
+ jsonArray,
+ jsonNum,
+ jsonString,
+ jsonObj,
+ jsonBool,
+ jsonNull
+ ))
+ .thenIgnore(whitespaces())
+ .mapValue { it.second }
- init {
- jsonElement = (jsonArray or jsonNum or jsonString or jsonObj or jsonBool or jsonNull).trim()
}
interface Element {
@@ -95,6 +97,6 @@ object JSON {
class Nul: Element
- fun parse(string: String): Element? =
- jsonElement(Parsable(string))?.second
+ /**
+  * Runs [jsonElement] over the characters of [string].
+  *
+  * The parser result is returned as-is; this function itself does not check
+  * whether the whole input was consumed.
+  */
+ fun parse(string: String): ParseResult {
+     val ctx = ParseCtx(string.toList(), 0)
+     return jsonElement(ctx)
+ }
}
\ No newline at end of file
diff --git a/src/main/kotlin/blitz/parse/comb2/Parser.kt b/src/main/kotlin/blitz/parse/comb2/Parser.kt
index 0a2178d..70806b3 100644
--- a/src/main/kotlin/blitz/parse/comb2/Parser.kt
+++ b/src/main/kotlin/blitz/parse/comb2/Parser.kt
@@ -1,7 +1,11 @@
package blitz.parse.comb2
import blitz.Either
-import blitz.partiallyFlatten
+import blitz.Provider
+import blitz.collections.contents
+import blitz.partiallyFlattenA
+import blitz.partiallyFlattenB
+import blitz.str.charsToString
data class ParseCtx(
val input: List,
@@ -31,7 +35,7 @@ fun Parser.then(other: Parser): Parser> =
invoke(ctx).mapA { first ->
other.invoke(ctx)
.mapA { first to it }
- }.partiallyFlatten()
+ }.partiallyFlattenA()
}
fun Parser.thenIgnore(other: Parser): Parser =
@@ -39,23 +43,50 @@ fun Parser.thenIgnore(other: Parser): Parser =
invoke(ctx).mapA { first ->
other.invoke(ctx)
.mapA { first }
- }.partiallyFlatten()
+ }.partiallyFlattenA()
}
-fun Parser.orElse(other: Parser): Parser =
+/** Recovers from a failure of this parser by succeeding with the fixed [value] instead. */
+fun Parser.orElseVal(value: O): Parser =
+    orElse { _ -> Either.ofA(value) }
+
+/** Makes this parser optional: a failure is replaced by a successful `null` result. */
+fun Parser.orNot(): Parser =
+    orElse { _ -> Either.ofA(null) }
+
+fun Parser.orElse(other: Parser): Parser where O: R =
{
val old = it.copy()
this(it).mapB { err ->
it.loadFrom(old)
other.invoke(it)
.mapB { err + it }
- }.partiallyFlatten()
+ }.partiallyFlattenB()
}
+/**
+ * Tries every parser in [possible] in order and yields the result of the first one that succeeds.
+ *
+ * Before each attempt the context is copied, and it is restored after a failed attempt, so every
+ * alternative starts from the same position. When no alternative succeeds, the errors of all
+ * attempts are returned together, in attempt order (an empty [possible] therefore fails with an
+ * empty error list).
+ *
+ * Success is decided by the alternative's own result (`isA`), not by whether its value is
+ * non-null, so an alternative that legitimately produces `null` is still treated as a match.
+ */
+fun choose(possible: Iterable>): Parser =
+    parser@{ ctx ->
+        Either.ofB(possible.flatMap { p ->
+            val old = ctx.copy()
+            val t = p.invoke(ctx)
+            // First match wins: leave the context where the matching alternative stopped.
+            if (t.isA) return@parser Either.ofA(t.getA())
+            // Rewind so the next alternative sees the untouched input.
+            ctx.loadFrom(old)
+            t.getB()
+        })
+    }
+
+/** Vararg convenience overload: forwards the given alternatives, in order, to the [Iterable] based `choose`. */
+fun choose(vararg possible: Parser): Parser {
+    val alternatives = possible.toList()
+    return choose(alternatives)
+}
+
fun Parser.repeated(): Parser> =
{ ctx ->
val out = mutableListOf()
- var ret: List? = null
while (true) {
val old = ctx.copy()
val t = invoke(ctx)
@@ -63,28 +94,18 @@ fun Parser.repeated(): Parser> =
out += t.getA()
} else {
ctx.loadFrom(old)
- ret = t.getB()
break
}
}
- if (ret == null) {
- Either.ofA(out)
- } else Either.ofB(ret)
+ Either.ofA(out)
}
-fun Parser.delimitedBy(delim: Parser): Parser> =
- thenIgnore(delim)
- .repeated()
- .then(this)
- .mapValue { (a, b) -> a + b }
- .orElse(value(listOf()))
-
inline fun Parser.verifyValue(crossinline verif: (O) -> String?): Parser =
{ ctx ->
invoke(ctx).mapA> {
verif(it)?.let { Either.ofB(listOf(ParseError(ctx.idx, it))) }
?: Either.ofA(it)
- }.partiallyFlatten()
+ }.partiallyFlattenA()
}
inline fun Parser>.verifyValueWithSpan(crossinline fn: (O) -> String?): Parser =
@@ -92,7 +113,7 @@ inline fun Parser>.verifyValueWithSpan(crossinline f
invoke(ctx).mapA> { (span, v) ->
fn(v)?.let { Either.ofB(listOf(ParseError(span.first, it))) }
?: Either.ofA(v)
- }.partiallyFlatten()
+ }.partiallyFlattenA()
}
fun Parser.errIfNull(msg: String = "parser value was null internally"): Parser =
@@ -116,20 +137,56 @@ fun withSpan(p: Parser): Parser> =
fun value(value: O): Parser =
{ Either.ofA(value) }
-fun whitespaces(): Parser =
- regex("\\s+")
+/**
+ * Runs [parsers] one after another on the same context and collects their values in order.
+ *
+ * Stops at the first parser that fails and returns that parser's errors. The context is not
+ * rewound here; callers that need backtracking have to restore it themselves.
+ *
+ * Failure is decided by the failing step's own result (`isA`), so a step that fails with an
+ * empty error list is still reported as a failure rather than as a truncated success.
+ */
+fun chain(parsers: List>): Parser> =
+    parser@{ ctx ->
+        Either.ofA(parsers.map { p ->
+            val r = p.invoke(ctx)
+            // Abort the whole chain as soon as one step fails.
+            if (!r.isA) return@parser Either.ofB(r.getB())
+            r.getA()
+        })
+    }
+
+/** Matches the items of [want] one after another, each via `just`, and yields the matched items. */
+fun seq(want: List): Parser> =
+    chain(want.map { item -> just(item) })
+
+/**
+ * Consumes a single input item and succeeds with it when [filter] accepts it.
+ *
+ * At the end of the input nothing is consumed and an "unexpected end of file" error is returned.
+ * Otherwise the index is advanced by one even when the item is rejected; the error for a rejected
+ * item carries [msg] and points at the index of that item.
+ */
+inline fun filter(msg: String, crossinline filter: (I) -> Boolean): Parser =
+    { ctx ->
+        val at = ctx.idx
+        if (at < ctx.input.size) {
+            // Advance first, exactly like a post-increment read of the current item.
+            ctx.idx = at + 1
+            val item = ctx.input[at]
+            if (filter(item)) Either.ofA(item)
+            else Either.ofB(listOf(ParseError(at, msg)))
+        } else {
+            Either.ofB(listOf(ParseError(at, "unexpected end of file")))
+        }
+    }
fun just(want: I): Parser =
- { ctx ->
- val i = ctx.input[ctx.idx ++]
- if (i == want) Either.ofA(i)
- else Either.ofB(listOf(ParseError(ctx.idx - 1, "expected $want")))
- }
+ filter("expected $want") { it == want }
+
+/** Consumes a single input item and succeeds when it is contained in [possible]. */
+fun oneOf(possible: Iterable): Parser {
+    // The message is built once, when the parser is created.
+    val msg = "expected one of ${possible.contents}"
+    return filter(msg) { candidate -> candidate in possible }
+}
+
+/**
+ * Defers parser lookup: [prov] is called on every invocation of the returned parser,
+ * and the parser it provides is then run on the context.
+ */
+fun future(prov: Provider>): Parser =
+    { ctx -> prov()(ctx) }
+
+/**
+ * Builds a self-referential parser.
+ *
+ * [fn] receives a deferred handle to the parser it is about to return, so the definition
+ * can refer to itself. The handle must only be invoked after [fn] has returned, because the
+ * underlying reference is assigned from the return value of [fn].
+ */
+inline fun futureRec(crossinline fn: (future: Parser) -> Parser): Parser {
+    lateinit var self: Parser
+    val deferred = future { self }
+    self = fn(deferred)
+    return self
+}
/** group values 0 is the entire match */
fun regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser =
{ ctx ->
- pattern.matchAt(ctx.input.toString(), ctx.idx)?.let {
+ pattern.matchAt(ctx.input.charsToString(), ctx.idx)?.let {
ctx.idx = it.range.last + 1
Either.ofA(fn(it.groups))
} ?: Either.ofB(listOf(
diff --git a/src/main/kotlin/blitz/parse/comb2/Predef.kt b/src/main/kotlin/blitz/parse/comb2/Predef.kt
new file mode 100644
index 0000000..e8578fe
--- /dev/null
+++ b/src/main/kotlin/blitz/parse/comb2/Predef.kt
@@ -0,0 +1,67 @@
+package blitz.parse.comb2
+
+import blitz.str.charsToString
+import kotlin.math.absoluteValue
+import kotlin.math.sign
+
+/**
+ * Consumes a possibly empty run of newline, tab, carriage-return, backspace and space
+ * characters and yields the consumed text.
+ */
+fun whitespaces(): Parser {
+    val blank = oneOf(listOf('\n', '\t', '\r', '\b', ' '))
+    return blank
+        .repeated()
+        .mapValue { run -> run.charsToString() }
+}
+
+/** Consumes a single ASCII decimal digit (`0` to `9`). */
+fun digit(): Parser =
+    oneOf(('0'..'9').toList())
+
+/**
+ * Parses a non-empty run of decimal digits into an unsigned integer.
+ *
+ * Fails with a parse error when there are no digits, and also when the digits do not fit
+ * into an unsigned 32-bit integer, so oversized input is reported as a parse error instead
+ * of escaping as a `NumberFormatException`.
+ */
+fun uintLit(): Parser =
+    withSpan(digit().repeated())
+        .verifyValueWithSpan { digits ->
+            when {
+                digits.isEmpty() -> "need digits after sign in num lit"
+                digits.charsToString().toUIntOrNull() == null -> "num lit out of range"
+                else -> null
+            }
+        }
+        // Safe: the verification above already proved the conversion succeeds.
+        .mapValue { it.charsToString().toUInt() }
+
+/**
+ * Parses an optionally signed (`+` or `-`) decimal integer.
+ *
+ * The signed value is computed in 64-bit arithmetic and rejected with a parse error when it
+ * does not fit into a 32-bit signed integer, instead of silently wrapping around.
+ */
+fun intLit(): Parser =
+    choose(just('+').mapValue { +1 },
+        just('-').mapValue { -1 },
+        value(+1))
+        .then(uintLit())
+        .verifyValue { (sign, magnitude) ->
+            val wide = sign * magnitude.toLong()
+            if (wide < Int.MIN_VALUE || wide > Int.MAX_VALUE) "int lit out of range" else null
+        }
+        .mapValue { (sign, magnitude) -> (sign * magnitude.toLong()).toInt() }
+
+/**
+ * Parses an optionally signed decimal number with an optional fractional part
+ * (for example `12`, `-0.5` or `+3.05`).
+ *
+ * The sign is parsed separately from the integer digits so that it survives a zero integer
+ * part (`-0.5`). The fraction is kept as raw digits so that leading zeros (`1.05`) and
+ * fractions of any length are handled correctly. A `.` that is not followed by at least one
+ * digit is not consumed; the number then ends before the `.`.
+ */
+fun floatLit(): Parser =
+    choose(just('+').mapValue { 1.0 },
+        just('-').mapValue { -1.0 },
+        value(1.0))
+        .then(uintLit())
+        .then(just('.')
+            .then(withSpan(digit().repeated())
+                .verifyValueWithSpan { if (it.isEmpty()) "need digits after decimal point in num lit" else null })
+            .mapValue { it.second.charsToString() }
+            .orElseVal(""))
+        .mapValue { (signed, fraction) ->
+            val (sign, whole) = signed
+            // Let the standard decimal conversion assemble the value from its digits.
+            sign * "${whole}.${fraction.ifEmpty { "0" }}".toDouble()
+        }
+
+/**
+ * Parses a backslash escape and yields the character it stands for.
+ *
+ * Supported after the backslash: `"`, `'` and `\` (taken literally) as well as `n`, `r`, `b`
+ * and `t`. Any other character fails with a single "invalid escape sequence" error located
+ * at the position of the first underlying error.
+ */
+fun escapeChar(): Parser {
+    val escaped = choose(
+        just('"'),
+        just('\''),
+        just('\\'),
+        just('n').mapValue { '\n' },
+        just('r').mapValue { '\r' },
+        just('b').mapValue { '\b' },
+        just('t').mapValue { '\t' },
+    ).mapErrors { errs -> listOf(ParseError(errs.first().loc, "invalid escape sequence")) }
+
+    return just('\\')
+        .then(escaped)
+        .mapValue { (_, ch) -> ch }
+}
+
+/**
+ * Parses a double-quoted string literal and yields its contents without the quotes.
+ *
+ * Each content character is either an escape sequence (tried first) or any single
+ * character other than `"`.
+ */
+fun stringLit(): Parser {
+    val quote = just('"')
+    val contentChar = choose(
+        escapeChar(),
+        filter("a") { ch -> ch != '"' },
+    )
+    return quote
+        .then(contentChar.repeated())
+        .thenIgnore(quote)
+        .mapValue { parsed -> parsed.second.charsToString() }
+}
+
+/**
+ * Parses zero or more occurrences of this parser separated by [delim] and yields the values
+ * in order.
+ *
+ * The first element is parsed once, followed by repeated "delimiter then element" pairs, so
+ * no element is ever parsed twice. That matters for recursive grammars, where re-parsing the
+ * last element at every nesting level multiplies the work. A delimiter that is not followed
+ * by an element is left unconsumed. When not even a first element can be parsed, the result
+ * is an empty list.
+ */
+fun Parser.delimitedBy(delim: Parser): Parser> =
+    then(delim.then(this).mapValue { it.second }.repeated())
+        .mapValue { (head, tail) -> listOf(head) + tail }
+        .orElse(value(listOf()))
\ No newline at end of file
diff --git a/src/main/kotlin/blitz/str/CharsToString.kt b/src/main/kotlin/blitz/str/CharsToString.kt
new file mode 100644
index 0000000..80e0b12
--- /dev/null
+++ b/src/main/kotlin/blitz/str/CharsToString.kt
@@ -0,0 +1,4 @@
+package blitz.str
+
+/** Builds a [String] from the characters of this collection, in iteration order. */
+fun Collection.charsToString(): String =
+    toCharArray().concatToString()
\ No newline at end of file