fix parser combinator

This commit is contained in:
alex_s168
2024-09-19 19:48:27 +00:00
parent 8bbb4bc486
commit 39f34ee77b
6 changed files with 205 additions and 75 deletions

View File

@@ -62,11 +62,11 @@ class Either<A, B> private constructor(
fun <A, B, R> Either<A, B>.flatten(): R where A: R, B: R = fun <A, B, R> Either<A, B>.flatten(): R where A: R, B: R =
getAOrNull() ?: getB() getAOrNull() ?: getB()
fun <A, B> Either<A, Either<A, B>>.partiallyFlatten(): Either<A, B> = fun <A, A2, B> Either<A, Either<A2, B>>.partiallyFlattenB(): Either<A2, B> where A: A2 =
mapA<Either<A, B>> { Either.ofA(it) }.flatten() mapA<Either<A2, B>> { Either.ofA(it) }.flatten()
fun <A, B> Either<Either<A, B>, B>.partiallyFlatten(): Either<A, B> = fun <A, B, B2> Either<Either<A, B2>, B>.partiallyFlattenA(): Either<A, B2> where B: B2 =
mapB<Either<A, B>> { Either.ofB(it) }.flatten() mapB<Either<A, B2>> { Either.ofB(it) }.flatten()
fun <A, BA, BB, BAN> Either<A, Either<BA, BB>>.mapBA(fn: (BA) -> BAN): Either<A, Either<BAN, BB>> = fun <A, BA, BB, BAN> Either<A, Either<BA, BB>>.mapBA(fn: (BA) -> BAN): Either<A, Either<BAN, BB>> =
mapB { it.mapA(fn) } mapB { it.mapA(fn) }

View File

@@ -30,7 +30,7 @@ fun <I, O> Obj<I>.map(transform: (I) -> O): Obj<O> =
interface MutObj<T> { interface MutObj<T> {
var v: T var v: T
inline fun modify(fn: (T) -> T) { fun modify(fn: (T) -> T) {
v = fn(v) v = fn(v)
} }

View File

@@ -1,55 +1,57 @@
package blitz.parse package blitz.parse
import blitz.parse.comb.* import blitz.parse.comb2.*
object JSON { object JSON {
lateinit var jsonElement: Parser<Element> val jsonElement = futureRec { jsonElement: Parser<Char, Element> ->
val jsonNum = parser { val jsonNum: Parser<Char, Element> = floatLit()
it.map(NumParse.float)?.mapSecond { n -> .mapValue(::Number)
Number(n)
}
}
val jsonString = parser { val jsonString: Parser<Char, Element> = stringLit()
it.stringWithEscape() .mapValue(::Str)
?.mapSecond { Str(it) }
}
val jsonArray = parser { val jsonArray: Parser<Char, Element> = just('[')
it.require("[") .then(jsonElement
?.array(",") { elem -> .delimitedBy(just(','))
elem.whitespaces() .mapValue(::Array))
.map(jsonElement) .thenIgnore(whitespaces())
?.whitespaces() .thenIgnore(just(']'))
} .mapValue { it.second }
?.require("]")
?.mapSecond { x -> Array(x) }
}
val jsonBool = parser { it.require("true")?.to(Bool(true)) } or val jsonBool: Parser<Char, Element> = choose(
parser { it.require("false")?.to(Bool(false)) } seq("true".toList()).mapValue { Bool(true) },
seq("false".toList()).mapValue { Bool(false) },
)
val jsonNull = parser { it.require("null")?.to(Nul()) } val jsonNull: Parser<Char, Element> = seq("null".toList())
.mapValue { Nul() }
val jsonObj = parser { val jsonObj: Parser<Char, Element> = just('{')
it.require("{") .then(
?.array(",") { elem -> whitespaces()
elem.whitespaces() .then(stringLit())
.map(jsonString) .mapValue { it.second }
?.mapSecond { it.str } .thenIgnore(whitespaces())
?.whitespaces() .thenIgnore(just(':'))
?.require(":") .then(jsonElement)
?.whitespaces() .delimitedBy(just(',')))
?.map(jsonElement) .thenIgnore(whitespaces())
?.whitespaces() .thenIgnore(just('}'))
} .mapValue { Obj(it.second.toMap()) }
?.require("}")
?.mapSecond { x -> Obj(x.toMap()) } whitespaces()
} .then(choose(
jsonArray,
jsonNum,
jsonString,
jsonObj,
jsonBool,
jsonNull
))
.thenIgnore(whitespaces())
.mapValue { it.second }
init {
jsonElement = (jsonArray or jsonNum or jsonString or jsonObj or jsonBool or jsonNull).trim()
} }
interface Element { interface Element {
@@ -95,6 +97,6 @@ object JSON {
class Nul: Element class Nul: Element
fun parse(string: String): Element? = fun parse(string: String): ParseResult<Element> =
jsonElement(Parsable(string))?.second jsonElement(ParseCtx(string.toList(), 0))
} }

View File

@@ -1,7 +1,11 @@
package blitz.parse.comb2 package blitz.parse.comb2
import blitz.Either import blitz.Either
import blitz.partiallyFlatten import blitz.Provider
import blitz.collections.contents
import blitz.partiallyFlattenA
import blitz.partiallyFlattenB
import blitz.str.charsToString
data class ParseCtx<I>( data class ParseCtx<I>(
val input: List<I>, val input: List<I>,
@@ -31,7 +35,7 @@ fun <I, M, O> Parser<I, M>.then(other: Parser<I, O>): Parser<I, Pair<M, O>> =
invoke(ctx).mapA { first -> invoke(ctx).mapA { first ->
other.invoke(ctx) other.invoke(ctx)
.mapA { first to it } .mapA { first to it }
}.partiallyFlatten() }.partiallyFlattenA()
} }
fun <I, O, T> Parser<I, O>.thenIgnore(other: Parser<I, T>): Parser<I, O> = fun <I, O, T> Parser<I, O>.thenIgnore(other: Parser<I, T>): Parser<I, O> =
@@ -39,23 +43,50 @@ fun <I, O, T> Parser<I, O>.thenIgnore(other: Parser<I, T>): Parser<I, O> =
invoke(ctx).mapA { first -> invoke(ctx).mapA { first ->
other.invoke(ctx) other.invoke(ctx)
.mapA { first } .mapA { first }
}.partiallyFlatten() }.partiallyFlattenA()
} }
fun <I, O> Parser<I, O>.orElse(other: Parser<I, O>): Parser<I, O> = fun <I, O> Parser<I, O>.orElseVal(value: O): Parser<I, O> =
orElse { Either.ofA(value) }
/**
 * Makes this parser optional.
 *
 * Delegates to [orElse] with a fallback parser that always succeeds with `null`,
 * so the resulting parser yields this parser's value when it succeeds and `null`
 * when it does not.
 */
fun <I, O: Any> Parser<I, O>.orNot(): Parser<I, O?> =
orElse { Either.ofA(null) }
fun <I, O, R> Parser<I, O>.orElse(other: Parser<I, R>): Parser<I, R> where O: R =
{ {
val old = it.copy() val old = it.copy()
this(it).mapB { err -> this(it).mapB { err ->
it.loadFrom(old) it.loadFrom(old)
other.invoke(it) other.invoke(it)
.mapB { err + it } .mapB { err + it }
}.partiallyFlatten() }.partiallyFlattenB()
} }
/**
 * Tries every parser in [possible] in iteration order and returns the result of the
 * first one that succeeds.
 *
 * Before each attempt the context is snapshotted; a failed attempt restores the
 * snapshot and contributes its errors to the combined error list. If no alternative
 * succeeds, the result is a failure carrying all collected errors.
 *
 * The successful result itself is kept (rather than its unwrapped value), so an
 * alternative that legitimately succeeds with a `null` value is still reported as
 * a success.
 */
fun <I, O> choose(possible: Iterable<Parser<I, O>>): Parser<I, O> =
    { ctx ->
        val errors = mutableListOf<ParseError>()
        var success: ParseResult<O>? = null
        for (p in possible) {
            val old = ctx.copy()
            val t = p.invoke(ctx)
            if (t.isA) {
                success = t
                break
            }
            // roll back whatever the failed alternative consumed
            ctx.loadFrom(old)
            errors += t.getB()
        }
        success ?: Either.ofB(errors)
    }
/** Vararg convenience overload: forwards the given alternatives to the [Iterable] based `choose`. */
fun <I, O> choose(vararg possible: Parser<I, O>): Parser<I, O> {
    val alternatives: List<Parser<I, O>> = possible.toList()
    return choose(alternatives)
}
fun <I, O> Parser<I, O>.repeated(): Parser<I, List<O>> = fun <I, O> Parser<I, O>.repeated(): Parser<I, List<O>> =
{ ctx -> { ctx ->
val out = mutableListOf<O>() val out = mutableListOf<O>()
var ret: List<ParseError>? = null
while (true) { while (true) {
val old = ctx.copy() val old = ctx.copy()
val t = invoke(ctx) val t = invoke(ctx)
@@ -63,28 +94,18 @@ fun <I, O> Parser<I, O>.repeated(): Parser<I, List<O>> =
out += t.getA() out += t.getA()
} else { } else {
ctx.loadFrom(old) ctx.loadFrom(old)
ret = t.getB()
break break
} }
} }
if (ret == null) { Either.ofA(out)
Either.ofA(out)
} else Either.ofB(ret)
} }
fun <I, O> Parser<I, O>.delimitedBy(delim: Parser<I, O>): Parser<I, List<O>> =
thenIgnore(delim)
.repeated()
.then(this)
.mapValue { (a, b) -> a + b }
.orElse(value(listOf()))
inline fun <I, O> Parser<I, O>.verifyValue(crossinline verif: (O) -> String?): Parser<I, O> = inline fun <I, O> Parser<I, O>.verifyValue(crossinline verif: (O) -> String?): Parser<I, O> =
{ ctx -> { ctx ->
invoke(ctx).mapA<ParseResult<O>> { invoke(ctx).mapA<ParseResult<O>> {
verif(it)?.let { Either.ofB(listOf(ParseError(ctx.idx, it))) } verif(it)?.let { Either.ofB(listOf(ParseError(ctx.idx, it))) }
?: Either.ofA(it) ?: Either.ofA(it)
}.partiallyFlatten() }.partiallyFlattenA()
} }
inline fun <I, O> Parser<I, Pair<IntRange, O>>.verifyValueWithSpan(crossinline fn: (O) -> String?): Parser<I, O> = inline fun <I, O> Parser<I, Pair<IntRange, O>>.verifyValueWithSpan(crossinline fn: (O) -> String?): Parser<I, O> =
@@ -92,7 +113,7 @@ inline fun <I, O> Parser<I, Pair<IntRange, O>>.verifyValueWithSpan(crossinline f
invoke(ctx).mapA<ParseResult<O>> { (span, v) -> invoke(ctx).mapA<ParseResult<O>> { (span, v) ->
fn(v)?.let { Either.ofB(listOf(ParseError(span.first, it))) } fn(v)?.let { Either.ofB(listOf(ParseError(span.first, it))) }
?: Either.ofA(v) ?: Either.ofA(v)
}.partiallyFlatten() }.partiallyFlattenA()
} }
fun <I, O: Any?> Parser<I, O?>.errIfNull(msg: String = "parser value was null internally"): Parser<I, O> = fun <I, O: Any?> Parser<I, O?>.errIfNull(msg: String = "parser value was null internally"): Parser<I, O> =
@@ -116,20 +137,56 @@ fun <I, O> withSpan(p: Parser<I, O>): Parser<I, Pair<IntRange, O>> =
fun <I, O> value(value: O): Parser<I, O> = fun <I, O> value(value: O): Parser<I, O> =
{ Either.ofA(value) } { Either.ofA(value) }
fun whitespaces(): Parser<Char, String> = fun <I, O> chain(parsers: List<Parser<I, O>>): Parser<I, List<O>> =
regex("\\s+") { ctx ->
val results = mutableListOf<O>()
val errs = mutableListOf<ParseError>()
for (p in parsers) {
val r = p.invoke(ctx)
if (r.isA) {
results += r.getA()
} else {
errs += r.getB()
break
}
}
if (errs.isNotEmpty()) Either.ofB(errs)
else Either.ofA(results)
}
/**
 * Builds a parser that expects the items of [want] one after another,
 * by chaining one `just` parser per item.
 */
fun <I> seq(want: List<I>): Parser<I, List<I>> {
    val expected = want.map { item -> just(item) }
    return chain(expected)
}
/**
 * Parser for a single input item that satisfies [filter].
 *
 * At end of input it fails with "unexpected end of file" without moving the cursor.
 * Otherwise the cursor is advanced by one item, and the item is returned if it
 * passes [filter]; if not, the parser fails with [msg] located at the rejected item.
 */
inline fun <I> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
    { ctx ->
        val at = ctx.idx
        if (at < ctx.input.size) {
            // the cursor moves past the item even when the predicate rejects it
            ctx.idx = at + 1
            val item = ctx.input[at]
            if (filter(item)) Either.ofA(item)
            else Either.ofB(listOf(ParseError(at, msg)))
        } else {
            Either.ofB(listOf(ParseError(at, "unexpected end of file")))
        }
    }
fun <I> just(want: I): Parser<I, I> = fun <I> just(want: I): Parser<I, I> =
{ ctx -> filter("expected $want") { it == want }
val i = ctx.input[ctx.idx ++]
if (i == want) Either.ofA(i) fun <I> oneOf(possible: Iterable<I>): Parser<I, I> =
else Either.ofB(listOf(ParseError(ctx.idx - 1, "expected $want"))) filter("expected one of ${possible.contents}") { it in possible }
}
/**
 * Wraps a parser that is only looked up when parsing actually happens:
 * [prov] is called on every invocation and the parser it returns is run on the context.
 */
fun <I, O> future(prov: Provider<Parser<I, O>>): Parser<I, O> =
    { ctx ->
        val actual = prov()
        actual(ctx)
    }
/**
 * Builds a self-referential parser.
 *
 * [fn] receives a deferred handle (created with [future]) that resolves to the parser
 * [fn] itself returns, so the definition can refer to itself. The handle must only be
 * invoked after [fn] has returned, because the underlying parser is assigned afterwards.
 */
inline fun <I, O> futureRec(crossinline fn: (future: Parser<I, O>) -> Parser<I, O>): Parser<I, O> {
    lateinit var resolved: Parser<I, O>
    val handle: Parser<I, O> = future { resolved }
    resolved = fn(handle)
    return resolved
}
/** group values 0 is the entire match */ /** group values 0 is the entire match */
fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> = fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
{ ctx -> { ctx ->
pattern.matchAt(ctx.input.toString(), ctx.idx)?.let { pattern.matchAt(ctx.input.charsToString(), ctx.idx)?.let {
ctx.idx = it.range.last + 1 ctx.idx = it.range.last + 1
Either.ofA(fn(it.groups)) Either.ofA(fn(it.groups))
} ?: Either.ofB(listOf( } ?: Either.ofB(listOf(

View File

@@ -0,0 +1,67 @@
package blitz.parse.comb2
import blitz.str.charsToString
import kotlin.math.absoluteValue
import kotlin.math.sign
/**
 * Parses a (possibly empty) run of blank characters and returns it as a string.
 *
 * The accepted characters are newline, tab, carriage return, backspace (`\b`) and space.
 */
fun whitespaces(): Parser<Char, String> {
    val blank = oneOf(listOf('\n', '\t', '\r', '\b', ' '))
    return blank
        .repeated()
        .mapValue { chars -> chars.charsToString() }
}
/** Parses exactly one decimal digit character, `0` through `9`. */
fun digit(): Parser<Char, Char> {
    val decimalDigits: List<Char> = ('0'..'9').toList()
    return oneOf(decimalDigits)
}
/**
 * Parses a non-empty run of decimal digits into a [UInt].
 *
 * Fails with a parse error (located at the start of the digit run) when there are no
 * digits, or when the digits do not fit into a [UInt]. The range check happens during
 * verification so that an oversized literal is reported as a parse error instead of
 * escaping as a [NumberFormatException] from the conversion.
 */
fun uintLit(): Parser<Char, UInt> =
    withSpan(digit().repeated())
        .verifyValueWithSpan {
            when {
                it.isEmpty() -> "need digits after sign in num lit"
                it.charsToString().toUIntOrNull() == null -> "num lit out of range"
                else -> null
            }
        }
        // safe: verification above guarantees the digits form a valid UInt
        .mapValue { it.charsToString().toUInt() }
/**
 * Parses an integer literal: an optional `+` or `-` sign followed by an unsigned
 * literal. A missing sign counts as positive.
 */
fun intLit(): Parser<Char, Int> {
    val signum: Parser<Char, Int> = choose(
        just('+').mapValue { +1 },
        just('-').mapValue { -1 },
        value(+1),
    )
    return signum
        .then(uintLit())
        .mapValue { it.first * it.second.toInt() }
}
/**
 * Parses a decimal number: optional `+`/`-` sign, integer digits, and an optional
 * `.` followed by fraction digits. A missing fraction counts as `.0`.
 *
 * The sign is parsed on its own and the digits are kept as text, because:
 * - deriving the sign from the integer part loses it when that part is zero
 *   (so `0.5` and `-0.5` would both collapse to zero);
 * - converting the fraction to a number drops leading zeros (`1.05` would read as `1.5`);
 * - the integer part is not limited to the `Int` range.
 */
fun floatLit(): Parser<Char, Double> =
    choose(just('+').mapValue { 1.0 },
        just('-').mapValue { -1.0 },
        value(1.0))
        .then(digitRun("need digits after sign in num lit"))
        .then(just('.')
            .then(digitRun("need digits after decimal point in num lit"))
            .mapValue { it.second }
            .orElseVal("0"))
        .mapValue { (signed, frac) ->
            val (sign, whole) = signed
            sign * "$whole.$frac".toDouble()
        }

/** Parses a non-empty run of decimal digits as text; fails with [emptyMsg] when there are none. */
private fun digitRun(emptyMsg: String): Parser<Char, String> =
    withSpan(digit().repeated())
        .verifyValueWithSpan { if (it.isEmpty()) emptyMsg else null }
        .mapValue { it.charsToString() }
/**
 * Parses a backslash escape and returns the character it stands for.
 *
 * Supported after the backslash: `"`, `'`, `\` (returned as-is) and `n`, `r`, `b`, `t`
 * (returned as the corresponding control character). Any other follower fails with a
 * single "invalid escape sequence" error at the location of the first underlying error.
 */
fun escapeChar(): Parser<Char, Char> {
    val escaped = choose(
        just('"'),
        just('\''),
        just('\\'),
        just('n').mapValue { '\n' },
        just('r').mapValue { '\r' },
        just('b').mapValue { '\b' },
        just('t').mapValue { '\t' },
    ).mapErrors { errs -> listOf(ParseError(errs.first().loc, "invalid escape sequence")) }

    return just('\\')
        .then(escaped)
        .mapValue { (_, ch) -> ch }
}
/**
 * Parses a double-quoted string literal and returns its content with escapes resolved.
 *
 * Each content character is either an escape sequence (see [escapeChar]) or any
 * character other than `"` and `\`. A backslash is deliberately excluded from the
 * plain-character branch: otherwise an invalid escape such as `\q` would fail in
 * [escapeChar], fall through to the plain branch, and be silently accepted as
 * literal text instead of being rejected.
 */
fun stringLit(): Parser<Char, String> =
    just('"')
        .then(choose(escapeChar(),
            filter("expected string character") { it != '"' && it != '\\' })
            .repeated())
        .thenIgnore(just('"'))
        .mapValue { (_, str) -> str.charsToString() }
/**
 * Parses zero or more occurrences of this parser separated by [delim] and returns
 * the parsed values; the delimiter values are discarded.
 *
 * Shape: any number of "item then delimiter" pairs followed by one final item.
 * If that shape does not match, the result falls back to an empty list.
 */
fun <I, O, T> Parser<I, O>.delimitedBy(delim: Parser<I, T>): Parser<I, List<O>> {
    val leading = thenIgnore(delim).repeated()
    val nonEmpty = leading
        .then(this)
        .mapValue { (init, last) -> init + last }
    return nonEmpty.orElse(value(listOf()))
}

View File

@@ -0,0 +1,4 @@
package blitz.str
/** Concatenates the characters of this collection, in iteration order, into a [String]. */
fun Collection<Char>.charsToString(): String =
    joinToString(separator = "")