improve parser perf

This commit is contained in:
alex_s168
2024-09-19 21:55:17 +00:00
parent 39f34ee77b
commit 8c2325bdd3
15 changed files with 606 additions and 455 deletions

View File

@@ -1,10 +1,8 @@
package blitz.parse.comb2
import blitz.Either
import blitz.Provider
import blitz.*
import blitz.collections.RefVec
import blitz.collections.contents
import blitz.partiallyFlattenA
import blitz.partiallyFlattenB
import blitz.str.charsToString
data class ParseCtx<I>(
@@ -21,175 +19,185 @@ data class ParseError(
val message: String?,
)
// Outcome of running a parser: the A side carries the parsed value, the B side the collected errors.
// Two definitions are visible here; they differ only in the error container (List vs RefVec).
typealias ParseResult<O> = Either<O, List<ParseError>>
typealias ParseResult<O> = Either<O, RefVec<ParseError>>
// A parser is a function from the parse context (whose idx is advanced/rewound by the combinators) to a ParseResult.
typealias Parser<I, O> = (ParseCtx<I>) -> ParseResult<O>
/**
 * Wraps a parser so that its A-side (parsed) value is passed through [fn] via `mapA`.
 * Presumably the B side (errors) is left untouched by `mapA` — confirm against `Either`.
 * Both an extension form and a free-function form (taking the parser as `self`) are visible here.
 */
inline fun <I, M, O> Parser<I, M>.mapValue(crossinline fn: (M) -> O): Parser<I, O> =
{ invoke(it).mapA { fn(it) } }
inline fun <I, M: Any, O: Any> mapValue(crossinline self: Parser<I, M>, crossinline fn: (M) -> O): Parser<I, O> =
{ self(it).mapA { fn(it) } }
/**
 * Wraps a parser so that its B-side (error collection) is passed through [fn] via `mapB`.
 * Presumably the A side (parsed value) is left untouched by `mapB` — confirm against `Either`.
 * Both an extension form (errors as List) and a free-function form (errors as RefVec) are visible here.
 */
inline fun <I, O> Parser<I, O>.mapErrors(crossinline fn: (List<ParseError>) -> List<ParseError>): Parser<I, O> =
{ invoke(it).mapB { fn(it) } }
inline fun <I, O: Any> mapErrors(crossinline self: Parser<I, O>, crossinline fn: (RefVec<ParseError>) -> RefVec<ParseError>): Parser<I, O> =
{ self(it).mapB { fn(it) } }
/**
 * Sequences two parsers on the same context and pairs their values (`first to second`).
 * The second parser is invoked from inside the `mapA` / `flatMapA` lambda of the first result,
 * so presumably it only runs when the first parser produced an A value — confirm against `Either`.
 * This block does not touch `ctx.idx` itself; rewinding on failure is done by callers such as orElse/choose.
 */
fun <I, M, O> Parser<I, M>.then(other: Parser<I, O>): Parser<I, Pair<M, O>> =
inline fun <I, M: Any, O: Any> then(crossinline self: Parser<I, M>, crossinline other: Parser<I, O>): Parser<I, Pair<M, O>> =
{ ctx ->
invoke(ctx).mapA { first ->
self(ctx).flatMapA<_,_,Pair<M,O>> { first ->
other.invoke(ctx)
.mapA { first to it }
}.partiallyFlattenA()
}
}
// Two sequencing combinators are visible in this span (lines of different versions are interleaved):
//  - thenOverwrite: runs `self`, then `other`, and yields the result of `other` (the first value is discarded).
//  - thenIgnore:    runs `self`, then `other`, and yields the value of `self` (the second value is discarded).
// In both, `other` is invoked from inside the lambda applied to the first result, so presumably it
// only runs when the first parser produced an A value — confirm against `Either`.
fun <I, O, T> Parser<I, O>.thenIgnore(other: Parser<I, T>): Parser<I, O> =
inline fun <I, M: Any, O: Any> thenOverwrite(crossinline self: Parser<I, M>, crossinline other: Parser<I, O>): Parser<I, O> =
{ ctx ->
invoke(ctx).mapA { first ->
self(ctx).flatMapA<_,_,O> {
other.invoke(ctx)
}
}
inline fun <I, O: Any, T: Any> thenIgnore(crossinline self: Parser<I, O>, crossinline other: Parser<I, T>): Parser<I, O> =
{ ctx ->
self(ctx).flatMapA { first ->
other.invoke(ctx)
// keep the value of the first parser, drop the value of `other`
.mapA { first }
}.partiallyFlattenA()
}
}
/**
 * Falls back to the constant [value] when the parser fails: delegates to `orElse` with an
 * alternative that always yields `Either.ofA(value)`.
 * The same [value] instance is handed out on every fallback.
 */
fun <I, O> Parser<I, O>.orElseVal(value: O): Parser<I, O> =
orElse { Either.ofA(value) }
inline fun <I, O: Any> orElseVal(crossinline self: Parser<I, O>, value: O): Parser<I, O> =
orElse(self) { Either.ofA(value) }
/**
 * Makes the receiver optional: delegates to `orElse` with an alternative that ignores the
 * context and always yields `Either.ofA(null)`.
 */
fun <I, O: Any> Parser<I, O>.orNot(): Parser<I, O?> =
    orElse { _ -> Either.ofA(null) }
/**
 * Tries the first parser and, on its B side (errors), rewinds the context to where the
 * attempt started and runs [other] instead. If [other] also produces errors, the errors of
 * both attempts are combined (`err + it` in one visible version, `err.pushBack(it)` in the other).
 * Two ways of saving/restoring the position are visible: copying the whole context
 * (`copy()` / `loadFrom`) and saving only `idx`.
 */
fun <I, O, R> Parser<I, O>.orElse(other: Parser<I, R>): Parser<I, R> where O: R =
inline fun <I, O, R: Any> orElse(crossinline self: Parser<I, O>, crossinline other: Parser<I, R>): Parser<I, R> where O: R =
{
val old = it.copy()
this(it).mapB { err ->
it.loadFrom(old)
val old = it.idx
self(it).mapB { err ->
// rewind before trying the alternative
it.idx = old
other.invoke(it)
.mapB { err + it }
// NOTE(review): mutates the first parser's error vector in place and returns it
.mapB { err.pushBack(it); err }
}.partiallyFlattenB()
}
/**
 * Tries each parser in [possible] in iteration order and returns the value of the first one
 * whose result is on the A side. After every failed attempt the context is rewound to where
 * that attempt started and the attempt's errors are appended to a shared error collection.
 * If no parser succeeds, the accumulated errors are returned; with zero parsers that
 * collection is empty.
 */
fun <I, O> choose(possible: Iterable<Parser<I, O>>): Parser<I, O> =
fun <I, O: Any> choose(possible: Iterable<Parser<I, O>>): Parser<I, O> =
{ ctx ->
val errors = mutableListOf<ParseError>()
// presumably the constructor argument is an initial capacity — confirm against RefVec
val errors = RefVec<ParseError>(possible.count())
var res: O? = null
for (p in possible) {
val old = ctx.copy()
val old = ctx.idx
val t = p.invoke(ctx)
if (t.isA) {
res = t.getA()
res = t.a!!
break
} else {
ctx.loadFrom(old)
errors += t.getB()
ctx.idx = old
errors.pushBack(t.b!!)
}
}
// success is detected via `res != null`; with an unconstrained O a successful null value
// would fall through to the error branch (the `O: Any` signature rules that out)
res?.let { Either.ofA(it) }
?: Either.ofB(errors)
}
/** Vararg convenience overload: copies [possible] into a list and delegates to the Iterable-based `choose`. */
fun <I, O> choose(vararg possible: Parser<I, O>): Parser<I, O> =
fun <I, O: Any> choose(vararg possible: Parser<I, O>): Parser<I, O> =
choose(possible.toList())
/**
 * Applies the parser over and over, collecting every A-side value, until it fails.
 * The failing attempt is undone by rewinding the context to where that attempt started.
 * The combinator itself always returns the A side, possibly with an empty collection.
 */
fun <I, O> Parser<I, O>.repeated(): Parser<I, List<O>> =
inline fun <I, O: Any> repeated(crossinline what: Parser<I, O>): Parser<I, RefVec<O>> =
{ ctx ->
val out = mutableListOf<O>()
val out = RefVec<O>(0)
// NOTE(review): the loop only exits on a failed attempt, so a parser that can succeed
// without consuming input would never terminate here — confirm callers never pass one
while (true) {
val old = ctx.copy()
val t = invoke(ctx)
val old = ctx.idx
val t = what(ctx)
if (t.isA) {
out += t.getA()
out.pushBack(t.a!!)
} else {
ctx.loadFrom(old)
ctx.idx = old
break
}
}
Either.ofA(out)
}
// NOTE(review): this span interleaves lines of several definitions (diff markers are lost).
// Visible pieces: an extension-style verifyValue, verifyValueWithSpan, errIfNull, location,
// withSpan, value and chain, plus repeatedNoSave, whose signature is the second line below and
// whose loop body appears further down (the `while (true)` section).

// verifyValue: runs the parser, then calls `verif` on the value; a non-null String from `verif`
// becomes a ParseError at the current `ctx.idx`, a null keeps the value.
inline fun <I, O> Parser<I, O>.verifyValue(crossinline verif: (O) -> String?): Parser<I, O> =
inline fun <I, O: Any> repeatedNoSave(crossinline what: Parser<I, O>): Parser<I, Unit> =
{ ctx ->
invoke(ctx).mapA<ParseResult<O>> {
verif(it)?.let { Either.ofB(listOf(ParseError(ctx.idx, it))) }
?: Either.ofA(it)
}.partiallyFlattenA()
}
// verifyValueWithSpan: like verifyValue, but the parser yields (span, value) and a rejection
// is reported at `span.first`; on acceptance only the value (without the span) is returned.
inline fun <I, O> Parser<I, Pair<IntRange, O>>.verifyValueWithSpan(crossinline fn: (O) -> String?): Parser<I, O> =
{ ctx ->
invoke(ctx).mapA<ParseResult<O>> { (span, v) ->
fn(v)?.let { Either.ofB(listOf(ParseError(span.first, it))) }
?: Either.ofA(v)
}.partiallyFlattenA()
}
// errIfNull: rejects a null value with [msg], then unwraps the (now known non-null) value.
fun <I, O: Any?> Parser<I, O?>.errIfNull(msg: String = "parser value was null internally"): Parser<I, O> =
verifyValue { if (it == null) msg else null }
.mapValue { it!! }
// location: consumes nothing; yields `fn` applied to the current index (or the index itself).
inline fun <I, O> location(crossinline fn: (Int) -> O): Parser<I, O> =
{ Either.ofA(fn(it.idx)) }
fun <I> location(): Parser<I, Int> =
location { it }
// withSpan: records the index before and after `p` and yields (begin..end) together with p's value.
// `end` is the index after `p` has run, and `..` is an inclusive range.
fun <I, O> withSpan(p: Parser<I, O>): Parser<I, Pair<IntRange, O>> =
location<I>()
.then(p)
.then(location())
.mapValue { (beginAndV, end) ->
(beginAndV.first..end) to beginAndV.second
}
// value: consumes nothing and always yields the given constant.
fun <I, O> value(value: O): Parser<I, O> =
{ Either.ofA(value) }
// chain: runs the parsers in order, collecting values or errors.
fun <I, O> chain(parsers: List<Parser<I, O>>): Parser<I, List<O>> =
{ ctx ->
val results = mutableListOf<O>()
val errs = mutableListOf<ParseError>()
for (p in parsers) {
val r = p.invoke(ctx)
if (r.isA) {
results += r.getA()
} else {
errs += r.getB()
// repeatedNoSave body: keep applying `what` and discard its values; on the first failure
// rewind to where that attempt started and stop. The combinator then yields Unit.
while (true) {
val old = ctx.idx
val t = what(ctx)
if (t.isB) {
ctx.idx = old
break
}
}
if (errs.isNotEmpty()) Either.ofB(errs)
else Either.ofA(results)
Either.ofA(Unit)
}
/**
 * Parser for an exact sequence of input items: builds one `just` parser per element of
 * [want] and runs them in order through `chain`.
 */
fun <I> seq(want: List<I>): Parser<I, List<I>> =
    chain(want.map { just(it) })
// NOTE(review): this span interleaves two definitions (diff markers are lost):
//  - filter: at end of input yields "unexpected end of file"; otherwise consumes one item and
//    yields it if the predicate accepts it, else a ParseError with [msg] at the item's index.
//  - verifyValue (free-function form): runs `self`, then calls `verif` on the value; a non-null
//    String becomes a ParseError at the current `ctx.idx`, a null keeps the value.
inline fun <I> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
inline fun <I, O: Any> verifyValue(crossinline self: Parser<I, O>, crossinline verif: (O) -> String?): Parser<I, O> =
{ ctx ->
if (ctx.idx >= ctx.input.size) {
Either.ofB(listOf(ParseError(ctx.idx, "unexpected end of file")))
} else {
// the index is advanced even when the predicate rejects the item
val i = ctx.input[ctx.idx++]
if (filter(i)) Either.ofA(i)
else Either.ofB(listOf(ParseError(ctx.idx - 1, msg)))
self(ctx).flatMapA<_,_,_> {
verif(it)?.let { Either.ofB(RefVec.of(ParseError(ctx.idx, it))) }
?: Either.ofA(it)
}
}
fun <I> just(want: I): Parser<I, I> =
/**
 * Validates the value of a span-carrying parser.
 *
 * Runs [self], which yields `(span, value)`, and passes the value to [fn]. A non-null
 * String returned by [fn] is reported as a single [ParseError] located at `span.first`;
 * a null result accepts the value, which is returned without its span.
 */
inline fun <I, O: Any> verifyValueWithSpan(crossinline self: Parser<I, Pair<IntRange, O>>, crossinline fn: (O) -> String?): Parser<I, O> =
    { ctx ->
        self(ctx).flatMapA<_,_,_> { (span, v) ->
            val complaint = fn(v)
            if (complaint == null) Either.ofA(v)
            else Either.ofB(RefVec.of(ParseError(span.first, complaint)))
        }
    }
/** Parser that consumes nothing and yields [fn] applied to the context's current index. */
inline fun <I, O: Any> location(crossinline fn: (Int) -> O): Parser<I, O> =
    { ctx -> Either.ofA(fn(ctx.idx)) }
/** Parser that consumes nothing and yields the context's current index. */
inline fun <I> location(): Parser<I, Int> =
    location { position -> position }
/**
 * Annotates the value of [p] with the index range it was parsed from.
 *
 * The index is sampled before and after [p] runs; the result pairs `start..stop` with
 * the value of [p]. `stop` is the index after [p] has run and the range is inclusive.
 */
inline fun <I, O: Any> withSpan(crossinline p: Parser<I, O>): Parser<I, Pair<IntRange, O>> =
    mapValue(then(then(location(), p), location())) { (startAndValue, stop) ->
        val (start, parsed) = startAndValue
        (start..stop) to parsed
    }
/** Parser that consumes nothing and always succeeds with the given [value] (the same instance every time). */
inline fun <I, O: Any> value(value: O): Parser<I, O> =
    { _ -> Either.ofA(value) }
/**
 * Runs [parsers] one after another on the same context and collects their values in order.
 *
 * Stops at the first parser whose result is on the B side and returns that parser's errors.
 * Failure is tracked explicitly instead of being inferred from the number of collected
 * errors: a parser that fails with an empty error vector (for example `choose` over zero
 * alternatives) must still make the chain fail rather than return the partial results as
 * a success. The context index is not rewound here; callers that need backtracking wrap
 * the chain in `orElse` / `choose`.
 *
 * @param parsers the parsers to run, in order
 * @return a parser yielding all values, or the errors of the first failing parser
 */
fun <I, O: Any> chain(parsers: List<Parser<I, O>>): Parser<I, RefVec<O>> =
    { ctx ->
        val results = RefVec<O>(parsers.size)
        val errs = RefVec<ParseError>(0)
        var failed = false
        for (p in parsers) {
            val r = p.invoke(ctx)
            if (r.isA) {
                results.pushBack(r.a!!)
            } else {
                errs.pushBack(r.b!!)
                failed = true
                break
            }
        }
        if (failed) Either.ofB(errs)
        else Either.ofA(results)
    }
/**
 * Parser for an exact sequence of input items: builds one `just` parser per element of
 * [want] and runs them in order through `chain`.
 */
inline fun <I: Any> seq(want: List<I>): Parser<I, RefVec<I>> =
    chain(want.map { just(it) })
/**
 * Parser for a single input item accepted by the predicate [filter].
 *
 * At end of input it fails with "unexpected end of file" without moving the index.
 * Otherwise it consumes one item (the index advances even if the item is rejected) and
 * either yields the item or fails with [msg] located at the item's index.
 */
inline fun <I: Any> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
    { ctx ->
        val at = ctx.idx
        if (at < ctx.input.size) {
            ctx.idx = at + 1
            val item = ctx.input[at]
            if (filter(item)) Either.ofA(item)
            else Either.ofB(RefVec.of(ParseError(at, msg)))
        } else {
            Either.ofB(RefVec.of(ParseError(at, "unexpected end of file")))
        }
    }
/** Parser for a single input item equal to [want]; fails with "expected <want>" otherwise. */
inline fun <I: Any> just(want: I): Parser<I, I> =
    filter("expected $want") { candidate -> candidate == want }
/**
 * Parser for a single input item contained in [possible].
 * The error message is built once, when the parser is created, from `possible.contents`;
 * membership is tested with `in` on the given Iterable for every parsed item.
 */
fun <I> oneOf(possible: Iterable<I>): Parser<I, I> =
inline fun <I: Any> oneOf(possible: Iterable<I>): Parser<I, I> =
filter("expected one of ${possible.contents}") { it in possible }
/**
 * Defers parser construction: [prov] is called on every invocation of the returned parser
 * and the parser it provides is applied to the context. Useful for forward references.
 */
fun <I, O> future(prov: Provider<Parser<I, O>>): Parser<I, O> =
inline fun <I, O: Any> future(crossinline prov: Provider<Parser<I, O>>): Parser<I, O> =
{ prov()(it) }
/**
 * Ties the knot for a recursive parser: [fn] receives a parser that forwards to the parser
 * [fn] itself returns, and that returned parser is the result.
 * The forwarding parser reads `f` only when it is invoked, so it must not be run inside [fn]
 * itself — at that point the `lateinit` variable is not yet assigned.
 */
inline fun <I, O> futureRec(crossinline fn: (future: Parser<I, O>) -> Parser<I, O>): Parser<I, O> {
inline fun <I, O: Any> futureRec(fn: (future: Parser<I, O>) -> Parser<I, O>): Parser<I, O> {
lateinit var f: Parser<I, O>
f = fn(future { f })
f = fn({ f(it) })
return f
}
/**
 * Parser that applies [pattern] at the current position of a `Char` input and maps the match
 * groups with [fn]; group 0 is the entire match.
 * On a match, `ctx.idx` is moved to just past the match. Otherwise a single [ParseError] at
 * the current position is returned and the index is left unchanged.
 */
fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
fun <O: Any> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
{ ctx ->
// NOTE(review): the input is converted to a String on every invocation — confirm this is acceptable for large inputs
pattern.matchAt(ctx.input.charsToString(), ctx.idx)?.let {
ctx.idx = it.range.last + 1
Either.ofA(fn(it.groups))
} ?: Either.ofB(listOf(
} ?: Either.ofB(RefVec.of(
ParseError(ctx.idx, "regular expression \"$pattern\" does not apply")
))
}
@@ -197,7 +205,7 @@ fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<C
/** Parser that applies [pattern] at the current position and yields the text of the entire match (group 0). */
fun regex(pattern: Regex) = regex(pattern) { groups -> groups[0]!!.value }
/**
 * String-pattern convenience overload: compiles [pattern] into a [Regex] once, when the parser
 * is created, and delegates to the Regex-based `regex`. Group 0 is the entire match.
 */
fun <O> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
fun <O: Any> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
regex(Regex(pattern), fn)
/** Parser that applies the regular expression [pattern] at the current position and yields the text of the entire match (group 0). */
fun regex(pattern: String) = regex(pattern) { groups -> groups[0]!!.value }

View File

@@ -1,67 +1,63 @@
package blitz.parse.comb2
import blitz.collections.RefVec
import blitz.str.charsToString
import kotlin.math.absoluteValue
import kotlin.math.sign
/**
 * Parser that consumes zero or more characters from the set newline, tab, carriage return,
 * backspace (`\b`) and space. Two definitions are visible: one returns the consumed text as a
 * String, the other discards it and returns Unit (via `repeatedNoSave`).
 */
fun whitespaces(): Parser<Char, String> =
oneOf("\n\t\r\b ".toList())
.repeated()
.mapValue { it.charsToString() }
fun whitespaces(): Parser<Char, Unit> =
repeatedNoSave(oneOf("\n\t\r\b ".toList()))
/** Parser for a single ASCII decimal digit character, '0' through '9'. */
fun digit(): Parser<Char, Char> =
    oneOf(('0'..'9').toList())
/**
 * Parser for one or more decimal digits. An empty digit run is rejected with
 * "need digits after sign in num lit", reported at the start of the span.
 * Two definitions are visible: one converts the digits to a UInt, the other returns the
 * raw digit characters as a RefVec and leaves conversion to the caller.
 */
fun uintLit(): Parser<Char, UInt> =
withSpan(digit().repeated())
.verifyValueWithSpan { if (it.isEmpty()) "need digits after sign in num lit" else null }
.mapValue { it.charsToString().toUInt() }
fun uintLit(): Parser<Char, RefVec<Char>> =
verifyValueWithSpan(withSpan(repeated(digit())))
{ if (it.size == 0) "need digits after sign in num lit" else null }
/**
 * Parser for an optionally signed decimal integer: an optional '+' or '-' (defaulting to +1
 * when neither is present) followed by the digits of `uintLit`; the result is sign * digits.
 * NOTE(review): the digit-to-Int conversion (`toInt()`) throws for digit runs outside the Int
 * range instead of producing a ParseError — confirm whether that is intended.
 */
fun intLit(): Parser<Char, Int> =
choose(just('+').mapValue { +1 },
just('-').mapValue { -1 },
value(+1))
.then(uintLit())
.mapValue { (sign, v) -> sign * v.toInt() }
mapValue(then(choose(mapValue(just('+')) { +1 },
mapValue(just('-')) { -1 },
value(+1)),
uintLit()))
{ (sign, v) -> sign * v.charsToString().toInt() }
/**
 * Parser for a decimal floating-point literal: an optional '+' or '-', one or more digits,
 * a mandatory '.', and an optional run of fraction digits (defaulting to "0").
 *
 * The sign is parsed separately from the digits so that it survives a zero integer part
 * ("-0.5" is -0.5), and the whole and fraction digits are joined and converted by
 * [String.toDouble] so that leading zeros in the fraction keep their place value
 * ("1.05" is 1.05, not 1.5). Converting the digits directly also avoids the Int range limit
 * that going through `intLit` would impose on the integer part.
 *
 * @return a parser yielding the literal's value as a Double
 */
fun floatLit(): Parser<Char, Double> =
    mapValue(
        then(
            thenIgnore(
                then(
                    choose(mapValue(just('+')) { +1.0 },
                           mapValue(just('-')) { -1.0 },
                           value(+1.0)),
                    uintLit()),
                just('.')),
            orElseVal(uintLit(), RefVec.of('0'))))
    { (signAndWhole, frac) ->
        val (sign, whole) = signAndWhole
        // both parts contain only ASCII digits, so "whole.frac" is always a valid Double literal
        sign * "${whole.charsToString()}.${frac.charsToString()}".toDouble()
    }
/**
 * Parser for a backslash escape inside a string literal: a '\' followed by one of
 * `"`, `'`, `\`, `n`, `r`, `b`, `t`. The letters map to newline, carriage return, backspace
 * and tab; the other three map to themselves. The yielded value is the escaped character only.
 * If none of the alternatives matches, the collected errors are replaced by a single
 * "invalid escape sequence" error at the location of the first collected error.
 * NOTE(review): lines of two versions (method-chain and nested-call style) are interleaved here.
 */
fun escapeChar(): Parser<Char, Char> =
just('\\').then(
choose(just('"'),
thenOverwrite(just('\\'),
mapErrors(choose(just('"'),
just('\''),
just('\\'),
just('n').mapValue { '\n' },
just('r').mapValue { '\r' },
just('b').mapValue { '\b' },
just('t').mapValue { '\t' })
.mapErrors { listOf(ParseError(it.first().loc, "invalid escape sequence")) }
).mapValue { it.second }
mapValue(just('n')) { '\n' },
mapValue(just('r')) { '\r' },
mapValue(just('b')) { '\b' },
mapValue(just('t')) { '\t' }))
{ RefVec.of(ParseError(it[0].loc, "invalid escape sequence")) }
)
/**
 * Parser for a double-quoted string literal: an opening '"', then any number of escape
 * sequences (`escapeChar`) or characters other than '"', then a closing '"'.
 * Yields the decoded content without the surrounding quotes.
 * NOTE(review): the message passed to `filter` is the placeholder "a"; it is what a caller
 * would see if that alternative's error is surfaced — consider a descriptive message.
 * NOTE(review): lines of two versions (method-chain and nested-call style) are interleaved here.
 */
fun stringLit(): Parser<Char, String> =
just('"')
.then(choose(escapeChar(),
filter("a") { it != '"' })
.repeated())
.thenIgnore(just('"'))
.mapValue { (_, str) -> str.charsToString() }
mapValue(thenIgnore(then(just('"'),
repeated(choose(escapeChar(),
filter("a") { it != '"' }))),
just('"')))
{ (_, str) -> str.charsToString() }
/**
 * Parser for a list of items separated by a delimiter: zero or more "item delimiter" pairs
 * followed by one final item. If that whole attempt fails (including when the final item is
 * missing, e.g. after a trailing delimiter), `orElse` rewinds and yields an empty collection.
 * NOTE(review): in the RefVec form the fallback `RefVec.of()` is evaluated once when the parser
 * is built, so every fallback presumably hands out the same vector instance — confirm callers
 * do not mutate it.
 */
fun <I, O, T> Parser<I, O>.delimitedBy(delim: Parser<I, T>): Parser<I, List<O>> =
thenIgnore(delim)
.repeated()
.then(this)
.mapValue { (a, b) -> a + b }
.orElse(value(listOf()))
inline fun <I, O: Any, T: Any> delimitedBy(crossinline self: Parser<I, O>, crossinline delim: Parser<I, T>): Parser<I, RefVec<O>> =
orElse(mapValue(then(repeated(thenIgnore(self, delim)), self))
// appends the final item to the vector produced by `repeated` and reuses that vector
{ (a, b) -> a.pushBack(b); a },
value(RefVec.of()))