improve parser perf
This commit is contained in:
@@ -1,56 +1,65 @@
|
||||
package blitz.parse
|
||||
|
||||
import blitz.parse.comb2.*
|
||||
import org.json.JSONObject
|
||||
import kotlin.math.min
|
||||
import kotlin.system.measureNanoTime
|
||||
|
||||
object JSON {
|
||||
val jsonElement = futureRec { jsonElement: Parser<Char, Element> ->
|
||||
|
||||
val jsonNum: Parser<Char, Element> = floatLit()
|
||||
.mapValue(::Number)
|
||||
val jsonNum: Parser<Char, Element> =
|
||||
mapValue(floatLit(), ::Number)
|
||||
|
||||
val jsonString: Parser<Char, Element> = stringLit()
|
||||
.mapValue(::Str)
|
||||
val jsonString: Parser<Char, Element> =
|
||||
mapValue(stringLit(), ::Str)
|
||||
|
||||
val jsonArray: Parser<Char, Element> = just('[')
|
||||
.then(jsonElement
|
||||
.delimitedBy(just(','))
|
||||
.mapValue(::Array))
|
||||
.thenIgnore(whitespaces())
|
||||
.thenIgnore(just(']'))
|
||||
.mapValue { it.second }
|
||||
val jsonArray: Parser<Char, Element> =
|
||||
thenIgnore(
|
||||
thenIgnore(
|
||||
thenOverwrite(just('['),
|
||||
mapValue(delimitedBy(jsonElement, just(',')))
|
||||
{ Array(it.toList())}),
|
||||
whitespaces()),
|
||||
just(']')
|
||||
)
|
||||
|
||||
val jsonBool: Parser<Char, Element> = choose(
|
||||
seq("true".toList()).mapValue { Bool(true) },
|
||||
seq("false".toList()).mapValue { Bool(false) },
|
||||
mapValue(seq("true".toList())) { Bool(true) },
|
||||
mapValue(seq("false".toList())) { Bool(false) },
|
||||
)
|
||||
|
||||
val jsonNull: Parser<Char, Element> = seq("null".toList())
|
||||
.mapValue { Nul() }
|
||||
val jsonNull: Parser<Char, Element> =
|
||||
mapValue(seq("null".toList())) { Nul() }
|
||||
|
||||
val jsonObj: Parser<Char, Element> = just('{')
|
||||
.then(
|
||||
whitespaces()
|
||||
.then(stringLit())
|
||||
.mapValue { it.second }
|
||||
.thenIgnore(whitespaces())
|
||||
.thenIgnore(just(':'))
|
||||
.then(jsonElement)
|
||||
.delimitedBy(just(',')))
|
||||
.thenIgnore(whitespaces())
|
||||
.thenIgnore(just('}'))
|
||||
.mapValue { Obj(it.second.toMap()) }
|
||||
val jsonObj: Parser<Char, Element> =
|
||||
mapValue(thenIgnore(thenIgnore(thenOverwrite(
|
||||
just('{'),
|
||||
delimitedBy(
|
||||
then(
|
||||
thenIgnore(
|
||||
thenIgnore(
|
||||
thenOverwrite(
|
||||
whitespaces(),
|
||||
stringLit()),
|
||||
whitespaces()),
|
||||
just(':')),
|
||||
jsonElement),
|
||||
just(','))),
|
||||
whitespaces()),
|
||||
just('}'))) { Obj(it.toMap()) }
|
||||
|
||||
whitespaces()
|
||||
.then(choose(
|
||||
thenIgnore(thenOverwrite(
|
||||
whitespaces(),
|
||||
choose(
|
||||
jsonArray,
|
||||
jsonNum,
|
||||
jsonString,
|
||||
jsonObj,
|
||||
jsonBool,
|
||||
jsonNull
|
||||
))
|
||||
.thenIgnore(whitespaces())
|
||||
.mapValue { it.second }
|
||||
)),
|
||||
whitespaces())
|
||||
|
||||
}
|
||||
|
||||
@@ -99,4 +108,206 @@ object JSON {
|
||||
|
||||
fun parse(string: String): ParseResult<Element> =
|
||||
jsonElement(ParseCtx(string.toList(), 0))
|
||||
}
|
||||
|
||||
fun main() {
|
||||
val json = """
|
||||
{
|
||||
"clinical_study": {
|
||||
"brief_summary": {
|
||||
"textblock": "CLEAR SYNERGY is an international multi center 2x2 randomized placebo controlled trial of"
|
||||
},
|
||||
"brief_title": "CLEAR SYNERGY Neutrophil Substudy",
|
||||
"overall_status": "Recruiting",
|
||||
"eligibility": {
|
||||
"study_pop": {
|
||||
"textblock": "Patients who are randomized to the drug RCT portion of the CLEAR SYNERGY (OASIS 9) trial"
|
||||
},
|
||||
"minimum_age": "19 Years",
|
||||
"sampling_method": "Non-Probability Sample",
|
||||
"gender": "All",
|
||||
"criteria": {
|
||||
"textblock": "Inclusion Criteria:"
|
||||
},
|
||||
"healthy_volunteers": "No",
|
||||
"maximum_age": "110 Years"
|
||||
},
|
||||
"number_of_groups": "2",
|
||||
"source": "NYU Langone Health",
|
||||
"location_countries": {
|
||||
"country": "United States"
|
||||
},
|
||||
"study_design_info": {
|
||||
"time_perspective": "Prospective",
|
||||
"observational_model": "Other"
|
||||
},
|
||||
"last_update_submitted_qc": "September 10, 2019",
|
||||
"intervention_browse": {
|
||||
"mesh_term": "Colchicine"
|
||||
},
|
||||
"official_title": "Studies on the Effects of Colchicine on Neutrophil Biology in Acute Myocardial Infarction: A Substudy of the CLEAR SYNERGY (OASIS 9) Trial",
|
||||
"primary_completion_date": {
|
||||
"type": "Anticipated",
|
||||
"content": "February 1, 2021"
|
||||
},
|
||||
"sponsors": {
|
||||
"lead_sponsor": {
|
||||
"agency_class": "Other",
|
||||
"agency": "NYU Langone Health"
|
||||
},
|
||||
"collaborator": [
|
||||
{
|
||||
"agency_class": "Other",
|
||||
"agency": "Population Health Research Institute"
|
||||
},
|
||||
{
|
||||
"agency_class": "NIH",
|
||||
"agency": "National Heart, Lung, and Blood Institute (NHLBI)"
|
||||
}
|
||||
]
|
||||
},
|
||||
"overall_official": {
|
||||
"role": "Principal Investigator",
|
||||
"affiliation": "NYU School of Medicine",
|
||||
"last_name": "Binita Shah, MD"
|
||||
},
|
||||
"overall_contact_backup": {
|
||||
"last_name": "Binita Shah, MD"
|
||||
},
|
||||
"condition_browse": {
|
||||
"mesh_term": [
|
||||
"Myocardial Infarction",
|
||||
"ST Elevation Myocardial Infarction",
|
||||
"Infarction"
|
||||
]
|
||||
},
|
||||
"overall_contact": {
|
||||
"phone": "646-501-9648",
|
||||
"last_name": "Fatmira Curovic",
|
||||
"email": "fatmira.curovic@nyumc.org"
|
||||
},
|
||||
"responsible_party": {
|
||||
"responsible_party_type": "Principal Investigator",
|
||||
"investigator_title": "Assistant Professor of Medicine",
|
||||
"investigator_full_name": "Binita Shah",
|
||||
"investigator_affiliation": "NYU Langone Health"
|
||||
},
|
||||
"study_first_submitted_qc": "March 12, 2019",
|
||||
"start_date": {
|
||||
"type": "Actual",
|
||||
"content": "March 4, 2019"
|
||||
},
|
||||
"has_expanded_access": "No",
|
||||
"study_first_posted": {
|
||||
"type": "Actual",
|
||||
"content": "March 14, 2019"
|
||||
},
|
||||
"arm_group": [
|
||||
{
|
||||
"arm_group_label": "Colchicine"
|
||||
},
|
||||
{
|
||||
"arm_group_label": "Placebo"
|
||||
}
|
||||
],
|
||||
"primary_outcome": {
|
||||
"measure": "soluble L-selectin",
|
||||
"time_frame": "between baseline and 3 months",
|
||||
"description": "Change in soluble L-selectin between baseline and 3 mo after STEMI in the placebo vs. colchicine groups."
|
||||
},
|
||||
"secondary_outcome": [
|
||||
{
|
||||
"measure": "Other soluble markers of neutrophil activity",
|
||||
"time_frame": "between baseline and 3 months",
|
||||
"description": "Other markers of neutrophil activity will be evaluated at baseline and 3 months after STEMI (myeloperoxidase, matrix metalloproteinase-9, neutrophil gelatinase-associated lipocalin, neutrophil elastase, intercellular/vascular cellular adhesion molecules)"
|
||||
},
|
||||
{
|
||||
"measure": "Markers of systemic inflammation",
|
||||
"time_frame": "between baseline and 3 months",
|
||||
"description": "Markers of systemic inflammation will be evaluated at baseline and 3 months after STEMI (high sensitive CRP, IL-1β)"
|
||||
},
|
||||
{
|
||||
"measure": "Neutrophil-driven responses that may further propagate injury",
|
||||
"time_frame": "between baseline and 3 months",
|
||||
"description": "Neutrophil-driven responses that may further propagate injury will be evaluated at baseline and 3 months after STEMI (neutrophil extracellular traps, neutrophil-derived microparticles)"
|
||||
}
|
||||
],
|
||||
"oversight_info": {
|
||||
"is_fda_regulated_drug": "No",
|
||||
"is_fda_regulated_device": "No",
|
||||
"has_dmc": "No"
|
||||
},
|
||||
"last_update_posted": {
|
||||
"type": "Actual",
|
||||
"content": "September 12, 2019"
|
||||
},
|
||||
"id_info": {
|
||||
"nct_id": "NCT03874338",
|
||||
"org_study_id": "18-01323",
|
||||
"secondary_id": "1R01HL146206"
|
||||
},
|
||||
"enrollment": {
|
||||
"type": "Anticipated",
|
||||
"content": "670"
|
||||
},
|
||||
"study_first_submitted": "March 12, 2019",
|
||||
"condition": [
|
||||
"Neutrophils.Hypersegmented | Bld-Ser-Plas",
|
||||
"STEMI - ST Elevation Myocardial Infarction"
|
||||
],
|
||||
"study_type": "Observational",
|
||||
"required_header": {
|
||||
"download_date": "ClinicalTrials.gov processed this data on July 19, 2020",
|
||||
"link_text": "Link to the current ClinicalTrials.gov record.",
|
||||
"url": "https://clinicaltrials.gov/show/NCT03874338"
|
||||
},
|
||||
"last_update_submitted": "September 10, 2019",
|
||||
"completion_date": {
|
||||
"type": "Anticipated",
|
||||
"content": "February 1, 2022"
|
||||
},
|
||||
"location": {
|
||||
"contact": {
|
||||
"phone": "646-501-9648",
|
||||
"last_name": "Fatmira Curovic",
|
||||
"email": "fatmira.curovic@nyumc.org"
|
||||
},
|
||||
"facility": {
|
||||
"address": {
|
||||
"zip": "10016",
|
||||
"country": "United States",
|
||||
"city": "New York",
|
||||
"state": "New York"
|
||||
},
|
||||
"name": "NYU School of Medicine"
|
||||
},
|
||||
"status": "Recruiting",
|
||||
"contact_backup": {
|
||||
"last_name": "Binita Shah, MD"
|
||||
}
|
||||
},
|
||||
"intervention": {
|
||||
"intervention_type": "Drug",
|
||||
"arm_group_label": [
|
||||
"Colchicine",
|
||||
"Placebo"
|
||||
],
|
||||
"description": "Participants in the main CLEAR SYNERGY trial are randomized to colchicine/spironolactone versus placebo in a 2x2 factorial design. The substudy is interested in the evaluation of biospecimens obtained from patients in the colchicine vs placebo group.",
|
||||
"intervention_name": "Colchicine Pill"
|
||||
},
|
||||
"patient_data": {
|
||||
"sharing_ipd": "No"
|
||||
},
|
||||
"verification_date": "September 2019"
|
||||
}
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
var minAlex = Long.MAX_VALUE
|
||||
var minJson = Long.MAX_VALUE
|
||||
while (true) {
|
||||
minAlex = min(measureNanoTime { JSON.parse(json).a!! }, minAlex)
|
||||
minJson = min(measureNanoTime { JSONObject(json) }, minJson)
|
||||
println("alex: $minAlex ns, json-java: $minJson ns ; alex is ${ minJson.toFloat() / minAlex.toFloat() } times as fast as json-java")
|
||||
}
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
package blitz.parse
|
||||
|
||||
import blitz.parse.comb.*
|
||||
|
||||
/**
 * Prefix parsers for numeric literals, built from the `blitz.parse.comb` primitives.
 *
 * Both [int] and [float] parse a leading literal and leave any remaining input in the
 * returned [Parsable]; they yield `null` when no valid number starts the input.
 */
object NumParse {
    /** Radix prefix: `0b` -> 2, `0x` -> 16, `0o` -> 8; otherwise 10 without consuming input. */
    private val intBase =
        parser { input -> input.require("0b")?.to(2) }
            .or(parser { input -> input.require("0x")?.to(16) })
            .or(parser { input -> input.require("0o")?.to(8) })
            .or(constantParser(10))

    /** Optional sign: `+` -> 1, `-` -> -1; otherwise 1 without consuming input. */
    private val sign =
        parser { input -> input.require("+")?.to(1) }
            .or(parser { input -> input.require("-")?.to(-1) })
            .or(constantParser(1))

    /** Parses `[sign][radix prefix]digits` into a [Long]. */
    val int = parser { input ->
        input.map(sign)?.map(intBase)?.let { (rest, signAndRadix) ->
            val (signum, radix) = signAndRadix
            val digits = when (radix) {
                16 -> "0123456789abcdefABCDEF"
                10 -> "0123456789"
                8 -> "01234567"
                2 -> "01"
                else -> error("wtf")
            }
            rest.asLongAs(*digits.toCharArray()) { text ->
                text.toLongOrNull(radix)?.times(signum)
            }
        }
    }

    /** Parses `[sign]` followed by a run of decimal digits and dots into a [Double]. */
    val float = parser { input ->
        input.map(sign)?.let { (rest, signum) ->
            rest.asLongAs(*"0123456789.".toCharArray()) { text ->
                text.toDoubleOrNull()?.times(signum)
            }
        }
    }
}
|
||||
|
||||
/** Parses the integer literal at the start of [str] with [NumParse.int]; `null` if none is found. */
fun parseInt(str: String): Long? {
    val parsed = NumParse.int(Parsable(str))
    return parsed?.second
}
|
||||
|
||||
/** Parses the decimal literal at the start of [str] with [NumParse.float]; `null` if none is found. */
fun parseDouble(str: String): Double? {
    val parsed = NumParse.float(Parsable(str))
    return parsed?.second
}
|
@@ -1,109 +0,0 @@
|
||||
package blitz.parse.comb
|
||||
|
||||
import blitz.str.collectToString
|
||||
|
||||
/**
 * Remaining parser input.
 *
 * @property str the text that has not been consumed yet
 * @property loc offset attached to [str], or `null` when offsets are not tracked
 */
data class Parsable(
    val str: String,
    val loc: Int? = null,
)
|
||||
|
||||
typealias Parser<T> = (Parsable) -> Pair<Parsable, T>?
|
||||
|
||||
/** Identity helper that gives a lambda the [Parser] type so its result type can be inferred. */
fun <T> parser(fn: (Parsable) -> Pair<Parsable, T>?): Parser<T> {
    return fn
}
|
||||
|
||||
/** Wraps this parser so that whitespace before and after its match is skipped. */
fun <T> Parser<T>.trim(): Parser<T> {
    val inner = this
    return parser { input ->
        val parsed = inner(input.whitespaces()) ?: return@parser null
        parsed.whitespaces()
    }
}
|
||||
|
||||
/** Parser that always succeeds with [const] and leaves the input untouched. */
fun <T> constantParser(const: T): Parser<T> = { input -> Pair(input, const) }
|
||||
|
||||
/** Tries this parser first and falls back to [other], on the same input, when it yields `null`. */
infix fun <T> Parser<T>.or(other: Parser<T>): Parser<T> {
    val primary = this
    return { input -> primary(input) ?: other(input) }
}
|
||||
|
||||
/** Drops leading space characters (`' '` only) and advances [Parsable.loc] by the number dropped. */
fun Parsable.spaces(): Parsable {
    val rest = str.trimStart(' ')
    val skipped = str.length - rest.length
    return Parsable(rest, loc?.plus(skipped))
}
|
||||
|
||||
/** Drops all leading whitespace and advances [Parsable.loc] by the number of characters dropped. */
fun Parsable.whitespaces(): Parsable {
    val rest = str.trimStart()
    val skipped = str.length - rest.length
    return Parsable(rest, loc?.plus(skipped))
}
|
||||
|
||||
/** Consumes the literal prefix [what]; returns `null` when the input does not start with it. */
fun Parsable.require(what: String): Parsable? =
    if (str.startsWith(what)) {
        Parsable(str.drop(what.length), loc?.plus(what.length))
    } else {
        null
    }
|
||||
|
||||
/**
 * Consumes everything up to and including the first occurrence of the delimiter [c] and
 * converts the text before the delimiter with [map].
 *
 * Returns `null` when [c] does not occur in the input or when [map] rejects the text.
 * The location of the returned [Parsable] is advanced past the delimiter as well, so it
 * stays in step with the text that was actually consumed.
 */
fun <T> Parsable.untilRequire(c: String, map: (String) -> T?): Pair<Parsable, T>? {
    val at = str.indexOf(c)
    // The delimiter is required: without it there is nothing well-defined to consume.
    if (at < 0) return null

    val before = str.substring(0, at)
    val consumed = at + c.length
    return map(before)?.let { value ->
        Parsable(str.substring(consumed), loc?.plus(consumed)) to value
    }
}
|
||||
|
||||
/**
 * Consumes the longest prefix made only of characters in [li] and converts it with [map].
 *
 * The prefix may be empty; it is still handed to [map], which decides whether that is
 * acceptable. Returns `null` when [map] rejects the prefix.
 */
fun <T> Parsable.asLongAs(vararg li: Char, map: (String) -> T?): Pair<Parsable, T>? {
    val matched = str.takeWhile { it in li }
    return map(matched)?.let { value ->
        Parsable(str.substring(matched.length), loc?.plus(matched.length)) to value
    }
}
|
||||
|
||||
/** Runs [parser] on this input; reads as a pipeline step at the call site. */
fun <T> Parsable.map(parser: Parser<T>): Pair<Parsable, T>? {
    return parser.invoke(this)
}
|
||||
|
||||
/** Feeds the remaining input and the value parsed so far into [fn]. */
fun <T, R> Pair<Parsable, T>.map(fn: (Parsable, T) -> Pair<Parsable, R>?): Pair<Parsable, R>? {
    val (rest, value) = this
    return fn(rest, value)
}
|
||||
|
||||
/**
 * Runs [parser] on the remaining input and pairs its value with the value parsed so far.
 * Returns `null` when [parser] fails.
 */
fun <A, B> Pair<Parsable, A>.map(parser: Parser<B>): Pair<Parsable, Pair<A, B>>? {
    val (rest, left) = this
    val (next, right) = parser.invoke(rest) ?: return null
    return next to (left to right)
}
|
||||
|
||||
/** Transforms the remaining input with [fn] and keeps the parsed value. */
fun <T> Pair<Parsable, T>.mapFirst(fn: (Parsable) -> Parsable): Pair<Parsable, T> {
    return Pair(fn(first), second)
}
|
||||
|
||||
/** Like `mapFirst`, but the whole result becomes `null` when [fn] returns `null`. */
fun <T> Pair<Parsable, T>.mapFirstNullable(fn: (Parsable) -> Parsable?): Pair<Parsable, T>? {
    val moved = fn(first) ?: return null
    return moved to second
}
|
||||
|
||||
/** Transforms the parsed value with [fn] and keeps the remaining input. */
fun <T, R> Pair<Parsable, T>.mapSecond(fn: (T) -> R): Pair<Parsable, R> {
    return Pair(first, fn(second))
}
|
||||
|
||||
/** Skips leading spaces in the remaining input and keeps the parsed value. */
fun <T> Pair<Parsable, T>.spaces(): Pair<Parsable, T> {
    val (cursor, value) = this
    return cursor.spaces() to value
}
|
||||
|
||||
/** Skips leading whitespace in the remaining input and keeps the parsed value. */
fun <T> Pair<Parsable, T>.whitespaces(): Pair<Parsable, T> {
    val (cursor, value) = this
    return cursor.whitespaces() to value
}
|
||||
|
||||
/** Consumes the literal [what] from the remaining input; `null` when it is not there. */
fun <T> Pair<Parsable, T>.require(what: String): Pair<Parsable, T>? {
    val advanced = first.require(what) ?: return null
    return advanced to second
}
|
||||
|
||||
/**
 * Parses zero or more elements separated by the literal [sep].
 *
 * [map] parses one element. Parsing stops at the first element that fails or at the
 * first missing separator; the elements collected so far are returned together with the
 * remaining input. This function never fails: with no element it returns an empty list.
 *
 * The returned [Parsable] carries a location that accounts for every consumed separator
 * as well as every element.
 */
fun <T> Parsable.array(sep: String, map: (Parsable) -> Pair<Parsable, T>?): Pair<Parsable, List<T>> {
    val out = mutableListOf<T>()

    // A single cursor keeps text and location together, so neither can drift from the other.
    var cursor = this
    while (true) {
        val (afterElement, value) = map(cursor) ?: break
        out.add(value)
        cursor = afterElement
        cursor = cursor.require(sep) ?: break
    }

    return cursor to out
}
|
@@ -1,36 +0,0 @@
|
||||
package blitz.parse.comb
|
||||
|
||||
/**
 * Parses a double-quoted string literal at the start of the input.
 *
 * Supported escapes are `\"`, `\\`, `\n`, `\r`, `\b` and `\t`. On success the remaining
 * input starts right after the closing quote and the decoded text is returned.
 *
 * Returns `null` when the input does not start with `"`, when an unknown or dangling
 * escape is found, or when the closing quote is missing.
 */
fun Parsable.stringWithEscape(): Pair<Parsable, String>? {
    if (str.firstOrNull() != '"') return null

    val out = StringBuilder()
    var index = 1
    while (index < str.length) {
        when (val c = str[index]) {
            '"' -> {
                val consumed = index + 1
                return Parsable(str.substring(consumed), loc?.plus(consumed)) to out.toString()
            }
            '\\' -> {
                // getOrNull covers a backslash that is the very last character.
                val decoded = when (str.getOrNull(index + 1)) {
                    '"' -> '"'
                    '\\' -> '\\'
                    'n' -> '\n'
                    'r' -> '\r'
                    'b' -> '\b'
                    't' -> '\t'
                    else -> return null
                }
                out.append(decoded)
                index += 2
            }
            else -> {
                out.append(c)
                index++
            }
        }
    }

    // Ran out of input before the closing quote.
    return null
}
|
@@ -1,10 +1,8 @@
|
||||
package blitz.parse.comb2
|
||||
|
||||
import blitz.Either
|
||||
import blitz.Provider
|
||||
import blitz.*
|
||||
import blitz.collections.RefVec
|
||||
import blitz.collections.contents
|
||||
import blitz.partiallyFlattenA
|
||||
import blitz.partiallyFlattenB
|
||||
import blitz.str.charsToString
|
||||
|
||||
data class ParseCtx<I>(
|
||||
@@ -21,175 +19,185 @@ data class ParseError(
|
||||
val message: String?,
|
||||
)
|
||||
|
||||
typealias ParseResult<O> = Either<O, List<ParseError>>
|
||||
typealias ParseResult<O> = Either<O, RefVec<ParseError>>
|
||||
typealias Parser<I, O> = (ParseCtx<I>) -> ParseResult<O>
|
||||
|
||||
inline fun <I, M, O> Parser<I, M>.mapValue(crossinline fn: (M) -> O): Parser<I, O> =
|
||||
{ invoke(it).mapA { fn(it) } }
|
||||
inline fun <I, M: Any, O: Any> mapValue(crossinline self: Parser<I, M>, crossinline fn: (M) -> O): Parser<I, O> =
|
||||
{ self(it).mapA { fn(it) } }
|
||||
|
||||
inline fun <I, O> Parser<I, O>.mapErrors(crossinline fn: (List<ParseError>) -> List<ParseError>): Parser<I, O> =
|
||||
{ invoke(it).mapB { fn(it) } }
|
||||
inline fun <I, O: Any> mapErrors(crossinline self: Parser<I, O>, crossinline fn: (RefVec<ParseError>) -> RefVec<ParseError>): Parser<I, O> =
|
||||
{ self(it).mapB { fn(it) } }
|
||||
|
||||
fun <I, M, O> Parser<I, M>.then(other: Parser<I, O>): Parser<I, Pair<M, O>> =
|
||||
inline fun <I, M: Any, O: Any> then(crossinline self: Parser<I, M>, crossinline other: Parser<I, O>): Parser<I, Pair<M, O>> =
|
||||
{ ctx ->
|
||||
invoke(ctx).mapA { first ->
|
||||
self(ctx).flatMapA<_,_,Pair<M,O>> { first ->
|
||||
other.invoke(ctx)
|
||||
.mapA { first to it }
|
||||
}.partiallyFlattenA()
|
||||
}
|
||||
}
|
||||
|
||||
fun <I, O, T> Parser<I, O>.thenIgnore(other: Parser<I, T>): Parser<I, O> =
|
||||
inline fun <I, M: Any, O: Any> thenOverwrite(crossinline self: Parser<I, M>, crossinline other: Parser<I, O>): Parser<I, O> =
|
||||
{ ctx ->
|
||||
invoke(ctx).mapA { first ->
|
||||
self(ctx).flatMapA<_,_,O> {
|
||||
other.invoke(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
inline fun <I, O: Any, T: Any> thenIgnore(crossinline self: Parser<I, O>, crossinline other: Parser<I, T>): Parser<I, O> =
|
||||
{ ctx ->
|
||||
self(ctx).flatMapA { first ->
|
||||
other.invoke(ctx)
|
||||
.mapA { first }
|
||||
}.partiallyFlattenA()
|
||||
}
|
||||
}
|
||||
|
||||
fun <I, O> Parser<I, O>.orElseVal(value: O): Parser<I, O> =
|
||||
orElse { Either.ofA(value) }
|
||||
inline fun <I, O: Any> orElseVal(crossinline self: Parser<I, O>, value: O): Parser<I, O> =
|
||||
orElse(self) { Either.ofA(value) }
|
||||
|
||||
fun <I, O: Any> Parser<I, O>.orNot(): Parser<I, O?> =
|
||||
orElse { Either.ofA(null) }
|
||||
|
||||
fun <I, O, R> Parser<I, O>.orElse(other: Parser<I, R>): Parser<I, R> where O: R =
|
||||
inline fun <I, O, R: Any> orElse(crossinline self: Parser<I, O>, crossinline other: Parser<I, R>): Parser<I, R> where O: R =
|
||||
{
|
||||
val old = it.copy()
|
||||
this(it).mapB { err ->
|
||||
it.loadFrom(old)
|
||||
val old = it.idx
|
||||
self(it).mapB { err ->
|
||||
it.idx = old
|
||||
other.invoke(it)
|
||||
.mapB { err + it }
|
||||
.mapB { err.pushBack(it); err }
|
||||
}.partiallyFlattenB()
|
||||
}
|
||||
|
||||
fun <I, O> choose(possible: Iterable<Parser<I, O>>): Parser<I, O> =
|
||||
fun <I, O: Any> choose(possible: Iterable<Parser<I, O>>): Parser<I, O> =
|
||||
{ ctx ->
|
||||
val errors = mutableListOf<ParseError>()
|
||||
val errors = RefVec<ParseError>(possible.count())
|
||||
var res: O? = null
|
||||
for (p in possible) {
|
||||
val old = ctx.copy()
|
||||
val old = ctx.idx
|
||||
val t = p.invoke(ctx)
|
||||
if (t.isA) {
|
||||
res = t.getA()
|
||||
res = t.a!!
|
||||
break
|
||||
} else {
|
||||
ctx.loadFrom(old)
|
||||
errors += t.getB()
|
||||
ctx.idx = old
|
||||
errors.pushBack(t.b!!)
|
||||
}
|
||||
}
|
||||
res?.let { Either.ofA(it) }
|
||||
?: Either.ofB(errors)
|
||||
}
|
||||
|
||||
fun <I, O> choose(vararg possible: Parser<I, O>): Parser<I, O> =
|
||||
fun <I, O: Any> choose(vararg possible: Parser<I, O>): Parser<I, O> =
|
||||
choose(possible.toList())
|
||||
|
||||
fun <I, O> Parser<I, O>.repeated(): Parser<I, List<O>> =
|
||||
inline fun <I, O: Any> repeated(crossinline what: Parser<I, O>): Parser<I, RefVec<O>> =
|
||||
{ ctx ->
|
||||
val out = mutableListOf<O>()
|
||||
val out = RefVec<O>(0)
|
||||
while (true) {
|
||||
val old = ctx.copy()
|
||||
val t = invoke(ctx)
|
||||
val old = ctx.idx
|
||||
val t = what(ctx)
|
||||
if (t.isA) {
|
||||
out += t.getA()
|
||||
out.pushBack(t.a!!)
|
||||
} else {
|
||||
ctx.loadFrom(old)
|
||||
ctx.idx = old
|
||||
break
|
||||
}
|
||||
}
|
||||
Either.ofA(out)
|
||||
}
|
||||
|
||||
inline fun <I, O> Parser<I, O>.verifyValue(crossinline verif: (O) -> String?): Parser<I, O> =
|
||||
inline fun <I, O: Any> repeatedNoSave(crossinline what: Parser<I, O>): Parser<I, Unit> =
|
||||
{ ctx ->
|
||||
invoke(ctx).mapA<ParseResult<O>> {
|
||||
verif(it)?.let { Either.ofB(listOf(ParseError(ctx.idx, it))) }
|
||||
?: Either.ofA(it)
|
||||
}.partiallyFlattenA()
|
||||
}
|
||||
|
||||
inline fun <I, O> Parser<I, Pair<IntRange, O>>.verifyValueWithSpan(crossinline fn: (O) -> String?): Parser<I, O> =
|
||||
{ ctx ->
|
||||
invoke(ctx).mapA<ParseResult<O>> { (span, v) ->
|
||||
fn(v)?.let { Either.ofB(listOf(ParseError(span.first, it))) }
|
||||
?: Either.ofA(v)
|
||||
}.partiallyFlattenA()
|
||||
}
|
||||
|
||||
fun <I, O: Any?> Parser<I, O?>.errIfNull(msg: String = "parser value was null internally"): Parser<I, O> =
|
||||
verifyValue { if (it == null) msg else null }
|
||||
.mapValue { it!! }
|
||||
|
||||
inline fun <I, O> location(crossinline fn: (Int) -> O): Parser<I, O> =
|
||||
{ Either.ofA(fn(it.idx)) }
|
||||
|
||||
fun <I> location(): Parser<I, Int> =
|
||||
location { it }
|
||||
|
||||
fun <I, O> withSpan(p: Parser<I, O>): Parser<I, Pair<IntRange, O>> =
|
||||
location<I>()
|
||||
.then(p)
|
||||
.then(location())
|
||||
.mapValue { (beginAndV, end) ->
|
||||
(beginAndV.first..end) to beginAndV.second
|
||||
}
|
||||
|
||||
fun <I, O> value(value: O): Parser<I, O> =
|
||||
{ Either.ofA(value) }
|
||||
|
||||
fun <I, O> chain(parsers: List<Parser<I, O>>): Parser<I, List<O>> =
|
||||
{ ctx ->
|
||||
val results = mutableListOf<O>()
|
||||
val errs = mutableListOf<ParseError>()
|
||||
for (p in parsers) {
|
||||
val r = p.invoke(ctx)
|
||||
if (r.isA) {
|
||||
results += r.getA()
|
||||
} else {
|
||||
errs += r.getB()
|
||||
while (true) {
|
||||
val old = ctx.idx
|
||||
val t = what(ctx)
|
||||
if (t.isB) {
|
||||
ctx.idx = old
|
||||
break
|
||||
}
|
||||
}
|
||||
if (errs.isNotEmpty()) Either.ofB(errs)
|
||||
else Either.ofA(results)
|
||||
Either.ofA(Unit)
|
||||
}
|
||||
|
||||
fun <I> seq(want: List<I>): Parser<I, List<I>> =
|
||||
chain(want.map(::just))
|
||||
|
||||
inline fun <I> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
|
||||
inline fun <I, O: Any> verifyValue(crossinline self: Parser<I, O>, crossinline verif: (O) -> String?): Parser<I, O> =
|
||||
{ ctx ->
|
||||
if (ctx.idx >= ctx.input.size) {
|
||||
Either.ofB(listOf(ParseError(ctx.idx, "unexpected end of file")))
|
||||
} else {
|
||||
val i = ctx.input[ctx.idx++]
|
||||
if (filter(i)) Either.ofA(i)
|
||||
else Either.ofB(listOf(ParseError(ctx.idx - 1, msg)))
|
||||
self(ctx).flatMapA<_,_,_> {
|
||||
verif(it)?.let { Either.ofB(RefVec.of(ParseError(ctx.idx, it))) }
|
||||
?: Either.ofA(it)
|
||||
}
|
||||
}
|
||||
|
||||
fun <I> just(want: I): Parser<I, I> =
|
||||
inline fun <I, O: Any> verifyValueWithSpan(crossinline self: Parser<I, Pair<IntRange, O>>, crossinline fn: (O) -> String?): Parser<I, O> =
|
||||
{ ctx ->
|
||||
self(ctx).flatMapA<_,_,_> { (span, v) ->
|
||||
fn(v)?.let { Either.ofB(RefVec.of(ParseError(span.first, it))) }
|
||||
?: Either.ofA(v)
|
||||
}
|
||||
}
|
||||
|
||||
inline fun <I, O: Any> location(crossinline fn: (Int) -> O): Parser<I, O> =
|
||||
{ Either.ofA(fn(it.idx)) }
|
||||
|
||||
inline fun <I> location(): Parser<I, Int> =
|
||||
location { it }
|
||||
|
||||
inline fun <I, O: Any> withSpan(crossinline p: Parser<I, O>): Parser<I, Pair<IntRange, O>> =
|
||||
mapValue(then(then(location(), p), location())) { (beginAndV, end) ->
|
||||
(beginAndV.first..end) to beginAndV.second
|
||||
}
|
||||
|
||||
inline fun <I, O: Any> value(value: O): Parser<I, O> =
|
||||
{ Either.ofA(value) }
|
||||
|
||||
fun <I, O: Any> chain(parsers: List<Parser<I, O>>): Parser<I, RefVec<O>> =
|
||||
{ ctx ->
|
||||
val results = RefVec<O>(parsers.size)
|
||||
val errs = RefVec<ParseError>(0)
|
||||
for (p in parsers) {
|
||||
val r = p.invoke(ctx)
|
||||
if (r.isA) {
|
||||
results.pushBack(r.a!!)
|
||||
} else {
|
||||
errs.pushBack(r.b!!)
|
||||
break
|
||||
}
|
||||
}
|
||||
if (errs.size != 0) Either.ofB(errs)
|
||||
else Either.ofA(results)
|
||||
}
|
||||
|
||||
inline fun <I: Any> seq(want: List<I>): Parser<I, RefVec<I>> =
|
||||
chain(want.map(::just))
|
||||
|
||||
inline fun <I: Any> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
|
||||
{ ctx ->
|
||||
if (ctx.idx >= ctx.input.size) {
|
||||
Either.ofB(RefVec.of(ParseError(ctx.idx, "unexpected end of file")))
|
||||
} else {
|
||||
val i = ctx.input[ctx.idx++]
|
||||
if (filter(i)) Either.ofA(i)
|
||||
else Either.ofB(RefVec.of(ParseError(ctx.idx - 1, msg)))
|
||||
}
|
||||
}
|
||||
|
||||
inline fun <I: Any> just(want: I): Parser<I, I> =
|
||||
filter("expected $want") { it == want }
|
||||
|
||||
fun <I> oneOf(possible: Iterable<I>): Parser<I, I> =
|
||||
inline fun <I: Any> oneOf(possible: Iterable<I>): Parser<I, I> =
|
||||
filter("expected one of ${possible.contents}") { it in possible }
|
||||
|
||||
fun <I, O> future(prov: Provider<Parser<I, O>>): Parser<I, O> =
|
||||
inline fun <I, O: Any> future(crossinline prov: Provider<Parser<I, O>>): Parser<I, O> =
|
||||
{ prov()(it) }
|
||||
|
||||
inline fun <I, O> futureRec(crossinline fn: (future: Parser<I, O>) -> Parser<I, O>): Parser<I, O> {
|
||||
inline fun <I, O: Any> futureRec(fn: (future: Parser<I, O>) -> Parser<I, O>): Parser<I, O> {
|
||||
lateinit var f: Parser<I, O>
|
||||
f = fn(future { f })
|
||||
f = fn({ f(it) })
|
||||
return f
|
||||
}
|
||||
|
||||
/** group values 0 is the entire match */
|
||||
fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
|
||||
fun <O: Any> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
|
||||
{ ctx ->
|
||||
pattern.matchAt(ctx.input.charsToString(), ctx.idx)?.let {
|
||||
ctx.idx = it.range.last + 1
|
||||
Either.ofA(fn(it.groups))
|
||||
} ?: Either.ofB(listOf(
|
||||
} ?: Either.ofB(RefVec.of(
|
||||
ParseError(ctx.idx, "regular expression \"$pattern\" does not apply")
|
||||
))
|
||||
}
|
||||
@@ -197,7 +205,7 @@ fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<C
|
||||
fun regex(pattern: Regex) = regex(pattern) { it[0]!!.value }
|
||||
|
||||
/** group values 0 is the entire match */
|
||||
fun <O> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
|
||||
fun <O: Any> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
|
||||
regex(Regex(pattern), fn)
|
||||
|
||||
fun regex(pattern: String) = regex(pattern) { it[0]!!.value }
|
@@ -1,67 +1,63 @@
|
||||
package blitz.parse.comb2
|
||||
|
||||
import blitz.collections.RefVec
|
||||
import blitz.str.charsToString
|
||||
import kotlin.math.absoluteValue
|
||||
import kotlin.math.sign
|
||||
|
||||
fun whitespaces(): Parser<Char, String> =
|
||||
oneOf("\n\t\r\b ".toList())
|
||||
.repeated()
|
||||
.mapValue { it.charsToString() }
|
||||
fun whitespaces(): Parser<Char, Unit> =
|
||||
repeatedNoSave(oneOf("\n\t\r\b ".toList()))
|
||||
|
||||
fun digit(): Parser<Char, Char> =
|
||||
oneOf("0123456789".toList())
|
||||
|
||||
fun uintLit(): Parser<Char, UInt> =
|
||||
withSpan(digit().repeated())
|
||||
.verifyValueWithSpan { if (it.isEmpty()) "need digits after sign in num lit" else null }
|
||||
.mapValue { it.charsToString().toUInt() }
|
||||
fun uintLit(): Parser<Char, RefVec<Char>> =
|
||||
verifyValueWithSpan(withSpan(repeated(digit())))
|
||||
{ if (it.size == 0) "need digits after sign in num lit" else null }
|
||||
|
||||
fun intLit(): Parser<Char, Int> =
|
||||
choose(just('+').mapValue { +1 },
|
||||
just('-').mapValue { -1 },
|
||||
value(+1))
|
||||
.then(uintLit())
|
||||
.mapValue { (sign, v) -> sign * v.toInt() }
|
||||
mapValue(then(choose(mapValue(just('+')) { +1 },
|
||||
mapValue(just('-')) { -1 },
|
||||
value(+1)),
|
||||
uintLit()))
|
||||
{ (sign, v) -> sign * v.charsToString().toInt() }
|
||||
|
||||
/**
 * Parses a decimal literal: optional sign, integer digits, and an optional `.digits`
 * fraction. A literal without a fraction is accepted and read as `<digits>.0`.
 *
 * The sign is parsed separately from the digits so that values with a zero integer part
 * keep their sign and magnitude, and the value is converted from the literal text so
 * that leading zeros in the fraction are preserved.
 */
fun floatLit(): Parser<Char, Double> =
    mapValue(
        then(
            then(
                choose(mapValue(just('+')) { 1.0 },
                    mapValue(just('-')) { -1.0 },
                    value(1.0)),
                uintLit()),
            // The fraction is optional; on failure orElseVal rewinds to before the '.'.
            orElseVal(
                thenOverwrite(just('.'), uintLit()),
                RefVec.of('0'))))
    { (signAndWhole, fraction) ->
        val (sign, whole) = signAndWhole
        sign * "${whole.charsToString()}.${fraction.charsToString()}".toDouble()
    }
|
||||
|
||||
fun escapeChar(): Parser<Char, Char> =
|
||||
just('\\').then(
|
||||
choose(just('"'),
|
||||
thenOverwrite(just('\\'),
|
||||
mapErrors(choose(just('"'),
|
||||
just('\''),
|
||||
just('\\'),
|
||||
just('n').mapValue { '\n' },
|
||||
just('r').mapValue { '\r' },
|
||||
just('b').mapValue { '\b' },
|
||||
just('t').mapValue { '\t' })
|
||||
.mapErrors { listOf(ParseError(it.first().loc, "invalid escape sequence")) }
|
||||
).mapValue { it.second }
|
||||
mapValue(just('n')) { '\n' },
|
||||
mapValue(just('r')) { '\r' },
|
||||
mapValue(just('b')) { '\b' },
|
||||
mapValue(just('t')) { '\t' }))
|
||||
{ RefVec.of(ParseError(it[0].loc, "invalid escape sequence")) }
|
||||
)
|
||||
|
||||
fun stringLit(): Parser<Char, String> =
|
||||
just('"')
|
||||
.then(choose(escapeChar(),
|
||||
filter("a") { it != '"' })
|
||||
.repeated())
|
||||
.thenIgnore(just('"'))
|
||||
.mapValue { (_, str) -> str.charsToString() }
|
||||
mapValue(thenIgnore(then(just('"'),
|
||||
repeated(choose(escapeChar(),
|
||||
filter("a") { it != '"' }))),
|
||||
just('"')))
|
||||
{ (_, str) -> str.charsToString() }
|
||||
|
||||
fun <I, O, T> Parser<I, O>.delimitedBy(delim: Parser<I, T>): Parser<I, List<O>> =
|
||||
thenIgnore(delim)
|
||||
.repeated()
|
||||
.then(this)
|
||||
.mapValue { (a, b) -> a + b }
|
||||
.orElse(value(listOf()))
|
||||
inline fun <I, O: Any, T: Any> delimitedBy(crossinline self: Parser<I, O>, crossinline delim: Parser<I, T>): Parser<I, RefVec<O>> =
|
||||
orElse(mapValue(then(repeated(thenIgnore(self, delim)), self))
|
||||
{ (a, b) -> a.pushBack(b); a },
|
||||
value(RefVec.of()))
|
Reference in New Issue
Block a user