improve parser perf

This commit is contained in:
alex_s168
2024-09-19 21:55:17 +00:00
parent 39f34ee77b
commit 8c2325bdd3
15 changed files with 606 additions and 455 deletions

View File

@@ -15,6 +15,10 @@ dependencies {
testImplementation("org.jetbrains.kotlin:kotlin-test") testImplementation("org.jetbrains.kotlin:kotlin-test")
implementation("org.jetbrains.kotlinx:kotlinx-io-core:0.3.1") implementation("org.jetbrains.kotlinx:kotlinx-io-core:0.3.1")
implementation("org.jetbrains.kotlinx:kotlinx-io-bytestring:0.3.1") implementation("org.jetbrains.kotlinx:kotlinx-io-bytestring:0.3.1")
// https://mvnrepository.com/artifact/org.json/json
implementation("org.json:json:20240303")
} }
tasks.test { tasks.test {

View File

@@ -1,29 +1,17 @@
package blitz package blitz
class Either<A, B> private constructor( class Either<A: Any, B: Any>(
private val a: Obj<A>?, val a: A?,
private val b: Obj<B>? val b: B?
) { ) {
override fun equals(other: Any?): Boolean = override fun equals(other: Any?): Boolean =
other is Either<*, *> && other.a == a && other.b == b other is Either<*, *> && other.a == a && other.b == b
fun getAOrNull(): A? = fun assertA(): A =
a?.v (a ?: throw Exception("Value of Either is not of type A!"))
fun getA(): A = fun assertB(): B =
(a ?: throw Exception("Value of Either is not of type A!")).v (b ?: throw Exception("Value of Either is not of type B!"))
fun getAOr(prov: Provider<A>): A =
getAOrNull() ?: prov()
fun getBOrNull(): B? =
b?.v
fun getB(): B =
(b ?: throw Exception("Value of Either is not of type B!")).v
fun getBOr(prov: Provider<B>): B =
getBOrNull() ?: prov()
val isA: Boolean = val isA: Boolean =
a != null a != null
@@ -31,18 +19,9 @@ class Either<A, B> private constructor(
val isB: Boolean = val isB: Boolean =
b != null b != null
fun <R> then(af: (A) -> R, bf: (B) -> R): R =
if (isA) af(a!!.v) else bf(b!!.v)
fun <RA> mapA(transform: (A) -> RA): Either<RA, B> =
Either(a.mapNotNull(transform), b)
fun <RB> mapB(transform: (B) -> RB): Either<A, RB> =
Either(a, b.mapNotNull(transform))
override fun toString(): String = override fun toString(): String =
if (isA) "Either<A>(${a!!.v})" if (isA) "Either<A>(${a!!})"
else "Either<B>(${b!!.v})" else "Either<B>(${b!!})"
override fun hashCode(): Int { override fun hashCode(): Int {
var result = a?.hashCode() ?: 0 var result = a?.hashCode() ?: 0
@@ -51,43 +30,80 @@ class Either<A, B> private constructor(
} }
companion object { companion object {
fun <A, B> ofA(a: A): Either<A, B> = inline fun <A: Any, B: Any> ofA(a: A): Either<A, B> =
Either(Obj.of(a), null) Either(a, null)
fun <A, B> ofB(b: B): Either<A, B> = inline fun <A: Any, B: Any> ofB(b: B): Either<A, B> =
Either(null, Obj.of(b)) Either(null, b)
} }
} }
fun <A, B, R> Either<A, B>.flatten(): R where A: R, B: R = inline fun <A: Any, B: Any> Either<A, B>.getAOr(prov: Provider<A>): A =
getAOrNull() ?: getB() a ?: prov()
fun <A, A2, B> Either<A, Either<A2, B>>.partiallyFlattenB(): Either<A2, B> where A: A2 = inline fun <A: Any, B: Any> Either<A, B>.getBOr(prov: Provider<B>): B =
mapA<Either<A2, B>> { Either.ofA(it) }.flatten() b ?: prov()
fun <A, B, B2> Either<Either<A, B2>, B>.partiallyFlattenA(): Either<A, B2> where B: B2 = inline fun <A: Any, B: Any, R> Either<A, B>.then(af: (A) -> R, bf: (B) -> R): R =
mapB<Either<A, B2>> { Either.ofB(it) }.flatten() if (isA) af(a!!) else bf(b!!)
fun <A, BA, BB, BAN> Either<A, Either<BA, BB>>.mapBA(fn: (BA) -> BAN): Either<A, Either<BAN, BB>> = inline fun <A: Any, B: Any, RA: Any> Either<A, B>.mapA(transform: (A) -> RA): Either<RA, B> =
Either(a?.let(transform), b)
/**
 * Chains [transform] onto the A side: when `a` is non-null the result of
 * [transform] is returned, otherwise this instance is returned unchanged.
 */
inline fun <A: Any, B: Any> Either<A, B>.flatMapA(transform: (A) -> Either<A, B>): Either<A, B> =
    a?.let(transform) ?: this
/**
 * Chains [transform] onto the B side: when `b` is non-null the result of
 * [transform] is returned, otherwise this instance is returned unchanged.
 */
inline fun <A: Any, B: Any> Either<A, B>.flatMapB(transform: (B) -> Either<A, B>): Either<A, B> =
    b?.let(transform) ?: this
/**
 * Type-changing variant of `flatMapA`: when `a` is non-null the result of
 * [transform] is returned; otherwise the B value is re-wrapped in an Either
 * with the new A type (`b` is asserted non-null in that case).
 */
@JvmName("flatMapA_changeType")
inline fun <A: Any, B: Any, RA: Any> Either<A, B>.flatMapA(transform: (A) -> Either<RA, B>): Either<RA, B> =
    when (val left = a) {
        null -> Either.ofB(b!!)
        else -> transform(left)
    }
/**
 * Type-changing variant of `flatMapB`: when `b` is non-null the result of
 * [transform] is returned; otherwise the A value is re-wrapped in an Either
 * with the new B type (`a` is asserted non-null in that case).
 */
@JvmName("flatMapB_changeType")
inline fun <A: Any, B: Any, RB: Any> Either<A, B>.flatMapB(transform: (B) -> Either<A, RB>): Either<A, RB> =
    when (val right = b) {
        null -> Either.ofA(a!!)
        else -> transform(right)
    }
/**
 * Builds a new Either whose B side is [transform] applied to `b` (or null
 * when `b` is null); the A side is carried over as-is.
 */
inline fun <A: Any, B: Any, RB: Any> Either<A, B>.mapB(transform: (B) -> RB): Either<A, RB> {
    val mapped: RB? = if (b != null) transform(b) else null
    return Either(a, mapped)
}
/**
 * Collapses an Either whose two sides share the supertype [R] into a plain [R]:
 * returns `a` when it is non-null, otherwise the result of `assertB()`.
 */
fun <A, B, R: Any> Either<A, B>.flatten(): R where A: R, B: R =
    if (a != null) a else assertB()
/**
 * Flattens an Either whose B side is itself an `Either<A2, B>`.
 *
 * The outer A value (a subtype of [A2]) is first wrapped with `Either.ofA<A2, B>`,
 * so both sides have the type `Either<A2, B>`; `flatten()` then returns whichever
 * side is present.
 */
fun <A, A2: Any, B: Any> Either<A, Either<A2, B>>.partiallyFlattenB(): Either<A2, B> where A: A2 =
mapA { Either.ofA<A2, B>(it) }.flatten()
/**
 * Flattens an Either whose A side is itself an `Either<A, B2>`.
 *
 * The outer B value (a subtype of [B2]) is first wrapped with `Either.ofB<A, B2>`,
 * so both sides have the type `Either<A, B2>`; `flatten()` then returns whichever
 * side is present.
 */
fun <A: Any, B, B2: Any> Either<Either<A, B2>, B>.partiallyFlattenA(): Either<A, B2> where B: B2 =
mapB { Either.ofB<A, B2>(it) }.flatten()
inline fun <A: Any, BA: Any, BB: Any, BAN: Any> Either<A, Either<BA, BB>>.mapBA(fn: (BA) -> BAN): Either<A, Either<BAN, BB>> =
mapB { it.mapA(fn) } mapB { it.mapA(fn) }
fun <A, BA, BB, BBN> Either<A, Either<BA, BB>>.mapBB(fn: (BB) -> BBN): Either<A, Either<BA, BBN>> = inline fun <A: Any, BA: Any, BB: Any, BBN: Any> Either<A, Either<BA, BB>>.mapBB(fn: (BB) -> BBN): Either<A, Either<BA, BBN>> =
mapB { it.mapB(fn) } mapB { it.mapB(fn) }
fun <AA, AB, B, AAN> Either<Either<AA, AB>, B>.mapAA(fn: (AA) -> AAN): Either<Either<AAN, AB>, B> = inline fun <AA: Any, AB: Any, B: Any, AAN: Any> Either<Either<AA, AB>, B>.mapAA(fn: (AA) -> AAN): Either<Either<AAN, AB>, B> =
mapA { it.mapA(fn) } mapA { it.mapA(fn) }
fun <AA, AB, B, ABN> Either<Either<AA, AB>, B>.mapAB(fn: (AB) -> ABN): Either<Either<AA, ABN>, B> = inline fun <AA: Any, AB: Any, B: Any, ABN: Any> Either<Either<AA, AB>, B>.mapAB(fn: (AB) -> ABN): Either<Either<AA, ABN>, B> =
mapA { it.mapB(fn) } mapA { it.mapB(fn) }
fun <AA, AB, B> Either<Either<AA, AB>, B>.getAAOrNull(): AA? = fun <AA: Any, AB: Any, B: Any> Either<Either<AA, AB>, B>.getAAOrNull(): AA? =
getAOrNull()?.getAOrNull() a?.a
fun <AA, AB, B> Either<Either<AA, AB>, B>.getABOrNull(): AB? = fun <AA: Any, AB: Any, B: Any> Either<Either<AA, AB>, B>.getABOrNull(): AB? =
getAOrNull()?.getBOrNull() a?.b
fun <A, BA, BB> Either<A, Either<BA, BB>>.getBAOrNull(): BA? = fun <A: Any, BA: Any, BB: Any> Either<A, Either<BA, BB>>.getBAOrNull(): BA? =
getBOrNull()?.getAOrNull() b?.a
fun <A, BA, BB> Either<A, Either<BA, BB>>.getBBOrNull(): BB? = fun <A: Any, BA: Any, BB: Any> Either<A, Either<BA, BB>>.getBBOrNull(): BB? =
getBOrNull()?.getBOrNull() b?.b

View File

@@ -21,10 +21,10 @@ interface Obj<T> {
} }
} }
fun <I, O> Obj<I>?.mapNotNull(transform: (I) -> O): Obj<O>? = inline fun <I, O> Obj<I>?.mapNotNull(transform: (I) -> O): Obj<O>? =
this?.v?.let { Obj.of(transform(it)) } this?.v?.let { Obj.of(transform(it)) }
fun <I, O> Obj<I>.map(transform: (I) -> O): Obj<O> = inline fun <I, O> Obj<I>.map(transform: (I) -> O): Obj<O> =
Obj.of(transform(v)) Obj.of(transform(v))
interface MutObj<T> { interface MutObj<T> {

View File

@@ -36,6 +36,13 @@ class ByteVec(private val initCap: Int = 0): Vec<Byte>, ByteBatchSequence {
cap = size + amount cap = size + amount
} }
/**
 * Makes sure there is room for [need] more bytes, reallocating only when the
 * spare capacity (`cap - size`) is too small.
 *
 * When a reallocation happens the new capacity is `size + wantIfRealloc`, so
 * callers can ask for a larger growth step than they strictly need. A
 * non-positive [need] always reallocates to `size + wantIfRealloc`.
 *
 * @param need number of additional elements that must fit
 * @param wantIfRealloc number of additional slots to allocate if growing
 */
override fun reserve(need: Int, wantIfRealloc: Int) {
    if (need > 0 && cap - size >= need)
        return
    // Never grow by less than what was asked for, otherwise the write that
    // follows this call would land outside the backing array.
    val extra = if (need > 0) maxOf(need, wantIfRealloc) else wantIfRealloc
    cap = size + extra
    array = array.copyOf(cap)
}
override fun popBack(): Byte = override fun popBack(): Byte =
array[size - 1].also { array[size - 1].also {
reserve(-1) reserve(-1)
@@ -110,7 +117,7 @@ class ByteVec(private val initCap: Int = 0): Vec<Byte>, ByteBatchSequence {
} }
override fun pushBack(elem: Byte) { override fun pushBack(elem: Byte) {
reserve(8) reserve(1, 8)
array[size] = elem array[size] = elem
size ++ size ++
} }

View File

@@ -0,0 +1,111 @@
package blitz.collections
import kotlin.system.measureTimeMillis
/**
 * Growable vector of references backed by an `Array<Any?>` that is allocated
 * on first use (or eagerly when [initCap] is positive).
 *
 * `size` is the number of live elements; the backing array may be larger.
 * [get] and [set] index the backing array directly and do not check the index
 * against `size`.
 */
@Suppress("UNCHECKED_CAST")
class RefVec<T>(private val initCap: Int = 0): Vec<T> {
    override var size = 0
    private var cap = initCap
    private var array: Array<Any?>? = if (initCap > 0) arrayOfNulls(initCap) else null

    /**
     * Resets the vector to empty. The backing array is kept when it is no
     * larger than the initial capacity, otherwise it is dropped.
     */
    override fun clear() {
        size = 0
        val current = array ?: return
        if (current.size <= initCap) {
            cap = current.size
        } else {
            cap = 0
            array = null
        }
    }

    /** Returns a fresh array holding the first `size` elements (empty if nothing was ever allocated). */
    fun copyAsArray(): Array<Any?> =
        array?.copyOfRange(0, size) ?: emptyArray()

    /**
     * Copies elements `[startOff, size)` into [arr] starting at [destOff].
     * Returns [arr], or null when there is no backing array to copy from.
     */
    fun copyIntoArray(arr: Array<Any?>, destOff: Int = 0, startOff: Int = 0) =
        array?.copyInto(arr, destOff, startOff, size)

    /** Returns an independent vector containing the same elements. */
    override fun copy(): RefVec<T> =
        RefVec<T>(size).also { dup ->
            dup.array?.let { copyIntoArray(it) }
            // The elements were copied above; without this the copy would report size 0.
            dup.size = size
        }

    /**
     * Ensures room for [amount] more elements. A non-positive [amount] always
     * reallocates to exactly `size + amount` slots (this is how [popBack] shrinks).
     */
    override fun reserve(amount: Int) {
        if (amount > 0 && cap - size >= amount)
            return
        resize(size + amount)
    }

    /**
     * Ensures room for [need] more elements; if a reallocation is required the
     * new capacity is `size + totalIfRealloc`, but never less than `size + need`.
     */
    override fun reserve(need: Int, totalIfRealloc: Int) {
        if (need > 0 && cap - size >= need)
            return
        // Never grow by less than what was asked for, otherwise the write that
        // follows this call would land outside the backing array.
        val extra = if (need > 0) maxOf(need, totalIfRealloc) else totalIfRealloc
        resize(size + extra)
    }

    /** Replaces the backing array with one of [newCap] slots, keeping existing contents. */
    private fun resize(newCap: Int) {
        array = array?.copyOf(newCap) ?: arrayOfNulls(newCap)
        cap = newCap
    }

    /** Removes and returns the last element; the backing array is shrunk by one slot. */
    override fun popBack(): T =
        array!![size - 1].also {
            reserve(-1)
            size --
        } as T

    override fun get(index: Int): T =
        array!![index] as T

    /** Reverses the order of the live elements in place. */
    override fun flip() {
        // Only the first `size` slots hold elements; reversing the whole backing
        // array would move the unused (null) capacity to the front.
        array?.reverse(0, size)
    }

    /** Appends [elem], growing the backing array by 8 slots when it is full. */
    override fun pushBack(elem: T) {
        reserve(1, 8)
        array!![size] = elem
        size ++
    }

    override fun iterator(): Iterator<T> =
        object : Iterator<T> {
            var index = 0

            override fun hasNext(): Boolean = index < size

            override fun next(): T {
                if (!hasNext())
                    throw NoSuchElementException()
                return array!![index++] as T
            }
        }

    override fun toString(): String =
        joinToString(prefix = "[", postfix = "]") { it.toString() }

    override fun set(index: Int, value: T) {
        array!![index] = value
    }

    companion object {
        /** Creates a vector holding a copy of [data]. */
        fun <T> from(data: Array<T>) =
            RefVec<T>(data.size).also {
                it.array?.let { data.copyInto(it) }
                it.size += data.size
            }

        /** Creates a vector by appending every element of [data] in iteration order. */
        fun <T> from(data: Iterable<T>) =
            RefVec<T>().also { bv ->
                data.forEach(bv::pushBack)
            }

        /** Creates a vector holding exactly [elements]. */
        fun <T> of(vararg elements: T): RefVec<T> =
            RefVec<T>(elements.size).also {
                it.array?.let { dest -> elements.copyInto(dest) }
                // Mirror from(Array): the copied elements must be counted as live.
                it.size = elements.size
            }
    }
}

View File

@@ -8,6 +8,9 @@ interface Vec<T>: IndexableSequence<T> {
fun copy(): Vec<T> fun copy(): Vec<T>
fun reserve(amount: Int) fun reserve(amount: Int)
/**
 * Two-argument reserve. This default implementation ignores [totalIfRealloc]
 * and simply forwards [need] to the single-argument `reserve`.
 */
fun reserve(need: Int, totalIfRealloc: Int): Unit =
    reserve(need)
fun pushBack(elem: T) fun pushBack(elem: T)
fun pushBack(elems: Array<T>) { fun pushBack(elems: Array<T>) {
@@ -18,6 +21,10 @@ interface Vec<T>: IndexableSequence<T> {
reserve(elems.size) reserve(elems.size)
elems.forEach(::pushBack) elems.forEach(::pushBack)
} }
/**
 * Appends every element of [elems] in iteration order, after reserving
 * room for `elems.size` additional elements.
 */
fun pushBack(elems: Vec<T>) {
    reserve(elems.size)
    elems.forEach { elem -> pushBack(elem) }
}
fun popBack(): T fun popBack(): T
fun popBack(dest: Array<T>) { fun popBack(dest: Array<T>) {

View File

@@ -1,20 +0,0 @@
package blitz.ice
/**
 * Wraps a value that is only handed out until [freeze] has been called.
 * Once frozen, [getOrNull] and [use] both yield null.
 */
class Cooled<T>(private val of: T): Freezable {
    private var frozen = false

    /** Marks this wrapper as frozen; nothing in this class resets the flag. */
    override fun freeze() {
        frozen = true
    }

    override fun isFrozen(): Boolean = frozen

    /** Returns the wrapped value, or null once frozen. */
    fun getOrNull(): T? =
        of.takeUnless { isFrozen() }

    /** Runs [block] on the wrapped value and returns its result, or null once frozen. */
    fun <R> use(block: (T) -> R): R? =
        if (!isFrozen()) block(of) else null
}

View File

@@ -1,10 +0,0 @@
package blitz.ice
/** Something that can be switched into a frozen state and queried for it. */
interface Freezable {
    /** Switches this object into the frozen state. */
    fun freeze()

    /** Reports whether this object is currently frozen. */
    fun isFrozen(): Boolean
}

/**
 * Runs [block] with this object and returns its result,
 * or returns null without calling [block] when the object is frozen.
 */
inline fun <R> Freezable.map(block: (Freezable) -> R): R? =
    when {
        isFrozen() -> null
        else -> block(this)
    }

View File

@@ -1,56 +1,65 @@
package blitz.parse package blitz.parse
import blitz.parse.comb2.* import blitz.parse.comb2.*
import org.json.JSONObject
import kotlin.math.min
import kotlin.system.measureNanoTime
object JSON { object JSON {
val jsonElement = futureRec { jsonElement: Parser<Char, Element> -> val jsonElement = futureRec { jsonElement: Parser<Char, Element> ->
val jsonNum: Parser<Char, Element> = floatLit() val jsonNum: Parser<Char, Element> =
.mapValue(::Number) mapValue(floatLit(), ::Number)
val jsonString: Parser<Char, Element> = stringLit() val jsonString: Parser<Char, Element> =
.mapValue(::Str) mapValue(stringLit(), ::Str)
val jsonArray: Parser<Char, Element> = just('[') val jsonArray: Parser<Char, Element> =
.then(jsonElement thenIgnore(
.delimitedBy(just(',')) thenIgnore(
.mapValue(::Array)) thenOverwrite(just('['),
.thenIgnore(whitespaces()) mapValue(delimitedBy(jsonElement, just(',')))
.thenIgnore(just(']')) { Array(it.toList())}),
.mapValue { it.second } whitespaces()),
just(']')
)
val jsonBool: Parser<Char, Element> = choose( val jsonBool: Parser<Char, Element> = choose(
seq("true".toList()).mapValue { Bool(true) }, mapValue(seq("true".toList())) { Bool(true) },
seq("false".toList()).mapValue { Bool(false) }, mapValue(seq("false".toList())) { Bool(false) },
) )
val jsonNull: Parser<Char, Element> = seq("null".toList()) val jsonNull: Parser<Char, Element> =
.mapValue { Nul() } mapValue(seq("null".toList())) { Nul() }
val jsonObj: Parser<Char, Element> = just('{') val jsonObj: Parser<Char, Element> =
.then( mapValue(thenIgnore(thenIgnore(thenOverwrite(
whitespaces() just('{'),
.then(stringLit()) delimitedBy(
.mapValue { it.second } then(
.thenIgnore(whitespaces()) thenIgnore(
.thenIgnore(just(':')) thenIgnore(
.then(jsonElement) thenOverwrite(
.delimitedBy(just(','))) whitespaces(),
.thenIgnore(whitespaces()) stringLit()),
.thenIgnore(just('}')) whitespaces()),
.mapValue { Obj(it.second.toMap()) } just(':')),
jsonElement),
just(','))),
whitespaces()),
just('}'))) { Obj(it.toMap()) }
whitespaces() thenIgnore(thenOverwrite(
.then(choose( whitespaces(),
choose(
jsonArray, jsonArray,
jsonNum, jsonNum,
jsonString, jsonString,
jsonObj, jsonObj,
jsonBool, jsonBool,
jsonNull jsonNull
)) )),
.thenIgnore(whitespaces()) whitespaces())
.mapValue { it.second }
} }
@@ -100,3 +109,205 @@ object JSON {
fun parse(string: String): ParseResult<Element> = fun parse(string: String): ParseResult<Element> =
jsonElement(ParseCtx(string.toList(), 0)) jsonElement(ParseCtx(string.toList(), 0))
} }
/**
 * Ad-hoc benchmark: repeatedly parses the same JSON document with this
 * project's [JSON] parser and with org.json's `JSONObject`, tracking the
 * fastest run of each and printing the ratio after every iteration.
 *
 * The loop never terminates on its own; the process has to be stopped manually.
 */
fun main() {
    val json = """
{
"clinical_study": {
"brief_summary": {
"textblock": "CLEAR SYNERGY is an international multi center 2x2 randomized placebo controlled trial of"
},
"brief_title": "CLEAR SYNERGY Neutrophil Substudy",
"overall_status": "Recruiting",
"eligibility": {
"study_pop": {
"textblock": "Patients who are randomized to the drug RCT portion of the CLEAR SYNERGY (OASIS 9) trial"
},
"minimum_age": "19 Years",
"sampling_method": "Non-Probability Sample",
"gender": "All",
"criteria": {
"textblock": "Inclusion Criteria:"
},
"healthy_volunteers": "No",
"maximum_age": "110 Years"
},
"number_of_groups": "2",
"source": "NYU Langone Health",
"location_countries": {
"country": "United States"
},
"study_design_info": {
"time_perspective": "Prospective",
"observational_model": "Other"
},
"last_update_submitted_qc": "September 10, 2019",
"intervention_browse": {
"mesh_term": "Colchicine"
},
"official_title": "Studies on the Effects of Colchicine on Neutrophil Biology in Acute Myocardial Infarction: A Substudy of the CLEAR SYNERGY (OASIS 9) Trial",
"primary_completion_date": {
"type": "Anticipated",
"content": "February 1, 2021"
},
"sponsors": {
"lead_sponsor": {
"agency_class": "Other",
"agency": "NYU Langone Health"
},
"collaborator": [
{
"agency_class": "Other",
"agency": "Population Health Research Institute"
},
{
"agency_class": "NIH",
"agency": "National Heart, Lung, and Blood Institute (NHLBI)"
}
]
},
"overall_official": {
"role": "Principal Investigator",
"affiliation": "NYU School of Medicine",
"last_name": "Binita Shah, MD"
},
"overall_contact_backup": {
"last_name": "Binita Shah, MD"
},
"condition_browse": {
"mesh_term": [
"Myocardial Infarction",
"ST Elevation Myocardial Infarction",
"Infarction"
]
},
"overall_contact": {
"phone": "646-501-9648",
"last_name": "Fatmira Curovic",
"email": "fatmira.curovic@nyumc.org"
},
"responsible_party": {
"responsible_party_type": "Principal Investigator",
"investigator_title": "Assistant Professor of Medicine",
"investigator_full_name": "Binita Shah",
"investigator_affiliation": "NYU Langone Health"
},
"study_first_submitted_qc": "March 12, 2019",
"start_date": {
"type": "Actual",
"content": "March 4, 2019"
},
"has_expanded_access": "No",
"study_first_posted": {
"type": "Actual",
"content": "March 14, 2019"
},
"arm_group": [
{
"arm_group_label": "Colchicine"
},
{
"arm_group_label": "Placebo"
}
],
"primary_outcome": {
"measure": "soluble L-selectin",
"time_frame": "between baseline and 3 months",
"description": "Change in soluble L-selectin between baseline and 3 mo after STEMI in the placebo vs. colchicine groups."
},
"secondary_outcome": [
{
"measure": "Other soluble markers of neutrophil activity",
"time_frame": "between baseline and 3 months",
"description": "Other markers of neutrophil activity will be evaluated at baseline and 3 months after STEMI (myeloperoxidase, matrix metalloproteinase-9, neutrophil gelatinase-associated lipocalin, neutrophil elastase, intercellular/vascular cellular adhesion molecules)"
},
{
"measure": "Markers of systemic inflammation",
"time_frame": "between baseline and 3 months",
"description": "Markers of systemic inflammation will be evaluated at baseline and 3 months after STEMI (high sensitive CRP, IL-1β)"
},
{
"measure": "Neutrophil-driven responses that may further propagate injury",
"time_frame": "between baseline and 3 months",
"description": "Neutrophil-driven responses that may further propagate injury will be evaluated at baseline and 3 months after STEMI (neutrophil extracellular traps, neutrophil-derived microparticles)"
}
],
"oversight_info": {
"is_fda_regulated_drug": "No",
"is_fda_regulated_device": "No",
"has_dmc": "No"
},
"last_update_posted": {
"type": "Actual",
"content": "September 12, 2019"
},
"id_info": {
"nct_id": "NCT03874338",
"org_study_id": "18-01323",
"secondary_id": "1R01HL146206"
},
"enrollment": {
"type": "Anticipated",
"content": "670"
},
"study_first_submitted": "March 12, 2019",
"condition": [
"Neutrophils.Hypersegmented | Bld-Ser-Plas",
"STEMI - ST Elevation Myocardial Infarction"
],
"study_type": "Observational",
"required_header": {
"download_date": "ClinicalTrials.gov processed this data on July 19, 2020",
"link_text": "Link to the current ClinicalTrials.gov record.",
"url": "https://clinicaltrials.gov/show/NCT03874338"
},
"last_update_submitted": "September 10, 2019",
"completion_date": {
"type": "Anticipated",
"content": "February 1, 2022"
},
"location": {
"contact": {
"phone": "646-501-9648",
"last_name": "Fatmira Curovic",
"email": "fatmira.curovic@nyumc.org"
},
"facility": {
"address": {
"zip": "10016",
"country": "United States",
"city": "New York",
"state": "New York"
},
"name": "NYU School of Medicine"
},
"status": "Recruiting",
"contact_backup": {
"last_name": "Binita Shah, MD"
}
},
"intervention": {
"intervention_type": "Drug",
"arm_group_label": [
"Colchicine",
"Placebo"
],
"description": "Participants in the main CLEAR SYNERGY trial are randomized to colchicine/spironolactone versus placebo in a 2x2 factorial design. The substudy is interested in the evaluation of biospecimens obtained from patients in the colchicine vs placebo group.",
"intervention_name": "Colchicine Pill"
},
"patient_data": {
"sharing_ipd": "No"
},
"verification_date": "September 2019"
}
}
""".trimIndent()

    // Best (smallest) observed time per parser, in nanoseconds.
    var minAlex = Long.MAX_VALUE
    var minJson = Long.MAX_VALUE

    while (true) {
        // `.a!!` makes the run fail loudly if the parse result has no A value.
        val alexNs = measureNanoTime { JSON.parse(json).a!! }
        if (alexNs < minAlex) minAlex = alexNs

        val jsonNs = measureNanoTime { JSONObject(json) }
        if (jsonNs < minJson) minJson = jsonNs

        println("alex: $minAlex ns, json-java: $minJson ns ; alex is ${ minJson.toFloat() / minAlex.toFloat() } times as fast as json-java")
    }
}

View File

@@ -1,43 +0,0 @@
package blitz.parse
import blitz.parse.comb.*
/**
 * Parsers for signed integer and floating-point literals.
 *
 * Integers accept an optional `+`/`-` sign followed by an optional radix
 * prefix (`0b`, `0x`, `0o`; decimal otherwise). Floats accept an optional
 * sign followed by a run of decimal digits and dots.
 */
object NumParse {
    /** Consumes a radix prefix and yields the radix; yields 10 without consuming when none matches. */
    private val intBase =
        parser { input -> input.require("0b")?.let { rest -> rest to 2 } } or
        parser { input -> input.require("0x")?.let { rest -> rest to 16 } } or
        parser { input -> input.require("0o")?.let { rest -> rest to 8 } } or
        constantParser(10)

    /** Consumes an explicit sign and yields +1 or -1; yields +1 without consuming when none is present. */
    private val sign =
        parser { input -> input.require("+")?.let { rest -> rest to 1 } } or
        parser { input -> input.require("-")?.let { rest -> rest to -1 } } or
        constantParser(1)

    /** Characters accepted as digits for the given radix. */
    private fun digitsFor(radix: Int): String =
        when (radix) {
            2 -> "01"
            8 -> "01234567"
            10 -> "0123456789"
            16 -> "0123456789abcdefABCDEF"
            else -> error("wtf")
        }

    val int = parser { input ->
        input.map(sign)?.map(intBase)?.map { rest, (signum, radix) ->
            val allowed = digitsFor(radix)
            rest.asLongAs(*allowed.toCharArray()) { digits ->
                digits.toLongOrNull(radix)?.times(signum)
            }
        }
    }

    val float = parser { input ->
        input.map(sign)?.map { rest, signum ->
            rest.asLongAs(*"0123456789.".toCharArray()) { text ->
                text.toDoubleOrNull()?.times(signum)
            }
        }
    }
}
/**
 * Runs [NumParse.int] on [str] and returns the parsed value, or null when the
 * parser does not match. Any input left over after the number is ignored.
 */
fun parseInt(str: String): Long? {
    val parsed = NumParse.int(Parsable(str)) ?: return null
    return parsed.second
}
/**
 * Runs [NumParse.float] on [str] and returns the parsed value, or null when the
 * parser does not match. Any input left over after the number is ignored.
 */
fun parseDouble(str: String): Double? {
    val parsed = NumParse.float(Parsable(str)) ?: return null
    return parsed.second
}

View File

@@ -1,109 +0,0 @@
package blitz.parse.comb
import blitz.str.collectToString
/**
 * Remaining parser input.
 *
 * @property str the text that has not been consumed yet
 * @property loc optional position counter that the combinators advance by the
 *   number of characters they consume; null when positions are not tracked
 */
data class Parsable(
    val str: String,
    val loc: Int? = null,
)
/**
 * A parser maps the remaining input to the advanced input paired with the
 * parsed value, or to null when it does not apply.
 */
typealias Parser<T> = (Parsable) -> Pair<Parsable, T>?

/** Returns [fn] unchanged; exists so a lambda can be typed as a [Parser] at the call site. */
fun <T> parser(fn: Parser<T>): Parser<T> = fn
/**
 * Wraps this parser so that leading whitespace is skipped before it runs and
 * trailing whitespace is skipped after it succeeds.
 */
fun <T> Parser<T>.trim(): Parser<T> {
    val inner = this
    return parser { input -> inner(input.whitespaces())?.whitespaces() }
}
/** Parser that always succeeds with [const] and consumes no input. */
fun <T> constantParser(const: T): Parser<T> =
    { input -> Pair(input, const) }
/**
 * Ordered choice: runs this parser and, only if it returns null,
 * runs [other] on the same input.
 */
infix fun <T> Parser<T>.or(other: Parser<T>): Parser<T> {
    val first = this
    return { input -> first(input) ?: other(input) }
}
/** Drops leading space characters (`' '` only) and advances `loc` by the number dropped. */
fun Parsable.spaces(): Parsable {
    val rest = str.trimStart(' ')
    val advanced = loc?.let { start -> start + str.length - rest.length }
    return Parsable(rest, advanced)
}
/** Drops leading whitespace (as defined by `String.trimStart()`) and advances `loc` by the number of characters dropped. */
fun Parsable.whitespaces(): Parsable {
    val rest = str.trimStart()
    val advanced = loc?.let { start -> start + str.length - rest.length }
    return Parsable(rest, advanced)
}
/**
 * Consumes the literal [what] from the start of the input.
 *
 * @return the input after [what] (with `loc` advanced by its length),
 *   or null when the input does not start with [what]
 */
fun Parsable.require(what: String): Parsable? =
    if (str.startsWith(what))
        Parsable(str.substring(what.length), loc?.plus(what.length))
    else
        null
/**
 * Takes everything before the first occurrence of the delimiter [c], converts
 * it with [map], and continues after the delimiter.
 *
 * @param c delimiter that must occur in the remaining input
 * @param map converts the text before the delimiter; returning null fails the parse
 * @return the input after the delimiter paired with the mapped value, or null
 *   when the delimiter is absent or [map] returns null
 */
fun <T> Parsable.untilRequire(c: String, map: (String) -> T?): Pair<Parsable, T>? {
    // substringBefore/substringAfter return the whole string when the delimiter
    // is missing, which silently accepted input without it; look it up explicitly.
    val at = str.indexOf(c)
    if (at < 0)
        return null
    val before = str.substring(0, at)
    // The remainder starts after the delimiter, so the location must skip it too.
    val consumed = at + c.length
    return map(before)?.let { value ->
        Parsable(str.substring(consumed), loc?.plus(consumed)) to value
    }
}
/**
 * Consumes the longest prefix made only of characters in [li] and converts it
 * with [map]. The prefix may be empty, in which case [map] receives `""`.
 *
 * @param li characters allowed in the prefix
 * @param map converts the consumed prefix; returning null fails the parse
 * @return the input after the prefix paired with the mapped value, or null
 *   when [map] returns null
 */
fun <T> Parsable.asLongAs(vararg li: Char, map: (String) -> T?): Pair<Parsable, T>? {
    // takeWhile yields the prefix directly, without boxing each Char into a list.
    val prefix = str.takeWhile { it in li }
    val value = map(prefix) ?: return null
    return Parsable(str.substring(prefix.length), loc?.plus(prefix.length)) to value
}
/** Applies [parser] to this input; reads as `input.map(parser)` in call chains. */
fun <T> Parsable.map(parser: Parser<T>): Pair<Parsable, T>? =
    parser.invoke(this)
/** Passes the remaining input and the value parsed so far to [fn] and returns its result. */
fun <T, R> Pair<Parsable, T>.map(fn: (Parsable, T) -> Pair<Parsable, R>?): Pair<Parsable, R>? {
    val (rest, value) = this
    return fn(rest, value)
}
/**
 * Sequences [parser] after an earlier result: runs it on the remaining input
 * and pairs the earlier value with the new one.
 *
 * @return the input after [parser] with `(earlier, new)`, or null when [parser] returns null
 */
fun <A, B> Pair<Parsable, A>.map(parser: Parser<B>): Pair<Parsable, Pair<A, B>>? {
    val (rest, parsed) = parser(first) ?: return null
    return Pair(rest, Pair(second, parsed))
}
/** Transforms the remaining input with [fn], keeping the parsed value. */
fun <T> Pair<Parsable, T>.mapFirst(fn: (Parsable) -> Parsable): Pair<Parsable, T> =
    Pair(fn(first), second)
/**
 * Transforms the remaining input with [fn], keeping the parsed value;
 * yields null when [fn] returns null.
 */
fun <T> Pair<Parsable, T>.mapFirstNullable(fn: (Parsable) -> Parsable?): Pair<Parsable, T>? {
    val next = fn(first) ?: return null
    return Pair(next, second)
}
/** Transforms the parsed value with [fn], keeping the remaining input. */
fun <T, R> Pair<Parsable, T>.mapSecond(fn: (T) -> R): Pair<Parsable, R> =
    Pair(first, fn(second))
/** Skips leading space characters in the remaining input, keeping the parsed value. */
fun <T> Pair<Parsable, T>.spaces(): Pair<Parsable, T> =
    Pair(first.spaces(), second)
/** Skips leading whitespace in the remaining input, keeping the parsed value. */
fun <T> Pair<Parsable, T>.whitespaces(): Pair<Parsable, T> =
    Pair(first.whitespaces(), second)
/**
 * Consumes the literal [what] from the remaining input, keeping the parsed
 * value; yields null when the remaining input does not start with [what].
 */
fun <T> Pair<Parsable, T>.require(what: String): Pair<Parsable, T>? =
    first.require(what)?.let { rest -> Pair(rest, second) }
/**
 * Parses zero or more elements separated by [sep].
 *
 * Elements are parsed with [map] until it returns null or no separator
 * follows an element. A separator that is followed by a failing element stays
 * consumed (trailing separators are swallowed).
 *
 * @param sep literal separator between elements
 * @param map element parser; null ends the list
 * @return the remaining input and the elements parsed so far (possibly empty)
 */
fun <T> Parsable.array(sep: String, map: (Parsable) -> Pair<Parsable, T>?): Pair<Parsable, List<T>> {
    val out = mutableListOf<T>()
    // Track text and location together so that consuming a separator advances
    // both; updating only the text made every later location too small.
    var cursor = this
    while (true) {
        val (afterElem, value) = map(cursor) ?: break
        out.add(value)
        cursor = afterElem
        cursor = cursor.require(sep) ?: break
    }
    return cursor to out
}

View File

@@ -1,36 +0,0 @@
package blitz.parse.comb
/**
 * Parses a double-quoted string literal at the start of the input.
 *
 * Supported escapes are `\"`, `\\`, `\n`, `\r`, `\b` and `\t`; any other
 * escape fails the parse.
 *
 * @return the input after the closing quote paired with the unescaped
 *   contents, or null when the input does not start with `"`, contains an
 *   unsupported escape, or ends before the closing quote
 */
fun Parsable.stringWithEscape(): Pair<Parsable, String>? {
    if (str.isEmpty() || str[0] != '"')
        return null
    val out = StringBuilder()
    var index = 1
    while (index < str.length) {
        when (val c = str[index]) {
            '"' -> {
                // Closing quote: everything up to and including it is consumed.
                val consumed = index + 1
                return Parsable(str.substring(consumed), loc?.plus(consumed)) to out.toString()
            }
            '\\' -> {
                index++
                // A backslash as the very last character has nothing to escape.
                if (index >= str.length)
                    return null
                when (str[index]) {
                    '"' -> out.append('"')
                    '\\' -> out.append('\\')
                    'n' -> out.append('\n')
                    'r' -> out.append('\r')
                    'b' -> out.append('\b')
                    't' -> out.append('\t')
                    else -> return null
                }
            }
            else -> out.append(c)
        }
        index++
    }
    // Ran out of input without a closing quote; report failure instead of
    // slicing past the end of the string.
    return null
}

View File

@@ -1,10 +1,8 @@
package blitz.parse.comb2 package blitz.parse.comb2
import blitz.Either import blitz.*
import blitz.Provider import blitz.collections.RefVec
import blitz.collections.contents import blitz.collections.contents
import blitz.partiallyFlattenA
import blitz.partiallyFlattenB
import blitz.str.charsToString import blitz.str.charsToString
data class ParseCtx<I>( data class ParseCtx<I>(
@@ -21,175 +19,185 @@ data class ParseError(
val message: String?, val message: String?,
) )
typealias ParseResult<O> = Either<O, List<ParseError>> typealias ParseResult<O> = Either<O, RefVec<ParseError>>
typealias Parser<I, O> = (ParseCtx<I>) -> ParseResult<O> typealias Parser<I, O> = (ParseCtx<I>) -> ParseResult<O>
inline fun <I, M, O> Parser<I, M>.mapValue(crossinline fn: (M) -> O): Parser<I, O> = inline fun <I, M: Any, O: Any> mapValue(crossinline self: Parser<I, M>, crossinline fn: (M) -> O): Parser<I, O> =
{ invoke(it).mapA { fn(it) } } { self(it).mapA { fn(it) } }
inline fun <I, O> Parser<I, O>.mapErrors(crossinline fn: (List<ParseError>) -> List<ParseError>): Parser<I, O> = inline fun <I, O: Any> mapErrors(crossinline self: Parser<I, O>, crossinline fn: (RefVec<ParseError>) -> RefVec<ParseError>): Parser<I, O> =
{ invoke(it).mapB { fn(it) } } { self(it).mapB { fn(it) } }
fun <I, M, O> Parser<I, M>.then(other: Parser<I, O>): Parser<I, Pair<M, O>> = inline fun <I, M: Any, O: Any> then(crossinline self: Parser<I, M>, crossinline other: Parser<I, O>): Parser<I, Pair<M, O>> =
{ ctx -> { ctx ->
invoke(ctx).mapA { first -> self(ctx).flatMapA<_,_,Pair<M,O>> { first ->
other.invoke(ctx) other.invoke(ctx)
.mapA { first to it } .mapA { first to it }
}.partiallyFlattenA() }
} }
fun <I, O, T> Parser<I, O>.thenIgnore(other: Parser<I, T>): Parser<I, O> = inline fun <I, M: Any, O: Any> thenOverwrite(crossinline self: Parser<I, M>, crossinline other: Parser<I, O>): Parser<I, O> =
{ ctx -> { ctx ->
invoke(ctx).mapA { first -> self(ctx).flatMapA<_,_,O> {
other.invoke(ctx)
}
}
inline fun <I, O: Any, T: Any> thenIgnore(crossinline self: Parser<I, O>, crossinline other: Parser<I, T>): Parser<I, O> =
{ ctx ->
self(ctx).flatMapA { first ->
other.invoke(ctx) other.invoke(ctx)
.mapA { first } .mapA { first }
}.partiallyFlattenA() }
} }
fun <I, O> Parser<I, O>.orElseVal(value: O): Parser<I, O> = inline fun <I, O: Any> orElseVal(crossinline self: Parser<I, O>, value: O): Parser<I, O> =
orElse { Either.ofA(value) } orElse(self) { Either.ofA(value) }
fun <I, O: Any> Parser<I, O>.orNot(): Parser<I, O?> = inline fun <I, O, R: Any> orElse(crossinline self: Parser<I, O>, crossinline other: Parser<I, R>): Parser<I, R> where O: R =
orElse { Either.ofA(null) }
fun <I, O, R> Parser<I, O>.orElse(other: Parser<I, R>): Parser<I, R> where O: R =
{ {
val old = it.copy() val old = it.idx
this(it).mapB { err -> self(it).mapB { err ->
it.loadFrom(old) it.idx = old
other.invoke(it) other.invoke(it)
.mapB { err + it } .mapB { err.pushBack(it); err }
}.partiallyFlattenB() }.partiallyFlattenB()
} }
fun <I, O> choose(possible: Iterable<Parser<I, O>>): Parser<I, O> = fun <I, O: Any> choose(possible: Iterable<Parser<I, O>>): Parser<I, O> =
{ ctx -> { ctx ->
val errors = mutableListOf<ParseError>() val errors = RefVec<ParseError>(possible.count())
var res: O? = null var res: O? = null
for (p in possible) { for (p in possible) {
val old = ctx.copy() val old = ctx.idx
val t = p.invoke(ctx) val t = p.invoke(ctx)
if (t.isA) { if (t.isA) {
res = t.getA() res = t.a!!
break break
} else { } else {
ctx.loadFrom(old) ctx.idx = old
errors += t.getB() errors.pushBack(t.b!!)
} }
} }
res?.let { Either.ofA(it) } res?.let { Either.ofA(it) }
?: Either.ofB(errors) ?: Either.ofB(errors)
} }
fun <I, O> choose(vararg possible: Parser<I, O>): Parser<I, O> = fun <I, O: Any> choose(vararg possible: Parser<I, O>): Parser<I, O> =
choose(possible.toList()) choose(possible.toList())
fun <I, O> Parser<I, O>.repeated(): Parser<I, List<O>> = inline fun <I, O: Any> repeated(crossinline what: Parser<I, O>): Parser<I, RefVec<O>> =
{ ctx -> { ctx ->
val out = mutableListOf<O>() val out = RefVec<O>(0)
while (true) { while (true) {
val old = ctx.copy() val old = ctx.idx
val t = invoke(ctx) val t = what(ctx)
if (t.isA) { if (t.isA) {
out += t.getA() out.pushBack(t.a!!)
} else { } else {
ctx.loadFrom(old) ctx.idx = old
break break
} }
} }
Either.ofA(out) Either.ofA(out)
} }
inline fun <I, O> Parser<I, O>.verifyValue(crossinline verif: (O) -> String?): Parser<I, O> = inline fun <I, O: Any> repeatedNoSave(crossinline what: Parser<I, O>): Parser<I, Unit> =
{ ctx -> { ctx ->
invoke(ctx).mapA<ParseResult<O>> { while (true) {
verif(it)?.let { Either.ofB(listOf(ParseError(ctx.idx, it))) } val old = ctx.idx
?: Either.ofA(it) val t = what(ctx)
}.partiallyFlattenA() if (t.isB) {
} ctx.idx = old
inline fun <I, O> Parser<I, Pair<IntRange, O>>.verifyValueWithSpan(crossinline fn: (O) -> String?): Parser<I, O> =
{ ctx ->
invoke(ctx).mapA<ParseResult<O>> { (span, v) ->
fn(v)?.let { Either.ofB(listOf(ParseError(span.first, it))) }
?: Either.ofA(v)
}.partiallyFlattenA()
}
fun <I, O: Any?> Parser<I, O?>.errIfNull(msg: String = "parser value was null internally"): Parser<I, O> =
verifyValue { if (it == null) msg else null }
.mapValue { it!! }
inline fun <I, O> location(crossinline fn: (Int) -> O): Parser<I, O> =
{ Either.ofA(fn(it.idx)) }
fun <I> location(): Parser<I, Int> =
location { it }
fun <I, O> withSpan(p: Parser<I, O>): Parser<I, Pair<IntRange, O>> =
location<I>()
.then(p)
.then(location())
.mapValue { (beginAndV, end) ->
(beginAndV.first..end) to beginAndV.second
}
fun <I, O> value(value: O): Parser<I, O> =
{ Either.ofA(value) }
fun <I, O> chain(parsers: List<Parser<I, O>>): Parser<I, List<O>> =
{ ctx ->
val results = mutableListOf<O>()
val errs = mutableListOf<ParseError>()
for (p in parsers) {
val r = p.invoke(ctx)
if (r.isA) {
results += r.getA()
} else {
errs += r.getB()
break break
} }
} }
if (errs.isNotEmpty()) Either.ofB(errs) Either.ofA(Unit)
else Either.ofA(results)
} }
fun <I> seq(want: List<I>): Parser<I, List<I>> = inline fun <I, O: Any> verifyValue(crossinline self: Parser<I, O>, crossinline verif: (O) -> String?): Parser<I, O> =
chain(want.map(::just))
inline fun <I> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
{ ctx -> { ctx ->
if (ctx.idx >= ctx.input.size) { self(ctx).flatMapA<_,_,_> {
Either.ofB(listOf(ParseError(ctx.idx, "unexpected end of file"))) verif(it)?.let { Either.ofB(RefVec.of(ParseError(ctx.idx, it))) }
} else { ?: Either.ofA(it)
val i = ctx.input[ctx.idx++]
if (filter(i)) Either.ofA(i)
else Either.ofB(listOf(ParseError(ctx.idx - 1, msg)))
} }
} }
fun <I> just(want: I): Parser<I, I> = inline fun <I, O: Any> verifyValueWithSpan(crossinline self: Parser<I, Pair<IntRange, O>>, crossinline fn: (O) -> String?): Parser<I, O> =
{ ctx ->
self(ctx).flatMapA<_,_,_> { (span, v) ->
fn(v)?.let { Either.ofB(RefVec.of(ParseError(span.first, it))) }
?: Either.ofA(v)
}
}
/** Parser that consumes nothing and succeeds with [fn] applied to the current index. */
inline fun <I, O: Any> location(crossinline fn: (Int) -> O): Parser<I, O> =
    { ctx -> Either.ofA(fn(ctx.idx)) }
/** Parser that consumes nothing and succeeds with the current index. */
inline fun <I> location(): Parser<I, Int> =
    location { index -> index }
/**
 * Runs [p] and pairs its value with the index range from the index before
 * [p] ran to the index after it finished (both ends taken via `location()`).
 */
inline fun <I, O: Any> withSpan(crossinline p: Parser<I, O>): Parser<I, Pair<IntRange, O>> =
    mapValue(then(then(location(), p), location())) { (startAndValue, end) ->
        val (start, parsed) = startAndValue
        IntRange(start, end) to parsed
    }
/** Parser that ignores its input and always succeeds with [value]. */
inline fun <I, O: Any> value(value: O): Parser<I, O> =
    { _ -> Either.ofA(value) }
/**
 * Runs [parsers] one after another on the same context and collects their
 * values in order.
 *
 * Stops at the first parser that fails and returns a copy of that parser's
 * errors; values collected before the failure are discarded. The context is
 * not rewound on failure.
 */
fun <I, O: Any> chain(parsers: List<Parser<I, O>>): Parser<I, RefVec<O>> =
    { ctx ->
        val results = RefVec<O>(parsers.size)
        // Track failure explicitly: judging by the error count alone would turn
        // a failing parser that reports no errors into a (partial) success.
        var failure: RefVec<ParseError>? = null
        for (p in parsers) {
            val r = p.invoke(ctx)
            val parsed = r.a
            if (parsed != null) {
                results.pushBack(parsed)
            } else {
                failure = RefVec<ParseError>(0).also { it.pushBack(r.b!!) }
                break
            }
        }
        if (failure != null) Either.ofB(failure)
        else Either.ofA(results)
    }
/** Parser that expects the items of [want] in order, built as a `chain` of `just` parsers. */
inline fun <I: Any> seq(want: List<I>): Parser<I, RefVec<I>> =
    chain(want.map { item -> just(item) })
/**
 * Parser that consumes one input item and succeeds with it when [filter]
 * accepts it.
 *
 * At end of input it fails with "unexpected end of file" without consuming.
 * When [filter] rejects the item it fails with [msg] at the item's index; the
 * index has already been advanced past the item at that point.
 */
inline fun <I: Any> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
    { ctx ->
        val at = ctx.idx
        if (at >= ctx.input.size) {
            Either.ofB(RefVec.of(ParseError(at, "unexpected end of file")))
        } else {
            val item = ctx.input[at]
            ctx.idx = at + 1
            if (filter(item)) Either.ofA(item)
            else Either.ofB(RefVec.of(ParseError(at, msg)))
        }
    }
inline fun <I: Any> just(want: I): Parser<I, I> =
filter("expected $want") { it == want } filter("expected $want") { it == want }
fun <I> oneOf(possible: Iterable<I>): Parser<I, I> = inline fun <I: Any> oneOf(possible: Iterable<I>): Parser<I, I> =
filter("expected one of ${possible.contents}") { it in possible } filter("expected one of ${possible.contents}") { it in possible }
fun <I, O> future(prov: Provider<Parser<I, O>>): Parser<I, O> = inline fun <I, O: Any> future(crossinline prov: Provider<Parser<I, O>>): Parser<I, O> =
{ prov()(it) } { prov()(it) }
inline fun <I, O> futureRec(crossinline fn: (future: Parser<I, O>) -> Parser<I, O>): Parser<I, O> { inline fun <I, O: Any> futureRec(fn: (future: Parser<I, O>) -> Parser<I, O>): Parser<I, O> {
lateinit var f: Parser<I, O> lateinit var f: Parser<I, O>
f = fn(future { f }) f = fn({ f(it) })
return f return f
} }
/** group values 0 is the entire match */ /** group values 0 is the entire match */
fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> = fun <O: Any> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
{ ctx -> { ctx ->
pattern.matchAt(ctx.input.charsToString(), ctx.idx)?.let { pattern.matchAt(ctx.input.charsToString(), ctx.idx)?.let {
ctx.idx = it.range.last + 1 ctx.idx = it.range.last + 1
Either.ofA(fn(it.groups)) Either.ofA(fn(it.groups))
} ?: Either.ofB(listOf( } ?: Either.ofB(RefVec.of(
ParseError(ctx.idx, "regular expression \"$pattern\" does not apply") ParseError(ctx.idx, "regular expression \"$pattern\" does not apply")
)) ))
} }
@@ -197,7 +205,7 @@ fun <O> regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser<C
/** Regex parser that yields the text of the entire match (group 0, asserted non-null with `!!`). */
fun regex(pattern: Regex) = regex(pattern) { it[0]!!.value } fun regex(pattern: Regex) = regex(pattern) { it[0]!!.value }
/**
 * Convenience overload taking the pattern as a string.
 *
 * Wraps [pattern] in `Regex(pattern)` and delegates to the `Regex`-based overload; [fn] receives
 * the match groups, where group 0 is the entire match.
 */
fun <O> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> = fun <O: Any> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
regex(Regex(pattern), fn) regex(Regex(pattern), fn)
/** String-pattern regex parser that yields the text of the entire match (group 0). */
fun regex(pattern: String) = regex(pattern) { it[0]!!.value } fun regex(pattern: String) = regex(pattern) { it[0]!!.value }

View File

@@ -1,67 +1,63 @@
package blitz.parse.comb2 package blitz.parse.comb2
import blitz.collections.RefVec
import blitz.str.charsToString import blitz.str.charsToString
import kotlin.math.absoluteValue import kotlin.math.absoluteValue
import kotlin.math.sign import kotlin.math.sign
/**
 * Parser for a run of characters taken from the set "\n\t\r\b " (newline, tab, carriage
 * return, backspace, space), built by repeating a [oneOf] parser over that set.
 *
 * NOTE(review): '\b' is the backspace character; confirm it is meant to count as whitespace.
 */
fun whitespaces(): Parser<Char, String> = fun whitespaces(): Parser<Char, Unit> =
oneOf("\n\t\r\b ".toList()) repeatedNoSave(oneOf("\n\t\r\b ".toList()))
.repeated()
.mapValue { it.charsToString() }
/** Parser that accepts exactly one ASCII decimal digit ('0'..'9') via [oneOf]. */
fun digit(): Parser<Char, Char> = fun digit(): Parser<Char, Char> =
oneOf("0123456789".toList()) oneOf("0123456789".toList())
/**
 * Parser for an unsigned run of decimal digits (repeated [digit]).
 *
 * The collected digits are verified together with their span: an empty run is rejected with
 * the message "need digits after sign in num lit".
 */
fun uintLit(): Parser<Char, UInt> = fun uintLit(): Parser<Char, RefVec<Char>> =
withSpan(digit().repeated()) verifyValueWithSpan(withSpan(repeated(digit())))
.verifyValueWithSpan { if (it.isEmpty()) "need digits after sign in num lit" else null } { if (it.size == 0) "need digits after sign in num lit" else null }
.mapValue { it.charsToString().toUInt() }
/**
 * Parser for an optionally signed integer literal.
 *
 * The sign is chosen among '+' (mapped to +1), '-' (mapped to -1) and `value(+1)` as the last
 * alternative (presumably the default when no sign character is present). It is followed by
 * [uintLit], and the result is the sign multiplied by the parsed digits.
 *
 * NOTE(review): where the digits are converted with `String.toInt()`, a literal outside the
 * Int range would throw NumberFormatException instead of yielding a ParseError — confirm.
 */
fun intLit(): Parser<Char, Int> = fun intLit(): Parser<Char, Int> =
choose(just('+').mapValue { +1 }, mapValue(then(choose(mapValue(just('+')) { +1 },
just('-').mapValue { -1 }, mapValue(just('-')) { -1 },
value(+1)) value(+1)),
.then(uintLit()) uintLit()))
.mapValue { (sign, v) -> sign * v.toInt() } { (sign, v) -> sign * v.charsToString().toInt() }
/**
 * Parser for a decimal floating-point literal: an [intLit] integer part, a '.', and
 * [uintLit] fraction digits, with `orElseVal` supplying a zero fraction as fallback value
 * (presumably used when the fraction parser fails).
 *
 * The fraction digits are converted as a whole number, multiplied by 0.1 until the magnitude
 * is below 1, added to the magnitude of the integer part, and the sum is multiplied by the
 * integer part's `sign`.
 *
 * NOTE(review): `sign` is 0.0 when the integer part is 0, so "0.5" appears to evaluate to 0.0,
 * and "-0.5" as well because [intLit] yields 0 for "-0" — confirm.
 * NOTE(review): leading zeros of the fraction are lost in the numeric conversion, so "1.05"
 * appears to evaluate to 1.5 — confirm.
 */
fun floatLit(): Parser<Char, Double> = fun floatLit(): Parser<Char, Double> =
intLit() mapValue(
.then(just('.') then(
.then(uintLit()) thenIgnore(
.mapValue { it.second } intLit(),
.orElseVal(0u)) just('.')),
.mapValue { (pre, post) -> orElseVal(uintLit(), RefVec.of('0'))))
var p = post.toDouble() { (pre, post) ->
while (p.absoluteValue >= 1) { var p = post.charsToString().toDouble()
p *= 0.1 while (p.absoluteValue >= 1) {
} p *= 0.1
(pre.toDouble().absoluteValue + p) * pre.toDouble().sign
} }
(pre.toDouble().absoluteValue + p) * pre.toDouble().sign
}
/**
 * Parser for a backslash escape sequence, yielding the character it denotes.
 *
 * After a backslash, one of the following is accepted: a double quote, a single quote or a
 * backslash (each yielding itself), or 'n', 'r', 'b', 't' (yielding newline, carriage return,
 * backspace, tab). The errors of the alternatives are replaced by a single
 * "invalid escape sequence" error located at the first underlying error.
 */
fun escapeChar(): Parser<Char, Char> = fun escapeChar(): Parser<Char, Char> =
just('\\').then( thenOverwrite(just('\\'),
choose(just('"'), mapErrors(choose(just('"'),
just('\''), just('\''),
just('\\'), just('\\'),
just('n').mapValue { '\n' }, mapValue(just('n')) { '\n' },
just('r').mapValue { '\r' }, mapValue(just('r')) { '\r' },
just('b').mapValue { '\b' }, mapValue(just('b')) { '\b' },
just('t').mapValue { '\t' }) mapValue(just('t')) { '\t' }))
.mapErrors { listOf(ParseError(it.first().loc, "invalid escape sequence")) } { RefVec.of(ParseError(it[0].loc, "invalid escape sequence")) }
).mapValue { it.second } )
/**
 * Parser for a double-quoted string literal.
 *
 * Expects an opening '"', then a repetition of either an [escapeChar] sequence or any character
 * other than '"', then a closing '"' whose value is ignored. The collected characters are
 * joined into the resulting String with `charsToString()`.
 *
 * NOTE(review): the non-quote filter uses "a" as its failure message, which looks like a
 * placeholder — confirm.
 */
fun stringLit(): Parser<Char, String> = fun stringLit(): Parser<Char, String> =
just('"') mapValue(thenIgnore(then(just('"'),
.then(choose(escapeChar(), repeated(choose(escapeChar(),
filter("a") { it != '"' }) filter("a") { it != '"' }))),
.repeated()) just('"')))
.thenIgnore(just('"')) { (_, str) -> str.charsToString() }
.mapValue { (_, str) -> str.charsToString() }
/**
 * Parser for a sequence of elements separated by [delim].
 *
 * Repeats "element followed by delimiter" (the delimiter's value is ignored), then requires one
 * final element, which is appended to the elements collected so far. `orElse` provides an
 * empty collection as the alternative result.
 */
fun <I, O, T> Parser<I, O>.delimitedBy(delim: Parser<I, T>): Parser<I, List<O>> = inline fun <I, O: Any, T: Any> delimitedBy(crossinline self: Parser<I, O>, crossinline delim: Parser<I, T>): Parser<I, RefVec<O>> =
thenIgnore(delim) orElse(mapValue(then(repeated(thenIgnore(self, delim)), self))
.repeated() { (a, b) -> a.pushBack(b); a },
.then(this) value(RefVec.of()))
.mapValue { (a, b) -> a + b }
.orElse(value(listOf()))

View File

@@ -1,4 +1,13 @@
package blitz.str package blitz.str
import blitz.collections.Vec
/** Builds a String from the characters of this collection, via `toCharArray()`. */
fun Collection<Char>.charsToString(): String = fun Collection<Char>.charsToString(): String =
String(this.toCharArray()) String(this.toCharArray())
/**
 * Builds a String from the characters of this vector, read by index from 0 to `size - 1`.
 */
fun Vec<Char>.charsToString(): String {
    val chars = CharArray(size)
    for (index in chars.indices) {
        chars[index] = this[index]
    }
    return String(chars)
}
/**
 * Builds a String from this vector of bytes, mapping every byte to the character with the same
 * unsigned value (0..255).
 *
 * `Byte` is signed, so `toInt()` sign-extends values 0x80..0xFF to negative ints; converting
 * those directly with `toChar()` would yield characters in U+FF80..U+FFFF. The value is
 * therefore masked with 0xFF first, so byte 0xE9 becomes U+00E9.
 */
@JvmName("charsToString_VecByte")
fun Vec<Byte>.charsToString(): String =
    String(CharArray(size) { (this[it].toInt() and 0xFF).toChar() })