From 8c2325bdd3e929598ec61e0405cdac2dc6a756c7 Mon Sep 17 00:00:00 2001 From: alex_s168 <63254202+alex-s168@users.noreply.github.com> Date: Thu, 19 Sep 2024 21:55:17 +0000 Subject: [PATCH] improver parser perf --- build.gradle.kts | 4 + src/main/kotlin/blitz/Either.kt | 120 ++++---- src/main/kotlin/blitz/Obj.kt | 4 +- src/main/kotlin/blitz/collections/ByteVec.kt | 9 +- src/main/kotlin/blitz/collections/RefVec.kt | 111 ++++++++ src/main/kotlin/blitz/collections/Vec.kt | 7 + src/main/kotlin/blitz/ice/Cooled.kt | 20 -- src/main/kotlin/blitz/ice/Freezable.kt | 10 - src/main/kotlin/blitz/parse/JSON.kt | 275 ++++++++++++++++--- src/main/kotlin/blitz/parse/NumParse.kt | 43 --- src/main/kotlin/blitz/parse/comb/Parser.kt | 109 -------- src/main/kotlin/blitz/parse/comb/Special.kt | 36 --- src/main/kotlin/blitz/parse/comb2/Parser.kt | 216 ++++++++------- src/main/kotlin/blitz/parse/comb2/Predef.kt | 86 +++--- src/main/kotlin/blitz/str/CharsToString.kt | 11 +- 15 files changed, 606 insertions(+), 455 deletions(-) create mode 100644 src/main/kotlin/blitz/collections/RefVec.kt delete mode 100644 src/main/kotlin/blitz/ice/Cooled.kt delete mode 100644 src/main/kotlin/blitz/ice/Freezable.kt delete mode 100644 src/main/kotlin/blitz/parse/NumParse.kt delete mode 100644 src/main/kotlin/blitz/parse/comb/Parser.kt delete mode 100644 src/main/kotlin/blitz/parse/comb/Special.kt diff --git a/build.gradle.kts b/build.gradle.kts index 8bcfeb7..4b7b768 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -15,6 +15,10 @@ dependencies { testImplementation("org.jetbrains.kotlin:kotlin-test") implementation("org.jetbrains.kotlinx:kotlinx-io-core:0.3.1") implementation("org.jetbrains.kotlinx:kotlinx-io-bytestring:0.3.1") + + // https://mvnrepository.com/artifact/org.json/json + implementation("org.json:json:20240303") + } tasks.test { diff --git a/src/main/kotlin/blitz/Either.kt b/src/main/kotlin/blitz/Either.kt index ce8dff4..e511b51 100644 --- a/src/main/kotlin/blitz/Either.kt +++ b/src/main/kotlin/blitz/Either.kt @@ -1,29 +1,17 @@ package blitz -class Either private constructor( - private val a: Obj?, - private val b: Obj? +class Either( + val a: A?, + val b: B? ) { override fun equals(other: Any?): Boolean = other is Either<*, *> && other.a == a && other.b == b - fun getAOrNull(): A? = - a?.v + fun assertA(): A = + (a ?: throw Exception("Value of Either is not of type A!")) - fun getA(): A = - (a ?: throw Exception("Value of Either is not of type A!")).v - - fun getAOr(prov: Provider): A = - getAOrNull() ?: prov() - - fun getBOrNull(): B? = - b?.v - - fun getB(): B = - (b ?: throw Exception("Value of Either is not of type B!")).v - - fun getBOr(prov: Provider): B = - getBOrNull() ?: prov() + fun assertB(): B = + (b ?: throw Exception("Value of Either is not of type B!")) val isA: Boolean = a != null @@ -31,18 +19,9 @@ class Either private constructor( val isB: Boolean = b != null - fun then(af: (A) -> R, bf: (B) -> R): R = - if (isA) af(a!!.v) else bf(b!!.v) - - fun mapA(transform: (A) -> RA): Either = - Either(a.mapNotNull(transform), b) - - fun mapB(transform: (B) -> RB): Either = - Either(a, b.mapNotNull(transform)) - override fun toString(): String = - if (isA) "Either(${a!!.v})" - else "Either(${b!!.v})" + if (isA) "Either(${a!!})" + else "Either(${b!!})" override fun hashCode(): Int { var result = a?.hashCode() ?: 0 @@ -51,43 +30,80 @@ class Either private constructor( } companion object { - fun ofA(a: A): Either = - Either(Obj.of(a), null) + inline fun ofA(a: A): Either = + Either(a, null) - fun ofB(b: B): Either = - Either(null, Obj.of(b)) + inline fun ofB(b: B): Either = + Either(null, b) } } -fun Either.flatten(): R where A: R, B: R = - getAOrNull() ?: getB() +inline fun Either.getAOr(prov: Provider): A = + a ?: prov() -fun Either>.partiallyFlattenB(): Either where A: A2 = - mapA> { Either.ofA(it) }.flatten() +inline fun Either.getBOr(prov: Provider): B = + b ?: prov() -fun Either, B>.partiallyFlattenA(): Either where B: B2 = - mapB> { Either.ofB(it) }.flatten() +inline fun Either.then(af: (A) -> R, bf: (B) -> R): R = + if (isA) af(a!!) else bf(b!!) -fun Either>.mapBA(fn: (BA) -> BAN): Either> = +inline fun Either.mapA(transform: (A) -> RA): Either = + Either(a?.let(transform), b) + +inline fun Either.flatMapA(transform: (A) -> Either): Either = + if (a != null) { + transform(a) + } else this + +inline fun Either.flatMapB(transform: (B) -> Either): Either = + if (b != null) { + transform(b) + } else this + +@JvmName("flatMapA_changeType") +inline fun Either.flatMapA(transform: (A) -> Either): Either = + if (a != null) { + transform(a) + } else Either.ofB(b!!) + +@JvmName("flatMapB_changeType") +inline fun Either.flatMapB(transform: (B) -> Either): Either = + if (b != null) { + transform(b) + } else Either.ofA(a!!) + +inline fun Either.mapB(transform: (B) -> RB): Either = + Either(a, b?.let(transform)) + +fun Either.flatten(): R where A: R, B: R = + a ?: assertB() + +fun Either>.partiallyFlattenB(): Either where A: A2 = + mapA { Either.ofA(it) }.flatten() + +fun Either, B>.partiallyFlattenA(): Either where B: B2 = + mapB { Either.ofB(it) }.flatten() + +inline fun Either>.mapBA(fn: (BA) -> BAN): Either> = mapB { it.mapA(fn) } -fun Either>.mapBB(fn: (BB) -> BBN): Either> = +inline fun Either>.mapBB(fn: (BB) -> BBN): Either> = mapB { it.mapB(fn) } -fun Either, B>.mapAA(fn: (AA) -> AAN): Either, B> = +inline fun Either, B>.mapAA(fn: (AA) -> AAN): Either, B> = mapA { it.mapA(fn) } -fun Either, B>.mapAB(fn: (AB) -> ABN): Either, B> = +inline fun Either, B>.mapAB(fn: (AB) -> ABN): Either, B> = mapA { it.mapB(fn) } -fun Either, B>.getAAOrNull(): AA? = - getAOrNull()?.getAOrNull() +fun Either, B>.getAAOrNull(): AA? = + a?.a -fun Either, B>.getABOrNull(): AB? = - getAOrNull()?.getBOrNull() +fun Either, B>.getABOrNull(): AB? = + a?.b -fun Either>.getBAOrNull(): BA? = - getBOrNull()?.getAOrNull() +fun Either>.getBAOrNull(): BA? = + b?.a -fun Either>.getBBOrNull(): BB? = - getBOrNull()?.getBOrNull() \ No newline at end of file +fun Either>.getBBOrNull(): BB? = + b?.b \ No newline at end of file diff --git a/src/main/kotlin/blitz/Obj.kt b/src/main/kotlin/blitz/Obj.kt index 2671dcb..b07f5ef 100644 --- a/src/main/kotlin/blitz/Obj.kt +++ b/src/main/kotlin/blitz/Obj.kt @@ -21,10 +21,10 @@ interface Obj { } } -fun Obj?.mapNotNull(transform: (I) -> O): Obj? = +inline fun Obj?.mapNotNull(transform: (I) -> O): Obj? = this?.v?.let { Obj.of(transform(it)) } -fun Obj.map(transform: (I) -> O): Obj = +inline fun Obj.map(transform: (I) -> O): Obj = Obj.of(transform(v)) interface MutObj { diff --git a/src/main/kotlin/blitz/collections/ByteVec.kt b/src/main/kotlin/blitz/collections/ByteVec.kt index ce9ff0c..a5e66d5 100644 --- a/src/main/kotlin/blitz/collections/ByteVec.kt +++ b/src/main/kotlin/blitz/collections/ByteVec.kt @@ -36,6 +36,13 @@ class ByteVec(private val initCap: Int = 0): Vec, ByteBatchSequence { cap = size + amount } + override fun reserve(need: Int, wantIfRealloc: Int) { + if (need > 0 && cap - size >= need) + return + cap = size + wantIfRealloc + array = array.copyOf(cap) + } + override fun popBack(): Byte = array[size - 1].also { reserve(-1) @@ -110,7 +117,7 @@ class ByteVec(private val initCap: Int = 0): Vec, ByteBatchSequence { } override fun pushBack(elem: Byte) { - reserve(8) + reserve(1, 8) array[size] = elem size ++ } diff --git a/src/main/kotlin/blitz/collections/RefVec.kt b/src/main/kotlin/blitz/collections/RefVec.kt new file mode 100644 index 0000000..e92656c --- /dev/null +++ b/src/main/kotlin/blitz/collections/RefVec.kt @@ -0,0 +1,111 @@ +package blitz.collections + +import kotlin.system.measureTimeMillis + +@Suppress("UNCHECKED_CAST") +class RefVec(private val initCap: Int = 0): Vec { + override var size = 0 + private var cap = initCap + private var array: Array? = if (initCap > 0) arrayOfNulls(initCap) else null + + override fun clear() { + size = 0 + if (array == null) return + if (array!!.size <= initCap) { + cap = array!!.size + } else { + cap = 0 + array = null + } + } + + fun copyAsArray(): Array = + array?.copyOfRange(0, size) ?: emptyArray() + + fun copyIntoArray(arr: Array, destOff: Int = 0, startOff: Int = 0) = + array?.copyInto(arr, destOff, startOff, size) + + override fun copy(): RefVec = + RefVec(size).also { + it.array?.let { copyIntoArray(it) } + } + + override fun reserve(amount: Int) { + if (amount > 0 && cap - size >= amount) + return + if (array == null) { + cap = size + amount + array = arrayOfNulls(cap) + } else { + array = array!!.copyOf(size + amount) + cap = size + amount + } + } + + override fun reserve(need: Int, totalIfRealloc: Int) { + if (need > 0 && cap - size >= need) + return + if (array == null) { + cap = size + totalIfRealloc + array = arrayOfNulls(cap) + } else { + array = array!!.copyOf(size + totalIfRealloc) + cap = size + totalIfRealloc + } + } + + override fun popBack(): T = + array!![size - 1].also { + reserve(-1) + size -- + } as T + + override fun get(index: Int): T = + array!![index] as T + + override fun flip() { + array = array?.reversedArray() + } + + override fun pushBack(elem: T) { + reserve(1, 8) + array!![size] = elem + size ++ + } + + override fun iterator(): Iterator = + object : Iterator { + var index = 0 + override fun hasNext(): Boolean = index < size + override fun next(): T { + if (!hasNext()) + throw NoSuchElementException() + return array!![index++] as T + } + } + + override fun toString(): String = + joinToString(prefix = "[", postfix = "]") { it.toString() } + + override fun set(index: Int, value: T) { + array!![index] = value + } + + companion object { + fun from(data: Array) = + RefVec(data.size).also { + it.array?.let { data.copyInto(it) } + it.size += data.size + } + + fun from(data: Iterable) = + RefVec().also { bv -> + data.forEach(bv::pushBack) + } + + fun of(vararg elements: T): RefVec = + RefVec(elements.size).also { + it.array?.let { elements.copyInto(it) } + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/blitz/collections/Vec.kt b/src/main/kotlin/blitz/collections/Vec.kt index 54c493d..a091f26 100644 --- a/src/main/kotlin/blitz/collections/Vec.kt +++ b/src/main/kotlin/blitz/collections/Vec.kt @@ -8,6 +8,9 @@ interface Vec: IndexableSequence { fun copy(): Vec fun reserve(amount: Int) + fun reserve(need: Int, totalIfRealloc: Int) { + reserve(need) + } fun pushBack(elem: T) fun pushBack(elems: Array) { @@ -18,6 +21,10 @@ interface Vec: IndexableSequence { reserve(elems.size) elems.forEach(::pushBack) } + fun pushBack(elems: Vec) { + reserve(elems.size) + elems.forEach(::pushBack) + } fun popBack(): T fun popBack(dest: Array) { diff --git a/src/main/kotlin/blitz/ice/Cooled.kt b/src/main/kotlin/blitz/ice/Cooled.kt deleted file mode 100644 index 57a8971..0000000 --- a/src/main/kotlin/blitz/ice/Cooled.kt +++ /dev/null @@ -1,20 +0,0 @@ -package blitz.ice - -class Cooled(private val of: T): Freezable { - private var frozen = false - - override fun freeze() { - frozen = true - } - - override fun isFrozen(): Boolean { - return frozen - } - - fun getOrNull(): T? = - if (isFrozen()) null else of - - fun use(block: (T) -> R): R? = - if (isFrozen()) null - else block(of) -} \ No newline at end of file diff --git a/src/main/kotlin/blitz/ice/Freezable.kt b/src/main/kotlin/blitz/ice/Freezable.kt deleted file mode 100644 index 96a1aad..0000000 --- a/src/main/kotlin/blitz/ice/Freezable.kt +++ /dev/null @@ -1,10 +0,0 @@ -package blitz.ice - -interface Freezable { - fun freeze() - fun isFrozen(): Boolean -} - -inline fun Freezable.map(block: (Freezable) -> R): R? = - if (isFrozen()) null - else block(this) \ No newline at end of file diff --git a/src/main/kotlin/blitz/parse/JSON.kt b/src/main/kotlin/blitz/parse/JSON.kt index 04d5787..b754376 100644 --- a/src/main/kotlin/blitz/parse/JSON.kt +++ b/src/main/kotlin/blitz/parse/JSON.kt @@ -1,56 +1,65 @@ package blitz.parse import blitz.parse.comb2.* +import org.json.JSONObject +import kotlin.math.min +import kotlin.system.measureNanoTime object JSON { val jsonElement = futureRec { jsonElement: Parser -> - val jsonNum: Parser = floatLit() - .mapValue(::Number) + val jsonNum: Parser = + mapValue(floatLit(), ::Number) - val jsonString: Parser = stringLit() - .mapValue(::Str) + val jsonString: Parser = + mapValue(stringLit(), ::Str) - val jsonArray: Parser = just('[') - .then(jsonElement - .delimitedBy(just(',')) - .mapValue(::Array)) - .thenIgnore(whitespaces()) - .thenIgnore(just(']')) - .mapValue { it.second } + val jsonArray: Parser = + thenIgnore( + thenIgnore( + thenOverwrite(just('['), + mapValue(delimitedBy(jsonElement, just(','))) + { Array(it.toList())}), + whitespaces()), + just(']') + ) val jsonBool: Parser = choose( - seq("true".toList()).mapValue { Bool(true) }, - seq("false".toList()).mapValue { Bool(false) }, + mapValue(seq("true".toList())) { Bool(true) }, + mapValue(seq("false".toList())) { Bool(false) }, ) - val jsonNull: Parser = seq("null".toList()) - .mapValue { Nul() } + val jsonNull: Parser = + mapValue(seq("null".toList())) { Nul() } - val jsonObj: Parser = just('{') - .then( - whitespaces() - .then(stringLit()) - .mapValue { it.second } - .thenIgnore(whitespaces()) - .thenIgnore(just(':')) - .then(jsonElement) - .delimitedBy(just(','))) - .thenIgnore(whitespaces()) - .thenIgnore(just('}')) - .mapValue { Obj(it.second.toMap()) } + val jsonObj: Parser = + mapValue(thenIgnore(thenIgnore(thenOverwrite( + just('{'), + delimitedBy( + then( + thenIgnore( + thenIgnore( + thenOverwrite( + whitespaces(), + stringLit()), + whitespaces()), + just(':')), + jsonElement), + just(','))), + whitespaces()), + just('}'))) { Obj(it.toMap()) } - whitespaces() - .then(choose( + thenIgnore(thenOverwrite( + whitespaces(), + choose( jsonArray, jsonNum, jsonString, jsonObj, jsonBool, jsonNull - )) - .thenIgnore(whitespaces()) - .mapValue { it.second } + )), + whitespaces()) } @@ -99,4 +108,206 @@ object JSON { fun parse(string: String): ParseResult = jsonElement(ParseCtx(string.toList(), 0)) +} + +fun main() { + val json = """ +{ + "clinical_study": { + "brief_summary": { + "textblock": "CLEAR SYNERGY is an international multi center 2x2 randomized placebo controlled trial of" + }, + "brief_title": "CLEAR SYNERGY Neutrophil Substudy", + "overall_status": "Recruiting", + "eligibility": { + "study_pop": { + "textblock": "Patients who are randomized to the drug RCT portion of the CLEAR SYNERGY (OASIS 9) trial" + }, + "minimum_age": "19 Years", + "sampling_method": "Non-Probability Sample", + "gender": "All", + "criteria": { + "textblock": "Inclusion Criteria:" + }, + "healthy_volunteers": "No", + "maximum_age": "110 Years" + }, + "number_of_groups": "2", + "source": "NYU Langone Health", + "location_countries": { + "country": "United States" + }, + "study_design_info": { + "time_perspective": "Prospective", + "observational_model": "Other" + }, + "last_update_submitted_qc": "September 10, 2019", + "intervention_browse": { + "mesh_term": "Colchicine" + }, + "official_title": "Studies on the Effects of Colchicine on Neutrophil Biology in Acute Myocardial Infarction: A Substudy of the CLEAR SYNERGY (OASIS 9) Trial", + "primary_completion_date": { + "type": "Anticipated", + "content": "February 1, 2021" + }, + "sponsors": { + "lead_sponsor": { + "agency_class": "Other", + "agency": "NYU Langone Health" + }, + "collaborator": [ + { + "agency_class": "Other", + "agency": "Population Health Research Institute" + }, + { + "agency_class": "NIH", + "agency": "National Heart, Lung, and Blood Institute (NHLBI)" + } + ] + }, + "overall_official": { + "role": "Principal Investigator", + "affiliation": "NYU School of Medicine", + "last_name": "Binita Shah, MD" + }, + "overall_contact_backup": { + "last_name": "Binita Shah, MD" + }, + "condition_browse": { + "mesh_term": [ + "Myocardial Infarction", + "ST Elevation Myocardial Infarction", + "Infarction" + ] + }, + "overall_contact": { + "phone": "646-501-9648", + "last_name": "Fatmira Curovic", + "email": "fatmira.curovic@nyumc.org" + }, + "responsible_party": { + "responsible_party_type": "Principal Investigator", + "investigator_title": "Assistant Professor of Medicine", + "investigator_full_name": "Binita Shah", + "investigator_affiliation": "NYU Langone Health" + }, + "study_first_submitted_qc": "March 12, 2019", + "start_date": { + "type": "Actual", + "content": "March 4, 2019" + }, + "has_expanded_access": "No", + "study_first_posted": { + "type": "Actual", + "content": "March 14, 2019" + }, + "arm_group": [ + { + "arm_group_label": "Colchicine" + }, + { + "arm_group_label": "Placebo" + } + ], + "primary_outcome": { + "measure": "soluble L-selectin", + "time_frame": "between baseline and 3 months", + "description": "Change in soluble L-selectin between baseline and 3 mo after STEMI in the placebo vs. colchicine groups." + }, + "secondary_outcome": [ + { + "measure": "Other soluble markers of neutrophil activity", + "time_frame": "between baseline and 3 months", + "description": "Other markers of neutrophil activity will be evaluated at baseline and 3 months after STEMI (myeloperoxidase, matrix metalloproteinase-9, neutrophil gelatinase-associated lipocalin, neutrophil elastase, intercellular/vascular cellular adhesion molecules)" + }, + { + "measure": "Markers of systemic inflammation", + "time_frame": "between baseline and 3 months", + "description": "Markers of systemic inflammation will be evaluated at baseline and 3 months after STEMI (high sensitive CRP, IL-1β)" + }, + { + "measure": "Neutrophil-driven responses that may further propagate injury", + "time_frame": "between baseline and 3 months", + "description": "Neutrophil-driven responses that may further propagate injury will be evaluated at baseline and 3 months after STEMI (neutrophil extracellular traps, neutrophil-derived microparticles)" + } + ], + "oversight_info": { + "is_fda_regulated_drug": "No", + "is_fda_regulated_device": "No", + "has_dmc": "No" + }, + "last_update_posted": { + "type": "Actual", + "content": "September 12, 2019" + }, + "id_info": { + "nct_id": "NCT03874338", + "org_study_id": "18-01323", + "secondary_id": "1R01HL146206" + }, + "enrollment": { + "type": "Anticipated", + "content": "670" + }, + "study_first_submitted": "March 12, 2019", + "condition": [ + "Neutrophils.Hypersegmented | Bld-Ser-Plas", + "STEMI - ST Elevation Myocardial Infarction" + ], + "study_type": "Observational", + "required_header": { + "download_date": "ClinicalTrials.gov processed this data on July 19, 2020", + "link_text": "Link to the current ClinicalTrials.gov record.", + "url": "https://clinicaltrials.gov/show/NCT03874338" + }, + "last_update_submitted": "September 10, 2019", + "completion_date": { + "type": "Anticipated", + "content": "February 1, 2022" + }, + "location": { + "contact": { + "phone": "646-501-9648", + "last_name": "Fatmira Curovic", + "email": "fatmira.curovic@nyumc.org" + }, + "facility": { + "address": { + "zip": "10016", + "country": "United States", + "city": "New York", + "state": "New York" + }, + "name": "NYU School of Medicine" + }, + "status": "Recruiting", + "contact_backup": { + "last_name": "Binita Shah, MD" + } + }, + "intervention": { + "intervention_type": "Drug", + "arm_group_label": [ + "Colchicine", + "Placebo" + ], + "description": "Participants in the main CLEAR SYNERGY trial are randomized to colchicine/spironolactone versus placebo in a 2x2 factorial design. The substudy is interested in the evaluation of biospecimens obtained from patients in the colchicine vs placebo group.", + "intervention_name": "Colchicine Pill" + }, + "patient_data": { + "sharing_ipd": "No" + }, + "verification_date": "September 2019" + } +} + """.trimIndent() + + var minAlex = Long.MAX_VALUE + var minJson = Long.MAX_VALUE + while (true) { + minAlex = min(measureNanoTime { JSON.parse(json).a!! }, minAlex) + minJson = min(measureNanoTime { JSONObject(json) }, minJson) + println("alex: $minAlex ns, json-java: $minJson ns ; alex is ${ minJson.toFloat() / minAlex.toFloat() } times as fast as json-java") + } } \ No newline at end of file diff --git a/src/main/kotlin/blitz/parse/NumParse.kt b/src/main/kotlin/blitz/parse/NumParse.kt deleted file mode 100644 index 1bdfd53..0000000 --- a/src/main/kotlin/blitz/parse/NumParse.kt +++ /dev/null @@ -1,43 +0,0 @@ -package blitz.parse - -import blitz.parse.comb.* - -object NumParse { - private val intBase = parser { it.require("0b")?.to(2) } or - parser { it.require("0x")?.to(16) } or - parser { it.require("0o")?.to(8) } or - constantParser(10) - - private val sign = parser { it.require("+")?.to(1) } or - parser { it.require("-")?.to(-1) } or - constantParser(1) - - val int = parser { s -> - s.map(sign)?.map(intBase)?.map { str, (sign, base) -> - val chars = when (base) { - 2 -> "01" - 8 -> "01234567" - 10 -> "0123456789" - 16 -> "0123456789abcdefABCDEF" - else -> error("wtf") - } - str.asLongAs(*chars.toCharArray()) { - it.toLongOrNull(base)?.times(sign) - } - } - } - - val float = parser { s -> - s.map(sign)?.map { str, sign -> - str.asLongAs('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.') { - it.toDoubleOrNull()?.times(sign) - } - } - } -} - -fun parseInt(str: String): Long? = - NumParse.int(Parsable(str))?.second - -fun parseDouble(str: String): Double? = - NumParse.float(Parsable(str))?.second \ No newline at end of file diff --git a/src/main/kotlin/blitz/parse/comb/Parser.kt b/src/main/kotlin/blitz/parse/comb/Parser.kt deleted file mode 100644 index 9491db5..0000000 --- a/src/main/kotlin/blitz/parse/comb/Parser.kt +++ /dev/null @@ -1,109 +0,0 @@ -package blitz.parse.comb - -import blitz.str.collectToString - -data class Parsable( - val str: String, - val loc: Int? = null -) - -typealias Parser = (Parsable) -> Pair? - -fun parser(fn: (Parsable) -> Pair?): Parser = - fn - -fun Parser.trim(): Parser = parser { - it.whitespaces() - .map(this@trim) - ?.whitespaces() -} - -fun constantParser(const: T): Parser = { it to const } - -infix fun Parser.or(other: Parser): Parser = { - this@or(it) ?: other(it) -} - -fun Parsable.spaces(): Parsable { - val new = str.trimStart(' ') - return Parsable(new, loc?.let { it + str.length - new.length }) -} - -fun Parsable.whitespaces(): Parsable { - val new = str.trimStart() - return Parsable(new, loc?.let { it + str.length - new.length }) -} - -fun Parsable.require(what: String): Parsable? { - if (str.startsWith(what)) - return Parsable(str.substring(what.length), loc?.let { it + what.length }) - return null -} - -fun Parsable.untilRequire(c: String, map: (String) -> T?): Pair? { - val before = str.substringBefore(c) - return map(before)?.let { Parsable(str.substringAfter(c), loc?.let { it + before.length }) to it } -} - -fun Parsable.asLongAs(vararg li: Char, map: (String) -> T?): Pair? { - val o = mutableListOf() - for (c in str) { - if (c in li) - o.add(c) - else - break - } - val out = str.substring(o.size) - return map(o.iterator().collectToString())?.let { Parsable(out, loc?.plus(o.size)) to it } -} - -fun Parsable.map(parser: Parser): Pair? = - parser(this) - -fun Pair.map(fn: (Parsable, T) -> Pair?): Pair? = - fn(first, second) - -fun Pair.map(parser: Parser): Pair>? = - map { parsable, a -> - parser(parsable)?.let { r -> - r.first to (a to r.second) - } - } - -fun Pair.mapFirst(fn: (Parsable) -> Parsable): Pair = - fn(first) to second - -fun Pair.mapFirstNullable(fn: (Parsable) -> Parsable?): Pair? = - fn(first)?.let { it to second } - -fun Pair.mapSecond(fn: (T) -> R): Pair = - first to fn(second) - -fun Pair.spaces(): Pair = - mapFirst { it.spaces() } - -fun Pair.whitespaces(): Pair = - mapFirst { it.whitespaces() } - -fun Pair.require(what: String): Pair? = - mapFirstNullable { it.require(what) } - -fun Parsable.array(sep: String, map: (Parsable) -> Pair?): Pair> { - val out = mutableListOf() - - var loc = loc - var curr = str - fun step() = - map(Parsable(curr, loc))?.also { - curr = it.first.str - loc = it.first.loc - } - - while (true) { - val r = step() ?: break - out.add(r.second) - curr = (Parsable(curr, loc).require(sep) ?: break).str - } - - return Parsable(curr, loc) to out -} \ No newline at end of file diff --git a/src/main/kotlin/blitz/parse/comb/Special.kt b/src/main/kotlin/blitz/parse/comb/Special.kt deleted file mode 100644 index 2cc067a..0000000 --- a/src/main/kotlin/blitz/parse/comb/Special.kt +++ /dev/null @@ -1,36 +0,0 @@ -package blitz.parse.comb - -fun Parsable.stringWithEscape(): Pair? { - var escaped = false - var index = 0 - val out = StringBuilder() - for (c in str) { - if (index == 0) { - if (c != '"') - return null - } else { - if (escaped) { - escaped = false - when (c) { - '"' -> out.append('"') - '\\' -> out.append('\\') - 'n' -> out.append('\n') - 'r' -> out.append('\r') - 'b' -> out.append('\b') - 't' -> out.append('\t') - else -> return null - } - } else if (c == '"') - break - else if (c == '\\') - escaped = true - else { - out.append(c) - } - } - index ++ - } - if (escaped) - return null - return Parsable(str.substring(index + 1), loc?.plus(index + 1)) to out.toString() -} \ No newline at end of file diff --git a/src/main/kotlin/blitz/parse/comb2/Parser.kt b/src/main/kotlin/blitz/parse/comb2/Parser.kt index 70806b3..e55f694 100644 --- a/src/main/kotlin/blitz/parse/comb2/Parser.kt +++ b/src/main/kotlin/blitz/parse/comb2/Parser.kt @@ -1,10 +1,8 @@ package blitz.parse.comb2 -import blitz.Either -import blitz.Provider +import blitz.* +import blitz.collections.RefVec import blitz.collections.contents -import blitz.partiallyFlattenA -import blitz.partiallyFlattenB import blitz.str.charsToString data class ParseCtx( @@ -21,175 +19,185 @@ data class ParseError( val message: String?, ) -typealias ParseResult = Either> +typealias ParseResult = Either> typealias Parser = (ParseCtx) -> ParseResult -inline fun Parser.mapValue(crossinline fn: (M) -> O): Parser = - { invoke(it).mapA { fn(it) } } +inline fun mapValue(crossinline self: Parser, crossinline fn: (M) -> O): Parser = + { self(it).mapA { fn(it) } } -inline fun Parser.mapErrors(crossinline fn: (List) -> List): Parser = - { invoke(it).mapB { fn(it) } } +inline fun mapErrors(crossinline self: Parser, crossinline fn: (RefVec) -> RefVec): Parser = + { self(it).mapB { fn(it) } } -fun Parser.then(other: Parser): Parser> = +inline fun then(crossinline self: Parser, crossinline other: Parser): Parser> = { ctx -> - invoke(ctx).mapA { first -> + self(ctx).flatMapA<_,_,Pair> { first -> other.invoke(ctx) .mapA { first to it } - }.partiallyFlattenA() + } } -fun Parser.thenIgnore(other: Parser): Parser = +inline fun thenOverwrite(crossinline self: Parser, crossinline other: Parser): Parser = { ctx -> - invoke(ctx).mapA { first -> + self(ctx).flatMapA<_,_,O> { + other.invoke(ctx) + } + } + +inline fun thenIgnore(crossinline self: Parser, crossinline other: Parser): Parser = + { ctx -> + self(ctx).flatMapA { first -> other.invoke(ctx) .mapA { first } - }.partiallyFlattenA() + } } -fun Parser.orElseVal(value: O): Parser = - orElse { Either.ofA(value) } +inline fun orElseVal(crossinline self: Parser, value: O): Parser = + orElse(self) { Either.ofA(value) } -fun Parser.orNot(): Parser = - orElse { Either.ofA(null) } - -fun Parser.orElse(other: Parser): Parser where O: R = +inline fun orElse(crossinline self: Parser, crossinline other: Parser): Parser where O: R = { - val old = it.copy() - this(it).mapB { err -> - it.loadFrom(old) + val old = it.idx + self(it).mapB { err -> + it.idx = old other.invoke(it) - .mapB { err + it } + .mapB { err.pushBack(it); err } }.partiallyFlattenB() } -fun choose(possible: Iterable>): Parser = +fun choose(possible: Iterable>): Parser = { ctx -> - val errors = mutableListOf() + val errors = RefVec(possible.count()) var res: O? = null for (p in possible) { - val old = ctx.copy() + val old = ctx.idx val t = p.invoke(ctx) if (t.isA) { - res = t.getA() + res = t.a!! break } else { - ctx.loadFrom(old) - errors += t.getB() + ctx.idx = old + errors.pushBack(t.b!!) } } res?.let { Either.ofA(it) } ?: Either.ofB(errors) } -fun choose(vararg possible: Parser): Parser = +fun choose(vararg possible: Parser): Parser = choose(possible.toList()) -fun Parser.repeated(): Parser> = +inline fun repeated(crossinline what: Parser): Parser> = { ctx -> - val out = mutableListOf() + val out = RefVec(0) while (true) { - val old = ctx.copy() - val t = invoke(ctx) + val old = ctx.idx + val t = what(ctx) if (t.isA) { - out += t.getA() + out.pushBack(t.a!!) } else { - ctx.loadFrom(old) + ctx.idx = old break } } Either.ofA(out) } -inline fun Parser.verifyValue(crossinline verif: (O) -> String?): Parser = +inline fun repeatedNoSave(crossinline what: Parser): Parser = { ctx -> - invoke(ctx).mapA> { - verif(it)?.let { Either.ofB(listOf(ParseError(ctx.idx, it))) } - ?: Either.ofA(it) - }.partiallyFlattenA() - } - -inline fun Parser>.verifyValueWithSpan(crossinline fn: (O) -> String?): Parser = - { ctx -> - invoke(ctx).mapA> { (span, v) -> - fn(v)?.let { Either.ofB(listOf(ParseError(span.first, it))) } - ?: Either.ofA(v) - }.partiallyFlattenA() - } - -fun Parser.errIfNull(msg: String = "parser value was null internally"): Parser = - verifyValue { if (it == null) msg else null } - .mapValue { it!! } - -inline fun location(crossinline fn: (Int) -> O): Parser = - { Either.ofA(fn(it.idx)) } - -fun location(): Parser = - location { it } - -fun withSpan(p: Parser): Parser> = - location() - .then(p) - .then(location()) - .mapValue { (beginAndV, end) -> - (beginAndV.first..end) to beginAndV.second - } - -fun value(value: O): Parser = - { Either.ofA(value) } - -fun chain(parsers: List>): Parser> = - { ctx -> - val results = mutableListOf() - val errs = mutableListOf() - for (p in parsers) { - val r = p.invoke(ctx) - if (r.isA) { - results += r.getA() - } else { - errs += r.getB() + while (true) { + val old = ctx.idx + val t = what(ctx) + if (t.isB) { + ctx.idx = old break } } - if (errs.isNotEmpty()) Either.ofB(errs) - else Either.ofA(results) + Either.ofA(Unit) } -fun seq(want: List): Parser> = - chain(want.map(::just)) - -inline fun filter(msg: String, crossinline filter: (I) -> Boolean): Parser = +inline fun verifyValue(crossinline self: Parser, crossinline verif: (O) -> String?): Parser = { ctx -> - if (ctx.idx >= ctx.input.size) { - Either.ofB(listOf(ParseError(ctx.idx, "unexpected end of file"))) - } else { - val i = ctx.input[ctx.idx++] - if (filter(i)) Either.ofA(i) - else Either.ofB(listOf(ParseError(ctx.idx - 1, msg))) + self(ctx).flatMapA<_,_,_> { + verif(it)?.let { Either.ofB(RefVec.of(ParseError(ctx.idx, it))) } + ?: Either.ofA(it) } } -fun just(want: I): Parser = +inline fun verifyValueWithSpan(crossinline self: Parser>, crossinline fn: (O) -> String?): Parser = + { ctx -> + self(ctx).flatMapA<_,_,_> { (span, v) -> + fn(v)?.let { Either.ofB(RefVec.of(ParseError(span.first, it))) } + ?: Either.ofA(v) + } + } + +inline fun location(crossinline fn: (Int) -> O): Parser = + { Either.ofA(fn(it.idx)) } + +inline fun location(): Parser = + location { it } + +inline fun withSpan(crossinline p: Parser): Parser> = + mapValue(then(then(location(), p), location())) { (beginAndV, end) -> + (beginAndV.first..end) to beginAndV.second + } + +inline fun value(value: O): Parser = + { Either.ofA(value) } + +fun chain(parsers: List>): Parser> = + { ctx -> + val results = RefVec(parsers.size) + val errs = RefVec(0) + for (p in parsers) { + val r = p.invoke(ctx) + if (r.isA) { + results.pushBack(r.a!!) + } else { + errs.pushBack(r.b!!) + break + } + } + if (errs.size != 0) Either.ofB(errs) + else Either.ofA(results) + } + +inline fun seq(want: List): Parser> = + chain(want.map(::just)) + +inline fun filter(msg: String, crossinline filter: (I) -> Boolean): Parser = + { ctx -> + if (ctx.idx >= ctx.input.size) { + Either.ofB(RefVec.of(ParseError(ctx.idx, "unexpected end of file"))) + } else { + val i = ctx.input[ctx.idx++] + if (filter(i)) Either.ofA(i) + else Either.ofB(RefVec.of(ParseError(ctx.idx - 1, msg))) + } + } + +inline fun just(want: I): Parser = filter("expected $want") { it == want } -fun oneOf(possible: Iterable): Parser = +inline fun oneOf(possible: Iterable): Parser = filter("expected one of ${possible.contents}") { it in possible } -fun future(prov: Provider>): Parser = +inline fun future(crossinline prov: Provider>): Parser = { prov()(it) } -inline fun futureRec(crossinline fn: (future: Parser) -> Parser): Parser { +inline fun futureRec(fn: (future: Parser) -> Parser): Parser { lateinit var f: Parser - f = fn(future { f }) + f = fn({ f(it) }) return f } /** group values 0 is the entire match */ -fun regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser = +fun regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser = { ctx -> pattern.matchAt(ctx.input.charsToString(), ctx.idx)?.let { ctx.idx = it.range.last + 1 Either.ofA(fn(it.groups)) - } ?: Either.ofB(listOf( + } ?: Either.ofB(RefVec.of( ParseError(ctx.idx, "regular expression \"$pattern\" does not apply") )) } @@ -197,7 +205,7 @@ fun regex(pattern: Regex, fn: (groups: MatchGroupCollection) -> O): Parser regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser = +fun regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser = regex(Regex(pattern), fn) fun regex(pattern: String) = regex(pattern) { it[0]!!.value } \ No newline at end of file diff --git a/src/main/kotlin/blitz/parse/comb2/Predef.kt b/src/main/kotlin/blitz/parse/comb2/Predef.kt index e8578fe..cbc1052 100644 --- a/src/main/kotlin/blitz/parse/comb2/Predef.kt +++ b/src/main/kotlin/blitz/parse/comb2/Predef.kt @@ -1,67 +1,63 @@ package blitz.parse.comb2 +import blitz.collections.RefVec import blitz.str.charsToString import kotlin.math.absoluteValue import kotlin.math.sign -fun whitespaces(): Parser = - oneOf("\n\t\r\b ".toList()) - .repeated() - .mapValue { it.charsToString() } +fun whitespaces(): Parser = + repeatedNoSave(oneOf("\n\t\r\b ".toList())) fun digit(): Parser = oneOf("0123456789".toList()) -fun uintLit(): Parser = - withSpan(digit().repeated()) - .verifyValueWithSpan { if (it.isEmpty()) "need digits after sign in num lit" else null } - .mapValue { it.charsToString().toUInt() } +fun uintLit(): Parser> = + verifyValueWithSpan(withSpan(repeated(digit()))) + { if (it.size == 0) "need digits after sign in num lit" else null } fun intLit(): Parser = - choose(just('+').mapValue { +1 }, - just('-').mapValue { -1 }, - value(+1)) - .then(uintLit()) - .mapValue { (sign, v) -> sign * v.toInt() } + mapValue(then(choose(mapValue(just('+')) { +1 }, + mapValue(just('-')) { -1 }, + value(+1)), + uintLit())) + { (sign, v) -> sign * v.charsToString().toInt() } fun floatLit(): Parser = - intLit() - .then(just('.') - .then(uintLit()) - .mapValue { it.second } - .orElseVal(0u)) - .mapValue { (pre, post) -> - var p = post.toDouble() - while (p.absoluteValue >= 1) { - p *= 0.1 - } - - (pre.toDouble().absoluteValue + p) * pre.toDouble().sign + mapValue( + then( + thenIgnore( + intLit(), + just('.')), + orElseVal(uintLit(), RefVec.of('0')))) + { (pre, post) -> + var p = post.charsToString().toDouble() + while (p.absoluteValue >= 1) { + p *= 0.1 } + (pre.toDouble().absoluteValue + p) * pre.toDouble().sign + } + fun escapeChar(): Parser = - just('\\').then( - choose(just('"'), + thenOverwrite(just('\\'), + mapErrors(choose(just('"'), just('\''), just('\\'), - just('n').mapValue { '\n' }, - just('r').mapValue { '\r' }, - just('b').mapValue { '\b' }, - just('t').mapValue { '\t' }) - .mapErrors { listOf(ParseError(it.first().loc, "invalid escape sequence")) } - ).mapValue { it.second } + mapValue(just('n')) { '\n' }, + mapValue(just('r')) { '\r' }, + mapValue(just('b')) { '\b' }, + mapValue(just('t')) { '\t' })) + { RefVec.of(ParseError(it[0].loc, "invalid escape sequence")) } + ) fun stringLit(): Parser = - just('"') - .then(choose(escapeChar(), - filter("a") { it != '"' }) - .repeated()) - .thenIgnore(just('"')) - .mapValue { (_, str) -> str.charsToString() } + mapValue(thenIgnore(then(just('"'), + repeated(choose(escapeChar(), + filter("a") { it != '"' }))), + just('"'))) + { (_, str) -> str.charsToString() } -fun Parser.delimitedBy(delim: Parser): Parser> = - thenIgnore(delim) - .repeated() - .then(this) - .mapValue { (a, b) -> a + b } - .orElse(value(listOf())) \ No newline at end of file +inline fun delimitedBy(crossinline self: Parser, crossinline delim: Parser): Parser> = + orElse(mapValue(then(repeated(thenIgnore(self, delim)), self)) + { (a, b) -> a.pushBack(b); a }, + value(RefVec.of())) \ No newline at end of file diff --git a/src/main/kotlin/blitz/str/CharsToString.kt b/src/main/kotlin/blitz/str/CharsToString.kt index 80e0b12..5d17583 100644 --- a/src/main/kotlin/blitz/str/CharsToString.kt +++ b/src/main/kotlin/blitz/str/CharsToString.kt @@ -1,4 +1,13 @@ package blitz.str +import blitz.collections.Vec + fun Collection.charsToString(): String = - String(this.toCharArray()) \ No newline at end of file + String(this.toCharArray()) + +fun Vec.charsToString(): String = + String(CharArray(size) { this[it] }) + +@JvmName("charsToString_VecByte") +fun Vec.charsToString(): String = + String(CharArray(size) { this[it].toInt().toChar() }) \ No newline at end of file