This commit is contained in:
alex-s168
2024-10-28 18:53:53 +01:00
parent 234a682f7e
commit f0b2736af5
18 changed files with 799 additions and 232 deletions

View File

@@ -1,10 +1,9 @@
package blitz.parse
import blitz.collections.RefVec
import blitz.collections.contents
import blitz.parse.comb2.*
import org.json.JSONObject
import kotlin.math.min
import kotlin.system.measureNanoTime
import blitz.unreachable
object JSON {
@@ -27,8 +26,10 @@ object JSON {
val jsonArray: Parser<Char, Element> =
thenIgnore(
thenIgnore(
thenOverwrite(just('['),
mapValue(delimitedBy(jsonElement, just(',')), Element::newArr)),
thenOverwrite(
thenIgnore(just('['), whitespaces),
mapValue(delimitedBy(jsonElement,
chain(whitespaces, ignoreSeq(","), whitespaces)), Element::newArr)),
whitespaces),
just(']')
)
@@ -91,6 +92,17 @@ object JSON {
/**
 * Creates an [Element] of kind `OBJ` whose boxed payload is the map [v].
 *
 * The map is stored as passed in; no copy is made here.
 */
inline fun newObj(v: Map<String, Element>): Element {
    return Element(OBJ, _boxed = v)
}
}
/**
 * Renders this element as text, chosen by its [kind].
 *
 * Numbers and booleans use their default string form, null becomes `null`,
 * arrays use the string form of their `contents`, strings are wrapped in
 * double quotes, and objects are written as `{key: value, ...}`.
 *
 * NOTE: string contents are not escaped and object keys are not quoted, so
 * the result is not guaranteed to be valid JSON.
 */
override fun toString(): String {
    return when (kind) {
        NUM -> "${uncheckedAsNum()}"
        BOOL -> "${uncheckedAsBool()}"
        NULL -> "null"
        ARR -> "${uncheckedAsArr().contents}"
        STR -> "\"" + uncheckedAsStr() + "\""
        OBJ -> uncheckedAsObj().entries.joinToString(prefix = "{", postfix = "}") { (key, value) ->
            "$key: $value"
        }
        // Every known kind is handled above.
        else -> unreachable()
    }
}
}
inline fun Element.uncheckedAsNum(): Double =
@@ -129,211 +141,6 @@ object JSON {
return _boxed as Map<String, Element>
}
/**
 * Parses [string] as a JSON element.
 *
 * The text is converted to a list of characters and handed to `jsonElement`
 * in a fresh [ParseCtx] starting at index 0; the parser's result is returned
 * unchanged.
 */
fun parse(string: String): ParseResult<Element> =
    jsonElement(ParseCtx(string.toList(), 0))
}
fun main() {
val json = """
{
"clinical_study": {
"brief_summary": {
"textblock": "CLEAR SYNERGY is an international multi center 2x2 randomized placebo controlled trial of"
},
"brief_title": "CLEAR SYNERGY Neutrophil Substudy",
"overall_status": "Recruiting",
"eligibility": {
"study_pop": {
"textblock": "Patients who are randomized to the drug RCT portion of the CLEAR SYNERGY (OASIS 9) trial"
},
"minimum_age": "19 Years",
"sampling_method": "Non-Probability Sample",
"gender": "All",
"criteria": {
"textblock": "Inclusion Criteria:"
},
"healthy_volunteers": "No",
"maximum_age": "110 Years"
},
"number_of_groups": "2",
"source": "NYU Langone Health",
"location_countries": {
"country": "United States"
},
"study_design_info": {
"time_perspective": "Prospective",
"observational_model": "Other"
},
"last_update_submitted_qc": "September 10, 2019",
"intervention_browse": {
"mesh_term": "Colchicine"
},
"official_title": "Studies on the Effects of Colchicine on Neutrophil Biology in Acute Myocardial Infarction: A Substudy of the CLEAR SYNERGY (OASIS 9) Trial",
"primary_completion_date": {
"type": "Anticipated",
"content": "February 1, 2021"
},
"sponsors": {
"lead_sponsor": {
"agency_class": "Other",
"agency": "NYU Langone Health"
},
"collaborator": [
{
"agency_class": "Other",
"agency": "Population Health Research Institute"
},
{
"agency_class": "NIH",
"agency": "National Heart, Lung, and Blood Institute (NHLBI)"
}
]
},
"overall_official": {
"role": "Principal Investigator",
"affiliation": "NYU School of Medicine",
"last_name": "Binita Shah, MD"
},
"overall_contact_backup": {
"last_name": "Binita Shah, MD"
},
"condition_browse": {
"mesh_term": [
"Myocardial Infarction",
"ST Elevation Myocardial Infarction",
"Infarction"
]
},
"overall_contact": {
"phone": "646-501-9648",
"last_name": "Fatmira Curovic",
"email": "fatmira.curovic@nyumc.org"
},
"responsible_party": {
"responsible_party_type": "Principal Investigator",
"investigator_title": "Assistant Professor of Medicine",
"investigator_full_name": "Binita Shah",
"investigator_affiliation": "NYU Langone Health"
},
"study_first_submitted_qc": "March 12, 2019",
"start_date": {
"type": "Actual",
"content": "March 4, 2019"
},
"has_expanded_access": "No",
"study_first_posted": {
"type": "Actual",
"content": "March 14, 2019"
},
"arm_group": [
{
"arm_group_label": "Colchicine"
},
{
"arm_group_label": "Placebo"
}
],
"primary_outcome": {
"measure": "soluble L-selectin",
"time_frame": "between baseline and 3 months",
"description": "Change in soluble L-selectin between baseline and 3 mo after STEMI in the placebo vs. colchicine groups."
},
"secondary_outcome": [
{
"measure": "Other soluble markers of neutrophil activity",
"time_frame": "between baseline and 3 months",
"description": "Other markers of neutrophil activity will be evaluated at baseline and 3 months after STEMI (myeloperoxidase, matrix metalloproteinase-9, neutrophil gelatinase-associated lipocalin, neutrophil elastase, intercellular/vascular cellular adhesion molecules)"
},
{
"measure": "Markers of systemic inflammation",
"time_frame": "between baseline and 3 months",
"description": "Markers of systemic inflammation will be evaluated at baseline and 3 months after STEMI (high sensitive CRP, IL-1β)"
},
{
"measure": "Neutrophil-driven responses that may further propagate injury",
"time_frame": "between baseline and 3 months",
"description": "Neutrophil-driven responses that may further propagate injury will be evaluated at baseline and 3 months after STEMI (neutrophil extracellular traps, neutrophil-derived microparticles)"
}
],
"oversight_info": {
"is_fda_regulated_drug": "No",
"is_fda_regulated_device": "No",
"has_dmc": "No"
},
"last_update_posted": {
"type": "Actual",
"content": "September 12, 2019"
},
"id_info": {
"nct_id": "NCT03874338",
"org_study_id": "18-01323",
"secondary_id": "1R01HL146206"
},
"enrollment": {
"type": "Anticipated",
"content": "670"
},
"study_first_submitted": "March 12, 2019",
"condition": [
"Neutrophils.Hypersegmented | Bld-Ser-Plas",
"STEMI - ST Elevation Myocardial Infarction"
],
"study_type": "Observational",
"required_header": {
"download_date": "ClinicalTrials.gov processed this data on July 19, 2020",
"link_text": "Link to the current ClinicalTrials.gov record.",
"url": "https://clinicaltrials.gov/show/NCT03874338"
},
"last_update_submitted": "September 10, 2019",
"completion_date": {
"type": "Anticipated",
"content": "February 1, 2022"
},
"location": {
"contact": {
"phone": "646-501-9648",
"last_name": "Fatmira Curovic",
"email": "fatmira.curovic@nyumc.org"
},
"facility": {
"address": {
"zip": "10016",
"country": "United States",
"city": "New York",
"state": "New York"
},
"name": "NYU School of Medicine"
},
"status": "Recruiting",
"contact_backup": {
"last_name": "Binita Shah, MD"
}
},
"intervention": {
"intervention_type": "Drug",
"arm_group_label": [
"Colchicine",
"Placebo"
],
"description": "Participants in the main CLEAR SYNERGY trial are randomized to colchicine/spironolactone versus placebo in a 2x2 factorial design. The substudy is interested in the evaluation of biospecimens obtained from patients in the colchicine vs placebo group.",
"intervention_name": "Colchicine Pill"
},
"patient_data": {
"sharing_ipd": "No"
},
"verification_date": "September 2019"
}
}
""".trimIndent()
var minAlex = Long.MAX_VALUE
var minJson = Long.MAX_VALUE
while (true) {
minAlex = min(measureNanoTime { JSON.parse(json).a!! }, minAlex)
minJson = min(measureNanoTime { JSONObject(json) }, minJson)
println("alex: $minAlex ns, json-java: $minJson ns ; alex is ${ minJson.toFloat() / minAlex.toFloat() } times as fast as json-java")
}
/**
 * Parses [string] as a JSON element by running `jsonElement` over the
 * characters of the text and returning its result unchanged.
 */
fun parse(string: String): ParseResult<Element> {
    val chars = string.toList()
    return jsonElement.run(chars)
}
}

View File

@@ -2,6 +2,8 @@ package blitz.parse.comb2
import blitz.*
import blitz.collections.RefVec
import blitz.collections.containsAt
import blitz.parse.JSON.jsonElement
import blitz.str.charsToString
data class ParseCtx<I>(
@@ -192,9 +194,29 @@ fun <I, O: Any> chain(parsers: List<Parser<I, O>>): Parser<I, RefVec<O>> =
else Either.ofA(results)
}
/**
 * Vararg convenience overload: collects [parsers] into a list and delegates
 * to the list-based `chain`.
 */
inline fun <I, O: Any> chain(vararg parsers: Parser<I, O>): Parser<I, RefVec<O>> {
    val parserList = parsers.toList()
    return chain(parserList)
}
/**
 * Builds a parser for the exact item sequence [want]: each item is turned
 * into a `just` parser and the resulting parsers are combined with `chain`.
 */
inline fun <I: Any> seq(want: List<I>): Parser<I, RefVec<I>> {
    val steps = want.map { item -> just(item) }
    return chain(steps)
}
/**
 * Builds a parser for the exact text [want]: each character is turned into
 * a `just` parser and the resulting parsers are combined with `chain`.
 */
inline fun seq(want: String): Parser<Char, RefVec<Char>> {
    val steps = want.map { ch -> just(ch) }
    return chain(steps)
}
/**
 * Builds a parser that matches the exact text [want] at the current position
 * and yields [Unit] instead of the matched characters.
 *
 * On a match the context index is advanced by `want.length`. Otherwise the
 * index is left untouched and a [ParseError] at the current index is
 * returned: "unexpected end of file" when the index is at or past the end of
 * the input, "expected <want>" when the input does not contain [want] there.
 */
inline fun ignoreSeq(want: String): Parser<Char, Unit> {
    // Convert once when the parser is built so the list is not re-allocated
    // on every parse attempt (this parser is typically used as a delimiter).
    val wanted = want.toList()
    return { ctx ->
        when {
            ctx.idx >= ctx.input.size ->
                Either.ofB(ParseError(ctx.idx, "unexpected end of file"))

            ctx.input.containsAt(ctx.idx, wanted) -> {
                ctx.idx += want.length
                Either.ofA(Unit)
            }

            else ->
                Either.ofB(ParseError(ctx.idx, "expected $want"))
        }
    }
}
inline fun <I: Any> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
{ ctx ->
if (ctx.idx >= ctx.input.size) {
@@ -253,4 +275,13 @@ fun regex(pattern: Regex) = regex(pattern) { it[0]!!.value }
/**
 * String-pattern convenience overload: compiles [pattern] into a [Regex] and
 * delegates to the [Regex]-based `regex` overload with the same [fn].
 */
fun <O: Any> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> {
    val compiled = pattern.toRegex()
    return regex(compiled, fn)
}
fun regex(pattern: String) = regex(pattern) { it[0]!!.value }
fun regex(pattern: String) = regex(pattern) { it[0]!!.value }
/**
 * Extracts the parsed value from this result, or throws if it holds an error.
 *
 * The first handler passes the value through unchanged; the second throws an
 * [Exception] whose message is "at <loc>: <message>" taken from the error.
 */
fun <O: Any> ParseResult<O>.unwrap(): O {
    return flatMap(
        { parsed -> parsed },
        { failure -> throw Exception("at ${failure.loc}: ${failure.message}") }
    )
}
/**
 * Runs this parser over [input] from the beginning: a fresh [ParseCtx] at
 * index 0 is created and the parser's result is returned unchanged.
 */
fun <I, O: Any> Parser<I, O>.run(input: List<I>): ParseResult<O> {
    val ctx = ParseCtx(input, 0)
    return this.invoke(ctx)
}

View File

@@ -14,24 +14,24 @@ val digit: Parser<Char, Char> =
filter("expected digit") { it >= '0' && it <= '9' }
val uintLit: Parser<Char, RefVec<Char>> =
verifyValueWithSpan(withSpan(repeated(digit)))
{ if (it.size == 0) "need digits after sign in num lit" else null }
verifyValue(repeated(digit))
{ if (it.size == 0) "need digits after sign in num lit" else null }
val intLit: Parser<Char, Int> =
mapValue(then(choose<Char,Int> {
val intLit: Parser<Char, Long> =
mapValue(then(choose<Char, Int> {
it(mapValue(just('+')) { +1 })
it(mapValue(just('-')) { -1 })
it(value(+1))
}, uintLit))
{ (sign, v) -> sign * v.charsToString().toInt() }
{ (sign, v) -> sign * (v.charsToString().toLongOrNull() ?: Long.MAX_VALUE) }
val floatLit: Parser<Char, Double> =
mapValue(
then(
thenIgnore(
intLit,
just('.')),
orElseVal(uintLit, RefVec.of('0'))))
intLit,
orElseVal(
thenOverwrite(just('.'), uintLit),
RefVec.of('0'))))
{ (pre, post) ->
var p = post.charsToString().toDouble()
while (p.absoluteValue >= 1) {