c

2024-10-28 18:53:53 +01:00
parent 234a682f7e
commit f0b2736af5
18 changed files with 799 additions and 232 deletions
--- a/src/main/kotlin/blitz/parse/JSON.kt
+++ b/src/main/kotlin/blitz/parse/JSON.kt
@@ -1,10 +1,9 @@
 package blitz.parse

 import blitz.collections.RefVec
+import blitz.collections.contents
 import blitz.parse.comb2.*
-import org.json.JSONObject
-import kotlin.math.min
-import kotlin.system.measureNanoTime
+import blitz.unreachable

 object JSON {

@@ -27,8 +26,10 @@ object JSON {
        val jsonArray: Parser<Char, Element> =
            thenIgnore(
                thenIgnore(
-                    thenOverwrite(just('['),
-                        mapValue(delimitedBy(jsonElement, just(',')), Element::newArr)),
+                    thenOverwrite(
+                        thenIgnore(just('['), whitespaces), // opening '[' followed by the whitespaces parser
+                        mapValue(delimitedBy(jsonElement,
+                            chain(whitespaces, ignoreSeq(","), whitespaces)), Element::newArr)), // delimiter: whitespaces, ",", whitespaces
                whitespaces),
            just(']')
            )
@@ -91,6 +92,17 @@ object JSON {
            inline fun newObj(v: Map<String, Element>): Element =
                Element(OBJ, _boxed = v)
        }
+
+        /** Renders this element as JSON-like text; string values and object keys are quoted but not escaped. */
+        override fun toString(): String = when (kind) {
+            NUM -> uncheckedAsNum().toString()
+            BOOL -> uncheckedAsBool().toString()
+            NULL -> "null"
+            ARR -> uncheckedAsArr().contents.toString()
+            STR -> "\"${uncheckedAsStr()}\""
+            OBJ -> uncheckedAsObj().entries.joinToString(prefix = "{", postfix = "}") { "\"${it.key}\": ${it.value}" }
+            else -> unreachable()
+        }
    }
    
    inline fun Element.uncheckedAsNum(): Double =
@@ -129,211 +141,6 @@ object JSON {
        return _boxed as Map<String, Element>
    }

-    fun parse(string: String): ParseResult<Element> {
-        val ctx = ParseCtx(string.toList(), 0)
-        val v = jsonElement(ctx)
-        return v
-    }
-}
-
-fun main() {
-    val json = """
-{
- "clinical_study": {
-  "brief_summary": {
-   "textblock": "CLEAR SYNERGY is an international multi center 2x2 randomized placebo controlled trial of"
-  },
-  "brief_title": "CLEAR SYNERGY Neutrophil Substudy",
-  "overall_status": "Recruiting",
-  "eligibility": {
-   "study_pop": {
-    "textblock": "Patients who are randomized to the drug RCT portion of the CLEAR SYNERGY (OASIS 9) trial"
-   },
-   "minimum_age": "19 Years",
-   "sampling_method": "Non-Probability Sample",
-   "gender": "All",
-   "criteria": {
-    "textblock": "Inclusion Criteria:"
-   },
-   "healthy_volunteers": "No",
-   "maximum_age": "110 Years"
-  },
-  "number_of_groups": "2",
-  "source": "NYU Langone Health",
-  "location_countries": {
-   "country": "United States"
-  },
-  "study_design_info": {
-   "time_perspective": "Prospective",
-   "observational_model": "Other"
-  },
-  "last_update_submitted_qc": "September 10, 2019",
-  "intervention_browse": {
-   "mesh_term": "Colchicine"
-  },
-  "official_title": "Studies on the Effects of Colchicine on Neutrophil Biology in Acute Myocardial Infarction: A Substudy of the CLEAR SYNERGY (OASIS 9) Trial",
-  "primary_completion_date": {
-   "type": "Anticipated",
-   "content": "February 1, 2021"
-  },
-  "sponsors": {
-   "lead_sponsor": {
-    "agency_class": "Other",
-    "agency": "NYU Langone Health"
-   },
-   "collaborator": [
-    {
-     "agency_class": "Other",
-     "agency": "Population Health Research Institute"
-    },
-    {
-     "agency_class": "NIH",
-     "agency": "National Heart, Lung, and Blood Institute (NHLBI)"
-    }
-   ]
-  },
-  "overall_official": {
-   "role": "Principal Investigator",
-   "affiliation": "NYU School of Medicine",
-   "last_name": "Binita Shah, MD"
-  },
-  "overall_contact_backup": {
-   "last_name": "Binita Shah, MD"
-  },
-  "condition_browse": {
-   "mesh_term": [
-    "Myocardial Infarction",
-    "ST Elevation Myocardial Infarction",
-    "Infarction"
-   ]
-  },
-  "overall_contact": {
-   "phone": "646-501-9648",
-   "last_name": "Fatmira Curovic",
-   "email": "fatmira.curovic@nyumc.org"
-  },
-  "responsible_party": {
-   "responsible_party_type": "Principal Investigator",
-   "investigator_title": "Assistant Professor of Medicine",
-   "investigator_full_name": "Binita Shah",
-   "investigator_affiliation": "NYU Langone Health"
-  },
-  "study_first_submitted_qc": "March 12, 2019",
-  "start_date": {
-   "type": "Actual",
-   "content": "March 4, 2019"
-  },
-  "has_expanded_access": "No",
-  "study_first_posted": {
-   "type": "Actual",
-   "content": "March 14, 2019"
-  },
-  "arm_group": [
-   {
-    "arm_group_label": "Colchicine"
-   },
-   {
-    "arm_group_label": "Placebo"
-   }
-  ],
-  "primary_outcome": {
-   "measure": "soluble L-selectin",
-   "time_frame": "between baseline and 3 months",
-   "description": "Change in soluble L-selectin between baseline and 3 mo after STEMI in the placebo vs. colchicine groups."
-  },
-  "secondary_outcome": [
-   {
-    "measure": "Other soluble markers of neutrophil activity",
-    "time_frame": "between baseline and 3 months",
-    "description": "Other markers of neutrophil activity will be evaluated at baseline and 3 months after STEMI (myeloperoxidase, matrix metalloproteinase-9, neutrophil gelatinase-associated lipocalin, neutrophil elastase, intercellular/vascular cellular adhesion molecules)"
-   },
-   {
-    "measure": "Markers of systemic inflammation",
-    "time_frame": "between baseline and 3 months",
-    "description": "Markers of systemic inflammation will be evaluated at baseline and 3 months after STEMI (high sensitive CRP, IL-1β)"
-   },
-   {
-    "measure": "Neutrophil-driven responses that may further propagate injury",
-    "time_frame": "between baseline and 3 months",
-    "description": "Neutrophil-driven responses that may further propagate injury will be evaluated at baseline and 3 months after STEMI (neutrophil extracellular traps, neutrophil-derived microparticles)"
-   }
-  ],
-  "oversight_info": {
-   "is_fda_regulated_drug": "No",
-   "is_fda_regulated_device": "No",
-   "has_dmc": "No"
-  },
-  "last_update_posted": {
-   "type": "Actual",
-   "content": "September 12, 2019"
-  },
-  "id_info": {
-   "nct_id": "NCT03874338",
-   "org_study_id": "18-01323",
-   "secondary_id": "1R01HL146206"
-  },
-  "enrollment": {
-   "type": "Anticipated",
-   "content": "670"
-  },
-  "study_first_submitted": "March 12, 2019",
-  "condition": [
-   "Neutrophils.Hypersegmented | Bld-Ser-Plas",
-   "STEMI - ST Elevation Myocardial Infarction"
-  ],
-  "study_type": "Observational",
-  "required_header": {
-   "download_date": "ClinicalTrials.gov processed this data on July 19, 2020",
-   "link_text": "Link to the current ClinicalTrials.gov record.",
-   "url": "https://clinicaltrials.gov/show/NCT03874338"
-  },
-  "last_update_submitted": "September 10, 2019",
-  "completion_date": {
-   "type": "Anticipated",
-   "content": "February 1, 2022"
-  },
-  "location": {
-   "contact": {
-    "phone": "646-501-9648",
-    "last_name": "Fatmira Curovic",
-    "email": "fatmira.curovic@nyumc.org"
-   },
-   "facility": {
-    "address": {
-     "zip": "10016",
-     "country": "United States",
-     "city": "New York",
-     "state": "New York"
-    },
-    "name": "NYU School of Medicine"
-   },
-   "status": "Recruiting",
-   "contact_backup": {
-    "last_name": "Binita Shah, MD"
-   }
-  },
-  "intervention": {
-   "intervention_type": "Drug",
-   "arm_group_label": [
-    "Colchicine",
-    "Placebo"
-   ],
-   "description": "Participants in the main CLEAR SYNERGY trial are randomized to colchicine/spironolactone versus placebo in a 2x2 factorial design. The substudy is interested in the evaluation of biospecimens obtained from patients in the colchicine vs placebo group.",
-   "intervention_name": "Colchicine Pill"
-  },
-  "patient_data": {
-   "sharing_ipd": "No"
-  },
-  "verification_date": "September 2019"
- }
-}
-    """.trimIndent()
-
-    var minAlex = Long.MAX_VALUE
-    var minJson = Long.MAX_VALUE
-    while (true) {
-        minAlex = min(measureNanoTime { JSON.parse(json).a!! }, minAlex)
-        minJson = min(measureNanoTime { JSONObject(json) }, minJson)
-        println("alex: $minAlex ns, json-java: $minJson ns ; alex is ${ minJson.toFloat() / minAlex.toFloat() } times as fast as json-java")
-    }
+    /** Runs [jsonElement] over the characters of [string], starting at index 0, and returns its result. */
+    fun parse(string: String): ParseResult<Element> = jsonElement.run(string.toList())
 }
--- a/src/main/kotlin/blitz/parse/comb2/Parser.kt
+++ b/src/main/kotlin/blitz/parse/comb2/Parser.kt
@@ -2,6 +2,8 @@ package blitz.parse.comb2

 import blitz.*
 import blitz.collections.RefVec
+import blitz.collections.containsAt
+import blitz.parse.JSON.jsonElement
 import blitz.str.charsToString

 data class ParseCtx<I>(
@@ -192,9 +194,29 @@ fun <I, O: Any> chain(parsers: List<Parser<I, O>>): Parser<I, RefVec<O>> =
        else Either.ofA(results)
    }

+/** Vararg convenience overload: copies [parsers] into a list and delegates to the list-based `chain`. */
+inline fun <I, O: Any> chain(vararg parsers: Parser<I, O>): Parser<I, RefVec<O>> = chain(parsers.toList())
+
 inline fun <I: Any> seq(want: List<I>): Parser<I, RefVec<I>> =
    chain(want.map(::just))

+/** String overload of `seq`: chains one `just` parser per character of [want], in order. */
+inline fun seq(want: String): Parser<Char, RefVec<Char>> = chain(want.map { just(it) })
+
+inline fun ignoreSeq(want: String): Parser<Char, Unit> { // matches the literal `want` and yields Unit
+    val wantChars = want.toList() // built once per parser instead of on every parse attempt
+    return { ctx ->
+        when {
+            ctx.idx >= ctx.input.size -> Either.ofB(ParseError(ctx.idx, "unexpected end of file"))
+            ctx.input.containsAt(ctx.idx, wantChars) -> {
+                ctx.idx += want.length // consume the matched literal
+                Either.ofA(Unit)
+            }
+            else -> Either.ofB(ParseError(ctx.idx, "expected $want")) // idx is left untouched on failure
+        }
+    }
+}
+
 inline fun <I: Any> filter(msg: String, crossinline filter: (I) -> Boolean): Parser<I, I> =
    { ctx ->
        if (ctx.idx >= ctx.input.size) {
@@ -253,4 +275,13 @@ fun regex(pattern: Regex) = regex(pattern) { it[0]!!.value }
 fun <O: Any> regex(pattern: String, fn: (groups: MatchGroupCollection) -> O): Parser<Char, O> =
    regex(Regex(pattern), fn)

-fun regex(pattern: String) = regex(pattern) { it[0]!!.value }
+fun regex(pattern: String) = regex(pattern) { it[0]!!.value }
+
+/** Returns the success value, or throws an [Exception] carrying the error's `loc` and `message`. */
+fun <O: Any> ParseResult<O>.unwrap(): O =
+    flatMap(
+        { value -> value },
+        { err -> throw Exception("at ${err.loc}: ${err.message}") })
+
+/** Invokes this parser on [input] with a fresh [ParseCtx] positioned at index 0. */
+fun <I, O: Any> Parser<I, O>.run(input: List<I>): ParseResult<O> = invoke(ParseCtx(input, 0))
--- a/src/main/kotlin/blitz/parse/comb2/Predef.kt
+++ b/src/main/kotlin/blitz/parse/comb2/Predef.kt
@@ -14,24 +14,24 @@ val digit: Parser<Char, Char> =
    filter("expected digit") { it >= '0' && it <= '9' }

 val uintLit: Parser<Char, RefVec<Char>> =
-    verifyValueWithSpan(withSpan(repeated(digit)))
-             { if (it.size == 0) "need digits after sign in num lit" else null }
+    verifyValue(repeated(digit))
+    { if (it.size == 0) "need digits after sign in num lit" else null } // zero digits -> this message; otherwise null

-val intLit: Parser<Char, Int> =
-    mapValue(then(choose<Char,Int> {
+val intLit: Parser<Char, Long> =
+    mapValue(then(choose<Char, Int> {
        it(mapValue(just('+')) { +1 })
        it(mapValue(just('-')) { -1 })
        it(value(+1))
     }, uintLit))
-    { (sign, v) -> sign * v.charsToString().toInt() }
+    { (sign, v) -> sign * (v.charsToString().toLongOrNull() ?: Long.MAX_VALUE) } // digit runs that do not fit in a Long fall back to Long.MAX_VALUE before the sign is applied

 val floatLit: Parser<Char, Double> =
    mapValue(
            then(
-                thenIgnore(
-                    intLit,
-                    just('.')),
-                orElseVal(uintLit, RefVec.of('0'))))
+                intLit,
+                orElseVal(
+                    thenOverwrite(just('.'), uintLit),
+                    RefVec.of('0'))))
    { (pre, post) ->
        var p = post.charsToString().toDouble()
        while (p.absoluteValue >= 1) {