From 0b33d90c72030e129f387ed9ea04c75af5604918 Mon Sep 17 00:00:00 2001
From: Timothy Baldridge
Date: Fri, 17 Apr 2015 17:13:55 -0600
Subject: [PATCH] Added streaming support for parsers

---
Notes (ignored by `git am`; not part of the commit message):

  * pixie/parser.pxi: adds InputStreamCursor, an ICursor implementation backed
    by a lazy seq of buffers, each allocated with
    (gc-buffer DEFAULT-BUFFER-SIZE) and filled by `read`. A failed read is
    caught and ends the seq; an empty buffer also ends it. `current` yields
    nil once the buffer seq is exhausted, and `at-end?` is true when the
    remaining buffer seq is nil. Snapshots capture the current seq cell and
    index, so `rewind!` simply restores both fields.
  * pixie/parser/json.pxi: adds `read-one`, which runs the JSON parser's
    :ENTRY rule against any ICursor.
  * pixie/stdlib.pxi: the default `-hash` generated by defrecord now hashes
    the record's fields instead of throwing "not implemented".
  * pixie/vm/libs/ffi.py: Buffer takes an `auto_free` flag. `buffer` passes
    False (caller must dispose); the new `gc-buffer` passes True so the raw
    allocation is freed in __del__. free_data clears the flag after freeing
    so __del__ does not free the same memory a second time.

 pixie/parser.pxi                       | 45 +++++++++++++++++++++++++-
 pixie/parser/json.pxi                  |  4 +++
 pixie/stdlib.pxi                       |  2 +-
 pixie/vm/libs/ffi.py                   | 16 +++++----
 tests/pixie/tests/parser/test-json.pxi | 15 +++++++++
 5 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/pixie/parser.pxi b/pixie/parser.pxi
index e1b12a26..29fc3127 100644
--- a/pixie/parser.pxi
+++ b/pixie/parser.pxi
@@ -1,5 +1,8 @@
 (ns pixie.parser
-  (:require [pixie.stdlib :as s]))
+  (:require [pixie.stdlib :as s]
+            [pixie.streams :refer [read IInputStream]]))
+
+(def DEFAULT-BUFFER-SIZE 1024)
 
 
 ;; This file contans a small framework for writing generic parsers in Pixie. Although the generated
@@ -310,3 +313,43 @@
 (def digits (parse-if (set "1234567890")))
 
 (def whitespace (parse-if #{\newline \return \space \tab}))
+
+
+
+
+;; Represents an input stream that implements ICursor
+
+(defn -input-stream-buffer-seq
+  "Creates a lazy seq of buffers retrieved from the input stream"
+  [is]
+  (let [buf (gc-buffer DEFAULT-BUFFER-SIZE)
+        result (try
+                 (read is buf DEFAULT-BUFFER-SIZE)
+                 buf
+                 (catch ex nil))]
+    (when (s/and result (pos? (count buf)))
+      (cons buf (lazy-seq (-input-stream-buffer-seq is))))))
+
+(defrecord InputStreamSnapshot [buffer-cell idx])
+
+(deftype InputStreamCursor [buffer-cell idx]
+  ICursor
+  (next! [this]
+    (if (= (inc idx) (count (first buffer-cell)))
+      (do (set-field! this :buffer-cell (next buffer-cell))
+          (set-field! this :idx 0))
+      (set-field! this :idx (inc idx))))
+  (current [this]
+    (let [val (when (s/and buffer-cell (< idx (count (first buffer-cell))))
+                (char (nth (first buffer-cell) idx)))]
+      val))
+  (at-end? [this] (= nil buffer-cell))
+  (snapshot [this]
+    (->InputStreamSnapshot buffer-cell idx))
+  (rewind! [this snapshot]
+    (assert (instance? InputStreamSnapshot snapshot) (str "Must provide an input stream snapshot "))
+    (set-field! this :buffer-cell (:buffer-cell snapshot))
+    (set-field! this :idx (:idx snapshot))))
+
+(defn input-stream-cursor [is]
+  (->InputStreamCursor (-input-stream-buffer-seq is) 0))
diff --git a/pixie/parser/json.pxi b/pixie/parser/json.pxi
index 321d71c1..2d0abbab 100644
--- a/pixie/parser/json.pxi
+++ b/pixie/parser/json.pxi
@@ -109,3 +109,7 @@
     (if (failure? result)
       (println (current c) (snapshot c))
       result)))
+
+(defn read-one [c]
+  (assert (satisfies? ICursor c))
+  ((:ENTRY JSONParser) c))
diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi
index 48110302..89e4f903 100644
--- a/pixie/stdlib.pxi
+++ b/pixie/stdlib.pxi
@@ -1234,7 +1234,7 @@ and implements IAssociative, ILookup and IObject."
                                            `(= (. self ~field) (. other ~field)))
                                          fields)))
                           `(-hash [self]
-                                   (throw "not implemented"))]
+                                   (hash ~fields))]
         deftype-decl `(deftype ~nm ~fields ~@default-bodies ~@body)]
     `(do ~type-from-map
          ~deftype-decl)))
diff --git a/pixie/vm/libs/ffi.py b/pixie/vm/libs/ffi.py
index 6fbc53a1..2de857cc 100644
--- a/pixie/vm/libs/ffi.py
+++ b/pixie/vm/libs/ffi.py
@@ -216,15 +216,16 @@ class Buffer(object.Object):
     def type(self):
         return Buffer._type
 
-    def __init__(self, size):
+    def __init__(self, size, auto_free):
         self._size = size
         self._used_size = 0
+        self._auto_free = auto_free
         self._buffer = lltype.malloc(rffi.CCHARP.TO, size, flavor="raw")
 
 
     def __del__(self):
-        #lltype.free(self._buffer, flavor="raw")
-        pass
+        if self._auto_free:
+            lltype.free(self._buffer, flavor="raw")
 
     def set_used_size(self, size):
         self._used_size = size
@@ -247,8 +248,7 @@ def capacity(self):
 
     def free_data(self):
         lltype.free(self._buffer, flavor="raw")
-
-
+        self._auto_free = False
 
 @extend(proto._dispose_BANG_, Buffer)
 def _dispose_voidp(self):
@@ -270,7 +270,11 @@ def _count(self):
 
 @as_var("buffer")
 def buffer(size):
-    return Buffer(size.int_val())
+    return Buffer(size.int_val(), False)
+
+@as_var("gc-buffer")
+def gc_buffer(size):
+    return Buffer(size.int_val(), True)
 
 @as_var("buffer-capacity")
 def buffer_capacity(buffer):
diff --git a/tests/pixie/tests/parser/test-json.pxi b/tests/pixie/tests/parser/test-json.pxi
index e79ef6cd..eff5a578 100644
--- a/tests/pixie/tests/parser/test-json.pxi
+++ b/tests/pixie/tests/parser/test-json.pxi
@@ -1,5 +1,7 @@
 (ns pixie.tests.parser.test-json
   (:require [pixie.test :refer :all]
+            [pixie.io :refer [slurp open-read]]
+            [pixie.parser :refer [failure? input-stream-cursor]]
             [pixie.parser.json :as json]))
 
 
@@ -31,3 +33,16 @@
                 "{\"foo\": 42}" {"foo", 42}
                 "{\"foo\": 42, \"bar\":null}" {"foo" 42
                                                "bar" nil}))
+
+(deftest test-streaming-json-file
+  (let [filename "tests/pixie/tests/parser/test-json-data.json"
+        cursor (-> filename
+                   open-read
+                   input-stream-cursor)
+        streamed-result (json/read-one cursor)
+        slurped-result (-> filename
+                           slurp
+                           json/read-string)]
+    (assert (not (failure? streamed-result)))
+    (assert (not (failure? slurped-result)))
+    (assert= slurped-result streamed-result)))