Initial commit

This commit is contained in:
heyarne 2020-08-20 22:30:58 +02:00
commit 360550a6a9
3 changed files with 189 additions and 0 deletions

View file

@ -0,0 +1,114 @@
(ns heyarne.vanilla-sky.tiptaps)
;; this file contains the first steps.
;; we want to analyze pictures of cctv cameras. the thing we start with is
;; finding those pictures and loading them so we can manipulate them.
;; for some reason the generic example (using java.net.URL.) from the enlive
;; tutorial does not work, the pages return a 403 Forbidden, which is why we use
;; clj-http and parse the body directly:
(require '[clj-http.client :as http])
(require '[net.cgrand.enlive-html :as html])
(defn fetch-url
  "Starts fetching `url` on another thread and returns a future. Dereferencing
  the future yields the response body parsed by enlive's `html-snippet`."
  [url]
  (future
    (-> url
        http/get
        :body
        html/html-snippet)))
;; the parsed detail page of one hand-picked camera; `deref` blocks until the
;; request started by `fetch-url` has finished.
(def some-detail-page
  (deref (fetch-url "https://www.insecam.org/en/view/540433/")))
;; the first `src` value of the first element with the id `image0` on the
;; detail page.
(def some-camera-image
  (let [image-node (first (html/select some-detail-page [:#image0]))]
    (first (html/attr-values image-node :src))))
;; cool! so we have the url of a camera image we chose randomly by fair dice
;; roll. we'll eventually have to think of a way to get a good camera image
;; dynamically but we can save that for later. for now let's load the
;; image and see what we can do with it.
;; with the image we encountered above, server-side push is implemented
;; using the multipart/x-mixed-replace content type. this means that essentially the
;; connection is kept open and as soon as a complete chunk of data is received,
;; a browser would be replacing the currently displayed image with the new one.
;; we're only interested in the first chunk of data, so we need to figure out
;; how we can close the connection afterwards and discard the other ones.
;; TODO: To read the image the following is done
;; - Find the first boundary
;; - Load all following bytes into a buffer until the boundary appears the next time
;; - Return the buffer
;; NOTE A more elegant method might be this:
;; - Convert the stream into a lazy sequence of bytes
;; - Partition the lazy sequence whenever you find (str "--" boundary)
;; - Select the part of the sequence you want
(defn input->byte-seq
  "Returns a lazy sequence of the values `(.read input)` yields, one call per
  realized element. The sequence ends as soon as `.read` returns -1."
  [input]
  (lazy-seq
    (let [value (.read input)]
      (if (= -1 value)
        ;; -1 marks the end of the stream
        nil
        (cons value (input->byte-seq input))))))
(comment
  ;; Let's test this; evaluating the form below at the REPL gives
  ;; (72 101 108 108 111 32 87 111 114 108 100)
  (-> "Hello World"
      (java.io.StringReader.)
      (input->byte-seq)))
;; This is a helper function we need later.
(defn find-index
  "Walks `coll` from the front and returns the zero-based position of the
  first element that is `=` to `el`. Returns `nil` when there is no such
  element. Nothing past the first match is realized (beyond the chunk it sits
  in, for chunked seqs)."
  [el coll]
  (loop [idx 0
         remaining (seq coll)]
    (when remaining
      (if (= el (first remaining))
        idx
        (recur (inc idx) (next remaining))))))
(defn partition-with-seq
  "Lazily splits `coll` at every occurrence of the sub-sequence `sep`; the
  separators themselves are dropped. The final item is whatever follows the
  last occurrence of `sep`. An empty `coll` (including the remainder left
  when `coll` ends in `sep`) contributes no item."
  [sep coll]
  (lazy-seq
    (when (seq coll)
      (let [width (count sep)
            ;; sliding windows as wide as `sep`; the index of the matching
            ;; window is the index at which the separator starts
            windows (partition width 1 coll)]
        (if-let [idx (find-index sep windows)]
          (cons (take idx coll)
                (partition-with-seq sep (drop (+ idx width) coll)))
          (list coll))))))
;; if you need a refresher on what multipart messages look like:
;; https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
(defn parse-multipart-alternative
  "Splits a single multipart part into its header block and its payload.

  `body` is a sequence of integer byte values. The header block ends at the
  first blank line, i.e. the first CRLF CRLF. Returns a map with
    :header - the bytes in front of that blank line, as a string
    :body   - every byte after it, as a byte array

  Only the first CRLF CRLF acts as a separator. Binary payloads may contain
  the same four bytes by chance, so later occurrences are kept as part of
  :body instead of being cut out. If there is no blank line at all,
  everything ends up in :header and :body is empty."
  [body]
  (let [sep (map int [\return \newline \return \newline])
        width (count sep)
        ;; a vector gives us cheap slicing for the header / payload split
        bs (vec body)
        sep-idx (first (keep-indexed #(when (= sep %2) %1)
                                     (partition width 1 bs)))
        header-end (or sep-idx (count bs))
        body-start (min (count bs) (+ header-end width))]
    {:header (apply str (map char (subvec bs 0 header-end)))
     ;; `byte-array` narrows each value, so 128-255 wrap to negative bytes
     :body (byte-array (subvec bs body-start))}))
(defn parse-multipart
  "Reads the first part of a streamed multipart response and closes the
  stream afterwards.

  `request` is a response map whose `:headers` hold a \"Content-Type\" with a
  `boundary` parameter and whose `:body` is an open, closeable input stream
  (what `http/get` returns with `{:as :stream}`). The boundary may be quoted
  (`boundary=\"abc\"`) or a bare token (`boundary=abc`).

  Returns the map produced by `parse-multipart-alternative` for the bytes
  between the first and the second boundary line.

  Throws an AssertionError if no boundary can be found in the Content-Type
  header (or the header is missing)."
  [request]
  (let [content-type (get-in request [:headers "Content-Type"])
        ;; `re-find` throws on nil input, so guard against a missing header
        [_ quoted bare] (when content-type
                          (re-find #"boundary=(?:\"(.*?)\"|([^\s;\"]+))" content-type))
        boundary-token (or quoted bare)]
    ;; let's throw in an assert because we have no idea how other servers
    ;; implement streaming or whether they implement it at all. the check has
    ;; to look at the parsed token: the full boundary line below is built with
    ;; `str` and is therefore never nil.
    (assert (some? boundary-token) "Could not parse multipart/x-mixed-replace boundary")
    (with-open [input (:body request)]
      (let [byte-seq (input->byte-seq input)
            ;; a boundary line is "--" followed by the token and CRLF
            boundary-seq (map int (str "--" boundary-token "\r\n"))]
        ;; the multipart message is prepended by the boundary, so the first
        ;; split is the (empty) preamble and the second one is the first
        ;; part. it has to be consumed completely before `with-open` closes
        ;; the stream, which `parse-multipart-alternative` does.
        (parse-multipart-alternative (second (partition-with-seq boundary-seq byte-seq)))))))
;; request the camera image as a stream and keep only its first multipart part
(def first-multipart-chunk
  (-> some-camera-image
      (http/get {:as :stream})
      (parse-multipart)))
;; we need javax to convert the byte array that is contained in the body of the
;; first multipart alternative to a `BufferedImage` that we can use
;; with Clojure2d.
(defn byte-array->image
  "Decodes the encoded image held in the byte array `bs` with
  `javax.imageio.ImageIO/read` and returns the result. `ImageIO/read` returns
  `nil` when no registered reader understands the data."
  [bs]
  (with-open [stream (java.io.ByteArrayInputStream. bs)]
    (-> stream
        (javax.imageio.ImageIO/read))))
;; the payload of the first multipart part, decoded into an image
(def img
  (-> first-multipart-chunk
      :body
      byte-array->image))
(require '[clojure2d.core :as c2d])