| 1 | #!bin/ysh
|
| 2 | #
|
| 3 | # Usage:
|
| 4 | # demo/url-search-params.ysh <function name>
|
| 5 | #
|
| 6 | # Tested against JavaScript's URLSearchParams. Differences:
|
| 7 | #
|
| 8 | # - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
|
| 9 | # - YSH turns this into the 0xff byte, denoted as b'\yff'
|
| 10 | # - JS accepts '==' as key="" value="="
|
| 11 | # - In YSH, this is a syntax error.
|
| 12 | # - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
|
| 13 | # [["", ""],
|
| 14 | # ["", ""],
|
| 15 | # ["", ""]]
|
| 16 | #
|
| 17 | # Evaluation of "the YSH experience":
|
| 18 | #
|
| 19 | # GOOD:
|
| 20 | #
|
| 21 | # - Eggex is elegant
|
| 22 | # - This code is structured better than the Python stdlib urlparse.py!
|
| 23 | # - This problem is also hard/ugly in JavaScript. They use an extra
|
| 24 | # s=>replace() on top of decodeURIComponent()!
|
| 25 | # - Task files in YSH basically work!
|
| 26 | # - I think this file has a nice structure
|
| 27 | # - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js
|
| 28 | # - Triple quoted multiline strings are nice!
|
| 29 | #
|
| 30 | # NEEDS WORK:
|
| 31 | #
|
| 32 | # - need Vim syntax highlighting!
|
| 33 | # - e.g. multiline '' strings aren't highlighted
|
| 34 | # - task files need completion
|
| 35 | #
|
| 36 | # - Eggex could use multiline /// syntax, though you can use \ for line continuation
|
| 37 | # - Eggex could use "which" match
|
| 38 | # - m=>group('lit') sorta bothers me, it should be
|
| 39 | # - m.group('lit')
|
| 40 | # - $lit - probably!
|
| 41 | # - with vars(m.groupDict()) { ... }
|
| 42 | # - Alternative to printf -v probably needed, or at least wrap it in the YSH
|
| 43 | # stdlib
|
| 44 | #
|
| 45 | # - ERROR messages for URL parsing should bubble up to the user!
|
| 46 | # - USER code should be able to point out to location info for bad escapes
|
| 47 | # like %f or %0z
|
| 48 | # - I guess we just need an idiom for this?
|
| 49 |
|
| 50 | source $LIB_OSH/task-five.sh
|
| 51 | #source $LIB_YSH/yblocks.ysh
|
| 52 |
|
func strFromTwoHex(two_hex) {
  ### Return the string that printf produces for the escape \xHH, given HH.

  # TODO: provide alternative to old OSH style!
  #
  # Python style would include something like this
  #   var i = int(two_hex, 16)

  # printf expands the \xHH escape in its format string; -v stores the
  # output in the named variable instead of writing it to stdout.
  var byte_str
  printf -v byte_str "\\x$two_hex"
  return (byte_str)
}
|
| 63 |
|
# A single hexadecimal digit, in either case.
const Hex = / [0-9 a-f A-F] /

# One token of a percent-encoded string.  The three alternatives each set a
# different named group:
#   lit      a run of one or more characters other than '%' and '+'
#   plus     a single '+'
#   two_hex  the two hex digits that follow a '%'
const Quoted = / \
  <capture !['%+']+ as lit> \
  | <capture '+' as plus> \
  | '%' <capture Hex Hex as two_hex> \
/
|
| 71 |
|
func unquote (s) {
  ### Turn strings with %20 into space, etc.

  # Tokenizes s left to right with the Quoted pattern:
  #   - a literal run is copied through unchanged
  #   - '+' becomes a space
  #   - '%HH' becomes the result of strFromTwoHex('HH')
  #
  # Raises an error if some suffix of s cannot be tokenized, e.g. a '%' that
  # is not followed by two hex digits.  The message reports the offset where
  # matching stopped and the total length, in the same form as the error
  # raised by URLSearchParams().

  var pos = 0
  var parts = []
  while (true) {
    # Anchored match at the current offset; no match means we are either at
    # the end of the input or at something that isn't a valid token.
    var m = s => leftMatch(Quoted, pos=pos)
    if (not m) {
      break
    }

    var lit = m => group('lit')
    var plus = m => group('plus')
    var two_hex = m => group('two_hex')

    var part
    if (lit) {
      setvar part = lit
    } elif (plus) {
      setvar part = ' '
    } elif (two_hex) {
      setvar part = strFromTwoHex(two_hex)
    }
    call parts->append(part)

    # Resume right after the token just consumed
    setvar pos = m => end(0)
  }

  # Stopping short of the end means the remainder was not a valid token
  if (pos !== len(s)) {
    error "Unexpected trailing input in unquote $pos != $[len(s)]"
  }

  return (join(parts))
}
|
| 114 |
|
proc js-decode-part(s) {
  ### Decode s with node.js and write the result to stdout as a JSON string.

  # The embedded script replaces each '+' with a space, then calls
  # decodeURIComponent().  s is passed as the first script argument.
  nodejs -e '''

  var encoded = process.argv[1];

  // It does not handle +, because is only for query params, not components?
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
  var encoded = encoded.replace(/\+/g, " ")

  var j = JSON.stringify(decodeURIComponent(encoded))
  process.stdout.write(j);

  ''' $s
}
|
| 129 |
|
# Inputs for test-part: each one is decoded by both js-decode-part and
# unquote(), and the two results are compared.
const PART_CASES = [
  'foo+bar',
  'foo%23%40',
  # empty key, empty value, invalid % , etc.
]
|
| 135 |
|
proc test-part() {
  ### Compare unquote() with the node.js decoder on every entry of PART_CASES.

  echo hi

  #_check ('foo bar' === unquote('foo+bar'))

  for encoded in (PART_CASES) {
    # Reference result: node.js prints JSON, which we read into 'expected'
    js-decode-part $encoded | json read (&expected)
    echo 'JS'
    pp line (expected)

    # Result from the YSH implementation
    echo 'YSH'
    var actual = unquote(encoded)
    pp line (actual)

    assert [actual === expected]

    echo
    #break
  }
}
|
| 156 |
|
| 157 | #
|
| 158 | # Query
|
| 159 | #
|
| 160 |
|
# JavaScript allows either side of k=v to be empty, so we match that
# Tok: zero or more characters other than '&', '=' and space.
const Tok = / !['&= ']* /

# Pair: key '=' value.  Either side may be empty, but the '=' is required.
const Pair = / <capture Tok as key> '=' <capture Tok as value> /

# Pairs: one Pair followed by an optional '&', captured as 'sep'.
const Pairs = / Pair <capture '&' as sep>? /
|
| 167 |
|
func URLSearchParams(s) {
  ### Turn k=v&foo=spam+eggs&k=v into a list of pairs

  # Returns a list of [key, value] lists, each side passed through unquote().
  # Raises an error if the whole of s cannot be consumed as k=v pairs
  # separated by '&'.

  var result = []
  var pos = 0
  while (true) {
    # Anchored match of one "k=v" plus an optional '&' at the current offset
    var hit = s => leftMatch(Pairs, pos=pos)
    if (not hit) {
      break
    }

    var key = unquote(hit => group('key'))
    var value = unquote(hit => group('value'))
    call result->append([key, value])

    setvar pos = hit => end(0)

    # Without a '&' there can be no further pair
    var sep = hit => group('sep')
    if (not sep) {
      break
    }
  }

  # Anything left over did not fit the k=v grammar
  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (result)
}
|
| 205 |
|
proc js-decode-query(s) {
  ### Parse s with node.js URLSearchParams and write the pairs to stdout as JSON.

  # The embedded script iterates over the URLSearchParams object, collects
  # each pair into an array, and prints that array with JSON.stringify().
  # s is passed as the first script argument.
  nodejs -e '''

  const u = new URLSearchParams(process.argv[1]);
  //console.log(JSON.stringify(u));

  var pairs = []
  for (pair of u) {
    pairs.push(pair)
  }

  var j = JSON.stringify(pairs);

  //console.log(j):
  process.stdout.write(j);
  ''' $s
}
|
| 223 |
|
# Inputs for test-query: each one is parsed by both js-decode-query and
# URLSearchParams(), and the two results are compared.
const QUERY_CASES = [
  'k=foo+bar',
  'key=foo%23%40',
  'k=v&foo%23=bar+baz+%24%25&k=v',
  'foo+bar=z',

  'missing_val=&k=',

  '=missing_key&=m2',

  # This is valid
  '=&=',
  '=&=&',

]

# Inputs where the comments below describe JavaScript behaving differently
# from YSH.  test-query only refers to this list in a commented-out loop
# header.
const OTHER_CASES = [

  # JavaScript converts %ff to the Unicode replacement char - its strings can't represent bytes
  'foo%ffbar=z',

  # JavaScript treats = as literal - that seems wrong
  # YSH treating this as an error seems right
  '==',
]
|
| 249 |
|
| 250 |
|
proc test-query() {
  ### Compare URLSearchParams() with node.js on every entry of QUERY_CASES.

  for s in (QUERY_CASES) {
  #for s in (OTHER_CASES) {
    echo 'INPUT'
    echo " $s"

    # Reference result: node.js prints JSON, which we read into 'expected'
    js-decode-query $s | json read (&expected)
    echo 'JS'
    pp line (expected)

    # Result from the YSH implementation
    echo 'YSH'
    var actual = URLSearchParams(s)
    pp line (actual)

    assert [actual === expected]

    echo
  }
}
|
| 270 |
|
proc run-tests() {
  ### Invoke devtools/byo.sh with the 'test' argument and this script's path.

  # NOTE(review): presumably byo.sh runs the test-* procs defined in this
  # file - confirm against devtools/byo.sh.  The path is relative to the
  # current directory.
  devtools/byo.sh test $0
}
|
| 274 |
|
# Entry point: pass all command-line arguments to task-five, which comes from
# the $LIB_OSH/task-five.sh file sourced near the top.  Per the usage note in
# the header, the first argument is the name of a function to run.
task-five "$@"
|