﻿id	summary	reporter	owner	description	type	status	priority	milestone	component	version	resolution	keywords	cc	difficulty
1448	http-client escaping URI path	Caolan McMahon		"I'm having problems using http-client with some HTTP servers. Noticeably when requesting images from discogs.com.

For example:

{{{#!scheme
(use http-client)

(print
  (call-with-input-request
    ""https://img.discogs.com/dMvk8q681FkVCkhv3qRvTfwlLZk=/fit-in/300x300/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/R-8062430-1454420247-1268.jpeg.jpg""
    #f
    #f))
}}}

Results in:

{{{
Error: (call-with-input-request) Client error: 400 Bad Request: ""https://img.discogs.com/dMvk8q681FkVCkhv3qRvTfwlLZk=/fit-in/300x300/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/R-8062430-1454420247-1268.jpeg.jpg""

	Call history:

	http-client.scm:227: grab-idle-connection!	  
	http-client.scm:173: with-mutex	  
	http-client.scm:126: dynamic-wind	  
	http-client.scm:127: mutex-lock!	  
	http-client.scm:192: take-first-idle-connection!	  
	http-client.scm:179: hash-table-ref/default	  
	http-client.scm:145: open-output-string	  
	http-client.scm:145: ##sys#check-output-port	  
	http-client.scm:145: uri-common#uri-host	  
	http-client.scm:145: ##sys#print	  
	http-client.scm:145: ##sys#write-char-0	  
	http-client.scm:145: uri-common#uri-port	  
	http-client.scm:145: ##sys#print	  
	http-client.scm:145: get-output-string	  
	http-client.scm:129: mutex-unlock!	  
	http-client.scm:718: raise
}}}

But with curl it works and returns a 200. The difference between the requests is that http-client will escape components in the path where curl does not.

curl:

{{{
GET /dMvk8q681FkVCkhv3qRvTfwlLZk=/fit-in/300x300/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/R-8062430-1454420247-1268.jpeg.jpg HTTP/1.1
}}}

http-client:

{{{
GET /dMvk8q681FkVCkhv3qRvTfwlLZk%3D/fit-in/300x300/filters%3Astrip_icc%28%29%3Aformat%28jpeg%29%3Amode_rgb%28%29%3Aquality%2840%29/discogs-images/R-8062430-1454420247-1268.jpeg.jpg HTTP/1.1
}}}

This happens in http-client when it calls update-uri to remove the hostname, scheme, etc. from the URI before writing the request line:

{{{#!scheme
;; RFC1945, 5.1.2: ""The absoluteURI form is only allowed
;; when the request is being made to a proxy.""
;; RFC2616 is a little more regular (hosts MUST accept
;; absoluteURI), but it says ""HTTP/1.1 clients will only
;; generate them in requests to proxies."" (also 5.1.2)
(req-uri (if (http-connection-proxy con)
             req-uri
             (update-uri req-uri host: #f port: #f scheme: #f
                         path: (or (uri-path req-uri) '(/ """")))))
}}}

Calling update-uri from uri-common with the new path causes it to be escaped when later doing uri->string:

{{{
#;1> (use uri-common)

#;2> (define url ""https://img.discogs.com/dMvk8q681FkVCkhv3qRvTfwlLZk=/fit-in/300x300/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/R-8062430-1454420247-1268.jpeg.jpg"")

#;3> (define req-uri (uri-reference url))

#;4> req-uri
#<URI-common: scheme=https port=443 host=""img.discogs.com"" path=(/ ""dMvk8q681FkVCkhv3qRvTfwlLZk="" ""fit-in"" ""300x300"" ""filters:strip_icc():format(jpeg):mode_rgb():quality(40)"" ""discogs-images"" ""R-8062430-1454420247-1268.jpeg.jpg"") query=() fragment=#f>

#;5> (uri->string req-uri)
""https://img.discogs.com/dMvk8q681FkVCkhv3qRvTfwlLZk=/fit-in/300x300/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/R-8062430-1454420247-1268.jpeg.jpg""

#;6> (uri->string (update-uri req-uri path: (uri-path req-uri)))
""https://img.discogs.com/dMvk8q681FkVCkhv3qRvTfwlLZk%3D/fit-in/300x300/filters%3Astrip_icc%28%29%3Aformat%28jpeg%29%3Amode_rgb%28%29%3Aquality%2840%29/discogs-images/R-8062430-1454420247-1268.jpeg.jpg""
}}}

This does not occur when using uri-generic's update-uri procedure:

{{{#!scheme
(use (prefix uri-generic generic:)
     (prefix uri-common common:))

(define url 
  ""https://img.discogs.com/dMvk8q681FkVCkhv3qRvTfwlLZk=/fit-in/300x300/filters:strip_icc():format(jpeg):mode_rgb():quality(40)/discogs-images/R-8062430-1454420247-1268.jpeg.jpg"")

(define req-uri 
  (common:uri-reference url))

(let ((u (common:uri->uri-generic req-uri)))
  (print
    (common:uri->string
      (common:uri-generic->uri
	(generic:update-uri
	  u
	  path: (generic:uri-path u))))))
}}}

My workaround so far is to patch http-client to use the above conversions back and forth between uri-common and uri-generic when updating the path. I'm not sure what the wider implications of this are, but it will now successfully request the discogs image.

{{{
diff -u http-client/http-client.scm http-client2/http-client.scm
--- http-client/http-client.scm	2018-02-28 08:51:09.281253165 +0000
+++ http-client2/http-client.scm	2018-02-27 20:12:57.631852064 +0000
@@ -49,7 +49,7 @@
 (import chicken scheme lolevel)
 (use srfi-1 srfi-13 srfi-18 srfi-69
      ports files extras tcp data-structures posix
-     intarweb uri-common simple-md5 sendfile)
+     intarweb uri-common (prefix uri-generic generic:) simple-md5 sendfile)
 
 ;; Major TODOs:
 ;; * Find a better approach for storing cookies, which does not
@@ -593,8 +593,11 @@
                  ;; generate them in requests to proxies."" (also 5.1.2)
                  (req-uri (if (http-connection-proxy con)
                               req-uri
-                              (update-uri req-uri host: #f port: #f scheme: #f
-                                          path: (or (uri-path req-uri) '(/ """")))))
+                              ;; EDIT: hack to work around escaping of path when requesting discogs images - Caolan 27/02/2018
+                              (let ((ug (uri->uri-generic req-uri)))
+                                (uri-generic->uri
+                                 (generic:update-uri ug host: #f port: #f scheme: #f
+                                                     path: (or (generic:uri-path ug) '(/ """")))))))
                  (request (write-request (update-request req uri: req-uri)))
                  ;; Writer should be prepared to be called several times
                  ;; Maybe try and figure out a good way to use the
}}}"	defect	closed	major	someday	extensions	4.13.0	fixed	http-client		
