emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 2ebede7d75: Fix srcset parsing in shr


From: Lars Ingebrigtsen
Subject: master 2ebede7d75: Fix srcset parsing in shr
Date: Sat, 23 Jul 2022 01:43:59 -0400 (EDT)

branch: master
commit 2ebede7d7534ef22816d973e2a42613cecfd5b53
Author: Lars Ingebrigtsen <larsi@gnus.org>
Commit: Lars Ingebrigtsen <larsi@gnus.org>

    Fix srcset parsing in shr
    
    * lisp/net/shr.el (shr--preferred-image): Factor out srcset
    parsing for easier testing.
    (shr--parse-srcset): Don't misparse URLs with commas in them.
---
 lisp/net/shr.el            | 72 +++++++++++++++++++++++++++-------------------
 test/lisp/net/shr-tests.el | 15 ++++++++++
 2 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/lisp/net/shr.el b/lisp/net/shr.el
index 476c7017e6..52e4389954 100644
--- a/lisp/net/shr.el
+++ b/lisp/net/shr.el
@@ -1740,35 +1740,13 @@ BASE is the URL of the HTML being rendered."
     shr-cookie-policy)))
 
 (defun shr--preferred-image (dom)
-  (let ((srcset (dom-attr dom 'srcset))
-        (frame-width (frame-pixel-width))
-        (width (string-to-number (or (dom-attr dom 'width) "100")))
-        candidate)
-    (when (> (length srcset) 0)
-      ;; srcset consist of a series of URL/size specifications
-      ;; separated by the ", " string.
-      (setq srcset
-            (sort (mapcar
-                   (lambda (elem)
-                     (let ((spec (split-string elem "[\t\n\r ]+")))
-                       (cond
-                        ((= (length spec) 1)
-                         ;; Make sure it's well formed.
-                         (list (car spec) 0))
-                        ((string-match "\\([0-9]+\\)x\\'" (cadr spec))
-                         ;; If we have an "x" form, then use the width
-                         ;; spec to compute the real width.
-                         (list (car spec)
-                               (* width (string-to-number
-                                         (match-string 1 (cadr spec))))))
-                        (t
-                         (list (car spec)
-                               (string-to-number (cadr spec)))))))
-                   (split-string (replace-regexp-in-string
-                                 "\\`[\t\n\r ]+\\|[\t\n\r ]+\\'" "" srcset)
-                                "[\t\n\r ]*,[\t\n\r ]*"))
-                  (lambda (e1 e2)
-                    (> (cadr e1) (cadr e2)))))
+  (let* ((srcset (shr--parse-srcset (dom-attr dom 'srcset)
+                                    (and (dom-attr dom 'width)
+                                         (string-to-number
+                                          (dom-attr dom 'width)))))
+         (frame-width (frame-pixel-width))
+         candidate)
+    (when (length> srcset 0)
       ;; Choose the smallest picture that's bigger than the current
       ;; frame.
       (setq candidate (caar srcset))
@@ -1778,6 +1756,42 @@ BASE is the URL of the HTML being rendered."
         (pop srcset)))
     (or candidate (dom-attr dom 'src))))
 
+(defun shr--parse-srcset (srcset &optional width)
+  (setq srcset (string-trim srcset)
+        width (or width 100))
+  (when (> (length srcset) 0)
+    ;; srcset consists of a series of URL/size specifications separated
+    ;; by the " ," string.
+    (sort (mapcar
+           (lambda (elem)
+             (let ((spec (split-string elem "[\t\n\r ]+")))
+               (cond
+                ((= (length spec) 1)
+                 ;; Make sure it's well formed.
+                 (list (car spec) 0))
+                ((string-match "\\([0-9]+\\)x\\'" (cadr spec))
+                 ;; If we have an "x" form, then use the width
+                 ;; spec to compute the real width.
+                 (list (car spec)
+                       (* width (string-to-number
+                                 (match-string 1 (cadr spec))))))
+                (t
+                 (list (car spec)
+                       (string-to-number (cadr spec)))))))
+           (with-temp-buffer
+             (insert srcset)
+             (goto-char (point-min))
+             (let ((bits nil))
+               (while (re-search-forward "[^\t\n\r ]+[\t\n\r ]+[^\t\n\r ,]+"
+                                         nil t)
+                 (push (match-string 0) bits)
+                 (if (looking-at "[\t\n\r ]*,[\t\n\r ]*")
+                     (goto-char (match-end 0))
+                   (goto-char (point-max))))
+               bits)))
+          (lambda (e1 e2)
+            (> (cadr e1) (cadr e2))))))
+
 (defun shr-string-number (string)
   (if (null string)
       nil
diff --git a/test/lisp/net/shr-tests.el b/test/lisp/net/shr-tests.el
index 821ca5ca63..2254f9bc86 100644
--- a/test/lisp/net/shr-tests.el
+++ b/test/lisp/net/shr-tests.el
@@ -67,6 +67,21 @@
     (should-not
      (shr--use-cookies-p "http://www.gnu.org"; '("http://www.fsf.org";)))))
 
+(ert-deftest shr-srcset ()
+  (should (equal (shr--parse-srcset "") nil))
+
+  (should (equal (shr--parse-srcset "a 10w, b 20w")
+                 '(("b" 20) ("a" 10))))
+
+  (should (equal (shr--parse-srcset "a 10w b 20w")
+                 '(("a" 10))))
+
+  (should (equal (shr--parse-srcset "https://example.org/1\n\n 10w , 
https://example.org/2 20w      ")
+                '(("https://example.org/2"; 20) ("https://example.org/1"; 10))))
+
+  (should (equal (shr--parse-srcset "https://example.org/1,2\n\n 10w , 
https://example.org/2 20w      ")
+                '(("https://example.org/2"; 20) ("https://example.org/1,2"; 
10)))))
+
 (require 'shr)
 
 ;;; shr-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]