>From 57327307c1ce56f0478e95bb32b8818ec0d9aa78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81ngel=20Gonz=C3=A1lez?=
Date: Mon, 16 Sep 2013 01:33:40 +0200
Subject: [PATCH 2/2] Expose wget functionality for extracting links from a web
page.
Provided by a new program called get-urls
---
src/get-urls.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 src/get-urls.c
diff --git a/src/get-urls.c b/src/get-urls.c
new file mode 100644
index 0000000..9393a62
--- /dev/null
+++ b/src/get-urls.c
@@ -0,0 +1,75 @@
+#include "wget.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "url.h"
+#include "convert.h"
+#include "html-url.h"
+#include "css-url.h"
+
+
+/* Print "# URL", then one "<link> #[ flag]..." line for every entry that
+   get_urls_css_file (if IS_CSS) or get_urls_html returns for FILE and URL.  */
+void
+print_urls (const char *file, const char *url, bool is_css)
+{
+  bool nofollow = false;  /* Output argument of get_urls_html; never read.  */
+  struct urlpos *links, *cur;
+
+  if (is_css)
+    links = get_urls_css_file (file, url);
+  else
+    links = get_urls_html (file, url, &nofollow, NULL);
+
+  printf ("# %s\n", url);
+  for (cur = links; cur != NULL; cur = cur->next)
+    {
+      printf ("%s #", cur->url->url);
+      if (cur->ignore_when_downloading)
+        fputs (" ignore", stdout);
+      if (cur->link_relative_p)
+        fputs (" relative", stdout);
+      if (cur->link_complete_p)
+        fputs (" complete", stdout);
+      if (cur->link_base_p)
+        fputs (" base", stdout);
+      if (cur->link_inline_p)
+        fputs (" inline", stdout);
+      if (cur->link_css_p)
+        fputs (" fromcss", stdout);
+      if (cur->link_expect_html)
+        fputs (" html", stdout);
+      if (cur->link_expect_css)
+        fputs (" css", stdout);
+      if (cur->link_refresh_p)
+        fputs (" refresh", stdout);
+      putchar ('\n');
+    }
+  free_urlpos (links);
+}
+
+const char *exec_name = "get-urls"; /* Program name printed in main's usage message.  */
+struct options opt; /* Not referenced in this file; presumably needed by the wget code it links with -- TODO confirm.  */
+
+/* Usage: get-urls [--css] file original-URL.  Prints the links found in
+   FILE; returns 1 after a usage message if FILE or the URL is missing.  */
+int
+main (int argc, char *argv[])
+{
+  bool is_css = false;
+  if (argc > 1 && !strcmp ("--css", argv[1]))
+    {
+      is_css = true;
+      argc--;  /* Shift the option away so the operands are always  */
+      argv++;  /* argv[1] (file) and argv[2] (URL).  */
+    }
+  /* Need both operands: with argc == 2, argv[2] is NULL and reaches %s.  */
+  if (argc < 3)
+    {
+      fprintf (stderr, _("Usage: %s [--css] file original-URL\n"), exec_name);
+      return 1;
+    }
+  print_urls (argv[1], argv[2], is_css);
+  return 0;
+}
--
1.8.4