/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * Copyright (C) 2001-2003, Ximian, Inc. */ #include #include #include #include #include #include #include #include #include #ifdef G_OS_WIN32 #include #define mkdir(path, mode) _mkdir (path) #endif SoupSession *session; GMainLoop *loop; gboolean recurse = FALSE, debug = FALSE; const char *method = SOUP_METHOD_GET; char *base; SoupUri *base_uri; int pending; GHashTable *fetched_urls; static GPtrArray * find_hrefs (const SoupUri *base, const char *body, int length) { GPtrArray *hrefs = g_ptr_array_new (); char *buf = g_strndup (body, length); char *start = buf, *end; char *href, *frag; SoupUri *uri; while ((start = strstr (start, "href"))) { start += 4; while (isspace ((unsigned char) *start)) start++; if (*start++ != '=') continue; while (isspace ((unsigned char) *start)) start++; if (*start++ != '"') continue; end = strchr (start, '"'); if (!end) break; href = g_strndup (start, end - start); start = end; frag = strchr (href, '#'); if (frag) *frag = '\0'; uri = soup_uri_new_with_base (base, href); g_free (href); if (!uri) continue; if (base->protocol != uri->protocol || base->port != uri->port || g_ascii_strcasecmp (base->host, uri->host) != 0) { soup_uri_free (uri); continue; } if (strncmp (base->path, uri->path, strlen (base->path)) != 0) { soup_uri_free (uri); continue; } g_ptr_array_add (hrefs, soup_uri_to_string (uri, FALSE)); soup_uri_free (uri); } g_free (buf); return hrefs; } static void mkdirs (const char *path) { char *slash; for (slash = strchr (path, '/'); slash; slash = strchr (slash + 1, '/')) { *slash = '\0'; if (*path && mkdir (path, 0755) == -1 && errno != EEXIST) { fprintf (stderr, "Could not create '%s'\n", path); g_main_loop_quit (loop); return; } *slash = '/'; } } static void print_header (gpointer name, gpointer value, gpointer data) { printf ("%s: %s\n", (const char *)name, (const char *)value); } static void get_url (const char *url) { char *url_to_get, *slash, *name; SoupMessage *msg; int fd, i; SoupUri *uri; GPtrArray *hrefs; const char *header; if (strncmp (url, base, strlen (base)) != 0) return; if (strchr (url, '?') && strcmp (url, base) != 0) return; slash = strrchr (url, '/'); if (slash && !slash[1]) url_to_get = g_strdup_printf ("%sindex.html", url); else url_to_get = g_strdup (url); if (g_hash_table_lookup (fetched_urls, url_to_get)) return; g_hash_table_insert (fetched_urls, url_to_get, url_to_get); if (recurse) { /* See if we're already downloading it, and create the * file if not. */ name = url_to_get + strlen (base); if (*name == '/') name++; if (access (name, F_OK) == 0) return; mkdirs (name); fd = open (name, O_WRONLY | O_CREAT | O_TRUNC, 0644); close (fd); } msg = soup_message_new (method, url_to_get); soup_message_set_flags (msg, SOUP_MESSAGE_NO_REDIRECT); soup_session_send_message (session, msg); name = soup_message_get_uri (msg)->path; if (strncmp (base_uri->path, name, strlen (base_uri->path)) != 0) { fprintf (stderr, " Error: not under %s\n", base_uri->path); return; } if (debug) { char *path = soup_uri_to_string (soup_message_get_uri (msg), TRUE); printf ("%s %s HTTP/1.%d\n\n", method, path, soup_message_get_http_version (msg)); printf ("HTTP/1.%d %d %s\n", soup_message_get_http_version (msg), msg->status_code, msg->reason_phrase); soup_message_foreach_header (msg->response_headers, print_header, NULL); printf ("\n"); } else printf ("%s: %d %s\n", name, msg->status_code, msg->reason_phrase); name += strlen (base_uri->path); if (*name == '/') name++; if (SOUP_STATUS_IS_REDIRECTION (msg->status_code)) { if (recurse) unlink (name); header = soup_message_get_header (msg->response_headers, "Location"); if (header) { if (!debug) printf (" -> %s\n", header); get_url (header); } return; } if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code)) return; if (recurse) fd = open (name, O_WRONLY | O_CREAT | O_TRUNC, 0644); else fd = STDOUT_FILENO; write (fd, msg->response.body, msg->response.length); if (!recurse) return; close (fd); header = soup_message_get_header (msg->response_headers, "Content-Type"); if (header && g_ascii_strncasecmp (header, "text/html", 9) != 0) return; uri = soup_uri_new (url); hrefs = find_hrefs (uri, msg->response.body, msg->response.length); soup_uri_free (uri); for (i = 0; i < hrefs->len; i++) { get_url (hrefs->pdata[i]); g_free (hrefs->pdata[i]); } g_ptr_array_free (hrefs, TRUE); } static void usage (void) { fprintf (stderr, "Usage: get [-c CAfile] [-p proxy URL] [-r] [-h] [-d] URL\n"); exit (1); } int main (int argc, char **argv) { const char *cafile = NULL; SoupUri *proxy = NULL; gboolean synchronous = FALSE; int opt; g_type_init (); g_thread_init (NULL); while ((opt = getopt (argc, argv, "c:dhp:rs")) != -1) { switch (opt) { case 'c': cafile = optarg; break; case 'd': debug = TRUE; break; case 'h': method = SOUP_METHOD_HEAD; debug = TRUE; break; case 'p': proxy = soup_uri_new (optarg); if (!proxy) { fprintf (stderr, "Could not parse %s as URI\n", optarg); exit (1); } break; case 'r': recurse = TRUE; break; case 's': synchronous = TRUE; break; case '?': usage (); break; } } argc -= optind; argv += optind; if (argc != 1) usage (); base = argv[0]; base_uri = soup_uri_new (base); if (!base_uri) { fprintf (stderr, "Could not parse '%s' as a URL\n", base); exit (1); } fetched_urls = g_hash_table_new (g_str_hash, g_str_equal); if (synchronous) { session = soup_session_sync_new_with_options ( SOUP_SESSION_SSL_CA_FILE, cafile, SOUP_SESSION_PROXY_URI, proxy, NULL); } else { session = soup_session_async_new_with_options ( SOUP_SESSION_SSL_CA_FILE, cafile, SOUP_SESSION_PROXY_URI, proxy, NULL); } if (recurse) { char *outdir; outdir = g_strdup_printf ("%lu", (unsigned long)getpid ()); if (mkdir (outdir, 0755) != 0) { fprintf (stderr, "Could not make output directory\n"); exit (1); } printf ("Output directory is '%s'\n", outdir); chdir (outdir); g_free (outdir); } if (!synchronous) loop = g_main_loop_new (NULL, TRUE); get_url (base); if (!synchronous) g_main_loop_unref (loop); soup_uri_free (base_uri); return 0; }