/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Copyright (C) 2001-2003, Ximian, Inc.
*/
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <libsoup/soup.h>
#ifdef G_OS_WIN32
#include <io.h>
#define mkdir(path, mode) _mkdir (path)
#endif
/* HTTP session used for every request; created in main(). */
SoupSession *session;
/* Main loop allocated in main() when not running synchronously;
 * mkdirs() asks it to quit when a directory cannot be created.
 */
GMainLoop *loop;
/* recurse is set by -r; debug is set by -d and by -h. */
gboolean recurse = FALSE, debug = FALSE;
/* Request method: GET unless -h switches it to HEAD. */
const char *method = SOUP_METHOD_GET;
/* The URL given on the command line; get_url() only fetches URLs that
 * start with this string.
 */
char *base;
/* Parsed form of base, built in main(). */
SoupUri *base_uri;
/* Not referenced by any code visible in this file. */
int pending;
/* Set of URLs already requested; get_url() stores each URL string as
 * both key and value.
 */
GHashTable *fetched_urls;
/*
 * find_hrefs:
 * @base: URI that links are resolved against and filtered by
 * @body: document text to scan; it is copied, so it need not be
 *        NUL-terminated
 * @length: number of bytes of @body to scan
 *
 * Scans @body for double-quoted href="..." attributes, strips any
 * fragment, resolves each against @base, and keeps the ones whose
 * protocol, port and host match @base and whose path starts with
 * @base's path.
 *
 * Returns: a new GPtrArray of newly allocated URI strings. The caller
 * frees each string and the array. The array is empty when @body is
 * NULL or @length is not positive.
 */
static GPtrArray *
find_hrefs (const SoupUri *base, const char *body, int length)
{
	GPtrArray *hrefs = g_ptr_array_new ();
	char *buf, *start, *end;
	char *href, *frag;
	SoupUri *uri;

	/* g_strndup() returns NULL for a NULL source, which strstr()
	 * below would dereference.
	 */
	if (!body || length <= 0)
		return hrefs;

	buf = g_strndup (body, length);
	start = buf;

	while ((start = strstr (start, "href"))) {
		start += 4;
		while (isspace ((unsigned char) *start))
			start++;
		/* Advance only after the expected character has been
		 * seen: stepping unconditionally would move past the
		 * terminating NUL when the text ends here, and the
		 * next strstr() would read outside the buffer.
		 */
		if (*start != '=')
			continue;
		start++;
		while (isspace ((unsigned char) *start))
			start++;
		if (*start != '"')
			continue;
		start++;

		end = strchr (start, '"');
		if (!end)
			break;

		href = g_strndup (start, end - start);
		start = end;

		/* Fragments name a position inside a document, not a
		 * separate resource.
		 */
		frag = strchr (href, '#');
		if (frag)
			*frag = '\0';

		uri = soup_uri_new_with_base (base, href);
		g_free (href);
		if (!uri)
			continue;

		/* Stay on the same server... */
		if (base->protocol != uri->protocol ||
		    base->port != uri->port ||
		    g_ascii_strcasecmp (base->host, uri->host) != 0) {
			soup_uri_free (uri);
			continue;
		}
		/* ...and underneath the base path. */
		if (strncmp (base->path, uri->path, strlen (base->path)) != 0) {
			soup_uri_free (uri);
			continue;
		}

		g_ptr_array_add (hrefs, soup_uri_to_string (uri, FALSE));
		soup_uri_free (uri);
	}

	g_free (buf);
	return hrefs;
}
/*
 * mkdirs:
 * @path: relative file path whose parent directories should exist
 *
 * Creates every directory named by a '/'-terminated prefix of @path,
 * treating already-existing directories as success. The component
 * after the last '/' is taken to be a file name and is not created.
 *
 * On any other mkdir() failure an error is printed, the main loop (if
 * one exists) is asked to quit, and the remaining components are
 * skipped. @path itself is never modified.
 */
static void
mkdirs (const char *path)
{
	char *copy, *slash;

	if (!path)
		return;

	/* Work on a private copy: the prefixes are produced by
	 * temporarily cutting the string at each slash, and the
	 * caller's string is const.
	 */
	copy = g_strdup (path);

	for (slash = strchr (copy, '/'); slash; slash = strchr (slash + 1, '/')) {
		*slash = '\0';
		/* An empty prefix (leading '/') names no directory. */
		if (*copy && mkdir (copy, 0755) == -1 && errno != EEXIST) {
			fprintf (stderr, "Could not create '%s'\n", copy);
			/* loop is only allocated in asynchronous mode. */
			if (loop)
				g_main_loop_quit (loop);
			break;
		}
		*slash = '/';
	}

	g_free (copy);
}
/*
 * print_header:
 * @name: header name, a C string
 * @value: header value, a C string
 * @data: unused
 *
 * Per-header callback handed to soup_message_foreach_header() by
 * get_url(); prints one "Name: value" line on stdout.
 */
static void
print_header (gpointer name, gpointer value, gpointer data)
{
	const char *header_name = name;
	const char *header_value = value;

	printf ("%s: %s\n", header_name, header_value);
}
static void
get_url (const char *url)
{
char *url_to_get, *slash, *name;
SoupMessage *msg;
int fd, i;
SoupUri *uri;
GPtrArray *hrefs;
const char *header;
if (strncmp (url, base, strlen (base)) != 0)
return;
if (strchr (url, '?') && strcmp (url, base) != 0)
return;
slash = strrchr (url, '/');
if (slash && !slash[1])
url_to_get = g_strdup_printf ("%sindex.html", url);
else
url_to_get = g_strdup (url);
if (g_hash_table_lookup (fetched_urls, url_to_get))
return;
g_hash_table_insert (fetched_urls, url_to_get, url_to_get);
if (recurse) {
/* See if we're already downloading it, and create the
* file if not.
*/
name = url_to_get + strlen (base);
if (*name == '/')
name++;
if (access (name, F_OK) == 0)
return;
mkdirs (name);
fd = open (name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
close (fd);
}
msg = soup_message_new (method, url_to_get);
soup_message_set_flags (msg, SOUP_MESSAGE_NO_REDIRECT);
soup_session_send_message (session, msg);
name = soup_message_get_uri (msg)->path;
if (strncmp (base_uri->path, name, strlen (base_uri->path)) != 0) {
fprintf (stderr, " Error: not under %s\n", base_uri->path);
return;
}
if (debug) {
char *path = soup_uri_to_string (soup_message_get_uri (msg), TRUE);
printf ("%s %s HTTP/1.%d\n\n", method, path,
soup_message_get_http_version (msg));
printf ("HTTP/1.%d %d %s\n",
soup_message_get_http_version (msg),
msg->status_code, msg->reason_phrase);
soup_message_foreach_header (msg->response_headers, print_header, NULL);
printf ("\n");
} else
printf ("%s: %d %s\n", name, msg->status_code, msg->reason_phrase);
name += strlen (base_uri->path);
if (*name == '/')
name++;
if (SOUP_STATUS_IS_REDIRECTION (msg->status_code)) {
if (recurse)
unlink (name);
header = soup_message_get_header (msg->response_headers, "Location");
if (header) {
if (!debug)
printf (" -> %s\n", header);
get_url (header);
}
return;
}
if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code))
return;
if (recurse)
fd = open (name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
else
fd = STDOUT_FILENO;
write (fd, msg->response.body, msg->response.length);
if (!recurse)
return;
close (fd);
header = soup_message_get_header (msg->response_headers, "Content-Type");
if (header && g_ascii_strncasecmp (header, "text/html", 9) != 0)
return;
uri = soup_uri_new (url);
hrefs = find_hrefs (uri, msg->response.body, msg->response.length);
soup_uri_free (uri);
for (i = 0; i < hrefs->len; i++) {
get_url (hrefs->pdata[i]);
g_free (hrefs->pdata[i]);
}
g_ptr_array_free (hrefs, TRUE);
}
/*
 * usage:
 *
 * Prints the command-line synopsis on stderr and exits with status 1.
 * The synopsis lists every option main() passes to getopt(),
 * including -s.
 */
static void
usage (void)
{
	fprintf (stderr, "Usage: get [-c CAfile] [-p proxy URL] [-r] [-h] [-d] [-s] URL\n");
	exit (1);
}
int
main (int argc, char **argv)
{
const char *cafile = NULL;
SoupUri *proxy = NULL;
gboolean synchronous = FALSE;
int opt;
g_type_init ();
g_thread_init (NULL);
while ((opt = getopt (argc, argv, "c:dhp:rs")) != -1) {
switch (opt) {
case 'c':
cafile = optarg;
break;
case 'd':
debug = TRUE;
break;
case 'h':
method = SOUP_METHOD_HEAD;
debug = TRUE;
break;
case 'p':
proxy = soup_uri_new (optarg);
if (!proxy) {
fprintf (stderr, "Could not parse %s as URI\n",
optarg);
exit (1);
}
break;
case 'r':
recurse = TRUE;
break;
case 's':
synchronous = TRUE;
break;
case '?':
usage ();
break;
}
}
argc -= optind;
argv += optind;
if (argc != 1)
usage ();
base = argv[0];
base_uri = soup_uri_new (base);
if (!base_uri) {
fprintf (stderr, "Could not parse '%s' as a URL\n", base);
exit (1);
}
fetched_urls = g_hash_table_new (g_str_hash, g_str_equal);
if (synchronous) {
session = soup_session_sync_new_with_options (
SOUP_SESSION_SSL_CA_FILE, cafile,
SOUP_SESSION_PROXY_URI, proxy,
NULL);
} else {
session = soup_session_async_new_with_options (
SOUP_SESSION_SSL_CA_FILE, cafile,
SOUP_SESSION_PROXY_URI, proxy,
NULL);
}
if (recurse) {
char *outdir;
outdir = g_strdup_printf ("%lu", (unsigned long)getpid ());
if (mkdir (outdir, 0755) != 0) {
fprintf (stderr, "Could not make output directory\n");
exit (1);
}
printf ("Output directory is '%s'\n", outdir);
chdir (outdir);
g_free (outdir);
}
if (!synchronous)
loop = g_main_loop_new (NULL, TRUE);
get_url (base);
if (!synchronous)
g_main_loop_unref (loop);
soup_uri_free (base_uri);
return 0;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */