/* This file Copyright 1993 by Clifford A. Adams */
/* url.c
 *
 * Routines for handling WWW URL references.
 */

#include "EXTERN.h"
#include "common.h"
#ifdef USEURL
#include "term.h"
#include "util.h"
#include "util2.h"
#include "INTERN.h"
#include "url.h"
#include "url.ih"

/* Lower-level net routines grabbed from nntpinit.c.
 * The special cases (DECNET, EXCELAN, and NONETD) are not supported.
 */

/* NOTE: If running Winsock, NNTP must be enabled so that the Winsock
 *       initialization will be done.  (common.h will check for this)
 */
#ifdef WINSOCK
#include <winsock.h>
WSADATA wsaData;
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#endif

#ifndef WINSOCK
unsigned long inet_addr _((char*));
struct servent* getservbyname();
struct hostent* gethostbyname();
#endif

static char url_buf[1030];
/* XXX just a little bit larger than necessary... */
static char url_type[256];
static char url_host[256];
static int  url_port;
static char url_path[1024];

/* Open a TCP connection to a server and return the connected socket.
 *
 * machine: host name, or an IP address in dotted notation.
 * port:    TCP port number; 0 means "look up the www service".
 *
 * Returns the socket descriptor, or -1 after printing a message on
 * stderr when the port, the host lookup, or the connection fails.
 */
static int
get_url_socket(machine,port)
char* machine;
int port;
{
    int s = -1;
    struct sockaddr_in sin;
    unsigned short net_port;	/* destination port, network byte order */
#ifdef __hpux
    int socksize = 0;
    int socksizelen = sizeof socksize;
#endif
    struct servent* sp;
    struct hostent* hp;
#ifdef h_addr
    int x = 0;
    int connected = 0;
    register char** cp;
    static char* alist[2];	/* one fake address plus NULL terminator */
#endif /* h_addr */
    static struct hostent def;
    static struct in_addr defaddr;
    static char namebuf[256];

    if (port) {
	/* An explicit port is used as given.  Looking it up in the
	 * services database would reject every port that does not
	 * happen to be listed there (8080, 8000, ...).
	 */
	if (port < 0 || port > 65535) {
	    fprintf(stderr, "port %d/tcp: Unknown service.\n", port);
	    return -1;
	}
	net_port = htons((unsigned short)port);
    }
    else {
	if ((sp = getservbyname("www", "tcp")) == NULL) {
	    fprintf(stderr, "www/tcp: Unknown service.\n");
	    return -1;
	}
	net_port = (unsigned short)sp->s_port;	/* already network order */
    }
    /* If not a raw ip address (or one too long to copy), try nameserver */
    if (!isdigit((unsigned char)*machine)
#ifdef INADDR_NONE
     || (defaddr.s_addr = inet_addr(machine)) == INADDR_NONE
#else
     || (long)(defaddr.s_addr = inet_addr(machine)) == -1
#endif
     || strlen(machine) >= sizeof namebuf)
	hp = gethostbyname(machine);
    else {
	/* Raw ip address, fake  */
	(void) strcpy(namebuf, machine);
	def.h_name = namebuf;
#ifdef h_addr
	/* the connect loop below walks this list until it sees NULL */
	alist[1] = NULL;
	def.h_addr_list = alist;
#endif
	def.h_addr = (char*)&defaddr;
	def.h_length = sizeof(struct in_addr);
	def.h_addrtype = AF_INET;
	def.h_aliases = 0;
	hp = &def;
    }
    if (hp == NULL) {
	fprintf(stderr, "%s: Unknown host.\n", machine);
	return -1;
    }

    bzero((char*)&sin, sizeof sin);
    sin.sin_family = hp->h_addrtype;
    sin.sin_port = net_port;

    /* The following is kinda gross.  The name server under 4.3
    ** returns a list of addresses, each of which should be tried
    ** in turn if the previous one fails.  However, 4.2 hostent
    ** structure doesn't have this list of addresses.
    ** Under 4.3, h_addr is a #define to h_addr_list[0].
    ** We use this to figure out whether to include the NS specific
    ** code... */
#ifdef h_addr
    /* get a socket and initiate connection -- use multiple addresses */
    for (cp = hp->h_addr_list; cp && *cp; cp++) {
	extern char* inet_ntoa _((const struct in_addr));
	s = socket(hp->h_addrtype, SOCK_STREAM, 0);
	if (s < 0) {
	    perror("socket");
	    return -1;
	}
	bcopy(*cp, (char*)&sin.sin_addr, hp->h_length);

	/* x < 0 only after a failed attempt, so this announces retries */
	if (x < 0)
	    fprintf(stderr, "trying %s\n", inet_ntoa(sin.sin_addr));
	x = connect(s, (struct sockaddr*)&sin, sizeof (sin));
	if (x == 0) {
	    connected = 1;
	    break;
	}
	fprintf(stderr, "connection to %s: ", inet_ntoa(sin.sin_addr));
	perror("");
	(void) close(s);
    }
    /* also covers an empty address list, where no socket was opened */
    if (!connected) {
	fprintf(stderr, "giving up...\n");
	return -1;
    }
#else /* no name server */
    if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
	perror("socket");
	return -1;
    }

    /* And then connect */

    bcopy(hp->h_addr, (char*)&sin.sin_addr, hp->h_length);
    if (connect(s, (struct sockaddr*)&sin, sizeof sin) < 0) {
	perror("connect");
	(void) close(s);
	return -1;
    }
#endif /* !h_addr */
#ifdef __hpux	/* recommended by raj@cup.hp.com */
#define	HPSOCKSIZE 0x8000
    /* raise the send and receive buffers to at least HPSOCKSIZE */
    getsockopt(s, SOL_SOCKET, SO_SNDBUF, (caddr_t)&socksize, (caddr_t)&socksizelen);
    if (socksize < HPSOCKSIZE) {
	socksize = HPSOCKSIZE;
	setsockopt(s, SOL_SOCKET, SO_SNDBUF, (caddr_t)&socksize, sizeof(socksize));
    }
    socksize = 0;
    socksizelen = sizeof(socksize);
    getsockopt(s, SOL_SOCKET, SO_RCVBUF, (caddr_t)&socksize, (caddr_t)&socksizelen);
    if (socksize < HPSOCKSIZE) {
	socksize = HPSOCKSIZE;
	setsockopt(s, SOL_SOCKET, SO_RCVBUF, (caddr_t)&socksize, sizeof(socksize));
    }
#endif
    return s;
}

/* Fetch an http URL into a file; returns TRUE if successful.
 *
 * host, port: server to contact (handed to get_url_socket()).
 * path:       path part of the URL, sent as the request URI.
 * outname:    name of the file that receives the raw reply.
 *
 * A message is printed for every failure (by get_url_socket() when the
 * connection cannot be made).  The socket and the output file are
 * closed on every return path.
 */
bool
fetch_http(host,port,path,outname)
char* host;
int port;
char* path;
char* outname;
{
    int sock;
    FILE* fp_out;
    int len;
    int reqlen;
    int sent;
    bool ok;

    /* "GET " + path + "\n" + NUL must fit in url_buf */
    if (strlen(path) + 6 > sizeof url_buf) {
	printf("\nError: URL path is too long\n");
	return FALSE;
    }

    sock = get_url_socket(host,port);
    if (sock < 0)
	return FALSE;	/* get_url_socket has already reported why */

    /* XXX later consider using HTTP/1.0 format (and user-agent) */
    sprintf(url_buf, "GET %s\n",path);
    reqlen = strlen(url_buf);
    /* Send the request line only: the terminating NUL is not part of
     * the request, and write() may accept fewer bytes than offered.
     */
    for (sent = 0; sent < reqlen; sent += len) {
	len = write(sock, url_buf+sent, reqlen-sent);
	if (len <= 0) {
	    printf("\nError: writing on URL socket\n");
	    close(sock);
	    return FALSE;
	}
    }

    fp_out = fopen(outname,"w");
    if (!fp_out) {
	printf("\nURL output file could not be opened.\n");
	close(sock);
	return FALSE;
    }
    /* XXX some kind of URL timeout would be really nice */
    /* (the old nicebg code caused portability problems) */
    /* later consider larger buffers, spinner */
    ok = TRUE;
    for (;;) {
	len = read(sock, url_buf, 1024);
	if (len < 0) {
	    printf("\nError: reading URL reply\n");
	    ok = FALSE;
	    break;
	}
	if (len == 0)
	    break;	/* no data, end connection */
	if ((int)fwrite(url_buf,1,len,fp_out) != len) {
	    printf("\nError: writing URL output file\n");
	    ok = FALSE;
	    break;
	}
    }
    /* fclose flushes, so a full disk can still show up here */
    if (fclose(fp_out) != 0 && ok) {
	printf("\nError: writing URL output file\n");
	ok = FALSE;
    }
    close(sock);
    return ok;
}

/* Fetch an ftp URL by running the external "ftpgrab" helper.
 *
 * host:     ftp server name.
 * origpath: path part of the URL; must contain a '/' and end in a
 *           file name.  It is split into a directory and a file name.
 * outname:  name of the local output file, passed on to ftpgrab.
 *
 * Returns FALSE when the URL is unusable or the command line would not
 * fit its buffer, and when trn was built without USEFTP.  Otherwise
 * the command is run and TRUE is returned; the command's status is
 * not examined.
 */
/* add port support later? */
bool
fetch_ftp(host,origpath,outname)
char* host;
char* origpath;
char* outname;
{
#ifdef USEFTP
    static char cmdline[1024];
    static char path[512];	/* use to make writable copy */
    /* buffers used because filexp overwrites previous call results */
    static char username[128];
    static char userhost[128];
    char* p;
    char* bindir;
    int status;
    char* cdpath;
    int x,y,l;

    /* The host and path come from an untrusted URL and end up on a
     * shell command line.  Whitespace would split arguments and a
     * newline would start a new command, and neither is legal in a
     * URL, so refuse them outright.
     */
    for (x = 0; x < 2; x++) {
	for (p = x? origpath : host; *p; p++) {
	    if ((unsigned char)*p <= ' ' || *p == '\177') {
		printf("Error: URL:ftp host or path contains whitespace.\n")
		    FLUSH;
		return FALSE;
	    }
	}
    }

    safecpy(path,origpath,510);
    p = rindex(path, '/');	/* p points to last slash or NULL*/
    if (p == NULL) {
	printf("Error: URL:ftp path has no '/' character.\n") FLUSH;
	return FALSE;
    }
    if (p[1] == '\0') {
	printf("Error: URL:ftp path has no final filename.\n") FLUSH;
	return FALSE;
    }
    safecpy(username,filexp("%L"),120);
    safecpy(userhost,filexp("%H"),120);
    if (p != path) {	/* not of form /foo */
	*p = '\0';
	cdpath = path;
    } else
	cdpath = "/";

    /* Make sure the command fits before formatting it.  The format
     * below adds 18 literal characters: "/ftpgrab " (9), " ftp " (5),
     * "@" (1) and three separating blanks.
     */
    bindir = filexp("%X");
    l = strlen(bindir) + strlen(host) + strlen(username) + strlen(userhost)
      + strlen(cdpath) + strlen(p+1) + strlen(outname) + 18;
    if (l >= (int)sizeof cmdline) {
	printf("Error: URL:ftp command is too long.\n") FLUSH;
	return FALSE;
    }
    sprintf(cmdline,"%s/ftpgrab %s ftp %s@%s %s %s %s",
	    bindir,host,username,userhost,cdpath,p+1,outname);

    /* modified escape_shell_cmd code from NCSA HTTPD util.c */
    /* serious security holes could result without this code */
    l = strlen(cmdline);
    for (x = 0; cmdline[x]; x++) {
	if (index("&;`'\"|*?~<>^()[]{}$\\",cmdline[x])) {
	    /* the shift below stores into cmdline[l+1] */
	    if (l + 1 >= (int)sizeof cmdline) {
		printf("Error: URL:ftp command is too long.\n") FLUSH;
		return FALSE;
	    }
	    for (y = l+1; y > x; y--)
		cmdline[y] = cmdline[y-1];
	    l++; /* length has been increased */
	    cmdline[x] = '\\';
	    x++; /* skip the character */
	}
    }

#if 0
    printf("ftpgrab command:\n|%s|\n",cmdline);
#endif

    *p = '/';
    status = doshell(NULL,cmdline);
#if 0
    printf("\nFTP command status is %d\n",status) FLUSH;
    while (!input_pending()) ;
    eat_typeahead();
#endif
    return TRUE;
#else
    printf("\nThis copy of trn does not have URL:ftp support.\n");
    return FALSE;
#endif
}

/* Split a URL into url_type, url_host, url_port and url_path.
 *
 * Accepts "type://host[:port]/path"; url_port defaults to 80.  A URL
 * without the "//host" part is accepted only when the type is "news",
 * and the path must start with '/' in every case.
 *
 * Returns TRUE on success.  On failure a message is printed and FALSE
 * is returned; the url_* variables may then hold partial results.
 * Any component that does not fit its buffer makes the URL fail.
 */
/* right now only full, absolute URLs are allowed. */
/* use relative URLs later? */
bool
parse_url(url)
char* url;
{
    char* s;
    char* p;
    char* limit;	/* last position in the buffer being filled */

    /* consider using 0 as default to look up the service? */
    url_port = 80;	/* the default */
    if (!url || !*url) {
	printf("Empty URL -- ignoring.\n") FLUSH;
	return FALSE;
    }
    p = url_type;
    limit = url_type + sizeof url_type - 1;
    for (s = url; *s && *s != ':'; s++) {
	if (p >= limit) {
	    printf("Bad URL (type is too long): %s\n",url) FLUSH;
	    return FALSE;
	}
	*p++ = *s;
    }
    *p = '\0';
    if (!*s) {
	printf("Incomplete URL: %s\n",url) FLUSH;
	return FALSE;
    }
    s++;
    if (strnEQ(s,"//",2)) {
	/* normal URL type, will have host (optional portnum) */
	s += 2;
	p = url_host;
	limit = url_host + sizeof url_host - 1;
	while (*s && *s != '/' && *s != ':') {
	    if (p >= limit) {
		printf("Bad URL (hostname is too long): %s\n",url) FLUSH;
		return FALSE;
	    }
	    *p++ = *s++;
	}
	*p = '\0';
	if (!*s) {
	    printf("Incomplete URL: %s\n",url) FLUSH;
	    return FALSE;
	}
	if (*s == ':') {
	    s++;
	    if (!isdigit((unsigned char)*s)) {
		printf("Bad URL (non-numeric portnum): %s\n",url) FLUSH;
		return FALSE;
	    }
	    /* accumulate the digits directly so that a long run of
	     * them can overflow neither a buffer nor the integer
	     */
	    url_port = 0;
	    while (isdigit((unsigned char)*s)) {
		url_port = url_port * 10 + (*s++ - '0');
		if (url_port > 65535) {
		    printf("Bad URL (portnum out of range): %s\n",url) FLUSH;
		    return FALSE;
		}
	    }
	}
    } else {
	if (!strEQ(url_type,"news")) {
	    printf("URL needs a hostname: %s\n",url) FLUSH;
	    return FALSE;
	}
    }
    /* finally, just do the path */
    if (*s != '/') {
	printf("Bad URL (path does not start with /): %s\n",url) FLUSH;
	return FALSE;
    }
    if (strlen(s) >= sizeof url_path) {
	printf("Bad URL (path is too long): %s\n",url) FLUSH;
	return FALSE;
    }
    strcpy(url_path,s);
    return TRUE;
}

/* Fetch the document named by a URL into a file.
 *
 * url:     URL to fetch; it is taken apart by parse_url().
 * outfile: name of the file that receives the document.
 *
 * Only the "http" and "ftp" types are handled.  Returns the result of
 * the matching fetch routine, or FALSE when the URL does not parse or
 * its type is not supported.
 */
bool
url_get(url,outfile)
char* url;
char* outfile;
{
    if (!parse_url(url))
	return FALSE;

    if (strEQ(url_type,"http"))
	return fetch_http(url_host,url_port,url_path,outfile);
    if (strEQ(url_type,"ftp"))
	return fetch_ftp(url_host,url_path,outfile);

    /* url_type is an array, so there is no null pointer to guard against */
    printf("\nURL type %s not supported (yet?)\n",url_type) FLUSH;
    return FALSE;
}
#endif /* USEURL */


/* syntax highlighted by Code2HTML, v. 0.9.1 */