/* This file Copyright 1993 by Clifford A. Adams */
/* url.c
 *
 * Routines for handling WWW URL references.
 */

#include "EXTERN.h"
#include "common.h"
#ifdef USEURL
#include "term.h"
#include "util.h"
#include "util2.h"
#include "INTERN.h"
#include "url.h"
#include "url.ih"

/* Lower-level net routines grabbed from nntpinit.c.
 * The special cases (DECNET, EXCELAN, and NONETD) are not supported.
 */

/* NOTE: If running Winsock, NNTP must be enabled so that the Winsock
 *       initialization will be done.  (common.h will check for this)
 */
/* NOTE(review): the header names on the #include lines below were missing
 * from the copy of this file under review.  They have been filled in with
 * the usual Winsock / BSD socket headers -- confirm against nntpinit.c.
 */
#ifdef WINSOCK
#include <winsock.h>
WSADATA wsaData;
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#endif

#ifndef WINSOCK
unsigned long inet_addr _((char*));
struct servent* getservbyname();
struct hostent* gethostbyname();
#endif

/* Scratch buffer: holds the outgoing "GET <path>\n" request and is then
 * reused as the read buffer for the reply.
 * XXX just a little bit larger than necessary...
 */
static char url_buf[1030];
/* Components of the most recently parsed URL (filled in by parse_url). */
static char url_type[256];
static char url_host[256];
static int url_port;
static char url_path[1024];

/* Open a TCP connection to `machine' and return the connected socket.
 *
 * machine: host name, or a dotted-quad address (recognized by a leading
 *          digit and a successful inet_addr() conversion).
 * port:    TCP port in host byte order.  0 means "look up the www service",
 *          falling back to port 80 if the services database has no entry.
 *
 * Returns the socket descriptor, or -1 after printing a diagnostic on
 * stderr.
 */
static int
get_url_socket(machine,port)
char* machine;
int port;
{
    int s = -1;
    struct sockaddr_in sin;
#ifdef __hpux
    int socksize = 0;
    int socksizelen = sizeof socksize;
#endif
    struct servent* sp;
    struct hostent* hp;
#ifdef h_addr
    int x = 0;
    int connected = 0;
    register char** cp;
    /* Two slots: the address plus the NULL that terminates the list.
     * With only one slot the connect loop below ran off the end of the
     * array whenever the first (only) address failed.
     */
    static char* alist[2];
#endif /* h_addr */
    static struct hostent def;
    static struct in_addr defaddr;
    static char namebuf[256];
    unsigned short net_port;
    int is_raw_addr = 0;

    if (port < 0 || port > 65535) {
	fprintf(stderr, "port %d: Invalid port number.\n", port);
	return -1;
    }
    if (port) {
	/* An explicit port needs no services lookup.  Going through
	 * getservbyport() rejected every port that happens not to be
	 * listed in the services database.
	 */
	net_port = htons((unsigned short)port);
    }
    else if ((sp = getservbyname("www", "tcp")) != NULL)
	net_port = sp->s_port;		/* already in network order */
    else
	net_port = htons(80);		/* the well-known HTTP port */

    /* If not a raw ip address, try nameserver */
    if (isdigit((unsigned char)*machine)) {
	defaddr.s_addr = inet_addr(machine);
#ifdef INADDR_NONE
	is_raw_addr = (defaddr.s_addr != INADDR_NONE);
#else
	is_raw_addr = ((long)defaddr.s_addr != -1);
#endif
    }
    if (!is_raw_addr)
	hp = gethostbyname(machine);
    else {
	/* Raw ip address, fake */
	if (strlen(machine) >= sizeof namebuf) {
	    fprintf(stderr, "%s: Unknown host.\n", machine);
	    return -1;
	}
	(void) strcpy(namebuf, machine);
	def.h_name = namebuf;
#ifdef h_addr
	def.h_addr_list = alist;
	alist[1] = NULL;
#endif
	def.h_addr = (char*)&defaddr;
	def.h_length = sizeof(struct in_addr);
	def.h_addrtype = AF_INET;
	def.h_aliases = 0;
	hp = &def;
    }
    if (hp == NULL) {
	fprintf(stderr, "%s: Unknown host.\n", machine);
	return -1;
    }
    /* The address bytes are copied into sin.sin_addr below; refuse
     * anything that would not fit there.
     */
    if (hp->h_length < 0 || hp->h_length > (int)sizeof sin.sin_addr) {
	fprintf(stderr, "%s: Unsupported address type.\n", machine);
	return -1;
    }

    bzero((char*)&sin, sizeof sin);
    sin.sin_family = hp->h_addrtype;
    sin.sin_port = net_port;

    /* The following is kinda gross.  The name server under 4.3
    ** returns a list of addresses, each of which should be tried
    ** in turn if the previous one fails.  However, 4.2 hostent
    ** structure doesn't have this list of addresses.
    ** Under 4.3, h_addr is a #define to h_addr_list[0].
    ** We use this to figure out whether to include the NS specific
    ** code...
    */
#ifdef h_addr
    /* get a socket and initiate connection -- use multiple addresses */
    for (cp = hp->h_addr_list; cp && *cp; cp++) {
	extern char* inet_ntoa _((const struct in_addr));
	s = socket(hp->h_addrtype, SOCK_STREAM, 0);
	if (s < 0) {
	    perror("socket");
	    return -1;
	}
	bcopy(*cp, (char*)&sin.sin_addr, hp->h_length);

	/* x < 0 only after an earlier address has failed */
	if (x < 0)
	    fprintf(stderr, "trying %s\n", inet_ntoa(sin.sin_addr));
	x = connect(s, (struct sockaddr*)&sin, sizeof (sin));
	if (x == 0) {
	    connected = 1;
	    break;
	}
	fprintf(stderr, "connection to %s: ", inet_ntoa(sin.sin_addr));
	perror("");
	(void) close(s);
	s = -1;
    }
    /* Also covers an empty address list, which used to fall through
     * and return an uninitialized descriptor.
     */
    if (!connected) {
	fprintf(stderr, "giving up...\n");
	return -1;
    }
#else /* no name server */
    if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
	perror("socket");
	return -1;
    }

    /* And then connect */
    bcopy(hp->h_addr, (char*)&sin.sin_addr, hp->h_length);
    if (connect(s, (struct sockaddr*)&sin, sizeof sin) < 0) {
	perror("connect");
	(void) close(s);
	return -1;
    }
#endif /* !h_addr */

#ifdef __hpux	/* recommended by raj@cup.hp.com */
#define	HPSOCKSIZE 0x8000
    /* Raise the send and receive buffers to at least HPSOCKSIZE bytes. */
    getsockopt(s, SOL_SOCKET, SO_SNDBUF, (caddr_t)&socksize, (caddr_t)&socksizelen);
    if (socksize < HPSOCKSIZE) {
	socksize = HPSOCKSIZE;
	setsockopt(s, SOL_SOCKET, SO_SNDBUF, (caddr_t)&socksize, sizeof(socksize));
    }
    socksize = 0;
    socksizelen = sizeof(socksize);
    getsockopt(s, SOL_SOCKET, SO_RCVBUF, (caddr_t)&socksize, (caddr_t)&socksizelen);
    if (socksize < HPSOCKSIZE) {
	socksize = HPSOCKSIZE;
	setsockopt(s, SOL_SOCKET, SO_RCVBUF, (caddr_t)&socksize, sizeof(socksize));
    }
#endif
    return s;
}

/* Fetch `path' from the HTTP server at host:port and store the raw reply
 * in the file `outname'.
 *
 * The request is the bare one-line "GET <path>" form, so whatever the
 * server sends back is written to the file unmodified.
 *
 * returns TRUE if successful; on failure a message is printed and the
 * socket and output file are closed before returning FALSE.
 */
bool
fetch_http(host,port,path,outname)
char* host;
int port;
char* path;
char* outname;
{
    int sock;
    FILE* fp_out;
    int len;
    int reqlen;
    int sent;
    bool ok = TRUE;

    /* "GET " + path + "\n" + NUL must fit in url_buf */
    if (strlen(path) + 6 > sizeof url_buf) {
	printf("\nError: URL path too long\n");
	return FALSE;
    }

    sock = get_url_socket(host,port);
    if (sock < 0)
	return FALSE;		/* get_url_socket already complained */

    /* XXX later consider using HTTP/1.0 format (and user-agent) */
    sprintf(url_buf, "GET %s\n",path);
    /* Send the request line only; the terminating NUL is not part of
     * the request and is no longer written to the server.
     */
    reqlen = strlen(url_buf);
    for (sent = 0; sent < reqlen; sent += len) {
	len = write(sock, url_buf + sent, reqlen - sent);
	if (len <= 0) {
	    printf("\nError: writing on URL socket\n");
	    close(sock);
	    return FALSE;
	}
    }

    fp_out = fopen(outname,"w");
    if (!fp_out) {
	printf("\nURL output file could not be opened.\n");
	close(sock);
	return FALSE;
    }
    /* XXX some kind of URL timeout would be really nice */
    /* (the old nicebg code caused portability problems) */
    /* later consider larger buffers, spinner */
    for (;;) {
	len = read(sock, url_buf, 1024);
	if (len < 0) {
	    printf("\nError: reading URL reply\n");
	    ok = FALSE;
	    break;
	}
	if (len == 0)
	    break;	/* no data, end connection */
	if ((int)fwrite(url_buf,1,len,fp_out) != len) {
	    printf("\nError: writing URL output file\n");
	    ok = FALSE;
	    break;
	}
    }
    /* fclose flushes, so a failure here means the file is incomplete */
    if (fclose(fp_out) != 0 && ok) {
	printf("\nError: writing URL output file\n");
	ok = FALSE;
    }
    close(sock);
    return ok;
}

/* add port support later?
*/ bool fetch_ftp(host,origpath,outname) char* host; char* origpath; char* outname; { #ifdef USEFTP static char cmdline[1024]; static char path[512]; /* use to make writable copy */ /* buffers used because because filexp overwrites previous call results */ static char username[128]; static char userhost[128]; char* p; int status; char* cdpath; int x,y,l; safecpy(path,origpath,510); p = rindex(path, '/'); /* p points to last slash or NULL*/ if (p == NULL) { printf("Error: URL:ftp path has no '/' character.\n") FLUSH; return FALSE; } if (p[1] == '\0') { printf("Error: URL:ftp path has no final filename.\n") FLUSH; return FALSE; } safecpy(username,filexp("%L"),120); safecpy(userhost,filexp("%H"),120); if (p != path) { /* not of form /foo */ *p = '\0'; cdpath = path; } else cdpath = "/"; sprintf(cmdline,"%s/ftpgrab %s ftp %s@%s %s %s %s", filexp("%X"),host,username,userhost,cdpath,p+1,outname); /* modified escape_shell_cmd code from NCSA HTTPD util.c */ /* serious security holes could result without this code */ l = strlen(cmdline); for (x = 0; cmdline[x]; x++) { if (index("&;`'\"|*?~<>^()[]{}$\\",cmdline[x])) { for (y = l+1; y > x; y--) cmdline[y] = cmdline[y-1]; l++; /* length has been increased */ cmdline[x] = '\\'; x++; /* skip the character */ } } #if 0 printf("ftpgrab command:\n|%s|\n",cmdline); #endif *p = '/'; status = doshell(NULL,cmdline); #if 0 printf("\nFTP command status is %d\n",status) FLUSH; while (!input_pending()) ; eat_typeahead(); #endif return TRUE; #else printf("\nThis copy of trn does not have URL:ftp support.\n"); return FALSE; #endif } /* right now only full, absolute URLs are allowed. */ /* use relative URLs later? */ /* later: pay more attention to long URLs */ bool parse_url(url) char* url; { char* s; char* p; /* consider using 0 as default to look up the service? 
*/ url_port = 80; /* the default */ if (!url || !*url) { printf("Empty URL -- ignoring.\n") FLUSH; return FALSE; } p = url_type; for (s = url; *s && *s != ':'; *p++ = *s++) ; *p = '\0'; if (!*s) { printf("Incomplete URL: %s\n",url) FLUSH; return FALSE; } s++; if (strnEQ(s,"//",2)) { /* normal URL type, will have host (optional portnum) */ s += 2; p = url_host; while (*s && *s != '/' && *s != ':') *p++ = *s++; *p = '\0'; if (!*s) { printf("Incomplete URL: %s\n",url) FLUSH; return FALSE; } if (*s == ':') { s++; p = url_buf; /* temp space */ if (!isdigit(*s)) { printf("Bad URL (non-numeric portnum): %s\n",url) FLUSH; return FALSE; } while (isdigit(*s)) *p++ = *s++; *p = '\0'; url_port = atoi(url_buf); } } else { if (!strEQ(url_type,"news")) { printf("URL needs a hostname: %s\n",url); return FALSE; } } /* finally, just do the path */ if (*s != '/') { printf("Bad URL (path does not start with /): %s\n",url) FLUSH; return FALSE; } strcpy(url_path,s); return TRUE; } bool url_get(url,outfile) char* url; char* outfile; { bool flag; if (!parse_url(url)) return FALSE; if (strEQ(url_type,"http")) flag = fetch_http(url_host,url_port,url_path,outfile); else if (strEQ(url_type,"ftp")) flag = fetch_ftp(url_host,url_path,outfile); else { if (url_type) printf("\nURL type %s not supported (yet?)\n",url_type) FLUSH; flag = FALSE; } return flag; } #endif /* USEURL */