static char rcsid[] = "@(#)$Id: url.c,v 1.13 2006/07/24 18:45:00 hurtta Exp $"; /****************************************************************************** * The Elm (ME+) Mail System - $Revision: 1.13 $ $State: Exp $ * * Author: Kari Hurtta * or Kari Hurtta *****************************************************************************/ #include "def_url.h" #include "s_me.h" DEBUG_VAR(Debug,__FILE__,"url"); static unsigned char * s2us P_((char *str)); static unsigned char * s2us(str) char *str; { return (unsigned char *)str; } static CONST unsigned char *cs2us P_((const char *str)); static CONST unsigned char *cs2us(str) CONST char *str; { return (CONST unsigned char *)str; } static char *us2s P_((unsigned char *str)); static char *us2s(str) unsigned char *str; { return (char *)str; } static struct scheme BUILTIN_SCHEMES[] = { { "--dummy--", & NO_URL_HANDLER }, { "mailto", & URL_mailto_handler }, { "http", & DUMMY_http_HANDLER }, { "https", & DUMMY_http_HANDLER }, { "imap", & URL_imap_handler } }; static struct scheme * scheme_list = BUILTIN_SCHEMES; static int scheme_count = sizeof (BUILTIN_SCHEMES) / sizeof (BUILTIN_SCHEMES[0]); static struct scheme * index_to_scheme P_((int idx)); static struct scheme * index_to_scheme(idx) int idx; { if (idx < 0 || idx >= scheme_count) panic("URL PANIC",__FILE__,__LINE__,"index_to_scheme", "Bad index",0); return & (scheme_list[idx]); } int name_to_scheme_idx(name) const char * name; { int i; for (i = 0; i < scheme_count; i++) { if (0 == strcmp(name,scheme_list[i].scheme)) return i; } if (BUILTIN_SCHEMES == scheme_list) { scheme_list = safe_malloc((scheme_count+1) * sizeof (scheme_list[0])); for (i = 0; i < scheme_count; i++) scheme_list[i] = BUILTIN_SCHEMES[i]; } else scheme_list = safe_realloc(scheme_list, (scheme_count+1) * sizeof (scheme_list[0])); scheme_list[scheme_count].scheme = safe_strdup(name); scheme_list[scheme_count].handler = & NO_URL_HANDLER; return scheme_count++; } /* ------------------------------------------------------------------------- */ /* RFC 1738: The character "#" is unsafe and should always be encoded because it is used in World Wide Web and in other systems to delimit a URL from a fragment/anchor identifier that might follow it. The character "%" is unsafe because it is used for encodings of other characters. */ /* RFC 1738: If the character corresponding to an octet is reserved in a scheme, the octet must be encoded. The characters ";", "/", "?", ":", "@", "=" and "&" are the characters which may be reserved for special meaning within a scheme. No other characters may be reserved within a scheme. */ static struct url * alloc_url P_((struct url_handler *handler, int scheme)); static struct url * alloc_url(handler,scheme) struct url_handler *handler; int scheme; { struct url * ret = safe_malloc(sizeof(*ret)); /* bzero is defined hdrs/defs.h */ bzero((void *)ret,sizeof (*ret)); ret->magic = URL_magic; if (URL_handler_magic != handler->magic) panic("URL PANIC",__FILE__,__LINE__,"alloc_url", "bad handler magic number",0); ret->scheme = scheme; ret->current_handler = handler; ret->host = NULL; ret->path = NULL; ret->u.dummy = NULL; ret->fragment = NULL; ret->current_handler->uh_init_it(ret); return ret; } static struct host_part * new_host_part P_((void)); static struct host_part * new_host_part() { struct host_part * ptr = safe_malloc(sizeof (*ptr)); /* bzero is defined hdrs/defs.h */ bzero((void *)ptr,sizeof (*ptr)); ptr->user = NULL; ptr->password = NULL; ptr->host = NULL; ptr->port = 0; return ptr; } static void free_host_part P_((struct host_part **ptr)); static void free_host_part(ptr) struct host_part **ptr; { if ((*ptr)->user) free_url_element(& ((*ptr)->user)); if ((*ptr)->password) free_url_element(& ((*ptr)->password)); if ((*ptr)->host) free_url_element(& ((*ptr)->host)); (*ptr)->port = 0; free(*ptr); *ptr = NULL; } struct url * url_from_raw(raw,parent,header_error) struct string *raw; struct url *parent; struct header_errors **header_error; { int X = 0; int startpos = 0; int len = string_len(raw); int scheme_found = 0; charset_t ascii_ptr = MIME_name_to_charset("US-ASCII",0); char * buffer = NULL; int scheme_idx = -1; struct scheme * sch = NULL; int L; struct host_part * host_part = NULL; struct url_path * path_part = NULL; struct url *ret = NULL; int no_relative = 0; int abs_path = 0; uint16 ch = 0; if (len < 1) { DPRINT(Debug,7,(&Debug,"url_from_raw: empty url\n")); return NULL; } if (!ascii_ptr) panic("URL_PANIC",__FILE__,__LINE__,"url_from_raw", "US-ASCII not found",0); /* RFC 1808: However, there is enough uniformity in the use of URLs to allow a parser to resolve relative URLs based upon a single, generic-RL syntax. This generic-RL syntax consists of six components: :///;?# each of which, except , may be absent from a particular URL. */ /* This code assumes that code charset is ASCII */ buffer = safe_malloc(len); /* check for scheme */ for (; X < len; X++) { ch = give_unicode_from_string(raw,X); if (0x003A /* : */ == ch) { scheme_found = X; /* NOTE: scheme_found is not set if ':' found from first position (0) */ break; } /* RFC 1738: Scheme names consist of a sequence of characters. The lower case letters "a"--"z", digits, and the characters plus ("+"), period ("."), and hyphen ("-") are allowed. For resiliency, programs interpreting URLs should treat upper case letters as equivalent to lower case in scheme names (e.g., allow "HTTP" as well as "http"). */ /* valid characters */ if (0x0061 /* a */ <= ch && ch <= 0x007A /* z */ || 0x0030 /* 0 */ <= ch && ch <= 0x0039 /* 9 */ || 0x002B /* + */ == ch || 0x002E /* . */ == ch || 0x002D /* - */ == ch) buffer[X] = ch; else if (0x0041 /* A */ <= ch && ch <= 0x005A /* Z */) buffer[X] = ch + 0x0061 /* a */ - 0x0041 /* A */; else { DPRINT(Debug,7,(&Debug, "url_from_raw: [%d] ch=%04x not valid for scheme = raw=%S\n", X,ch,raw)); break; } } if (scheme_found) { if (X != scheme_found) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Bad X",0); DPRINT(Debug,7,(&Debug, "url_from_raw: scheme found -- absolute URL\n")); /* This code assumes that code charset is ASCII */ buffer[scheme_found] = '\0'; DPRINT(Debug,11,(&Debug, "url_from_raw: raw scheme=%s\n",buffer)); scheme_idx = name_to_scheme_idx(buffer); /* Skip ':' */ startpos = ++X; ch = 0; no_relative = 1; } else { X = 0; /* Reset scanner */ ch = 0; if (!parent) { DPRINT(Debug,2,(&Debug, "url_from_raw: Relative URL (no scheme) without base URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeRelativeURL, "Relative URL (no scheme) without base URL: %S"), raw); goto failure; } scheme_idx = parent->scheme; } if (X < len) ch = give_unicode_from_string(raw,X); sch = index_to_scheme(scheme_idx); if (URL_handler_magic != sch->handler->magic) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Bad scheme magic number",0); if (0 != (sch->handler->flag & URLFLAG_common)) { if (X+2 < len && 0x002F /* / */ == give_unicode_from_string(raw,X) && 0x002F /* / */ == give_unicode_from_string(raw,X+1) ) { struct string * raw_user = NULL; struct string * raw_password = NULL; struct string * raw_host = NULL; struct string * port = NULL; long port_number = -1; int failure0 = 0; int was_at = 0; DPRINT(Debug,7,(&Debug, "url_from_raw: Common internet scheme syntax found\n")); X += 2; ch = 0; startpos = X; /* 1) Possible USER part */ for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); if (0x0023 /* # */ == ch) break; /* Anchor -- do not allow */ if (0x003B /* ; */ == ch || /* part delimiters -- do not allow */ 0x003F /* ? */ == ch) break; if (0x002F /* / */ == ch) break; /* URL specific part */ if (0x0040 /* @ */ == ch) break; /* username/password seen */ if (0x003A /* : */ == ch) break; /* password indicator */ } if (X < len) { DPRINT(Debug,11,(&Debug, "url_from_raw: username part ended with 0x%04x pos %d\n", ch,X)); } was_at = (0x0040 /* @ */ == ch); /* If : found that may be also port on http://host:port/ */ if (0x003A /* : */ == ch) { int X1; for (X1 = X+1; X1 < len; X1++) { uint16 ch1 = give_unicode_from_string(raw,X1); if (0x0023 /* # */ == ch1) break; /* Anchor -- do not allow */ if (0x003B /* ; */ == ch1 || /* part delimiters -- do not allow */ 0x003F /* ? */ == ch1) break; if (0x002F /* / */ == ch1) break; /* URL specific part */ if (0x0040 /* @ */ == ch1) { was_at = 1; break; /* username/password seen */ } if (0x003A /* : */ == ch1) break; /* do not allow */ } } if (was_at) { int L = X - startpos; /* First part is username */ DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) username found\n", X,startpos,L)); raw_user = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (user -part)",0); DPRINT(Debug,11,(&Debug, "url_from_raw: raw user=%S\n",raw_user)); } if (was_at && 0x003A /* : */ == ch) { /* Find password */ startpos = ++X; ch = 0; for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); if (0x0023 /* # */ == ch) break; /* Anchor -- do not allow */ if (0x003B /* ; */ == ch || /* part delimiters -- do not allow */ 0x003F /* ? */ == ch) break; if (0x002F /* / */ == ch) break; /* URL specific part -- do not allow */ if (0x0040 /* @ */ == ch) break; /* username/password seen -- OK */ if (0x003A /* : */ == ch) break; /* password indicator -- do not allow */ } if (X < len) { DPRINT(Debug,11,(&Debug, "url_from_raw: password part ended with 0x%04x pos %d\n", ch,X)); } if (0x0040 /* @ */ != ch) { DPRINT(Debug,2,(&Debug, "url_from_raw: Failed to parse password from URL: %S\n", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeFailedPasswordURL, "Failed to parse password from URL: %S"), raw); goto common_failure; } /* second part is password */ L = X - startpos; DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) password found\n", X,startpos,L)); raw_password = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (password -part)",0); DPRINT(Debug,50,(&Debug, "url_from_raw: raw password=%S\n",raw_password)); } /* Need read next part for hostname */ if (0x0040 /* @ */ == ch) { startpos = ++X; ch = 0; for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); if (0x0023 /* # */ == ch) break; /* fragment -- OK */ if (0x003B /* ; */ == ch || /* part delimiters -- do not allow */ 0x003F /* ? */ == ch) break; if (0x002F /* / */ == ch) break; /* URL specific part -- OK */ if (0x0040 /* @ */ == ch) break; /* -- second @ -character -- do not allow */ if (0x003A /* : */ == ch) break; /* port indicator -- OK */ } if (X < len) { DPRINT(Debug,11,(&Debug, "url_from_raw: hostname part ended with 0x%04x pos %d\n", ch,X)); } } /* End of URL is OK on here */ if (X < len && 0x0023 /* # */ != ch && 0x002F /* / */ != ch && 0x003A /* : */ != ch) { DPRINT(Debug,2,(&Debug, "url_from_raw: Failed to parse hostname from URL: %S\n", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeFailedHostnameURL, "Failed to parse hostname from URL: %S"), raw); goto common_failure; } /* third part is hostname */ L = X - startpos; DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) hostname found\n", X,startpos,L)); raw_host = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (host -part)",0); DPRINT(Debug,11,(&Debug, "url_from_raw: raw host=%S\n",raw_host)); if (0x003A /* : */ == ch) { int L; int port_found = 0; /* Find port */ int badpos; startpos = ++X; ch = 0; for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); if (0x0023 /* # */ == ch) break; /* fragment -- OK */ if (0x002F /* / */ == ch) break; /* URL specific part -- OK */ /* Valid characters are numbers */ if (0x0030 /* 0 */ <= ch && ch <= 0x0039 /* 9 */) port_found = 1; else { DPRINT(Debug,7,(&Debug, "url_from_raw: [%d] ch=%04x not valid for port = raw=%S\n", X,ch,raw)); break; } } if (X < len) { DPRINT(Debug,11,(&Debug, "url_from_raw: port part ended with 0x%04x pos %d\n", ch,X)); } /* End of URL is OK on here */ if (X < len && 0x0023 /* # */ != ch && 0x002F /* / */ != ch || !port_found) { DPRINT(Debug,2,(&Debug, "url_from_raw: Failed to parse port from URL: %S\n", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeFailedPortURL, "Failed to parse port from URL: %S"), raw); goto common_failure; } /* fourth part is port */ L = X - startpos; DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) port found\n", X,startpos,L)); port = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (host -part)",0); DPRINT(Debug,11,(&Debug, "url_from_raw: port=%S\n",port)); port_number = string_to_long(port,&badpos); if (port_number > 0xFFFF || badpos != -1) { DPRINT(Debug,2,(&Debug, "url_from_raw: Failed to parse port from URL: %S\n", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeFailedPortURL, "Failed to parse port from URL: %S"), raw); goto common_failure; } } if (raw_user && 0 == (sch->handler->flag & URLFLAG_username)) { DPRINT(Debug,2,(&Debug, "url_from_raw: Username not allowed on %s scheme, but is given on URL: %S\n", sch->scheme, raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeUsernameGivedUrl, "Username not allowed on %s scheme, but is given on URL: %S"), sch->scheme, raw); } if (raw_password && 0 == (sch->handler->flag & URLFLAG_password)) { DPRINT(Debug,2,(&Debug, "url_from_raw: Password not allowed on %s scheme, but is given on URL\n", sch->scheme)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MePasswordGivedUrl, "Password not allowed on %s scheme, but is given on URL"), sch->scheme); } if (port && 0 == (sch->handler->flag & URLFLAG_port)) { DPRINT(Debug,2,(&Debug, "url_from_raw: Port not allowed on %s scheme, but is given on URL: %S\n", sch->scheme, raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MePortGivedUrl, "Port not allowed on %s scheme, but is given on URL: %S"), sch->scheme, raw); } host_part = new_host_part(); if (raw_user) host_part->user = element_from_raw(raw_user); if (raw_password) host_part->password = element_from_raw(raw_password); if (raw_host) host_part->host = element_from_raw(raw_host); if (-1 != port_number) host_part->port = port_number; no_relative = 1; if (0) { common_failure: failure0 = 1; DPRINT(Debug,7,(&Debug, "url_from_raw: Failed to parse common internet scheme syntax\n")); } if (raw_user) free_string(& raw_user); if (raw_password) free_string(& raw_password); if (raw_host) free_string(& raw_host); if (port) free_string(& port); if (failure0) goto failure; } else if (no_relative) { DPRINT(Debug,2,(&Debug, "url_from_raw: Host part is missing from URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeURLnoHostPart, "Host part is missing from URL: %S"), raw); goto failure; } else if (!parent) { DPRINT(Debug,2,(&Debug, "url_from_raw: Relative URL (no hostname) without base URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeRelativeURL1, "Relative URL (no hostname) without base URL: %S"), raw); goto failure; } else { /* Handle relative URL */ if (!parent->host) { DPRINT(Debug,2,(&Debug, "url_from_raw: Relative URL (no hostname) without hostname on base URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeRelativeURL2, "Relative URL (no hostname) without hostname on base URL: %S"), raw); goto failure; } host_part = new_host_part(); if (0 == (sch->handler->flag & URLFLAG_username)) { if (parent->host->user) host_part->user = dup_url_element(parent->host->user); } if (0 == (sch->handler->flag & URLFLAG_password)) { if (parent->host->password) host_part->password = dup_url_element(parent->host-> password); } if (parent->host->host) host_part->host = dup_url_element(parent->host->host); if (0 == (sch->handler->flag & URLFLAG_port) && parent->scheme == scheme_idx) host_part->port = parent->host->port; } } /* Read next part for path or url specific part */ if (0x0023 /* # */ != ch && 0x003B /* ; */ != ch && 0x003F /* ? */ != ch && X < len) { struct string * raw_path = NULL; int failure1 = 0; startpos = X; if (X < len && 0x002F /* / */ == ch) { DPRINT(Debug,11,(&Debug, "url_from_raw: Absolute path -- starting with / \n")); abs_path = 1; } for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); if (0x0023 /* # */ == ch) break; /* fragment -- OK */ if (0x003B /* ; */ == ch || /* part delimiters -- OK */ 0x003F /* ? */ == ch) break; } if (X < len) { DPRINT(Debug,11,(&Debug, "url_from_raw: path part ended with 0x%04x pos %d\n", ch,X)); } /* fifth part is path */ L = X - startpos; DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) path (or scheme specific part) found\n", X,startpos,L)); raw_path = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (path -part)",0); DPRINT(Debug,11,(&Debug, "url_from_raw: raw path=%S\n",raw_path)); if (0 != (sch->handler->flag & URLFLAG_path)) { if (abs_path) path_part = raw_to_url_path(raw_path); else if (no_relative) { DPRINT(Debug,2,(&Debug, "url_from_raw: Path part is relative on URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeURLRelPathpart, "Path part is relative on URL: %S"), raw); goto path_failure; } else if (!parent) { DPRINT(Debug,2,(&Debug, "url_from_raw: Relative URL (relative path) without base URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeRelativeURL3, "Relative URL (relative path) without base URL: %S"), raw); goto path_failure; } else { struct url_path * Z; if (!parent->path) { DPRINT(Debug,2,(&Debug, "url_from_raw: Relative URL (relative path) without path on base URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeRelativeURL4, "Relative URL (relative path) without path on base URL: %S"), raw); goto path_failure; } Z = raw_to_url_path(raw_path); path_part = join_url_path(parent->path,Z); free_url_path(&Z); if (!path_part) goto path_failure; } } else { int r; ret = alloc_url(sch->handler,scheme_idx); r = sch->handler->uh_parse_it_not_path(ret,raw_path,header_error); if (!r) goto path_failure; } no_relative = 1; if (0) { path_failure: failure1 = 1; DPRINT(Debug,7,(&Debug, "url_from_raw: Failed to parse path syntax\n")); } if (raw_path) free_string(&raw_path); if (failure1) goto failure; } else { /* No path */ if (0 != (sch->handler->flag & URLFLAG_path)) { if (no_relative) { DPRINT(Debug,2,(&Debug, "url_from_raw: Path part is missing from URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeURLnoPathPart, "Path part is missing from URL: %S"), raw); goto failure; } else if (!parent) { DPRINT(Debug,2,(&Debug, "url_from_raw: Relative URL (no path) without base URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeRelativeURL5, "Relative URL (no path) without base URL: %S"), raw); goto failure; } else { /* Handle relative URL */ if (!parent->path) { DPRINT(Debug,2,(&Debug, "url_from_raw: Relative URL (no path) without path on base URL: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeRelativeURL6, "Relative URL (no path) without path on base URL: %S"), raw); goto failure; } path_part = dup_url_path(parent->path); } } else { if (! no_relative && parent && sch->handler == parent->current_handler && parent->scheme == scheme_idx) { int r; ret = alloc_url(sch->handler,scheme_idx); r = sch->handler->uh_dup_it_not_path(ret,parent); if (!r) goto failure; } } } if (!ret) ret = alloc_url(sch->handler,scheme_idx); ret->host = host_part; ret->path = path_part; /* Read next part for params */ if (X < len && 0x003B /* ; */ == ch) { struct string * raw_params = NULL; int failure2 = 0; int r; startpos = ++X; ch = 0; for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); if (0x0023 /* # */ == ch) break; /* fragment -- OK */ if (0x003F /* ? */ == ch) /* part delimiter -- OK */ break; } if (X < len) { DPRINT(Debug,11,(&Debug, "url_from_raw: params part ended with 0x%04x pos %d\n", ch,X)); } /* sixth part is params */ L = X - startpos; DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) params found\n", X,startpos,L)); raw_params = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (params -part)",0); DPRINT(Debug,11,(&Debug, "url_from_raw: raw params=%S\n",raw_params)); if (0 == (sch->handler->flag & URLFLAG_params)) { DPRINT(Debug,2,(&Debug, "url_from_raw: Params not allowed on %s scheme, but is given on URL: %S", sch->scheme, raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeParamsGivedUrl, "Params not allowed on %s scheme, but is given on URL: %S"), sch->scheme, raw); } r = sch->handler->uh_parse_it_params(ret,raw_params,header_error); if (!r) { failure2 = 1; DPRINT(Debug,7,(&Debug, "url_from_raw: Failed to parse params syntax\n")); } if (raw_params) free_string(&raw_params); if (failure2) goto failure; no_relative = 1; } else { /* No params */ if (! no_relative && parent && sch->handler == parent->current_handler && parent->scheme == scheme_idx && 0 != (sch->handler->flag & URLFLAG_params)) { int r; r = sch->handler->uh_dup_it_params(ret,parent); if (!r) goto failure; } } /* Read next part for query */ if (X < len && 0x003F /* ? */ == ch) { struct string * raw_query = NULL; int failure3 = 0; int r; startpos = ++X; ch = 0; for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); if (0x0023 /* # */ == ch) break; /* fragment -- OK */ } if (X < len) { DPRINT(Debug,11,(&Debug, "url_from_raw: query part ended with 0x%04x pos %d\n", ch,X)); } /* seventh part is params */ L = X - startpos; DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) query found\n", X,startpos,L)); raw_query = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (query -part)",0); DPRINT(Debug,11,(&Debug, "url_from_raw: raw query=%S\n",raw_query)); if (0 == (sch->handler->flag & URLFLAG_query)) { DPRINT(Debug,2,(&Debug, "url_from_raw: Query not allowed on %s scheme, but is given on URL: %S", sch->scheme, raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeQueryGivedUrl, "Query not allowed on %s scheme, but is given on URL: %S"), sch->scheme, raw); } r = sch->handler->uh_parse_it_query(ret,raw_query,header_error); if (!r) { failure3 = 1; DPRINT(Debug,7,(&Debug, "url_from_raw: Failed to parse query syntax\n")); } if (raw_query) free_string(&raw_query); if (failure3) goto failure; no_relative = 1; } else { /* No query */ if (! no_relative && parent && sch->handler == parent->current_handler && parent->scheme == scheme_idx && 0 != (sch->handler->flag & URLFLAG_query)) { int r; r = sch->handler->uh_dup_it_query(ret,parent); if (!r) goto failure; } } /* Read next part for fragment */ if (X < len && 0x0023 /* # */ == ch) { struct string * raw_fragment = NULL; int failure4 = 0; int r; startpos = ++X; ch = 0; #if 0 for (; X < len; X++, ch = 0) { ch = give_unicode_from_string(raw,X); /* Dummy! */ } #else X = len; #endif /* eighth part is fragment */ L = X - startpos; DPRINT(Debug,7,(&Debug, "url_from_raw: %d - %d (len=%d) fragment found\n", X,startpos,L)); raw_fragment = clip_from_string(raw,&startpos,L); if (startpos != X) panic("URL PANIC",__FILE__,__LINE__,"url_from_raw", "Clipping Error (fragment -part)",0); DPRINT(Debug,11,(&Debug, "url_from_raw: raw fragment=%S\n",raw_fragment)); ret->fragment = element_from_raw(raw_fragment); free_string(&raw_fragment); } else { if (! no_relative && parent && parent->fragment) ret->fragment = dup_url_element(parent->fragment); } if (X < len) { DPRINT(Debug,2,(&Debug, "url_from_raw: Failed to parse scheme %s URL: %S\n", sch->scheme, raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeGeneralFailedUrl, "Failed to parse scheme %s URL: %S"), sch->scheme, raw); goto failure; } if (buffer) free(buffer); return ret; failure: if (buffer) free(buffer); if (host_part) free_host_part(& host_part); if (path_part) free_url_path(& (path_part)); if (ret) free_url(&ret); return NULL; } struct url * new_url(scheme) const char *scheme; { struct url *ret = NULL; int scheme_idx = -1; struct scheme * sch = NULL; scheme_idx = name_to_scheme_idx(scheme); sch = index_to_scheme(scheme_idx); ret = alloc_url(sch->handler,scheme_idx); return ret; } void free_url(url) struct url **url; { if (URL_magic != (*url)->magic) panic("URL PANIC",__FILE__,__LINE__,"free_url", "Bad magic number",0); if ((*url)->host) free_host_part(& ((*url)->host)); if ((*url)->path) free_url_path(& ((*url)->path)); if ((*url)->fragment) free_url_element(& ((*url)->fragment)); if (URL_handler_magic != (*url)->current_handler->magic) panic("URL PANIC",__FILE__,__LINE__,"free_url", "Bad handler magic number",0); (*url)->current_handler->uh_free_it(*url); (*url)->magic = 0; /* Invalidate */ free(*url); *url = NULL; } enum url_type get_url_type(url) CONST struct url *url; { if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"get_url_type", "Bad magic number",0); if (URL_handler_magic != url->current_handler->magic) panic("URL PANIC",__FILE__,__LINE__,"get_url_type", "Bad handler magic number",0); return url->current_handler->uh_get_it_type(url); } enum url_type get_url_type_default(url) CONST struct url *url; { if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"get_url_type_default", "Bad magic number",0); if (URL_handler_magic != url->current_handler->magic) panic("URL PANIC",__FILE__,__LINE__,"get_url_type_default", "Bad handler magic number",0); return url->current_handler->url_type; } int set_mailing_headers_from_url(headers,url,mailer_info) struct mailing_headers *headers; CONST struct url *url; struct mailer_info *mailer_info; { if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"set_mailing_headers_from_url", "Bad magic number",0); if (URL_handler_magic != url->current_handler->magic) panic("URL PANIC",__FILE__,__LINE__,"set_mailing_headers_from_url", "Bad handler magic number",0); return url->current_handler->uh_set_mailing_headers_from_it(headers, url, mailer_info); } struct text_block * give_text_body_from_url(url,quote_l,errors) CONST struct url *url; int quote_l; int *errors; { CONST struct string *body = NULL; struct text_block *ret = NULL; int ERR = 0; if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"set_mailing_headers_from_url", "Bad magic number",0); if (URL_handler_magic != url->current_handler->magic) panic("URL PANIC",__FILE__,__LINE__,"set_mailing_headers_from_url", "Bad handler magic number",0); body = url->current_handler->uh_get_body_string_from_it(url,&ERR); if (errors) *errors = ERR; if (!body) return NULL; ret = block_from_string(body,quote_l); return ret; } #ifdef REMOTE_MBX struct folder_browser * get_browser_from_host_url(url,method,verify_remote) const struct url *url; const struct browser_url_method *method; verify_remote_url_f * verify_remote; { struct folder_browser * browser = NULL; CONST struct string * user = NULL; CONST struct string * host1 = NULL; CONST struct string * password = NULL; char * host = NULL; uint16 port = 0; uint16 defport = 0; int anon = 0; if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"get_browser_from_host_url", "Bad magic number",0); if (! url->host) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: No host part on url\n")); return NULL; } /* FIXME Non-ascii hostnames (ie. IDN) is not currently supported .... */ if (url->host->host) { struct string * host2; host1 = parsed_from_element(url->host->host,NULL); if (!host1) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: Bad host on imap url\n")); goto fail; } if (! can_ascii_string(host1)) lib_error(CATGETS(elm_msg_cat, MeSet, MeNonAsciiHostnameUrl, "Non-ascii hostanames on url are not supported: %S"), host1); /* NOTE: Original string is kept, if it can not converted to US-ASCII -- then this produces garbage, but user is warned */ host2 = ascify_string(host1); host = us2s(stream_from_string(host2,0,NULL)); free_string(&host2); } if (url->host->user) { user = parsed_from_element(url->host->user,NULL); if (!user) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: Bad user on url\n")); goto fail; } } if (url->host->password) { password = parsed_from_element(url->host->password,NULL); if (!user) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: Bad password on url\n")); goto fail; } } port = url->host->port; defport = browser_URL_default_port(method); if (user) { if (!verify_remote(url,user,host1,NULL,port,defport)) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: Remote URL canceled\n")); goto fail; } browser = browser_connect_URL(method, user, password, host,port); } else { struct string * anon_passwd = NULL; struct string * anon = format_string(FRM("anonymous")); if (password) anon_passwd = dup_string(password); if (!verify_remote(url,anon,host1,&anon_passwd,port,defport)) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: Remote URL canceled\n")); free_string(&anon); if (anon_passwd) free_string(&anon_passwd); goto fail; } browser = browser_connect_URL(method, anon, anon_passwd, host,port); free_string(&anon); if (anon_passwd) free_string(&anon_passwd); } if (!browser) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: browser_connect_URL failed\n")); } fail: if (host) free(host); return browser; } int browser_select_url_path(browser,url,method) struct folder_browser *browser; CONST struct url *url; CONST struct browser_url_method *method; { int len,i; struct string * elems; CONST struct url_path_elem *path_elem; CONST struct string ** decoded_vector = NULL; int dlen; int ret = 0; if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"browser_select_url_path", "Bad magic number",0); if (! url->path) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: No path part on url\n")); return 0; } len = url_path_len(url->path); if (len < 1) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: No path elems\n")); return 0; } /* On absolute urls first elems (0) have -> elem == NULL and -> trailing_slash = 1 this correspond / on beginning of URL */ if (! (path_elem = get_url_path_element(url->path,0)) || path_elem->elem) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: Not absolute\n")); goto fail; } /* All elemnts 0 .. len-2 must have -> trailing_slash = 1 */ for (i = 0; i < len-1; i++) { if (! (path_elem = get_url_path_element(url->path,i)) || ! path_elem->trailing_slash) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: path elem [%d] Missing /\n", i)); goto fail; } } /* Last element most be -> trailing_slash = 0 */ if (! (path_elem = get_url_path_element(url->path,len-1)) || path_elem->trailing_slash) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: must not be directory URL, path last elem [%d] have ending /\n", len-1)); goto fail; } dlen = len-1; if (dlen < 1) panic("URL PANIC",__FILE__,__LINE__, "get_browser_from_host_url", "Bad dlen",0); decoded_vector = safe_malloc(dlen * sizeof (decoded_vector[0])); for (i = 0; i < dlen; i++) { if (i+1 >= len) panic("URL PANIC",__FILE__,__LINE__, "get_browser_from_host_url", "Bad index",0); path_elem = get_url_path_element(url->path,i+1); if (!path_elem) panic("URL PANIC",__FILE__,__LINE__, "get_browser_from_host_url", "failed to get path elem",0); decoded_vector[i] = parsed_from_element(path_elem->elem,NULL); if (! decoded_vector[i]) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: Failed to decode %d\n", i+1)); goto fail; } } /* This is always absolute path -- from root as defined for URL */ if (! select_item_from_URL(method,browser, dlen, decoded_vector)) { DPRINT(Debug,11,(&Debug, "get_browser_from_host_url: failed to select url (dlen=%d)\n", dlen)); goto fail; } ret = 1; fail: if (decoded_vector) free(decoded_vector); DPRINT(Debug,11,(&Debug, "get_browser_from_host_url=%d\n",ret)); return ret; } #endif struct folder_info * get_folder_from_url(url,remote_url) const struct url *url; verify_remote_url_f *remote_url; { struct folder_info *folder; if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"get_folder_from_url", "Bad magic number",0); if (URL_handler_magic != url->current_handler->magic) panic("URL PANIC",__FILE__,__LINE__,"get_folder_from_url", "Bad handler magic number",0); folder = url->current_handler->uh_get_folder_from_it(url,remote_url); return folder; } /* This gives aboslute URL even when original URL was relative */ struct string * raw_from_url(url) CONST struct url *url; { struct string * ret = NULL; struct scheme * sch; charset_t ascii_ptr = MIME_name_to_charset("US-ASCII",0); struct string * Sa = NULL; int r; if (!ascii_ptr) panic("CHARSET PANIC",__FILE__,__LINE__,"parsed_to_raw", "US-ASCII not found",0); if (URL_magic != url->magic) panic("URL PANIC",__FILE__,__LINE__,"raw_from_url", "Bad magic number",0); if (-1 == url->scheme) { DPRINT(Debug,7,(&Debug,"raw_from_url: Invalid scheme\n")); return NULL; } sch = index_to_scheme(url->scheme); ret = new_string(ascii_ptr); add_ascii_to_string(ret,cs2us(sch->scheme)); /* Add scheme name */ add_ascii_to_string(ret,s2us(":")); if (URL_handler_magic != url->current_handler->magic) panic("URL PANIC",__FILE__,__LINE__,"parsed_to_raw", "Bad handler magic number",0); if (url->host) { CONST struct string * R; add_ascii_to_string(ret,s2us("//")); if (url->host->user) { CONST struct string * R = raw_from_element(url->host->user); if (!R) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get username\n")); goto failure; } append_string(&ret,R); if (url->host->password) { CONST struct string * R = raw_from_element(url->host->password); if (!R) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get password\n")); goto failure; } add_ascii_to_string(ret,s2us(":")); append_string(&ret,R); } add_ascii_to_string(ret,s2us("@")); } R = raw_from_element(url->host->host); if (!R) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get hostname\n")); goto failure; } append_string(&ret,R); if (url->host->port) { struct string * S = format_string(FRM("%d"), url->host->port); add_ascii_to_string(ret,s2us(":")); append_string(&ret,S); free_string(&S); } } if (url->path) { /* Path includes starting / -character */ struct string * S = url_path_to_raw(url->path); if (!S) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get path\n")); goto failure; } append_string(&ret,S); free_string(&S); } else if (0 == (sch->handler->flag & URLFLAG_path)) { struct string * S = sch->handler->uh_not_path_to_raw_it(url); if (!S) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get not-path\n")); goto failure; } append_string(&ret,S); free_string(&S); } r = sch->handler->uh_params_to_raw_it(url,&Sa); if (Sa) { add_ascii_to_string(ret,s2us(";")); append_string(&ret,Sa); free_string(&Sa); } if (!r && 0 != (sch->handler->flag & URLFLAG_params)) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get params\n")); goto failure; } r = sch->handler->uh_query_to_raw_it(url,&Sa); if (Sa) { add_ascii_to_string(ret,s2us("?")); append_string(&ret,Sa); free_string(&Sa); } if (!r && 0 != (sch->handler->flag & URLFLAG_query)) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get query\n")); goto failure; } if (url->fragment) { CONST struct string * S = raw_from_element(url->fragment); if (!S) { DPRINT(Debug,7,(&Debug,"raw_from_url: Failed to get fragment\n")); goto failure; } add_ascii_to_string(ret,s2us("#")); append_string(&ret,S); } return ret; failure: free_string(&ret); return NULL; } /* * Local Variables: * mode:c * c-basic-offset:4 * buffer-file-coding-system: iso-8859-1 * End: */