static char rcsid[] = "@(#)$Id: url_element.c,v 1.6 2006/05/07 08:35:31 hurtta Exp $"; /****************************************************************************** * The Elm (ME+) Mail System - $Revision: 1.6 $ $State: Exp $ * * Author: Kari Hurtta * or Kari Hurtta *****************************************************************************/ #include "def_url.h" #include "s_me.h" DEBUG_VAR(Debug,__FILE__,"url"); static unsigned char * s2us P_((char *str)); static unsigned char * s2us(str) char *str; { return (unsigned char *)str; } /* RFC 1738: In addition, octets may be encoded by a character triplet consisting of the character "%" followed by the two hexadecimal digits (from "0123456789ABCDEF") which forming the hexadecimal value of the octet. (The characters "abcdef" may also be used in hexadecimal encodings.) */ static int unicode_hex P_((int ch)); static int unicode_hex(ch) int ch; { if (0x0030 /* 0 */ <= ch && ch <= 0x0039 /* 9 */) return ch - 0x0030 /* 0 */; if (0x0041 /* A */ <= ch && ch <= 0x0046 /* F */) return ch - 0x0041 /* A */ + 10; if (0x0061 /* a */ <= ch && ch <= 0x0066 /* f */) return ch - 0x0061 /* a */ + 10; return -1; } /* NOTE: %XX are generally bytes from charset of remote protocol raw charset is local matter so these charsets are not necessary same */ /* Decodes %XX, * IF there is %XX > %7F returns type is RAW_BUFFER (charset unknown) * otherwise uses charset of raw * If codes with %XX > %7F is able to handle as UTF-8, that is returned */ struct string * raw_to_parsed(raw,header_error) struct string *raw; struct header_errors **header_error; { charset_t utf8 = MIME_name_to_charset("UTF-8",0); struct string * ret0 = new_string(raw->string_type); struct string * ret = new_string(RAW_BUFFER); int ret0_ok = 1; struct string * ret1 = NULL; int ret1_ok = 1; int L = string_len(raw); int X; int flag_failure = 0; int encoded_8bit = 0; if (!utf8) panic("CHARSET PANIC",__FILE__,__LINE__,"parsed_to_raw", "UTF-8 not found",0); ret1 = new_string(utf8); for (X = 0; X < L; X++) { uint16 ch = give_unicode_from_string(raw,X); if (0x0025 /* % */ == ch) { while (X < L-2) { uint16 ch1; uint16 ch2; int v1, v2; unsigned char ch0; ch = give_unicode_from_string(raw,X); ch1 = give_unicode_from_string(raw,X+1); ch2 = give_unicode_from_string(raw,X+2); if (0x0025 /* % */ != ch) break; v1 = unicode_hex(ch1); v2 = unicode_hex(ch2); if (-1 == v1 || -1 == v2) { DPRINT(Debug,10,(&Debug, "raw_to_parsed: Bad URL escape %C%C%C on URL element: %S\n", ch,ch1,ch2,raw)); flag_failure++; goto failure0; } ch0 = v1 * 16 + v2; /* Adding to RAW_BUFFER should newer to fail ... */ if (!add_streambyte_to_string(ret,ch0)) panic("URL PANIC",__FILE__,__LINE__,"raw_to_parsed", "add_streambyte_to_string failed",0); if (!add_streambyte_to_string(ret0,ch0)) ret0_ok = 0; if (!add_streambyte_to_string(ret1,ch0)) ret1_ok = 0; if (ch0 >= 0x80) { ret0_ok = 0; encoded_8bit = 1; } X += 3; } if (X < L) goto failure0; } else { /* Pick non-escaped sequence */ int pos; struct string *temp; char * buf; int buflen; int err0; int err1; failure0: pos = X; err0 = 0; err1 = 0; while (X < L-1) { ch = give_unicode_from_string(raw,X+1); if (0x0025 /* % */ == ch) break; X++; } /* streamclip_from_string returns printable characters only so it can not used on here */ temp = clip_from_string(raw,&pos,X-pos+1); if (pos != X+1) panic("URL PANIC",__FILE__,__LINE__,"raw_to_parsed", "Clipping Error",0); bytestream_from_string(temp,&buf,&buflen); /* Adding to RAW_BUFFER should newer to fail ... */ if (buflen != add_streambytes_to_string(ret,buflen,s2us(buf), NULL)) panic("URL PANIC",__FILE__,__LINE__,"raw_to_parsed", "add_streambytes_to_string failed",0); if (buflen != add_streambytes_to_string(ret0,buflen,s2us(buf), &err0)) ret0_ok = 0; else if (err0 > 0) ret0_ok = 0; if (buflen != add_streambytes_to_string(ret1,buflen,s2us(buf), &err1)) ret1_ok = 0; else if (err1 > 0) ret1_ok = 0; free(buf); free_string(&temp); } } if (flag_failure) { DPRINT(Debug,2,(&Debug, "raw_to_parsed: Failed to decode URL element: %S", raw)); process_header_error(header_error, CATGETS(elm_msg_cat, MeSet, MeFailedURLElement, "Failed to decode URL element: %S"), raw); free_string(&ret); free_string(&ret0); free_string(&ret1); return NULL; } if (ret0_ok) { DPRINT(Debug,50,(&Debug, "raw_to_parsed: %S => %S\n", raw,ret0)); free_string(&ret); free_string(&ret1); return ret0; } else if (ret1_ok && encoded_8bit) { DPRINT(Debug,50,(&Debug, "raw_to_parsed (UTF-8): %S => %S\n", raw,ret1)); free_string(&ret); free_string(&ret0); return ret1; } else { DPRINT(Debug,50,(&Debug, "raw_to_parsed (RAW BUFFER): %S => %S\n", raw,ret)); free_string(&ret0); free_string(&ret1); return ret; } } /* Does %XX escaping, escaping is done according of UTF-8 except if type is RAW_BUFFER returns US-ASCII string */ char * parsed_to_raw(parsed) struct string *parsed; { struct string * P = parsed; charset_t utf8 = MIME_name_to_charset("UTF-8",0); char * buf; int buflen; char *ret = NULL; int i,x = 0; if (!utf8) panic("CHARSET PANIC",__FILE__,__LINE__,"parsed_to_raw", "UTF-8 not found",0); if (RAW_BUFFER != P->string_type) P = convert_string(utf8,parsed,0); /* RFC 1738 unsafe characters: space < > " # % { } | \ ^ ~ [ ] ` RFC 1738 reserved characters: ; / ? : @ = & ( Is ` typo and should be ' ? ) NO -- text says: Thus, only alphanumerics, the special characters "$-_.+!*'(),", and reserved characters used for their reserved purposes may be used unencoded within a URL. */ bytestream_from_string(P,&buf,&buflen); ret= safe_malloc(buflen*3+1); for (i = 0; i < buflen; i++) { unsigned char ch = buf[i]; /* US-ASCII assumed ... */ if (ch >= '0' && ch <= '9' || ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || NULL != strchr("$-_.+!*'(),",ch)) ret[x++] = ch; else { ret[x++] = '%'; ret[x++] = hexchars[ch / 16]; ret[x++] = hexchars[ch % 16]; } } ret[x] = '\0'; free(buf); if (P != parsed) free_string(&P); return ret; } /* ----------------------------------------------------------------------- */ #define URL_element_magic 0xEC02 struct url_element { unsigned short magic; /* URL_element_magic */ struct string * raw; struct string * parsed; }; static struct url_element *alloc_element P_((void)); static struct url_element *alloc_element() { struct url_element * ptr = safe_malloc (sizeof (*ptr)); /* bzero is defined hdrs/defs.h */ bzero((void *)ptr,sizeof (*ptr)); ptr->magic = URL_element_magic; ptr->raw = NULL; ptr->parsed = NULL; return ptr; } struct url_element * element_from_raw(raw) struct string *raw; { struct url_element * ptr = alloc_element(); ptr->raw = dup_string(raw); return ptr; } struct url_element * element_from_parsed(parsed) struct string *parsed; { struct url_element * ptr = alloc_element(); ptr->parsed = dup_string(parsed); return ptr; } void free_url_element(ptr) struct url_element **ptr; { if (URL_element_magic != (*ptr)->magic) panic("URL PANIC",__FILE__,__LINE__,"free_url_element", "bad magic number",0); if ((*ptr)->parsed) free_string(&((*ptr)->parsed)); if ((*ptr)->raw) free_string(&((*ptr)->raw)); (*ptr)->magic = 0; /* Invalidate */ free (*ptr); *ptr = NULL; } CONST struct string * parsed_from_element(elem,header_error) struct url_element *elem; struct header_errors **header_error; { if (URL_element_magic != elem->magic) panic("URL PANIC",__FILE__,__LINE__,"parsed_from_element", "bad magic number",0); if (elem->parsed) return elem->parsed; if (!elem->raw) return NULL; elem->parsed = raw_to_parsed(elem->raw,header_error); return elem->parsed; } CONST struct string * raw_from_element(elem) struct url_element *elem; { charset_t ascii_ptr = MIME_name_to_charset("US-ASCII",0); char * buffer; if (URL_element_magic != elem->magic) panic("URL PANIC",__FILE__,__LINE__,"raw_from_element", "bad magic number",0); if (!ascii_ptr) panic("CHARSET PANIC",__FILE__,__LINE__,"parsed_to_raw", "US-ASCII not found",0); if (elem->raw) return elem->raw; if (!elem->parsed) return NULL; buffer = parsed_to_raw(elem->parsed); if (!buffer) return NULL; elem->raw = new_string2(ascii_ptr,s2us(buffer)); free(buffer); return elem->raw; } struct url_element * dup_url_element(elem) struct url_element *elem; { struct url_element * ptr = alloc_element(); if (URL_element_magic != elem->magic) panic("URL PANIC",__FILE__,__LINE__,"dup_url_element", "bad magic number",0); if (elem->parsed) ptr->parsed = dup_string(elem->parsed); if (elem->raw) ptr->raw = dup_string(elem->raw); return ptr; } /* * Local Variables: * mode:c * c-basic-offset:4 * buffer-file-coding-system: iso-8859-1 * End: */