/* Version-control identification string for this file (expanded $Id$ keyword) */
static char rcsid[] = "@(#)$Id: url_element.c,v 1.6 2006/05/07 08:35:31 hurtta Exp $";
/******************************************************************************
* The Elm (ME+) Mail System - $Revision: 1.6 $ $State: Exp $
*
* Author: Kari Hurtta <hurtta+elm@posti.FMI.FI>
* or Kari Hurtta <elm@elmme-mailer.org>
*****************************************************************************/
#include "def_url.h"
#include "s_me.h"
/* Debug handle of this file; passed as &Debug to the DPRINT() calls in
   this file.
   NOTE(review): "url" is presumably the debug class name -- confirm
   against the definition of DEBUG_VAR.
*/
DEBUG_VAR(Debug,__FILE__,"url");
/* View a plain char buffer as unsigned bytes.  Pure pointer cast:
   nothing is copied and the result aliases str.
*/
static unsigned char * s2us P_((char *str));
static unsigned char * s2us(str)
     char *str;
{
    unsigned char * bytes = (unsigned char *)str;

    return bytes;
}
/* RFC 1738:
In addition, octets may be encoded by a character triplet consisting
of the character "%" followed by the two hexadecimal digits (from
"0123456789ABCDEF") which forming the hexadecimal value of the octet.
(The characters "abcdef" may also be used in hexadecimal encodings.)
*/
/* Value (0..15) of the hexadecimal digit given as a Unicode code point,
   or -1 if ch is not one of 0-9, A-F, a-f.
*/
static int unicode_hex P_((int ch));
static int unicode_hex(ch)
     int ch;
{
    int value = -1;

    if (ch >= 0x0030 && ch <= 0x0039)            /* '0' .. '9' */
        value = ch - 0x0030;
    else if (ch >= 0x0041 && ch <= 0x0046)       /* 'A' .. 'F' */
        value = 10 + (ch - 0x0041);
    else if (ch >= 0x0061 && ch <= 0x0066)       /* 'a' .. 'f' */
        value = 10 + (ch - 0x0061);

    return value;
}
/* NOTE:
%XX escapes are generally bytes in the charset of the remote protocol,
while the charset of the raw string is a local matter,
so these charsets are not necessarily the same.
*/
/* Decodes the %XX escapes of a raw URL element.
 *
 * Three candidate results are built in parallel from the same bytes:
 *
 *   ret0 -- string in the charset of raw.  Rejected as soon as any
 *           decoded %XX byte is >= 0x80 or a byte can not be added.
 *   ret1 -- string in UTF-8.  Used when ret0 was rejected, every byte
 *           could be added as UTF-8 and at least one %XX byte was
 *           >= 0x80.
 *   ret  -- RAW_BUFFER string (charset unknown); the fallback.
 *
 * Exactly one of them is returned; the other two are freed.  The result
 * is a newly allocated string owned by the caller.
 *
 * A '%' followed by two characters which are not both hexadecimal
 * digits is an error: it is reported through process_header_error()
 * and NULL is returned.  A '%' with fewer than two characters after it
 * (at the very end of raw) is NOT treated as an error; it is copied to
 * the result as literal text.
 */
struct string * raw_to_parsed(raw,header_error)
     struct string *raw;
     struct header_errors **header_error;
{
    charset_t utf8 = MIME_name_to_charset("UTF-8",0);
    struct string * ret0 = new_string(raw->string_type);
    struct string * ret = new_string(RAW_BUFFER);
    int ret0_ok = 1;
    struct string * ret1 = NULL;
    int ret1_ok = 1;
    int L = string_len(raw);
    int X;
    int flag_failure = 0;      /* number of malformed %XX escapes seen      */
    int encoded_8bit = 0;      /* set when some %XX decoded to byte >= 0x80 */

    if (!utf8)
        panic("CHARSET PANIC",__FILE__,__LINE__,"raw_to_parsed",
              "UTF-8 not found",0);

    ret1 = new_string(utf8);

    for (X = 0; X < L; X++) {
        uint16 ch = give_unicode_from_string(raw,X);

        if (0x0025 /* % */ == ch) {

            /* Consume a run of consecutive %XX triplets.  The loop
               needs three characters, so it is not entered for a '%'
               closer than two characters to the end of raw.
            */
            while (X < L-2) {
                uint16 ch1;
                uint16 ch2;
                int v1, v2;
                unsigned char ch0;

                ch  = give_unicode_from_string(raw,X);
                ch1 = give_unicode_from_string(raw,X+1);
                ch2 = give_unicode_from_string(raw,X+2);

                if (0x0025 /* % */ != ch)
                    break;

                v1 = unicode_hex(ch1);
                v2 = unicode_hex(ch2);

                if (-1 == v1 || -1 == v2) {
                    DPRINT(Debug,10,(&Debug,
                                     "raw_to_parsed: Bad URL escape %C%C%C on URL element: %S\n",
                                     ch,ch1,ch2,raw));

                    /* Remember the error, but keep scanning: the bad
                       '%' is handled as start of a literal run below.
                    */
                    flag_failure++;
                    goto failure0;
                }

                ch0 = v1 * 16 + v2;

                /* Adding to RAW_BUFFER should never fail ... */
                if (!add_streambyte_to_string(ret,ch0))
                    panic("URL PANIC",__FILE__,__LINE__,"raw_to_parsed",
                          "add_streambyte_to_string failed",0);

                if (!add_streambyte_to_string(ret0,ch0))
                    ret0_ok = 0;
                if (!add_streambyte_to_string(ret1,ch0))
                    ret1_ok = 0;

                /* An escaped 8-bit byte is not trusted to be in the
                   charset of raw.
                */
                if (ch0 >= 0x80) {
                    ret0_ok = 0;
                    encoded_8bit = 1;
                }

                X += 3;
            }

            /* X is now on the first character not consumed as a
               triplet; if there is one, it starts a literal run.
            */
            if (X < L)
                goto failure0;

        } else {
            /* Pick non-escaped sequence */
            int pos;
            struct string *temp;
            char * buf;
            int buflen;
            int err0;
            int err1;

        failure0:
            /* Also entered by goto from the escape branch above, so
               every local is assigned here rather than initialized
               in its declaration.
            */
            pos = X;
            err0 = 0;
            err1 = 0;

            /* Advance X to the last character before the next '%'
               (or to the last character of raw).
            */
            while (X < L-1) {
                ch = give_unicode_from_string(raw,X+1);
                if (0x0025 /* % */ == ch)
                    break;
                X++;
            }

            /* streamclip_from_string returns printable
               characters only, so it can not be used here
            */
            temp = clip_from_string(raw,&pos,X-pos+1);

            /* The whole run pos .. X must have been clipped */
            if (pos != X+1)
                panic("URL PANIC",__FILE__,__LINE__,"raw_to_parsed",
                      "Clipping Error",0);

            bytestream_from_string(temp,&buf,&buflen);

            /* Adding to RAW_BUFFER should never fail ... */
            if (buflen != add_streambytes_to_string(ret,buflen,s2us(buf),
                                                    NULL))
                panic("URL PANIC",__FILE__,__LINE__,"raw_to_parsed",
                      "add_streambytes_to_string failed",0);

            /* A candidate is rejected both when not all bytes were
               accepted and when errors were counted for them.
            */
            if (buflen != add_streambytes_to_string(ret0,buflen,s2us(buf),
                                                    &err0))
                ret0_ok = 0;
            else if (err0 > 0)
                ret0_ok = 0;

            if (buflen != add_streambytes_to_string(ret1,buflen,s2us(buf),
                                                    &err1))
                ret1_ok = 0;
            else if (err1 > 0)
                ret1_ok = 0;

            free(buf);
            free_string(&temp);
        }
    }

    if (flag_failure) {
        DPRINT(Debug,2,(&Debug,
                        "raw_to_parsed: Failed to decode URL element: %S\n",
                        raw));

        process_header_error(header_error,
                             CATGETS(elm_msg_cat, MeSet,
                                     MeFailedURLElement,
                                     "Failed to decode URL element: %S"),
                             raw);

        free_string(&ret);
        free_string(&ret0);
        free_string(&ret1);

        return NULL;
    }

    if (ret0_ok) {
        DPRINT(Debug,50,(&Debug,
                         "raw_to_parsed: %S => %S\n",
                         raw,ret0));

        free_string(&ret);
        free_string(&ret1);

        return ret0;

    } else if (ret1_ok && encoded_8bit) {
        DPRINT(Debug,50,(&Debug,
                         "raw_to_parsed (UTF-8): %S => %S\n",
                         raw,ret1));

        free_string(&ret);
        free_string(&ret0);

        return ret1;

    } else {
        DPRINT(Debug,50,(&Debug,
                         "raw_to_parsed (RAW BUFFER): %S => %S\n",
                         raw,ret));

        free_string(&ret0);
        free_string(&ret1);

        return ret;
    }
}
/* Does %XX escaping of a parsed URL element.
 *
 * If the type of parsed is RAW_BUFFER, its bytes are escaped as they
 * are; otherwise parsed is first converted to UTF-8 and the UTF-8
 * bytes are escaped.
 *
 * Only US-ASCII letters and digits and the characters $-_.+!*'(),
 * are copied unescaped.  Every other byte -- including the RFC 1738
 * reserved characters and the NUL byte -- is written as %XX.
 *
 * Returns a NUL terminated US-ASCII string allocated with
 * safe_malloc(); the caller releases it with free().
 */
char * parsed_to_raw(parsed)
     struct string *parsed;
{
    struct string * P = parsed;
    charset_t utf8 = MIME_name_to_charset("UTF-8",0);
    char * buf;
    int buflen;
    char *ret = NULL;
    int i,x = 0;

    if (!utf8)
        panic("CHARSET PANIC",__FILE__,__LINE__,"parsed_to_raw",
              "UTF-8 not found",0);

    if (RAW_BUFFER != P->string_type)
        P = convert_string(utf8,parsed,0);

    /* RFC 1738 unsafe characters:
          space < > " # % { } | \ ^ ~ [ ] `
       RFC 1738 reserved characters:
          ; / ? : @ = &
       ( Is ` typo and should be ' ? )
       NO -- text says:
          Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
          reserved characters used for their reserved purposes may be used
          unencoded within a URL.
    */

    bytestream_from_string(P,&buf,&buflen);

    /* Worst case: every byte becomes a three character %XX triplet */
    ret = safe_malloc(buflen*3+1);

    for (i = 0; i < buflen; i++) {
        unsigned char ch = buf[i];

        /* US-ASCII assumed ...

           strchr() also matches the terminating NUL of the set, so a
           zero byte must be excluded explicitly; otherwise it would
           be copied unescaped and cut the result short.
        */
        if ((ch >= '0' && ch <= '9') ||
            (ch >= 'a' && ch <= 'z') ||
            (ch >= 'A' && ch <= 'Z') ||
            (ch != '\0' && NULL != strchr("$-_.+!*'(),",ch)))
            ret[x++] = ch;
        else {
            ret[x++] = '%';
            ret[x++] = hexchars[ch / 16];
            ret[x++] = hexchars[ch % 16];
        }
    }
    ret[x] = '\0';

    free(buf);

    /* P is a temporary only when a conversion was done */
    if (P != parsed)
        free_string(&P);

    return ret;
}
/* ----------------------------------------------------------------------- */
/* Magic number stored in every allocated url_element.  The functions
   of this file which take an element check it and panic on mismatch;
   free_url_element() clears it before releasing the memory.
*/
#define URL_element_magic 0xEC02
/* One URL element held in up to two forms.  Either form may be NULL;
   the missing one is filled in on demand by parsed_from_element() /
   raw_from_element().
*/
struct url_element {
unsigned short magic; /* URL_element_magic */
struct string * raw; /* %XX escaped form, or NULL */
struct string * parsed; /* decoded form, or NULL */
};
/* Allocates a new, zero filled url_element with the magic number set
   and neither raw nor parsed form present.
*/
static struct url_element *alloc_element P_((void));
static struct url_element *alloc_element()
{
    struct url_element * elem;

    elem = safe_malloc (sizeof (*elem));

    /* bzero is defined hdrs/defs.h */
    bzero((void *)elem,sizeof (*elem));

    elem->parsed = NULL;
    elem->raw    = NULL;
    elem->magic  = URL_element_magic;

    return elem;
}
/* Creates a new element holding a private copy of raw (the escaped
   form).  The parsed form is left empty.
*/
struct url_element * element_from_raw(raw)
     struct string *raw;
{
    struct url_element * elem = alloc_element();

    elem->raw = dup_string(raw);

    return elem;
}
/* Creates a new element holding a private copy of parsed (the decoded
   form).  The raw form is left empty.
*/
struct url_element * element_from_parsed(parsed)
     struct string *parsed;
{
    struct url_element * elem = alloc_element();

    elem->parsed = dup_string(parsed);

    return elem;
}
/* Releases the element *ptr together with the strings it owns and
   sets *ptr to NULL.

   Calling this with ptr == NULL or *ptr == NULL does nothing, so an
   already released (or never allocated) element can be passed safely.
   Panics if *ptr does not carry URL_element_magic.
*/
void free_url_element(ptr)
     struct url_element **ptr;
{
    if (!ptr || !*ptr)
        return;

    if (URL_element_magic != (*ptr)->magic)
        panic("URL PANIC",__FILE__,__LINE__,"free_url_element",
              "bad magic number",0);

    if ((*ptr)->parsed)
        free_string(&((*ptr)->parsed));

    if ((*ptr)->raw)
        free_string(&((*ptr)->raw));

    (*ptr)->magic = 0;    /* Invalidate */
    free (*ptr);
    *ptr = NULL;
}
/* Gives the decoded form of the element.  When it is not stored yet
   it is produced from the raw form with raw_to_parsed() and kept in
   the element.  The result belongs to the element; it is NULL when
   there is no raw form either or when raw_to_parsed() returned NULL.
*/
CONST struct string * parsed_from_element(elem,header_error)
     struct url_element *elem;
     struct header_errors **header_error;
{
    if (URL_element_magic != elem->magic)
        panic("URL PANIC",__FILE__,__LINE__,"parsed_from_element",
              "bad magic number",0);

    /* Decode only when nothing is cached and there is something to
       decode from
    */
    if (!elem->parsed && elem->raw)
        elem->parsed = raw_to_parsed(elem->raw,header_error);

    return elem->parsed;
}
/* Gives the %XX escaped form of the element.  When it is not stored
   yet it is produced from the parsed form with parsed_to_raw(), turned
   into a US-ASCII string and kept in the element.  The result belongs
   to the element; it is NULL when there is no parsed form either.
*/
CONST struct string * raw_from_element(elem)
     struct url_element *elem;
{
    charset_t ascii_ptr = MIME_name_to_charset("US-ASCII",0);
    char * buffer;

    if (URL_element_magic != elem->magic)
        panic("URL PANIC",__FILE__,__LINE__,"raw_from_element",
              "bad magic number",0);

    if (!ascii_ptr)
        panic("CHARSET PANIC",__FILE__,__LINE__,"raw_from_element",
              "US-ASCII not found",0);

    if (elem->raw)
        return elem->raw;

    if (!elem->parsed)
        return NULL;

    buffer = parsed_to_raw(elem->parsed);
    if (!buffer)
        return NULL;

    /* buffer is only a temporary; the element keeps the string made
       from it
    */
    elem->raw = new_string2(ascii_ptr,s2us(buffer));
    free(buffer);

    return elem->raw;
}
/* Creates an independent copy of elem: each form which is present in
   elem is duplicated, a missing form stays missing in the copy.
*/
struct url_element * dup_url_element(elem)
     struct url_element *elem;
{
    struct url_element * copy = alloc_element();

    if (URL_element_magic != elem->magic)
        panic("URL PANIC",__FILE__,__LINE__,"dup_url_element",
              "bad magic number",0);

    if (elem->raw)
        copy->raw = dup_string(elem->raw);

    if (elem->parsed)
        copy->parsed = dup_string(elem->parsed);

    return copy;
}
/*
* Local Variables:
* mode:c
* c-basic-offset:4
* buffer-file-coding-system: iso-8859-1
* End:
*/
/* syntax highlighted by Code2HTML, v. 0.9.1 */