/*************************************************************************** * Copyright (C) 2006 Meni Livne * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/ #include #include #include #ifndef WIN32 #include #endif #include "phish.h" #include "phish_util_url.h" static int isNum(const char *str) { while (*str != '\0') { if (!isdigit(*str)) return 0; str++; } return 1; } static phish_result_t parseURL(const char *str, phish_util_url_t *url) { int state = 0; unsigned int i = 0; unsigned int begin = 0; int user_read = 0; char *tmp = NULL; /* either user name or host, before we know which one */ char *port = NULL; char c; while ((c = str[i]) != '\0') { switch(state) { case -2: /* memory allocation error occured */ free(port); phish_util_deleteURL(url); return PHISH_ERR_MEMORY; case -1: /* error in format of URL */ free(port); phish_util_deleteURL(url); return PHISH_ERR_MALFORMED_URL; case 0: /* reading protocol - only alphabetic characters are allowed */ if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { if (c == ':' && str[i + 1] == '/' && str[i + 2] == '/') { url->protocol = malloc(i + 1); if (url->protocol == NULL) { state = -2; } else { if (i == begin) { /* must contain at least one character */ state = -1; } else { strncpy(url->protocol, str, i); url->protocol[i] = '\0'; i += 2; begin = i + 1; state = 1; /* move to reading user name or host */ } } } else { state = -1; } } break; case 1: /* reading user name or host */ if (c == '@') { if (user_read) { state = -1; /* @ character illegal in host */ } else { url->user = malloc(i - begin + 1); if (url->user == NULL) { state = -2; } else { if (i == begin) { /* must contain at least one character */ state = -1; } else { strncpy(url->user, str + begin, i - begin); url->user[i - begin] = '\0'; begin = i + 1; user_read = 1; /* remain at same state and read host */ } } } } else if (c == ':') { tmp = malloc(i - begin + 1); if (tmp == NULL) { state = -2; } else { if (i == begin) { /* must contain at least one character */ free(tmp); state = -1; } else { strncpy(tmp, str + begin, i - begin); tmp[i - begin] = '\0'; begin = i + 1; state = 2; /* move to reading password or port */ } } } else if (c == '/') { url->host = malloc(i - begin + 1); if (url->host == NULL) { state = -2; } else { if (i == begin) { /* must contain at least one character */ state = -1; } else { strncpy(url->host, str + begin, i - begin); url->host[i - begin] = '\0'; begin = i; state = 3; /* move to reading path */ } } } break; case 2: /* reading password or port */ if (c == '@') { url->user = tmp; user_read = 1; url->password = malloc(i - begin + 1); if (url->password == NULL) { state = -2; } else { if (i == begin) { /* must contain at least one character */ state = -1; } else { strncpy(url->password, str + begin, i - begin); url->password[i - begin] = '\0'; begin = i + 1; state = 1; /* move to reading host */ } } } else if (c == '/') { url->host = tmp; port = malloc(i - begin + 1); if (port == NULL) { state = -2; } else { if (i == begin) { /* must contain at least one character */ state = -1; } else { strncpy(port, str + begin, i - begin); port[i - begin] = '\0'; if (!isNum(port)) { /* port string must evaluate to a number */ state = -1; } else { url->port = atoi(port); begin = i; state = 3; /* move to reading path */ } } } } break; case 3: /* reading path */ if (c == '#') { url->path = malloc(i - begin + 1); if (url->path == NULL) { state = -2; } else { if (i == begin) { /* must contain at least one character */ state = -1; } else { strncpy(url->path, str + begin, i - begin); url->path[i - begin] = '\0'; begin = i + 1; state = 4; /* move to reading anchor */ } } } break; case 4: /* reading anchor - read characters until end of string */ break; } if (state >= 0) i++; } free(port); if (state == 0) { /* URL can't contain only protocol */ phish_util_deleteURL(url); return PHISH_ERR_MALFORMED_URL; } else if (state == 1) { /* URL ended in host */ url->host = malloc(i - begin + 1); if (url->host == NULL) { phish_util_deleteURL(url); return PHISH_ERR_MEMORY; } else { if (i == begin) { /* must contain at least one character */ phish_util_deleteURL(url); return PHISH_ERR_MALFORMED_URL; } else { strncpy(url->host, str + begin, i - begin); url->host[i - begin] = '\0'; } } } else if (state == 2) { /* URL ended in port */ url->host = tmp; port = malloc(i - begin + 1); if (port == NULL) { phish_util_deleteURL(url); return PHISH_ERR_MEMORY; } else { if (i == begin) { /* must contain at least one character */ free(port); phish_util_deleteURL(url); return PHISH_ERR_MALFORMED_URL; } else { strncpy(port, str + begin, i - begin); port[i - begin] = '\0'; if (!isNum(port)) { /* port string must evaluate to a number */ free(port); phish_util_deleteURL(url); return PHISH_ERR_MALFORMED_URL; } else { url->port = atoi(port); free(port); } } } } else if (state == 3) { /* URL ended in path */ url->path = malloc(i - begin + 1); if (url->path == NULL) { phish_util_deleteURL(url); return PHISH_ERR_MEMORY; } else { if (i == begin) { /* must contain at least one character */ phish_util_deleteURL(url); return PHISH_ERR_MALFORMED_URL; } else { strncpy(url->path, str + begin, i - begin); url->path[i - begin] = '\0'; } } } else if (state == 4) { /* URL ended in anchor */ url->anchor = malloc(i - begin + 1); if (url->anchor == NULL) { phish_util_deleteURL(url); return PHISH_ERR_MEMORY; } else { strncpy(url->anchor, str + begin, i - begin); url->anchor[i - begin] = '\0'; } } return PHISH_SUCCESS; } phish_result_t phish_util_strToURL(const char *str, phish_util_url_t *url) { phish_result_t r; r = parseURL(str, url); if (r != PHISH_SUCCESS) return r; if (url->path == NULL) { url->path = malloc(2); if (url->path == NULL) { phish_util_deleteURL(url); return PHISH_ERR_MEMORY; } url->path[0] = '/'; url->path[1] = '\0'; } return PHISH_SUCCESS; } void phish_util_checkURLScheme(phish_util_url_t *url, phish_url_data_t *results) { int i; if (url->user != NULL) results->user_scheme = 1; else results->user_scheme = 0; results->suspicious_host = 0; for (i = 0 ; i < strlen(url->host) ; i++) { char c = url->host[i]; if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == '-' || isdigit(c))) { results->suspicious_host = 1; break; } } } void phish_util_initURL(phish_util_url_t *url) { url->protocol = NULL; url->user = NULL; url->password = NULL; url->host = NULL; url->port = -1; url->path = NULL; url->anchor = NULL; } void phish_util_deleteURL(phish_util_url_t *url) { free(url->protocol); free(url->user); free(url->password); free(url->host); free(url->path); free(url->anchor); }