/*
 * httpclnt.cxx
 *
 * HTTP Client class.
 *
 * Portable Windows Library
 *
 * Copyright (c) 1993-2002 Equivalence Pty. Ltd.
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is Portable Windows Library.
 *
 * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
 *
 * Contributor(s): ______________________________________.
 *
 * $Log: httpclnt.cxx,v $
 * Revision 1.37  2004/10/26 18:25:54  ykiryanov
 * Added (const char*) qualifier to url parameter, similar to one below
 *
 * Revision 1.36  2004/10/21 09:20:33  csoutheren
 * Fixed compile problems on gcc 2.95.x
 *
 * Revision 1.35  2004/08/17 15:18:30  csoutheren
 * Added support for MovedTemp/MovedPerm to GetDocument
 * Fixed problem with empty URL passed to ExecuteCommand
 *
 * Revision 1.34  2004/04/19 12:53:06  csoutheren
 * Fix for iostream changes thanks to David Parr
 *
 * Revision 1.33  2003/04/23 07:00:15  rogerh
 * Fix the encoding checking. the find_ip sample program now works again
 *
 * Revision 1.32  2003/01/28 06:48:35  robertj
 * Added https support to PHTTPClient (if #define P_SSL availbel).
 *
 * Revision 1.31  2002/12/03 22:38:35  robertj
 * Removed get document that just returns a content length as the chunked
 *   transfer encoding makes this very dangerous.
 * Added GetTextDocument() to get a URL content into a PString.
 * Added a version pf PostData() that gets the reponse content into a PString.
 * Added ReadContentBody() that takes a PString, not just PBYTEArray.
 * Fixed bug where conten-encoding must be checked even if there is a
 *   full content length MIME field.
 *
 * Revision 1.30  2002/11/06 22:47:25  robertj
 * Fixed header comment (copyright etc)
 *
 * Revision 1.29  2002/10/10 04:43:44  robertj
 * VxWorks port, thanks Martijn Roest
 *
 * Revision 1.28  2002/05/28 01:41:50  robertj
 * Fixed bug in reading chunked data, thanks David Iodice
 *
 * Revision 1.27  2001/10/30 07:02:28  robertj
 * Fixed problem with bad servers causing endless loops in client.
 *
 * Revision 1.26  2001/10/03 00:26:34  robertj
 * Upgraded client to HTTP/1.1 and for chunked mode entity bodies.
 *
 * Revision 1.25  2001/09/28 08:55:15  robertj
 * More changes to support restartable PHTTPClient
 *
 * Revision 1.24  2001/09/28 00:43:47  robertj
 * Added automatic setting of some outward MIME fields.
 * Added "user agent" string field for automatic inclusion.
 * Added function to read the contents of the HTTP request.
 * Added "restarting" of connection if lost persistence.
 *
 * Revision 1.23  2001/09/11 03:27:46  robertj
 * Improved error processing on high level protocol failures, usually
 *   caused by unexpected shut down of a socket.
 *
 * Revision 1.22  2001/09/10 02:51:23  robertj
 * Major change to fix problem with error codes being corrupted in a
 *   PChannel when have simultaneous reads and writes in threads.
 *
 * Revision 1.21  2001/02/22 05:27:14  robertj
 * Added "nicer" version of GetDocument in HTTP client class.
 *
 * Revision 1.20  1999/05/13 04:59:24  robertj
 * Increased amount of buffering on output request write.
 *
 * Revision 1.19  1999/05/11 12:23:52  robertj
 * Fixed bug introduced in last revision to have arbitrary HTTP commands, missing CRLF.
 *
 * Revision 1.18  1999/05/04 15:26:01  robertj
 * Improved HTTP/1.1 compatibility (pass through user commands).
 * Fixed problems with quicktime installer.
 *
 * Revision 1.17  1998/11/30 04:51:55  robertj
 * New directory structure
 *
 * Revision 1.16  1998/09/23 06:22:09  robertj
 * Added open source copyright license.
 *
 * Revision 1.15  1998/07/24 06:57:21  robertj
 * Fixed error returned on illegal URL passed to unopened socket.
 * Changed PostData function so just has string for data instead of dictionary.
 *
 * Revision 1.14  1998/06/16 03:32:56  robertj
 * Changed TCP connection shutdown to be parameterised.
 *
 * Revision 1.13  1998/06/13 15:03:58  robertj
 * More conditions for NOT shutting down write.
 *
 * Revision 1.12  1998/06/13 12:28:04  robertj
 * Added shutdown to client command if no content length specified.
 *
 * Revision 1.11  1998/04/14 03:42:41  robertj
 * Fixed error code propagation in HTTP client.
 *
 * Revision 1.10  1998/02/03 06:27:10  robertj
 * Fixed propagation of error codes, especially EOF.
 * Fixed writing to some CGI scripts that require CRLF outside of byte count.
 *
 * Revision 1.9  1998/01/26 00:39:00  robertj
 * Added function to allow HTTPClient to automatically connect if URL has hostname.
 * Fixed incorrect return values on HTTPClient GetDocument(), Post etc functions.
 *
 * Revision 1.8  1997/06/12 12:33:35  robertj
 * Fixed bug where mising MIME fields is regarded as an eror.
 *
 * Revision 1.7  1997/03/31 08:26:58  robertj
 * GNU compiler compatibilty.
 *
 * Revision 1.6  1997/03/28 04:40:46  robertj
 * Fixed bug in Post function doing wrong command.
 *
 * Revision 1.5  1997/03/18 22:04:03  robertj
 * Fix bug for binary POST commands.
 *
 * Revision 1.4  1996/12/21 01:26:21  robertj
 * Fixed bug in persistent connections when server closes socket during command.
 *
 * Revision 1.3  1996/12/12 09:24:44  robertj
 * Persistent connection support.
 *
 * Revision 1.2  1996/10/08 13:12:03  robertj
 * Fixed bug in HTTP/0.9 response, first 5 character not put back properly.
 *
 * Revision 1.1  1996/09/14 13:02:18  robertj
 * Initial revision
 *
 * Revision 1.37  1996/08/25 09:37:41  robertj
 * Added function to detect "local" host name.
 * Fixed printing of trailing '/' in empty URL, is distinction between with and without.
 *
 * Revision 1.36  1996/08/22 13:22:26  robertj
 * Fixed bug in print of URLs, extra @ signs.
 *
 * Revision 1.35  1996/08/19 13:42:40  robertj
 * Fixed errors in URL parsing and display.
 * Fixed "Forbidden" problem out of HTTP authorisation system.
 * Fixed authorisation so if have no user/password on basic authentication, does not require it.
 *
 * Revision 1.34  1996/07/27 04:13:47  robertj
 * Fixed use of HTTP proxy on non-persistent connections.
 *
 * Revision 1.33  1996/07/15 10:37:20  robertj
 * Improved proxy "self" detection (especially localhost).
 *
 * Revision 1.32  1996/06/28 13:20:24  robertj
 * Modified HTTPAuthority so gets PHTTPReqest (mainly for URL) passed in.
 * Moved HTTP form resource to another compilation module.
 * Fixed memory leak in POST command.
 *
 * Revision 1.31  1996/06/10 10:00:00  robertj
 * Added global function for query parameters parsing.
 *
 * Revision 1.30  1996/06/07 13:52:23  robertj
 * Added PUT to HTTP proxy FTP. Necessitating redisign of entity body processing.
 *
 * Revision 1.29  1996/06/05 12:33:04  robertj
 * Fixed bug in parsing URL with no path, is NOT absolute!
 *
 * Revision 1.28  1996/05/30 10:07:26  robertj
 * Fixed bug in version number checking of return code compatibility.
 *
 * Revision 1.27  1996/05/26 03:46:42  robertj
 * Compatibility to GNU 2.7.x
 *
 * Revision 1.26  1996/05/23 10:02:13  robertj
 * Added common function for GET and HEAD commands.
 * Fixed status codes to be the actual status code instead of sequential enum.
 * This fixed some problems with proxy pass through of status codes.
 * Fixed bug in URL parsing of username and passwords.
 *
 * Revision 1.19.1.1  1996/04/17 11:08:22  craigs
 * New version by craig pending confirmation by robert
 *
 * Revision 1.19  1996/04/05 01:46:30  robertj
 * Assured PSocket::Write always writes the number of bytes specified, no longer need write loops.
 * Added workaraound for NT Netscape Navigator bug with persistent connections.
 *
 * Revision 1.18  1996/03/31 09:05:07  robertj
 * HTTP 1.1 upgrade.
 *
 * Revision 1.17  1996/03/17 05:48:07  robertj
 * Fixed host name print out of URLs.
 * Added hit count to PHTTPResource.
 *
 * Revision 1.16  1996/03/16 05:00:26  robertj
 * Added ParseReponse() for splitting reponse line into code and info.
 * Added client side support for HTTP socket.
 * Added hooks for proxy support in HTTP socket.
 * Added translation type to TranslateString() to accommodate query variables.
 * Defaulted scheme field in URL to "http".
 * Inhibited output of port field on string conversion of URL according to scheme.
 *
 * Revision 1.15  1996/03/11 10:29:50  robertj
 * Fixed bug in help image HTML.
 *
 * Revision 1.14  1996/03/10 13:15:24  robertj
 * Redesign to make resources thread safe.
 *
 * Revision 1.13  1996/03/02 03:27:37  robertj
 * Added function to translate a string to a form suitable for inclusion in a URL.
 * Added radio button and selection boxes to HTTP form resource.
 * Fixed bug in URL parsing, losing first / if hostname specified.
 *
 * Revision 1.12  1996/02/25 11:14:24  robertj
 * Radio button support for forms.
 *
 * Revision 1.11  1996/02/25 03:10:34  robertj
 * Removed pass through HTTP resource.
 * Fixed PHTTPConfig resource to use correct name for config key.
 *
 * Revision 1.10  1996/02/19 13:48:28  robertj
 * Put multiple uses of literal strings into const variables.
 * Fixed URL parsing so that the unmangling of strings occurs correctly.
 * Moved nested classes from PHTTPForm.
 * Added overwrite option to AddResource().
 * Added get/set string to PHTTPString resource.
 *
 * Revision 1.9  1996/02/13 13:09:17  robertj
 * Added extra parameters to callback function in PHTTPResources, required
 *   by descendants to make informed decisions on data being loaded.
 *
 * Revision 1.8  1996/02/08 12:26:29  robertj
 * Redesign of resource callback mechanism.
 * Added new resource types for HTML data entry forms.
 *
 * Revision 1.7  1996/02/03 11:33:19  robertj
 * Changed RadCmd() so can distinguish between I/O error and unknown command.
 *
 * Revision 1.6  1996/02/03 11:11:49  robertj
 * Numerous bug fixes.
 * Added expiry date and ismodifiedsince support.
 *
 * Revision 1.5  1996/01/30 23:32:40  robertj
 * Added single .
 *
 * Revision 1.4  1996/01/28 14:19:09  robertj
 * Split HTML into separate source file.
 * Beginning of pass through resource type.
 * Changed PCharArray in OnLoadData to PString for convenience in mangling data.
 * Made PHTTPSpace return standard page on selection of partial path.
 *
 * Revision 1.3  1996/01/28 02:49:16  robertj
 * Further implementation.
 *
 * Revision 1.2  1996/01/26 02:24:30  robertj
 * Further implemetation.
 *
 * Revision 1.1  1996/01/23 13:04:32  robertj
 * Initial revision
 *
 */

#include <ptlib.h>
#include <ptlib/sockets.h>
#include <ptclib/http.h>

#if P_SSL
#include <ptclib/pssl.h>
#endif

#include <ctype.h>


//////////////////////////////////////////////////////////////////////////////
// PHTTPClient

// Default constructor.
// No user agent name is supplied here, so userAgentName is left
// default-constructed.
PHTTPClient::PHTTPClient()
{
}


// Construct a client that remembers the given user agent string.
// ExecuteCommand() copies userAgentName into the User-Agent field of an
// outgoing request when that field has not already been set by the caller.
PHTTPClient::PHTTPClient(const PString & userAgent)
  : userAgentName(userAgent)
{
}


// Convenience overload taking the command as an enumeration value.
// The value is used as an index into commandNames and the resulting name is
// handed, together with all other arguments, to the string based
// ExecuteCommand(). The return value is whatever that overload returns.
int PHTTPClient::ExecuteCommand(Commands cmd,
                                const PURL & url,
                                PMIMEInfo & outMIME,
                                const PString & dataBody,
                                PMIMEInfo & replyMime,
                                BOOL persist)
{
  const PString & commandText = commandNames[cmd];
  return ExecuteCommand(commandText, url, outMIME, dataBody, replyMime, persist);
}


int PHTTPClient::ExecuteCommand(const PString & cmdName,
                                const PURL & url,
                                PMIMEInfo & outMIME,
                                const PString & dataBody,
                                PMIMEInfo & replyMime,
                                BOOL persist)
{
  if (!outMIME.Contains(DateTag))
    outMIME.SetAt(DateTag, PTime().AsString());

  if (!userAgentName && !outMIME.Contains(UserAgentTag))
    outMIME.SetAt(UserAgentTag, userAgentName);

  if (persist)
    outMIME.SetAt(ConnectionTag, KeepAliveTag);

  for (PINDEX retry = 0; retry < 3; retry++) {
    if (!AssureConnect(url, outMIME))
      break;

    if (!WriteCommand(cmdName, url.AsString(PURL::URIOnly), outMIME, dataBody)) {
      lastResponseCode = -1;
      lastResponseInfo = GetErrorText(LastWriteError);
      break;
    }

    // If not persisting need to shut down write so other end stops reading
    if (!persist)
      Shutdown(ShutdownWrite);

    // Await a response, if all OK exit loop
    if (ReadResponse(replyMime))
      break;

    // If not persisting, we have no oppurtunity to write again, just error out
    if (!persist)
      break;

    // If have had a failure to read a response but there was no error then
    // we have a shutdown socket probably due to a lack of persistence so ...
    if (GetErrorCode(LastReadError) != NoError)
      break;

    // ... we close the channel and allow AssureConnet() to reopen it.
    Close();
  }

  return lastResponseCode;
}


// Convenience overload taking the command as an enumeration value.
// The value is used as an index into commandNames and the resulting name is
// passed on to the string based WriteCommand(), whose result is returned.
BOOL PHTTPClient::WriteCommand(Commands cmd,
                               const PString & url,
                               PMIMEInfo & outMIME,
                               const PString & dataBody)
{
  const PString & commandText = commandNames[cmd];
  return WriteCommand(commandText, url, outMIME, dataBody);
}


// Write a complete request: request line, header fields and entity body.
//
//   cmdName   command name; an empty name is written as "GET"
//   url       request target; an empty string is written as "/"
//   outMIME   header fields; a Content-Length field is added when absent
//   dataBody  entity body, written after the header fields
//
// Returns the result of writing the entity body with Write().
BOOL PHTTPClient::WriteCommand(const PString & cmdName,
                               const PString & url,
                               PMIMEInfo & outMIME,
                               const PString & dataBody)
{
  // Number of body bytes to send: one less than the size of the string array
  const PINDEX bodyLength = dataBody.GetSize()-1;
  if (!outMIME.Contains(ContentLengthTag))
    outMIME.SetInteger(ContentLengthTag, bodyLength);

  // Work out what goes in the request target position
  const char * requestTarget = "/";
  if (!url.IsEmpty())
    requestTarget = (const char *)url;

  ostream & out = *this;

  // Request line
  if (!cmdName.IsEmpty())
    out << cmdName;
  else
    out << "GET";
  out << ' ' << requestTarget << " HTTP/1.1\r\n";

  // Header fields, streamed with the fill character set to '\r'
  out << setfill('\r') << outMIME;

  return Write((const char *)dataBody, bodyLength);
}


// Read the status line and header fields of a response.
//
// The first seven characters are read and then pushed back with UnRead().
// If they do not contain "HTTP/" the reply is accepted as HTTP/0.9: the
// response code is set to RequestOK and nothing further is read here.
// Otherwise one leading line terminator is consumed, the status line is read
// by PHTTP::ReadResponse() and the header fields are read into replyMIME.
//
// Returns TRUE on success. On failure lastResponseCode is set to -1 and
// lastResponseInfo describes the read error; if no read error was recorded
// the failure is reported as "Premature shutdown" with ProtocolFailure.
BOOL PHTTPClient::ReadResponse(PMIMEInfo & replyMIME)
{
  PString prefix = ReadString(7);

  // PString::operator! is TRUE when something was actually read
  if (!prefix) {
    UnRead(prefix);

    const BOOL hasVersionMarker = prefix.Find("HTTP/") != P_MAX_INDEX;
    if (!hasVersionMarker) {
      lastResponseCode = PHTTP::RequestOK;
      lastResponseInfo = "HTTP/0.9";
      return TRUE;
    }

    // Discard a line terminator that precedes the status line
    if (prefix[0] == '\n')
      ReadString(1);
    else if (prefix[0] == '\r' &&  prefix[1] == '\n')
      ReadString(2);

    if (PHTTP::ReadResponse() && replyMIME.Read(*this))
      return TRUE;
  }

  // Nothing usable was received
  lastResponseCode = -1;
  if (GetErrorCode(LastReadError) == NoError) {
    lastResponseInfo = "Premature shutdown";
    SetErrorValues(ProtocolFailure, 0, LastReadError);
  }
  else
    lastResponseInfo = GetErrorText(LastReadError);

  return FALSE;
}


// Read the entity body of a response into a string.
// The bytes are read by InternalReadContentBody(); afterwards the string is
// grown by one element, whether or not the read succeeded.
// Returns the result of InternalReadContentBody().
BOOL PHTTPClient::ReadContentBody(PMIMEInfo & replyMIME, PString & body)
{
  const BOOL succeeded = InternalReadContentBody(replyMIME, body);

  // One extra element beyond the bytes read
  const PINDEX rawSize = body.GetSize();
  body.SetSize(rawSize+1);

  return succeeded;
}


// Read the entity body of a response into a byte array.
// All work is done by InternalReadContentBody(), whose result is returned.
BOOL PHTTPClient::ReadContentBody(PMIMEInfo & replyMIME, PBYTEArray & body)
{
  const BOOL succeeded = InternalReadContentBody(replyMIME, body);
  return succeeded;
}


// Read the entity body of a response into an array.
//
// How much is read is decided by the header fields in replyMIME:
//   - Transfer-Encoding is "chunked": chunks are read until a zero length
//     chunk, then footer lines are read and added to replyMIME until
//     AddMIME() returns FALSE.
//   - otherwise, Content-Length present: exactly that many bytes are read.
//   - otherwise, any other non-empty Transfer-Encoding is rejected with
//     lastResponseCode set to -1.
//   - otherwise the channel is read in blocks until a block read fails.
//
// On success body is sized to exactly the number of bytes read, in every
// branch. Returns TRUE on success, FALSE on a read failure or a header that
// cannot be honoured.
BOOL PHTTPClient::InternalReadContentBody(PMIMEInfo & replyMIME, PAbstractArray & body)
{
  PCaselessString encoding = replyMIME(TransferEncodingTag);

  if (encoding != ChunkedTag) {
    if (replyMIME.Contains(ContentLengthTag)) {
      // The value is supplied by the remote end, so refuse a negative length
      // rather than passing it on as an array size and read count.
      long declaredLength = replyMIME.GetInteger(ContentLengthTag);
      if (declaredLength < 0) {
        lastResponseCode = -1;
        lastResponseInfo = "Invalid Content-Length";
        return FALSE;
      }

      PINDEX length = (PINDEX)declaredLength;
      body.SetSize(length);
      return ReadBlock(body.GetPointer(), length);
    }

    if (!(encoding.IsEmpty())) {
      lastResponseCode = -1;
      lastResponseInfo = "Unknown Transfer-Encoding extension";
      return FALSE;
    }

    // Must be raw, read to end file variety
    static const PINDEX ChunkSize = 2048;
    PINDEX bytesRead = 0;
    while (ReadBlock((char *)body.GetPointer(bytesRead+ChunkSize)+bytesRead, ChunkSize))
      bytesRead += GetLastReadCount();

    // The failed final block may still have delivered some bytes
    body.SetSize(bytesRead + GetLastReadCount());
    return GetErrorCode(LastReadError) == NoError;
  }

  // HTTP1.1 chunked format
  PINDEX bytesRead = 0;
  for (;;) {
    // Read chunk length line
    PString chunkLengthLine;
    if (!ReadLine(chunkLengthLine))
      return FALSE;

    // A zero length chunk is end of output
    PINDEX chunkLength = chunkLengthLine.AsUnsigned(16);
    if (chunkLength == 0)
      break;

    // Read the chunk, growing the array to hold it after what is already there
    if (!ReadBlock((char *)body.GetPointer(bytesRead+chunkLength)+bytesRead, chunkLength)) {
      // Keep only what actually arrived, as the raw branch above does
      body.SetSize(bytesRead + GetLastReadCount());
      return FALSE;
    }
    bytesRead+= chunkLength;

    // Read the trailing CRLF
    if (!ReadLine(chunkLengthLine))
      return FALSE;
  }

  // GetPointer() only ever grows the array, so an array that was already
  // larger than the data received would otherwise keep stale trailing bytes.
  body.SetSize(bytesRead);

  // Read the footer
  PString footer;
  do {
    if (!ReadLine(footer))
      return FALSE;
  } while (replyMIME.AddMIME(footer));

  return TRUE;
}


// Fetch a URL with GetDocument() and read its entity body into a string.
// Locally created, initially empty header sets are used for the request and
// the reply. Returns FALSE if GetDocument() fails, otherwise the result of
// ReadContentBody().
BOOL PHTTPClient::GetTextDocument(const PURL & url,
                                  PString & document,
                                  BOOL persist)
{
  PMIMEInfo outMIME, replyMIME;

  if (GetDocument(url, outMIME, replyMIME, persist))
    return ReadContentBody(replyMIME, document);

  return FALSE;
}


// Issue a GET for a URL, following MovedPermanently and MovedTemporarily
// responses.
//
//   _url       initial URL to fetch
//   _outMIME   header fields for the request; a fresh copy is used for every
//              request so defaults added for one request do not carry over
//   replyMIME  cleared before each request, receives the response fields
//   persist    passed through to ExecuteCommand()
//
// Returns TRUE when a request is answered with RequestOK; the entity body is
// left unread for the caller. Returns FALSE for any other response code, for
// a redirect without a Location field, when the body of a redirect response
// cannot be read, or when the redirect limit is exceeded.
//
// NOTE(review): the channel is not closed before a redirect is followed and
// AssureConnect() only connects when the channel is not open, so a redirect
// to a different host appears to reuse the existing connection - confirm.
BOOL PHTTPClient::GetDocument(const PURL & _url,
                              PMIMEInfo & _outMIME,
                              PMIMEInfo & replyMIME,
                              BOOL persist)
{
  int count = 0;
  static const char locationTag[] = "Location";
  PURL url = _url;
  for (;;) {
    PMIMEInfo outMIME = _outMIME;
    replyMIME.RemoveAll();
    int code = ExecuteCommand(GET, url, outMIME, PString(), replyMIME, persist);
    switch (code) {
      case RequestOK:
        return TRUE;
      case MovedPermanently:
      case MovedTemporarily:
        {
          // Give up rather than chase redirects indefinitely
          if (count > 10)
            return FALSE;
          PString str = replyMIME(locationTag);
          if (str.IsEmpty())
            return FALSE;
          // Read and discard the body that came with the redirect response
          PString doc;
          if (!ReadContentBody(replyMIME, doc))
            return FALSE;
          url = str;
          count++;
        }
        break;
      default:
        return FALSE;
    }
  }
}


// Issue a HEAD request for a URL with an empty entity body.
// Returns TRUE only when the response code is RequestOK.
BOOL PHTTPClient::GetHeader(const PURL & url,
                            PMIMEInfo & outMIME,
                            PMIMEInfo & replyMIME,
                            BOOL persist)
{
  const int status = ExecuteCommand(HEAD, url, outMIME, PString(), replyMIME, persist);
  return status == RequestOK;
}


// Issue a POST request carrying the given data as its entity body.
//
//   url        target of the request
//   outMIME    header fields for the request; when no content type field is
//              present it is set to "application/x-www-form-urlencoded"
//   data       entity body, sent exactly as given
//   replyMIME  receives the header fields of the response
//   persist    passed through to ExecuteCommand()
//
// Returns TRUE only when the response code is RequestOK.
//
// This function used to build a copy of the data with a CRLF appended for
// some CGI servers, but that copy was never the one sent. The dead copy has
// been removed; the bytes on the wire are unchanged. The request is written
// as HTTP/1.1, and RFC 2616 section 4.1 says an HTTP/1.1 client must not
// follow a request with an extra CRLF.
BOOL PHTTPClient::PostData(const PURL & url,
                           PMIMEInfo & outMIME,
                           const PString & data,
                           PMIMEInfo & replyMIME,
                           BOOL persist)
{
  if (!outMIME.Contains(ContentTypeTag))
    outMIME.SetAt(ContentTypeTag, "application/x-www-form-urlencoded");

  return ExecuteCommand(POST, url, outMIME, data, replyMIME, persist) == RequestOK;
}


// Issue a POST request and read the entity body of the response into a string.
// Returns FALSE if the POST itself does not succeed, otherwise the result of
// ReadContentBody().
BOOL PHTTPClient::PostData(const PURL & url,
                           PMIMEInfo & outMIME,
                           const PString & data,
                           PMIMEInfo & replyMIME,
                           PString & body,
                           BOOL persist)
{
  if (PostData(url, outMIME, data, replyMIME, persist))
    return ReadContentBody(replyMIME, body);

  return FALSE;
}


// Make sure there is an open channel to send a request on, then make sure
// the outgoing header fields include a Host field.
//
//   url      supplies the host name, port and (when P_SSL is enabled) the
//            scheme used to choose between a plain and an SSL connection
//   outMIME  outgoing header fields; a "Host" field is added when absent
//
// Returns TRUE when a channel is open. When the channel is not open and no
// host name is given, lastResponseCode is set to BadRequest and the result of
// SetErrorValues() is returned. When connecting fails, lastResponseCode is
// set to -2, lastResponseInfo to the error text, and FALSE is returned.
BOOL PHTTPClient::AssureConnect(const PURL & url, PMIMEInfo & outMIME)
{
  PString host = url.GetHostName();

  // Is not open or other end shut down, re-establish connection
  if (!IsOpen()) {
    // Cannot connect anywhere without a host name
    if (host.IsEmpty()) {
      lastResponseCode = BadRequest;
      lastResponseInfo = "No host specified";
      return SetErrorValues(ProtocolFailure, 0, LastReadError);
    }

#if P_SSL
    // For https, make the TCP connection ourselves and layer SSL over it
    if (url.GetScheme() == "https") {
      PTCPSocket * tcp = new PTCPSocket(url.GetPort());
      tcp->SetReadTimeout(readTimeout);
      if (!tcp->Connect(host)) {
        lastResponseCode = -2;
        lastResponseInfo = tcp->GetErrorText();
        delete tcp;
        return FALSE;
      }

      // NOTE(review): tcp is not deleted on the failure path below;
      // presumably the SSL channel has taken ownership of it - confirm.
      PSSLChannel * ssl = new PSSLChannel;
      if (!ssl->Connect(tcp)) {
        lastResponseCode = -2;
        lastResponseInfo = ssl->GetErrorText();
        delete ssl;
        return FALSE;
      }

      // NOTE(review): ssl is not deleted when Open() fails; presumably
      // Open() takes ownership of the channel - confirm.
      if (!Open(ssl)) {
        lastResponseCode = -2;
        lastResponseInfo = GetErrorText();
        return FALSE;
      }
    }
    else
#endif

    // Plain connection. With P_SSL enabled this statement is the else branch
    // of the https test above; without it, it is always executed.
    if (!Connect(host, url.GetPort())) {
      lastResponseCode = -2;
      lastResponseInfo = GetErrorText();
      return FALSE;
    }
  }

  // Have connection, so fill in the required MIME fields
  static char HostTag[] = "Host";
  if (!outMIME.Contains(HostTag)) {
    // Note PString::operator! is TRUE when the string is NOT empty, so the
    // host name from the URL is used whenever there is one
    if (!host)
      outMIME.SetAt(HostTag, host);
    else {
      // No host in the URL, fall back to the host name of the socket in use
      PIPSocket * sock = GetSocket();
      if (sock != NULL)
        outMIME.SetAt(HostTag, sock->GetHostName());
    }
  }

  return TRUE;
}


// End Of File ///////////////////////////////////////////////////////////////


// syntax highlighted by Code2HTML, v. 0.9.1