View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(http_open,
   39          [ http_open/3,                % +URL, -Stream, +Options
   40            http_set_authorization/2,   % +URL, +Authorization
   41            http_close_keep_alive/1     % +Address
   42          ]).   43:- autoload(library(aggregate),[aggregate_all/3]).   44:- autoload(library(apply),[foldl/4,include/3]).   45:- autoload(library(base64),[base64/3]).   46:- autoload(library(debug),[debug/3,debugging/1]).   47:- autoload(library(error),
   48	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   49	    ]).   50:- autoload(library(lists),[last/2,member/2]).   51:- autoload(library(option),
   52	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   53	      option/3, select_option/3
   54	    ]).   55:- autoload(library(readutil),[read_line_to_codes/2]).   56:- autoload(library(uri),
   57	    [ uri_resolve/3, uri_components/2, uri_data/3,
   58              uri_authority_components/2, uri_authority_data/3,
   59	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   60	    ]).   61:- autoload(library(http/http_header),
   62            [ http_parse_header/2, http_post_data/3 ]).   63:- autoload(library(http/http_stream),[stream_range_open/3]).   64:- if(exists_source(library(ssl))).   65:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   66:- endif.   67:- use_module(library(socket)).   68
   69
   70/** <module> HTTP client library
   71
   72This library defines http_open/3, which opens an URL as a Prolog stream.
   73The functionality of the  library  can   be  extended  by  loading two
   74additional modules that act as plugins:
   75
   76    * library(http/http_ssl_plugin)
   77    Loading this library causes http_open/3 to handle HTTPS connections.
   78    Relevant options for SSL certificate handling are handed to
   79    ssl_context/3. This plugin is loaded automatically if the scheme
   80    `https` is requested using a default SSL context. See the plugin for
   81    additional information regarding security.
   82
   83    * library(zlib)
   84    Loading this library supports the `gzip` transfer encoding.  This
   85    plugin is lazily loaded if a connection is opened that claims this
   86    transfer encoding.
   87
   88    * library(http/http_cookie)
   89    Loading this library adds tracking cookies to http_open/3. Returned
   90    cookies are collected in the Prolog database and supplied for
   91    subsequent requests.
   92
   93    * library(http/http_stream)
   94    This library adds support for _chunked_ encoding. It is lazily
   95    loaded if the server sends a ``Transfer-encoding: chunked`` header.
   96
   97
   98Here is a simple example to fetch a web-page:
   99
  100```
  101?- http_open('http://www.google.com/search?q=prolog', In, []),
  102   copy_stream_data(In, user_output),
  103   close(In).
  104<!doctype html><head><title>prolog - Google Search</title><script>
  105...
  106```
  107
  108The example below fetches the modification time of a web-page. Note that
  109=|Modified|= is =|''|= (the empty atom) if the  web-server does not provide a
  110time-stamp for the resource. See also parse_time/2.
  111
  112```
  113modified(URL, Stamp) :-
  114       http_open(URL, In,
  115                 [ method(head),
  116                   header(last_modified, Modified)
  117                 ]),
  118       close(In),
  119       Modified \== '',
  120       parse_time(Modified, Stamp).
  121```
  122
  123Then next example uses Google search. It exploits library(uri) to manage
  124URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  125navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  126queries if the data returned by Google changes (this example indeed
  127no longer works and currently fails at the first xpath/3 call)
  128
  129```
  130:- use_module(library(http/http_open)).
  131:- use_module(library(xpath)).
  132:- use_module(library(sgml)).
  133:- use_module(library(uri)).
  134
  135google(For, Title, HREF) :-
  136        uri_encoded(query_value, For, Encoded),
  137        atom_concat('http://www.google.com/search?q=', Encoded, URL),
  138        http_open(URL, In, []),
  139        call_cleanup(
  140            load_html(In, DOM, []),
  141            close(In)),
  142        xpath(DOM, //h3(@class=r), Result),
  143        xpath(Result, //a(@href=HREF0, text), Title),
  144        uri_components(HREF0, Components),
  145        uri_data(search, Components, Query),
  146        uri_query_components(Query, Parts),
  147        memberchk(q=HREF, Parts).
  148```
  149
  150An example query is below:
  151
  152```
  153?- google(prolog, Title, HREF).
  154Title = 'SWI-Prolog',
  155HREF = 'http://www.swi-prolog.org/' ;
  156Title = 'Prolog - Wikipedia',
  157HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  158Title = 'Prolog - Wikipedia, the free encyclopedia',
  159HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  160Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  161HREF = 'http://www.pro-log.nl/' ;
  162Title = 'Learn Prolog Now!',
  163HREF = 'http://www.learnprolognow.org/' ;
  164Title = 'Free Online Version - Learn Prolog
  165...
  166```
  167
  168@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  169     documents.
  170@see http_get/3 and http_post/4 provide an alternative interface that
  171     convert the reply depending on the =|Content-Type|= header.
  172*/
  173
  174:- multifile
  175    http:encoding_filter/3,           % +Encoding, +In0, -In
  176    http:current_transfer_encoding/1, % ?Encoding
  177    http:disable_encoding_filter/1,   % +ContentType
  178    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  179                                      % -NewStreamPair, +Options
  180    http:open_options/2,              % +Parts, -Options
  181    http:write_cookies/3,             % +Out, +Parts, +Options
  182    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  183    http:authenticate_client/2,       % +URL, +Action
  184    http:http_connection_over_proxy/6.  185
  186:- meta_predicate
  187    http_open(+,-,:).  188
  189:- predicate_options(http_open/3, 3,
  190                     [ authorization(compound),
  191                       final_url(-atom),
  192                       header(+atom, -atom),
  193                       headers(-list),
  194                       raw_headers(-list(string)),
  195                       connection(+atom),
  196                       method(oneof([delete,get,put,head,post,patch,options])),
  197                       size(-integer),
  198                       status_code(-integer),
  199                       output(-stream),
  200                       timeout(number),
  201                       unix_socket(+atom),
  202                       proxy(atom, integer),
  203                       proxy_authorization(compound),
  204                       bypass_proxy(boolean),
  205                       request_header(any),
  206                       user_agent(atom),
  207                       version(-compound),
  208        % The option below applies if library(http/http_header) is loaded
  209                       post(any),
  210        % The options below apply if library(http/http_ssl_plugin)) is loaded
  211                       pem_password_hook(callable),
  212                       cacert_file(atom),
  213                       cert_verify_hook(callable)
  214                     ]).  215
  216%!  user_agent(-Agent) is det.
  217%
  218%   Default value for =|User-Agent|=,  can   be  overruled using the
  219%   option user_agent(Agent) of http_open/3.
  220
  221user_agent('SWI-Prolog').
  222
  223%!  http_open(+URL, -Stream, +Options) is det.
  224%
  225%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  226%   either an atom  specifying  a  URL   or  a  list  representing a
  227%   broken-down  URL  as  specified  below.   After  this  predicate
  228%   succeeds the data can be read from Stream. After completion this
  229%   stream must be  closed  using   the  built-in  Prolog  predicate
  230%   close/1. Options provides additional options:
  231%
  232%     * authenticate(+Boolean)
  233%     If `false` (default `true`), do _not_ try to automatically
  234%     authenticate the client if a 401 (Unauthorized) status code
  235%     is received.
  236%
  237%     * authorization(+Term)
  238%     Send authorization. See also http_set_authorization/2. Supported
  239%     schemes:
  240%
  241%       - basic(+User, +Password)
  242%       HTTP Basic authentication.
  243%       - bearer(+Token)
  244%       HTTP Bearer authentication.
  245%       - digest(+User, +Password)
  246%       HTTP Digest authentication.  This option is only provided
  247%       if the plugin library(http/http_digest) is also loaded.
  248%
  249%     * unix_socket(+Path)
  250%     Connect to the given Unix domain socket.  In this scenario
  251%     the host name and port or ignored.  If the server replies
  252%     with a _redirect_ message and the host differs from the
  253%     original host as normal TCP connection is used to handle
  254%     the redirect.  This option is inspired by curl(1)'s option
  255%     `--unix-socket`.
  256%
  257%     * connection(+Connection)
  258%     Specify the =Connection= header.  Default is =close=.  The
  259%     alternative is =|Keep-alive|=.  This maintains a pool of
  260%     available connections as determined by keep_connection/1.
  261%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  262%     Keep-alive connections can be closed explicitly using
  263%     http_close_keep_alive/1. Keep-alive connections may
  264%     significantly improve repetitive requests on the same server,
  265%     especially if the IP route is long, HTTPS is used or the
  266%     connection uses a proxy.
  267%
  268%     * final_url(-FinalURL)
  269%     Unify FinalURL with the final   destination. This differs from
  270%     the  original  URL  if  the  returned  head  of  the  original
  271%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  272%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  273%     URL constructed from the parts.
  274%
  275%     * header(Name, -AtomValue)
  276%     If provided, AtomValue is  unified  with   the  value  of  the
  277%     indicated  field  in  the  reply    header.  Name  is  matched
  278%     case-insensitive and the underscore  (_)   matches  the hyphen
  279%     (-). Multiple of these options  may   be  provided  to extract
  280%     multiple  header  fields.  If  the  header  is  not  available
  281%     AtomValue is unified to the empty atom ('').
  282%
  283%     * headers(-List)
  284%     If provided,  List is unified  with a list of  Name(Value) pairs
  285%     corresponding to  fields in  the reply  header.  Name  and Value
  286%     follow  the  same  conventions used  by  the  header(Name,Value)
  287%     option.  A  pseudo header status_code(Code) is  added to provide
  288%     the  HTTP status  as  an integer.   See also  raw_headers(-List)
  289%     which  provides  the  entire   HTTP  reply  header  in  unparsed
  290%     representation.
  291%
  292%     * method(+Method)
  293%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  294%     =patch=.
  295%     The  =head= message can be
  296%     used in combination with  the   header(Name,  Value) option to
  297%     access information on the resource   without actually fetching
  298%     the resource itself.  The  returned   stream  must  be  closed
  299%     immediately.
  300%
  301%     If post(Data) is provided, the default is =post=.
  302%
  303%     * size(-Size)
  304%     Size is unified with the   integer value of =|Content-Length|=
  305%     in the reply header.
  306%
  307%     * version(-Version)
  308%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  309%     are integers representing the HTTP version in the reply header.
  310%
  311%     * range(+Range)
  312%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  313%     where `From` is an integer and `To`   is  either an integer or
  314%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  315%     to   ask   for    bytes    1000-1999,     use    the    option
  316%     range(bytes(1000,1999))
  317%
  318%     * raw_encoding(+Encoding)
  319%     Do not install a decoding filter for Encoding.  For example,
  320%     using raw_encoding('applocation/gzip') the system will not
  321%     decompress the stream if it is compressed using `gzip`.
  322%
  323%     * raw_headers(-Lines)
  324%     Unify Lines with a list of strings that represents the complete
  325%     reply header returned by the server.  See also headers(-List).
  326%
  327%     * redirect(+Boolean)
  328%     If `false` (default `true`), do _not_ automatically redirect
  329%     if a 3XX code is received.  Must be combined with
  330%     status_code(Code) and one of the header options to read the
  331%     redirect reply. In particular, without status_code(Code) a
  332%     redirect is mapped to an exception.
  333%
  334%     * status_code(-Code)
  335%     If this option is  present  and   Code  unifies  with the HTTP
  336%     status code, do *not* translate errors (4xx, 5xx) into an
  337%     exception. Instead, http_open/3 behaves as if 2xx (success) is
  338%     returned, providing the application to read the error document
  339%     from the returned stream.
  340%
  341%     * output(-Out)
  342%     Unify the output stream with Out and do not close it. This can
  343%     be used to upgrade a connection.
  344%
  345%     * timeout(+Timeout)
  346%     If provided, set a timeout on   the stream using set_stream/2.
  347%     With this option if no new data arrives within Timeout seconds
  348%     the stream raises an exception.  Default   is  to wait forever
  349%     (=infinite=).
  350%
  351%     * post(+Data)
  352%     Issue a =POST= request on the HTTP server.  Data is
  353%     handed to http_post_data/3.
  354%
  355%     * proxy(+Host:Port)
  356%     Use an HTTP proxy to connect to the outside world.  See also
  357%     socket:proxy_for_url/3.  This option overrules the proxy
  358%     specification defined by socket:proxy_for_url/3.
  359%
  360%     * proxy(+Host, +Port)
  361%     Synonym for proxy(+Host:Port).  Deprecated.
  362%
  363%     * proxy_authorization(+Authorization)
  364%     Send authorization to the proxy.  Otherwise   the  same as the
  365%     =authorization= option.
  366%
  367%     * bypass_proxy(+Boolean)
  368%     If =true=, bypass proxy hooks.  Default is =false=.
  369%
  370%     * request_header(Name = Value)
  371%     Additional  name-value  parts  are  added   in  the  order  of
  372%     appearance to the HTTP request   header.  No interpretation is
  373%     done.
  374%
  375%     * max_redirect(+Max)
  376%     Sets the maximum length of a redirection chain.  This is needed
  377%     for some IRIs that redirect indefinitely to other IRIs without
  378%     looping (e.g., redirecting to IRIs with a random element in them).
  379%     Max must be either a non-negative integer or the atom `infinite`.
  380%     The default value is `10`.
  381%
  382%     * user_agent(+Agent)
  383%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  384%     header. Default is =SWI-Prolog=.
  385%
  386%   The hook http:open_options/2 can  be   used  to  provide default
  387%   options   based   on   the   broken-down     URL.   The   option
  388%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  389%   interfaces that commonly return status   codes  other than `200`
  390%   that need to be be processed by the client code.
  391%
  392%   @param URL is either an atom or string (url) or a list of _parts_.
  393%
  394%               When provided, this list may contain the fields
  395%               =scheme=, =user=, =password=, =host=, =port=, =path=
  396%               and either =query_string= (whose argument is an atom)
  397%               or =search= (whose argument is a list of
  398%               =|Name(Value)|= or =|Name=Value|= compound terms).
  399%               Only =host= is mandatory.  The example below opens the
  400%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  401%               Note that values must *not* be quoted because the
  402%               library inserts the required quotes.
  403%
  404%               ```
  405%               http_open([ host('www.example.com'),
  406%                           path('/my/path'),
  407%                           search([ q='Hello world',
  408%                                    lang=en
  409%                                  ])
  410%                         ])
  411%               ```
  412%
  413%   @throws error(existence_error(url, Id),Context) is raised if the
  414%   HTTP result code is not in the range 200..299. Context has the
  415%   shape context(Message, status(Code, TextCode)), where `Code` is the
  416%   numeric HTTP code and `TextCode` is the textual description thereof
  417%   provided by the server. `Message` may provide additional details or
  418%   may be unbound.
  419%
  420%   @see ssl_context/3 for SSL related options if
  421%   library(http/http_ssl_plugin) is loaded.
  422
  423:- multifile
  424    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  425
  426http_open(URL, Stream, QOptions) :-
  427    meta_options(is_meta, QOptions, Options0),
  428    (   atomic(URL)
  429    ->  parse_url_ex(URL, Parts)
  430    ;   Parts = URL
  431    ),
  432    autoload_https(Parts),
  433    upgrade_ssl_options(Parts, Options0, Options),
  434    add_authorization(Parts, Options, Options1),
  435    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  436    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  437    (   option(bypass_proxy(true), Options)
  438    ->  try_http_proxy(direct, Parts, Stream, Options2)
  439    ;   term_variables(Options2, Vars2),
  440        findall(Result-Vars2,
  441                try_a_proxy(Parts, Result, Options2),
  442                ResultList),
  443        last(ResultList, Status-Vars2)
  444    ->  (   Status = true(_Proxy, Stream)
  445        ->  true
  446        ;   throw(error(proxy_error(tried(ResultList)), _))
  447        )
  448    ;   try_http_proxy(direct, Parts, Stream, Options2)
  449    ).
  450
  451try_a_proxy(Parts, Result, Options) :-
  452    parts_uri(Parts, AtomicURL),
  453    option(host(Host), Parts),
  454    (   option(unix_socket(Path), Options)
  455    ->  Proxy = unix_socket(Path)
  456    ;   (   option(proxy(ProxyHost:ProxyPort), Options)
  457        ;   is_list(Options),
  458            memberchk(proxy(ProxyHost,ProxyPort), Options)
  459        )
  460    ->  Proxy = proxy(ProxyHost, ProxyPort)
  461    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  462    ),
  463    debug(http(proxy),
  464          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  465    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  466    ->  (   var(E)
  467        ->  !, Result = true(Proxy, Stream)
  468        ;   Result = error(Proxy, E)
  469        )
  470    ;   Result = false(Proxy)
  471    ),
  472    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  473
  474try_http_proxy(Method, Parts, Stream, Options0) :-
  475    option(host(Host), Parts),
  476    proxy_request_uri(Method, Parts, RequestURI),
  477    select_option(visited(Visited0), Options0, OptionsV, []),
  478    Options = [visited([Parts|Visited0])|OptionsV],
  479    parts_scheme(Parts, Scheme),
  480    default_port(Scheme, DefPort),
  481    url_part(port(Port), Parts, DefPort),
  482    host_and_port(Host, DefPort, Port, HostPort),
  483    (   option(connection(Connection), Options0),
  484        keep_alive(Connection),
  485        get_from_pool(Host:Port, StreamPair),
  486        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  487              [ Host:Port, StreamPair ]),
  488        catch(send_rec_header(StreamPair, Stream, HostPort,
  489                              RequestURI, Parts, Options),
  490              Error,
  491              keep_alive_error(Error, StreamPair))
  492    ->  true
  493    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  494                                        SocketStreamPair, Options, Options1),
  495        (   catch(http:http_protocol_hook(Scheme, Parts,
  496                                          SocketStreamPair,
  497                                          StreamPair, Options),
  498                  Error,
  499                  ( close(SocketStreamPair, [force(true)]),
  500                    throw(Error)))
  501        ->  true
  502        ;   StreamPair = SocketStreamPair
  503        ),
  504        send_rec_header(StreamPair, Stream, HostPort,
  505                        RequestURI, Parts, Options1)
  506    ),
  507    return_final_url(Options).
  508
  509proxy_request_uri(direct, Parts, RequestURI) :-
  510    !,
  511    parts_request_uri(Parts, RequestURI).
  512proxy_request_uri(unix_socket(_), Parts, RequestURI) :-
  513    !,
  514    parts_request_uri(Parts, RequestURI).
  515proxy_request_uri(_, Parts, RequestURI) :-
  516    parts_uri(Parts, RequestURI).
  517
  518http:http_connection_over_proxy(unix_socket(Path), _, _,
  519                                StreamPair, Options, Options) :-
  520    !,
  521    unix_domain_socket(Socket),
  522    tcp_connect(Socket, Path),
  523    tcp_open_socket(Socket, In, Out),
  524    stream_pair(StreamPair, In, Out).
  525http:http_connection_over_proxy(direct, _, Host:Port,
  526                                StreamPair, Options, Options) :-
  527    !,
  528    open_socket(Host:Port, StreamPair, Options).
  529http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  530                                StreamPair, Options, Options) :-
  531    \+ ( memberchk(scheme(Scheme), Parts),
  532         secure_scheme(Scheme)
  533       ),
  534    !,
  535    % We do not want any /more/ proxy after this
  536    open_socket(ProxyHost:ProxyPort, StreamPair,
  537                [bypass_proxy(true)|Options]).
  538http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  539                                StreamPair, Options, Options) :-
  540    !,
  541    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  542    catch(negotiate_socks_connection(Host:Port, StreamPair),
  543          Error,
  544          ( close(StreamPair, [force(true)]),
  545            throw(Error)
  546          )).
  547
  548%!  hooked_options(+Parts, -Options) is nondet.
  549%
  550%   Calls  http:open_options/2  and  if  necessary    upgrades  old  SSL
  551%   cacerts_file(File) option to a cacerts(List) option to ensure proper
  552%   merging of options.
  553
  554hooked_options(Parts, Options) :-
  555    http:open_options(Parts, Options0),
  556    upgrade_ssl_options(Parts, Options0, Options).
  557
  558:- if(current_predicate(ssl_upgrade_legacy_options/2)).  559upgrade_ssl_options(Parts, Options0, Options) :-
  560    requires_ssl(Parts),
  561    !,
  562    ssl_upgrade_legacy_options(Options0, Options).
  563:- endif.  564upgrade_ssl_options(_, Options, Options).
  565
  566merge_options_rev(Old, New, Merged) :-
  567    merge_options(New, Old, Merged).
  568
  569is_meta(pem_password_hook).             % SSL plugin callbacks
  570is_meta(cert_verify_hook).
  571
  572
  573http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  574
  575default_port(https, 443) :- !.
  576default_port(wss,   443) :- !.
  577default_port(_,     80).
  578
  579host_and_port(Host, DefPort, DefPort, Host) :- !.
  580host_and_port(Host, _,       Port,    Host:Port).
  581
  582%!  autoload_https(+Parts) is det.
  583%
  584%   If the requested scheme is https or wss, load the HTTPS plugin.
  585
  586autoload_https(Parts) :-
  587    requires_ssl(Parts),
  588    memberchk(scheme(S), Parts),
  589    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  590    exists_source(library(http/http_ssl_plugin)),
  591    !,
  592    use_module(library(http/http_ssl_plugin)).
  593autoload_https(_).
  594
  595requires_ssl(Parts) :-
  596    memberchk(scheme(S), Parts),
  597    secure_scheme(S).
  598
  599secure_scheme(https).
  600secure_scheme(wss).
  601
  602%!  send_rec_header(+StreamPair, -Stream,
  603%!                  +Host, +RequestURI, +Parts, +Options) is det.
  604%
  605%   Send header to Out and process reply.  If there is an error or
  606%   failure, close In and Out and return the error or failure.
  607
  608send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  609    (   catch(guarded_send_rec_header(StreamPair, Stream,
  610                                      Host, RequestURI, Parts, Options),
  611              E, true)
  612    ->  (   var(E)
  613        ->  (   option(output(StreamPair), Options)
  614            ->  true
  615            ;   true
  616            )
  617        ;   close(StreamPair, [force(true)]),
  618            throw(E)
  619        )
  620    ;   close(StreamPair, [force(true)]),
  621        fail
  622    ).
  623
  624guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  625    user_agent(Agent, Options),
  626    method(Options, MNAME),
  627    http_version(Version),
  628    option(connection(Connection), Options, close),
  629    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  630    debug(http(send_request), "> Host: ~w", [Host]),
  631    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  632    debug(http(send_request), "> Connection: ~w", [Connection]),
  633    format(StreamPair,
  634           '~w ~w HTTP/~w\r\n\c
  635               Host: ~w\r\n\c
  636               User-Agent: ~w\r\n\c
  637               Connection: ~w\r\n',
  638           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  639    parts_uri(Parts, URI),
  640    x_headers(Options, URI, StreamPair),
  641    write_cookies(StreamPair, Parts, Options),
  642    (   option(post(PostData), Options)
  643    ->  http_post_data(PostData, StreamPair, [])
  644    ;   format(StreamPair, '\r\n', [])
  645    ),
  646    flush_output(StreamPair),
  647                                    % read the reply header
  648    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  649    update_cookies(Lines, Parts, Options),
  650    reply_header(Lines, Options),
  651    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  652            StreamPair, Stream).
  653
  654
  655%!  http_version(-Version:atom) is det.
  656%
  657%   HTTP version we publish. We  can  only   use  1.1  if we support
  658%   chunked encoding.
  659
  660http_version('1.1') :-
  661    http:current_transfer_encoding(chunked),
  662    !.
  663http_version('1.1') :-
  664    autoload_encoding(chunked),
  665    !.
  666http_version('1.0').
  667
  668method(Options, MNAME) :-
  669    option(post(_), Options),
  670    !,
  671    option(method(M), Options, post),
  672    (   map_method(M, MNAME0)
  673    ->  MNAME = MNAME0
  674    ;   domain_error(method, M)
  675    ).
  676method(Options, MNAME) :-
  677    option(method(M), Options, get),
  678    (   map_method(M, MNAME0)
  679    ->  MNAME = MNAME0
  680    ;   map_method(_, M)
  681    ->  MNAME = M
  682    ;   domain_error(method, M)
  683    ).
  684
  685%!  map_method(+MethodID, -Method)
  686%
  687%   Support additional ``METHOD`` keywords.  Default   are  the official
  688%   HTTP methods as defined by the various RFCs.
  689
  690:- multifile
  691    map_method/2.  692
  693map_method(delete,  'DELETE').
  694map_method(get,     'GET').
  695map_method(head,    'HEAD').
  696map_method(post,    'POST').
  697map_method(put,     'PUT').
  698map_method(patch,   'PATCH').
  699map_method(options, 'OPTIONS').
  700
  701%!  x_headers(+Options, +URI, +Out) is det.
  702%
  703%   Emit extra headers from   request_header(Name=Value)  options in
  704%   Options.
  705%
  706%   @tbd Use user/password fields
  707
  708x_headers(Options, URI, Out) :-
  709    x_headers_(Options, [url(URI)|Options], Out).
  710
  711x_headers_([], _, _).
  712x_headers_([H|T], Options, Out) :-
  713    x_header(H, Options, Out),
  714    x_headers_(T, Options, Out).
  715
  716x_header(request_header(Name=Value), _, Out) :-
  717    !,
  718    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  719    format(Out, '~w: ~w\r\n', [Name, Value]).
  720x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  721    !,
  722    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  723x_header(authorization(Authorization), Options, Out) :-
  724    !,
  725    auth_header(Authorization, Options, 'Authorization', Out).
  726x_header(range(Spec), _, Out) :-
  727    !,
  728    Spec =.. [Unit, From, To],
  729    (   To == end
  730    ->  ToT = ''
  731    ;   must_be(integer, To),
  732        ToT = To
  733    ),
  734    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  735    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  736x_header(_, _, _).
  737
  738%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  739
  740auth_header(basic(User, Password), _, Header, Out) :-
  741    !,
  742    format(codes(Codes), '~w:~w', [User, Password]),
  743    phrase(base64(Codes), Base64Codes),
  744    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  745    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  746auth_header(bearer(Token), _, Header, Out) :-
  747    !,
  748    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  749    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  750auth_header(Auth, Options, _, Out) :-
  751    option(url(URL), Options),
  752    add_method(Options, Options1),
  753    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  754    !.
  755auth_header(Auth, _, _, _) :-
  756    domain_error(authorization, Auth).
  757
  758user_agent(Agent, Options) :-
  759    (   option(user_agent(Agent), Options)
  760    ->  true
  761    ;   user_agent(Agent)
  762    ).
  763
  764add_method(Options0, Options) :-
  765    option(method(_), Options0),
  766    !,
  767    Options = Options0.
  768add_method(Options0, Options) :-
  769    option(post(_), Options0),
  770    !,
  771    Options = [method(post)|Options0].
  772add_method(Options0, [method(get)|Options0]).
  773
  774%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  775%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  776%
  777%   Handle the HTTP status once available. If   200-299, we are ok. If a
  778%   redirect, redo the open,  returning  a   new  stream.  Else issue an
  779%   error.
  780%
  781%   @error  existence_error(url, URL)
  782
  783                                        % Redirections
  784do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  785    redirect_code(Code),
  786    option(redirect(true), Options0, true),
  787    location(Lines, RequestURI),
  788    !,
  789    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  790    close(In),
  791    parts_uri(Parts, Base),
  792    uri_resolve(RequestURI, Base, Redirected),
  793    parse_url_ex(Redirected, RedirectedParts),
  794    (   redirect_limit_exceeded(Options0, Max)
  795    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  796        throw(error(permission_error(redirect, http, Redirected),
  797                    context(_, Comment)))
  798    ;   redirect_loop(RedirectedParts, Options0)
  799    ->  throw(error(permission_error(redirect, http, Redirected),
  800                    context(_, 'Redirection loop')))
  801    ;   true
  802    ),
  803    redirect_options(Parts, RedirectedParts, Options0, Options),
  804    http_open(RedirectedParts, Stream, Options).
  805                                        % Need authentication
  806do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  807    authenticate_code(Code),
  808    option(authenticate(true), Options0, true),
  809    parts_uri(Parts, URI),
  810    parse_headers(Lines, Headers),
  811    http:authenticate_client(
  812             URI,
  813             auth_reponse(Headers, Options0, Options)),
  814    !,
  815    close(In0),
  816    http_open(Parts, Stream, Options).
  817                                        % Accepted codes
  818do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  819    (   option(status_code(Code), Options),
  820        Lines \== []
  821    ->  true
  822    ;   successful_code(Code)
  823    ),
  824    !,
  825    parts_uri(Parts, URI),
  826    parse_headers(Lines, Headers),
  827    return_version(Options, Version),
  828    return_size(Options, Headers),
  829    return_fields(Options, Headers),
  830    return_headers(Options, [status_code(Code)|Headers]),
  831    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  832    transfer_encoding_filter(Lines, In1, In, Options),
  833                                    % properly re-initialise the stream
  834    set_stream(In, file_name(URI)),
  835    set_stream(In, record_position(true)).
  836do_open(_, _, _, [], Options, _, _, _, _) :-
  837    option(connection(Connection), Options),
  838    keep_alive(Connection),
  839    !,
  840    throw(error(keep_alive(closed),_)).
  841                                        % report anything else as error
  842do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  843    parts_uri(Parts, URI),
  844    (   map_error_code(Code, Error)
  845    ->  Formal =.. [Error, url, URI]
  846    ;   Formal = existence_error(url, URI)
  847    ),
  848    throw(error(Formal, context(_, status(Code, Comment)))).
  849
  850
  851successful_code(Code) :-
  852    between(200, 299, Code).
  853
  854%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  855%
  856%   True if we have exceeded the maximum redirection length (default 10).
  857
  858redirect_limit_exceeded(Options, Max) :-
  859    option(visited(Visited), Options, []),
  860    length(Visited, N),
  861    option(max_redirect(Max), Options, 10),
  862    (Max == infinite -> fail ; N > Max).
  863
  864
  865%!  redirect_loop(+Parts, +Options) is semidet.
  866%
  867%   True if we are in  a  redirection   loop.  Note  that some sites
  868%   redirect once to the same place using  cookies or similar, so we
  869%   allow for two tries. In fact,   we  should probably test whether
  870%   authorization or cookie headers have changed.
  871
  872redirect_loop(Parts, Options) :-
  873    option(visited(Visited), Options, []),
  874    include(==(Parts), Visited, Same),
  875    length(Same, Count),
  876    Count > 2.
  877
  878
  879%!  redirect_options(+Parts, +RedirectedParts, +Options0, -Options) is det.
  880%
  881%   A redirect from a POST should do  a   GET  on the returned URI. This
  882%   means we must remove the method(post)   and  post(Data) options from
  883%   the original option-list.
  884%
  885%   If we are connecting over a Unix   domain socket we drop this option
  886%   if the redirect host does not match the initial host.
  887
  888redirect_options(Parts, RedirectedParts, Options0, Options) :-
  889    select_option(unix_socket(_), Options0, Options1),
  890    memberchk(host(Host), Parts),
  891    memberchk(host(RHost), RedirectedParts),
  892    debug(http(redirect), 'http_open: redirecting AF_UNIX ~w to ~w',
  893          [Host, RHost]),
  894    Host \== RHost,
  895    !,
  896    redirect_options(Options1, Options).
  897redirect_options(_, _, Options0, Options) :-
  898    redirect_options(Options0, Options).
  899
  900redirect_options(Options0, Options) :-
  901    (   select_option(post(_), Options0, Options1)
  902    ->  true
  903    ;   Options1 = Options0
  904    ),
  905    (   select_option(method(Method), Options1, Options),
  906        \+ redirect_method(Method)
  907    ->  true
  908    ;   Options = Options1
  909    ).
  910
  911redirect_method(delete).
  912redirect_method(get).
  913redirect_method(head).
  914
  915
  916%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  917%
  918%   Map HTTP error codes to Prolog errors.
  919%
  920%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  921%           counterpart.
  922
  923map_error_code(401, permission_error).
  924map_error_code(403, permission_error).
  925map_error_code(404, existence_error).
  926map_error_code(405, permission_error).
  927map_error_code(407, permission_error).
  928map_error_code(410, existence_error).
  929
  930redirect_code(301).                     % Moved Permanently
  931redirect_code(302).                     % Found (previously "Moved Temporary")
  932redirect_code(303).                     % See Other
  933redirect_code(307).                     % Temporary Redirect
  934
  935authenticate_code(401).
  936
  937%!  open_socket(+Address, -StreamPair, +Options) is det.
  938%
  939%   Create and connect a client socket to Address.  Options
  940%
  941%       * timeout(+Timeout)
  942%       Sets timeout on the stream, *after* connecting the
  943%       socket.
  944%
  945%   @tbd    Make timeout also work on tcp_connect/4.
  946%   @tbd    This is the same as do_connect/4 in http_client.pl
  947
  948open_socket(Address, StreamPair, Options) :-
  949    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  950    tcp_connect(Address, StreamPair, Options),
  951    stream_pair(StreamPair, In, Out),
  952    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  953    set_stream(In, record_position(false)),
  954    (   option(timeout(Timeout), Options)
  955    ->  set_stream(In, timeout(Timeout))
  956    ;   true
  957    ).
  958
  959
  960return_version(Options, Major-Minor) :-
  961    option(version(Major-Minor), Options, _).
  962
  963return_size(Options, Headers) :-
  964    (   memberchk(content_length(Size), Headers)
  965    ->  option(size(Size), Options, _)
  966    ;   true
  967    ).
  968
  969return_fields([], _).
  970return_fields([header(Name, Value)|T], Headers) :-
  971    !,
  972    (   Term =.. [Name,Value],
  973        memberchk(Term, Headers)
  974    ->  true
  975    ;   Value = ''
  976    ),
  977    return_fields(T, Headers).
  978return_fields([_|T], Lines) :-
  979    return_fields(T, Lines).
  980
  981return_headers(Options, Headers) :-
  982    option(headers(Headers), Options, _).
  983
  984%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  985%
  986%   Parse the header lines for   the  headers(-List) option. Invalid
  987%   header   lines   are   skipped,   printing   a   warning   using
  988%   pring_message/2.
  989
  990parse_headers([], []) :- !.
  991parse_headers([Line|Lines], Headers) :-
  992    catch(http_parse_header(Line, [Header]), Error, true),
  993    (   var(Error)
  994    ->  Headers = [Header|More]
  995    ;   print_message(warning, Error),
  996        Headers = More
  997    ),
  998    parse_headers(Lines, More).
  999
 1000
 1001%!  return_final_url(+Options) is semidet.
 1002%
 1003%   If Options contains final_url(URL), unify URL with the final
 1004%   URL after redirections.
 1005
 1006return_final_url(Options) :-
 1007    option(final_url(URL), Options),
 1008    var(URL),
 1009    !,
 1010    option(visited([Parts|_]), Options),
 1011    parts_uri(Parts, URL).
 1012return_final_url(_).
 1013
 1014
 1015%!  transfer_encoding_filter(+Lines, +In0, -In, +Options) is det.
 1016%
 1017%   Install filters depending on the transfer  encoding. If In0 is a
 1018%   stream-pair, we close the output   side. If transfer-encoding is
 1019%   not specified, the content-encoding is  interpreted as a synonym
 1020%   for transfer-encoding, because many   servers incorrectly depend
 1021%   on  this.  Exceptions  to  this   are  content-types  for  which
 1022%   disable_encoding_filter/1 holds.
 1023
 1024transfer_encoding_filter(Lines, In0, In, Options) :-
 1025    transfer_encoding(Lines, Encoding),
 1026    !,
 1027    transfer_encoding_filter_(Encoding, In0, In, Options).
 1028transfer_encoding_filter(Lines, In0, In, Options) :-
 1029    content_encoding(Lines, Encoding),
 1030    content_type(Lines, Type),
 1031    \+ http:disable_encoding_filter(Type),
 1032    !,
 1033    transfer_encoding_filter_(Encoding, In0, In, Options).
 1034transfer_encoding_filter(_, In, In, _Options).
 1035
 1036transfer_encoding_filter_(Encoding, In0, In, Options) :-
 1037    option(raw_encoding(Encoding), Options),
 1038    !,
 1039    In = In0.
 1040transfer_encoding_filter_(Encoding, In0, In, _Options) :-
 1041    stream_pair(In0, In1, Out),
 1042    (   nonvar(Out)
 1043    ->  close(Out)
 1044    ;   true
 1045    ),
 1046    (   http:encoding_filter(Encoding, In1, In)
 1047    ->  true
 1048    ;   autoload_encoding(Encoding),
 1049        http:encoding_filter(Encoding, In1, In)
 1050    ->  true
 1051    ;   domain_error(http_encoding, Encoding)
 1052    ).
 1053
 1054:- multifile
 1055    autoload_encoding/1. 1056
 1057:- if(exists_source(library(zlib))). 1058autoload_encoding(gzip) :-
 1059    use_module(library(zlib)).
 1060:- endif. 1061:- if(exists_source(library(http/http_stream))). 1062autoload_encoding(chunked) :-
 1063    use_module(library(http/http_stream)).
 1064:- endif. 1065
 1066content_type(Lines, Type) :-
 1067    member(Line, Lines),
 1068    phrase(field('content-type'), Line, Rest),
 1069    !,
 1070    atom_codes(Type, Rest).
 1071
 1072%!  http:disable_encoding_filter(+ContentType) is semidet.
 1073%
 1074%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
 1075%   encoding for specific values of   ContentType. This predicate is
 1076%   multifile and can thus be extended by the user.
 1077
 1078http:disable_encoding_filter('application/x-gzip').
 1079http:disable_encoding_filter('application/x-tar').
 1080http:disable_encoding_filter('x-world/x-vrml').
 1081http:disable_encoding_filter('application/zip').
 1082http:disable_encoding_filter('application/x-gzip').
 1083http:disable_encoding_filter('application/x-zip-compressed').
 1084http:disable_encoding_filter('application/x-compress').
 1085http:disable_encoding_filter('application/x-compressed').
 1086http:disable_encoding_filter('application/x-spoon').
 1087
 1088%!  transfer_encoding(+Lines, -Encoding) is semidet.
 1089%
 1090%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
 1091%   header.
 1092
 1093transfer_encoding(Lines, Encoding) :-
 1094    what_encoding(transfer_encoding, Lines, Encoding).
 1095
 1096what_encoding(What, Lines, Encoding) :-
 1097    member(Line, Lines),
 1098    phrase(encoding_(What, Debug), Line, Rest),
 1099    !,
 1100    atom_codes(Encoding, Rest),
 1101    debug(http(What), '~w: ~p', [Debug, Rest]).
 1102
 1103encoding_(content_encoding, 'Content-encoding') -->
 1104    field('content-encoding').
 1105encoding_(transfer_encoding, 'Transfer-encoding') -->
 1106    field('transfer-encoding').
 1107
 1108%!  content_encoding(+Lines, -Encoding) is semidet.
 1109%
 1110%   True if Encoding is the value of the =|Content-encoding|=
 1111%   header.
 1112
 1113content_encoding(Lines, Encoding) :-
 1114    what_encoding(content_encoding, Lines, Encoding).
 1115
 1116%!  read_header(+In:stream, +Parts, -Version, -Code:int,
 1117%!  -Comment:atom, -Lines:list) is det.
 1118%
 1119%   Read the HTTP reply-header.  If the reply is completely empty
 1120%   an existence error is thrown.  If the replied header is
 1121%   otherwise invalid a 500 HTTP error is simulated, having the
 1122%   comment =|Invalid reply header|=.
 1123%
 1124%   @param Parts    A list of compound terms that describe the
 1125%                   parsed request URI.
 1126%   @param Version  HTTP reply version as Major-Minor pair
 1127%   @param Code     Numeric HTTP reply-code
 1128%   @param Comment  Comment of reply-code as atom
 1129%   @param Lines    Remaining header lines as code-lists.
 1130%
 1131%   @error existence_error(http_reply, Uri)
 1132
 1133read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1134    read_line_to_codes(In, Line),
 1135    (   Line == end_of_file
 1136    ->  parts_uri(Parts, Uri),
 1137        existence_error(http_reply,Uri)
 1138    ;   true
 1139    ),
 1140    Line \== end_of_file,
 1141    phrase(first_line(Major-Minor, Code, Comment), Line),
 1142    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1143    read_line_to_codes(In, Line2),
 1144    rest_header(Line2, In, Lines),
 1145    !,
 1146    (   debugging(http(open))
 1147    ->  forall(member(HL, Lines),
 1148               debug(http(open), '~s', [HL]))
 1149    ;   true
 1150    ).
 1151read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1152
 1153rest_header([], _, []) :- !.            % blank line: end of header
 1154rest_header(L0, In, [L0|L]) :-
 1155    read_line_to_codes(In, L1),
 1156    rest_header(L1, In, L).
 1157
 1158%!  content_length(+Header, -Length:int) is semidet.
 1159%
 1160%   Find the Content-Length in an HTTP reply-header.
 1161
 1162content_length(Lines, Length) :-
 1163    member(Line, Lines),
 1164    phrase(content_length(Length0), Line),
 1165    !,
 1166    Length = Length0.
 1167
 1168location(Lines, RequestURI) :-
 1169    member(Line, Lines),
 1170    phrase(atom_field(location, RequestURI), Line),
 1171    !.
 1172
 1173connection(Lines, Connection) :-
 1174    member(Line, Lines),
 1175    phrase(atom_field(connection, Connection0), Line),
 1176    !,
 1177    Connection = Connection0.
 1178
 1179first_line(Major-Minor, Code, Comment) -->
 1180    "HTTP/", integer(Major), ".", integer(Minor),
 1181    skip_blanks,
 1182    integer(Code),
 1183    skip_blanks,
 1184    rest(Comment).
 1185
 1186atom_field(Name, Value) -->
 1187    field(Name),
 1188    rest(Value).
 1189
 1190content_length(Len) -->
 1191    field('content-length'),
 1192    integer(Len).
 1193
 1194field(Name) -->
 1195    { atom_codes(Name, Codes) },
 1196    field_codes(Codes).
 1197
 1198field_codes([]) -->
 1199    ":",
 1200    skip_blanks.
 1201field_codes([H|T]) -->
 1202    [C],
 1203    { match_header_char(H, C)
 1204    },
 1205    field_codes(T).
 1206
 1207match_header_char(C, C) :- !.
 1208match_header_char(C, U) :-
 1209    code_type(C, to_lower(U)),
 1210    !.
 1211match_header_char(0'_, 0'-).
 1212
 1213
 1214skip_blanks -->
 1215    [C],
 1216    { code_type(C, white)
 1217    },
 1218    !,
 1219    skip_blanks.
 1220skip_blanks -->
 1221    [].
 1222
 1223%!  integer(-Int)//
 1224%
 1225%   Read 1 or more digits and return as integer.
 1226
 1227integer(Code) -->
 1228    digit(D0),
 1229    digits(D),
 1230    { number_codes(Code, [D0|D])
 1231    }.
 1232
 1233digit(C) -->
 1234    [C],
 1235    { code_type(C, digit)
 1236    }.
 1237
 1238digits([D0|D]) -->
 1239    digit(D0),
 1240    !,
 1241    digits(D).
 1242digits([]) -->
 1243    [].
 1244
 1245%!  rest(-Atom:atom)//
 1246%
 1247%   Get rest of input as an atom.
 1248
 1249rest(Atom) --> call(rest_(Atom)).
 1250
 1251rest_(Atom, L, []) :-
 1252    atom_codes(Atom, L).
 1253
 1254
 1255%!  reply_header(+Lines, +Options) is det.
 1256%
 1257%   Return the entire reply header as  a list of strings to the option
 1258%   raw_headers(-Headers).
 1259
 1260reply_header(Lines, Options) :-
 1261    option(raw_headers(Headers), Options),
 1262    !,
 1263    maplist(string_codes, Headers, Lines).
 1264reply_header(_, _).
 1265
 1266
 1267                 /*******************************
 1268                 *   AUTHORIZATION MANAGEMENT   *
 1269                 *******************************/
 1270
 1271%!  http_set_authorization(+URL, +Authorization) is det.
 1272%
 1273%   Set user/password to supply with URLs   that have URL as prefix.
 1274%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1275%   authorization is cleared.  For example:
 1276%
 1277%   ```
 1278%   ?- http_set_authorization('http://www.example.com/private/',
 1279%                             basic('John', 'Secret'))
 1280%   ```
 1281%
 1282%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1283%           too.
 1284
 1285:- dynamic
 1286    stored_authorization/2,
 1287    cached_authorization/2. 1288
 1289http_set_authorization(URL, Authorization) :-
 1290    must_be(atom, URL),
 1291    retractall(stored_authorization(URL, _)),
 1292    (   Authorization = (-)
 1293    ->  true
 1294    ;   check_authorization(Authorization),
 1295        assert(stored_authorization(URL, Authorization))
 1296    ),
 1297    retractall(cached_authorization(_,_)).
 1298
 1299check_authorization(Var) :-
 1300    var(Var),
 1301    !,
 1302    instantiation_error(Var).
 1303check_authorization(basic(User, Password)) :-
 1304    must_be(atom, User),
 1305    must_be(text, Password).
 1306check_authorization(digest(User, Password)) :-
 1307    must_be(atom, User),
 1308    must_be(text, Password).
 1309
 1310%!  authorization(+URL, -Authorization) is semidet.
 1311%
 1312%   True if Authorization must be supplied for URL.
 1313%
 1314%   @tbd    Cleanup cache if it gets too big.
 1315
 1316authorization(_, _) :-
 1317    \+ stored_authorization(_, _),
 1318    !,
 1319    fail.
 1320authorization(URL, Authorization) :-
 1321    cached_authorization(URL, Authorization),
 1322    !,
 1323    Authorization \== (-).
 1324authorization(URL, Authorization) :-
 1325    (   stored_authorization(Prefix, Authorization),
 1326        sub_atom(URL, 0, _, _, Prefix)
 1327    ->  assert(cached_authorization(URL, Authorization))
 1328    ;   assert(cached_authorization(URL, -)),
 1329        fail
 1330    ).
 1331
 1332add_authorization(_, Options, Options) :-
 1333    option(authorization(_), Options),
 1334    !.
 1335add_authorization(Parts, Options0, Options) :-
 1336    url_part(user(User), Parts),
 1337    url_part(password(Passwd), Parts),
 1338    !,
 1339    Options = [authorization(basic(User,Passwd))|Options0].
 1340add_authorization(Parts, Options0, Options) :-
 1341    stored_authorization(_, _) ->   % quick test to avoid work
 1342    parts_uri(Parts, URL),
 1343    authorization(URL, Auth),
 1344    !,
 1345    Options = [authorization(Auth)|Options0].
 1346add_authorization(_, Options, Options).
 1347
 1348
 1349%!  parse_url_ex(+URL, -Parts)
 1350%
 1351%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1352%   fragment).
 1353
 1354parse_url_ex(URL, [uri(URL)|Parts]) :-
 1355    uri_components(URL, Components),
 1356    phrase(components(Components), Parts),
 1357    (   option(host(_), Parts)
 1358    ->  true
 1359    ;   domain_error(url, URL)
 1360    ).
 1361
 1362components(Components) -->
 1363    uri_scheme(Components),
 1364    uri_path(Components),
 1365    uri_authority(Components),
 1366    uri_request_uri(Components).
 1367
 1368uri_scheme(Components) -->
 1369    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1370    !,
 1371    [ scheme(Scheme)
 1372    ].
 1373uri_scheme(_) --> [].
 1374
 1375uri_path(Components) -->
 1376    { uri_data(path, Components, Path0), nonvar(Path0),
 1377      (   Path0 == ''
 1378      ->  Path = (/)
 1379      ;   Path = Path0
 1380      )
 1381    },
 1382    !,
 1383    [ path(Path)
 1384    ].
 1385uri_path(_) --> [].
 1386
 1387uri_authority(Components) -->
 1388    { uri_data(authority, Components, Auth), nonvar(Auth),
 1389      !,
 1390      uri_authority_components(Auth, Data)
 1391    },
 1392    [ authority(Auth) ],
 1393    auth_field(user, Data),
 1394    auth_field(password, Data),
 1395    auth_field(host, Data),
 1396    auth_field(port, Data).
 1397uri_authority(_) --> [].
 1398
 1399auth_field(Field, Data) -->
 1400    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1401      !,
 1402      (   atom(EncValue)
 1403      ->  uri_encoded(query_value, Value, EncValue)
 1404      ;   Value = EncValue
 1405      ),
 1406      Part =.. [Field,Value]
 1407    },
 1408    [ Part ].
 1409auth_field(_, _) --> [].
 1410
 1411uri_request_uri(Components) -->
 1412    { uri_data(path, Components, Path0),
 1413      uri_data(search, Components, Search),
 1414      (   Path0 == ''
 1415      ->  Path = (/)
 1416      ;   Path = Path0
 1417      ),
 1418      uri_data(path, Components2, Path),
 1419      uri_data(search, Components2, Search),
 1420      uri_components(RequestURI, Components2)
 1421    },
 1422    [ request_uri(RequestURI)
 1423    ].
 1424
 1425%!  parts_scheme(+Parts, -Scheme) is det.
 1426%!  parts_uri(+Parts, -URI) is det.
 1427%!  parts_request_uri(+Parts, -RequestURI) is det.
 1428%!  parts_search(+Parts, -Search) is det.
 1429%!  parts_authority(+Parts, -Authority) is semidet.
 1430
 1431parts_scheme(Parts, Scheme) :-
 1432    url_part(scheme(Scheme), Parts),
 1433    !.
 1434parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1435    url_part(protocol(Scheme), Parts),
 1436    !.
 1437parts_scheme(_, http).
 1438
 1439parts_authority(Parts, Auth) :-
 1440    url_part(authority(Auth), Parts),
 1441    !.
 1442parts_authority(Parts, Auth) :-
 1443    url_part(host(Host), Parts, _),
 1444    url_part(port(Port), Parts, _),
 1445    url_part(user(User), Parts, _),
 1446    url_part(password(Password), Parts, _),
 1447    uri_authority_components(Auth,
 1448                             uri_authority(User, Password, Host, Port)).
 1449
 1450parts_request_uri(Parts, RequestURI) :-
 1451    option(request_uri(RequestURI), Parts),
 1452    !.
 1453parts_request_uri(Parts, RequestURI) :-
 1454    url_part(path(Path), Parts, /),
 1455    ignore(parts_search(Parts, Search)),
 1456    uri_data(path, Data, Path),
 1457    uri_data(search, Data, Search),
 1458    uri_components(RequestURI, Data).
 1459
 1460parts_search(Parts, Search) :-
 1461    option(query_string(Search), Parts),
 1462    !.
 1463parts_search(Parts, Search) :-
 1464    option(search(Fields), Parts),
 1465    !,
 1466    uri_query_components(Search, Fields).
 1467
 1468
 1469parts_uri(Parts, URI) :-
 1470    option(uri(URI), Parts),
 1471    !.
 1472parts_uri(Parts, URI) :-
 1473    parts_scheme(Parts, Scheme),
 1474    ignore(parts_authority(Parts, Auth)),
 1475    parts_request_uri(Parts, RequestURI),
 1476    uri_components(RequestURI, Data),
 1477    uri_data(scheme, Data, Scheme),
 1478    uri_data(authority, Data, Auth),
 1479    uri_components(URI, Data).
 1480
 1481parts_port(Parts, Port) :-
 1482    parts_scheme(Parts, Scheme),
 1483    default_port(Scheme, DefPort),
 1484    url_part(port(Port), Parts, DefPort).
 1485
 1486url_part(Part, Parts) :-
 1487    Part =.. [Name,Value],
 1488    Gen =.. [Name,RawValue],
 1489    option(Gen, Parts),
 1490    !,
 1491    Value = RawValue.
 1492
 1493url_part(Part, Parts, Default) :-
 1494    Part =.. [Name,Value],
 1495    Gen =.. [Name,RawValue],
 1496    (   option(Gen, Parts)
 1497    ->  Value = RawValue
 1498    ;   Value = Default
 1499    ).
 1500
 1501
 1502                 /*******************************
 1503                 *            COOKIES           *
 1504                 *******************************/
 1505
 1506write_cookies(Out, Parts, Options) :-
 1507    http:write_cookies(Out, Parts, Options),
 1508    !.
 1509write_cookies(_, _, _).
 1510
 1511update_cookies(_, _, _) :-
 1512    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1513    !.
 1514update_cookies(Lines, Parts, Options) :-
 1515    (   member(Line, Lines),
 1516        phrase(atom_field('set_cookie', CookieData), Line),
 1517        http:update_cookies(CookieData, Parts, Options),
 1518        fail
 1519    ;   true
 1520    ).
 1521
 1522
 1523                 /*******************************
 1524                 *           OPEN ANY           *
 1525                 *******************************/
 1526
 1527:- multifile iostream:open_hook/6. 1528
 1529%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1530%!                     +Options0, -Options) is semidet.
 1531%
 1532%   Hook implementation that makes  open_any/5   support  =http= and
 1533%   =https= URLs for =|Mode == read|=.
 1534
 1535iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1536    (atom(URL) -> true ; string(URL)),
 1537    uri_is_global(URL),
 1538    uri_components(URL, Components),
 1539    uri_data(scheme, Components, Scheme),
 1540    http_scheme(Scheme),
 1541    !,
 1542    Options = Options0,
 1543    Close = close(Stream),
 1544    http_open(URL, Stream, Options0).
 1545
 1546http_scheme(http).
 1547http_scheme(https).
 1548
 1549
 1550                 /*******************************
 1551                 *          KEEP-ALIVE          *
 1552                 *******************************/
 1553
 1554%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1555%!                      +Stream0, -Stream,
 1556%!                      +Options) is det.
 1557
 1558consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1559    option(connection(Asked), Options),
 1560    keep_alive(Asked),
 1561    connection(Lines, Given),
 1562    keep_alive(Given),
 1563    content_length(Lines, Bytes),
 1564    !,
 1565    stream_pair(StreamPair, In0, _),
 1566    connection_address(Host, Parts, HostPort),
 1567    debug(http(connection),
 1568          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1569    stream_range_open(In0, In,
 1570                      [ size(Bytes),
 1571                        onclose(keep_alive(StreamPair, HostPort))
 1572                      ]).
 1573consider_keep_alive(_, _, _, Stream, Stream, _).
 1574
 1575connection_address(Host, _, Host) :-
 1576    Host = _:_,
 1577    !.
 1578connection_address(Host, Parts, Host:Port) :-
 1579    parts_port(Parts, Port).
 1580
 1581keep_alive(keep_alive) :- !.
 1582keep_alive(Connection) :-
 1583    downcase_atom(Connection, 'keep-alive').
 1584
 1585:- public keep_alive/4. 1586
 1587keep_alive(StreamPair, Host, _In, 0) :-
 1588    !,
 1589    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1590    add_to_pool(Host, StreamPair).
 1591keep_alive(StreamPair, Host, In, Left) :-
 1592    Left < 100,
 1593    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1594    read_incomplete(In, Left),
 1595    add_to_pool(Host, StreamPair),
 1596    !.
 1597keep_alive(StreamPair, _, _, _) :-
 1598    debug(http(connection),
 1599          'Closing connection due to excessive unprocessed input', []),
 1600    (   debugging(http(connection))
 1601    ->  catch(close(StreamPair), E,
 1602              print_message(warning, E))
 1603    ;   close(StreamPair, [force(true)])
 1604    ).
 1605
 1606%!  read_incomplete(+In, +Left) is semidet.
 1607%
 1608%   If we have not all input from  a Keep-alive connection, read the
 1609%   remainder if it is short. Else, we fail and close the stream.
 1610
 1611read_incomplete(In, Left) :-
 1612    catch(setup_call_cleanup(
 1613              open_null_stream(Null),
 1614              copy_stream_data(In, Null, Left),
 1615              close(Null)),
 1616          _,
 1617          fail).
 1618
 1619:- dynamic
 1620    connection_pool/4,              % Hash, Address, Stream, Time
 1621    connection_gc_time/1. 1622
 1623add_to_pool(Address, StreamPair) :-
 1624    keep_connection(Address),
 1625    get_time(Now),
 1626    term_hash(Address, Hash),
 1627    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1628
 1629get_from_pool(Address, StreamPair) :-
 1630    term_hash(Address, Hash),
 1631    retract(connection_pool(Hash, Address, StreamPair, _)).
 1632
 1633%!  keep_connection(+Address) is semidet.
 1634%
 1635%   Succeeds if we want to keep   the  connection open. We currently
 1636%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
 1637%   waiting for the same address. Connections   older than 2 seconds
 1638%   are closed.
 1639
 1640keep_connection(Address) :-
 1641    close_old_connections(2),
 1642    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1643    C =< 10,
 1644    term_hash(Address, Hash),
 1645    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1646    Count =< 2.
 1647
 1648close_old_connections(Timeout) :-
 1649    get_time(Now),
 1650    Before is Now - Timeout,
 1651    (   connection_gc_time(GC),
 1652        GC > Before
 1653    ->  true
 1654    ;   (   retractall(connection_gc_time(_)),
 1655            asserta(connection_gc_time(Now)),
 1656            connection_pool(Hash, Address, StreamPair, Added),
 1657            Added < Before,
 1658            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1659            debug(http(connection),
 1660                  'Closing inactive keep-alive to ~p', [Address]),
 1661            close(StreamPair, [force(true)]),
 1662            fail
 1663        ;   true
 1664        )
 1665    ).
 1666
 1667
 1668%!  http_close_keep_alive(+Address) is det.
 1669%
 1670%   Close all keep-alive connections matching Address. Address is of
 1671%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1672%   closes all currently known keep-alive connections.
 1673
 1674http_close_keep_alive(Address) :-
 1675    forall(get_from_pool(Address, StreamPair),
 1676           close(StreamPair, [force(true)])).
 1677
 1678%!  keep_alive_error(+Error, +StreamPair)
 1679%
 1680%   Deal with an error from  reusing   a  keep-alive  connection. If the
 1681%   error is due to an I/O error  or end-of-file, fail to backtrack over
 1682%   get_from_pool/2. Otherwise it is a real   error and we thus re-raise
 1683%   it. In all cases we close StreamPair rather than returning it to the
 1684%   pool as we may have done a partial read and thus be out of sync wrt.
 1685%   the HTTP protocol.
 1686
 1687keep_alive_error(error(keep_alive(closed), _), _) :-
 1688    !,
 1689    debug(http(connection), 'Keep-alive connection was closed', []),
 1690    fail.
 1691keep_alive_error(error(io_error(_,_), _), StreamPair) :-
 1692    !,
 1693    close(StreamPair, [force(true)]),
 1694    debug(http(connection), 'IO error on Keep-alive connection', []),
 1695    fail.
 1696keep_alive_error(Error, StreamPair) :-
 1697    close(StreamPair, [force(true)]),
 1698    throw(Error).
 1699
 1700
 1701                 /*******************************
 1702                 *     HOOK DOCUMENTATION       *
 1703                 *******************************/
 1704
 1705%!  http:open_options(+Parts, -Options) is nondet.
 1706%
 1707%   This hook is used by the HTTP   client library to define default
 1708%   options based on the the broken-down request-URL.  The following
 1709%   example redirects all trafic, except for localhost over a proxy:
 1710%
 1711%       ```
 1712%       :- multifile
 1713%           http:open_options/2.
 1714%
 1715%       http:open_options(Parts, Options) :-
 1716%           option(host(Host), Parts),
 1717%           Host \== localhost,
 1718%           Options = [proxy('proxy.local', 3128)].
 1719%       ```
 1720%
 1721%   This hook may return multiple   solutions.  The returned options
 1722%   are  combined  using  merge_options/3  where  earlier  solutions
 1723%   overrule later solutions.
 1724
 1725%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1726%
 1727%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1728%   open stream to the HTTP server, Parts is the broken-down request
 1729%   (see uri_components/2) and Options is the list of options passed
 1730%   to http_open.  The predicate is called as if using ignore/1.
 1731%
 1732%   @see complements http:update_cookies/3.
 1733%   @see library(http/http_cookie) implements cookie handling on
 1734%   top of these hooks.
 1735
 1736%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1737%
 1738%   Update the cookie database.  CookieData  is   the  value  of the
 1739%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1740%   uri_components/2) and Options is the list   of options passed to
 1741%   http_open.
 1742%
 1743%   @see complements http:write_cookies
 1744%   @see library(http/http_cookies) implements cookie handling on
 1745%   top of these hooks.