View source with formatted comments or as raw
    1/*  Part of SWISH
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2015-2017, VU University Amsterdam
    7			      CWI Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(gitty_driver_files,
   37	  [ gitty_close/1,		% +Store
   38	    gitty_file/4,		% +Store, ?Name, ?Ext, ?Hash
   39
   40	    gitty_update_head/4,	% +Store, +Name, +OldCommit, +NewCommit
   41	    delete_head/2,		% +Store, +Name
   42	    set_head/3,			% +Store, +Name, +Hash
   43	    store_object/4,		% +Store, +Hash, +Header, +Data
   44	    delete_object/2,		% +Store, +Hash
   45
   46	    gitty_hash/2,		% +Store, ?Hash
   47	    load_plain_commit/3,	% +Store, +Hash, -Meta
   48	    load_object/5,		% +Store, +Hash, -Data, -Type, -Size
   49	    gitty_object_file/3,	% +Store, +Hash, -File
   50
   51	    repack_objects/2,           % +Store, +Options
   52            pack_objects/6,             % +Store, +Objs, +Packs, +PackDir,
   53					% -File, +Opts
   54            unpack_packs/1,             % +Store
   55            unpack_pack/2,              % +Store, +PackFile
   56
   57            attach_pack/2,		% +Store, +PackFile
   58            gitty_fsck/1,               % +Store
   59            fsck_pack/1,                % +PackFile
   60            load_object_from_pack/4,	% +Hash, -Data, -Type, -Size
   61
   62	    gitty_rescan/1		% Store
   63	  ]).   64:- use_module(library(apply)).   65:- use_module(library(zlib)).   66:- use_module(library(filesex)).   67:- use_module(library(lists)).   68:- use_module(library(apply)).   69:- use_module(library(error)).   70:- use_module(library(debug)).   71:- use_module(library(zlib)).   72:- use_module(library(hash_stream)).   73:- use_module(library(option)).   74:- use_module(library(dcg/basics)).   75
   76/** <module> Gitty plain files driver
   77
   78This version of the driver uses plain files  to store the gitty data. It
   79consists of a nested directory  structure   with  files  named after the
   80hash. Objects and hash computation is the same as for `git`. The _heads_
   81(files) are computed on startup by scanning all objects. There is a file
   82=ref/head= that is updated if a head is updated. Other clients can watch
   83this file and update their notion  of   the  head. This implies that the
   84store can handle multiple clients that can  access a shared file system,
   85optionally shared using NFS from different machines.
   86
   87The store is simple and robust. The  main disadvantages are long startup
   88times as the store holds more objects and relatively high disk usage due
   89to rounding the small objects to disk allocation units.
   90
   91@bug	Shared access does not work on Windows.
   92*/
   93
   94:- dynamic
   95    head/4,				% Store, Name, Ext, Hash
   96    store/2,				% Store, Updated
   97    commit/3,				% Store, Hash, Meta
   98    heads_input_stream_cache/2,		% Store, Stream
   99    pack_object/6,                      % Hash, Type, Size, Offset, Store,PackFile
  100    attached_packs/1,                   % Store
  101    attached_pack/2.                    % Store, PackFile
  102
  103:- volatile
  104    head/4,
  105    store/2,
  106    commit/3,
  107    heads_input_stream_cache/2,
  108    pack_object/6,
  109    attached_packs/1,
  110    attached_pack/2.  111
  112:- multifile
  113    gitty:check_object/4.  114
  115% enable/disable syncing remote servers running on  the same file store.
  116% This facility requires shared access to files and thus doesn't work on
  117% Windows.
  118
  119:- if(current_prolog_flag(windows, true)).  120remote_sync(false).
  121:- else.  122remote_sync(true).
  123:- endif.  124
  125%!  gitty_close(+Store) is det.
  126%
  127%   Close resources associated with a store.
  128
  129gitty_close(Store) :-
  130    (   retract(heads_input_stream_cache(Store, In))
  131    ->  close(In)
  132    ;   true
  133    ),
  134    retractall(head(Store,_,_,_)),
  135    retractall(store(Store,_)),
  136    retractall(pack_object(_,_,_,_,Store,_)).
  137
  138
  139%%	gitty_file(+Store, ?File, ?Ext, ?Head) is nondet.
  140%
  141%	True when File entry in the  gitty   store  and Head is the HEAD
  142%	revision.
  143
  144gitty_file(Store, Head, Ext, Hash) :-
  145	gitty_scan(Store),
  146	head(Store, Head, Ext, Hash).
  147
  148%%	load_plain_commit(+Store, +Hash, -Meta:dict) is semidet.
  149%
  150%	Load the commit data as a  dict.   Loaded  commits are cached in
  151%	commit/3.  Note  that  only  adding  a  fact  to  the  cache  is
  152%	synchronized. This means that during  a   race  situation we may
  153%	load the same object  multiple  times   from  disk,  but this is
  154%	harmless while a lock  around   the  whole  predicate serializes
  155%	loading different objects, which is not needed.
  156
  157load_plain_commit(Store, Hash, Meta) :-
  158	must_be(atom, Store),
  159	must_be(atom, Hash),
  160	commit(Store, Hash, Meta), !.
  161load_plain_commit(Store, Hash, Meta) :-
  162	load_object(Store, Hash, String, _, _),
  163	term_string(Meta0, String, []),
  164	with_mutex(gitty_commit_cache,
  165		   assert_cached_commit(Store, Hash, Meta0)),
  166	Meta = Meta0.
  167
  168assert_cached_commit(Store, Hash, Meta) :-
  169	commit(Store, Hash, Meta0), !,
  170	assertion(Meta0 =@= Meta).
  171assert_cached_commit(Store, Hash, Meta) :-
  172	assertz(commit(Store, Hash, Meta)).
  173
  174%%	store_object(+Store, +Hash, +Header:string, +Data:string) is det.
  175%
  176%	Store the actual object. The store  must associate Hash with the
  177%	concatenation of Hdr and Data.
  178
  179store_object(Store, Hash, _Hdr, _Data) :-
  180        pack_object(Hash, _Type, _Size, _Offset, Store, _Pack), !.
  181store_object(Store, Hash, Hdr, Data) :-
  182        gitty_object_file(Store, Hash, Path),
  183        with_mutex(gitty_file, exists_or_create(Path, Out0)),
  184	(   var(Out0)
  185	->  true
  186	;   setup_call_cleanup(
  187		zopen(Out0, Out, [format(gzip)]),
  188		format(Out, '~s~s', [Hdr, Data]),
  189		close(Out))
  190	).
  191
  192exists_or_create(Path, _Out) :-
  193	exists_file(Path), !.
  194exists_or_create(Path, Out) :-
  195        file_directory_name(Path, Dir),
  196        make_directory_path(Dir),
  197        open(Path, write, Out, [encoding(utf8), lock(write)]).
  198
  199ensure_directory(Dir) :-
  200	exists_directory(Dir), !.
  201ensure_directory(Dir) :-
  202	make_directory(Dir).
  203
  204%%	load_object(+Store, +Hash, -Data, -Type, -Size) is det.
  205%
  206%	Load the given object.
  207
  208load_object(_Store, Hash, Data, Type, Size) :-
  209        load_object_from_pack(Hash, Data0, Type0, Size0), !,
  210        f(Data0, Type0, Size0) = f(Data, Type, Size).
  211load_object(Store, Hash, Data, Type, Size) :-
  212	gitty_object_file(Store, Hash, Path),
  213        exists_file(Path),
  214	setup_call_cleanup(
  215	    gzopen(Path, read, In, [encoding(utf8)]),
  216	    read_object(In, Data, Type, Size),
  217	    close(In)).
  218
  219%!	load_object_header(+Store, +Hash, -Type, -Size) is det.
  220%
  221%	Load the header of an object
  222
  223load_object_header(Store, Hash, Type, Size) :-
  224	gitty_object_file(Store, Hash, Path),
  225	setup_call_cleanup(
  226	    gzopen(Path, read, In, [encoding(utf8)]),
  227	    read_object_hdr(In, Type, Size),
  228	    close(In)).
  229
  230read_object(In, Data, Type, Size) :-
  231	read_object_hdr(In, Type, Size),
  232	read_string(In, _, Data).
  233
  234read_object_hdr(In, Type, Size) :-
  235	get_code(In, C0),
  236	read_hdr(C0, In, Hdr),
  237	phrase((nonblanks(TypeChars), " ", integer(Size)), Hdr),
  238	atom_codes(Type, TypeChars).
  239
  240read_hdr(C, In, [C|T]) :-
  241	C > 0, !,
  242	get_code(In, C1),
  243	read_hdr(C1, In, T).
  244read_hdr(_, _, []).
  245
  246%%	gitty_rescan(?Store) is det.
  247%
  248%	Update our view of the shared   storage  for all stores matching
  249%	Store.
  250
  251gitty_rescan(Store) :-
  252	retractall(store(Store, _)).
  253
  254%%	gitty_scan(+Store) is det.
  255%
  256%	Scan gitty store for files (entries),   filling  head/3. This is
  257%	performed lazily at first access to the store.
  258%
  259%	@tdb	Possibly we need to maintain a cached version of this
  260%		index to avoid having to open all objects of the gitty
  261%		store.
  262
  263gitty_scan(Store) :-
  264	store(Store, _), !,
  265	remote_updates(Store).
  266gitty_scan(Store) :-
  267	with_mutex(gitty, gitty_scan_sync(Store)).
  268
  269:- thread_local
  270	latest/3.  271
  272gitty_scan_sync(Store) :-
  273	store(Store, _), !.
  274:- if(remote_sync(true)).  275gitty_scan_sync(Store) :-
  276	remote_sync(true), !,
  277        gitty_attach_packs(Store),
  278	restore_heads_from_remote(Store).
  279:- endif.  280gitty_scan_sync(Store) :-
  281        gitty_attach_packs(Store),
  282	read_heads_from_objects(Store).
  283
  284%%	read_heads_from_objects(+Store) is det.
  285%
  286%       Establish the head(Store,File,Ext,Hash) relation  by reading all
  287%       objects and adding a fact for the most recent commit.
  288
  289read_heads_from_objects(Store) :-
  290	gitty_scan_latest(Store),
  291	forall(retract(latest(Name, Hash, _Time)),
  292	       assert_head(Store, Name, Hash)),
  293	get_time(Now),
  294	assertz(store(Store, Now)).
  295
  296assert_head(Store, Name, Hash) :-
  297	file_name_extension(_, Ext, Name),
  298        assertz(head(Store, Name, Ext, Hash)).
  299
  300
  301%%	gitty_scan_latest(+Store)
  302%
  303%	Scans the gitty store, extracting  the   latest  version of each
  304%	named entry.
  305
  306gitty_scan_latest(Store) :-
  307	retractall(head(Store, _, _, _)),
  308	retractall(latest(_, _, _)),
  309	(   gitty_hash(Store, Hash),
  310	    load_object(Store, Hash, Data, commit, _Size),
  311	    term_string(Meta, Data, []),
  312	    _{name:Name, time:Time} :< Meta,
  313	    (	latest(Name, _, OldTime),
  314		OldTime > Time
  315	    ->	true
  316	    ;	retractall(latest(Name, _, _)),
  317		assertz(latest(Name, Hash, Time))
  318	    ),
  319	    fail
  320	;   true
  321	).
  322
  323
  324%%	gitty_hash(+Store, ?Hash) is nondet.
  325%
  326%	True when Hash is an object in the store.
  327
  328gitty_hash(Store, Hash) :-
  329	var(Hash), !,
  330        (   gitty_attach_packs(Store),
  331            pack_object(Hash, _Type, _Size, _Offset, Store, _Pack)
  332        ;   gitty_file_object(Store, Hash)
  333        ).
  334gitty_hash(Store, Hash) :-
  335        (   gitty_attach_packs(Store),
  336            pack_object(Hash, _Type, _Size, _Offset, Store, _Pack)
  337        ->  true
  338        ;   gitty_object_file(Store, Hash, File),
  339            exists_file(File)
  340        ).
  341
  342gitty_file_object(Store, Hash) :-
  343	access_file(Store, exist),
  344	directory_files(Store, Level0),
  345	member(E0, Level0),
  346	E0 \== '..',
  347	atom_length(E0, 2),
  348	directory_file_path(Store, E0, Dir0),
  349	directory_files(Dir0, Level1),
  350	member(E1, Level1),
  351	E1 \== '..',
  352	atom_length(E1, 2),
  353	directory_file_path(Dir0, E1, Dir),
  354	directory_files(Dir, Files),
  355	member(File, Files),
  356	atom_length(File, 36),
  357	atomic_list_concat([E0,E1,File], Hash).
  358
  359%%	delete_object(+Store, +Hash)
  360%
  361%	Delete an existing object
  362
  363delete_object(Store, Hash) :-
  364	gitty_object_file(Store, Hash, File),
  365	delete_file(File).
  366
  367%!	gitty_object_file(+Store, +Hash, -Path) is det.
  368%
  369%	True when Path is the file  at   which  the  object with Hash is
  370%	stored.
  371
  372gitty_object_file(Store, Hash, Path) :-
  373	sub_string(Hash, 0, 2, _, Dir0),
  374	sub_string(Hash, 2, 2, _, Dir1),
  375	sub_string(Hash, 4, _, 0, File),
  376	atomic_list_concat([Store, Dir0, Dir1, File], /, Path).
  377
  378
  379		 /*******************************
  380		 *	      SYNCING		*
  381		 *******************************/
  382
  383%%	gitty_update_head(+Store, +Name, +OldCommit, +NewCommit) is det.
  384%
  385%	Update the head of a gitty  store   for  Name.  OldCommit is the
  386%	current head and NewCommit is the new  head. If Name is created,
  387%	and thus there is no head, OldCommit must be `-`.
  388%
  389%	This operation can fail because another   writer has updated the
  390%	head.  This can both be in-process or another process.
  391
  392gitty_update_head(Store, Name, OldCommit, NewCommit) :-
  393	with_mutex(gitty,
  394		   gitty_update_head_sync(Store, Name, OldCommit, NewCommit)).
  395
  396:- if(remote_sync(true)).  397gitty_update_head_sync(Store, Name, OldCommit, NewCommit) :-
  398	remote_sync(true), !,
  399	setup_call_cleanup(
  400	    heads_output_stream(Store, HeadsOut),
  401	    gitty_update_head_sync(Store, Name, OldCommit, NewCommit, HeadsOut),
  402	    close(HeadsOut)).
  403:- endif.  404gitty_update_head_sync(Store, Name, OldCommit, NewCommit) :-
  405	gitty_update_head_sync2(Store, Name, OldCommit, NewCommit).
  406
  407gitty_update_head_sync(Store, Name, OldCommit, NewCommit, HeadsOut) :-
  408	gitty_update_head_sync2(Store, Name, OldCommit, NewCommit),
  409	format(HeadsOut, '~q.~n', [head(Name, OldCommit, NewCommit)]).
  410
  411gitty_update_head_sync2(Store, Name, OldCommit, NewCommit) :-
  412	gitty_scan(Store),		% fetch remote changes
  413	(   OldCommit == (-)
  414	->  (   head(Store, Name, _, _)
  415	    ->	throw(error(gitty(file_exists(Name),_)))
  416	    ;	assert_head(Store, Name, NewCommit)
  417	    )
  418	;   (   retract(head(Store, Name, _, OldCommit))
  419	    ->	assert_head(Store, Name, NewCommit)
  420	    ;	throw(error(gitty(not_at_head(Name, OldCommit)), _))
  421	    )
  422	).
  423
  424%!	remote_updates(+Store)
  425%
  426%	Watch for remote updates to the store. We only do this if we did
  427%	not do so the last second.
  428
  429:- dynamic
  430	last_remote_sync/2.  431
  432:- if(remote_sync(false)).  433remote_updates(_) :-
  434	remote_sync(false), !.
  435:- endif.  436remote_updates(Store) :-
  437	remote_up_to_data(Store), !.
  438remote_updates(Store) :-
  439	with_mutex(gitty, remote_updates_sync(Store)).
  440
  441remote_updates_sync(Store) :-
  442	remote_up_to_data(Store), !.
  443remote_updates_sync(Store) :-
  444	retractall(last_remote_sync(Store, _)),
  445	get_time(Now),
  446	asserta(last_remote_sync(Store, Now)),
  447	remote_update(Store).
  448
  449remote_up_to_data(Store) :-
  450	last_remote_sync(Store, Last),
  451	get_time(Now),
  452	Now-Last < 1.
  453
  454remote_update(Store) :-
  455	remote_updates(Store, List),
  456	maplist(update_head(Store), List).
  457
  458update_head(Store, head(Name, OldCommit, NewCommit)) :-
  459	(   OldCommit == (-)
  460	->  \+ head(Store, Name, _, _)
  461	;   retract(head(Store, Name, _, OldCommit))
  462	), !,
  463	assert_head(Store, Name, NewCommit).
  464update_head(_, _).
  465
  466%%	remote_updates(+Store, -List) is det.
  467%
  468%	Find updates from other gitties  on   the  same filesystem. Note
  469%	that we have to push/pop the input   context to avoid creating a
  470%	notion of an  input  context   which  possibly  relate  messages
  471%	incorrectly to the sync file.
  472
  473remote_updates(Store, List) :-
  474	heads_input_stream(Store, Stream),
  475	setup_call_cleanup(
  476	    '$push_input_context'(gitty_sync),
  477	    read_new_terms(Stream, List),
  478	    '$pop_input_context').
  479
  480read_new_terms(Stream, Terms) :-
  481	read(Stream, First),
  482	read_new_terms(First, Stream, Terms).
  483
  484read_new_terms(end_of_file, _, List) :- !,
  485	List = [].
  486read_new_terms(Term, Stream, [Term|More]) :-
  487	read(Stream, Term2),
  488	read_new_terms(Term2, Stream, More).
  489
  490heads_output_stream(Store, Out) :-
  491	heads_file(Store, HeadsFile),
  492	open(HeadsFile, append, Out,
  493	     [ encoding(utf8),
  494	       lock(exclusive)
  495	     ]).
  496
  497heads_input_stream(Store, Stream) :-
  498	heads_input_stream_cache(Store, Stream0), !,
  499	Stream = Stream0.
  500heads_input_stream(Store, Stream) :-
  501	heads_file(Store, File),
  502	between(1, 2, _),
  503	catch(open(File, read, In,
  504		   [ encoding(utf8),
  505		     eof_action(reset)
  506		   ]),
  507	      _,
  508	      create_heads_file(Store)), !,
  509	assert(heads_input_stream_cache(Store, In)),
  510	Stream = In.
  511
  512create_heads_file(Store) :-
  513	call_cleanup(
  514	    heads_output_stream(Store, Out),
  515	    close(Out)),
  516	fail.					% always fail!
  517
  518heads_file(Store, HeadsFile) :-
  519	ensure_directory(Store),
  520	directory_file_path(Store, ref, RefDir),
  521	ensure_directory(RefDir),
  522	directory_file_path(RefDir, head, HeadsFile).
  523
  524%%	restore_heads_from_remote(Store)
  525%
  526%	Restore the known heads by reading the remote sync file.
  527
  528restore_heads_from_remote(Store) :-
  529	heads_file(Store, File),
  530	exists_file(File),
  531	setup_call_cleanup(
  532	    open(File, read, In, [encoding(utf8)]),
  533	    restore_heads(Store, In),
  534	    close(In)), !,
  535	get_time(Now),
  536	assertz(store(Store, Now)).
  537restore_heads_from_remote(Store) :-
  538	read_heads_from_objects(Store),
  539	heads_file(Store, File),
  540	setup_call_cleanup(
  541	    open(File, write, Out, [encoding(utf8)]),
  542	    save_heads(Store, Out),
  543	    close(Out)), !.
  544
  545restore_heads(Store, In) :-
  546	read(In, Term0),
  547	Term0 = epoch(_),
  548	read(In, Term1),
  549	restore_heads(Term1, In, Store).
  550
  551restore_heads(end_of_file, _, _) :- !.
  552restore_heads(head(File, _, Hash), In, Store) :-
  553	retractall(head(Store, File, _, _)),
  554	assert_head(Store, File, Hash),
  555	read(In, Term),
  556	restore_heads(Term, In, Store).
  557
  558save_heads(Store, Out) :-
  559	get_time(Now),
  560	format(Out, 'epoch(~0f).~n~n', [Now]),
  561	forall(head(Store, File, _, Hash),
  562	       format(Out, '~q.~n', [head(File, -, Hash)])).
  563
  564
  565%%	delete_head(+Store, +Head) is det.
  566%
  567%	Delete Head from Store. Used  by   gitty_fsck/1  to remove heads
  568%	that have no commits. Should  we   forward  this  to remotes, or
  569%	should they do their own thing?
  570
  571delete_head(Store, Head) :-
  572	retractall(head(Store, Head, _, _)).
  573
  574%%	set_head(+Store, +File, +Hash) is det.
  575%
  576%	Set the head of the given File to Hash
  577
  578set_head(Store, File, Hash) :-
  579	file_name_extension(_, Ext, File),
  580        (   head(Store, File, _, Hash0)
  581        ->  (   Hash == Hash0
  582            ->  true
  583            ;   asserta(head(Store, File, Ext, Hash)),
  584                retractall(head(Store, File, _, Hash0))
  585            )
  586        ;   asserta(head(Store, File, Ext, Hash))
  587        ).
  588
  589
  590		 /*******************************
  591		 *	      PACKS		*
  592		 *******************************/
  593
  594/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  595
  596<pack file> := <header>
  597               <file>*
  598<header>    := "gitty(Version).\n" <object>* "end_of_header.\n"
  599<object>    := obj(Hash, Type, Size, FileSize)
  600- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  601
  602pack_version(1).
  603
  604%!  repack_objects(+Store, +Options) is det.
  605%
  606%   Repack  objects  of  Store  for  reduced  disk  usage  and  enhanced
  607%   performance. By default this picks up all  file objects of the store
  608%   and all existing small pack files.  Options:
  609%
  610%     - small_pack(+Bytes)
  611%     Consider all packs with less than Bytes as small and repack them.
  612%     Default 10Mb
  613%     - min_files(+Count)
  614%     Do not repack if there are less than Count new files.
  615%     Default 1,000.
  616
  617:- debug(gitty(pack)).  618
  619repack_objects(Store, Options) :-
  620    option(min_files(MinFiles), Options, 1_000),
  621    findall(Object, gitty_file_object(Store, Object), Objects),
  622    length(Objects, NewFiles),
  623    debug(gitty(pack), 'Found ~D file objects', [NewFiles]),
  624    (   NewFiles >= MinFiles
  625    ->  pack_files(Store, ExistingPacks),
  626        option(small_pack(MaxSize), Options, 10_000_000),
  627        include(small_file(MaxSize), ExistingPacks, PackFiles),
  628        (   debugging(gitty(pack))
  629        ->  length(PackFiles, PackCount),
  630            debug(gitty(pack), 'Found ~D small packs', [PackCount])
  631        ;   true
  632        ),
  633        directory_file_path(Store, pack, PackDir),
  634        make_directory_path(PackDir),
  635        pack_objects(Store, Objects, PackFiles, PackDir, _PackFile, Options)
  636    ;   debug(gitty(pack), 'Nothing to do', [])
  637    ).
  638
  639small_file(MaxSize, File) :-
  640    size_file(File, Size),
  641    Size < MaxSize.
  642
  643%!  pack_objects(+Store, +Objects, +Packs, +PackDir,
  644%!               -PackFile, +Options) is det.
  645%
  646%   Pack the given objects and pack files into a new pack.
  647
  648pack_objects(Store, Objects, Packs, PackDir, PackFile, Options) :-
  649    with_mutex(gitty_pack,
  650	       pack_objects_sync(Store, Objects, Packs, PackDir,
  651                                 PackFile, Options)).
  652
  653pack_objects_sync(_Store, [], [Pack], _, [Pack], _) :-
  654    !.
  655pack_objects_sync(Store, Objects, Packs, PackDir, PackFilePath, Options) :-
  656    length(Objects, ObjCount),
  657    length(Packs, PackCount),
  658    debug(gitty(pack), 'Repacking ~D objects and ~D packs',
  659          [ObjCount, PackCount]),
  660    maplist(object_info(Store), Objects, FileInfo),
  661    maplist(pack_info(Store), Packs, PackInfo),
  662    append([FileInfo|PackInfo], Info0),
  663    sort(1, @<, Info0, Info),           % remove possible duplicates
  664    (   debugging(gitty(pack))
  665    ->  (   PackCount > 0
  666        ->  length(Info, FinalObjCount),
  667            debug(gitty(pack), 'Total ~D objects', [FinalObjCount])
  668        ;   true
  669        )
  670    ;   true
  671    ),
  672    directory_file_path(PackDir, 'pack-create', TmpPack),
  673    setup_call_cleanup(
  674	(   open(TmpPack, write, Out0, [type(binary), lock(write)]),
  675	    open_hash_stream(Out0, Out, [algorithm(sha1)])
  676	),
  677        (   write_signature(Out),
  678            maplist(write_header(Out), Info),
  679            format(Out, 'end_of_header.~n', []),
  680            maplist(add_file(Out, Store), Info),
  681	    stream_hash(Out, SHA1)
  682        ),
  683        close(Out)),
  684    format(atom(PackFile), 'pack-~w.pack', [SHA1]),
  685    directory_file_path(PackDir, PackFile, PackFilePath),
  686    rename_file(TmpPack, PackFilePath),
  687    debug(gitty(pack), 'Attaching ~p', [PackFilePath]),
  688    attach_pack(Store, PackFilePath),
  689    remove_objects_after_pack(Store, Objects, Options),
  690    delete(Packs, PackFilePath, RmPacks),
  691    remove_repacked_packs(Store, RmPacks, Options),
  692    debug(gitty(pack), 'Packing completed', []).
  693
  694object_info(Store, Object, obj(Object, Type, Size, FileSize)) :-
  695    gitty_object_file(Store, Object, File),
  696    load_object_header(Store, Object, Type, Size),
  697    size_file(File, FileSize).
  698
  699pack_info(Store, PackFile, Objects) :-
  700    attach_pack(Store, PackFile),
  701    pack_read_header(PackFile, _Version, _DataOffset, Objects).
  702
  703write_signature(Out) :-
  704    pack_version(Version),
  705    format(Out, "gitty(~d).~n", [Version]).
  706
  707write_header(Out, obj(Object, Type, Size, FileSize)) :-
  708    format(Out, 'obj(~q,~q,~d,~d).~n', [Object, Type, Size, FileSize]).
  709
  710%!  add_file(+Out, +Store, +Object) is det.
  711%
  712%   Add Object from Store to the pack stream Out.
  713
  714add_file(Out, Store, obj(Object, _Type, _Size, _FileSize)) :-
  715    gitty_object_file(Store, Object, File),
  716    exists_file(File),
  717    !,
  718    setup_call_cleanup(
  719        open(File, read, In, [type(binary)]),
  720        copy_stream_data(In, Out),
  721        close(In)).
  722add_file(Out, Store, obj(Object, Type, Size, FileSize)) :-
  723    pack_object(Object, Type, Size, Offset, Store, PackFile),
  724    setup_call_cleanup(
  725        open(PackFile, read, In, [type(binary)]),
  726        (   seek(In, Offset, bof, Offset),
  727            copy_stream_data(In, Out, FileSize)
  728        ),
  729        close(In)).
  730
  731
  732%!  gitty_fsck(+Store) is det.
  733%
  734%   Validate all packs associated with Store
  735
  736gitty_fsck(Store) :-
  737    pack_files(Store, PackFiles),
  738    maplist(fsck_pack, PackFiles).
  739
  740%!  fsck_pack(+File) is det.
  741%
  742%   Validate the integrity of the pack file File.
  743
  744fsck_pack(File) :-
  745    debug(gitty(pack), 'fsck ~p', [File]),
  746    check_pack_hash(File),
  747    debug(gitty(pack), 'fsck ~p: checking objects', [File]),
  748    check_pack_objects(File),
  749    debug(gitty(pack), 'fsck ~p: done', [File]).
  750
  751check_pack_hash(File) :-
  752    file_base_name(File, Base),
  753    file_name_extension(Plain, Ext, Base),
  754    must_be(oneof([pack]), Ext),
  755    atom_concat('pack-', Hash, Plain),
  756    setup_call_cleanup(
  757        (   open(File, read, In0, [type(binary)]),
  758            open_hash_stream(In0, In, [algorithm(sha1)])
  759        ),
  760        (   setup_call_cleanup(
  761                open_null_stream(Null),
  762                copy_stream_data(In, Null),
  763                close(Null)),
  764            stream_hash(In, SHA1)
  765        ),
  766        close(In)),
  767    assertion(Hash == SHA1).
  768
  769check_pack_objects(PackFile) :-
  770    setup_call_cleanup(
  771        open(PackFile, read, In, [type(binary)]),
  772        (  read_header(In, Version, DataOffset, Objects),
  773           set_stream(In, encoding(utf8)),
  774           foldl(check_object(In, PackFile, Version), Objects, DataOffset, _)
  775        ),
  776        close(In)).
  777
  778check_object(In, PackFile, _Version,
  779             obj(Object, Type, Size, FileSize),
  780             Offset0, Offset) :-
  781    Offset is Offset0+FileSize,
  782    byte_count(In, Here),
  783    (   Here == Offset0
  784    ->  true
  785    ;   print_message(warning, pack(reposition(Here, Offset0))),
  786        seek(In, Offset0, bof, Offset0)
  787    ),
  788    (   setup_call_cleanup(
  789            zopen(In, In2, [multi_part(false), close_parent(false)]),
  790            catch(read_object(In2, Data, _0RType, _0RSize), E,
  791                  ( print_message(error,
  792                                  gitty(PackFile, fsck(read_object(Object, E)))),
  793                    fail)),
  794            close(In2))
  795    ->  byte_count(In, End),
  796        (   End == Offset
  797        ->  true
  798        ;   print_message(error,
  799                          gitty(PackFile, fsck(object_end(Object, End,
  800                                                          Offset0, Offset,
  801                                                          Data))))
  802        ),
  803        assertion(Type == _0RType),
  804        assertion(Size == _0RSize),
  805        gitty:check_object(Object, Data, Type, Size)
  806    ;   true
  807    ).
  808
  809
  810%!  gitty_attach_packs(+Store) is det.
  811%
  812%   Attach all packs for Store
  813
  814gitty_attach_packs(Store) :-
  815    attached_packs(Store),
  816    !.
  817gitty_attach_packs(Store) :-
  818    with_mutex(gitty_attach_packs,
  819               gitty_attach_packs_sync(Store)).
  820
  821gitty_attach_packs_sync(Store) :-
  822    attached_packs(Store),
  823    !.
  824gitty_attach_packs_sync(Store) :-
  825    pack_files(Store, PackFiles),
  826    maplist(attach_pack(Store), PackFiles),
  827    asserta(attached_packs(Store)).
  828
  829pack_files(Store, Packs) :-
  830    directory_file_path(Store, pack, PackDir),
  831    exists_directory(PackDir),
  832    !,
  833    directory_files(PackDir, Files),
  834    convlist(is_pack(PackDir), Files, Packs).
  835pack_files(_, []).
  836
  837is_pack(PackDir, File, Path) :-
  838    file_name_extension(_, pack, File),
  839    directory_file_path(PackDir, File, Path).
  840
  841%!  attach_pack(+Store, +PackFile)
  842%
  843%   Load the index of Pack into memory.
  844
  845attach_pack(Store, PackFile) :-
  846    attached_pack(Store, PackFile),
  847    !.
  848attach_pack(Store, PackFile) :-
  849    retractall(pack_object(_,_,_,_,_,PackFile)),
  850    pack_read_header(PackFile, Version, DataOffset, Objects),
  851    foldl(assert_object(Store, PackFile, Version), Objects, DataOffset, _),
  852    assertz(attached_pack(Store, PackFile)).
  853
  854pack_read_header(PackFile, Version, DataOffset, Objects) :-
  855    setup_call_cleanup(
  856        open(PackFile, read, In, [type(binary)]),
  857        read_header(In, Version, DataOffset, Objects),
  858        close(In)).
  859
  860read_header(In, Version, DataOffset, Objects) :-
  861    read(In, Signature),
  862    (   Signature = gitty(Version)
  863    ->  true
  864    ;   domain_error(gitty_pack_file, Objects)
  865    ),
  866    read(In, Term),
  867    read_index(Term, In, Objects),
  868    get_code(In, Code),
  869    assertion(Code == 0'\n),
  870    byte_count(In, DataOffset).
  871
  872read_index(end_of_header, _, []) :-
  873    !.
  874read_index(Object, In, [Object|T]) :-
  875    read(In, Term2),
  876    read_index(Term2, In, T).
  877
  878assert_object(Store, Pack, _Version,
  879              obj(Object, Type, Size, FileSize),
  880              Offset0, Offset) :-
  881    Offset is Offset0+FileSize,
  882    assertz(pack_object(Object, Type, Size, Offset0, Store, Pack)).
  883
  884%!  detach_pack(+Store, +Pack) is det.
  885%
  886%   Remove a pack file from the memory index.
  887
  888detach_pack(Store, Pack) :-
  889    retractall(pack_object(_, _, _, _, Store, Pack)),
  890    retractall(attached_pack(Store, Pack)).
  891
  892%!  load_object_from_pack(+Hash, -Data, -Type, -Size) is semidet.
  893%
  894%   True when Hash is in a pack and can be loaded.
  895
  896load_object_from_pack(Hash, Data, Type, Size) :-
  897    pack_object(Hash, Type, Size, Offset, _Store, Pack),
  898    setup_call_cleanup(
  899        open(Pack, read, In, [type(binary)]),
  900        read_object_at(In, Offset, Data, Type, Size),
  901        close(In)).
  902
  903read_object_at(In, Offset, Data, Type, Size) :-
  904    seek(In, Offset, bof, Offset),
  905    read_object_here(In, Data, Type, Size).
  906
  907read_object_here(In, Data, Type, Size) :-
  908    stream_property(In, encoding(Enc)),
  909    setup_call_cleanup(
  910        ( set_stream(In, encoding(utf8)),
  911          zopen(In, In2, [multi_part(false), close_parent(false)])
  912        ),
  913        read_object(In2, Data, Type, Size),
  914        ( close(In2),
  915          set_stream(In, encoding(Enc))
  916        )).
  917
  918%!  unpack_packs(+Store) is det.
  919%
  920%   Unpack all packs.
  921
  922unpack_packs(Store) :-
  923    absolute_file_name(Store, AbsStore, [file_type(directory),
  924                                         access(read)]),
  925    pack_files(AbsStore, Packs),
  926    maplist(unpack_pack(AbsStore), Packs).
  927
  928%!  unpack_pack(+Store, +Pack) is det.
  929%
  930%   Turn a pack back into a plain object files
  931
  932unpack_pack(Store, PackFile) :-
  933    pack_read_header(PackFile, _Version, DataOffset, Objects),
  934    setup_call_cleanup(
  935        open(PackFile, read, In, [type(binary)]),
  936        foldl(create_file(Store, In), Objects, DataOffset, _),
  937        close(In)),
  938    detach_pack(Store, PackFile),
  939    delete_file(PackFile).
  940
  941create_file(Store, In, obj(Object, _Type, _Size, FileSize), Offset0, Offset) :-
  942    Offset is Offset0+FileSize,
  943    gitty_object_file(Store, Object, Path),
  944    with_mutex(gitty_file, exists_or_recreate(Path, Out)),
  945	(   var(Out)
  946	->  true
  947	;   setup_call_cleanup(
  948                seek(In, Offset0, bof, Offset0),
  949                copy_stream_data(In, Out, FileSize),
  950                close(Out))
  951	).
  952
  953exists_or_recreate(Path, _Out) :-
  954	exists_file(Path), !.
  955exists_or_recreate(Path, Out) :-
  956        file_directory_name(Path, Dir),
  957        make_directory_path(Dir),
  958        open(Path, write, Out, [type(binary), lock(write)]).
  959
  960
  961%!  remove_objects_after_pack(+Store, +Objects, +Options) is det.
  962%
  963%   Remove the indicated (file) objects from Store.
  964
  965remove_objects_after_pack(Store, Objects, Options) :-
  966    debug(gitty(pack), 'Deleting plain files', []),
  967    maplist(delete_object(Store), Objects),
  968    (   option(prune_empty_directories(true), Options, true)
  969    ->  debug(gitty(pack), 'Pruning empty directories', []),
  970        prune_empty_directories(Store)
  971    ;   true
  972    ).
  973
  974%!  remove_repacked_packs(+Store, +Packs, +Options)
  975%
  976%   Remove packs that have been repacked.
  977
  978remove_repacked_packs(Store, Packs, Options) :-
  979    maplist(remove_pack(Store, Options), Packs).
  980
  981remove_pack(Store, _Options, Pack) :-
  982    detach_pack(Store, Pack),
  983    delete_file(Pack).
  984
  985%!  prune_empty_directories(+Dir) is det.
  986%
  987%   Prune directories that are  empty  below   Dir.  Dir  itself  is not
  988%   removed, even if it is empty.
  989
  990prune_empty_directories(Dir) :-
  991    prune_empty_directories(Dir, 0).
  992
  993prune_empty_directories(Dir, Level) :-
  994    directory_files(Dir, AllFiles),
  995    exclude(hidden, AllFiles, Files),
  996    (   Files == [],
  997        Level > 0
  998    ->  delete_directory_async(Dir)
  999    ;   convlist(prune_empty_directories(Dir, Level), Files, Left),
 1000        (   Left == [],
 1001            Level > 0
 1002        ->  delete_directory_async(Dir)
 1003        ;   true
 1004        )
 1005    ).
 1006
 1007hidden(.).
 1008hidden(..).
 1009
 1010prune_empty_directories(Parent, Level0, File, _) :-
 1011    directory_file_path(Parent, File, Path),
 1012    exists_directory(Path),
 1013    !,
 1014    Level is Level0 + 1,
 1015    prune_empty_directories(Path, Level),
 1016    fail.
 1017prune_empty_directories(_, _, File, File).
 1018
 1019delete_directory_async(Dir) :-
 1020    with_mutex(gitty_file, delete_directory_async2(Dir)).
 1021
 1022delete_directory_async2(Dir) :-
 1023    catch(delete_directory(Dir), E,
 1024          (   \+ exists_directory(Dir)
 1025          ->  true
 1026          ;   \+ empty_directory(Dir)
 1027          ->  true
 1028          ;   throw(E)
 1029          )).
 1030
 1031empty_directory(Dir) :-
 1032    directory_files(Dir, AllFiles),
 1033    exclude(hidden, AllFiles, [])