#!/usr/bin/perl

=pod

=head1 NAME

apt-mirror - apt sources mirroring tool

=head1 SYNOPSIS

apt-mirror [configfile]

=head1 DESCRIPTION

A small and efficient tool that lets you mirror a part of or
the whole Debian GNU/Linux distribution or any other apt sources.

Main features:
 * It uses a config similar to APT's F<sources.list>
 * It's fully pool compliant
 * It supports multithreaded downloading
 * It supports multiple architectures at the same time
 * It can automatically remove unneeded files
 * It works well on an overloaded Internet connection
 * It never produces an inconsistent mirror, even while mirroring
 * It works on all POSIX compliant systems with Perl and wget

=head1 COMMENTS

apt-mirror uses F</etc/apt/mirror.list> as a configuration file.
By default it is tuned to official Debian or Ubuntu mirrors. Change
it for your needs.

After you set up the configuration file you may run as root:

    # su - apt-mirror -c apt-mirror

Or uncomment the line in F</etc/cron.d/apt-mirror> to enable daily mirror updates.

=head1 FILES

F</etc/apt/mirror.list>
        Main configuration file

F</etc/cron.d/apt-mirror>
        Cron configuration template

F</var/spool/apt-mirror/mirror>
        The mirror is placed here

F</var/spool/apt-mirror/skel>
        Place for temporarily downloaded indexes

F</var/spool/apt-mirror/var>
        Log files are placed here. URLs and MD5 checksums are also stored here.

=head1 CONFIGURATION EXAMPLES

The mirror.list configuration supports many options; the file is well commented, explaining each option.
Here are some sample mirror configuration lines showing the various supported ways:

Normal:
deb http://example.com/debian stable main contrib non-free

Arch Specific: (many other architectures are supported)
deb-powerpc http://example.com/debian stable main contrib non-free

HTTP and FTP Auth or non-standard port:
deb http://user:pass@example.com:8080/debian stable main contrib non-free

HTTPS with sending Basic HTTP authentication information (plaintext username and password) for all requests:
(this was the default behaviour of Wget 1.10.2 and prior and is needed for some servers with newer versions of Wget)
set auth_no_challenge 1
deb https://user:pass@example.com:443/debian stable main contrib non-free

HTTPS without checking certificate:
set no_check_certificate 1
deb https://example.com:443/debian stable main contrib non-free

Source Mirroring:
deb-src http://example.com/debian stable main contrib non-free

=head1 AUTHORS

Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt>
Brandon Holtsclaw E<lt>me@brandonholtsclaw.comE<gt>

=cut

use warnings;
use strict;
use File::Copy;
use File::Compare;
use File::Path qw(make_path);
use File::Basename;
use Fcntl qw(:flock);

# Path of the configuration file; assigned once the command line has been inspected.
my $config_file;

# Built-in defaults for every tunable. A value may refer to another variable
# as `$name`; get_variable() expands those references when the value is read.
my %config_variables = (
    # Architecture and parallelism
    'defaultarch' => `dpkg --print-architecture 2>/dev/null` || 'i386',
    'nthreads'    => 20,
    'limit_rate'  => '100m',

    # Directory layout
    'base_path'         => '/var/spool/apt-mirror',
    'mirror_path'       => '$base_path/mirror',
    'skel_path'         => '$base_path/skel',
    'var_path'          => '$base_path/var',
    'cleanscript'       => '$var_path/clean.sh',
    'postmirror_script' => '$var_path/postmirror.sh',

    # Behaviour switches
    '_contents'            => 1,
    '_autoclean'           => 0,
    '_tilde'               => 0,
    '_plus'                => 0,
    'run_postmirror'       => 1,
    'auth_no_challenge'    => 0,
    'no_check_certificate' => 0,
    'unlink'               => 0,
    'paranoid'             => 0,

    # Proxy settings handed to wget
    'use_proxy'      => 'off',
    'http_proxy'     => '',
    'https_proxy'    => '',
    'proxy_user'     => '',
    'proxy_password' => '',
);

# Parsed "deb" lines ([ arch, uri, distribution/components... ]) and
# "deb-src" lines ([ uri, distribution/components... ]).
my @config_binaries;
my @config_sources;

# URL lists handed to download_urls() for the release and index stages.
my @release_urls;
my @index_urls;

# PIDs of the wget workers that are still running.
my @childrens;

# Paths named by "skip-clean" / "clean" lines (and every path queued for download).
my %skipclean;
my %clean_directory;

# Supported checksum sections, strongest first.
my @hash_strength = ( 'SHA512', 'SHA256', 'SHA1', 'MD5Sum' );

# Per-hash lookup tables, all keyed by the names in @hash_strength.
# NOTE(review): %packages_hashes and %sources_hashes are not used in this part
# of the file; presumably they are the field names in Packages/Sources indexes.
my %packages_hashes = (
    'SHA512' => 'SHA512',
    'SHA256' => 'SHA256',
    'SHA1'   => 'SHA1',
    'MD5Sum' => 'MD5sum',
);
my %sources_hashes = (
    'SHA512' => 'Checksums-Sha512',
    'SHA256' => 'Checksums-Sha256',
    'SHA1'   => 'Checksums-Sha1',
    'MD5Sum' => 'Files',
);

# External program used to verify a checksum list of the given type.
my %verify_commands = (
    'SHA512' => 'sha512sum',
    'SHA256' => 'sha256sum',
    'SHA1'   => 'sha1sum',
    'MD5Sum' => 'md5sum',
);

# Name of the per-hash intermediate file written under var_path.
my %checksum_filenames = (
    'SHA512' => 'SHA512',
    'SHA256' => 'SHA256',
    'SHA1'   => 'SHA1',
    'MD5Sum' => 'MD5',
);

# Mapping of files downloaded from a by-hash directory to their canonical locations.
my %hashsum_to_files;

# Mapping of all the checksums for a given canonical filename.
my %file_to_hashsums;

# URL => [ hash type, hash sum ] for downloads that can be verified.
my %urls_checksums;

######################################################################################
## Setting up $config_file variable

# Start from the stock location; a first command-line argument overrides it.
$config_file = "/etc/apt/mirror.list";
$_ = shift @ARGV;
if ($_)
{
    -e $_ or die("apt-mirror: invalid config file specified");
    $config_file = $_;
}

# The architecture captured from dpkg may still carry its trailing newline.
chomp( $config_variables{"defaultarch"} );

######################################################################################
## Common subroutines

# Format a number with one decimal place for display.
#
# The magnitude has .05 added and is then truncated to one decimal; the sign
# is handled separately so negative values are treated symmetrically.
# Returns a string such as "2.0", "1.5" or "-3.1".
sub round_number
{
    my ($value) = @_;

    my $sign    = ( $value < 0 ) ? '-' : '';
    my $rounded = int( ( abs($value) + .05 ) * 10 ) / 10;

    # Whole numbers stringify without a fractional part; add one.
    $rounded .= '.0' if $rounded !~ /\./;

    # Tidy a dangling "." or a superfluous third decimal digit.
    $rounded .= '0' if substr( $rounded, -1 ) eq '.';
    chop $rounded if $rounded =~ /\.\d\d0$/;

    return $sign . $rounded;
}

# Render a byte count as a human-readable string.
#
# Values below 1024 are returned verbatim as "<n> bytes"; larger values are
# divided by the largest binary unit (KiB, MiB, GiB) that does not exceed
# them and passed through round_number().
sub format_bytes
{
    my ($bytes) = @_;

    my $kib = 1024;
    my $mib = 1024 * 1024;
    my $gib = 1024 * 1024 * 1024;

    # Small values are reported as-is, without rounding.
    return "$bytes bytes" unless $bytes >= $kib;

    my ( $divisor, $unit ) =
        $bytes >= $gib ? ( $gib, 'GiB' )
      : $bytes >= $mib ? ( $mib, 'MiB' )
      :                  ( $kib, 'KiB' );

    return round_number( $bytes / $divisor ) . " $unit";
}

# Look up a configuration variable and expand `$name` references in its value.
#
# Expansion repeats until no reference is left, so values may refer to
# variables that themselves contain references. Dies when the expansion keeps
# producing new references (for example with a self-referencing variable).
sub get_variable
{
    my ($name) = @_;

    my $expanded  = $config_variables{$name};
    my $remaining = 16;

    while ( $expanded =~ s/\$(\w+)/$config_variables{$1}/xg )
    {
        if ( $remaining-- < 0 )
        {
            die("apt-mirror: too many substitution while evaluating variable");
        }
    }

    return $expanded;
}

# Wrap a path in single quotes for use in a shell command.
#
# Every embedded single quote is replaced by the sequence '\'' (close the
# quote, emit an escaped quote, reopen the quote).
sub quoted_path
{
    my ($raw) = @_;

    ( my $escaped = $raw ) =~ s/'/'\\''/g;

    return "'${escaped}'";
}

# Take the exclusive run lock for this mirror.
#
# Opens <var_path>/apt-mirror.lock on the LOCK_FILE handle (which
# unlock_aptmirror() later closes) and requests a non-blocking exclusive
# flock on it.
#
# Dies when the lock file cannot be opened, or when the lock is already held
# (another apt-mirror is running).
sub lock_aptmirror
{
    my $lock_path = get_variable("var_path") . "/apt-mirror.lock";

    # Report an unwritable var_path as what it is rather than letting the
    # flock on an unopened handle fail with "already running".
    open( LOCK_FILE, '>', $lock_path )
      or die("apt-mirror: can't open lock file ($lock_path): $!");

    flock( LOCK_FILE, LOCK_EX | LOCK_NB )
      or die("apt-mirror is already running, exiting");
}

# Release the run lock taken by lock_aptmirror() and remove the lock file.
#
# The file is unlinked while the lock is still held and the handle is closed
# afterwards. Doing it the other way round leaves a window in which another
# instance can lock the old file just before it is deleted, after which a
# third instance could create and lock a fresh file alongside it.
#
# Returns the result of unlink (number of files removed).
sub unlock_aptmirror
{
    my $removed = unlink( get_variable("var_path") . "/apt-mirror.lock" );
    close(LOCK_FILE);
    return $removed;
}

# Verify the files downloaded during a stage and delete the ones that fail.
#
# For every hash type a checksum list <var_path>/<stage>-<hash> may have been
# written by download_urls(). Each non-empty list is fed to the matching
# *sum tool in --check --quiet mode; every file it reports as FAILED that
# exists as a regular file is removed.
#
# Parameters:
#   $stage - stage label used in the checksum list file names.
# Returns the number of files deleted.
# Dies when the verifier cannot be started or a corrupted file cannot be
# removed.
sub delete_corrupted_files
{
    my $stage = shift;
    my $found = 0;

    foreach my $hash (@hash_strength)
    {
        my $file = get_variable("var_path") . "/${stage}-${hash}";
        next unless -s $file;

        # The command runs through a shell (for the stderr redirection), so
        # the list path must be quoted in case var_path contains spaces or
        # shell metacharacters.
        my $command = "env LC_ALL=C $verify_commands{$hash} --check --quiet " . quoted_path($file) . " 2>/dev/null";

        open( my $pipe, "-|", $command ) or die "Cannot run $verify_commands{$hash}";
        while ( my $report = <$pipe> )
        {
            my ($filename) = $report =~ /^(.*): FAILED/;

            # Ignore output lines that are not failure reports.
            next unless defined $filename;
            next unless -f $filename;

            $found++;
            print "$filename is corrupted, deleting....\n";
            unlink $filename or die "Cannot delete $filename.";
        }
        close $pipe;
    }

    return $found;
}

# Download a list of URLs with several wget processes running in parallel.
#
# Parameters:
#   $stage - label for this download round; used in progress output and in
#            the names of the intermediate files under var_path.
#   @urls  - the URLs to fetch.
#
# Steps:
#   1. Build the extra wget arguments from the configuration.
#   2. In paranoid mode, write one checksum list per hash type
#      (<var_path>/<stage>-<hash>) for the URLs with a known checksum.
#   3. Spread the URLs over <stage>-urls.N files, one per worker.
#   4. Fork one wget per URL file and wait for all of them.
#   5. In paranoid mode, delete corrupted downloads and abort if any were found.
#   6. Copy every by-hash download recorded in %hashsum_to_files to its
#      canonical file names via copy_file() (defined elsewhere in this file).
#
# Dies on any failure to write the intermediate files, on fork failure, and
# when paranoid verification finds corrupted files.
sub download_urls
{
    my $stage    = shift;
    my @urls     = @_;
    my $nthreads = get_variable("nthreads");
    my @args     = ();
    my $i;
    local $| = 1;

    # Never start more workers than there are URLs.
    $nthreads = @urls if @urls < $nthreads;

    if ( get_variable("auth_no_challenge") == 1 )    { push( @args, "--auth-no-challenge" ); }
    if ( get_variable("no_check_certificate") == 1 ) { push( @args, "--no-check-certificate" ); }
    if ( get_variable("unlink") == 1 )               { push( @args, "--unlink" ); }
    if ( length( get_variable("use_proxy") ) && ( get_variable("use_proxy") eq 'yes' || get_variable("use_proxy") eq 'on' ) )
    {
        if ( length( get_variable("http_proxy") ) || length( get_variable("https_proxy") ) ) { push( @args, "-e use_proxy=yes" ); }
        if ( length( get_variable("http_proxy") ) )     { push( @args, "-e http_proxy=" . get_variable("http_proxy") ); }
        if ( length( get_variable("https_proxy") ) )    { push( @args, "-e https_proxy=" . get_variable("https_proxy") ); }
        if ( length( get_variable("proxy_user") ) )     { push( @args, "-e proxy_user=" . get_variable("proxy_user") ); }
        if ( length( get_variable("proxy_password") ) ) { push( @args, "-e proxy_password=" . get_variable("proxy_password") ); }
    }
    print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n";

    if ( get_variable("paranoid") )
    {
        # One checksum list per hash type, in the "<sum>  <path>" format that
        # the *sum --check tools read.
        my %fh = ();
        foreach my $hash (@hash_strength)
        {
            open $fh{$hash}, ">", get_variable("var_path") . "/${stage}-${hash}" or die("apt-mirror: Cannot write to ${stage}-${hash}");
        }

        foreach my $checked_url (@urls)
        {
            next unless $urls_checksums{$checked_url};

            my ( $hash, $hashsum ) = @{ $urls_checksums{$checked_url} };
            my $fh = $fh{$hash};
            print $fh $hashsum . "  " . sanitise_uri($checked_url) . "\n";
        }

        foreach my $hash (@hash_strength)
        {
            close $fh{$hash};
        }
    }

    my @url_fds;
    for ( $i = 0 ; $i < $nthreads ; $i++ )
    {
        open( $url_fds[$i], ">", get_variable("var_path") . "/$stage-urls.$i" ) or die("apt-mirror: can't write to intermediate file ($stage-urls.$i)");
    }

    # Deal the URLs out round-robin, walking the list from the end.
    for ( $i = scalar(@urls) - 1 ; $i >= 0 ; $i-- )
    {
        my $thread = $i % $nthreads;
        print { $url_fds[$thread] } $urls[$i] . "\n";
    }

    # Iterate by index so a failure names the file that actually failed.
    foreach my $thread ( 0 .. $#url_fds )
    {
        close $url_fds[$thread] or die("apt-mirror: can't close intermediate file ($stage-urls.$thread)");
    }

    for ( $i = 0 ; $i < $nthreads ; $i++ )
    {
        my $pid = fork();

        die("apt-mirror: can't do fork in download_urls") if !defined($pid);

        if ( $pid == 0 )
        {
            exec 'wget', '--no-if-modified-since', '--no-cache', '--limit-rate=' . get_variable("limit_rate"), '-T', '60', '-t', '1', '-r', '-N', '-l', 'inf', '-o', get_variable("var_path") . "/$stage-log.$i", '-i', get_variable("var_path") . "/$stage-urls.$i", @args;

            # shouldn't reach this unless exec fails
            die("\n\nCould not run wget, please make sure its installed and in your path\n\n");
        }

        push @childrens, $pid;
    }

    print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... ";
    while ( scalar @childrens )
    {
        my $dead = wait();

        # wait() returns -1 once no child is left; without this check a PID
        # that was reaped elsewhere would keep the loop spinning forever.
        if ( $dead == -1 )
        {
            @childrens = ();
            last;
        }

        @childrens = grep { $_ != $dead } @childrens;
        print "[" . scalar(@childrens) . "]... ";
    }
    print "\nEnd time: " . localtime() . "\n\n";

    if ( get_variable("paranoid") )
    {
        if ( delete_corrupted_files($stage) > 0 )
        {
            die "Some files were corrupted while downloading, aborting...";
        }
    }

    # Materialise the canonical names for everything fetched via by-hash.
    foreach my $hashsum_filename ( keys %hashsum_to_files )
    {
        foreach my $filename ( @{ $hashsum_to_files{$hashsum_filename} } )
        {
            copy_file( $hashsum_filename, $filename );
        }
    }

}

## Parse config

# Parse one line of the configuration file.
#
# The line is taken from the first argument when one is given, otherwise
# from $_ (which is how the config reading loop calls it).
#
# Returns a hash describing the line, or an empty list when the line is not
# recognised:
#   deb / deb-src : type, arch (may be undef), options, uri,
#                   components (array ref of the whitespace-separated words
#                   after the URI, distribution first)
#   set           : type 'set', key, value (surrounding quotes removed)
#   clean / skip-clean : type, uri
#
# All three forms must start at the beginning of the line (leading blanks
# allowed), so text such as "unset foo bar" is not mistaken for a "set" line.
sub parse_config_line
{
    my $line = @_ ? shift : $_;
    my %config;

    my $pattern_deb_line = qr/^[\t ]*(?<type>deb-src|deb)(?:-(?<arch>[\w\-]+))?[\t ]+(?:\[(?<options>[^\]]+)\][\t ]+)?(?<uri>[^\s]+)[\t ]+(?<components>.+)$/;
    my $pattern_set_line = qr/^[\t ]*set[\t ]+(?<key>[^\s]+)[\t ]+(?<value>"[^"]+"|'[^']+'|[^\s]+)/;
    my $pattern_clean_line = qr/^[\t ]*(?<type>clean|skip-clean)[\t ]+(?<uri>[^\s]+)/;

    if ( $line =~ $pattern_deb_line )
    {
        # Copy the captures out before running any further match.
        my $components = $+{components};
        $config{'type'}    = $+{type};
        $config{'arch'}    = $+{arch};
        $config{'options'} = $+{options} // "";
        $config{'uri'}     = $+{uri};

        # An arch= option overrides the "deb-<arch>" suffix. Only the first
        # architecture of a comma-separated list is used.
        if ( $config{'options'} =~ /arch=(?<arch>[\w\-]+)/ )
        {
            $config{'arch'} = $+{arch};
        }
        $config{'components'} = [ split /\s+/, $components ];
    }
    elsif ( $line =~ $pattern_set_line )
    {
        $config{'type'}  = 'set';
        $config{'key'}   = $+{key};
        $config{'value'} = $+{value};
        $config{'value'} =~ s/^'(.*)'$/$1/;
        $config{'value'} =~ s/^"(.*)"$/$1/;
    }
    elsif ( $line =~ $pattern_clean_line )
    {
        $config{'type'} = $+{type};
        $config{'uri'}  = $+{uri};
    }

    return %config;
}

# Turn a URI into the relative path used for its local copy.
#
# Drops the "scheme://" prefix, drops a "user:pass@" part when the first path
# segment contains an "@", percent-encodes "~" and "+" when the _tilde /
# _plus settings are on, and removes one trailing slash.
sub sanitise_uri
{
    my ($location) = @_;

    $location =~ s[^(\w+)://][];

    # Only the first segment (host part) decides whether credentials are present.
    my $authority = ( split '/', $location )[0];
    if ( $authority =~ /@/ )
    {
        $location =~ s/^([^@]+)?@?//;
    }

    if ( get_variable("_tilde") ) { $location =~ s/~/\%7E/g; }
    if ( get_variable("_plus") )  { $location =~ s/\+/\%2B/g; }

    $location =~ s[/$][];
    return $location;
}

# Read the configuration file line by line and fill %config_variables,
# @config_binaries, @config_sources, %skipclean and %clean_directory.
# Comment lines and blank lines are skipped; any other line that
# parse_config_line() does not recognise aborts the run.
#
# Three-argument open on a lexical handle keeps a path given on the command
# line from being interpreted as part of the open mode.
open( my $config_fh, '<', $config_file ) or die("apt-mirror: can't open config file ($config_file)");
while (<$config_fh>)
{
    next if /^\s*#/;
    next unless /\S/;
    my $line        = $_;
    my %config_line = parse_config_line();

    # An unrecognised line yields no type at all; treat that as '' so it
    # reaches the "invalid line" error without uninitialised-value warnings.
    my $type = $config_line{'type'} // '';

    if ( $type eq "set" )
    {
        $config_variables{ $config_line{'key'} } = $config_line{'value'};
        next;
    }
    elsif ( $type eq "deb" )
    {
        my $arch = $config_line{'arch'} // get_variable("defaultarch");
        push @config_binaries, [ $arch, $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $type eq "deb-src" )
    {
        push @config_sources, [ $config_line{'uri'}, @{ $config_line{'components'} } ];
        next;
    }
    elsif ( $type eq "skip-clean" )
    {
        $skipclean{ sanitise_uri( $config_line{'uri'} ) } = 1;
        next;
    }
    elsif ( $type eq "clean" )
    {
        $clean_directory{ sanitise_uri( $config_line{'uri'} ) } = 1;
        next;
    }

    die("apt-mirror: invalid line in config file ($.: $line ...)");
}
close $config_fh;

die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch");

######################################################################################
## Create the 3 needed directories if they don't exist yet
my @needed_directories = map { get_variable($_) } qw(mirror_path skel_path var_path);
for my $needed_directory (@needed_directories)
{
    # Checked one at a time: creating one directory may already have created
    # another as its parent.
    next if -d $needed_directory;
    make_path($needed_directory) or die("apt-mirror: can't create $needed_directory directory");
}
#
#######################################################################################

# Only one apt-mirror may work on this var_path at a time.
lock_aptmirror();

######################################################################################
## Skel download

# URL => expected size (undef when no size is known) for the download stage
# currently being prepared; filled by add_url_to_download().
my %urls_to_download = ();
# File-scope scratch variables; $url is assigned by the release loops below.
my ( $url, $arch );

# Normalise the path part of a URL.
#
# Collapses "/./" segments, runs of slashes (except the "//" that follows the
# scheme's colon) and "/dir/../" pairs, then percent-encodes "~" and "+" when
# the _tilde / _plus settings are on. Returns the cleaned string.
sub remove_double_slashes
{
    my ($address) = @_;

    # Each substitution is repeated until it no longer changes anything.
    1 while $address =~ s[/\./][/]g;
    1 while $address =~ s[(?<!:)//][/]g;
    1 while $address =~ s[(?<!:/)/[^/]+/\.\./][/]g;

    if ( get_variable("_tilde") ) { $address =~ s/~/\%7E/g; }
    if ( get_variable("_plus") )  { $address =~ s/\+/\%2B/g; }

    return $address;
}

# Queue a URL for the current download stage.
#
# Parameters (all but the first are optional):
#   url              - URL of the file at its canonical location
#   size             - expected size, stored as the value in %urls_to_download
#   strongest_hash   - name of the hash type chosen for verification
#   hash, hashsum    - hash type and value this call describes
#   acquire_by_hash  - true to fetch through the by-hash directory
#
# The canonical local path is always protected from cleaning.
#
# Without by-hash the URL itself is queued, and its checksum is recorded only
# when the given hash is the strongest one.
#
# With by-hash the by-hash path is protected as well. For the strongest hash
# the by-hash URL is queued and its canonical name is remembered in
# %hashsum_to_files; for any other hash only the by-hash path is remembered
# in %file_to_hashsums.
sub add_url_to_download
{
    my ( $requested_url, $size, $strongest_hash, $hash, $hashsum, $acquire_by_hash ) = @_;

    my $url                = remove_double_slashes($requested_url);
    my $canonical_filename = sanitise_uri($url);
    $skipclean{$canonical_filename} = 1;

    if ( !$acquire_by_hash )
    {
        # Plain download from the canonical location.
        $urls_to_download{$url} = $size;
        if ( $strongest_hash and ( $hash eq $strongest_hash ) )
        {
            $urls_checksums{$url} = [ $hash, $hashsum ];
        }
        return;
    }

    my $by_hash_suffix   = "/by-hash/${hash}/${hashsum}";
    my $hashsum_filename = dirname($canonical_filename) . $by_hash_suffix;
    $url = dirname($url) . $by_hash_suffix;
    $skipclean{$hashsum_filename} = 1;

    if ( $hash ne $strongest_hash )
    {
        # Not downloaded through this checksum, but its by-hash location
        # still has to be known for the canonical file.
        push @{ $file_to_hashsums{$canonical_filename} }, $hashsum_filename;
        return;
    }

    # Strongest hash: this is the copy that gets downloaded, and the
    # canonical name it must later be copied to is remembered.
    push @{ $hashsum_to_files{$hashsum_filename} }, $canonical_filename;
    $urls_to_download{$url} = $size;
    $urls_checksums{$url}   = [ $hash, $hashsum ];
    return;
}

# Queue InRelease, Release and Release.gpg for every configured source and
# binary repository. Entries with components use the "dists/<distribution>"
# layout; entries without components are fetched from "<uri>/<distribution>".
foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};

    $url = @components ? "$uri/dists/$distribution/" : "$uri/$distribution/";

    foreach my $release_file (qw(InRelease Release Release.gpg))
    {
        add_url_to_download( $url . $release_file );
    }
}

foreach my $binary_entry (@config_binaries)
{
    my ( undef, $uri, $distribution, @components ) = @{$binary_entry};

    $url = @components ? "$uri/dists/$distribution/" : "$uri/$distribution/";

    foreach my $release_file (qw(InRelease Release Release.gpg))
    {
        add_url_to_download( $url . $release_file );
    }
}

# Downloads land relative to the current directory, so work inside skel.
chdir( get_variable("skel_path") ) or die("apt-mirror: can't chdir to skel");
@release_urls = sort keys %urls_to_download;
download_urls( "release", @release_urls );

######################################################################################
## Download all relevant metadata

%urls_to_download = ();

# Read a repository's InRelease (or, failing that, Release) file from the
# skel directory and queue the index files that are relevant for one
# architecture.
#
# Parameters:
#   $arch         - architecture name, or "source" for deb-src entries
#   $uri          - repository base URI
#   $distribution - distribution name (or directory of a flat repository)
#   @components   - components to mirror; empty for a flat repository
#
# Returns 1 when a release file was found and processed, 0 (after a warning)
# when neither InRelease nor Release could be opened.
#
# Component and architecture names come from the configuration file and are
# escaped with quotemeta before being used in patterns, so a name containing
# regex metacharacters (".", "+", ...) matches literally.
sub find_metadata_in_release
{
    my ( $arch, $uri, $distribution, @components ) = @_;

    my $arch_regex                 = "(?:" . quotemeta($arch) . "|all)";
    my $compressed_extension_regex = '(?:\.(?:gz|bz2|xz|lzma))$';
    my $hash_type_regex            = "(?:" . join( "|", @hash_strength ) . ")";

    # The set of interesting file names depends only on the arguments, so it
    # is built once here instead of once per line of the release file.
    my $dist_uri;
    my @wanted_patterns;
    if (@components)
    {
        # Debian repository format https://wiki.debian.org/DebianRepository/Format#Debian_Repository_Format
        $dist_uri = remove_double_slashes( $uri . "/dists/" . $distribution . "/" );
        my $component_regex = "(?:" . join( "|", map { quotemeta($_) } @components ) . ")";

        if ( $arch eq "source" )
        {
            @wanted_patterns = (
                qr{^${component_regex}/source/Sources${compressed_extension_regex}},
                qr{^${component_regex}/Contents-source${compressed_extension_regex}},
            );
        }
        else
        {
            @wanted_patterns = (
                qr{^${component_regex}/Contents-${arch_regex}${compressed_extension_regex}},
                qr{^Contents-${arch_regex}${compressed_extension_regex}},
                qr{^Packages${compressed_extension_regex}},
                qr{^${component_regex}/binary-${arch_regex}/Packages${compressed_extension_regex}},
                qr{^${component_regex}/binary-${arch_regex}/Release$},
                qr{^${component_regex}/cnf/Commands-${arch_regex}${compressed_extension_regex}},
                qr{^${component_regex}/dep11/Components-${arch_regex}.*${compressed_extension_regex}},
                qr{^${component_regex}/dep11/icons-.*${compressed_extension_regex}},
                qr{^${component_regex}/i18n/Translation-.*${compressed_extension_regex}},
            );
        }
    }
    else
    {
        # Flat repository format https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
        $dist_uri = remove_double_slashes( $uri . "/" . $distribution . "/" );
        @wanted_patterns =
          ( $arch eq "source" )
          ? ( qr{^Sources${compressed_extension_regex}} )
          : ( qr{^Packages${compressed_extension_regex}} );
    }

    # Prefer InRelease, fall back to Release.
    my $release_uri = '';
    my $stream;
    foreach my $release_filename ( "InRelease", "Release" )
    {
        $release_uri = $dist_uri . $release_filename;
        my $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);

        last if ( open $stream, "<", $release_path );
        $stream = undef;
    }

    unless ($stream)
    {
        warn( "Failed to find InRelease or Release in " . get_variable("skel_path") . "/" . sanitise_uri($dist_uri) );
        return 0;
    }

    my $hash             = undef;    # checksum section currently being read
    my %available_hashes = ();
    my $acquire_by_hash  = 0;
    my @parts_to_download = ();
    while ( my $line = <$stream> )
    {
        chomp $line;
        if ($hash)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                # Indented line inside a checksum section: "<sum> <size> <name>".
                my $entry = $1;
                my @parts = split( / +/, $entry );
                if ( @parts == 3 )
                {
                    my $filename = $parts[2];
                    push @parts, $hash;
                    if ( grep { $filename =~ $_ } @wanted_patterns )
                    {
                        push @parts_to_download, \@parts;
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$entry\" in $release_uri");
                }
            }
            else
            {
                # First unindented line ends the section; it is examined
                # again below because it may start the next one.
                $hash = undef;
            }
        }
        if ( not $hash )
        {
            if ( $line =~ /^(${hash_type_regex}):$/ )
            {
                $hash = $1;
                $available_hashes{$hash} = 1;
            }
            elsif ( $line eq "Acquire-By-Hash: yes" )
            {
                $acquire_by_hash = 1;
            }
        }
    }
    close $stream;

    # NOTE(review): the strongest hash is only determined for by-hash
    # repositories, so for other repositories add_url_to_download() never
    # records checksums for these files - confirm that this is intended.
    my $strongest_hash;
    if ($acquire_by_hash)
    {
        foreach my $candidate (@hash_strength)
        {
            if ( $available_hashes{$candidate} )
            {
                $strongest_hash = $candidate;
                last;
            }
        }
        unless ($strongest_hash)
        {
            warn("Cannot find a supported hash in $release_uri, will download from canonical locations.");
            $acquire_by_hash = 0;
        }
    }

    foreach my $part (@parts_to_download)
    {
        my ( $hashsum, $size, $filename, $part_hash ) = @{$part};
        add_url_to_download( $dist_uri . $filename, $size, $strongest_hash, $part_hash, $hashsum, $acquire_by_hash );
    }
    return 1;
}

# Queue the index files of every configured repository. The list normally
# comes from the repository's release file; when none is available the
# well-known index locations are tried with every known compression suffix.
print "Processing metadata files from releases [";
foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};
    print "M";
    unless ( find_metadata_in_release( $arch, $uri, $distribution, @components ) )
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
        {
            if (@components)
            {
                # Debian repo
                foreach my $component (@components)
                {
                    foreach my $path (
                        "/dists/${distribution}/${component}/binary-${arch}/Packages",
                        "/dists/${distribution}/${component}/binary-all/Packages",
                        "/dists/${distribution}/${component}/Contents-${arch}",
                        "/dists/${distribution}/${component}/Contents-all",
                        "/dists/${distribution}/Contents-${arch}",
                        "/dists/${distribution}/Contents-all",
                      )
                    {
                        add_url_to_download("${uri}/${path}${file_extension}");
                    }
                }
            }
            else
            {
                # Flat repo
                foreach my $path (
                    "${distribution}/Packages",
                    "${distribution}/Contents-${arch}",
                    "${distribution}/Contents-all",
                  )
                {
                    add_url_to_download("${uri}/${path}${file_extension}");
                }
            }
        }
    }
}

foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};
    print "M";
    unless ( find_metadata_in_release( "source", $uri, $distribution, @components ) )
    {
        # Insecure repo with no release file - try to get the well known indices
        foreach my $file_extension ( ".gz", ".bz2", ".xz", ".lzma", "" )
        {
            if (@components)
            {
                # Debian repo: source indices live under
                # dists/<distribution>/<component>/, the same layout that
                # find_metadata_in_release() matches and the binary fallback
                # above uses.
                foreach my $component (@components)
                {
                    foreach my $path (
                        "/dists/${distribution}/${component}/source/Sources",
                        "/dists/${distribution}/${component}/Contents-source",
                      )
                    {
                        add_url_to_download("${uri}/${path}${file_extension}");
                    }
                }
            }
            else
            {
                # Flat repo
                add_url_to_download("${uri}/${distribution}/Sources${file_extension}");
            }
        }
    }
}
print "]\n\n";

@index_urls = sort keys %urls_to_download;
download_urls( "index", @index_urls );

######################################################################################
## Main download preparations

# The index stage is finished: start a fresh download queue for the archive.
%urls_to_download = ();

# Write handles for the intermediate lists kept under var_path, keyed by
# "ALL", "NEW" and by each hash name in @hash_strength.
my %files_fh;

# Three-argument open keeps the mode separate from the path, so unusual
# characters in var_path can never be interpreted as part of the mode.
foreach my $list_name (qw(ALL NEW))
{
    open $files_fh{$list_name}, ">", get_variable("var_path") . "/" . $list_name
      or die("apt-mirror: can't write to intermediate file (${list_name})");
}
foreach my $hash (@hash_strength)
{
    open $files_fh{$hash}, ">", get_variable("var_path") . "/" . $checksum_filenames{$hash}
      or die("apt-mirror: can't write to intermediate file (${hash})");
}

# Memoised stat() results, keyed by the filename exactly as it was passed in.
my %stat_cache = ();

# Return the stat() list for $filename, calling stat() only the first time a
# given name is seen. A failed stat() is cached as well (as an empty list).
sub _stat
{
    my ($filename) = @_;

    unless ( exists $stat_cache{$filename} )
    {
        $stat_cache{$filename} = [ stat($filename) ];
    }

    return @{ $stat_cache{$filename} };
}

# Forget every memoised stat() result held in %stat_cache.
sub clear_stat_cache
{
    return %stat_cache = ();
}

# Decide whether a mirrored file has to be (re)downloaded.
#
#   $filename        path of the local copy
#   $size_on_server  size the index advertises for the file
#
# Returns 1 when the local copy is missing, empty, or differs in size from
# the advertised one, and 0 when the sizes match. When the sizes differ and
# the "unlink" setting equals 1 the stale local copy is removed first.
sub need_update
{
    my ( $filename, $size_on_server ) = @_;

    # Element 7 of the stat list is the size in bytes; undef if stat failed.
    my $size = ( _stat($filename) )[7];

    if ( !$size )
    {
        return 1;
    }
    if ( $size_on_server == $size )
    {
        return 0;
    }

    unlink $filename if get_variable("unlink") == 1;
    return 1;
}

# Read one downloaded Packages or Sources index and queue every file it lists.
#
#   $uri       repository base URI the index belongs to
#   $index     index path below the sanitised URI directory, without any
#              compression suffix (e.g. "/dists/<dist>/<comp>/source/Sources")
#   $optional  when true, an index that cannot be opened is skipped silently
#              instead of producing a warning
#
# Paths are relative to the current working directory. If a compressed copy
# of the index exists (.gz, .xz, .lzma, .bz2 - first match wins) it is
# decompressed over the plain file first.
#
# For each listed file the function records it in %skipclean, writes it to
# the ALL list and the per-hash checksum lists, and - when need_update() says
# the local copy is missing or has the wrong size - writes it to the NEW list
# and hands it to add_url_to_download().
#
# Dies on a malformed checksum line in a Sources stanza.
sub process_index
{
    my $uri      = shift;
    my $index    = shift;
    my $optional = shift;

    my $path = sanitise_uri($uri);

    # Stanzas are separated by blank lines, so read one stanza per record.
    local $/ = "\n\n";
    my $mirror = get_variable("mirror_path") . "/" . $path;

    # Suffix => command table. Each entry decompresses the file carrying its
    # own suffix; `xz -d` auto-detects the legacy .lzma container as well.
    foreach my $decompressor (
        [ "gz",   "gunzip" ],
        [ "xz",   "xz -d" ],
        [ "lzma", "xz -d" ],
        [ "bz2",  "bzip2 -d" ],
    )
    {
        my ( $extension, $command ) = @{$decompressor};
        my $compressed = "$path/$index.$extension";
        next unless -e $compressed;

        if ( system("$command < $compressed > $path/$index") != 0 )
        {
            warn("apt-mirror: can't decompress $compressed in process_index");
        }
        last;
    }

    my $stream;
    unless ( open $stream, "<", "$path/$index" )
    {
        warn("apt-mirror: can't open index $path/$index in process_index") unless $optional;
        return;
    }

    while ( my $package = <$stream> )
    {
        # Back to line mode so chomp strips single newlines inside the stanza.
        local $/ = "\n";
        chomp $package;

        # Split "Field: value" pairs; the leading element before the first
        # field name is discarded.
        my ( undef, %lines ) = split( /^([\w\-]+): */m, $package );

        chomp(%lines);

        if ( exists $lines{"Filename"} )
        {    # Packages index
            my $filename = remove_double_slashes( $path . "/" . $lines{"Filename"} );
            $skipclean{$filename} = 1;
            print { $files_fh{ALL} } $filename . "\n";
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $packages_hashes{$hash};
                print { $files_fh{$hash} } $lines{$index_hash} . "  " . $filename . "\n" if $lines{$index_hash};
            }
            if ( need_update( $mirror . "/" . $lines{"Filename"}, $lines{"Size"} ) )
            {
                # Pick the first hash in @hash_strength that this stanza
                # provides. The name is copied into its own variable because
                # a foreach loop variable is restored to its previous value
                # once the loop ends.
                my $found_hash;
                my $hashsum;
                foreach my $hash (@hash_strength)
                {
                    my $index_hash = $packages_hashes{$hash};
                    next unless $lines{$index_hash};
                    $found_hash = $hash;
                    $hashsum    = $lines{$index_hash};
                    last;
                }

                # NOTE(review): the Packages branch lists the local filename
                # in NEW while the Sources branch lists the download URL -
                # confirm which one consumers of NEW expect.
                print { $files_fh{NEW} } $filename . "\n";
                add_url_to_download( $uri . "/" . $lines{"Filename"}, $lines{"Size"}, $found_hash, $found_hash, $hashsum, 0 );
            }
        }
        else
        {    # Sources index
            $lines{"Directory"} = "" unless defined $lines{"Directory"};
            foreach my $hash (@hash_strength)
            {
                my $index_hash = $sources_hashes{$hash};
                next unless $lines{$index_hash};

                # Each non-empty line of the field is "<checksum> <size> <name>".
                foreach my $entry ( split( /\n/, $lines{$index_hash} ) )
                {
                    next if $entry eq '';
                    my @file = split ' ', $entry;
                    die("apt-mirror: invalid Sources format") if @file != 3;
                    my $download_url = $uri . "/" . $lines{"Directory"} . "/" . $file[2];
                    my $filename     = remove_double_slashes( $path . "/" . $lines{"Directory"} . "/" . $file[2] );
                    print { $files_fh{$hash} } $file[0] . "  " . $filename . "\n";

                    # A file is queued only the first time it is seen, i.e.
                    # under the first hash in @hash_strength that lists it.
                    next if $skipclean{$filename};
                    $skipclean{$filename} = 1;
                    print { $files_fh{ALL} } $filename . "\n";
                    if ( need_update( $mirror . "/" . $lines{"Directory"} . "/" . $file[2], $file[1] ) )
                    {
                        print { $files_fh{NEW} } $download_url . "\n";
                        add_url_to_download( $download_url, $file[1], $hash, $hash, $file[0], 0 );
                    }
                }
            }
        }
    }

    close $stream;
}

# Walk every configured repository and feed its downloaded indexes through
# process_index(), printing one progress letter per entry ("S" for source
# entries, "P" for binary entries).
print "Processing indexes: [";

foreach my $source_entry (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$source_entry};
    print "S";

    unless (@components)
    {
        # Flat repository: a single Sources index directly below the distribution path.
        process_index( $uri, "/$distribution/Sources" );
        next;
    }

    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/source/Sources" );
    }
}

foreach my $binary_entry (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$binary_entry};
    print "P";

    unless (@components)
    {
        # Flat repository: a single Packages index directly below the distribution path.
        process_index( $uri, "/$distribution/Packages" );
        next;
    }

    foreach my $component (@components)
    {
        process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );

        # The binary-all index is passed as optional, so a missing one is not reported.
        process_index( $uri, "/dists/$distribution/$component/binary-all/Packages", 1 );
    }
}

clear_stat_cache();

print "]\n\n";

# All intermediate lists are complete; close every handle that was opened for them.
close $_ foreach values %files_fh;

######################################################################################
## Main download

# Download every queued archive file into the mirror directory.
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

# The queue maps URL => size, so the values add up to the total download size.
my $need_bytes = 0;
$need_bytes += $_ foreach values %urls_to_download;

my $size_output = format_bytes($need_bytes);

print $size_output . " will be downloaded into archive.\n";

download_urls( "archive", sort keys %urls_to_download );

######################################################################################
## Copy skel to main archive

# Place a copy of $from at $to, preferring a hard link over a real copy.
#
#   $from  existing regular file; the call is a no-op if it is not one
#   $to    destination path; its directory is created when missing
#
# When the "unlink" setting equals 1 and the two files do not compare equal,
# the destination is removed first. If the destination already has a stat
# record identical to the source, nothing more is done. After a successful
# link or copy the source's access and modification times are applied to the
# destination.
#
# Warns and returns if neither link nor copy succeeds; dies if the
# timestamps cannot be set.
sub copy_file
{
    my $from = shift;
    my $to   = shift;

    my $dir = dirname($to);
    if ( !-f $from )
    {
        return;
    }
    -d $dir or make_path($dir);

    unlink($to) if get_variable("unlink") == 1 && compare( $from, $to ) != 0;

    my @stat_from = stat($from);
    if ( -f $to )
    {
        my @stat_to = stat($to);
        if ( "@stat_to" eq "@stat_from" )
        {
            return;
        }
    }

    # Try the hard link first; fall back to copying the contents.
    if ( !link( $from, $to ) && !copy( $from, $to ) )
    {
        warn("apt-mirror: can't copy $from to $to");
        return;
    }

    # Elements 8 and 9 of the stat list are atime and mtime.
    utime( @stat_from[ 8, 9 ], $to ) or die("apt-mirror: can't utime $to");
}

# Publish every downloaded release and index file from skel into the mirror.
foreach my $url ( @release_urls, @index_urls )
{
    # Strip the scheme in place; an entry without one is a fatal error.
    die("apt-mirror: invalid url in index_urls") unless $url =~ s[^(\w+)://][];

    my $sanitized_uri = sanitise_uri($url);
    my $mirror_root   = get_variable("mirror_path");
    my $mirrored_file = $mirror_root . "/" . $sanitized_uri;

    copy_file( get_variable("skel_path") . "/" . $sanitized_uri, $mirrored_file );

    # If we downloaded any files from a checksum location, now is the time to
    # populate the canonical filename.
    my $canonical_names = $hashsum_to_files{$sanitized_uri};
    next unless $canonical_names;

    foreach my $filename ( @{$canonical_names} )
    {
        copy_file( $mirrored_file, $mirror_root . "/" . $filename );

        # Also populate every other checksum location recorded for this file.
        foreach my $hashsum_filename ( @{ $file_to_hashsums{$filename} || [] } )
        {
            copy_file( $mirrored_file, $mirror_root . "/" . $hashsum_filename );
        }
    }
}

######################################################################################
## Make cleaning script

# Accumulators filled while scanning the mirror for removable entries:
# directories and files to delete, and the disk space they occupy.
my @rm_dirs           = ();
my @rm_files          = ();
my $unnecessary_bytes = 0;

# Symbolic links are never candidates for removal: always report "needed" (1),
# whatever path is passed in.
sub process_symlink
{
    my $symlink_is_needed = 1;
    return $symlink_is_needed;
}

# Decide whether one file found in the mirror is still needed.
#
#   $file  path of the file, relative to the current directory
#
# "~" and "+" are percent-encoded when the "_tilde" / "_plus" settings are
# on, before the name is looked up in %skipclean.
#
# Returns 1 if the file is listed in %skipclean. Otherwise the file is added
# to @rm_files, its allocated disk space is added to $unnecessary_bytes, and
# 0 is returned.
sub process_file
{
    my $file = shift;
    $file =~ s[~][%7E]g  if get_variable("_tilde");
    $file =~ s[\+][%2B]g if get_variable("_plus");
    return 1 if $skipclean{$file};
    push @rm_files, sanitise_uri($file);

    # Element 12 of the stat list is the number of allocated blocks; the code
    # counts 512 bytes per block. It is undef when stat() fails and may be
    # empty on systems that do not report it, so only count a usable value.
    my $blocks = ( stat($file) )[12];
    $unnecessary_bytes += $blocks * 512 if $blocks;
    return 0;
}

# Recursively scan $dir and work out whether anything below it is still needed.
#
#   $dir  directory path, relative to the current directory
#
# Returns 1 immediately if $dir itself is listed in %skipclean. Otherwise
# every entry is classified through process_directory(), process_file() and
# process_symlink(); if none of them reports "needed" the directory is added
# to @rm_dirs. Returns the combined "needed" flag.
#
# Dies if the directory cannot be opened.
sub process_directory
{
    my ($dir) = @_;
    my $is_needed = 0;

    if ( $skipclean{$dir} )
    {
        return 1;
    }

    opendir( my $dir_h, $dir ) or die "apt-mirror: can't opendir $dir: $!";

    # Skip the "." and ".." entries.
    my @entries = grep { !/^\.$/ && !/^\.\.$/ } readdir($dir_h);

    foreach my $entry (@entries)
    {
        my $item = $dir . "/" . $entry;

        # The three tests are independent on purpose: -f follows symlinks, so
        # a link to a regular file passes both the -f and the -l test.
        if ( -d $item && !-l $item )
        {
            $is_needed |= process_directory($item);
        }
        if ( -f $item )
        {
            $is_needed |= process_file($item);
        }
        if ( -l $item )
        {
            $is_needed |= process_symlink($item);
        }
    }
    closedir $dir_h;

    if ( !$is_needed )
    {
        push @rm_dirs, $dir;
    }
    return $is_needed;
}

# Scan the directories selected for cleaning, then either delete the
# unneeded entries right away ("_autoclean" on) or write a shell script that
# does so when run by hand.
chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

foreach ( keys %clean_directory )
{
    process_directory($_) if -d $_ && !-l $_;
}

# The script file is (re)created in both modes; with autoclean it stays empty.
open CLEAN, ">", get_variable("cleanscript") or die("apt-mirror: can't open clean script file");

my ( $i, $total ) = ( 0, scalar @rm_files );

if ( get_variable("_autoclean") )
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed...";

    chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

    foreach (@rm_files) { unlink $_; }
    foreach (@rm_dirs)  { rmdir $_; }

}
else
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n";
    print "Run " . get_variable("cleanscript") . " for this purpose.\n\n";

    print CLEAN "#!/bin/sh\n";
    print CLEAN "set -e\n\n";
    print CLEAN "cd " . quoted_path(get_variable("mirror_path")) . "\n\n";
    print CLEAN "echo 'Removing $total unnecessary files [$size_output]...'\n";
    foreach (@rm_files)
    {
        # Quote with the same helper used for the "cd" line above, so a path
        # containing a single quote cannot break out of the shell word.
        print CLEAN "rm -f " . quoted_path($_) . "\n";
        print CLEAN "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 500;
        print CLEAN "echo -n .\n" unless $i % 10;
        $i++;
    }
    print CLEAN "echo 'done.'\n";
    print CLEAN "echo\n\n";

    $i     = 0;
    $total = scalar @rm_dirs;
    print CLEAN "echo 'Removing $total unnecessary directories...'\n";
    foreach (@rm_dirs)
    {
        my $quoted_dir = quoted_path($_);
        print CLEAN "if test -d $quoted_dir; then rm -fr $quoted_dir; fi\n";
        print CLEAN "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 50;
        print CLEAN "echo -n .\n";
        $i++;
    }
    print CLEAN "echo 'done.'\n";
    print CLEAN "echo\n";

}

# Close the script in both modes so its contents are flushed to disk and the
# handle is not left open for the rest of the run.
close CLEAN;

# Make clean script executable: keep the existing permission bits and add
# execute permission for user, group and others.
my $perm = ( stat( get_variable("cleanscript") ) )[2] & 07777;
chmod( $perm | 0111, get_variable("cleanscript") );

# Optionally run the user's post-mirror hook.
if ( get_variable("run_postmirror") )
{
    my $postmirror_script = get_variable("postmirror_script");

    print "Running the Post Mirror script ...\n";
    print "(" . $postmirror_script . ")\n\n";

    # An executable hook is run directly (with one empty argument, which keeps
    # system() in its list form); anything else is handed to /bin/sh.
    my @command =
        -x $postmirror_script
      ? ( $postmirror_script, '' )
      : ( '/bin/sh', $postmirror_script );
    system(@command);

    print "\nPost Mirror script has completed. See above output for any possible errors.\n\n";
}

unlock_aptmirror();
