#!/bin/sh
# File: yummirror.sh
# Location: https://gitlab.com/bgstack15/coprmirror
# Author: bgstack15
# Startdate: 2021-08-16 09:57
# SPDX-License-Identifier: GPL-3.0
# Title: Script that copies a single copr yum repo
# Project: coprmirror
# Purpose: mirror copr locally, given baseurl; part of a larger set
# History:
# Usage:
#    Called by a larger script that catches the entire copr. This file only does a single yum repo for a single architecture and OS release. See coprmirror.sh
# References:
#    https://unix.stackexchange.com/questions/19701/yum-how-can-i-view-variables-like-releasever-basearch-yum0
#    https://gitlab.com/bgstack15/former-gists/-/blob/master/obsmirror.sh/obsmirror.sh
# Improve:
#    resign_repo is not implemented yet.
# Dependencies:
#    jq, yum-utils, python (with the yum module), wget, awk, sed, grep, zgrep, sha256sum, readlink, tee
# New files and directories are created group-writable.
umask 0002

# An optional config file named by YUMMIRROR_CONF is sourced first, so it can preset any of the variables below.
if test -n "${YUMMIRROR_CONF}" ; then
   . "${YUMMIRROR_CONF}"
fi

# Defaults apply only to values that are still unset or empty.
test -n "${logfile}" || logfile=./yummirror.$( date "+%FT%H%M%S" ).log
test -n "${inurl}" || inurl='https://copr-be.cloud.fedoraproject.org/results/bgstack15/stackrpms/epel-7-$basearch/'
test -n "${workdir}" || workdir=/tmp/copr
# USER is not guaranteed to be set (for example in a minimal environment), so ask id before giving up.
test -n "${thisuser}" || thisuser="${USER:-$( id -un 2>/dev/null || id -u )}"
# also use include_sources resign_repo gpgkey DRYRUN DEBUG VERBOSE

# Keep a copy of the original stdout on fd 3 for progress messages.
exec 3>&1
show() {
    # Write all arguments, joined into a single string, to fd 3 with no trailing newline.
    printf '%s' "$*" >&3
}

reset_show() {
    # Send a carriage return followed by the joined arguments to fd 3, with no trailing newline.
    printf '\r%s' "$*" >&3
}

## Functions
get_file() {
   # Fetch one URL into the matching path under ${workdir}, unless a copy with the expected checksum is already there.
   # call: get_file "${tu}" "${sha256sum}" "absolute"
   #    tu         full URL to fetch
   #    sha256sum  expected sha256 of the file; when empty the file is always fetched
   #    absolute   literal word "absolute": store as ${workdir}/<basename of tu> instead of the path relative to ${inurl}
   # Reads globals: inurl workdir DRYRUN VERBOSE
   # Sets globals: tn tf td gotten
   # Prints one line "<status> <url> -> <file>" on stdout; status is DOWNLOADED, skipped, FAILED or refused.
   # Returns non-zero when the download failed or the target would land outside ${workdir}.
   ___tu="${1}"
   ___sum="${2}"
   ___abs="${3}"
   ___rc=0
   if test "${___abs}" = "absolute" ; then
      tn="$( basename "${___tu}" )"
   else
      # The prefix is quoted so that URL characters such as [ ] ? * are matched literally, not as a glob.
      tn="${___tu##"${inurl}"}"
   fi
   tf="${workdir}/${tn}" ; tf="$( readlink -m "${tf}" )"
   # The path comes from remote metadata; never write outside workdir (e.g. an href containing ../).
   ___root="$( readlink -m "${workdir}" )" ; ___root="${___root%/}"
   case "${tf}" in
      "${___root}"/*) : ;;
      *)
         echo "refused    ${___tu} -> ${tf}"
         return 1
         ;;
   esac
   td="$( dirname "${tf}" )"
   test -d "${td}" || mkdir -p "${td}"
   gotten="skipped   "
   # DRYRUN empty means a real run; DRYRUN=metadata still fetches; any other value skips the download.
   if test -z "${DRYRUN}" || test "${DRYRUN}" = "metadata";
   then
      if test -z "${___sum}" || test "$( sha256sum "${tf}" 2>/dev/null | awk '{print $1}' )" != "${___sum}" ;
      then
         test -n "${VERBOSE}" && show "retrieving ${___tu}" 2>/dev/null || :
         # Download next to the target and move into place only on success, so a failed transfer neither truncates an existing copy nor leaves a partial file behind.
         if wget --content-disposition --no-verbose --quiet -O "${tf}.part" "${___tu}" && mv -f "${tf}.part" "${tf}" ;
         then
            gotten=DOWNLOADED
         else
            rm -f "${tf}.part"
            gotten="FAILED    "
            ___rc=1
         fi
      fi
   fi
   test -n "${VERBOSE}" && reset_show 2>/dev/null || :
   echo "${gotten} ${___tu} -> ${tf}"
   return "${___rc}"
}

## MAIN

# Interpret any yum vars in the inurl
interpret_yumvars() {
   # call: interpret_yumvars "${url}"
   # Replace the yum variables $basearch and $releasever in url with the values yum reports on this host.
   # Sets globals: raw basearch releasever newurl
   # Returns non-zero, leaving newurl untouched, when jq is missing or a variable used in url cannot be determined.
   ___url="${1}"
   command -v jq 1>/dev/null 2>&1 || { echo "Need jq to interpret yum vars in baseurl ${___url}. Aborted." ; return 1 ; }
   raw="$( python -c 'import yum, json; yb = yum.YumBase(); print json.dumps(yb.conf.yumvar, indent=2)' )"
   # validated on centos 7
   # Anything printed before the json object (presumably a yum status line) is dropped by starting at the opening brace.
   ___json="$( echo "${raw}" | sed -n '/^{/,$p' )"
   # -r prints bare strings; "// empty" yields nothing instead of the word null for a missing key.
   basearch="$( echo "${___json}" | jq -r '.basearch // empty' 2>/dev/null )"
   releasever="$( echo "${___json}" | jq -r '.releasever // empty' 2>/dev/null )"
   # Refuse to build a url with a blank where a variable was requested.
   case "${___url}" in
      *'$basearch'*) test -n "${basearch}" || { echo "Cannot determine yum var basearch for ${___url}. Aborted." 1>&2 ; return 1 ; } ;;
   esac
   case "${___url}" in
      *'$releasever'*) test -n "${releasever}" || { echo "Cannot determine yum var releasever for ${___url}. Aborted." 1>&2 ; return 1 ; } ;;
   esac
   # assumes the values contain no characters special to sed replacements (/ & \)
   newurl="$( echo "${___url}" | sed -e "s/\\\$basearch/${basearch}/g;" -e "s/\\\$releasever/${releasever}/g;" )"
}

case "${inurl}" in
   *'$'*)
      interpret_yumvars "${inurl}" || exit 1
      test -n "${DEBUG}" && {
         echo "Interpreting ${inurl} -> ${newurl}"
      } 1>&2
      inurl="${newurl}"
      ;;
esac

# clean up trailing slashes (all of them, not just one)
while test "${inurl}" != "${inurl%/}" ;
do
   inurl="${inurl%/}"
done

# sync to workdir
# NOTE(review): wget_verbose is prepared here but nothing in this section passes it to wget.
wget_verbose=--quiet
test -n "${VERBOSE}" && unset wget_verbose
# The whole group is the left side of a pipeline, so its stdout and stderr are appended to the logfile by tee.
{
   test "${DEBUG:-NONE}" = "FULL" && set -x
   echo "logfile=${logfile}"
   # Every relative path below (repodata/...) depends on being inside workdir, so stop this group if we cannot get there.
   mkdir -p "${workdir}"
   cd "${workdir}" || { echo "Cannot change to workdir ${workdir}. Aborted." ; exit 1 ; }

   # This file lists the files that define a yum repo
   for word in repodata/repomd.xml ;
   do
      # coprmirror.sh will pass DRYRUN=metadata if it has anything set in dryrun, and we interpret "metadata" here, because we absolutely need the repomd.xml and associated files no matter what, even for a dry run.
      DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%%/}/${word}"
   done

   # loop through the important files listed in that file.
   # read sha256sum, so we don't have to download these if we already have them.
   metadata_list="$( grep -iE '<checksum |href' repodata/repomd.xml | awk -F'"' '/checksum/{print $3} /href/{print $2}' | awk "/^>/{gsub(\"^>\",\"\",\$0);gsub(\"<.*$\",\"\",\$0);a=\$0;} /repodata/{print a,\$0}" )"
   echo "${metadata_list}" | while read -r sum word ;
   do
      # an empty list still yields one blank line from echo; there is nothing to fetch for it
      test -n "${word}" || continue
      DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%%/}/${word}" "${sum}" 3>/dev/null
   done

   # go ahead and fetch the gpgkey if resign_repo=no
   if echo "${resign_repo}" | grep -qiE 'yes|\<y\>|1|true' ;
   then
      # do something
      echo "Resigning functionality not built yet. Skipping..."
   else
      if test -n "${gpgkey}" ;
      then
         # fetch the gpgkey
         # note: yum variable parsing not yet implemented here. I would need to turn the variable parsing into a function and call it here.
         # cannot use get_file because it uses a relative path evaluation
         #wget --output-file="${workdir}/pubkey.gpg" "${gpgkey}"
         get_file "${gpgkey}" "" "absolute"
      fi
   fi

   # COPR always provides a primary.xml.gz file, which lists the assets to download
   # none.XXXXXXX is appended to the file arguments, presumably so zgrep never falls back to reading stdin when no primary.xml.gz is listed.
   full_list="$( zgrep -iE 'checksum|href' $( grep -oiE 'href=.*primary.xml.gz.*$' repodata/repomd.xml 2>/dev/null | awk -F'"' '{print $2}' ; echo "none.XXXXXXX" ) 2>/dev/null | awk "/checksum/{gsub(\".*\\\">\",\"\",\$0);gsub(\"</.*\",\"\",\$0);a=\$0;} /href/{gsub(\".*=\\\"\",\"\",\$0);gsub(\"\\\"/.*\",\"\",\$0);print a,\$0;}" )"
   #echo "FULL_LIST=${full_list}"

   # protect against the "metadata only" run
   # (presumably needed because the DRYRUN= prefix on the get_file calls above can persist in some sh implementations)
   test "${DRYRUN}" = "metadata" && DRYRUN=1
   echo "${full_list}" | while read -r sum word ;
   do
      # an empty list still yields one blank line from echo; there is nothing to fetch for it
      test -n "${word}" || continue
      if echo "${word}" | grep -qiE "\.src\.rpm" ;
      then
         # if a srpm, only get it if user has set include_sources.
         if echo "${include_sources}" | grep -qiE 'yes|\<y\>|1|true' ;
         then
            get_file "${inurl}/${word}" "${sum}"
         fi
      else
         # always get all regular rpms and other non-.src.rpm files if any
         get_file "${inurl}/${word}" "${sum}"
      fi
   done

   # hand the whole workdir to thisuser and the first group id that id -G reports for that user
   chown -R "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
} 2>&1 | tee -a "${logfile}"