initial commit

author: B. Stack <bgstack15@gmail.com> 2021-08-17 08:10:28 -0400
committer: B. Stack <bgstack15@gmail.com> 2021-08-17 08:10:28 -0400
commit: f3dca9210fba77aacf60a77f11e5c3f5e4388011 (patch)
tree: 5eef352a493197952cbad551e9385025fbdd54d8
download: coprmirror-f3dca9210fba77aacf60a77f11e5c3f5e4388011.tar.gz
coprmirror-f3dca9210fba77aacf60a77f11e5c3f5e4388011.tar.bz2
coprmirror-f3dca9210fba77aacf60a77f11e5c3f5e4388011.zip
6 files changed, 262 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8be9c70
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*log*
+bgstack15-home/*
+*.conf
+*.gpg
+new/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..05f8fdd
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+# Readme for coprmirror
+`coprmirror` is a yum mirror solution, with a wrapper to mirror specifically a named [COPR](https://copr.fedorainfracloud.org/coprs/) repository.
+
+## Overview
+COPR is a distro-run community offering that lets users build yum/dnf repositories with their own software (primarily if not exclusively in rpm format). COPR performs the builds, and then hosts the binary and source rpms for client machines. Using native GNU/Linux tools, this project downloads the yum repositories that collectively make up a COPR: one repository for each release-releasever-basearch triplet. The end goal is to have a local copy of all the current yum repos. This does not copy the build assets or log files; just what a yum repository defines, plus the gpg public key.
+
+Notably this utility needs the yum python packages present only for evaluating yum variables in the inurl (baseurl from a .repo file), so if you have a string literal as the inurl value, you can run this on a system that does not have yum installed.
+
+## Using
+Configure coprmirror.conf from the provided .example file, and then run:
+
+    COPRMIRROR_CONF=coprmirror.conf VERBOSE=1 DEBUG=1 ./coprmirror.sh
+
+## Upstream
+Original content.
+The `get_file` function was improved after being imported from my [Reference 1](https://gitlab.com/bgstack15/former-gists/-/blob/master/obsmirror.sh/obsmirror.sh) script.
+
+## Alternatives
+1. I felt like ansible and system are overkill, but if you like those, this is perfect for you:
+   [https://github.com/ganto/ansible-copr_reposync](https://github.com/ganto/ansible-copr_reposync)
+
+## Dependencies
+wget, curl, grep, awk, sed, jq
+
+## References
+1. [obsmirror](https://gitlab.com/bgstack15/former-gists/-/blob/master/obsmirror.sh/obsmirror.sh)
+2. [Mirror an OBS repository locally — update 1](https://bgstack15.wordpress.com/2020/03/11/mirror-an-obs-repository-locally-update-1/)
+
+## Differences from upstream
+N/A
diff --git a/coprmirror.conf.example b/coprmirror.conf.example
new file mode 100644
index 0000000..eeb4310
--- /dev/null
+++ b/coprmirror.conf.example
@@ -0,0 +1,12 @@
+# vim: syntax=sh
+copr_url="https://copr-be.cloud.fedoraproject.org/results/"
+copr="bgstack15/stackrpms"
+# choose which environments to exclude. Regex.
+excludes="epel-6.*|fedora-3[12].*|rhelbeta-8.*"
+# These get sent to yummirror.sh
+workdir=~/dev/coprmirror/new
+logfile=./log/copr.$( date "+%FT%H%M%S" )
+resign_repo=no
+# default: no
+include_sources=
+this_user=apache
diff --git a/coprmirror.sh b/coprmirror.sh
new file mode 100755
index 0000000..9e53aa7
--- /dev/null
+++ b/coprmirror.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+# File: coprmirror.sh
+# Location: https://gitlab.com/bgstack15/coprmirror
+# Author: bgstack15
+# SPDX-License-Identifier: GPL-3.0
+# Startdate: 2021-08-16 14:34
+# Title: COPR Mirror Script
+# Project: coprmirror
+# Purpose: Mirror all available architectures of a COPR to local disk
+# History:
+# Usage:
+#    In a cronjob:
+#    COPRMIRROR_CONF=coprmirror.conf ./coprmirror.sh 1>/dev/null 2>&1
+# Reference:
+#    yummirror.sh
+# Improve:
+# Dependencies:
+#    yummirror.sh from this project
+
+# Flow: given a copr name, find all release-releasever-basearch entries, and pass each one to yummirror.sh
+test -n "${COPRMIRROR_CONF}" && . "${COPRMIRROR_CONF}"
+copr_url="${copr_url%%/}"
+copr="${copr%%/}"
+test -z "${logfile}" && logfile=./coprmirror.all.$( date "+%FT%H%M%S" ).log
+export DEBUG DRYRUN VERBOSE
+
+# fetch listing from site
+raw_listing="$( curl -s -L "${copr_url}/${copr}/" )"
+# split html, list only the directory entries, show the href, split to just text that is displayed which is same as path for copr, and then exclude any the admin requests, then sort
+listing="$( echo "${raw_listing}" | sed -n -r -e '/<table /p' | sed -r -e 's:(<\/tr>):\1\n:g;' | grep Directory | grep -oE '<a href=.{6,45}>.*<\/a>' | awk -F'[<>]' '$3 ~ /.*-.*-.*/{print $3}' | grep -viE "${excludes}" | sort )"
+echo "Please get each of these:"
+echo "${listing}" 
+
+# The gpg key of a copr is for the whole thing, not per-repo.
+test -z "${DRYRUN}" && {
+   mkdir -p "${workdir}"
+   # the -N prevents pubkey.gpg.1; --directory-prefix saves the key under workdir (--output-file would only name wget's log file)
+   wget -N --directory-prefix="${workdir}" "${copr_url}/${copr}/pubkey.gpg"
+}
+
+for word in ${listing} ;   # unquoted on purpose: one iteration per whitespace-separated entry
+do
+   echo "###########################################"
+   test -n "${DEBUG}" && echo env DEBUG=$DEBUG VERBOSE=$VERBOSE DRYRUN=$DRYRUN logfile="${logfile}.${word}" inurl="${copr_url}/${copr}/${word}/" thisuser=${this_user} workdir="${workdir}/${word}" YUMMIRROR_CONF= ./yummirror.sh
+   test -z "${DRYRUN}" && {
+      mkdir -p "${workdir}/${word}"
+   }
+   # gpgkey is passed empty because this outer script already fetched the key; the per-repo script must not fetch it again.
+   # YUMMIRROR_CONF is passed empty since every setting is supplied here. yummirror.sh reads the file owner from thisuser, so the conf's this_user is handed over under that name.
+   env DEBUG="${DEBUG}" \
+      VERBOSE="${VERBOSE}" \
+      DRYRUN="${DRYRUN}" \
+      logfile="${logfile}.${word}" \
+      inurl="${copr_url}/${copr}/${word}/" \
+      thisuser="${this_user}" \
+      workdir="${workdir}/${word}" \
+      YUMMIRROR_CONF= \
+      include_sources="${include_sources}" \
+      resign_repo="${resign_repo}" \
+      gpgkey= \
+      ./yummirror.sh
+done
diff --git a/yummirror.conf.example b/yummirror.conf.example
new file mode 100644
index 0000000..0161608
--- /dev/null
+++ b/yummirror.conf.example
@@ -0,0 +1,8 @@
+# vim: syntax=sh
+logfile=~/log/coprmirror.$( date "+%FT%H%M%S" ).log
+inurl='https://copr-be.cloud.fedoraproject.org/results/bgstack15/stackrpms/epel-$releasever-$basearch/'
+thisuser=apache
+workdir=~/dev/coprmirror/bgstack15-home
+include_sources=
+resign_repo=no
+gpgkey=https://copr-be.cloud.fedoraproject.org/results/bgstack15/stackrpms/pubkey.gpg
diff --git a/yummirror.sh b/yummirror.sh
new file mode 100755
index 0000000..d423515
--- /dev/null
+++ b/yummirror.sh
@@ -0,0 +1,145 @@
+#!/bin/sh
+# File: yummirror.sh
+# Location: https://gitlab.com/bgstack15/coprmirror
+# Author: bgstack15
+# Startdate: 2021-08-16 09:57
+# SPDX-License-Identifier: GPL-3.0
+# Title: Script that copies a single copr yum repo
+# Project: coprmirror
+# Purpose: mirror copr locally, given baseurl; part of a larger set
+# History:
+# Usage:
+#    Called by a larger script that catches the entire copr. This file only does a single yum repo for a single architecture and OS release. See coprmirror.sh
+# References:
+#    https://unix.stackexchange.com/questions/19701/yum-how-can-i-view-variables-like-releasever-basearch-yum0
+#    https://gitlab.com/bgstack15/former-gists/-/blob/master/obsmirror.sh/obsmirror.sh
+# Improve:
+#    resign_repo is not implemented yet.
+# Dependencies:
+#    jq, yum-utils, awk, sed, grep
+umask 0002
+
+test -n "${YUMMIRROR_CONF}" && . "${YUMMIRROR_CONF}"
+test -z "${logfile}" && logfile=./yummirror.$( date "+%FT%H%M%S" ).log
+test -z "${inurl}" && inurl='https://copr-be.cloud.fedoraproject.org/results/bgstack15/stackrpms/epel-7-$basearch/'
+test -z "${workdir}" && workdir=/tmp/copr
+test -z "${thisuser}" && thisuser=${USER}
+# also use include_sources resign_repo DEBUG VERBOSE
+
+exec 3>&1   # fd 3 is a duplicate of the stdout the script started with
+show() {   # write all arguments as one string to fd 3, without a trailing newline
+    printf '%s' "$*" >&3
+}
+
+reset_show() {   # write a carriage return followed by all arguments to fd 3
+    printf '\r%s' "$*" >&3
+}
+
+## Functions
+get_file() {
+   # call: get_file "${url}" "${sha256sum}" "absolute"
+   # Download url into workdir unless DRYRUN forbids it or the local copy already has the given sha256sum (an empty sum always downloads).
+   # With "absolute" the file lands directly in workdir under its basename; otherwise its path relative to inurl is kept.
+   # Prints one status line: "skipped", "DOWNLOADED" or "FAILED", followed by "url -> local file".
+   ___tu="${1}" ; ___sum="${2}" ; ___abs="${3}"
+   if test "${___abs}" = "absolute" ; then
+      tn="$( basename "${___tu}" )"
+   else
+      tn="${___tu##"${inurl}"}"   # inurl is quoted so characters such as ? * [ in it are matched literally, not as a glob
+   fi
+   tf="${workdir}/${tn}" ; tf="$( readlink -m "${tf}" )"
+   td="$( dirname "${tf}" )"
+   test -d "${td}" || mkdir -p "${td}"
+   gotten="skipped   "
+   if test -z "${DRYRUN}" || test "${DRYRUN}" = "metadata";
+   then
+      if test -z "${___sum}" || test "$( sha256sum "${tf}" 2>/dev/null | awk '{print $1}' )" != "${___sum}" ;
+      then
+         test -n "${VERBOSE}" && show "retrieving ${___tu}" 2>/dev/null || :
+         if wget --content-disposition --no-verbose --quiet -O "${tf}" "${___tu}" ; then gotten=DOWNLOADED ; else gotten="FAILED    " ; fi
+      fi
+   fi
+   test -n "${VERBOSE}" && reset_show 2>/dev/null || :
+   echo "${gotten} ${___tu} -> ${tf}"
+}
+
+## MAIN
+
+# Interpret any yum vars in the inurl; this section only runs when inurl contains a literal $
+if echo "${inurl}" | grep -qE '\$' ;
+then
+   echo "" | jq 1>/dev/null 2>&1 || { echo "Need jq to interpret yum vars in baseurl ${inurl}. Aborted." ; exit 1 ; }
+   raw="$( python -c 'import yum, json; yb = yum.YumBase(); print json.dumps(yb.conf.yumvar, indent=2)' )"   # dump yum's variables as JSON (python 2 print syntax)
+   # validated on centos 7. The first line of the python output is dropped (sed '1!p') before jq reads the rest.
+   basearch="$( echo "${raw}" | sed -n '1!p' | jq '.basearch' | tr -d '"' )"
+   releasever="$( echo "${raw}" | sed -n '1!p' | jq '.releasever' | tr -d '"' )"
+   #echo "${inurl}" | awk -v "basearch=${basearch}" -v "releasever=${releasever}" '{gsub("\$basearch",basearch,$0);gsub("\$releasever",releasever,$0);print}'
+   newurl="$( echo "${inurl}" | sed -r -e "s/\\\$basearch/${basearch}/g;" -e "s/\\\$releasever/${releasever}/g;" )"   # substitute every $basearch and $releasever with the values found above
+   test -n "${DEBUG}" && {
+      echo "Interpreting ${inurl} -> ${newurl}"
+   } 1>&2
+   inurl="${newurl}"
+fi
+
+# sync to workdir: fetch repomd.xml, the metadata files it lists, optionally the gpg key, then every package listed in primary.xml.gz; all output is appended to logfile
+wget_verbose=--quiet   # NOTE(review): wget_verbose is not referenced again in this script
+test -n "${VERBOSE}" && unset wget_verbose
+{
+   test "${DEBUG:-NONE}" = "FULL" && set -x
+   echo "logfile=${logfile}"
+   mkdir -p "${workdir}" && cd "${workdir}" || { echo "Cannot use workdir ${workdir}. Aborted." ; exit 1 ; }
+
+   # This file lists the files that define a yum repo
+   for word in repodata/repomd.xml ;
+   do
+      # Any non-empty DRYRUN is turned into "metadata" for this call, which get_file still downloads for, because we absolutely need the repomd.xml and associated files no matter what, even for a dry run.
+      DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%%/}/${word}"
+   done
+
+   # loop through the important files listed in that file.
+   # read sha256sum, so we don't have to download these if we already have them.
+   metadata_list="$( grep -iE '<checksum |href' repodata/repomd.xml | awk -F'"' '/checksum/{print $3} /href/{print $2}' | awk "/^>/{gsub(\"^>\",\"\",\$0);gsub(\"<.*$\",\"\",\$0);a=\$0;} /repodata/{print a,\$0}" )"
+   echo "${metadata_list}" | while read -r sum word ;
+   do
+      DRYRUN="${DRYRUN:+metadata}" get_file "${inurl%%/}/${word}" "${sum}" 3>/dev/null
+   done
+
+   # go ahead and fetch the gpgkey if resign_repo=no
+   if echo "${resign_repo}" | grep -qiE 'yes|\<y\>|1|true' ;
+   then
+      # do something
+      echo "Resigning functionality not built yet. Skipping..."
+   else
+      if test -n "${gpgkey}" ;
+      then
+         # fetch the gpgkey
+         # note: yum variable parsing not yet implemented here. I would need to turn the variable parsing into a function and call it here.
+         # the key URL is not below inurl, so get_file is called in "absolute" mode, which stores the file directly in workdir under its basename
+         #wget --output-file="${workdir}/pubkey.gpg" "${gpgkey}"
+         get_file "${gpgkey}" "" "absolute"
+      fi
+   fi
+
+   # COPR always provides a primary.xml.gz file, which lists the assets to download
+   full_list="$( zgrep -iE 'checksum|href' $( grep -oiE 'href=.*primary.xml.gz.*$' repodata/repomd.xml 2>/dev/null | awk -F'"' '{print $2}' ; echo "none.XXXXXXX" ) 2>/dev/null | awk "/checksum/{gsub(\".*\\\">\",\"\",\$0);gsub(\"</.*\",\"\",\$0);a=\$0;} /href/{gsub(\".*=\\\"\",\"\",\$0);gsub(\"\\\"/.*\",\"\",\$0);print a,\$0;}" )"
+   #echo "FULL_LIST=${full_list}"
+
+   # protect against the "metadata only" run: if DRYRUN still holds "metadata" from the calls above, make it a plain dry run so no packages are downloaded
+   test "${DRYRUN}" = "metadata" && DRYRUN=1
+   echo "${full_list}" | while read -r sum word ;
+   do
+      if echo "${word}" | grep -qiE "\.src\.rpm" ;
+      then
+         # if a srpm, only get it if user has set include_sources.
+         if echo "${include_sources}" | grep -qiE 'yes|\<y\>|1|true' ;
+         then
+            get_file "${inurl%%/}/${word}" "${sum}"
+         fi
+      else
+         # always get all regular rpms and other non-.src.rpm files if any
+         get_file "${inurl%%/}/${word}" "${sum}"
+      fi
+   done
+
+   chown -R "${thisuser}:$( id -G "${thisuser}" | awk '{print $1}' )" "${workdir}"
+} 2>&1 | tee -a "${logfile}"
author	B. Stack <bgstack15@gmail.com>	2021-08-17 08:10:28 -0400
committer	B. Stack <bgstack15@gmail.com>	2021-08-17 08:10:28 -0400
commit	f3dca9210fba77aacf60a77f11e5c3f5e4388011 (patch)
tree	5eef352a493197952cbad551e9385025fbdd54d8
download	coprmirror-f3dca9210fba77aacf60a77f11e5c3f5e4388011.tar.gz coprmirror-f3dca9210fba77aacf60a77f11e5c3f5e4388011.tar.bz2 coprmirror-f3dca9210fba77aacf60a77f11e5c3f5e4388011.zip