From 0e26f6f3e288e0e251ca070866bc239cc69d8799 Mon Sep 17 00:00:00 2001 From: Simon Wessel <9195792+simon-wessel@users.noreply.github.com> Date: Tue, 23 Jan 2024 16:41:20 +0100 Subject: [PATCH] chore: improve performance of python script for hash download (#10335) The old version of the script downloaded all binaries and generated file checksums locally. This was a slow process since all binaries of all architectures needed to be downloaded. The new version simply downloads the .sha256 files containing the binary checksum in text form which saves a lot of traffic and time. --- scripts/download_hash.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/scripts/download_hash.py b/scripts/download_hash.py index 86365e333..a136e2065 100644 --- a/scripts/download_hash.py +++ b/scripts/download_hash.py @@ -4,22 +4,21 @@ # run this script to update roles/kubespray-defaults/defaults/main/download.yml # with new hashes. -import hashlib import sys import requests from ruamel.yaml import YAML -MAIN_YML = "../roles/kubespray-defaults/defaults/main/download.yml" +CHECKSUMS_YML = "../roles/kubespray-defaults/defaults/main/checksums.yml" -def open_main_yaml(): +def open_checksums_yaml(): yaml = YAML() yaml.explicit_start = True yaml.preserve_quotes = True yaml.width = 4096 - with open(MAIN_YML, "r") as main_yml: - data = yaml.load(main_yml) + with open(CHECKSUMS_YML, "r") as checksums_yml: + data = yaml.load(checksums_yml) return data, yaml @@ -28,7 +27,7 @@ def download_hash(versions): architectures = ["arm", "arm64", "amd64", "ppc64le"] downloads = ["kubelet", "kubectl", "kubeadm"] - data, yaml = open_main_yaml() + data, yaml = open_checksums_yaml() for download in downloads: checksum_name = f"{download}_checksums" @@ -36,15 +35,25 @@ def download_hash(versions): for version in versions: if not version.startswith("v"): version = f"v{version}" - url = f"https://dl.k8s.io/release/{version}/bin/linux/{arch}/{download}" - download_file = requests.get(url, allow_redirects=True) - download_file.raise_for_status() - sha256sum = hashlib.sha256(download_file.content).hexdigest() + url = f"https://dl.k8s.io/release/{version}/bin/linux/{arch}/{download}.sha256" + hash_file = requests.get(url, allow_redirects=True) + if hash_file.status_code == 404: + print(f"Unable to find hash file for release {version} (arch: {arch})") + continue + if hash_file.status_code != 200: + raise Exception(f"Received a non-200 HTTP response code: {hash_file.status_code} (arch: {arch}, version: {version})") + sha256sum = hash_file.content.decode().strip() + if len(sha256sum) != 64: + raise Exception(f"Checksum has an unexpected length: {len(sha256sum)} (arch: {arch}, version: {version})") + if checksum_name not in data: + data[checksum_name] = {} + if arch not in data[checksum_name]: + data[checksum_name][arch] = {} data[checksum_name][arch][version] = sha256sum - with open(MAIN_YML, "w") as main_yml: - yaml.dump(data, main_yml) - print(f"\n\nUpdated {MAIN_YML}\n") + with open(CHECKSUMS_YML, "w") as checksums_yml: + yaml.dump(data, checksums_yml) + print(f"\n\nUpdated {CHECKSUMS_YML}\n") def usage():