|
#!/usr/bin/env bash
#
# hfd - resumable Hugging Face model/dataset downloader (aria2c/wget backends).

# Fail a pipeline when any stage fails (curl | jq, grep | sed chains below).
set -o pipefail

# ANSI color codes for terminal output; expanded at print time via printf %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# On Ctrl-C: remind the user that downloads are resumable, then exit non-zero.
# Colors are passed as %b arguments, not embedded in the format string, to
# match the printf style used throughout the rest of the script.
trap 'printf "%b\nDownload interrupted. You can resume by re-running the command.\n%b" "$YELLOW" "$NC"; exit 1' INT
|
|
|
# Print the usage/help text and terminate the script.
# $1 - exit status to terminate with (defaults to 0; callers pass 1 on
#      unrecognized arguments so misuse is distinguishable from -h).
display_help() {
    local exit_code="${1:-0}"

    # Unquoted EOF would expand $variables, but the help text contains none.
    cat << EOF
Usage:
    hfd <REPO_ID> [--include include_pattern1 include_pattern2 ...] [--exclude exclude_pattern1 exclude_pattern2 ...] [--hf_username username] [--hf_token token] [--tool aria2c|wget] [-x threads] [-j jobs] [--dataset] [--local-dir path] [--revision rev] [--retries count] [--retry-wait seconds] [--retry-max-wait seconds] [--timeout seconds] [--proxy-list path]

Description:
    Downloads a model or dataset from Hugging Face using the provided repo ID.

Arguments:
    REPO_ID         The Hugging Face repo ID (Required)
                    Format: 'org_name/repo_name' or legacy format (e.g., gpt2)

Options:
    include/exclude_pattern The patterns to match against file path, supports wildcard characters.
        e.g., '--exclude *.safetensor *.md', '--include vae/*'.
    --include       (Optional) Patterns to include files for downloading (supports multiple patterns).
    --exclude       (Optional) Patterns to exclude files from downloading (supports multiple patterns).
    --hf_username   (Optional) Hugging Face username for authentication (not email).
    --hf_token      (Optional) Hugging Face token for authentication.
    --tool          (Optional) Download tool to use: aria2c (default) or wget.
    -x              (Optional) Number of download threads for aria2c (default: 4).
    -j              (Optional) Number of concurrent downloads for aria2c (default: 5).
    --dataset       (Optional) Flag to indicate downloading a dataset.
    --local-dir     (Optional) Directory path to store the downloaded data.
                    Defaults to the current directory with a subdirectory named 'repo_name'
                    if REPO_ID is composed of 'org_name/repo_name'.
    --revision      (Optional) Model/Dataset revision to download (default: main).
    --retries       (Optional) Retry attempts for metadata/download operations. Use 0 to retry forever.
                    Default: 0.
    --retry-wait    (Optional) Initial wait in seconds between retries (default: 15).
    --retry-max-wait (Optional) Maximum wait in seconds between retries (default: 300).
    --timeout       (Optional) Network timeout in seconds for curl/aria2c/wget (default: 120).
    --proxy-list    (Optional) File with proxy URL prefixes, one per line. When using aria2c,
                    each prefix is added as a fallback URI for every file. Default: proxy_list.txt.

Examples:
    hfd gpt2
    hfd bigscience/bloom-560m --exclude *.safetensors
    hfd meta-llama/Llama-2-7b --hf_username myuser --hf_token mytoken -x 4 --retries 0 --retry-wait 20
    hfd lavita/medical-qa-shared-task-v1-toy --dataset
    hfd bartowski/Phi-3.5-mini-instruct-exl2 --revision 5_0
    hfd Qwen/Qwen2.5-Coder-32B-Instruct-GGUF --include *q2_k*.gguf --proxy-list proxy_list.txt
EOF
    exit "$exit_code"
}
|
|
|
# Abort with an error unless $2 looks like a real option value.
# $1 - option name (only used in the error message).
# $2 - candidate value; missing/empty, or starting with '--' (i.e. the
#      next long option), counts as "no value supplied".
require_option_arg() {
    local opt_name="$1"
    local opt_value="${2-}"

    # Guard clause: a usable value is non-empty and not another long option.
    if [[ -n "$opt_value" && "$opt_value" != --* ]]; then
        return 0
    fi

    printf "%b[Error] %s requires a value.%b\n" "$RED" "$opt_name" "$NC" >&2
    exit 1
}
|
|
|
# Exit with an error unless $2 is a positive (>= 1) base-10 integer.
# $1 - human-readable option name used in the error message.
validate_positive_number() {
    local label="$1"
    local candidate="$2"

    # Leading zeros and signs are rejected on purpose: only ^[1-9][0-9]*$.
    [[ "$candidate" =~ ^[1-9][0-9]*$ ]] && return 0

    printf "%b[Error] %s must be a positive integer.%b\n" "$RED" "$label" "$NC" >&2
    exit 1
}
|
|
|
# Exit with an error unless $2 is a non-negative base-10 integer (0 allowed).
# $1 - human-readable option name used in the error message.
validate_nonnegative_number() {
    local label="$1"
    local candidate="$2"

    [[ "$candidate" =~ ^[0-9]+$ ]] && return 0

    printf "%b[Error] %s must be a non-negative integer.%b\n" "$RED" "$label" "$NC" >&2
    exit 1
}
|
|
|
# Exit unless $2 is a positive integer no larger than $3.
# $1 - option label for error messages. Delegates the "positive integer"
# check to validate_positive_number, then enforces the upper bound.
validate_bounded_number() {
    local label="$1"
    local candidate="$2"
    local upper="$3"

    validate_positive_number "$label" "$candidate"

    # Safe to use arithmetic comparison: the value is a validated integer.
    if (( candidate > upper )); then
        printf "%b[Error] %s must be between 1 and %s.%b\n" "$RED" "$label" "$upper" "$NC" >&2
        exit 1
    fi
}
|
|
|
# Exit with an error if the given executable is not available on PATH.
# $1 - command name to look up.
check_command() {
    local tool_name="$1"

    command -v "$tool_name" >/dev/null 2>&1 && return 0

    printf "%b%s is not installed. Please install it first.%b\n" "$RED" "$tool_name" "$NC" >&2
    exit 1
}
|
|
|
# Print the next backoff delay: double the current one, capped at the
# RETRY_MAX_WAIT_SECONDS global. A delay that already reached (or passed)
# the cap stays at the cap.
# $1 - current delay in seconds.
next_retry_delay() {
    local current="$1"
    local doubled

    if (( current >= RETRY_MAX_WAIT_SECONDS )); then
        printf "%s" "$RETRY_MAX_WAIT_SECONDS"
        return
    fi

    doubled=$(( current * 2 ))
    if (( doubled > RETRY_MAX_WAIT_SECONDS )); then
        doubled="$RETRY_MAX_WAIT_SECONDS"
    fi

    printf "%s" "$doubled"
}
|
|
|
# Run a command until it succeeds, with exponential backoff between tries.
# $1 - human-readable description for log messages.
# $@ - command (and its arguments) to execute.
# Globals read: RETRY_WAIT_SECONDS (initial delay), RETRY_MAX_ATTEMPTS
# (0 = retry forever); uses next_retry_delay() for the backoff schedule.
# Returns 0 on success, or the command's last exit code once the attempt
# budget is exhausted.
run_with_retry() {
    local what="$1"
    shift

    local try=1
    local wait_s="$RETRY_WAIT_SECONDS"
    local rc=0

    while :; do
        "$@"
        rc=$?
        [[ "$rc" -eq 0 ]] && return 0

        # A positive attempt budget that is used up ends the loop.
        if [[ "$RETRY_MAX_ATTEMPTS" -gt 0 && "$try" -ge "$RETRY_MAX_ATTEMPTS" ]]; then
            printf "%b%s failed after %d attempt(s).%b\n" "$RED" "$what" "$try" "$NC" >&2
            return "$rc"
        fi

        printf "%b%s failed with exit code %d (attempt %d). Retrying in %ss...%b\n" \
            "$YELLOW" "$what" "$rc" "$try" "$wait_s" "$NC" >&2
        sleep "$wait_s"

        try=$(( try + 1 ))
        wait_s=$(next_retry_delay "$wait_s")
    done
}
|
|
|
# Convert shell-style glob patterns into a single ERE alternation.
# Each argument is a glob in which '*' matches anything; every other
# regex metacharacter is escaped so it matches literally. The resulting
# per-pattern regexes are joined with '|'. With no arguments, prints
# nothing and returns 0 (callers treat an empty regex as "no filter").
build_pattern_regex() {
    if [[ "$#" -eq 0 ]]; then
        return 0
    fi

    # Escape every ERE metacharacter except '*', then translate the glob
    # '*' into '.*'. Previously only '.' was escaped, so patterns that
    # contained '+', '?', '(', '[', '{', '|', '^' or '$' were silently
    # misinterpreted as regular expressions.
    printf '%s\n' "$@" \
        | sed 's/[][\.^$+?(){}|]/\\&/g; s/\*/.*/g' \
        | paste -sd '|' -
}
|
|
|
# Print a cheap content fingerprint ("crc-size" from cksum) for a file,
# or the literal string "missing" (no trailing newline) when the file
# does not exist.
# $1 - path to fingerprint.
file_fingerprint() {
    local target="$1"

    if [[ -f "$target" ]]; then
        cksum "$target" | awk '{print $1 "-" $2}'
    else
        printf "missing"
    fi
}
|
|
|
# Print a "crc-length" fingerprint of the given string. printf '%s' is
# used deliberately so no trailing newline enters the checksum.
# $1 - string to fingerprint.
string_fingerprint() {
    local text="$1"
    printf '%s' "$text" | cksum | awk '{print $1 "-" $2}'
}
|
|
|
# Print a one-line snapshot of every setting that affects the generated
# file list. should_regenerate_filelist compares this against the saved
# snapshot to decide whether a cached list can be reused.
generate_command_string() {
    local proxy_mark="disabled"

    # Proxy prefixes only influence aria2c sessions, so only then does the
    # proxy file's content become part of the snapshot.
    if [[ "$TOOL" == "aria2c" ]]; then
        proxy_mark=$(file_fingerprint "$PROXY_LIST_FILE")
    fi

    local -a parts=(
        "REPO_ID=$REPO_ID"
        "TOOL=$TOOL"
        "INCLUDE_PATTERNS=${INCLUDE_PATTERNS[*]}"
        "EXCLUDE_PATTERNS=${EXCLUDE_PATTERNS[*]}"
        "DATASET=${DATASET:-0}"
        "HF_TOKEN=${HF_TOKEN:-}"
        "HF_ENDPOINT=$HF_ENDPOINT"
        "REVISION=$REVISION"
        "PROXY_LIST_FILE=$PROXY_LIST_FILE"
        "PROXY_LIST_FINGERPRINT=$proxy_mark"
    )

    # "${parts[*]}" joins the fields with single spaces, matching the
    # historical concatenated format exactly.
    printf '%s\n' "${parts[*]}"
}
|
|
|
# Decide whether the download file list must be rebuilt.
# Returns 0 (regenerate) when either the cached file list or the saved
# command snapshot is missing, or when the snapshot no longer matches the
# current configuration; the fresh snapshot is recorded in that case.
# Returns 1 when the cached file list is still valid.
should_regenerate_filelist() {
    local snapshot_path="$LOCAL_DIR/.hfd/last_download_command"
    local now

    now=$(generate_command_string)

    # Reuse the cached list only when both artifacts exist and the saved
    # snapshot matches the current configuration byte-for-byte.
    if [[ -f "$LOCAL_DIR/$fileslist_file" && -f "$snapshot_path" ]] \
        && [[ "$now" == "$(cat "$snapshot_path")" ]]; then
        return 1
    fi

    printf '%s\n' "$now" > "$snapshot_path"
    return 0
}
|
|
|
# Populate the global PROXY_PREFIXES array from PROXY_LIST_FILE, skipping
# blank lines and '#' comment lines and trimming surrounding whitespace.
# Warns (but continues) when an explicitly configured list is missing, and
# when loaded prefixes will be ignored because wget is the active tool.
load_proxy_prefixes() {
    PROXY_PREFIXES=()

    if [[ -f "$PROXY_LIST_FILE" ]]; then
        local raw_line trimmed
        # '|| [[ -n ... ]]' keeps a final line that lacks a trailing newline.
        while IFS= read -r raw_line || [[ -n "$raw_line" ]]; do
            # Trim leading/trailing whitespace with pure bash expansions.
            trimmed="${raw_line#"${raw_line%%[![:space:]]*}"}"
            trimmed="${trimmed%"${trimmed##*[![:space:]]}"}"
            [[ -z "$trimmed" || "$trimmed" == \#* ]] && continue
            PROXY_PREFIXES+=("$trimmed")
        done < "$PROXY_LIST_FILE"
    elif [[ "$PROXY_LIST_CONFIGURED" -eq 1 ]]; then
        printf "%bProxy list %s not found. Continuing without proxy prefixes.%b\n" \
            "$YELLOW" "$PROXY_LIST_FILE" "$NC"
    fi

    if [[ "$TOOL" == "wget" && "${#PROXY_PREFIXES[@]}" -gt 0 ]]; then
        printf "%bProxy prefixes are only used with aria2c. Ignoring %s for wget.%b\n" \
            "$YELLOW" "$PROXY_LIST_FILE" "$NC"
    fi
}
|
|
|
# Print (no trailing newline) the resolve-URL for one repo file:
#   <endpoint>/<download path>/resolve/<revision>/<file path>
# $1 - file path relative to the repo root.
build_download_url() {
    local rel_path="$1"
    local url="$HF_ENDPOINT/$DOWNLOAD_API_PATH/resolve/$REVISION/$rel_path"
    printf '%s' "$url"
}
|
|
|
# Emit one aria2c input-file stanza for a repo file on stdout: a line of
# tab-separated mirror URIs (primary URL plus every proxy-prefixed
# fallback), then indented per-file options, then a blank separator line.
# $1 - file path relative to the repo root.
# Globals read: PROXY_PREFIXES, HF_TOKEN (optional auth header).
write_aria2_entry() {
    local rel_path="$1"
    local primary
    primary=$(build_download_url "$rel_path")

    # aria2c treats tab-separated URIs on one line as mirrors of one file.
    local uri_line="$primary"
    local prefix
    for prefix in "${PROXY_PREFIXES[@]}"; do
        uri_line+=$'\t'"${prefix%/}/$primary"
    done
    printf '%s\n' "$uri_line"

    # Lines starting with whitespace are per-file options for the URIs above.
    printf ' dir=%s\n' "$(dirname "$rel_path")"
    printf ' out=%s\n' "$(basename "$rel_path")"
    if [[ -n "${HF_TOKEN:-}" ]]; then
        printf ' header=Authorization: Bearer %s\n' "$HF_TOKEN"
    fi

    # A blank line terminates the stanza.
    printf '\n'
}
|
|
|
# Emit one newline-terminated URL for wget's --input-file.
# $1 - file path relative to the repo root.
write_wget_entry() {
    local rel_path="$1"
    printf '%s\n' "$(build_download_url "$rel_path")"
}
|
|
|
# Print the repo file paths (one per line) contained in the cached metadata
# JSON in $RESPONSE, filtered by the global INCLUDE_REGEX / EXCLUDE_REGEX
# EREs (an empty regex disables that side of the filtering). Prefers jq;
# falls back to a grep/sed scrape of "rfilename" fields when jq is absent.
list_matching_files() {
    if command -v jq >/dev/null 2>&1; then
        # jq path: walk .siblings[].rfilename and apply both filters.
        # The regexes are passed via --arg, never string-interpolated.
        printf '%s' "$RESPONSE" | jq -r \
            --arg include_regex "$INCLUDE_REGEX" \
            --arg exclude_regex "$EXCLUDE_REGEX" \
            '
            .siblings[]
            | .rfilename
            | select(
                . != null
                and ($include_regex == "" or test($include_regex))
                and ($exclude_regex == "" or (test($exclude_regex) | not))
              )
            '
    else
        printf "%b[Warning] jq not installed, using grep/awk for metadata parsing (slower). Consider installing jq for better parsing performance.%b\n" \
            "$YELLOW" "$NC" >&2

        # Textual fallback: extract every "rfilename":"..." value.
        # NOTE(review): assumes filenames contain no escaped quotes — confirm.
        local files
        files=$(printf '%s' "$RESPONSE" \
            | grep -o '"rfilename"[[:space:]]*:[[:space:]]*"[^"]*"' \
            | sed -E 's/^"rfilename"[[:space:]]*:[[:space:]]*"//; s/"$//')

        # Apply include first, then exclude, mirroring the jq branch.
        if [[ -n "$INCLUDE_REGEX" ]]; then
            files=$(printf '%s\n' "$files" | grep -E "$INCLUDE_REGEX")
        fi
        if [[ -n "$EXCLUDE_REGEX" ]]; then
            files=$(printf '%s\n' "$files" | grep -vE "$EXCLUDE_REGEX")
        fi

        printf '%s\n' "$files"
    fi
}
|
|
|
# Build the download list at $LOCAL_DIR/$fileslist_file from the repo
# metadata: aria2c stanzas or plain wget URLs depending on $TOOL.
# Exits with an error (removing the empty list first) when the
# include/exclude filters match no files at all.
generate_filelist() {
    local target="$LOCAL_DIR/$fileslist_file"
    local entry=""
    local matched=0

    # A brace group (not a subshell) keeps $matched visible after the loop
    # while everything the entry writers emit is redirected into the list.
    {
        while IFS= read -r entry; do
            [[ -n "$entry" ]] || continue
            matched=1
            case "$TOOL" in
                aria2c) write_aria2_entry "$entry" ;;
                *)      write_wget_entry "$entry" ;;
            esac
        done < <(list_matching_files)
    } > "$target"

    if (( matched == 0 )); then
        rm -f "$target"
        printf "%b[Error] No files matched the provided include/exclude filters.%b\n" "$RED" "$NC" >&2
        exit 1
    fi
}
|
|
|
# Single attempt to fetch repo metadata from $API_URL with curl.
# Globals written:
#   LAST_METADATA_STATUS_CODE - HTTP status reported by curl's -w
#   LAST_METADATA_RESPONSE    - response body text (possibly empty)
#   RESPONSE                  - set to the body on success only
# On success the body is persisted to $METADATA_FILE via a tmp-file + mv.
# Returns: 0 on HTTP 200; 22 on a definitive 401/403/404 (not worth
# retrying); 1 on any other curl or HTTP failure.
fetch_metadata_once() {
    local tmp_metadata_file="$LOCAL_DIR/.hfd/repo_metadata.tmp"
    local -a curl_cmd
    local curl_status=0

    rm -f "$tmp_metadata_file"

    # -w '%{http_code}' prints the status on stdout while -o sends the body
    # to the temp file, so a single invocation yields both.
    curl_cmd=(
        curl
        -L
        -sS
        --connect-timeout "$NETWORK_TIMEOUT"
        --max-time "$((NETWORK_TIMEOUT * 2))"
        -w "%{http_code}"
        -o "$tmp_metadata_file"
    )
    if [[ -n "${HF_TOKEN:-}" ]]; then
        curl_cmd+=(-H "Authorization: Bearer $HF_TOKEN")
    fi

    LAST_METADATA_STATUS_CODE=$("${curl_cmd[@]}" "$API_URL")
    curl_status=$?
    LAST_METADATA_RESPONSE=""
    [[ -f "$tmp_metadata_file" ]] && LAST_METADATA_RESPONSE=$(cat "$tmp_metadata_file")

    # Success: move the body into the cache file and expose it in RESPONSE.
    if [[ "$curl_status" -eq 0 && "$LAST_METADATA_STATUS_CODE" == "200" ]]; then
        mv "$tmp_metadata_file" "$METADATA_FILE"
        RESPONSE="$LAST_METADATA_RESPONSE"
        return 0
    fi

    rm -f "$tmp_metadata_file"

    # Permanent failures (bad token / gated repo / missing repo): tell the
    # caller not to retry.
    if [[ "$curl_status" -eq 0 && ( "$LAST_METADATA_STATUS_CODE" == "401" || "$LAST_METADATA_STATUS_CODE" == "403" || "$LAST_METADATA_STATUS_CODE" == "404" ) ]]; then
        return 22
    fi

    return 1
}
|
|
|
# Fetch repo metadata with exponential backoff (same policy as
# run_with_retry), plus special handling of fetch_metadata_once's status
# protocol: 22 (definitive 401/403/404) aborts immediately instead of
# retrying. Globals read: RETRY_WAIT_SECONDS, RETRY_MAX_ATTEMPTS
# (0 = unlimited), API_URL; LAST_METADATA_* come from fetch_metadata_once.
# Returns 0 once metadata is saved, 1 on permanent failure or exhaustion.
fetch_and_save_metadata() {
    local attempt=1
    local delay="$RETRY_WAIT_SECONDS"
    local status=0

    while true; do
        fetch_metadata_once
        status=$?

        if [[ "$status" -eq 0 ]]; then
            return 0
        fi

        # Auth/not-found: retrying cannot help, so fail fast.
        if [[ "$status" -eq 22 ]]; then
            printf "%b[Error] Failed to fetch metadata from %s. HTTP status code: %s.%b\n%s\n" \
                "$RED" "$API_URL" "${LAST_METADATA_STATUS_CODE:-unknown}" "$NC" "$LAST_METADATA_RESPONSE" >&2
            return 1
        fi

        # A positive retry budget that is used up ends the loop.
        if [[ "$RETRY_MAX_ATTEMPTS" -gt 0 && "$attempt" -ge "$RETRY_MAX_ATTEMPTS" ]]; then
            printf "%b[Error] Failed to fetch metadata from %s after %d attempt(s). Last HTTP status code: %s.%b\n%s\n" \
                "$RED" "$API_URL" "$attempt" "${LAST_METADATA_STATUS_CODE:-unknown}" "$NC" "$LAST_METADATA_RESPONSE" >&2
            return 1
        fi

        printf "%bFetching metadata failed (curl/http: %s, attempt %d). Retrying in %ss...%b\n" \
            "$YELLOW" "${LAST_METADATA_STATUS_CODE:-curl-error}" "$attempt" "$delay" "$NC" >&2
        sleep "$delay"

        attempt=$((attempt + 1))
        delay=$(next_retry_delay "$delay")
    done
}
|
|
|
# Abort (exit 1) when the repo metadata marks the repo as gated and the
# user supplied no --hf_username/--hf_token pair.
# $1 - raw metadata JSON returned by the HF API.
# The "gated" field may be false, true, or a string such as "auto"; any
# value other than false means credentials are required.
check_authentication() {
    local response="$1"
    local gated="false"

    if command -v jq >/dev/null 2>&1; then
        gated=$(printf '%s' "$response" | jq -r '.gated // false')
    else
        # Fallback without jq: gated unless the key's value is literally
        # false. (The previous check, grep '"gated":[^f]', misread
        # pretty-printed JSON such as '"gated": false' as gated because
        # the space after the colon matched [^f].)
        if printf '%s' "$response" | grep -Eq '"gated"[[:space:]]*:' \
            && ! printf '%s' "$response" | grep -Eq '"gated"[[:space:]]*:[[:space:]]*false'; then
            gated="true"
        fi
    fi

    if [[ "$gated" != "false" && ( -z "${HF_TOKEN:-}" || -z "${HF_USERNAME:-}" ) ]]; then
        printf "%bThe repository requires authentication, but --hf_username and --hf_token were not passed. Please get a token from https://huggingface.co/settings/tokens.\nExiting.\n%b" \
            "$RED" "$NC" >&2
        exit 1
    fi
}
|
|
|
# Run the bulk download from $fileslist_file with the selected tool.
# Assumes the caller has already cd'd into $LOCAL_DIR (the list uses
# relative dir/out paths). Retrying is delegated to run_with_retry, hence
# --max-tries=1 / --tries=1 here so the tools fail fast instead of
# retrying internally.
perform_download() {
    local -a cmd

    if [[ "$TOOL" == "aria2c" ]]; then
        cmd=(
            aria2c
            --console-log-level=error
            --file-allocation=none
            --allow-overwrite=false
            --timeout="$NETWORK_TIMEOUT"
            --connect-timeout="$NETWORK_TIMEOUT"
            --lowest-speed-limit=1K          # treat a stalled transfer as failed
            --max-tries=1                    # outer retry loop handles retries
            -x "$THREADS"                    # connections per server
            -j "$CONCURRENT"                 # files downloaded in parallel
            -s "$THREADS"                    # split each file this many ways
            -k 1M                            # minimum split size
            -c                               # resume partial downloads
            -i "$fileslist_file"             # URI list generated earlier
            --save-session="$fileslist_file" # rewrite list with remaining work
        )
    else
        cmd=(
            wget
            -x                               # recreate directories from the URL
            -nH                              # drop the hostname component
            "--cut-dirs=$CUT_DIRS"           # strip leading URL path segments
            --tries=1                        # outer retry loop handles retries
            --retry-connrefused
            --waitretry=1
            "--timeout=$NETWORK_TIMEOUT"
            "--read-timeout=$NETWORK_TIMEOUT"
        )
        if [[ -n "${HF_TOKEN:-}" ]]; then
            cmd+=(--header="Authorization: Bearer $HF_TOKEN")
        fi
        cmd+=(
            --input-file="$fileslist_file"
            --continue                       # resume partial downloads
        )
    fi

    "${cmd[@]}"
}
|
|
|
# --- Entry point: argument defaults -----------------------------------------

# With no arguments or a leading -h/--help, show usage and exit 0.
[[ -z "${1:-}" || "$1" == "-h" || "$1" == "--help" ]] && display_help 0

REPO_ID=$1
shift

# Defaults. HF_ENDPOINT, the RETRY_* knobs, NETWORK_TIMEOUT and
# PROXY_LIST_FILE may be pre-seeded from the environment and are then
# left untouched (${VAR:-default}).
TOOL="aria2c"
THREADS=4
CONCURRENT=5
HF_ENDPOINT=${HF_ENDPOINT:-"https://huggingface.co"}
RETRY_MAX_ATTEMPTS=${RETRY_MAX_ATTEMPTS:-0}       # 0 = retry forever
RETRY_WAIT_SECONDS=${RETRY_WAIT_SECONDS:-15}
RETRY_MAX_WAIT_SECONDS=${RETRY_MAX_WAIT_SECONDS:-300}
NETWORK_TIMEOUT=${NETWORK_TIMEOUT:-120}
# Remember whether the proxy list was configured explicitly (env var set,
# even if empty); only then is a missing file worth a warning later.
PROXY_LIST_CONFIGURED=0
[[ -n "${PROXY_LIST_FILE+x}" ]] && PROXY_LIST_CONFIGURED=1
PROXY_LIST_FILE=${PROXY_LIST_FILE:-"proxy_list.txt"}
INCLUDE_PATTERNS=()
EXCLUDE_PATTERNS=()
PROXY_PREFIXES=()
REVISION="main"
|
|
|
# --- Command-line parsing ---------------------------------------------------
while [[ $# -gt 0 ]]; do
    case $1 in
        --include)
            # Greedily consume pattern arguments until the next flag
            # (anything starting with '--' or a short '-x'-style option).
            shift
            while [[ $# -gt 0 && ! "$1" =~ ^-- && ! "$1" =~ ^-[^-] ]]; do
                INCLUDE_PATTERNS+=("$1")
                shift
            done
            ;;
        --exclude)
            shift
            while [[ $# -gt 0 && ! "$1" =~ ^-- && ! "$1" =~ ^-[^-] ]]; do
                EXCLUDE_PATTERNS+=("$1")
                shift
            done
            ;;
        --hf_username)
            require_option_arg "$1" "${2-}"
            HF_USERNAME="$2"
            shift 2
            ;;
        --hf_token)
            require_option_arg "$1" "${2-}"
            HF_TOKEN="$2"
            shift 2
            ;;
        --tool)
            require_option_arg "$1" "${2-}"
            case $2 in
                aria2c|wget)
                    TOOL="$2"
                    ;;
                *)
                    printf "%b[Error] Invalid tool. Use 'aria2c' or 'wget'.%b\n" "$RED" "$NC" >&2
                    exit 1
                    ;;
            esac
            shift 2
            ;;
        -x)
            # Threads per download, capped at 10.
            require_option_arg "$1" "${2-}"
            validate_bounded_number "threads (-x)" "$2" 10
            THREADS="$2"
            shift 2
            ;;
        -j)
            # Concurrent downloads, capped at 10.
            require_option_arg "$1" "${2-}"
            validate_bounded_number "concurrent downloads (-j)" "$2" 10
            CONCURRENT="$2"
            shift 2
            ;;
        --dataset)
            DATASET=1
            shift
            ;;
        --local-dir)
            require_option_arg "$1" "${2-}"
            LOCAL_DIR="$2"
            shift 2
            ;;
        --revision)
            require_option_arg "$1" "${2-}"
            REVISION="$2"
            shift 2
            ;;
        --retries)
            # 0 is allowed: it means "retry forever".
            require_option_arg "$1" "${2-}"
            validate_nonnegative_number "retries (--retries)" "$2"
            RETRY_MAX_ATTEMPTS="$2"
            shift 2
            ;;
        --retry-wait)
            require_option_arg "$1" "${2-}"
            validate_nonnegative_number "retry wait (--retry-wait)" "$2"
            RETRY_WAIT_SECONDS="$2"
            shift 2
            ;;
        --retry-max-wait)
            require_option_arg "$1" "${2-}"
            validate_nonnegative_number "retry max wait (--retry-max-wait)" "$2"
            RETRY_MAX_WAIT_SECONDS="$2"
            shift 2
            ;;
        --timeout)
            require_option_arg "$1" "${2-}"
            validate_positive_number "timeout (--timeout)" "$2"
            NETWORK_TIMEOUT="$2"
            shift 2
            ;;
        --proxy-list)
            require_option_arg "$1" "${2-}"
            PROXY_LIST_CONFIGURED=1
            PROXY_LIST_FILE="$2"
            shift 2
            ;;
        -h|--help)
            display_help 0
            ;;
        *)
            # Unknown option: print usage and exit 1.
            display_help 1
            ;;
    esac
done
|
|
|
# --- Main flow --------------------------------------------------------------

# Keep the backoff schedule consistent: the cap can never sit below the
# initial wait.
if [[ "$RETRY_MAX_WAIT_SECONDS" -lt "$RETRY_WAIT_SECONDS" ]]; then
    RETRY_MAX_WAIT_SECONDS="$RETRY_WAIT_SECONDS"
fi

check_command curl
check_command "$TOOL"
load_proxy_prefixes

# Default download dir: the repo name (the part after '/', if any).
LOCAL_DIR="${LOCAL_DIR:-${REPO_ID#*/}}"
mkdir -p "$LOCAL_DIR/.hfd"   # .hfd/ holds the metadata cache + session state

# Datasets and models live under different API/URL prefixes. CUT_DIRS tells
# wget how many leading URL path segments to strip when recreating paths.
if [[ "${DATASET:-0}" == 1 ]]; then
    METADATA_API_PATH="datasets/$REPO_ID"
    DOWNLOAD_API_PATH="datasets/$REPO_ID"
    CUT_DIRS=5
else
    METADATA_API_PATH="models/$REPO_ID"
    DOWNLOAD_API_PATH="$REPO_ID"
    CUT_DIRS=4
fi

# NOTE(review): REVISION is inserted verbatim; a revision containing '/'
# (e.g. refs/pr/1) would need URL-encoding — confirm before relying on it.
if [[ "$REVISION" != "main" ]]; then
    METADATA_API_PATH="$METADATA_API_PATH/revision/$REVISION"
fi
API_URL="$HF_ENDPOINT/api/$METADATA_API_PATH"

# Metadata is cached per endpoint/repo-type/repo/revision combination.
METADATA_CACHE_KEY=$(string_fingerprint "${HF_ENDPOINT}|${DATASET:-0}|${REPO_ID}|${REVISION}")
METADATA_FILE="$LOCAL_DIR/.hfd/repo_metadata_${METADATA_CACHE_KEY}.json"
fileslist_file=".hfd/${TOOL}_urls.txt"
INCLUDE_REGEX=$(build_pattern_regex "${INCLUDE_PATTERNS[@]}")
EXCLUDE_REGEX=$(build_pattern_regex "${EXCLUDE_PATTERNS[@]}")

# Fetch metadata unless a cached copy exists; either way, refuse to continue
# for gated repos without credentials.
if [[ ! -f "$METADATA_FILE" ]]; then
    printf "%bFetching repo metadata...%b\n" "$YELLOW" "$NC"
    fetch_and_save_metadata || exit 1
    check_authentication "$RESPONSE"
else
    printf "%bUsing cached metadata: %s%b\n" "$GREEN" "$METADATA_FILE" "$NC"
    RESPONSE=$(cat "$METADATA_FILE")
    check_authentication "$RESPONSE"
fi

# Rebuild the URL list only when the effective configuration changed;
# otherwise resume from the previous session file.
if should_regenerate_filelist; then
    printf "%bGenerating file list...%b\n" "$YELLOW" "$NC"
    generate_filelist
else
    printf "%bResume from file list: %s%b\n" "$GREEN" "$LOCAL_DIR/$fileslist_file" "$NC"
fi

printf "%bStarting download with %s to %s...%b\n" "$YELLOW" "$TOOL" "$LOCAL_DIR" "$NC"

# The URL list uses paths relative to LOCAL_DIR, so run the tool from there.
cd "$LOCAL_DIR" || exit 1
if run_with_retry "download with $TOOL" perform_download; then
    printf "%bDownload completed successfully. Repo directory: %s%b\n" "$GREEN" "$PWD" "$NC"
else
    printf "%bDownload encountered errors.%b\n" "$RED" "$NC" >&2
    exit 1
fi