Created
August 26, 2025 21:23
-
-
Save DonRichards/2fe9abdb375c56fabc3c8275c7b4f6fc to your computer and use it in GitHub Desktop.
Islandora Hash extraction
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# chmod +x fedora_checksums.sh
# ./fedora_checksums.sh
# This script is used to extract Fedora checksums for files referenced in Drupal nodes.
# It must be run from within the Drupal container.
# The script connects to Fedora via http://fcrepo:8080 and uses Drush
# to query the Drupal database for file URIs.
# The script creates a fedora_checksums.json file with node IDs and their
# corresponding Fedora file checksums.
# Help function
#######################################
# Print the usage text and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
show_help() {
  # Unquoted here-doc delimiter so that $0 expands to the invoked script name.
  cat <<EOF
Usage: $0 [OPTIONS]

Extract Fedora checksums for files referenced in Drupal nodes.

OPTIONS:
 -h, --help Show this help message

IMPORTANT: This script must be run from within the Drupal container.
The script connects to Fedora via http://fcrepo:8080 and uses Drush
to query the Drupal database for file URIs.

Output: Creates fedora_checksums.json with node IDs and their
 corresponding Fedora file checksums.
EOF
  exit 0
}
# Parse command line arguments
#######################################
# Handle the script's command line options.
# Arguments: the script's positional parameters ("$@")
# Outputs:   diagnostics for unknown options on stderr
# Returns:   0 when no arguments are given; exits 1 on an unknown option.
#            -h/--help delegates to show_help.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        show_help
        ;;
      *)
        # Diagnostics go to stderr so they never mix with regular output.
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information." >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
# Probe the Fedora endpoint; the hostname used below is expected to be
# reachable only from inside the container environment.
echo "π Checking if running in a container..."
# Bound the probe with timeouts so an unreachable host cannot hang the script;
# the response body is discarded, only curl's exit status matters.
if ! curl -s --connect-timeout 5 --max-time 10 -o /dev/null http://fcrepo:8080; then
  echo "β Could not resolve http://fcrepo:8080. Are you running in a container?" >&2
  exit 1
fi
# Query the Drupal database (through Drush) for every node that has a media
# file stored under the fedora:// scheme. The result is kept in NODE_ROWS,
# which the row-processing loop reads as tab-separated "nid<TAB>uri" lines.
echo "π Running Fedora checksum extraction script..."
echo "π¦ Using Drupal SQL connection via Drush"
echo "π‘ Running SQL query to fetch node IDs and file URIs..."
SQL_QUERY="
SELECT n.nid, fm.uri
FROM node_field_data n
JOIN media__field_media_of mo ON mo.field_media_of_target_id = n.nid
JOIN media_field_data mfd ON mfd.mid = mo.entity_id
JOIN media__field_media_file mff ON mfd.mid = mff.entity_id
JOIN file_managed fm ON mff.field_media_file_target_id = fm.fid
WHERE fm.uri LIKE 'fedora://%'
AND fm.uri NOT LIKE '%Extracted Text%'
AND fm.uri NOT LIKE '%FITS File%';
"
echo "π Executing query..."
if ! NODE_ROWS=$(drush sql:query "$SQL_QUERY"); then
  echo "β SQL query failed. Exiting." >&2
  exit 1
fi
echo "β SQL query successful."
# Count only data rows (numeric nid followed by a tab). Piping through
# `wc -l` would report 1 for an empty result and would count a header row.
# grep -c exits non-zero when the count is 0, hence the `|| true`.
NODE_COUNT=$(grep -Ec $'^[0-9]+\t' <<< "$NODE_ROWS" || true)
echo "π Number of results: $NODE_COUNT"
#######################################
# Escape a string for embedding inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}    # backslash first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\t'/\\t}
  text=${text//$'\r'/\\r}
  text=${text//$'\n'/\\n}
  printf '%s' "$text"
}

#######################################
# Extract the sha-256 value from HTTP response headers.
# The header name and algorithm token are matched case-insensitively, and the
# value stops at a comma or whitespace, which also drops the CR that ends
# every HTTP header line.
# Arguments: none (reads header lines from stdin)
# Outputs:   the digest value on stdout
# Returns:   0 if a sha-256 digest was found, 1 otherwise
#######################################
extract_sha256() {
  local line
  local re='^[Dd][Ii][Gg][Ee][Ss][Tt]:(.*[,[:space:]])?[Ss][Hh][Aa]-256=([^,[:space:]]+)'
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $re ]]; then
      printf '%s\n' "${BASH_REMATCH[2]}"
      return 0
    fi
  done
  return 1
}

# Walk the tab-separated "nid<TAB>uri" rows held in NODE_ROWS, ask Fedora for
# the sha-256 digest of each file, and write the result as JSON.
echo "π Processing rows..."
nl=$'\n'
json_body=""
while IFS=$'\t' read -r NID URI; do
  [[ "$NID" =~ ^[0-9]+$ ]] || continue # Skip header row if it exists
  if [[ "$URI" != fedora://* ]]; then
    echo "β Unknown URI scheme: $URI" >&2
    continue
  fi

  FEDORA_PATH="${URI#fedora://}"
  # A literal space is never valid in a URL; percent-encode it for curl.
  FEDORA_URL="http://fcrepo:8080/fcrepo/rest/${FEDORA_PATH// /%20}"
  echo "π Fetching checksum for $FEDORA_URL"
  DIGEST_HEADER=$(curl -sI -H "Want-Digest: sha-256" "$FEDORA_URL")
  if ! CHECKSUM=$(extract_sha256 <<< "$DIGEST_HEADER"); then
    echo "β οΈ No Digest returned for $FEDORA_URL" >&2
    CHECKSUM="MISSING"
  fi

  # Append to JSON output.
  # NOTE(review): a node with several media files yields repeated keys; the
  # output schema is kept unchanged for compatibility with existing consumers.
  printf -v entry '  "%s": {\n    "fedora_uri": "%s",\n    "checksum": "%s"\n  }' \
    "$NID" "$(json_escape "$URI")" "$(json_escape "$CHECKSUM")"
  json_body+="${json_body:+,${nl}}${entry}"
done <<< "${NODE_ROWS:-}"

# Close the JSON object; an empty result set produces "{}".
if [[ -n "$json_body" ]]; then
  JSON_OUTPUT="{${nl}${json_body}${nl}}"
else
  JSON_OUTPUT="{}"
fi
printf '%s\n' "$JSON_OUTPUT" > fedora_checksums.json
echo "β Checksums saved to fedora_checksums.json"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment