Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
*.pyc
.idea
venv
venv
attachments/*
*.zip
.DS_Store
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ LABEL maintainer="Specify Collections Consortium <github.com/specify>"
RUN apt-get update && apt-get -y install --no-install-recommends \
ghostscript \
imagemagick \
python3.6 \
python3.12 \
python3-venv \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

Expand All @@ -19,7 +19,7 @@ WORKDIR /home/specify

COPY --chown=specify:specify requirements.txt .

RUN python3.6 -m venv ve && ve/bin/pip install --no-cache-dir -r requirements.txt
RUN python3.12 -m venv ve && ve/bin/pip install --no-cache-dir -r requirements.txt

COPY --chown=specify:specify *.py views ./

Expand Down
225 changes: 171 additions & 54 deletions manage_collection_dirs.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,180 @@
# This script can be given either a single collection name or a list of names
#
# ```bash
# python manage_collection_dirs.py add geo_swiss
# ```
# or
# ```bash
# python3 manage_collection_dirs.py remove geo_swiss naag mcsn
# ```
#
# It creates new collection attachment directories. When a
# collection is removed, the directory and attachments remain.

#!/usr/bin/env python3
import sys
import os
import subprocess
import re
import boto3
from urllib.parse import urlparse

def add_collection_dir(collection_dir_names):
    """Create attachment directories and register them in ``settings.py``.

    For every name in *collection_dir_names* the directory
    ``attachments/<name>`` is created if it does not exist yet, and a
    ``'<name>': '<name>',`` line is inserted directly after the first line
    of ``settings.py`` that starts with ``COLLECTION_DIRS = {``.  Both
    paths are relative to the current working directory.  When no such
    line exists the file is written back unchanged.

    Args:
        collection_dir_names: Sequence of collection directory names.
    """
    attachments_dir = 'attachments'
    # makedirs(exist_ok=True) replaces the exists()-then-mkdir() pairs: it is
    # not subject to the check-then-create race and tolerates reruns.
    os.makedirs(attachments_dir, exist_ok=True)
    for collection_dir_name in collection_dir_names:
        os.makedirs(f'{attachments_dir}/{collection_dir_name}', exist_ok=True)

    with open("settings.py", "r+") as f:
        lines = f.readlines()
        for i, line in enumerate(lines):
            if line.startswith("COLLECTION_DIRS = {"):
                # Each entry is inserted right below the opening line, so a
                # later name ends up above an earlier one.
                for collection_dir_name in collection_dir_names:
                    lines.insert(i+1, f" '{collection_dir_name}': '{collection_dir_name}',\n")
                break
        # Rewrite the file in place with the updated content.
        f.seek(0)
        f.truncate()
        f.writelines(lines)
# Path of the settings file that the helpers in this script read and rewrite.
SETTINGS_FILE = "settings.py"
# Name of the systemd unit passed to `systemctl restart` at the end of a run.
SERVICE_NAME = "web-asset-server.service" # adjust if different

def remove_collection_dir(collection_dir_names):
    """Remove collection entries from ``settings.py``.

    Every line of ``settings.py`` (relative to the current working
    directory) that starts with `` '<name>': '<name>',`` for one of the
    given names is dropped; all other lines are written back unchanged.
    Nothing on disk other than ``settings.py`` is touched.

    Args:
        collection_dir_names: Iterable of collection directory names.
    """
    prefixes = tuple(
        f" '{collection_dir_name}': '{collection_dir_name}',"
        for collection_dir_name in collection_dir_names
    )
    with open("settings.py", "r+") as f:
        lines = f.readlines()
        # Build a filtered copy instead of popping while enumerating, which
        # skipped the line following every removed entry.
        kept = [line for line in lines if not line.startswith(prefixes)]
        f.seek(0)
        f.truncate()
        # Write the remaining lines back; truncating without writing would
        # leave settings.py empty.
        f.writelines(kept)

def load_settings_contents(path=None):
    """Return the lines of the settings file, line endings included.

    Args:
        path: File to read.  Defaults to ``SETTINGS_FILE`` when omitted.

    Returns:
        list[str]: The result of ``readlines()`` on the file.
    """
    if path is None:
        path = SETTINGS_FILE
    # Python source files are UTF-8 by default; do not depend on the locale.
    with open(path, "r", encoding="utf-8") as f:
        return f.readlines()


def write_settings_contents(lines, path=None):
    """Overwrite the settings file with *lines*.

    Args:
        lines: Iterable of strings written verbatim (no newlines are added).
        path: File to write.  Defaults to ``SETTINGS_FILE`` when omitted.
    """
    if path is None:
        path = SETTINGS_FILE
    # Python source files are UTF-8 by default; do not depend on the locale.
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)

if __name__ == "__main__":

def parse_action_args(argv=None):
    """Parse the command line into an action and its payload.

    Args:
        argv: Full argument vector including the program name.  Defaults to
            ``sys.argv`` when omitted.

    Returns:
        tuple: ``("add", [(collection, s3_uri), ...])`` for the ``add``
        action, or ``("remove", [collection, ...])`` for ``remove``.

    Exits with status 1 (after printing a message) when fewer than two
    arguments follow the program name, when ``add`` receives an odd number
    of values, or when the action is neither ``add`` nor ``remove``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("Usage:")
        print(" python manage_collection_dirs.py add <collection> <s3://bucket/path> [<collection> <s3://bucket/path> ...]")
        print(" python manage_collection_dirs.py remove <collection> [<collection> ...]")
        sys.exit(1)

    action = argv[1]
    args = argv[2:]
    if action == "add":
        if len(args) % 2 != 0:
            print("For add, provide pairs: <collection> <s3://...> ...")
            sys.exit(1)
        # Even positions are collection names, odd positions their URIs.
        pairs = list(zip(args[::2], args[1::2]))
        return action, pairs
    if action == "remove":
        return action, args

    print("Invalid action. Use 'add' or 'remove'.")
    sys.exit(1)


def ensure_valid_s3_uri(uri):
    """Return True when *uri* has the ``s3`` scheme and a non-empty bucket.

    Args:
        uri: Candidate URI string, e.g. ``s3://bucket/path``.

    Returns:
        bool: ``True`` for a usable S3 URI, ``False`` otherwise.
    """
    parsed = urlparse(uri)
    # bool() so callers get True/False rather than the netloc string.
    return parsed.scheme == "s3" and bool(parsed.netloc)


def add_collections(pairs):
    """Add or update ``COLLECTION_S3_PATHS`` entries and create S3 placeholders.

    The settings file is rewritten with one `` '<collection>': '<uri>',``
    line per valid pair: a collection that already has an entry is replaced
    in place, a new one is inserted just before the line that closes the
    dict.  Afterwards a zero-byte object is put under
    ``<prefix>/<ORIG_DIR>/`` and ``<prefix>/<THUMB_DIR>/`` for every valid
    pair, using the names from the ``settings`` module.

    Args:
        pairs: Iterable of ``(collection, s3_uri)`` tuples.  Pairs whose URI
            fails ``ensure_valid_s3_uri`` are reported and skipped.

    Exits with status 1 when the ``COLLECTION_S3_PATHS`` assignment cannot
    be found or its dict literal does not span several lines.
    """
    lines = load_settings_contents()

    # find COLLECTION_S3_PATHS block
    pattern = re.compile(r"^COLLECTION_S3_PATHS\s*=\s*{")
    start_idx = next(
        (i for i, line in enumerate(lines) if pattern.match(line)), None
    )
    if start_idx is None:
        print("Couldn't find COLLECTION_S3_PATHS definition in settings.py")
        sys.exit(1)

    # find end of dict (matching closing brace) by counting braces per line;
    # braces inside strings or comments are counted too.
    end_idx = None
    brace_depth = 0
    for i in range(start_idx, len(lines)):
        brace_depth += lines[i].count("{") - lines[i].count("}")
        if brace_depth == 0:
            end_idx = i
            break
    if end_idx is None or end_idx == start_idx:
        # Inserting "before the closing line" would otherwise place the new
        # entries above the assignment itself and corrupt the file.
        print("COLLECTION_S3_PATHS in settings.py must be a dict literal spanning multiple lines")
        sys.exit(1)

    # validate once so the settings edit and the S3 step see the same pairs
    valid_pairs = []
    for coll, uri in pairs:
        if not ensure_valid_s3_uri(uri):
            print(f"Skipping invalid S3 URI for '{coll}': {uri}")
            continue
        valid_pairs.append((coll, uri))

    # collect the names already present to tell updates from insertions
    existing = set()
    for line in lines[start_idx+1:end_idx]:
        m = re.match(r"\s*['\"]([^'\"]+)['\"]\s*:\s*['\"]([^'\"]+)['\"],?", line)
        if m:
            existing.add(m.group(1))

    # insert or update entries
    pending = {}  # new collections; a repeated name keeps its last URI
    for coll, uri in valid_pairs:
        if coll in existing:
            print(f"Updating existing collection '{coll}' to '{uri}'")
            for i in range(start_idx+1, end_idx):
                if re.match(rf"\s*['\"]{re.escape(coll)}['\"]\s*:", lines[i]):
                    lines[i] = f" '{coll}': '{uri}',\n"
                    break
        else:
            print(f"Adding collection '{coll}' -> '{uri}'")
            pending[coll] = uri
    insertion = [f" '{coll}': '{uri}',\n" for coll, uri in pending.items()]

    # inject new entries just before end_idx
    if insertion:
        lines = lines[:end_idx] + insertion + lines[end_idx:]

    write_settings_contents(lines)

    # create placeholder directories in S3 under originals/ and thumbnails/
    # Imported here, after the rewrite.  Note that a plain import does not
    # re-read a module that was already imported earlier in the process.
    import settings as user_settings
    s3 = boto3.client("s3")
    for coll, uri in valid_pairs:
        bucket, base_prefix = parse_s3_uri(uri)
        for sub in (user_settings.ORIG_DIR, user_settings.THUMB_DIR):
            # NOTE(review): for a bucket-root URI base_prefix is empty and the
            # key starts with "/" - confirm this matches how the server builds
            # its keys before changing it.
            key_prefix = f"{base_prefix}/{sub}/"
            # create a zero-byte object to ensure the prefix is visible (not strictly needed)
            s3.put_object(Bucket=bucket, Key=key_prefix)


def remove_collections(names):
    """Drop the given collections from the ``COLLECTION_S3_PATHS`` dict.

    Lines between the opening line of the dict and the line that closes it
    are removed when they start (after optional whitespace) with one of
    *names* in quotes followed by a colon.  The file is rewritten only if
    at least one line was removed.

    Args:
        names: Collection names to remove.

    Exits with status 1 when the ``COLLECTION_S3_PATHS`` assignment cannot
    be found.
    """
    settings_lines = load_settings_contents()

    header_re = re.compile(r"^COLLECTION_S3_PATHS\s*=\s*{")
    start_idx = next(
        (idx for idx, text in enumerate(settings_lines) if header_re.match(text)),
        None,
    )
    if start_idx is None:
        print("Couldn't find COLLECTION_S3_PATHS in settings.py")
        sys.exit(1)

    # Scan forward, tracking brace depth, to find the line closing the dict.
    end_idx = start_idx
    depth = 0
    for idx in range(start_idx, len(settings_lines)):
        text = settings_lines[idx]
        depth += text.count("{") - text.count("}")
        if depth == 0:
            end_idx = idx
            break

    # Split the dict body into kept lines and the names that were matched.
    kept = []
    removed = []
    for text in settings_lines[start_idx + 1:end_idx]:
        for name in names:
            if re.match(rf"\s*['\"]{re.escape(name)}['\"]\s*:", text):
                removed.append(name)
                break
        else:
            kept.append(text)

    if not removed:
        print("No matching collections to remove found.")
        return

    # Stitch the file back together around the filtered dict body.
    head = settings_lines[:start_idx + 1]
    tail = settings_lines[end_idx:]
    write_settings_contents(head + kept + tail)
    print(f"Removed collections: {', '.join(removed)}")


def parse_s3_uri(s3_uri):
    """Split an ``s3://bucket/prefix`` URI into bucket and key prefix.

    Args:
        s3_uri: URI string with the ``s3`` scheme.

    Returns:
        tuple[str, str]: ``(bucket, prefix)``; the prefix has no leading or
        trailing slashes and is empty for a bucket-root URI.

    Raises:
        ValueError: If the scheme is not ``s3`` or the bucket is missing.
    """
    parsed = urlparse(s3_uri)
    if parsed.scheme != 's3' or not parsed.netloc:
        raise ValueError(f"Invalid S3 URI: {s3_uri}")
    return parsed.netloc, parsed.path.strip('/')


if __name__ == "__main__":
    # Script entry point: parse the command line, apply the requested change
    # to the settings file, then restart the service.
    action, payload = parse_action_args()
    if action == "add":
        add_collections(payload)
    else:  # remove
        remove_collections(payload)

    # restart service; the exit status is checked so that a failed restart
    # is reported instead of passing silently
    restart = subprocess.run(["systemctl", "restart", SERVICE_NAME])
    if restart.returncode != 0:
        print(f"Restarting {SERVICE_NAME} failed with exit code {restart.returncode}")
        sys.exit(1)
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
ExifRead==2.3.1
Paste==3.4.4
sh==1.14.0
sh==2.0
Bottle>=0.12.23,<0.13
boto3>=1.26.0,<2.0
boto3-stubs>=1.26.0,<2.0
Loading