Skip to content

Commit ad030b9

Browse files
authored
Changes to support end to end operations of image tag management via CLI (#41)
* Updated requirements.txt. Fixed pylint issues. Updated test gen script * Added 2 new tables. Renamed tables to support ordered deploy * Added support for user creating and auditing in DB, DAL, and deployment. * Removed comment * Change * Adding support to check if db already exists. * Added comment * Fixed issue in db resources install file. Updated table schema * Updated deployment to be more idempotent. Updated table schemas * Added a new version of the data access layer. Added simple unit tests. * Added comments * Fixed issue * Changed query formatting in some areas * Removing unused files * Removed __init__.py that was causing pytest issues. Updated yaml. * Added packages * Fixed format issue * Fixing more linting issues * Removing 3.5 from matrix * Changes to enforce Tagging_User as a key in the CLI config * Updated requirements.txt. Fixed pylint issues. Updated test gen script * Added 2 new tables. Renamed tables to support ordered deploy * Added support for user creating and auditing in DB, DAL, and deployment. * Adding support to check if db already exists. * Added comment * Fixed issue in db resources install file. Updated table schema * Updated deployment to be more idempotent. Updated table schemas * Fixed issue * Changed query formatting in some areas * Changes to enforce Tagging_User as a key in the CLI config * Revised the way we do user tracking. * Using logging pkg. Username integration in CLI. Clean up * Using logging pkg. Username integration in CLI. Clean up * Requiring that onboard have an argument in CLI * Added comment * Adding version to package dependency
1 parent 34acdac commit ad030b9

File tree

8 files changed

+113
-88
lines changed

8 files changed

+113
-88
lines changed

cli/cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232

3333
if operation == 'download':
3434
download(config, args.num_images)
35-
elif operation == 'onboard':
35+
elif operation == 'onboard' and not args.folder:
36+
print ("--folder arg required for onboard operation")
37+
elif operation == 'onboard' and args.folder:
3638
onboard(config, args.folder)
3739
else:
3840
upload(config)

cli/operations.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import pathlib
77
import json
88
import copy
9-
from azure.storage.blob import BlockBlobService, ContentSettings
9+
from azure.storage.blob.blockblobservice import BlockBlobService
10+
from azure.storage.file import ContentSettings
1011

1112
FUNCTIONS_SECTION = 'FUNCTIONS'
1213
FUNCTIONS_KEY = 'FUNCTIONS_KEY'
@@ -38,7 +39,7 @@ class MissingConfigException(Exception):
3839
class ImageLimitException(Exception):
3940
pass
4041

41-
42+
#TODO: Verify the storage account is correct. Currently we get an unhelpful error message if there is a typo in the storage account name
4243
def get_azure_storage_client(config):
4344
# Todo: Move away from global client.
4445
global azure_storage_client
@@ -53,11 +54,12 @@ def get_azure_storage_client(config):
5354

5455
return azure_storage_client
5556

56-
57+
#TODO We should create the container if it does not exist
5758
def onboard(config, folder_name):
5859
blob_storage = get_azure_storage_client(config)
5960
uri = 'https://' + config.get("storage_account") + '.blob.core.windows.net/' + config.get("storage_container") + '/'
6061
functions_url = config.get('url') + '/api/onboarding'
62+
user_name = config.get("tagging_user")
6163
images = []
6264
for image in os.listdir(folder_name):
6365
if image.lower().endswith('.png') or image.lower().endswith('.jpg') or image.lower().endswith('.jpeg') or image.lower().endswith('.gif'):
@@ -73,11 +75,17 @@ def onboard(config, folder_name):
7375
data['imageUrls'] = images
7476
headers = {'content-type': 'application/json'}
7577
query = {
76-
"code": config.get('key')
78+
"code": config.get('key'),
79+
"userName": user_name
7780
}
7881

82+
#TODO: Ensure we don't get 4xx or 5xx return codes
7983
response = requests.post(functions_url, data=json.dumps(data), headers=headers, params=query)
80-
print("Images successfully uploaded. \n" + response.text)
84+
json_resp = response.json()
85+
count = len(json_resp['imageUrls'])
86+
print("Successfully uploaded " + str(count) + " images.")
87+
for url in json_resp['imageUrls']:
88+
print(url)
8189

8290

8391
def _download_bounds(num_images):
@@ -95,9 +103,11 @@ def _download_bounds(num_images):
95103
def download(config, num_images, strategy=None):
96104
# TODO: better/more proper URI handling.
97105
functions_url = config.get("url") + "/api/download"
106+
user_name = config.get("tagging_user")
98107
images_to_download = _download_bounds(num_images)
99108
query = {
100-
"imageCount": images_to_download
109+
"imageCount": images_to_download,
110+
"userName": user_name
101111
}
102112

103113
response = requests.get(functions_url, params=query)
@@ -123,9 +133,12 @@ def download(config, num_images, strategy=None):
123133
exist_ok=True
124134
)
125135

126-
download_images(config, data_dir, json_resp)
127-
print("Downloaded files. Ready to tag!")
128-
return images_to_download
136+
local_images = download_images(config, data_dir, json_resp)
137+
count = len(local_images)
138+
print("Successfully downloaded " + str(count) + " images.")
139+
for image_path in local_images:
140+
print(image_path)
141+
print("Ready to tag!")
129142

130143

131144
def download_images(config, image_dir, json_resp):
@@ -135,25 +148,23 @@ def download_images(config, image_dir, json_resp):
135148
write_vott_data(image_dir, json_resp)
136149

137150
urls = json_resp['imageUrls']
138-
dummy = "https://cdn.pixabay.com/photo/2017/02/20/18/03/cat-2083492_960_720.jpg"
139-
151+
downloaded_file_paths = []
140152
for index in range(len(urls)):
141153
url = urls[index]
142154

143-
# file will look something like
144-
# https://csehackstorage.blob.core.windows.net/image-to-tag/image4.jpeg
145-
# need to massage it to get the last portion.
146-
147155
file_name = url.split('/')[-1]
148156

149-
# todo: change this when we get actual data.
150-
response = requests.get(dummy)
157+
#TODO: We will download an empty file if we get a permission error on the blob store URL
158+
# We should raise an exception. For now the blob store must be publicly accessible
159+
response = requests.get(url)
151160
file_path = pathlib.Path(image_dir / file_name)
152161

153162
with open(str(file_path), "wb") as file:
154163
for chunk in response.iter_content(chunk_size=128):
155164
file.write(chunk)
156165
file.close()
166+
downloaded_file_paths.append(file_path)
167+
return downloaded_file_paths
157168

158169

159170
def write_vott_data(image_dir, json_resp):
@@ -197,6 +208,7 @@ def prepend_file_paths(image_dir, vott_json):
197208

198209
def upload(config):
199210
functions_url = config.get("url") + "/api/upload"
211+
user_name = config.get("tagging_user")
200212
tagging_location = pathlib.Path(
201213
os.path.expanduser(config.get("tagging_location"))
202214
)
@@ -210,7 +222,11 @@ def upload(config):
210222
# Munge the vott json file.
211223
munged_json = trim_file_paths(json_data)
212224

213-
response = requests.post(functions_url, json=munged_json)
225+
query = {
226+
"userName": user_name
227+
}
228+
229+
response = requests.post(functions_url, json=munged_json, params=query)
214230
response.raise_for_status()
215231

216232
resp_json = response.json()
@@ -229,6 +245,8 @@ def trim_file_paths(json_data):
229245

230246
munged_visited_frames = []
231247
for frame_path in visited_frames:
248+
#TODO: This line assumes that the visited frames name is a full path.
249+
# Centralize this business logic in the codebase. It probably exists in shared code too
232250
munged_visited_frames.append(
233251
pathlib.Path(frame_path).name
234252
)

functions/pipeline/onboarding/__init__.py

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from ..shared.db_access import ImageTagDataAccess, ImageInfo
88
from ..shared.onboarding import copy_images_to_permanent_storage
99
from azure.storage.blob import BlockBlobService
10+
DEFAULT_RETURN_HEADER= { "content-type": "application/json"}
1011

1112
def main(req: func.HttpRequest) -> func.HttpResponse:
1213
logging.info('Python HTTP trigger function processed a request.')
@@ -16,16 +17,15 @@ def main(req: func.HttpRequest) -> func.HttpResponse:
1617
if not user_name:
1718
return func.HttpResponse(
1819
status_code=401,
19-
headers={ "content-type": "application/json"},
20+
headers=DEFAULT_RETURN_HEADER,
2021
body=json.dumps({"error": "invalid userName given or omitted"})
2122
)
2223

2324
try:
2425
req_body = req.get_json()
25-
logging.error(req.get_json())
26+
logging.debug(req.get_json())
2627
raw_url_list = req_body["imageUrls"]
2728
except ValueError:
28-
logging.error("ERROR: Unable to decode JSON body")
2929
return func.HttpResponse("ERROR: Unable to decode POST body", status_code=400)
3030

3131
if not raw_url_list:
@@ -34,50 +34,49 @@ def main(req: func.HttpRequest) -> func.HttpResponse:
3434
# Check to ensure image URLs sent by client are all unique.
3535
url_list = set(raw_url_list)
3636

37-
# Get list of image objects to pass to DAL for insertion into DB.
3837
try:
3938
image_object_list = build_objects_from_url_list(url_list)
4039
except Exception as e:
4140
logging.error("ERROR: Could not build image object list. Exception: " + str(e))
4241
return func.HttpResponse("ERROR: Could not build image object list.", status_code=401)
4342

44-
# Connect to database.
4543
try:
46-
logging.info("Now connecting to database...")
4744
data_access = ImageTagDataAccess(get_postgres_provider())
48-
logging.info("Connected.")
45+
user_id= data_access.create_user(user_name)
46+
47+
logging.debug("Add new images to the database, and retrieve a dictionary ImageId's mapped to ImageUrl's")
48+
image_id_url_map = data_access.add_new_images(image_object_list,user_id)
49+
50+
copy_source = os.getenv('SOURCE_CONTAINER_NAME')
51+
copy_destination = os.getenv('DESTINATION_CONTAINER_NAME')
52+
53+
# Create blob service for storage account
54+
blob_service = BlockBlobService(account_name=os.getenv('STORAGE_ACCOUNT_NAME'), account_key=os.getenv('STORAGE_ACCOUNT_KEY'))
55+
56+
# Copy images to permanent storage and get a dictionary of images for which to update URLs in DB.
57+
# TODO: Prefer to have this function return a JSON blob as a string containing a list of successes
58+
# and a list of failures. If the list of failures contains any items, return a status code other than 200.
59+
update_urls_dictionary = copy_images_to_permanent_storage(image_id_url_map, copy_source, copy_destination, blob_service)
60+
61+
# If the dictionary of images is empty, this means a failure occurred in a copy/delete operation.
62+
# Otherwise, dictionary contains permanent image URLs for each image ID that was successfully copied.
63+
if not update_urls_dictionary:
64+
return func.HttpResponse("ERROR: Image copy/delete operation failed. Check state of images in storage.", status_code=401)
65+
else:
66+
logging.debug("Now updating permanent URLs in the DB...")
67+
data_access.update_image_urls(update_urls_dictionary, user_id)
68+
69+
content = json.dumps({"imageUrls":list(update_urls_dictionary.values())})
70+
return func.HttpResponse(
71+
status_code=200,
72+
headers=DEFAULT_RETURN_HEADER,
73+
body=content
74+
)
75+
4976
except Exception as e:
50-
logging.error("ERROR: Database connection failed. Exception: " + str(e))
51-
return func.HttpResponse("ERROR: Unable to connect to database", status_code=503)
52-
53-
# Create/get user id
54-
user_id_number = data_access.create_user(user_id)
55-
logging.info("User id for {0} is {1}".format(user_id, str(user_id_number)))
56-
57-
# Add new images to the database, and retrieve a dictionary ImageId's mapped to ImageUrl's
58-
image_id_url_map = data_access.add_new_images(image_object_list,user_id)
59-
60-
copy_source = os.getenv('SOURCE_CONTAINER_NAME')
61-
copy_destination = os.getenv('DESTINATION_CONTAINER_NAME')
62-
63-
# Create blob service for storage account
64-
blob_service = BlockBlobService(account_name=os.getenv('STORAGE_ACCOUNT_NAME'), account_key=os.getenv('STORAGE_ACCOUNT_KEY'))
65-
66-
# Copy images to permanent storage and get a dictionary of images for which to update URLs in DB.
67-
# TODO: Prefer to have this function return a JSON blob as a string containing a list of successes
68-
# and a list of failures. If the list of failures contains any items, return a status code other than 200.
69-
update_urls_dictionary = copy_images_to_permanent_storage(image_id_url_map, copy_source, copy_destination, blob_service)
70-
71-
# If the dictionary of images is empty, this means a failure occurred in a copy/delete operation.
72-
# Otherwise, dictionary contains permanent image URLs for each image ID that was successfully copied.
73-
if not update_urls_dictionary:
74-
return func.HttpResponse("ERROR: Image copy/delete operation failed. Check state of images in storage.", status_code=401)
75-
else:
76-
logging.info("Now updating permanent URLs in the DB...")
77-
data_access.update_image_urls(update_urls_dictionary, user_id_number)
78-
logging.info("Done.")
79-
# Return string containing list of URLs to images in permanent blob storage
80-
return func.HttpResponse("Images were successfully added to the database and copied to permanent storage.", status_code=200)
77+
logging.error("Exception: " + str(e))
78+
return func.HttpResponse("Internal error occured", status_code=503)
79+
8180

8281
# Given a list of image URLs, build an ImageInfo object for each, and return a list of these image objects.
8382
def build_objects_from_url_list(url_list):
@@ -91,4 +90,3 @@ def build_objects_from_url_list(url_list):
9190
# Append image object to the list
9291
image_object_list.append(image)
9392
return image_object_list
94-

functions/pipeline/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
azure-functions==1.0.0a5
22
azure-functions-worker==1.0.0a6
33
azure-storage-blob==1.4.0
4+
azure-storage-file==1.4.0
45
grpcio==1.14.2
56
grpcio-tools==1.14.2
67
protobuf==3.6.1

0 commit comments

Comments
 (0)