Merged
Changes from all commits
47 commits
1894f43
Updated requirements.txt. Fixed pylint issues. Updated test gen script
Nov 2, 2018
b85f23b
Added 2 new tables. Renamed tables to support ordered deploy
Nov 3, 2018
17a2247
Added support for user creation and auditing in DB, DAL, and deployment.
Nov 4, 2018
ec75f81
Removed comment
Nov 4, 2018
35740c9
Change
andrebriggs Nov 4, 2018
e09739a
merging from upstream
andrebriggs Nov 5, 2018
dd0723e
Adding support to check if db already exists.
andrebriggs Nov 5, 2018
f8ead27
Added comment
andrebriggs Nov 6, 2018
3e339ce
Fixed issue in db resources install file. Updated table schema
andrebriggs Nov 7, 2018
4dcec72
Updated deployment to be more idempotent. Updated table schemas
andrebriggs Nov 8, 2018
475abbe
Added a new version of the data access layer. Added simple unit tests.
andrebriggs Nov 8, 2018
df69e48
Merging
andrebriggs Nov 8, 2018
1df6d66
Added comments
andrebriggs Nov 8, 2018
c3949cb
Fixed issue
andrebriggs Nov 8, 2018
b9b076a
Changed query formatting in some areas
andrebriggs Nov 8, 2018
c55ccec
Removing unused files
andrebriggs Nov 8, 2018
75b6e80
Removed __init__.py that was causing pytest issues. Updated yaml.
andrebriggs Nov 8, 2018
11468fb
Merge remote-tracking branch 'upstream/master'
andrebriggs Nov 8, 2018
d0c7483
Added packages
andrebriggs Nov 8, 2018
2d4b857
Fixed format issue
andrebriggs Nov 8, 2018
bc270b2
Fixing more linting issues
andrebriggs Nov 8, 2018
6cca44a
Removing 3.5 from matrix
andrebriggs Nov 9, 2018
7f794fb
Changes to enforce Tagging_User as a key in the CLI config
andrebriggs Nov 9, 2018
ab9ed70
Updated requirements.txt. Fixed pylint issues. Updated test gen script
Nov 2, 2018
b96b872
Added 2 new tables. Renamed tables to support ordered deploy
Nov 3, 2018
55e1d1f
Added support for user creation and auditing in DB, DAL, and deployment.
Nov 4, 2018
f156e55
Adding support to check if db already exists.
andrebriggs Nov 5, 2018
71a52ef
Added comment
andrebriggs Nov 6, 2018
c099e0c
Fixed issue in db resources install file. Updated table schema
andrebriggs Nov 7, 2018
678eabc
Updated deployment to be more idempotent. Updated table schemas
andrebriggs Nov 8, 2018
e588862
Fixed issue
andrebriggs Nov 8, 2018
617732c
Changed query formatting in some areas
andrebriggs Nov 8, 2018
6dfaa30
Changes to enforce Tagging_User as a key in the CLI config
andrebriggs Nov 9, 2018
ed65c3f
Merge branch 'master' of https://github.com/andrebriggs/active-learni…
andrebriggs Nov 9, 2018
eb843d7
Merge remote-tracking branch 'upstream/master'
andrebriggs Nov 9, 2018
ff40fcb
Merge remote-tracking branch 'upstream/master'
andrebriggs Nov 12, 2018
333de6c
Merge remote-tracking branch 'upstream/master'
andrebriggs Nov 13, 2018
2e4dcc5
Merge remote-tracking branch 'upstream/master'
andrebriggs Nov 14, 2018
4a5409d
Merge remote-tracking branch 'upstream/master'
andrebriggs Nov 15, 2018
593f69e
Revised the way we do user tracking.
andrebriggs Nov 15, 2018
8f5290a
Using logging pkg. Username integration in CLI. Clean up
andrebriggs Nov 15, 2018
97a1bbb
Merge remote-tracking branch 'upstream/master'
andrebriggs Nov 15, 2018
1c7bff0
Using logging pkg. Username integration in CLI. Clean up
andrebriggs Nov 15, 2018
b6d0122
MERGING
andrebriggs Nov 15, 2018
6cdaad7
Requiring that onboard have an argument in CLI
andrebriggs Nov 15, 2018
12ca09b
Added comment
andrebriggs Nov 16, 2018
aef628a
Adding version to package dependency
andrebriggs Nov 16, 2018
4 changes: 3 additions & 1 deletion cli/cli.py
@@ -32,7 +32,9 @@

if operation == 'download':
download(config, args.num_images)
elif operation == 'onboard':
elif operation == 'onboard' and not args.folder:
print ("--folder arg required for onboard operation")
elif operation == 'onboard' and args.folder:
onboard(config, args.folder)
else:
upload(config)
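Note: the new guard above prints an error and returns to the prompt when --folder is omitted. An alternative worth noting is enforcing the pairing at parse time. The parser setup is not shown in this PR, so the argument names in this sketch are illustrative, not the actual cli.py code:

```python
# Hypothetical sketch: let argparse reject a bare "onboard" itself, so usage
# is printed and the process exits with status 2 automatically.
import argparse

parser = argparse.ArgumentParser(description="active learning CLI")
parser.add_argument("operation", choices=["download", "onboard", "upload"])
parser.add_argument("--folder", help="folder of images to onboard")
parser.add_argument("--num-images", type=int, default=None)
args = parser.parse_args()

if args.operation == "onboard" and not args.folder:
    parser.error("--folder arg required for onboard operation")
```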
54 changes: 36 additions & 18 deletions cli/operations.py
@@ -6,7 +6,8 @@
import pathlib
import json
import copy
from azure.storage.blob import BlockBlobService, ContentSettings
from azure.storage.blob.blockblobservice import BlockBlobService
from azure.storage.file import ContentSettings

FUNCTIONS_SECTION = 'FUNCTIONS'
FUNCTIONS_KEY = 'FUNCTIONS_KEY'
@@ -38,7 +39,7 @@ class MissingConfigException(Exception):
class ImageLimitException(Exception):
pass


#TODO: Verify the storage account is correct. Currently we get an unhelpful error message if you have a typo in the storage name
def get_azure_storage_client(config):
# Todo: Move away from global client.
global azure_storage_client
@@ -53,11 +54,12 @@ def get_azure_storage_client(config):

return azure_storage_client


#TODO We should create the container if it does not exist
def onboard(config, folder_name):
blob_storage = get_azure_storage_client(config)
uri = 'https://' + config.get("storage_account") + '.blob.core.windows.net/' + config.get("storage_container") + '/'
functions_url = config.get('url') + '/api/onboarding'
user_name = config.get("tagging_user")
images = []
for image in os.listdir(folder_name):
if image.lower().endswith('.png') or image.lower().endswith('.jpg') or image.lower().endswith('.jpeg') or image.lower().endswith('.gif'):
@@ -73,11 +75,17 @@ def onboard(config, folder_name):
data['imageUrls'] = images
headers = {'content-type': 'application/json'}
query = {
"code": config.get('key')
"code": config.get('key'),
"userName": user_name
}

#TODO: Ensure we don't get 4xx or 5xx return codes
response = requests.post(functions_url, data=json.dumps(data), headers=headers, params=query)
print("Images successfully uploaded. \n" + response.text)
json_resp = response.json()
count = len(json_resp['imageUrls'])
print("Successfully uploaded " + str(count) + " images.")
for url in json_resp['imageUrls']:
print(url)

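Note: the TODO above about 4xx/5xx return codes maps directly onto requests' built-in status check. A minimal sketch, assuming the surrounding imports in operations.py and that the CLI should fail loudly rather than print a success message:

```python
# Sketch: surface HTTP errors from the onboarding Function instead of
# unconditionally reporting success. raise_for_status() raises
# requests.HTTPError on any 4xx/5xx response.
response = requests.post(functions_url, data=json.dumps(data),
                         headers=headers, params=query)
try:
    response.raise_for_status()
except requests.HTTPError as err:
    print("Onboarding failed: " + str(err))
    raise
json_resp = response.json()
```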

def _download_bounds(num_images):
@@ -95,9 +103,11 @@ def _download_bounds(num_images):
def download(config, num_images, strategy=None):
# TODO: better/more proper URI handling.
functions_url = config.get("url") + "/api/download"
user_name = config.get("tagging_user")
images_to_download = _download_bounds(num_images)
query = {
"imageCount": images_to_download
"imageCount": images_to_download,
"userName": user_name
}

response = requests.get(functions_url, params=query)
Expand All @@ -123,9 +133,12 @@ def download(config, num_images, strategy=None):
exist_ok=True
)

download_images(config, data_dir, json_resp)
print("Downloaded files. Ready to tag!")
return images_to_download
local_images = download_images(config, data_dir, json_resp)
count = len(local_images)
print("Successfully downloaded " + str(count) + " images.")
for image_path in local_images:
print(image_path)
print("Ready to tag!")


def download_images(config, image_dir, json_resp):
@@ -135,25 +148,23 @@ def download_images(config, image_dir, json_resp):
write_vott_data(image_dir, json_resp)

urls = json_resp['imageUrls']
dummy = "https://cdn.pixabay.com/photo/2017/02/20/18/03/cat-2083492_960_720.jpg"

downloaded_file_paths = []
for index in range(len(urls)):
url = urls[index]

# file will look something like
# https://csehackstorage.blob.core.windows.net/image-to-tag/image4.jpeg
# need to massage it to get the last portion.

file_name = url.split('/')[-1]

# todo: change this when we get actual data.
response = requests.get(dummy)
#TODO: We will download an empty file if we get a permission error on the blob store URL.
# We should raise an exception. For now the blob store must be publicly accessible.
response = requests.get(url)
file_path = pathlib.Path(image_dir / file_name)

with open(str(file_path), "wb") as file:
for chunk in response.iter_content(chunk_size=128):
file.write(chunk)
file.close()
downloaded_file_paths.append(file_path)
return downloaded_file_paths

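Note: for the permission-error TODO inside download_images, the same raise_for_status() pattern would stop an empty file from ever being written. A sketch of the loop body shown above, under that assumption:

```python
# Sketch: fail on e.g. a 403 from a private blob container before opening the
# local file, so a permission problem can't leave an empty image on disk.
response = requests.get(url, stream=True)
response.raise_for_status()
with open(str(file_path), "wb") as file:
    for chunk in response.iter_content(chunk_size=128):
        file.write(chunk)
downloaded_file_paths.append(file_path)
```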

def write_vott_data(image_dir, json_resp):
@@ -197,6 +208,7 @@ def prepend_file_paths(image_dir, vott_json):

def upload(config):
functions_url = config.get("url") + "/api/upload"
user_name = config.get("tagging_user")
tagging_location = pathlib.Path(
os.path.expanduser(config.get("tagging_location"))
)
@@ -210,7 +222,11 @@ def upload(config):
# Munge the vott json file.
munged_json = trim_file_paths(json_data)

response = requests.post(functions_url, json=munged_json)
query = {
"userName": user_name
}

response = requests.post(functions_url, json=munged_json, params=query)
response.raise_for_status()

resp_json = response.json()
@@ -229,6 +245,8 @@ def trim_file_paths(json_data):

munged_visited_frames = []
for frame_path in visited_frames:
#TODO: This line assumes that the visited frame's name is a full path.
# Centralize this business logic in the codebase. It probably exists in shared code too.
munged_visited_frames.append(
pathlib.Path(frame_path).name
)
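Note: the TODO in trim_file_paths points out that this path-trimming rule is duplicated elsewhere. One way to centralize it is a tiny shared helper; the helper name and module placement below are assumptions, not part of this PR:

```python
# Hypothetical shared helper (e.g. in a common utils module) so the CLI and
# the Functions code agree on how a visited-frame path becomes a file name.
import pathlib

def frame_file_name(frame_path):
    """Return only the file-name portion of a visited-frame path."""
    return pathlib.Path(frame_path).name

# trim_file_paths could then reduce to:
# munged_visited_frames = [frame_file_name(p) for p in visited_frames]
```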
76 changes: 37 additions & 39 deletions functions/pipeline/onboarding/__init__.py
@@ -7,6 +7,7 @@
from ..shared.db_access import ImageTagDataAccess, ImageInfo
from ..shared.onboarding import copy_images_to_permanent_storage
from azure.storage.blob import BlockBlobService
DEFAULT_RETURN_HEADER = {"content-type": "application/json"}

def main(req: func.HttpRequest) -> func.HttpResponse:
logging.info('Python HTTP trigger function processed a request.')
@@ -16,16 +17,15 @@ def main(req: func.HttpRequest) -> func.HttpResponse:
if not user_name:
return func.HttpResponse(
status_code=401,
headers={ "content-type": "application/json"},
headers=DEFAULT_RETURN_HEADER,
body=json.dumps({"error": "invalid userName given or omitted"})
)

try:
req_body = req.get_json()
logging.error(req.get_json())
logging.debug(req.get_json())
raw_url_list = req_body["imageUrls"]
except ValueError:
logging.error("ERROR: Unable to decode JSON body")
return func.HttpResponse("ERROR: Unable to decode POST body", status_code=400)

if not raw_url_list:
@@ -34,50 +34,49 @@ def main(req: func.HttpRequest) -> func.HttpResponse:
# Check to ensure image URLs sent by client are all unique.
url_list = set(raw_url_list)

# Get list of image objects to pass to DAL for insertion into DB.
try:
image_object_list = build_objects_from_url_list(url_list)
except Exception as e:
logging.error("ERROR: Could not build image object list. Exception: " + str(e))
return func.HttpResponse("ERROR: Could not build image object list.", status_code=401)

# Connect to database.
try:
logging.info("Now connecting to database...")
data_access = ImageTagDataAccess(get_postgres_provider())
logging.info("Connected.")
user_id= data_access.create_user(user_name)

logging.debug("Add new images to the database, and retrieve a dictionary ImageId's mapped to ImageUrl's")
image_id_url_map = data_access.add_new_images(image_object_list,user_id)

copy_source = os.getenv('SOURCE_CONTAINER_NAME')
copy_destination = os.getenv('DESTINATION_CONTAINER_NAME')

# Create blob service for storage account
blob_service = BlockBlobService(account_name=os.getenv('STORAGE_ACCOUNT_NAME'), account_key=os.getenv('STORAGE_ACCOUNT_KEY'))

# Copy images to permanent storage and get a dictionary of images for which to update URLs in DB.
# TODO: Prefer to have this function return a JSON blob as a string containing a list of successes
# and a list of failures. If the list of failures contains any items, return a status code other than 200.
update_urls_dictionary = copy_images_to_permanent_storage(image_id_url_map, copy_source, copy_destination, blob_service)

# If the dictionary of images is empty, this means a failure occurred in a copy/delete operation.
# Otherwise, the dictionary contains permanent image URLs for each image ID that was successfully copied.
if not update_urls_dictionary:
return func.HttpResponse("ERROR: Image copy/delete operation failed. Check state of images in storage.", status_code=401)
else:
logging.debug("Now updating permanent URLs in the DB...")
data_access.update_image_urls(update_urls_dictionary, user_id)

content = json.dumps({"imageUrls":list(update_urls_dictionary.values())})
return func.HttpResponse(
status_code=200,
headers=DEFAULT_RETURN_HEADER,
body=content
)

except Exception as e:
logging.error("ERROR: Database connection failed. Exception: " + str(e))
return func.HttpResponse("ERROR: Unable to connect to database", status_code=503)

# Create/get user id
user_id_number = data_access.create_user(user_id)
logging.info("User id for {0} is {1}".format(user_id, str(user_id_number)))

# Add new images to the database, and retrieve a dictionary of ImageIds mapped to ImageUrls
image_id_url_map = data_access.add_new_images(image_object_list,user_id)

copy_source = os.getenv('SOURCE_CONTAINER_NAME')
copy_destination = os.getenv('DESTINATION_CONTAINER_NAME')

# Create blob service for storage account
blob_service = BlockBlobService(account_name=os.getenv('STORAGE_ACCOUNT_NAME'), account_key=os.getenv('STORAGE_ACCOUNT_KEY'))

# Copy images to permanent storage and get a dictionary of images for which to update URLs in DB.
# TODO: Prefer to have this function return a JSON blob as a string containing a list of successes
# and a list of failures. If the list of failures contains any items, return a status code other than 200.
update_urls_dictionary = copy_images_to_permanent_storage(image_id_url_map, copy_source, copy_destination, blob_service)

# If the dictionary of images is empty, this means a failure occurred in a copy/delete operation.
# Otherwise, the dictionary contains permanent image URLs for each image ID that was successfully copied.
if not update_urls_dictionary:
return func.HttpResponse("ERROR: Image copy/delete operation failed. Check state of images in storage.", status_code=401)
else:
logging.info("Now updating permanent URLs in the DB...")
data_access.update_image_urls(update_urls_dictionary, user_id_number)
logging.info("Done.")
# Return string containing list of URLs to images in permanent blob storage
return func.HttpResponse("Images were successfully added to the database and copied to permanent storage.", status_code=200)
logging.error("Exception: " + str(e))
return func.HttpResponse("Internal error occured", status_code=503)


# Given a list of image URLs, build an ImageInfo object for each, and return a list of these image objects.
def build_objects_from_url_list(url_list):
@@ -91,4 +90,3 @@ def build_objects_from_url_list(url_list):
# Append image object to the list
image_object_list.append(image)
return image_object_list

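Note: the TODO about having copy_images_to_permanent_storage report successes and failures could end up looking roughly like the sketch below. The response shape and the 207 status choice are assumptions, not part of this PR:

```python
# Sketch of the tail of main(), inside the existing try block, once per-image
# copy failures are reported. Field names here are hypothetical.
failed_ids = [image_id for image_id in image_id_url_map
              if image_id not in update_urls_dictionary]
content = json.dumps({
    "imageUrls": list(update_urls_dictionary.values()),
    "failedImageIds": failed_ids,
})
return func.HttpResponse(
    status_code=200 if not failed_ids else 207,  # 207: partial success
    headers=DEFAULT_RETURN_HEADER,
    body=content,
)
```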
1 change: 1 addition & 0 deletions functions/pipeline/requirements.txt
@@ -1,6 +1,7 @@
azure-functions==1.0.0a5
azure-functions-worker==1.0.0a6
azure-storage-blob==1.4.0
azure-storage-file==1.4.0
grpcio==1.14.2
grpcio-tools==1.14.2
protobuf==3.6.1