From 1894f4300b20ef82ce9a908bb56e2725c3979723 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 1 Nov 2018 19:30:30 -0700 Subject: [PATCH 01/31] Updated requirements.txt. Fixed pylint issues. Updated test gen script --- db/postgres-client.py | 40 ++++++++++++++---------- functions/pipeline/requirements.txt | 1 + functions/pipeline/shared/data_access.py | 2 +- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/db/postgres-client.py b/db/postgres-client.py index ec6bd201..95121b7c 100644 --- a/db/postgres-client.py +++ b/db/postgres-client.py @@ -44,8 +44,8 @@ def id_generator(size=6, chars=string.ascii_uppercase + string.digits): def generate_test_image_infos(count): list_of_image_infos = [] for i in range(count): - file_name = f'{id_generator(size=random.randint(4,10))}.jpg' - image_location = f'https://mock-storage.blob.core.windows.net/new-uploads/{file_name}' + file_name = "{0}.jpg".format(id_generator(size=random.randint(4,10))) + image_location = "https://mock-storage.blob.core.windows.net/new-uploads/{0}".format(file_name) img = ImageInfo(file_name,image_location,random.randint(100,600),random.randint(100,600)) list_of_image_infos.append(img) return list_of_image_infos @@ -61,7 +61,7 @@ def get_image_ids_for_new_images(conn, list_of_image_infos): url_to_image_id_map[img.image_location] = new_img_id #__update_images(conn,[new_img_id],ImageTagState.NOT_READY) conn.commit() - print(f"Inserted {len(url_to_image_id_map)} images to the DB") + print("Inserted {0} images to the DB".format(len(url_to_image_id_map))) return url_to_image_id_map def get_new_images(conn,number_of_images): @@ -85,18 +85,18 @@ def update_image_urls(conn,image_id_to_url_map): cursor = conn.cursor() query = "UPDATE Image_Info SET ImageLocation = '{0}', ModifiedDtim = now() WHERE ImageId = {1}" cursor.execute(query.format(new_url,image_id)) - print(f"Updated ImageId: {image_id} to new ImageLocation: {new_url}") + print("Updated ImageId: {0} to new ImageLocation: 
{1}".format(image_id,new_url)) __update_images(conn,[image_id],ImageTagState.READY_TO_TAG) - print(f"ImageId: {image_id} to has a new state: {ImageTagState.READY_TO_TAG.name}") + print("ImageId: {0} to has a new state: {1}".format(image_id,ImageTagState.READY_TO_TAG.name)) conn.commit() def update_tagged_images(conn,list_of_image_ids): __update_images(conn,list_of_image_ids,ImageTagState.COMPLETED_TAG) - print(f"Updated {len(list_of_image_ids)} image(s) to the state {ImageTagState.COMPLETED_TAG.name}") + print("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.COMPLETED_TAG.name)) def update_untagged_images(conn,list_of_image_ids): __update_images(conn,list_of_image_ids,ImageTagState.INCOMPLETE_TAG) - print(f"Updated {len(list_of_image_ids)} image(s) to the state {ImageTagState.INCOMPLETE_TAG.name}") + print("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.INCOMPLETE_TAG.name)) def __update_images(conn, list_of_image_ids, new_image_tag_state): if not isinstance(new_image_tag_state, ImageTagState): @@ -138,7 +138,7 @@ def pretty_print_audit_history(conn, list_of_image_ids): if(row != None): print("ImageId\t\tOriginalImageName\t\tTagState\t\tTransitionTime") while row: - print(f"{str(row[0])}\t\t{str(row[1])}\t\t{str(row[2])}\t\t{str(row[3])}") + print("{0}\t\t{1}\t\t{2}\t\t{3}".format(str(row[0]),str(row[1]),str(row[2]),str(row[3]))) row = cursor.fetchone() else: print("No images!") @@ -155,11 +155,15 @@ def extract_image_id_from_urls(list_of_image_urls): extracted_image_ids.append(extracted_id) return extracted_image_ids -def main(): +def main(num_of_images): try: if(os.getenv("DB_HOST") is None or os.getenv("DB_USER") is None or os.getenv("DB_NAME") is None or os.getenv("DB_PASS") is None): print("Please set environment variables for DB_HOST, DB_USER, DB_NAME, DB_PASS") return + + if(num_of_images < 5 or num_of_images > 20): + print("Number of images should be between 5 and 20") + return 
################################################################# # Below we simulate the following scenarios: # Onboarding of new images @@ -167,7 +171,7 @@ def main(): # Checking in images that have or have not been tagged ################################################################# - NUMBER_OF_IMAGES = 5 + NUMBER_OF_IMAGES = num_of_images # Simulate new images from VOTT getting created in some blob store mocked_images = generate_test_image_infos(NUMBER_OF_IMAGES) @@ -209,7 +213,7 @@ def main(): print("***\tSubject matter experts use the CLI to retrieve images in a 'ready to tag' state") time.sleep(2) print() - + list_of_image_urls = get_new_images(get_connection(),NUMBER_OF_IMAGES) print() print("***\tLet's wait for image taggers to get through the set of images....") @@ -230,11 +234,12 @@ def main(): # call corresponding methods to update tagged and untagged states completed_tagged_ids = [] incomplete_tagged_ids = [] + num_of_incomplete = NUMBER_OF_IMAGES/5 for idx, img_id in enumerate(extracted_image_ids): - if(idx > 2): - incomplete_tagged_ids.append(img_id) - else: + if(idx > num_of_incomplete): completed_tagged_ids.append(img_id) + else: + incomplete_tagged_ids.append(img_id) update_tagged_images(get_connection(),completed_tagged_ids) update_untagged_images(get_connection(),incomplete_tagged_ids) @@ -251,8 +256,11 @@ def main(): print("Success!") #__verify_connect_to_db(get_connection()) - #get_unvisited_items(get_connection(),count_of_images) + #get_unvisited_items(get_connection(),count_of_images) except Exception as e: print(e) if __name__ == "__main__": - main() + if (len(sys.argv) != 2): + print("Usage: {0} (Number of Images)".format(sys.argv[0])) + else: + main(int(sys.argv[1])) \ No newline at end of file diff --git a/functions/pipeline/requirements.txt b/functions/pipeline/requirements.txt index 654e08aa..fbf84768 100644 --- a/functions/pipeline/requirements.txt +++ b/functions/pipeline/requirements.txt @@ -1,5 +1,6 @@ azure-functions==1.0.0a5 
azure-functions-worker==1.0.0a6 +azure-storage grpcio==1.14.2 grpcio-tools==1.14.2 protobuf==3.6.1 diff --git a/functions/pipeline/shared/data_access.py b/functions/pipeline/shared/data_access.py index 8d3a8420..5d439cdc 100644 --- a/functions/pipeline/shared/data_access.py +++ b/functions/pipeline/shared/data_access.py @@ -51,7 +51,7 @@ def get_unvisited_items(num_images): images_to_update = '{0}'.format(', '.join(selected_images_to_tag.keys())) cursor.execute("UPDATE Image_Tagging_State SET TagStateId = {0} WHERE ImageId IN ({1})".format(tagging_state,images_to_update)) db.commit() - print(f"Updated {len(selected_images_to_tag)} images to the state {tagging_state}") + print("Updated {0} images to the state {1}".format(len(selected_images_to_tag),tagging_state)) else: print("No images untagged images left!") From b85f23bcb6aaef4a877ff822837e539602ec119f Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Sat, 3 Nov 2018 14:53:11 -0700 Subject: [PATCH 02/31] Added 2 new tables. Renamed tables to support ordered deploy --- db/install-db-resources.py | 4 +++- db/tables/000_classification_info.sql | 7 +++++++ db/tables/{image_info.sql => 000_image_info.sql} | 0 ...ate_audit.sql => 000_image_tagging_state_audit.sql} | 0 db/tables/{tag_state.sql => 000_tag_state.sql} | 0 ...e_tagging_state.sql => 010_image_tagging_state.sql} | 0 db/tables/{image_tags.sql => 010_image_tags.sql} | 2 +- db/tables/010_tags_classification.sql | 10 ++++++++++ 8 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 db/tables/000_classification_info.sql rename db/tables/{image_info.sql => 000_image_info.sql} (100%) rename db/tables/{image_tagging_state_audit.sql => 000_image_tagging_state_audit.sql} (100%) rename db/tables/{tag_state.sql => 000_tag_state.sql} (100%) rename db/tables/{image_tagging_state.sql => 010_image_tagging_state.sql} (100%) rename db/tables/{image_tags.sql => 010_image_tags.sql} (90%) create mode 100644 db/tables/010_tags_classification.sql diff --git 
a/db/install-db-resources.py b/db/install-db-resources.py index ee52c552..c9239b55 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -1,6 +1,7 @@ import pg8000 import os import sys +import collections from os import listdir from os.path import isfile, join @@ -59,6 +60,7 @@ def execute_files_in_dir_list(conn,list_of_sub_dirs): for sub_dir in list_of_sub_dirs: print("\n****\tReading files in '{0}' directory\t****\n".format(sub_dir)) file_query_map = get_file_query_map(sub_dir) + file_query_map = collections.OrderedDict(sorted(file_query_map.items())) if '' in file_query_map.values(): print("One of the files is empty. Please fix") return @@ -83,6 +85,6 @@ def main(db_name): if __name__ == "__main__": if (len(sys.argv) != 2): - print("Expected 1 argument of type string for db_name") + print("Usage: python3 {0} (DB Name)".format(sys.argv[0])) else: main(str(sys.argv[1])) diff --git a/db/tables/000_classification_info.sql b/db/tables/000_classification_info.sql new file mode 100644 index 00000000..804a438f --- /dev/null +++ b/db/tables/000_classification_info.sql @@ -0,0 +1,7 @@ +-- Set up table +CREATE TABLE Classification_Info ( + ClassificationId SERIAL PRIMARY KEY, + ClassificationName text NOT NULL, + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); diff --git a/db/tables/image_info.sql b/db/tables/000_image_info.sql similarity index 100% rename from db/tables/image_info.sql rename to db/tables/000_image_info.sql diff --git a/db/tables/image_tagging_state_audit.sql b/db/tables/000_image_tagging_state_audit.sql similarity index 100% rename from db/tables/image_tagging_state_audit.sql rename to db/tables/000_image_tagging_state_audit.sql diff --git a/db/tables/tag_state.sql b/db/tables/000_tag_state.sql similarity index 100% rename from db/tables/tag_state.sql rename to db/tables/000_tag_state.sql diff --git a/db/tables/image_tagging_state.sql 
b/db/tables/010_image_tagging_state.sql similarity index 100% rename from db/tables/image_tagging_state.sql rename to db/tables/010_image_tagging_state.sql diff --git a/db/tables/image_tags.sql b/db/tables/010_image_tags.sql similarity index 90% rename from db/tables/image_tags.sql rename to db/tables/010_image_tags.sql index 9bc65713..bcdc2de6 100644 --- a/db/tables/image_tags.sql +++ b/db/tables/010_image_tags.sql @@ -1,6 +1,6 @@ -- Set up table CREATE TABLE Image_Tags ( - ImageTagId integer NOT NULL, + ImageTagId integer NOT NULL UNIQUE, ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, --ClassificationId text NOT NULL, --Needed? --Confidence double precision NOT NULL, --Needed? diff --git a/db/tables/010_tags_classification.sql b/db/tables/010_tags_classification.sql new file mode 100644 index 00000000..f86561be --- /dev/null +++ b/db/tables/010_tags_classification.sql @@ -0,0 +1,10 @@ +-- Set up table +CREATE TABLE Tags_Classification ( + ImageTagId integer REFERENCES Image_Tags(ImageTagId) UNIQUE, + ClassificationId integer REFERENCES Classification_Info(ClassificationId), + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp, + PRIMARY KEY (ImageTagId,ClassificationId) + --CONSTRAINT FK_IMAGE_TAG FOREIGN KEY(ImageTagId), + --CONSTRAINT FK_CLASSIFICATION FOREIGN KEY(ClassificationId) +); \ No newline at end of file From 17a22471ab9b8ef5717b4d011722af207853466e Mon Sep 17 00:00:00 2001 From: Andre Date: Sat, 3 Nov 2018 23:46:30 -0700 Subject: [PATCH 03/31] Added support for user creating and auditing in DB, DAL, and deployment. 
--- db/SeedData.sql | 21 ---- db/data/seed_tag_states.sql | 8 ++ db/functions/log_image_info_insert.sql | 4 +- .../log_image_tagging_state_changes.sql | 4 +- .../log_image_tagging_state_insert.sql | 4 +- db/install-db-resources.py | 17 ++- db/postgres-client.py | 100 +++++++++++------- db/tables/000_image_tagging_state_audit.sql | 1 + db/tables/000_user_info.sql | 7 ++ ...{000_image_info.sql => 010_image_info.sql} | 1 + ..._state.sql => 100_image_tagging_state.sql} | 1 + ...{010_image_tags.sql => 100_image_tags.sql} | 0 ...cation.sql => 100_tags_classification.sql} | 0 13 files changed, 102 insertions(+), 66 deletions(-) delete mode 100644 db/SeedData.sql create mode 100644 db/data/seed_tag_states.sql create mode 100644 db/tables/000_user_info.sql rename db/tables/{000_image_info.sql => 010_image_info.sql} (86%) rename db/tables/{010_image_tagging_state.sql => 100_image_tagging_state.sql} (83%) rename db/tables/{010_image_tags.sql => 100_image_tags.sql} (100%) rename db/tables/{010_tags_classification.sql => 100_tags_classification.sql} (100%) diff --git a/db/SeedData.sql b/db/SeedData.sql deleted file mode 100644 index dbda07a9..00000000 --- a/db/SeedData.sql +++ /dev/null @@ -1,21 +0,0 @@ --- Set up the states -INSERT INTO TagState VALUES (0, 'Not Ready'); -INSERT INTO TagState VALUES (1, 'Ready To Tag'); -INSERT INTO TagState VALUES (2, 'Tag In Progress'); -INSERT INTO TagState VALUES (3, 'Completed Tag'); -INSERT INTO TagState VALUES (4, 'Incomplete Tag'); -INSERT INTO TagState VALUES (5, 'Abandoned'); - --- Create fake image entries -INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width) -VALUES ('MyTestImage.jpg', 'https://csehackstorage.blob.core.windows.net/image-to-tag/1.jpg', 40,40); -INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width) -VALUES ('AnotherImage.jpg', 'https://csehackstorage.blob.core.windows.net/image-to-tag/2.jpg', 60, 80); -INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width) -VALUES 
('NonexistantImage.jpg', 'https://csehackstorage.blob.core.windows.net/image-to-tag/3.jpg', 60, 80); - --- Create "ready to tag" states for the 2 fake images -INSERT INTO Image_Tagging_State (ImageId,TagStateId) VALUES (1, 1); -INSERT INTO Image_Tagging_State (ImageId,TagStateId) VALUES (2, 1); -INSERT INTO Image_Tagging_State (ImageId,TagStateId) VALUES (3, 1); - diff --git a/db/data/seed_tag_states.sql b/db/data/seed_tag_states.sql new file mode 100644 index 00000000..8f8ad5c1 --- /dev/null +++ b/db/data/seed_tag_states.sql @@ -0,0 +1,8 @@ +-- Set up the states +INSERT INTO TagState VALUES + (0, 'Not Ready'), + (1, 'Ready To Tag'), + (2, 'Tag In Progress'), + (3, 'Completed Tag'), + (4, 'Incomplete Tag'), + (5, 'Abandoned'); diff --git a/db/functions/log_image_info_insert.sql b/db/functions/log_image_info_insert.sql index 52617256..d5d55614 100644 --- a/db/functions/log_image_info_insert.sql +++ b/db/functions/log_image_info_insert.sql @@ -3,8 +3,8 @@ CREATE OR REPLACE FUNCTION log_image_info_insert() RETURNS trigger AS ' BEGIN - INSERT INTO Image_Tagging_State(ImageId,TagStateId,ModifiedDtim,CreatedDtim) - VALUES(NEW.ImageId,0,current_timestamp,current_timestamp); + INSERT INTO Image_Tagging_State(ImageId,TagStateId,ModifiedByUser,ModifiedDtim,CreatedDtim) + VALUES(NEW.ImageId,0,NEW.CreatedByUser,current_timestamp,current_timestamp); RETURN NEW; END; diff --git a/db/functions/log_image_tagging_state_changes.sql b/db/functions/log_image_tagging_state_changes.sql index df65ed55..85580d51 100644 --- a/db/functions/log_image_tagging_state_changes.sql +++ b/db/functions/log_image_tagging_state_changes.sql @@ -4,8 +4,8 @@ CREATE OR REPLACE FUNCTION log_image_tagging_state_changes() ' BEGIN IF NEW.TagStateId <> OLD.TagStateId THEN - INSERT INTO Image_Tagging_State_Audit(ImageId,TagStateId,ModifiedDtim,ArchiveDtim,ActionFlag) - VALUES(NEW.ImageId,NEW.TagStateId,NEW.ModifiedDtim,current_timestamp,2); + INSERT INTO 
Image_Tagging_State_Audit(ImageId,TagStateId,ModifiedByUser,ModifiedDtim,ArchiveDtim,ActionFlag) + VALUES(NEW.ImageId,NEW.TagStateId,NEW.ModifiedByUser,NEW.ModifiedDtim,current_timestamp,2); END IF; RETURN NEW; diff --git a/db/functions/log_image_tagging_state_insert.sql b/db/functions/log_image_tagging_state_insert.sql index 1e011ae0..366e67a4 100644 --- a/db/functions/log_image_tagging_state_insert.sql +++ b/db/functions/log_image_tagging_state_insert.sql @@ -3,8 +3,8 @@ CREATE OR REPLACE FUNCTION log_image_tagging_state_insert() RETURNS trigger AS ' BEGIN - INSERT INTO Image_Tagging_State_Audit(ImageId,TagStateId,ModifiedDtim,ArchiveDtim,ActionFlag) - VALUES(NEW.ImageId,NEW.TagStateId,NEW.ModifiedDtim,current_timestamp,1); + INSERT INTO Image_Tagging_State_Audit(ImageId,TagStateId,ModifiedByUser,ModifiedDtim,ArchiveDtim,ActionFlag) + VALUES(NEW.ImageId,NEW.TagStateId,NEW.ModifiedByUser,NEW.ModifiedDtim,current_timestamp,1); RETURN NEW; END; diff --git a/db/install-db-resources.py b/db/install-db-resources.py index c9239b55..be559b58 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -37,6 +37,18 @@ def create_database(conn, db_name): print("No database created due to empty parameter") return +def install_extensions(conn, list_of_extensions): + if (len(list_of_extensions) > 0): + cursor = conn.cursor() + conn.autocommit = True + for ext in list_of_extensions: + query = "CREATE EXTENSION {0};" + cursor.execute(query.format(ext)) + print("Installed extension named '{0}'".format(ext)) + else: + print("No extensions to install") + return + def get_connection(): return __new_postgres_connection(os.environ['DB_HOST'],os.environ['DB_NAME'],os.environ['DB_USER'],os.environ['DB_PASS']) @@ -75,9 +87,12 @@ def main(db_name): #Set up the database create_database(get_default_connection(),db_name) + #Install extensions + install_extensions(get_connection_for_db(db_name),['citext']) + #Connect to the new database and install resources conn = 
get_connection_for_db(db_name) - sub_dirs = ["tables","functions","triggers"] + sub_dirs = ["tables","functions","triggers","data"] execute_files_in_dir_list(conn,sub_dirs) print("Done!") diff --git a/db/postgres-client.py b/db/postgres-client.py index 95121b7c..92ea5fe4 100644 --- a/db/postgres-client.py +++ b/db/postgres-client.py @@ -50,13 +50,14 @@ def generate_test_image_infos(count): list_of_image_infos.append(img) return list_of_image_infos -def get_image_ids_for_new_images(conn, list_of_image_infos): +# TODO: Use bulk insert: https://stackoverflow.com/questions/5875953/returning-multiple-serial-values-from-posgtres-batch-insert +def get_image_ids_for_new_images(conn, list_of_image_infos, user_id): url_to_image_id_map = {} - if(len(list_of_image_infos) > 0): + if(len(list_of_image_infos) > 0 and user_id): cursor = conn.cursor() for img in list(list_of_image_infos): - query = "INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width) VALUES ('{0}','{1}',{2},{3}) RETURNING ImageId;" - cursor.execute(query.format(img.image_name,img.image_location,str(img.height),str(img.width))) + query = "INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width,CreatedByUser) VALUES ('{0}','{1}',{2},{3},{4}) RETURNING ImageId;" + cursor.execute(query.format(img.image_name,img.image_location,str(img.height),str(img.width),user_id)) new_img_id = cursor.fetchone()[0] url_to_image_id_map[img.image_location] = new_img_id #__update_images(conn,[new_img_id],ImageTagState.NOT_READY) @@ -64,7 +65,7 @@ def get_image_ids_for_new_images(conn, list_of_image_infos): print("Inserted {0} images to the DB".format(len(url_to_image_id_map))) return url_to_image_id_map -def get_new_images(conn,number_of_images): +def get_new_images(conn,number_of_images, user_id): cursor = conn.cursor() # GET N existing UNTAGGED rows @@ -77,37 +78,39 @@ def get_new_images(conn,number_of_images): print('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2])) 
selected_images_to_tag[str(row[0])] = str(row[1]) - __update_images(conn,selected_images_to_tag,ImageTagState.TAG_IN_PROGRESS) + __update_images(conn,selected_images_to_tag,ImageTagState.TAG_IN_PROGRESS, user_id) return selected_images_to_tag.values() -def update_image_urls(conn,image_id_to_url_map): - for image_id, new_url in image_id_to_url_map.items(): - cursor = conn.cursor() - query = "UPDATE Image_Info SET ImageLocation = '{0}', ModifiedDtim = now() WHERE ImageId = {1}" - cursor.execute(query.format(new_url,image_id)) - print("Updated ImageId: {0} to new ImageLocation: {1}".format(image_id,new_url)) - __update_images(conn,[image_id],ImageTagState.READY_TO_TAG) - print("ImageId: {0} to has a new state: {1}".format(image_id,ImageTagState.READY_TO_TAG.name)) - conn.commit() +def update_image_urls(conn,image_id_to_url_map, user_id): + if(len(image_id_to_url_map.items()) and user_id): + for image_id, new_url in image_id_to_url_map.items(): + cursor = conn.cursor() + query = "UPDATE Image_Info SET ImageLocation = '{0}', ModifiedDtim = now() WHERE ImageId = {1}" + cursor.execute(query.format(new_url,image_id)) + conn.commit() + print("Updated ImageId: {0} to new ImageLocation: {1}".format(image_id,new_url)) + __update_images(conn,[image_id],ImageTagState.READY_TO_TAG, user_id) + print("ImageId: {0} to has a new state: {1}".format(image_id,ImageTagState.READY_TO_TAG.name)) -def update_tagged_images(conn,list_of_image_ids): - __update_images(conn,list_of_image_ids,ImageTagState.COMPLETED_TAG) + +def update_tagged_images(conn,list_of_image_ids, user_id): + __update_images(conn,list_of_image_ids,ImageTagState.COMPLETED_TAG,user_id) print("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.COMPLETED_TAG.name)) -def update_untagged_images(conn,list_of_image_ids): - __update_images(conn,list_of_image_ids,ImageTagState.INCOMPLETE_TAG) +def update_untagged_images(conn,list_of_image_ids, user_id): + 
__update_images(conn,list_of_image_ids,ImageTagState.INCOMPLETE_TAG,user_id) print("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.INCOMPLETE_TAG.name)) -def __update_images(conn, list_of_image_ids, new_image_tag_state): +def __update_images(conn, list_of_image_ids, new_image_tag_state, user_id): if not isinstance(new_image_tag_state, ImageTagState): raise TypeError('new_image_tag_state must be an instance of Direction Enum') - if(len(list_of_image_ids) > 0): + if(len(list_of_image_ids) > 0 and user_id): cursor = conn.cursor() image_ids_as_strings = [str(i) for i in list_of_image_ids] images_to_update = '{0}'.format(', '.join(image_ids_as_strings)) - query = "UPDATE Image_Tagging_State SET TagStateId = {0}, ModifiedDtim = now() WHERE ImageId IN ({1})" - cursor.execute(query.format(new_image_tag_state,images_to_update)) + query = "UPDATE Image_Tagging_State SET TagStateId = {0}, ModifiedByUser = {2}, ModifiedDtim = now() WHERE ImageId IN ({1})" + cursor.execute(query.format(new_image_tag_state,images_to_update,user_id)) conn.commit() #print(f"Updated {len(list_of_image_ids)} image(s) to the state {new_image_tag_state.name}") else: @@ -127,22 +130,36 @@ def pretty_print_audit_history(conn, list_of_image_ids): cursor = conn.cursor() image_ids_as_strings = [str(i) for i in list_of_image_ids] images_to_audit = '{0}'.format(', '.join(image_ids_as_strings)) - query = ("SELECT a.imageid,c.originalimagename, b.tagstatename, a.ArchiveDtim FROM image_tagging_state_audit a " + query = ("SELECT a.imageid,c.originalimagename, b.tagstatename, d.username, a.ArchiveDtim FROM image_tagging_state_audit a " "JOIN tagstate b ON a.tagstateid = b.tagstateid " "JOIN image_info c on a.imageid = c.imageid " + "JOIN user_info d on a.modifiedbyuser = d.userid " "WHERE a.ImageId in ({0}) " "ORDER BY a.ImageId,ArchiveDtim ASC") cursor.execute(query.format(images_to_audit)) row = cursor.fetchone() print() if(row != None): - 
print("ImageId\t\tOriginalImageName\t\tTagState\t\tTransitionTime") + print("ImageId\tImgName\tTagState\tUser\tLoggedTime") while row: - print("{0}\t\t{1}\t\t{2}\t\t{3}".format(str(row[0]),str(row[1]),str(row[2]),str(row[3]))) + print("{0}\t{1}\t{2}\t{3}\t{4}".format(str(row[0]),str(row[1]),str(row[2]),str(row[3]),str(row[4]))) row = cursor.fetchone() else: print("No images!") +def create_user(conn,user_name): + user_id = -1 + if user_name: + try: + cursor = conn.cursor() + query = "INSERT INTO User_Info (UserName) VALUES ('{0}') ON CONFLICT (username) DO UPDATE SET username=EXCLUDED.username RETURNING UserId;" + cursor.execute(query.format(user_name)) + user_id = cursor.fetchone()[0] + conn.commit() + except Exception as e: print(e) + finally: cursor.close() + return user_id + def extract_image_name_no_suffix(url): start_idx = url.rfind('/')+1 end_idx = url.rfind('.') @@ -155,22 +172,29 @@ def extract_image_id_from_urls(list_of_image_urls): extracted_image_ids.append(extracted_id) return extracted_image_ids -def main(num_of_images): +def main(num_of_images,user_name): try: if(os.getenv("DB_HOST") is None or os.getenv("DB_USER") is None or os.getenv("DB_NAME") is None or os.getenv("DB_PASS") is None): print("Please set environment variables for DB_HOST, DB_USER, DB_NAME, DB_PASS") return - + if(num_of_images < 5 or num_of_images > 20): print("Number of images should be between 5 and 20") return + + if(not user_name): + print("User name cannot be empty or whitespace") + return ################################################################# # Below we simulate the following scenarios: + # Creating a User # Onboarding of new images # Checking out images to tag # Checking in images that have or have not been tagged ################################################################# + user_id = create_user(get_connection(),user_name) + NUMBER_OF_IMAGES = num_of_images # Simulate new images from VOTT getting created in some blob store @@ -181,9 +205,9 @@ def 
main(num_of_images): print() # Simulate the data access layer creating entries in the DB for the new images # and returning a map of the original image url to generaled image id - url_to_image_id_map = get_image_ids_for_new_images(get_connection(),mocked_images) + url_to_image_id_map = get_image_ids_for_new_images(get_connection(),mocked_images, user_id) print() - + print("***\tBehind the scenes Az Functions move the images to a new blob location") time.sleep(1) print() @@ -196,7 +220,7 @@ def main(num_of_images): # Simulates the call the client makes to the data access layer # with the new payload. Image urls get updated in the DB - update_image_urls(get_connection(),updated_image_id_url_map) + update_image_urls(get_connection(),updated_image_id_url_map, user_id) print() print("***\tThe newly uploaded images are now onboarded with a 'ready to tag' state. See audit history") @@ -209,12 +233,12 @@ def main(num_of_images): pretty_print_audit_history(get_connection(),image_ids) time.sleep(3) print() - + print("***\tSubject matter experts use the CLI to retrieve images in a 'ready to tag' state") time.sleep(2) print() - list_of_image_urls = get_new_images(get_connection(),NUMBER_OF_IMAGES) + list_of_image_urls = get_new_images(get_connection(),NUMBER_OF_IMAGES, user_id) print() print("***\tLet's wait for image taggers to get through the set of images....") time.sleep(5) @@ -241,8 +265,8 @@ def main(num_of_images): else: incomplete_tagged_ids.append(img_id) - update_tagged_images(get_connection(),completed_tagged_ids) - update_untagged_images(get_connection(),incomplete_tagged_ids) + update_tagged_images(get_connection(),completed_tagged_ids,user_id) + update_untagged_images(get_connection(),incomplete_tagged_ids,user_id) print() print("***\tVOTT json results are posted. 
Lets take a look at the audit history") @@ -260,7 +284,7 @@ def main(num_of_images): except Exception as e: print(e) if __name__ == "__main__": - if (len(sys.argv) != 2): - print("Usage: {0} (Number of Images)".format(sys.argv[0])) + if (len(sys.argv) != 3): + print("Usage: {0} (Number of Images) (User Name)".format(sys.argv[0])) else: - main(int(sys.argv[1])) \ No newline at end of file + main(int(sys.argv[1]), str(sys.argv[2])) \ No newline at end of file diff --git a/db/tables/000_image_tagging_state_audit.sql b/db/tables/000_image_tagging_state_audit.sql index 16c8c992..9723398f 100644 --- a/db/tables/000_image_tagging_state_audit.sql +++ b/db/tables/000_image_tagging_state_audit.sql @@ -2,6 +2,7 @@ CREATE TABLE Image_Tagging_State_Audit ( RowId serial primary key, ImageId integer NOT NULL, TagStateId integer NOT NULL, + ModifiedByUser integer NOT NULL, ModifiedDtim timestamp NOT NULL, ArchiveDtim timestamp NOT NULL, ActionFlag integer NOT NULL diff --git a/db/tables/000_user_info.sql b/db/tables/000_user_info.sql new file mode 100644 index 00000000..a6f7b689 --- /dev/null +++ b/db/tables/000_user_info.sql @@ -0,0 +1,7 @@ +-- Simple User table +CREATE TABLE User_Info ( + UserId SERIAL PRIMARY KEY, + UserName citext NOT NULL UNIQUE, + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); \ No newline at end of file diff --git a/db/tables/000_image_info.sql b/db/tables/010_image_info.sql similarity index 86% rename from db/tables/000_image_info.sql rename to db/tables/010_image_info.sql index b094550b..b30ebfc1 100644 --- a/db/tables/000_image_info.sql +++ b/db/tables/010_image_info.sql @@ -5,6 +5,7 @@ CREATE TABLE Image_Info ( ImageLocation text, Height integer NOT NULL, Width integer NOT NULL, + CreatedByUser integer REFERENCES User_Info(UserId), ModifiedDtim timestamp NOT NULL default current_timestamp, CreatedDtim timestamp NOT NULL default current_timestamp ); \ No newline at end of file 
diff --git a/db/tables/010_image_tagging_state.sql b/db/tables/100_image_tagging_state.sql similarity index 83% rename from db/tables/010_image_tagging_state.sql rename to db/tables/100_image_tagging_state.sql index dbe230ef..d8a1207e 100644 --- a/db/tables/010_image_tagging_state.sql +++ b/db/tables/100_image_tagging_state.sql @@ -2,6 +2,7 @@ CREATE TABLE Image_Tagging_State ( ImageId integer REFERENCES Image_Info(ImageId), TagStateId integer NOT NULL, + ModifiedByUser integer REFERENCES User_Info(UserId), ModifiedDtim timestamp NOT NULL default current_timestamp, CreatedDtim timestamp NOT NULL default current_timestamp ); \ No newline at end of file diff --git a/db/tables/010_image_tags.sql b/db/tables/100_image_tags.sql similarity index 100% rename from db/tables/010_image_tags.sql rename to db/tables/100_image_tags.sql diff --git a/db/tables/010_tags_classification.sql b/db/tables/100_tags_classification.sql similarity index 100% rename from db/tables/010_tags_classification.sql rename to db/tables/100_tags_classification.sql From ec75f81e581b6e1fdb704e8f2c0e4028ed2c9aab Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Sat, 3 Nov 2018 23:55:33 -0700 Subject: [PATCH 04/31] Removed comment --- db/Deploy-Python-Functions.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/db/Deploy-Python-Functions.sh b/db/Deploy-Python-Functions.sh index 45de075b..ca25e971 100644 --- a/db/Deploy-Python-Functions.sh +++ b/db/Deploy-Python-Functions.sh @@ -26,8 +26,6 @@ if [[ "$StorageName" != *[a-z0-9]* ]]; then exit 1 fi - -#$filtered_output=$(az extension list) # See http://jmespath.org/tutorial.html for querying filtered_output=$(az extension list --query "[?name=='functionapp'].name") From 35740c90e41ec03d2954ef8993bad48e45978270 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Sat, 3 Nov 2018 23:58:54 -0700 Subject: [PATCH 05/31] Change --- db/Deploy-Python-Functions.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/db/Deploy-Python-Functions.sh 
b/db/Deploy-Python-Functions.sh index ca25e971..c74d6510 100644 --- a/db/Deploy-Python-Functions.sh +++ b/db/Deploy-Python-Functions.sh @@ -58,7 +58,6 @@ echo "Create a storage account for the function (if it does not exist for the cu echo az storage account create -n $StorageName -l "WestUS" -g $ResourceGroup --sku Standard_LRS - echo echo "Create a function app (if it does not exist for the current subscription)" echo From dd0723e3c54e0c0b6bae38da28e63489a4ff2433 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Mon, 5 Nov 2018 12:31:51 -0800 Subject: [PATCH 06/31] Adding support to check if db already exists. --- db/install-db-resources.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index be559b58..1d32bca8 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -25,6 +25,16 @@ def execute_queries_from_map(conn, file_query_map): print("\t{0}".format(file_path)) return +def database_exists(conn, db_name): + result = -1 + if db_name: + cursor = conn.cursor() + query = "SELECT 1 FROM pg_database WHERE datname='{0}'" + cursor.execute(query.format(db_name)) + row = cursor.fetchone() + result = int(row[0]) + return result == 1 + def create_database(conn, db_name): if db_name: cursor = conn.cursor() @@ -78,12 +88,16 @@ def execute_files_in_dir_list(conn,list_of_sub_dirs): return execute_queries_from_map(conn,file_query_map) -def main(db_name): +def main(db_name, overwrite_db): try: if(os.getenv("DB_HOST") is None or os.getenv("DB_USER") is None or os.getenv("DB_PASS") is None): print("Please set environment variables for DB_HOST, DB_USER, DB_PASS") return + if (database_exists(get_default_connection(), db_name) and not overwrite_db): + print("Database {0} already exists.".format(db_name)) + return + #Set up the database create_database(get_default_connection(),db_name) @@ -99,7 +113,14 @@ def main(db_name): except Exception as e: print(e) 
if __name__ == "__main__": - if (len(sys.argv) != 2): - print("Usage: python3 {0} (DB Name)".format(sys.argv[0])) - else: - main(str(sys.argv[1])) + if len(sys.argv) < 2: + print("Usage: python3 {0} (DB Name) [-force]".format(sys.argv[0])) + elif len(sys.argv) == 2: + main(str(sys.argv[1]),False) + ''' + elif str(sys.argv[2]).lower() == "-force": + main(str(sys.argv[1]),True) + else: + main(str(sys.argv[1]),False) + ''' + \ No newline at end of file From f8ead27cb49fa9c14f7689d01edf58501884e501 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Tue, 6 Nov 2018 08:11:41 -0800 Subject: [PATCH 07/31] Added comment --- db/install-db-resources.py | 1 + 1 file changed, 1 insertion(+) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 1d32bca8..6943254f 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -94,6 +94,7 @@ def main(db_name, overwrite_db): print("Please set environment variables for DB_HOST, DB_USER, DB_PASS") return + #TODO: Allow overwriting of existing DB if (database_exists(get_default_connection(), db_name) and not overwrite_db): print("Database {0} already exists.".format(db_name)) return From 3e339ce1471fb33706e420f9fc0b573b548585f8 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Tue, 6 Nov 2018 16:00:13 -0800 Subject: [PATCH 08/31] Fixed issue in db resoruces install file. 
Updated table schema --- db/install-db-resources.py | 10 ++++++---- db/tables/100_image_tags.sql | 10 +++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 6943254f..b6db48b7 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -26,14 +26,14 @@ def execute_queries_from_map(conn, file_query_map): return def database_exists(conn, db_name): - result = -1 if db_name: cursor = conn.cursor() query = "SELECT 1 FROM pg_database WHERE datname='{0}'" cursor.execute(query.format(db_name)) row = cursor.fetchone() - result = int(row[0]) - return result == 1 + if row: + return int(row[0]) == 1 + return False def create_database(conn, db_name): if db_name: @@ -111,7 +111,9 @@ def main(db_name, overwrite_db): execute_files_in_dir_list(conn,sub_dirs) print("Done!") - except Exception as e: print(e) + except Exception as e: + print(e) + #traceback.print_exc() if __name__ == "__main__": if len(sys.argv) < 2: diff --git a/db/tables/100_image_tags.sql b/db/tables/100_image_tags.sql index bcdc2de6..ac37cba6 100644 --- a/db/tables/100_image_tags.sql +++ b/db/tables/100_image_tags.sql @@ -1,13 +1,13 @@ -- Set up table CREATE TABLE Image_Tags ( - ImageTagId integer NOT NULL UNIQUE, + ImageTagId SERIAL UNIQUE, ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, --ClassificationId text NOT NULL, --Needed? --Confidence double precision NOT NULL, --Needed? - X_Min integer NOT NULL, - X_Max integer NOT NULL, - Y_Min integer NOT NULL, - Y_Max integer NOT NULL, + X_Min double precision NOT NULL, + X_Max double precision NOT NULL, + Y_Min double precision NOT NULL, + Y_Max double precision NOT NULL, --VOTT_Data json NOT NULL PRIMARY KEY (ImageTagId,ImageId) ); \ No newline at end of file From 4dcec720b82f21f4197f36bd811a5af2f24e78e6 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 08:21:29 -0800 Subject: [PATCH 09/31] Updated deployment to be more idempotent. 
Updated table schemas --- db/install-db-resources.py | 42 ++++++++++++++++++--------- db/tables/000_classification_info.sql | 2 +- db/tables/100_image_tags.sql | 14 ++++----- db/tables/100_tags_classification.sql | 4 +-- 4 files changed, 38 insertions(+), 24 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index b6db48b7..1a172645 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -2,6 +2,7 @@ import os import sys import collections +import argparse from os import listdir from os.path import isfile, join @@ -47,6 +48,18 @@ def create_database(conn, db_name): print("No database created due to empty parameter") return +def remove_database(conn, db_name): + if db_name: + cursor = conn.cursor() + conn.autocommit = True + query = "DROP DATABASE {0};" + print("\nAttempting to drop database '{0}'...This may take up to 30 seconds".format(db_name)) + cursor.execute(query.format(db_name)) + print("Successfully dropped database named '{0}'".format(db_name)) + else: + print("No database dropped due to empty parameter") + return + def install_extensions(conn, list_of_extensions): if (len(list_of_extensions) > 0): cursor = conn.cursor() @@ -94,9 +107,10 @@ def main(db_name, overwrite_db): print("Please set environment variables for DB_HOST, DB_USER, DB_PASS") return - #TODO: Allow overwriting of existing DB - if (database_exists(get_default_connection(), db_name) and not overwrite_db): - print("Database {0} already exists.".format(db_name)) + if (database_exists(get_default_connection(), db_name) and overwrite_db): + remove_database(get_default_connection(),db_name) + else: + print("Database {0} already exists. 
Please see --help for overwrite option.".format(db_name)) return #Set up the database @@ -116,14 +130,16 @@ def main(db_name, overwrite_db): #traceback.print_exc() if __name__ == "__main__": - if len(sys.argv) < 2: - print("Usage: python3 {0} (DB Name) [-force]".format(sys.argv[0])) - elif len(sys.argv) == 2: - main(str(sys.argv[1]),False) - ''' - elif str(sys.argv[2]).lower() == "-force": - main(str(sys.argv[1]),True) - else: - main(str(sys.argv[1]),False) - ''' + parser = argparse.ArgumentParser() + + parser.add_argument('database_name', type=str, + help='The name of the database to create and install resources on') + + parser.add_argument('-o','--overwrite', action='store_true', + help='Will drop and restore a database if it already exists') + + args = parser.parse_args() + database_name = args.database_name + main(args.database_name,args.overwrite) + \ No newline at end of file diff --git a/db/tables/000_classification_info.sql b/db/tables/000_classification_info.sql index 804a438f..cdf70e20 100644 --- a/db/tables/000_classification_info.sql +++ b/db/tables/000_classification_info.sql @@ -1,7 +1,7 @@ -- Set up table CREATE TABLE Classification_Info ( ClassificationId SERIAL PRIMARY KEY, - ClassificationName text NOT NULL, + ClassificationName citext NOT NULL UNIQUE, ModifiedDtim timestamp NOT NULL default current_timestamp, CreatedDtim timestamp NOT NULL default current_timestamp ); diff --git a/db/tables/100_image_tags.sql b/db/tables/100_image_tags.sql index ac37cba6..421cdb13 100644 --- a/db/tables/100_image_tags.sql +++ b/db/tables/100_image_tags.sql @@ -2,12 +2,12 @@ CREATE TABLE Image_Tags ( ImageTagId SERIAL UNIQUE, ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, - --ClassificationId text NOT NULL, --Needed? - --Confidence double precision NOT NULL, --Needed? 
- X_Min double precision NOT NULL, - X_Max double precision NOT NULL, - Y_Min double precision NOT NULL, - Y_Max double precision NOT NULL, + X_Min decimal(6,2) NOT NULL, + X_Max decimal(6,2) NOT NULL, + Y_Min decimal(6,2) NOT NULL, + Y_Max decimal(6,2) NOT NULL, + CreatedByUser integer REFERENCES User_Info(UserId), + CreatedDtim timestamp NOT NULL default current_timestamp, --VOTT_Data json NOT NULL - PRIMARY KEY (ImageTagId,ImageId) + PRIMARY KEY (ImageId,X_Min,X_Max,Y_Min,Y_Max) --Should we include the bounded box as well? ); \ No newline at end of file diff --git a/db/tables/100_tags_classification.sql b/db/tables/100_tags_classification.sql index f86561be..a285ee2d 100644 --- a/db/tables/100_tags_classification.sql +++ b/db/tables/100_tags_classification.sql @@ -1,10 +1,8 @@ -- Set up table CREATE TABLE Tags_Classification ( - ImageTagId integer REFERENCES Image_Tags(ImageTagId) UNIQUE, + ImageTagId integer REFERENCES Image_Tags(ImageTagId), ClassificationId integer REFERENCES Classification_Info(ClassificationId), ModifiedDtim timestamp NOT NULL default current_timestamp, CreatedDtim timestamp NOT NULL default current_timestamp, PRIMARY KEY (ImageTagId,ClassificationId) - --CONSTRAINT FK_IMAGE_TAG FOREIGN KEY(ImageTagId), - --CONSTRAINT FK_CLASSIFICATION FOREIGN KEY(ClassificationId) ); \ No newline at end of file From 475abbe38b4653732bc3c326351b79938e8e18ff Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 09:34:24 -0800 Subject: [PATCH 10/31] Added a new version of the data access layer. Added simple unit tests. 
--- functions/pipeline/shared/db_access_v2.py | 257 ++++++++++++++++++ functions/pipeline/shared/db_provider.py | 40 +++ .../pipeline/shared/test_db_access_v2.py | 104 +++++++ 3 files changed, 401 insertions(+) create mode 100644 functions/pipeline/shared/db_access_v2.py create mode 100644 functions/pipeline/shared/db_provider.py create mode 100644 functions/pipeline/shared/test_db_access_v2.py diff --git a/functions/pipeline/shared/db_access_v2.py b/functions/pipeline/shared/db_access_v2.py new file mode 100644 index 00000000..b2850b90 --- /dev/null +++ b/functions/pipeline/shared/db_access_v2.py @@ -0,0 +1,257 @@ +import sys +import string +import os +import time +import random +from enum import IntEnum, unique +import getpass +import itertools +from db_provider import DatabaseInfo, PostGresProvider + +@unique +class ImageTagState(IntEnum): + NOT_READY = 0 + READY_TO_TAG = 1 + TAG_IN_PROGRESS = 2 + COMPLETED_TAG = 3 + INCOMPLETE_TAG = 4 + ABANDONED = 5 + +# An entity class for a VOTT image +class ImageInfo(object): + def __init__(self, image_name, image_location, height, width): + self.image_name = image_name + self.image_location = image_location + self.height = height + self.width = width + +class ImageTag(object): + def __init__(self, image_id, x_min, x_max, y_min, y_max, classification_names): + self.image_id = image_id + self.x_min = x_min + self.x_max = x_max + self.y_min = y_min + self.y_max = y_max + self.classification_names = classification_names + +class ImageTagDataAccess(object): + def __init__(self, db_provider): + self._db_provider = db_provider + + def test_connection(self): + conn = self._db_provider.get_connection() + cursor = conn.cursor() + cursor.execute('select * from tagstate') + row = cursor.fetchone() + print() + while row: + print(str(row[0]) + " " + str(row[1])) + row = cursor.fetchone() + + def create_user(self,user_name): + user_id = -1 + if not user_name: + raise ArgumentException("Parameter cannot be an empty string") + try: + conn 
= self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = "INSERT INTO User_Info (UserName) VALUES ('{0}') ON CONFLICT (username) DO UPDATE SET username=EXCLUDED.username RETURNING UserId;" + cursor.execute(query.format(user_name)) + user_id = cursor.fetchone()[0] + conn.commit() + finally: cursor.close() + except Exception as e: + print("An error occured creating a user: {0}".format(e)) + raise + finally: conn.close() + return user_id + + def get_new_images(self, number_of_images, user_id): + if number_of_images <= 0: + raise ArgumentException("Parameter must be greater than zero") + + selected_images_to_tag = {} + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + query = ("SELECT b.ImageId, b.ImageLocation, a.TagStateId FROM Image_Tagging_State a " + "JOIN Image_Info b ON a.ImageId = b.ImageId WHERE a.TagStateId = 1 order by " + "a.createddtim DESC limit {0}") + cursor.execute(query.format(number_of_images)) + for row in cursor: + print('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2])) + selected_images_to_tag[str(row[0])] = str(row[1]) + self._update_images(selected_images_to_tag,ImageTagState.TAG_IN_PROGRESS, user_id, conn) + finally: cursor.close() + except Exception as e: + print("An errors occured getting images: {0}".format(e)) + raise + finally: conn.close() + return selected_images_to_tag.values() + + def add_new_images(self,list_of_image_infos, user_id): + + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + url_to_image_id_map = {} + if(len(list_of_image_infos) > 0): + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + for img in list(list_of_image_infos): + query = ("INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width,CreatedByUser) " + "VALUES ('{0}','{1}',{2},{3},{4}) RETURNING ImageId;") + 
cursor.execute(query.format(img.image_name,img.image_location,str(img.height),str(img.width),user_id)) + new_img_id = cursor.fetchone()[0] + url_to_image_id_map[img.image_location] = new_img_id + conn.commit() + finally: cursor.close() + print("Inserted {0} images to the DB".format(len(url_to_image_id_map))) + except Exception as e: + print("An errors occured getting image ids: {0}".format(e)) + raise + finally: conn.close() + return url_to_image_id_map + + def update_untagged_images(self,list_of_image_ids, user_id): + #TODO: Make sure the image ids are in a TAG_IN_PROGRESS state + self._update_images(list_of_image_ids,ImageTagState.INCOMPLETE_TAG,user_id, self._db_provider.get_connection()) + print("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.INCOMPLETE_TAG.name)) + + def _update_images(self, list_of_image_ids, new_image_tag_state, user_id, conn): + if not isinstance(new_image_tag_state, ImageTagState): + raise TypeError('new_image_tag_state must be an instance of Direction Enum') + + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + if not conn: + conn = self._db_provider.get_connection() + + try: + if(len(list_of_image_ids) > 0): + cursor = conn.cursor() + try: + image_ids_as_strings = [str(i) for i in list_of_image_ids] + images_to_update = '{0}'.format(', '.join(image_ids_as_strings)) + query = "UPDATE Image_Tagging_State SET TagStateId = {0}, ModifiedByUser = {2}, ModifiedDtim = now() WHERE ImageId IN ({1})" + cursor.execute(query.format(new_image_tag_state,images_to_update,user_id)) + conn.commit() + finally: cursor.close() + else: + print("No images to update") + except Exception as e: + print("An errors occured updating images: {0}".format(e)) + raise + + def update_image_urls(self,image_id_to_url_map, user_id): + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + if(len(image_id_to_url_map.items())): + try: + conn = self._db_provider.get_connection() + try: 
+ cursor = conn.cursor() + for image_id, new_url in image_id_to_url_map.items(): + cursor = conn.cursor() + query = "UPDATE Image_Info SET ImageLocation = '{0}', ModifiedDtim = now() WHERE ImageId = {1}" + cursor.execute(query.format(new_url,image_id)) + conn.commit() + print("Updated ImageId: {0} to new ImageLocation: {1}".format(image_id,new_url)) + self._update_images([image_id],ImageTagState.READY_TO_TAG, user_id,conn) + print("ImageId: {0} to has a new state: {1}".format(image_id,ImageTagState.READY_TO_TAG.name)) + finally: cursor.close() + except Exception as e: + print("An errors occured updating image urls: {0}".format(e)) + raise + finally: conn.close() + + def update_tagged_images(self,list_of_image_tags, user_id): + if(not list_of_image_tags): + return + + if type(user_id) is not int: + raise TypeError('user id must be an integer') + + groups_by_image_id = itertools.groupby(list_of_image_tags, key=lambda it:it.image_id) + try: + conn = self._db_provider.get_connection() + try: + cursor = conn.cursor() + for img_id, list_of_tags in groups_by_image_id: + for img_tag in list(list_of_tags): + query = ("with iti AS ( " + "INSERT INTO image_tags (ImageId, X_Min,X_Max,Y_Min,Y_Max,CreatedByUser) " + "VALUES ({0}, {1},{2},{3},{4},{5}) " + "RETURNING ImageTagId), " + "ci AS ( " + "INSERT INTO classification_info (ClassificationName) " + "VALUES {6} " + "ON CONFLICT (ClassificationName) DO UPDATE SET ClassificationName=EXCLUDED.ClassificationName " + "RETURNING (SELECT iti.ImageTagId FROM iti), ClassificationId) " + "INSERT INTO tags_classification (ImageTagId,ClassificationId) " + "SELECT imagetagid,classificationid from ci;") + classifications = ", ".join("('{0}')".format(name) for name in img_tag.classification_names) + cursor.execute(query.format(img_tag.image_id,img_tag.x_min,img_tag.x_max,img_tag.y_min,img_tag.y_max,user_id,classifications)) + self._update_images([img_id],ImageTagState.COMPLETED_TAG,user_id,conn) + conn.commit() + print("Updated {0} image 
tags".format(len(list_of_image_tags))) + finally: cursor.close() + except Exception as e: + print("An errors occured updating tagged image: {0}".format(e)) + raise + finally: conn.close() + +class ArgumentException(Exception): + pass + +def main(): + #Replace me for testing + db_config = DatabaseInfo("","","","") + data_access = ImageTagDataAccess(PostGresProvider(db_config)) + user_id = data_access.create_user(getpass.getuser()) + print("The user id for '{0}' is {1}".format(getpass.getuser(),user_id)) + + list_of_image_infos = generate_test_image_infos(5) + url_to_image_id_map = data_access.add_new_images(list_of_image_infos,user_id) + + image_tags = generate_test_image_tags(list(url_to_image_id_map.values()),4,4) + data_access.update_tagged_images(image_tags,user_id) + +TestClassifications = ("maine coon","german shephard","goldfinch","mackerel"," african elephant","rattlesnake") + +def generate_test_image_infos(count): + list_of_image_infos = [] + for i in range(count): + file_name = "{0}.jpg".format(id_generator(size=random.randint(4,10))) + image_location = "https://mock-storage.blob.core.windows.net/new-uploads/{0}".format(file_name) + img = ImageInfo(file_name,image_location,random.randint(100,600),random.randint(100,600)) + list_of_image_infos.append(img) + return list_of_image_infos + +def generate_test_image_tags(list_of_image_ids,max_tags_per_image,max_classifications_per_tag): + list_of_image_tags = [] + #round(random.uniform(1,2), N)) + for image_id in list(list_of_image_ids): + tags_per_image = random.randint(1,max_tags_per_image) + for i in range(tags_per_image): + x_min = random.uniform(50,300) + x_max = random.uniform(x_min,300) + y_min = random.uniform(50,300) + y_max = random.uniform(y_min,300) + classifications_per_tag = random.randint(1,max_classifications_per_tag) + image_tag = ImageTag(image_id,x_min,x_max,y_min,y_max,random.sample(TestClassifications,classifications_per_tag)) + list_of_image_tags.append(image_tag) + return list_of_image_tags 
+ +def id_generator(size=6, chars=string.ascii_uppercase + string.digits): + return ''.join(random.choice(chars) for _ in range(size)) + +if __name__ == "__main__": + main() + diff --git a/functions/pipeline/shared/db_provider.py b/functions/pipeline/shared/db_provider.py new file mode 100644 index 00000000..430da3e0 --- /dev/null +++ b/functions/pipeline/shared/db_provider.py @@ -0,0 +1,40 @@ +import pg8000 +#import pyodbc + +class DatabaseInfo(object): + def __init__(self, db_host_name, db_name, db_user_name, db_password): + self.db_host_name = db_host_name + self.db_name = db_name + self.db_user_name = db_user_name + self.db_password = db_password + +class DBProvider(object): + def __new_connection(self,host_name,db_name,db_user,db_pass): pass + def get_connection(self): pass + def cursor(self):pass + def execute(self, query):pass + +class PostGresProvider(DBProvider): + + def __init__(self, database_info): + self.database_info = database_info + + def __new_connection(self,host_name,db_name,db_user,db_pass): + return pg8000.connect(db_user, host=host_name, unix_sock=None, port=5432, database=db_name, password=db_pass, ssl=True, timeout=None, application_name=None) + + def get_connection(self): + #self.connection = + return self.__new_connection(self.database_info.db_host_name,self.database_info.db_name,self.database_info.db_user_name,self.database_info.db_password) + +''' +class MSSqlProvider(DBProvider): + DRIVER= '{ODBC Driver 17 for SQL Server}' + def __init__(self, database_info): + self.database_info = database_info + + def __new_connection(self,host_name,db_name,db_user,db_pass): + return pyodbc.connect('DRIVER='+self.DRIVER+';PORT=1433;SERVER='+host_name+';PORT=1443;DATABASE='+db_name+';UID='+db_user+';PWD='+ db_pass) + + def get_connection(self): + return self.__new_connection(self.database_info.db_host_name,self.database_info.db_name,self.database_info.db_user_name,self.database_info.db_password) +''' \ No newline at end of file diff --git 
a/functions/pipeline/shared/test_db_access_v2.py b/functions/pipeline/shared/test_db_access_v2.py new file mode 100644 index 00000000..f51441c5 --- /dev/null +++ b/functions/pipeline/shared/test_db_access_v2.py @@ -0,0 +1,104 @@ +import unittest +from unittest.mock import patch +from unittest.mock import Mock + +from db_provider import( + DatabaseInfo, + DBProvider +) +from db_access_v2 import( + ImageTagDataAccess, + ArgumentException, + ImageTagState, + generate_test_image_infos +# _update_images, +# create_user, +# get_image_ids_for_new_images, +# get_new_images +) + +class MockConnection: + def _mock_cursor(self): + self.fetchCount=5 + + def fetchone(): + if (self.fetchCount): + self.fetchCount = self.fetchCount-1 + return (["A","B"]) + return None + + def execute(query): + return + + test = Mock() + test.execute = execute + test.fetchone = fetchone + return test + + def cursor(self): + return self._mock_cursor() + +class MockDBProvider: + def __init__(self, fail = False): + self.fail = fail + + def get_connection(self): + if self.fail: + raise Exception + return MockConnection() + +class TestImageTagDataAccess(unittest.TestCase): + def test_connection(self): + print("Running...") + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.test_connection() + self.assertEqual(5, 5) + + def test_create_user_empty_string(self): + with self.assertRaises(ArgumentException): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.create_user('') + + def test_create_user_db_error(self): + with self.assertRaises(Exception): + data_access = ImageTagDataAccess(MockDBProvider(fail=True)) + data_access.create_user('MyUserName') + + def test_update_image_bad_image_state(self): + with self.assertRaises(TypeError): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access._update_images((),"I should be an enum",1,None) + + def test_update_image_db_error(self): + with self.assertRaises(Exception): + data_access = 
ImageTagDataAccess(MockDBProvider(fail=True)) + data_access._update_images((),ImageTagState.READY_TO_TAG,1,None) + + def test_get_new_images_bad_request(self): + with self.assertRaises(ArgumentException): + data_access = ImageTagDataAccess(MockDBProvider()) + num_of_images = -5 + data_access.get_new_images(num_of_images,5) + + def test_add_new_images_user_id_type_error(self): + with self.assertRaises(TypeError): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.add_new_images((),"I should be an integer") + + def test_add_new_images_connection_error(self): + with self.assertRaises(Exception): + data_access = ImageTagDataAccess(MockDBProvider(fail=True)) + data_access.add_new_images(generate_test_image_infos(5),10) + + # def test_add_new_images_cursor_error(self): + # with self.assertRaises(Exception): + # data_access = ImageTagDataAccess(MockDBProvider(fail=True)) + # data_access.add_new_images(generate_test_image_infos(5),10) + + def test_update_image_urls_user_id_type_error(self): + with self.assertRaises(TypeError): + data_access = ImageTagDataAccess(MockDBProvider()) + data_access.update_image_urls((),"I should be an integer") + +if __name__ == '__main__': + unittest.main() From 1df6d6613df26dae17b47de51bea4ac9e34899d1 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 09:56:04 -0800 Subject: [PATCH 11/31] Added comments --- functions/pipeline/shared/db_access_v2.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/functions/pipeline/shared/db_access_v2.py b/functions/pipeline/shared/db_access_v2.py index b2850b90..cae79d6e 100644 --- a/functions/pipeline/shared/db_access_v2.py +++ b/functions/pipeline/shared/db_access_v2.py @@ -210,7 +210,16 @@ def update_tagged_images(self,list_of_image_tags, user_id): class ArgumentException(Exception): pass + def main(): + ################################################################# + # This main method is an example of how to use some of + # the 
ImageTagDataAccess methods. For instance: + # Creating a User + # Onboarding of new images + # Checking in images been tagged + ################################################################# + #Replace me for testing db_config = DatabaseInfo("","","","") data_access = ImageTagDataAccess(PostGresProvider(db_config)) @@ -236,7 +245,6 @@ def generate_test_image_infos(count): def generate_test_image_tags(list_of_image_ids,max_tags_per_image,max_classifications_per_tag): list_of_image_tags = [] - #round(random.uniform(1,2), N)) for image_id in list(list_of_image_ids): tags_per_image = random.randint(1,max_tags_per_image) for i in range(tags_per_image): From c3949cbd4795fd559adcacae6229185d81d71eab Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 10:57:40 -0800 Subject: [PATCH 12/31] Fixed issue --- db/install-db-resources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 14114b97..702fc8e6 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -106,10 +106,10 @@ def main(db_name, overwrite_db): if(os.getenv("DB_HOST") is None or os.getenv("DB_USER") is None or os.getenv("DB_PASS") is None): print("Please set environment variables for DB_HOST, DB_USER, DB_PASS") return - + if (database_exists(get_default_connection(), db_name) and overwrite_db): remove_database(get_default_connection(),db_name) - else: + elif (database_exists(get_default_connection(), db_name) and not overwrite_db): print("Database {0} already exists. 
Please see --help for overwrite option.".format(db_name)) return From b9b076a01dbf8be34806eef191a814817d95bee4 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 12:19:46 -0800 Subject: [PATCH 13/31] Changed query formatting in some areas --- db/install-db-resources.py | 4 ++-- functions/pipeline/shared/db_access_v2.py | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 702fc8e6..2088c893 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -29,8 +29,8 @@ def execute_queries_from_map(conn, file_query_map): def database_exists(conn, db_name): if db_name: cursor = conn.cursor() - query = "SELECT 1 FROM pg_database WHERE datname='{0}'" - cursor.execute(query.format(db_name)) + query = "SELECT 1 FROM pg_database WHERE datname=%s" + cursor.execute(query,(db_name,)) row = cursor.fetchone() if row: return int(row[0]) == 1 diff --git a/functions/pipeline/shared/db_access_v2.py b/functions/pipeline/shared/db_access_v2.py index cae79d6e..ddd7819f 100644 --- a/functions/pipeline/shared/db_access_v2.py +++ b/functions/pipeline/shared/db_access_v2.py @@ -56,8 +56,8 @@ def create_user(self,user_name): conn = self._db_provider.get_connection() try: cursor = conn.cursor() - query = "INSERT INTO User_Info (UserName) VALUES ('{0}') ON CONFLICT (username) DO UPDATE SET username=EXCLUDED.username RETURNING UserId;" - cursor.execute(query.format(user_name)) + query = "INSERT INTO User_Info (UserName) VALUES (%s) ON CONFLICT (username) DO UPDATE SET username=EXCLUDED.username RETURNING UserId;" + cursor.execute(query,(user_name,)) user_id = cursor.fetchone()[0] conn.commit() finally: cursor.close() @@ -104,8 +104,8 @@ def add_new_images(self,list_of_image_infos, user_id): cursor = conn.cursor() for img in list(list_of_image_infos): query = ("INSERT INTO Image_Info (OriginalImageName,ImageLocation,Height,Width,CreatedByUser) " - "VALUES 
('{0}','{1}',{2},{3},{4}) RETURNING ImageId;") - cursor.execute(query.format(img.image_name,img.image_location,str(img.height),str(img.width),user_id)) + "VALUES (%s,%s,%s,%s,%s) RETURNING ImageId;") + cursor.execute(query,(img.image_name,img.image_location,img.height,img.width,user_id)) new_img_id = cursor.fetchone()[0] url_to_image_id_map[img.image_location] = new_img_id conn.commit() @@ -138,8 +138,8 @@ def _update_images(self, list_of_image_ids, new_image_tag_state, user_id, conn): try: image_ids_as_strings = [str(i) for i in list_of_image_ids] images_to_update = '{0}'.format(', '.join(image_ids_as_strings)) - query = "UPDATE Image_Tagging_State SET TagStateId = {0}, ModifiedByUser = {2}, ModifiedDtim = now() WHERE ImageId IN ({1})" - cursor.execute(query.format(new_image_tag_state,images_to_update,user_id)) + query = "UPDATE Image_Tagging_State SET TagStateId = %s, ModifiedByUser = %s, ModifiedDtim = now() WHERE ImageId IN (%s)" + cursor.execute(query,(new_image_tag_state,user_id,images_to_update)) conn.commit() finally: cursor.close() else: @@ -171,6 +171,7 @@ def update_image_urls(self,image_id_to_url_map, user_id): raise finally: conn.close() + #TODO: Do safer query string formatting def update_tagged_images(self,list_of_image_tags, user_id): if(not list_of_image_tags): return @@ -221,7 +222,7 @@ def main(): ################################################################# #Replace me for testing - db_config = DatabaseInfo("","","","") + db_config = DatabaseInfo("abrig-db.postgres.database.azure.com","micro","abrigtest@abrig-db","abcdABCD123") data_access = ImageTagDataAccess(PostGresProvider(db_config)) user_id = data_access.create_user(getpass.getuser()) print("The user id for '{0}' is {1}".format(getpass.getuser(),user_id)) From c55ccec92e273371c5a8044dc90180e2b54e2b78 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 12:26:21 -0800 Subject: [PATCH 14/31] Removing unused files --- functions/pipeline/shared/data_access.py | 70 
------------------------ functions/upload/__init__.py | 53 ------------------ functions/upload/function.json | 20 ------- functions/upload/host.json | 2 - 4 files changed, 145 deletions(-) delete mode 100644 functions/pipeline/shared/data_access.py delete mode 100644 functions/upload/__init__.py delete mode 100644 functions/upload/function.json delete mode 100644 functions/upload/host.json diff --git a/functions/pipeline/shared/data_access.py b/functions/pipeline/shared/data_access.py deleted file mode 100644 index 9f390c20..00000000 --- a/functions/pipeline/shared/data_access.py +++ /dev/null @@ -1,70 +0,0 @@ -# DEPRECATED - Moving all database access components into db_access.py -# Saving for notes for now.... - -# import logging -# import json -# import azure.functions as func # Getting an error - E0401: Unable to import -# import os -# import pg8000 # Getting an error - E0401: Unable to import - -# # Constants -# untagged_state = 1 -# tagging_state = 2 -# tagged_state = 3 - -# host = os.getenv('DB_HOST', None) -# user = os.getenv('DB_USER', None) -# dbname = os.getenv('DB_NAME', None) -# password = os.getenv('DB_PASS', None) - -# # Creates rows in the database for new images uploaded to blob storage. -# # Requires: TBD -# # Receives: List of new image names, user id of the user performing the upload -# # Returns: None. -# # TODO: Clarify if input list is a list of original image names or new ImageIds -# # Code: - -# # get_unvisited_items -# # # Retrieves a number of untagged images for the user to tag. -# # Requires: Assumes that the CLI (the requestor) handles setting a cap on the number of images that may be requested. -# # For now, assumes the user wants to use a LIFO strategy, and pull newest images first. -# # Receives: num_images = Number of images the user wants to download for tagging -# # Returns: List of URLs to the selected images (in blob storage) that need to be tagged. Status for these images is updated to "tagging" status in the DB. 
-# # TODO: Future inputs: Strategy for selecting images (enum), user_id for tracking which user made the request (string) -# def get_unvisited_items(num_images): -# # Connect to database -# # TODO: Add error handling -# db = pg8000.connect(user, host=host, unix_sock=None, port=5432, database=dbname, password=password, ssl=True, timeout=None, application_name=None) -# cursor = db.cursor() - -# # From the database, select the number of images the user requested where tag state is "untagged" -# cursor.execute("SELECT b.ImageId, b.OriginalImageName, a.TagStateId, b.ImageLocation FROM Image_Tagging_State a JOIN Image_Info b ON a.ImageId = b.ImageId WHERE a.TagStateId = {0} order by a.createddtim DESC limit {1}".format(untagged_state, num_images)) - -# # Put the ImageId and ImageLocation (URL) for the images to tag into a dictionary named selected_images_to_tag -# selected_images_to_tag = {} -# for row in cursor: -# print('Image Id: {0} \t\tImage Name: {1} \t\tTag State: {2}'.format(row[0], row[1], row[2])) -# selected_images_to_tag[str(row[0])] = str(row[3]) - -# # If there are images in the list, update the tagging state for the selected images from "untagged" to "tagging" state -# # If there are no images left to tag, output a message to the user -# # TODO: Separate this code out into an "update" helper function? -# if(len(selected_images_to_tag) > 0): -# images_to_update = '{0}'.format(', '.join(selected_images_to_tag.keys())) -# cursor.execute("UPDATE Image_Tagging_State SET TagStateId = {0} WHERE ImageId IN ({1})".format(tagging_state,images_to_update)) -# db.commit() -# print(f"Updated {len(selected_images_to_tag)} images to the state {tagging_state}") -# else: -# print("No images untagged images left!") - -# # Return the list of URLs to the user (values in the selected_images_to_tag dictionary) -# return list(selected_images_to_tag.values()) - -# # TODO: Create helper function to update status of a list of images in the DB from one state to another. 
- -# # get_unvisited_items (count of items, strategy enum, user id) -# # Returns a list of image locations -# # update_visited_items (List of visited image names, user id) -# # void -# # update_unvisited_items (List of unvisited image names, user id) -# # Void diff --git a/functions/upload/__init__.py b/functions/upload/__init__.py deleted file mode 100644 index fc1bd0fb..00000000 --- a/functions/upload/__init__.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -import json - -# TODO: Move this into a library -def get_id_from_filename(filename): - path_components = filename.split('/') - filename = path_components[-1] - return int(filename.split('.')[0]) - -# TODO: Move this into a library -def process_vott_json(json): - all_frame_data = json['frames'] - - # Scrub filename keys to only have integer Id, drop path and file extensions. - id_to_tags_dict = {} - for full_path_key in sorted(all_frame_data.keys()): - id_to_tags_dict[get_id_from_filename(full_path_key)] = all_frame_data[full_path_key] - all_ids = id_to_tags_dict.keys() - - # Do the same with visitedFrames - visited_ids = sorted(json['visitedFrames']) - for index, filename in enumerate(visited_ids): - visited_ids[index] = get_id_from_filename(filename) - - # Unvisisted imageIds - unvisited_ids = sorted(list(set(all_ids) - set(visited_ids))) - - return { - "totalNumImages" : len(all_ids), - "numImagesVisted" : len(visited_ids), - "numImagesNotVisted" : len(unvisited_ids), - "imagesVisited" : visited_ids, - "imageNotVisisted" : unvisited_ids - } - -def main(): - try: - vott_json = json.loads(open(os.environ['req']).read()) - - stats = process_vott_json(vott_json) - # TODO: Call interface to update imagesVisited to 'COMPLETED_TAG' state and imageNotVisisted to 'INCOMPLETE_TAG' - - response = open(os.environ['res'], 'w') - response.write(str(stats)) - response.close() - except Exception as e: - response = open(os.environ['res'], 'w') - # TODO: Add error status code and proper message? 
- response.write(str(e)) - response.close() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/functions/upload/function.json b/functions/upload/function.json deleted file mode 100644 index d841bddd..00000000 --- a/functions/upload/function.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": [ - "post", - "put" - ] - }, - { - "type": "http", - "direction": "out", - "name": "res" - } - ], - "disabled": false - } \ No newline at end of file diff --git a/functions/upload/host.json b/functions/upload/host.json deleted file mode 100644 index 7a73a41b..00000000 --- a/functions/upload/host.json +++ /dev/null @@ -1,2 +0,0 @@ -{ -} \ No newline at end of file From 75b6e809e282971377d819f86b51644b567d6d46 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 12:50:45 -0800 Subject: [PATCH 15/31] Removed __init__.py that was causing pytest issues. Updated yaml. 
--- azure-pipelines.yml | 2 +- functions/pipeline/shared/__init__.py | 0 functions/pipeline/shared/test_db_access_v2.py | 4 ---- 3 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 functions/pipeline/shared/__init__.py diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 12e6fd2f..cad9ddcd 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,7 +29,7 @@ jobs: - script: | pip install pytest - pytest cli --doctest-modules --junitxml=junit/test-results.xml + pytest cli functions --doctest-modules --junitxml=junit/test-results.xml displayName: 'pytest' - task: PublishTestResults@2 diff --git a/functions/pipeline/shared/__init__.py b/functions/pipeline/shared/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/functions/pipeline/shared/test_db_access_v2.py b/functions/pipeline/shared/test_db_access_v2.py index f51441c5..d57a284e 100644 --- a/functions/pipeline/shared/test_db_access_v2.py +++ b/functions/pipeline/shared/test_db_access_v2.py @@ -2,10 +2,6 @@ from unittest.mock import patch from unittest.mock import Mock -from db_provider import( - DatabaseInfo, - DBProvider -) from db_access_v2 import( ImageTagDataAccess, ArgumentException, From d0c7483185e300ca3e6918af4711435e6e57ec40 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 13:37:37 -0800 Subject: [PATCH 16/31] Added packages --- requirements.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 92324a07..9de0ccd9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,9 @@ +azure-functions==1.0.0a5 +azure-functions-worker==1.0.0a6 +azure-storage +grpcio==1.14.2 +grpcio-tools==1.14.2 +protobuf==3.6.1 requests -azure-storage \ No newline at end of file +six==1.11.0 +pg8000==1.12.3 From 2d4b8578d31e08f6427d01a791db600becf83b2c Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 13:41:10 -0800 Subject: [PATCH 17/31] Fixed format issue --- 
functions/pipeline/shared/db_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions/pipeline/shared/db_access.py b/functions/pipeline/shared/db_access.py index e9530e27..79623f46 100644 --- a/functions/pipeline/shared/db_access.py +++ b/functions/pipeline/shared/db_access.py @@ -42,7 +42,7 @@ def get_images_for_tagging(conn, num_images): images_to_update = '{0}'.format(', '.join(selected_images_to_tag.keys())) cursor.execute("UPDATE Image_Tagging_State SET TagStateId = {0} WHERE ImageId IN ({1})".format(ImageTagState.TAG_IN_PROGRESS, images_to_update)) conn.commit() - print(f"Updated {len(selected_images_to_tag)} images to the state {ImageTagState.TAG_IN_PROGRESS}") + print("Updated {len(selected_images_to_tag)} images to the state {0}".format(ImageTagState.TAG_IN_PROGRESS)) else: print("No images untagged images left!") # Return the list of URLs to the user (values in the selected_images_to_tag dictionary) From bc270b28798591cb58733f01a9c397e97e67ac35 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 13:46:08 -0800 Subject: [PATCH 18/31] Fixing more linting issues --- functions/pipeline/shared/db_access.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/functions/pipeline/shared/db_access.py b/functions/pipeline/shared/db_access.py index 79623f46..8c444ffe 100644 --- a/functions/pipeline/shared/db_access.py +++ b/functions/pipeline/shared/db_access.py @@ -53,11 +53,11 @@ def __new_postgres_connection(host_name, db_name, db_user, db_pass): def update_tagged_images(conn, list_of_image_ids): __update_images(conn, list_of_image_ids, ImageTagState.COMPLETED_TAG) - print(f"Updated {len(list_of_image_ids)} image(s) to the state {ImageTagState.COMPLETED_TAG.name}") + print("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.COMPLETED_TAG.name)) def update_untagged_images(conn, list_of_image_ids): __update_images(conn, list_of_image_ids, ImageTagState.INCOMPLETE_TAG) - 
print(f"Updated {len(list_of_image_ids)} image(s) to the state {ImageTagState.INCOMPLETE_TAG.name}") + print("Updated {0} image(s) to the state {1}".format(len(list_of_image_ids),ImageTagState.INCOMPLETE_TAG.name)) def __update_images(conn, list_of_image_ids, new_image_tag_state): if not isinstance(new_image_tag_state, ImageTagState): From 6cca44ab7f6b0dfa57e7aa365991a1acc72c418d Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 16:04:23 -0800 Subject: [PATCH 19/31] Removing 3.5 from matrix --- azure-pipelines.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cad9ddcd..36eb3f15 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -10,8 +10,9 @@ jobs: vmImage: 'Ubuntu 16.04' strategy: matrix: - Python35: - python.version: '3.5' + #Azure Functions Python preview doesn't support 3.5 + #Python35: + # python.version: '3.5' Python36: python.version: '3.6' Python37: From 7f794fb4b4b391a366b5310fde437d9b47b7442c Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 17:07:13 -0800 Subject: [PATCH 20/31] Changes to enforce Tagging_User as a key in the CLI config --- cli/operations.py | 11 +++++++---- cli/test_operations.py | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cli/operations.py b/cli/operations.py index f78585cb..894668a3 100644 --- a/cli/operations.py +++ b/cli/operations.py @@ -20,6 +20,7 @@ TAGGING_SECTION = 'TAGGING' TAGGING_LOCATION_KEY = 'TAGGING_LOCATION' +TAGGING_USER_KEY = 'TAGGING_USER' DEFAULT_NUM_IMAGES = 40 @@ -245,11 +246,12 @@ def storage_config_section(storage_config_section): def tagging_config_section(tagging_config_section): tagging_location_value = tagging_config_section.get(TAGGING_LOCATION_KEY) + tagging_user_value = tagging_config_section.get(TAGGING_USER_KEY) - if not tagging_location_value: + if not tagging_location_value or not tagging_user_value: raise MissingConfigException() - return 
tagging_location_value + return tagging_location_value, tagging_user_value def read_config_with_parsed_config(parser): @@ -272,7 +274,7 @@ def read_config_with_parsed_config(parser): parser[STORAGE_SECTION] ) - tagging_location = tagging_config_section(parser[TAGGING_SECTION]) + tagging_location, tagging_user = tagging_config_section(parser[TAGGING_SECTION]) return { "key": functions_key, @@ -280,5 +282,6 @@ def read_config_with_parsed_config(parser): "storage_account": storage_account, "storage_key": storage_key, "storage_container": storage_container, - "tagging_location": tagging_location + "tagging_location": tagging_location, + "tagging_user": tagging_user } diff --git a/cli/test_operations.py b/cli/test_operations.py index a2b0c218..dd6faa93 100644 --- a/cli/test_operations.py +++ b/cli/test_operations.py @@ -21,6 +21,7 @@ STORAGE_CONTAINER, TAGGING_SECTION, TAGGING_LOCATION_KEY, + TAGGING_USER_KEY, functions_config_section, storage_config_section, tagging_config_section, @@ -107,7 +108,8 @@ def test_acceptable_config(self): FUNCTIONS_URL: "test" }, TAGGING_SECTION: { - TAGGING_LOCATION_KEY: "test" + TAGGING_LOCATION_KEY: "test", + TAGGING_USER_KEY: "test" } } ) From ab9ed706158bec5cc304d82998f9ad83c1e938cb Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 1 Nov 2018 19:30:30 -0700 Subject: [PATCH 21/31] Updated requirements.txt. Fixed pylint issues. 
Updated test gen script --- db/postgres-client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/postgres-client.py b/db/postgres-client.py index 92ea5fe4..fe36cf72 100644 --- a/db/postgres-client.py +++ b/db/postgres-client.py @@ -287,4 +287,4 @@ def main(num_of_images,user_name): if (len(sys.argv) != 3): print("Usage: {0} (Number of Images) (User Name)".format(sys.argv[0])) else: - main(int(sys.argv[1]), str(sys.argv[2])) \ No newline at end of file + main(int(sys.argv[1]), str(sys.argv[2])) From b96b87221f7b2b273250f9ecc40a5ecb0dff0a39 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Sat, 3 Nov 2018 14:53:11 -0700 Subject: [PATCH 22/31] Added 2 new tables. Renamed tables to support ordered deploy --- db/install-db-resources.py | 2 +- db/tables/000_image_info.sql | 10 ++++++++++ db/tables/010_image_tagging_state.sql | 7 +++++++ db/tables/010_image_tags.sql | 13 +++++++++++++ db/tables/010_tags_classification.sql | 10 ++++++++++ 5 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 db/tables/000_image_info.sql create mode 100644 db/tables/010_image_tagging_state.sql create mode 100644 db/tables/010_image_tags.sql create mode 100644 db/tables/010_tags_classification.sql diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 2088c893..a10af426 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -140,4 +140,4 @@ def main(db_name, overwrite_db): args = parser.parse_args() database_name = args.database_name - main(args.database_name,args.overwrite) \ No newline at end of file + main(args.database_name,args.overwrite) diff --git a/db/tables/000_image_info.sql b/db/tables/000_image_info.sql new file mode 100644 index 00000000..b094550b --- /dev/null +++ b/db/tables/000_image_info.sql @@ -0,0 +1,10 @@ +-- Set up table and autoincrementing primary key +CREATE TABLE Image_Info ( + ImageId SERIAL PRIMARY KEY, + OriginalImageName text NOT NULL, + ImageLocation text, + Height integer NOT NULL, + 
Width integer NOT NULL, + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); \ No newline at end of file diff --git a/db/tables/010_image_tagging_state.sql b/db/tables/010_image_tagging_state.sql new file mode 100644 index 00000000..dbe230ef --- /dev/null +++ b/db/tables/010_image_tagging_state.sql @@ -0,0 +1,7 @@ +-- Set up table and autoincrementing primary key +CREATE TABLE Image_Tagging_State ( + ImageId integer REFERENCES Image_Info(ImageId), + TagStateId integer NOT NULL, + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp +); \ No newline at end of file diff --git a/db/tables/010_image_tags.sql b/db/tables/010_image_tags.sql new file mode 100644 index 00000000..bcdc2de6 --- /dev/null +++ b/db/tables/010_image_tags.sql @@ -0,0 +1,13 @@ +-- Set up table +CREATE TABLE Image_Tags ( + ImageTagId integer NOT NULL UNIQUE, + ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, + --ClassificationId text NOT NULL, --Needed? + --Confidence double precision NOT NULL, --Needed? 
+ X_Min integer NOT NULL, + X_Max integer NOT NULL, + Y_Min integer NOT NULL, + Y_Max integer NOT NULL, + --VOTT_Data json NOT NULL + PRIMARY KEY (ImageTagId,ImageId) +); \ No newline at end of file diff --git a/db/tables/010_tags_classification.sql b/db/tables/010_tags_classification.sql new file mode 100644 index 00000000..f86561be --- /dev/null +++ b/db/tables/010_tags_classification.sql @@ -0,0 +1,10 @@ +-- Set up table +CREATE TABLE Tags_Classification ( + ImageTagId integer REFERENCES Image_Tags(ImageTagId) UNIQUE, + ClassificationId integer REFERENCES Classification_Info(ClassificationId), + ModifiedDtim timestamp NOT NULL default current_timestamp, + CreatedDtim timestamp NOT NULL default current_timestamp, + PRIMARY KEY (ImageTagId,ClassificationId) + --CONSTRAINT FK_IMAGE_TAG FOREIGN KEY(ImageTagId), + --CONSTRAINT FK_CLASSIFICATION FOREIGN KEY(ClassificationId) +); \ No newline at end of file From 55e1d1f7a2f1214a13a3715ee5969b9435018fa6 Mon Sep 17 00:00:00 2001 From: Andre Date: Sat, 3 Nov 2018 23:46:30 -0700 Subject: [PATCH 23/31] Added support for user creating and auditing in DB, DAL, and deployment. 
--- db/install-db-resources.py | 3 +++ db/tables/000_image_info.sql | 10 ---------- db/tables/010_image_tagging_state.sql | 7 ------- db/tables/010_image_tags.sql | 13 ------------- db/tables/010_tags_classification.sql | 10 ---------- db/tables/100_image_tags.sql | 16 ++++++++-------- db/tables/100_tags_classification.sql | 4 +++- 7 files changed, 14 insertions(+), 49 deletions(-) delete mode 100644 db/tables/000_image_info.sql delete mode 100644 db/tables/010_image_tagging_state.sql delete mode 100644 db/tables/010_image_tags.sql delete mode 100644 db/tables/010_tags_classification.sql diff --git a/db/install-db-resources.py b/db/install-db-resources.py index a10af426..26bf9e78 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -48,6 +48,7 @@ def create_database(conn, db_name): print("No database created due to empty parameter") return +<<<<<<< HEAD def remove_database(conn, db_name): if db_name: cursor = conn.cursor() @@ -60,6 +61,8 @@ def remove_database(conn, db_name): print("No database dropped due to empty parameter") return +======= +>>>>>>> Added support for user creating and auditing in DB, DAL, and deployment. 
def install_extensions(conn, list_of_extensions): if (len(list_of_extensions) > 0): cursor = conn.cursor() diff --git a/db/tables/000_image_info.sql b/db/tables/000_image_info.sql deleted file mode 100644 index b094550b..00000000 --- a/db/tables/000_image_info.sql +++ /dev/null @@ -1,10 +0,0 @@ --- Set up table and autoincrementing primary key -CREATE TABLE Image_Info ( - ImageId SERIAL PRIMARY KEY, - OriginalImageName text NOT NULL, - ImageLocation text, - Height integer NOT NULL, - Width integer NOT NULL, - ModifiedDtim timestamp NOT NULL default current_timestamp, - CreatedDtim timestamp NOT NULL default current_timestamp -); \ No newline at end of file diff --git a/db/tables/010_image_tagging_state.sql b/db/tables/010_image_tagging_state.sql deleted file mode 100644 index dbe230ef..00000000 --- a/db/tables/010_image_tagging_state.sql +++ /dev/null @@ -1,7 +0,0 @@ --- Set up table and autoincrementing primary key -CREATE TABLE Image_Tagging_State ( - ImageId integer REFERENCES Image_Info(ImageId), - TagStateId integer NOT NULL, - ModifiedDtim timestamp NOT NULL default current_timestamp, - CreatedDtim timestamp NOT NULL default current_timestamp -); \ No newline at end of file diff --git a/db/tables/010_image_tags.sql b/db/tables/010_image_tags.sql deleted file mode 100644 index bcdc2de6..00000000 --- a/db/tables/010_image_tags.sql +++ /dev/null @@ -1,13 +0,0 @@ --- Set up table -CREATE TABLE Image_Tags ( - ImageTagId integer NOT NULL UNIQUE, - ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, - --ClassificationId text NOT NULL, --Needed? - --Confidence double precision NOT NULL, --Needed? 
- X_Min integer NOT NULL, - X_Max integer NOT NULL, - Y_Min integer NOT NULL, - Y_Max integer NOT NULL, - --VOTT_Data json NOT NULL - PRIMARY KEY (ImageTagId,ImageId) -); \ No newline at end of file diff --git a/db/tables/010_tags_classification.sql b/db/tables/010_tags_classification.sql deleted file mode 100644 index f86561be..00000000 --- a/db/tables/010_tags_classification.sql +++ /dev/null @@ -1,10 +0,0 @@ --- Set up table -CREATE TABLE Tags_Classification ( - ImageTagId integer REFERENCES Image_Tags(ImageTagId) UNIQUE, - ClassificationId integer REFERENCES Classification_Info(ClassificationId), - ModifiedDtim timestamp NOT NULL default current_timestamp, - CreatedDtim timestamp NOT NULL default current_timestamp, - PRIMARY KEY (ImageTagId,ClassificationId) - --CONSTRAINT FK_IMAGE_TAG FOREIGN KEY(ImageTagId), - --CONSTRAINT FK_CLASSIFICATION FOREIGN KEY(ClassificationId) -); \ No newline at end of file diff --git a/db/tables/100_image_tags.sql b/db/tables/100_image_tags.sql index 421cdb13..bcdc2de6 100644 --- a/db/tables/100_image_tags.sql +++ b/db/tables/100_image_tags.sql @@ -1,13 +1,13 @@ -- Set up table CREATE TABLE Image_Tags ( - ImageTagId SERIAL UNIQUE, + ImageTagId integer NOT NULL UNIQUE, ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, - X_Min decimal(6,2) NOT NULL, - X_Max decimal(6,2) NOT NULL, - Y_Min decimal(6,2) NOT NULL, - Y_Max decimal(6,2) NOT NULL, - CreatedByUser integer REFERENCES User_Info(UserId), - CreatedDtim timestamp NOT NULL default current_timestamp, + --ClassificationId text NOT NULL, --Needed? + --Confidence double precision NOT NULL, --Needed? + X_Min integer NOT NULL, + X_Max integer NOT NULL, + Y_Min integer NOT NULL, + Y_Max integer NOT NULL, --VOTT_Data json NOT NULL - PRIMARY KEY (ImageId,X_Min,X_Max,Y_Min,Y_Max) --Should we include the bounded box as well? 
+ PRIMARY KEY (ImageTagId,ImageId) ); \ No newline at end of file diff --git a/db/tables/100_tags_classification.sql b/db/tables/100_tags_classification.sql index a285ee2d..f86561be 100644 --- a/db/tables/100_tags_classification.sql +++ b/db/tables/100_tags_classification.sql @@ -1,8 +1,10 @@ -- Set up table CREATE TABLE Tags_Classification ( - ImageTagId integer REFERENCES Image_Tags(ImageTagId), + ImageTagId integer REFERENCES Image_Tags(ImageTagId) UNIQUE, ClassificationId integer REFERENCES Classification_Info(ClassificationId), ModifiedDtim timestamp NOT NULL default current_timestamp, CreatedDtim timestamp NOT NULL default current_timestamp, PRIMARY KEY (ImageTagId,ClassificationId) + --CONSTRAINT FK_IMAGE_TAG FOREIGN KEY(ImageTagId), + --CONSTRAINT FK_CLASSIFICATION FOREIGN KEY(ClassificationId) ); \ No newline at end of file From f156e55e16da0729d8a75c57858de8b4b1f9fd5a Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Mon, 5 Nov 2018 12:31:51 -0800 Subject: [PATCH 24/31] Adding support to check if db already exists. --- db/install-db-resources.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 26bf9e78..129276a1 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -27,6 +27,7 @@ def execute_queries_from_map(conn, file_query_map): return def database_exists(conn, db_name): +<<<<<<< HEAD if db_name: cursor = conn.cursor() query = "SELECT 1 FROM pg_database WHERE datname=%s" @@ -35,6 +36,16 @@ def database_exists(conn, db_name): if row: return int(row[0]) == 1 return False +======= + result = -1 + if db_name: + cursor = conn.cursor() + query = "SELECT 1 FROM pg_database WHERE datname='{0}'" + cursor.execute(query.format(db_name)) + row = cursor.fetchone() + result = int(row[0]) + return result == 1 +>>>>>>> Adding support to check if db already exists. 
def create_database(conn, db_name): if db_name: @@ -116,6 +127,10 @@ def main(db_name, overwrite_db): print("Database {0} already exists. Please see --help for overwrite option.".format(db_name)) return + if (database_exists(get_default_connection(), db_name) and not overwrite_db): + print("Database {0} already exists.".format(db_name)) + return + #Set up the database create_database(get_default_connection(),db_name) @@ -133,6 +148,7 @@ def main(db_name, overwrite_db): #traceback.print_exc() if __name__ == "__main__": +<<<<<<< HEAD parser = argparse.ArgumentParser() parser.add_argument('database_name', type=str, @@ -144,3 +160,16 @@ def main(db_name, overwrite_db): args = parser.parse_args() database_name = args.database_name main(args.database_name,args.overwrite) +======= + if len(sys.argv) < 2: + print("Usage: python3 {0} (DB Name) [-force]".format(sys.argv[0])) + elif len(sys.argv) == 2: + main(str(sys.argv[1]),False) + ''' + elif str(sys.argv[2]).lower() == "-force": + main(str(sys.argv[1]),True) + else: + main(str(sys.argv[1]),False) + ''' + +>>>>>>> Adding support to check if db already exists. From 71a52efd73bb6146a0d9dc0a5dfc179459dea83c Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Tue, 6 Nov 2018 08:11:41 -0800 Subject: [PATCH 25/31] Added comment --- db/install-db-resources.py | 1 + 1 file changed, 1 insertion(+) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 129276a1..f1350838 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -127,6 +127,7 @@ def main(db_name, overwrite_db): print("Database {0} already exists. 
Please see --help for overwrite option.".format(db_name)) return + #TODO: Allow overwriting of existing DB if (database_exists(get_default_connection(), db_name) and not overwrite_db): print("Database {0} already exists.".format(db_name)) return From c099e0c4108e147481a92d101389a205bdd8fabe Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Tue, 6 Nov 2018 16:00:13 -0800 Subject: [PATCH 26/31] Fixed issue in db resoruces install file. Updated table schema --- db/install-db-resources.py | 9 +++++++++ db/tables/100_image_tags.sql | 10 +++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index f1350838..2f034421 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -27,6 +27,7 @@ def execute_queries_from_map(conn, file_query_map): return def database_exists(conn, db_name): +<<<<<<< HEAD <<<<<<< HEAD if db_name: cursor = conn.cursor() @@ -38,14 +39,22 @@ def database_exists(conn, db_name): return False ======= result = -1 +======= +>>>>>>> Fixed issue in db resoruces install file. Updated table schema if db_name: cursor = conn.cursor() query = "SELECT 1 FROM pg_database WHERE datname='{0}'" cursor.execute(query.format(db_name)) row = cursor.fetchone() +<<<<<<< HEAD result = int(row[0]) return result == 1 >>>>>>> Adding support to check if db already exists. +======= + if row: + return int(row[0]) == 1 + return False +>>>>>>> Fixed issue in db resoruces install file. Updated table schema def create_database(conn, db_name): if db_name: diff --git a/db/tables/100_image_tags.sql b/db/tables/100_image_tags.sql index bcdc2de6..ac37cba6 100644 --- a/db/tables/100_image_tags.sql +++ b/db/tables/100_image_tags.sql @@ -1,13 +1,13 @@ -- Set up table CREATE TABLE Image_Tags ( - ImageTagId integer NOT NULL UNIQUE, + ImageTagId SERIAL UNIQUE, ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, --ClassificationId text NOT NULL, --Needed? 
--Confidence double precision NOT NULL, --Needed? - X_Min integer NOT NULL, - X_Max integer NOT NULL, - Y_Min integer NOT NULL, - Y_Max integer NOT NULL, + X_Min double precision NOT NULL, + X_Max double precision NOT NULL, + Y_Min double precision NOT NULL, + Y_Max double precision NOT NULL, --VOTT_Data json NOT NULL PRIMARY KEY (ImageTagId,ImageId) ); \ No newline at end of file From 678eabcd24dd8c09707125d8a503e14e2a1ab00e Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 08:21:29 -0800 Subject: [PATCH 27/31] Updated deployment to be more idempotent. Updated table schemas --- db/install-db-resources.py | 22 +++++++++++++++++++--- db/tables/100_image_tags.sql | 14 +++++++------- db/tables/100_tags_classification.sql | 4 +--- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 2f034421..4eebaf96 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -69,6 +69,9 @@ def create_database(conn, db_name): return <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Updated deployment to be more idempotent. Updated table schemas def remove_database(conn, db_name): if db_name: cursor = conn.cursor() @@ -81,8 +84,11 @@ def remove_database(conn, db_name): print("No database dropped due to empty parameter") return +<<<<<<< HEAD ======= >>>>>>> Added support for user creating and auditing in DB, DAL, and deployment. +======= +>>>>>>> Updated deployment to be more idempotent. Updated table schemas def install_extensions(conn, list_of_extensions): if (len(list_of_extensions) > 0): cursor = conn.cursor() @@ -136,9 +142,10 @@ def main(db_name, overwrite_db): print("Database {0} already exists. 
Please see --help for overwrite option.".format(db_name)) return - #TODO: Allow overwriting of existing DB - if (database_exists(get_default_connection(), db_name) and not overwrite_db): - print("Database {0} already exists.".format(db_name)) + if (database_exists(get_default_connection(), db_name) and overwrite_db): + remove_database(get_default_connection(),db_name) + else: + print("Database {0} already exists. Please see --help for overwrite option.".format(db_name)) return #Set up the database @@ -159,6 +166,9 @@ def main(db_name, overwrite_db): if __name__ == "__main__": <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> Updated deployment to be more idempotent. Updated table schemas parser = argparse.ArgumentParser() parser.add_argument('database_name', type=str, @@ -169,6 +179,7 @@ def main(db_name, overwrite_db): args = parser.parse_args() database_name = args.database_name +<<<<<<< HEAD main(args.database_name,args.overwrite) ======= if len(sys.argv) < 2: @@ -183,3 +194,8 @@ def main(db_name, overwrite_db): ''' >>>>>>> Adding support to check if db already exists. +======= + main(args.database_name,args.overwrite) + + +>>>>>>> Updated deployment to be more idempotent. Updated table schemas diff --git a/db/tables/100_image_tags.sql b/db/tables/100_image_tags.sql index ac37cba6..421cdb13 100644 --- a/db/tables/100_image_tags.sql +++ b/db/tables/100_image_tags.sql @@ -2,12 +2,12 @@ CREATE TABLE Image_Tags ( ImageTagId SERIAL UNIQUE, ImageId integer REFERENCES Image_Info(ImageId) ON DELETE RESTRICT, - --ClassificationId text NOT NULL, --Needed? - --Confidence double precision NOT NULL, --Needed? 
- X_Min double precision NOT NULL, - X_Max double precision NOT NULL, - Y_Min double precision NOT NULL, - Y_Max double precision NOT NULL, + X_Min decimal(6,2) NOT NULL, + X_Max decimal(6,2) NOT NULL, + Y_Min decimal(6,2) NOT NULL, + Y_Max decimal(6,2) NOT NULL, + CreatedByUser integer REFERENCES User_Info(UserId), + CreatedDtim timestamp NOT NULL default current_timestamp, --VOTT_Data json NOT NULL - PRIMARY KEY (ImageTagId,ImageId) + PRIMARY KEY (ImageId,X_Min,X_Max,Y_Min,Y_Max) --Should we include the bounded box as well? ); \ No newline at end of file diff --git a/db/tables/100_tags_classification.sql b/db/tables/100_tags_classification.sql index f86561be..a285ee2d 100644 --- a/db/tables/100_tags_classification.sql +++ b/db/tables/100_tags_classification.sql @@ -1,10 +1,8 @@ -- Set up table CREATE TABLE Tags_Classification ( - ImageTagId integer REFERENCES Image_Tags(ImageTagId) UNIQUE, + ImageTagId integer REFERENCES Image_Tags(ImageTagId), ClassificationId integer REFERENCES Classification_Info(ClassificationId), ModifiedDtim timestamp NOT NULL default current_timestamp, CreatedDtim timestamp NOT NULL default current_timestamp, PRIMARY KEY (ImageTagId,ClassificationId) - --CONSTRAINT FK_IMAGE_TAG FOREIGN KEY(ImageTagId), - --CONSTRAINT FK_CLASSIFICATION FOREIGN KEY(ClassificationId) ); \ No newline at end of file From e588862d184de68ff6ee18186285535d3b0de8d7 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 10:57:40 -0800 Subject: [PATCH 28/31] Fixed issue --- db/install-db-resources.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index 4eebaf96..ab4549d7 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -136,15 +136,18 @@ def main(db_name, overwrite_db): print("Please set environment variables for DB_HOST, DB_USER, DB_PASS") return +<<<<<<< HEAD if (database_exists(get_default_connection(), db_name) and overwrite_db): 
remove_database(get_default_connection(),db_name) elif (database_exists(get_default_connection(), db_name) and not overwrite_db): print("Database {0} already exists. Please see --help for overwrite option.".format(db_name)) return +======= +>>>>>>> Fixed issue if (database_exists(get_default_connection(), db_name) and overwrite_db): remove_database(get_default_connection(),db_name) - else: + elif (database_exists(get_default_connection(), db_name) and not overwrite_db): print("Database {0} already exists. Please see --help for overwrite option.".format(db_name)) return From 617732c11dd5942ec0643b79ea499c5c37b4beef Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 12:19:46 -0800 Subject: [PATCH 29/31] Changed query formatting in some areas --- db/install-db-resources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/install-db-resources.py b/db/install-db-resources.py index ab4549d7..bb74d595 100644 --- a/db/install-db-resources.py +++ b/db/install-db-resources.py @@ -43,8 +43,8 @@ def database_exists(conn, db_name): >>>>>>> Fixed issue in db resoruces install file. 
Updated table schema if db_name: cursor = conn.cursor() - query = "SELECT 1 FROM pg_database WHERE datname='{0}'" - cursor.execute(query.format(db_name)) + query = "SELECT 1 FROM pg_database WHERE datname=%s" + cursor.execute(query,(db_name,)) row = cursor.fetchone() <<<<<<< HEAD result = int(row[0]) From 6dfaa30354f3549b67af89a00824c93973eceac6 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 8 Nov 2018 17:07:13 -0800 Subject: [PATCH 30/31] Changes to enforce Tagging_User as a key in the CLI config --- cli/operations.py | 11 +++++++---- cli/test_operations.py | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cli/operations.py b/cli/operations.py index f78585cb..894668a3 100644 --- a/cli/operations.py +++ b/cli/operations.py @@ -20,6 +20,7 @@ TAGGING_SECTION = 'TAGGING' TAGGING_LOCATION_KEY = 'TAGGING_LOCATION' +TAGGING_USER_KEY = 'TAGGING_USER' DEFAULT_NUM_IMAGES = 40 @@ -245,11 +246,12 @@ def storage_config_section(storage_config_section): def tagging_config_section(tagging_config_section): tagging_location_value = tagging_config_section.get(TAGGING_LOCATION_KEY) + tagging_user_value = tagging_config_section.get(TAGGING_USER_KEY) - if not tagging_location_value: + if not tagging_location_value or not tagging_user_value: raise MissingConfigException() - return tagging_location_value + return tagging_location_value, tagging_user_value def read_config_with_parsed_config(parser): @@ -272,7 +274,7 @@ def read_config_with_parsed_config(parser): parser[STORAGE_SECTION] ) - tagging_location = tagging_config_section(parser[TAGGING_SECTION]) + tagging_location, tagging_user = tagging_config_section(parser[TAGGING_SECTION]) return { "key": functions_key, @@ -280,5 +282,6 @@ def read_config_with_parsed_config(parser): "storage_account": storage_account, "storage_key": storage_key, "storage_container": storage_container, - "tagging_location": tagging_location + "tagging_location": tagging_location, + "tagging_user": tagging_user } diff --git 
a/cli/test_operations.py b/cli/test_operations.py index a2b0c218..dd6faa93 100644 --- a/cli/test_operations.py +++ b/cli/test_operations.py @@ -21,6 +21,7 @@ STORAGE_CONTAINER, TAGGING_SECTION, TAGGING_LOCATION_KEY, + TAGGING_USER_KEY, functions_config_section, storage_config_section, tagging_config_section, @@ -107,7 +108,8 @@ def test_acceptable_config(self): FUNCTIONS_URL: "test" }, TAGGING_SECTION: { - TAGGING_LOCATION_KEY: "test" + TAGGING_LOCATION_KEY: "test", + TAGGING_USER_KEY: "test" } } ) From 593f69e785b67f5f95e928872a4527f09ffcf8b2 Mon Sep 17 00:00:00 2001 From: Andre Briggs Date: Thu, 15 Nov 2018 10:33:49 -0800 Subject: [PATCH 31/31] Revised the way we do user tracking. --- functions/pipeline/download/__init__.py | 8 ++++---- functions/pipeline/onboarding/__init__.py | 22 +++++++++++----------- functions/pipeline/requirements.txt | 2 +- functions/pipeline/upload/__init__.py | 15 ++++++++++----- requirements.txt | 2 +- 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/functions/pipeline/download/__init__.py b/functions/pipeline/download/__init__.py index 11a4e295..96f3965a 100644 --- a/functions/pipeline/download/__init__.py +++ b/functions/pipeline/download/__init__.py @@ -12,17 +12,17 @@ def main(req: func.HttpRequest) -> func.HttpResponse: logging.info('Python HTTP trigger function processed a request.') image_count = int(req.params.get('imageCount')) - user_id = int(req.params.get('userId')) + user_name = req.params.get('userName') # setup response object headers = { "content-type": "application/json" } - if not user_id: + if not user_name: return func.HttpResponse( status_code=401, headers=headers, - body=json.dumps({"error": "invalid userId given or omitted"}) + body=json.dumps({"error": "invalid userName given or omitted"}) ) elif not image_count: return func.HttpResponse( @@ -34,7 +34,7 @@ def main(req: func.HttpRequest) -> func.HttpResponse: try: # DB configuration data_access = ImageTagDataAccess(get_postgres_provider()) - + 
user_id = data_access.create_user(user_name) image_urls = list(data_access.get_new_images(image_count, user_id)) # TODO: Populate starting json with tags, if any exist... (precomputed or retagging?) diff --git a/functions/pipeline/onboarding/__init__.py b/functions/pipeline/onboarding/__init__.py index 7e87ba9a..0961e3f2 100644 --- a/functions/pipeline/onboarding/__init__.py +++ b/functions/pipeline/onboarding/__init__.py @@ -1,5 +1,6 @@ import os import logging +import json import azure.functions as func from ..shared.db_provider import get_postgres_provider @@ -13,10 +14,14 @@ def main(req: func.HttpRequest) -> func.HttpResponse: logging.info('Python HTTP trigger function processed a request.') - user_id = req.params.get('userId') + user_name = req.params.get('userName') - if not user_id: - return func.HttpResponse("userId query parameter invalid or omitted", status_code=401) + if not user_name: + return func.HttpResponse( + status_code=401, + headers={ "content-type": "application/json"}, + body=json.dumps({"error": "invalid userName given or omitted"}) + ) try: req_body = req.get_json() @@ -49,16 +54,11 @@ def main(req: func.HttpRequest) -> func.HttpResponse: image_object_list.append(image) # TODO: Wrap db access section in try/catch, send an appropriate http response in the event of an error - logging.info("Now connecting to database...") data_access = ImageTagDataAccess(get_postgres_provider()) - logging.info("Connected.") - - # Create user id - user_id_number = data_access.create_user(user_id) - logging.info("User id for {0} is {1}".format(user_id, str(user_id_number))) + user_id = data_access.create_user(user_name) # Add new images to the database, and retrieve a dictionary ImageId's mapped to ImageUrl's - image_id_url_map = data_access.add_new_images(image_object_list,user_id_number) + image_id_url_map = data_access.add_new_images(image_object_list,user_id) # Print out dictionary for debugging logging.info("Image ID and URL map dictionary:") @@ -119,7 
+119,7 @@ def main(req: func.HttpRequest) -> func.HttpResponse: update_urls_dictionary[image_id] = permanent_storage_path logging.info("Now updating permanent URLs in the DB...") - data_access.update_image_urls(update_urls_dictionary, user_id_number) + data_access.update_image_urls(update_urls_dictionary, user_id) logging.info("Done.") # Construct response string of permanent URLs diff --git a/functions/pipeline/requirements.txt b/functions/pipeline/requirements.txt index fbf84768..41de3ac2 100644 --- a/functions/pipeline/requirements.txt +++ b/functions/pipeline/requirements.txt @@ -1,6 +1,6 @@ azure-functions==1.0.0a5 azure-functions-worker==1.0.0a6 -azure-storage +azure-storage-blob==1.4.0 grpcio==1.14.2 grpcio-tools==1.14.2 protobuf==3.6.1 diff --git a/functions/pipeline/upload/__init__.py b/functions/pipeline/upload/__init__.py index 417dc1e4..9fd4a701 100644 --- a/functions/pipeline/upload/__init__.py +++ b/functions/pipeline/upload/__init__.py @@ -19,11 +19,18 @@ def main(req: func.HttpRequest) -> func.HttpResponse: # TODO: Create if check for userId and valid json checks? 
vott_json = req.get_json() upload_data = process_vott_json(vott_json) - user_id = int(req.params.get('userId')) - upload_data['userId'] = user_id + user_name = req.params.get('userName') + + if not user_name: + return func.HttpResponse( + status_code=401, + headers={ "content-type": "application/json"}, + body=json.dumps({"error": "invalid userName given or omitted"}) + ) # DB configuration data_access = ImageTagDataAccess(get_postgres_provider()) + user_id = data_access.create_user(user_name) # Update tagged images ids_to_tags = upload_data["imageIdToTags"] @@ -45,9 +52,7 @@ def main(req: func.HttpRequest) -> func.HttpResponse: return func.HttpResponse( body=json.dumps(upload_data), status_code=200, - headers={ - "content-type": "application/json" - } + headers={ "content-type": "application/json"}, ) except Exception as e: return func.HttpResponse( diff --git a/requirements.txt b/requirements.txt index 9de0ccd9..b6871c9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ azure-functions==1.0.0a5 azure-functions-worker==1.0.0a6 -azure-storage +azure-storage-blob==1.4.0 grpcio==1.14.2 grpcio-tools==1.14.2 protobuf==3.6.1