From ebab05f640e7d44d33963cb54bd817c34baac563 Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Fri, 12 Jan 2024 16:00:04 +0100 Subject: [PATCH 01/10] WIP add queries to get license information from GitHub, PyPi and CRAN --- licenses/ingest.py | 55 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 licenses/ingest.py diff --git a/licenses/ingest.py b/licenses/ingest.py new file mode 100644 index 0000000000..9260eba8a4 --- /dev/null +++ b/licenses/ingest.py @@ -0,0 +1,55 @@ +# this will ingest GitHub REST API output to a list +# recovering the information + +# command that do the magic: +#curl -L -H "Accept: application/vnd.github+json" -H "Authorization: Bearer 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" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/SINGROUP/SOAPLite/license | jq '.license|{spdx_id}' + +# python traduction: +import requests + +def gitHUBLicenses(repo): + """ + Function that gets spdx_id from github using his API + """ + + url="https://api.github.com/repos/"+repo+"/license" + headers = { + "Accept": "application/vnd.github+json", + "Authorization" : "Bearer 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", + "X-GitHub-Api-Version": "2022-11-28", + } + + test=requests.get(url, headers=headers) + if test==200: + return(test.json()['license']['spdx_id']) + else: + return('no available') + +def pypiLicenses(project): + """ + Function that retrives licence from PiPy + """ + url = "https://pypi.org/pypi/" + r = requests.get(url + project + "/json").json() + return(r['info']['license']) + +def CRANLicenses(project): + url = "http://crandb.r-pkg.org/" + r = requests.get(url + project).json() + print(r['License']) + +# if r.status_code != 200: +# return "not found" +# else: +# return r.json()['Licence'] + +def main(): +# repo="SINGROUP/SOAPLite" +# gitHUBLicenses("SINGROUP/SOAPLite") +# pypiLicenses("easybuild") +# CRANLicenses('mirai') +Other packages + + +main() + From 53a35d0d41142d84ce327337d32f7bc3a61e21d8 Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Thu, 18 Jan 2024 18:38:14 +0100 Subject: [PATCH 02/10] WIP convert ingest.py into add_license_info.py --- licenses/ingest.py | 47 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/licenses/ingest.py b/licenses/ingest.py index 9260eba8a4..49f1fc175d 100644 --- a/licenses/ingest.py +++ b/licenses/ingest.py @@ -6,6 +6,15 @@ # python traduction: import requests +import argparse +import json + +parser=argparse.ArgumentParser(description='Script to ingest licences') + +parser.add_argument('--source', help='Project available in GitHub,pypi,cran') +parser.add_argument('project', help='Project. For GitHub you should specify owner/repo') +parser.add_argument('--spdx') +args=parser.parse_args() def gitHUBLicenses(repo): """ @@ -34,21 +43,53 @@ def pypiLicenses(project): return(r['info']['license']) def CRANLicenses(project): + """ + Function that retrieves licence from CRAN + """ url = "http://crandb.r-pkg.org/" r = requests.get(url + project).json() - print(r['License']) + return(r['License']) # if r.status_code != 200: # return "not found" # else: # return r.json()['Licence'] +def updateJson(licenseInfo): +# """ +# Function that updates json file +# """ + with open('dummy.json', 'w') as dummy: + json.dump(licenseInfo,dummy) +def licenseInfo(project): + """ + Function that create the project dict + """ + if args.pypi: + lic=pypiLicenses(project) + source="pypi" + info=dict(license=lic, source=source) + test={project:info} +# if args.github: +# lic=gitHUBLicenses(args.project) +# if args.cran: +# lic=CRANLicenses(args.project) + # fill the dictionary with + # { + # "Software": { + # "license": "license", + # "source": "manual, pypi, cran, repology, libraries.io,.." + # "spdx": "spdx_id", + #} + # + return test + def main(): + updateJson(licenseInfo(args.project)) # repo="SINGROUP/SOAPLite" -# gitHUBLicenses("SINGROUP/SOAPLite") +# print(gitHUBLicenses("SINGROUP/SOAPLite")) # pypiLicenses("easybuild") # CRANLicenses('mirai') -Other packages main() From 65231085bf3bea6925c65cfdce48822c35d9cebe Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Sun, 21 Jan 2024 14:50:43 +0100 Subject: [PATCH 03/10] wip add repology API. --- licenses/ingest.py | 62 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/licenses/ingest.py b/licenses/ingest.py index 49f1fc175d..9e46e72092 100644 --- a/licenses/ingest.py +++ b/licenses/ingest.py @@ -13,10 +13,11 @@ parser.add_argument('--source', help='Project available in GitHub,pypi,cran') parser.add_argument('project', help='Project. For GitHub you should specify owner/repo') -parser.add_argument('--spdx') +parser.add_argument('--manual',required=False) args=parser.parse_args() +print (args) -def gitHUBLicenses(repo): +def github(repo): """ Function that gets spdx_id from github using his API """ @@ -34,7 +35,7 @@ def gitHUBLicenses(repo): else: return('no available') -def pypiLicenses(project): +def pypi(project): """ Function that retrives licence from PiPy """ @@ -42,50 +43,51 @@ def pypiLicenses(project): r = requests.get(url + project + "/json").json() return(r['info']['license']) -def CRANLicenses(project): +def cran(project): """ Function that retrieves licence from CRAN """ url = "http://crandb.r-pkg.org/" r = requests.get(url + project).json() return(r['License']) - + # if r.status_code != 200: # return "not found" # else: # return r.json()['Licence'] -def updateJson(licenseInfo): -# """ -# Function that updates json file -# """ - with open('dummy.json', 'w') as dummy: - json.dump(licenseInfo,dummy) +def repology(project): + url="https://repology.org//api/v1/" + r = requests.get(url + project).json() + return(r['License']) + def licenseInfo(project): """ Function that create the project dict """ - if args.pypi: - lic=pypiLicenses(project) - source="pypi" - info=dict(license=lic, source=source) - test={project:info} -# if args.github: -# lic=gitHUBLicenses(args.project) -# if args.cran: -# lic=CRANLicenses(args.project) - # fill the dictionary with - # { - # "Software": { - # "license": "license", - # "source": "manual, pypi, cran, repology, libraries.io,.." - # "spdx": "spdx_id", - #} - # - return test + if args.source=='pypi': + lic=pypi(project) + info=[("license",lic), ("source",args.source)] + print(project,info) + return info + +def updateJson(project,info): + """ + Function that updates json file + """ + with open('licenses.json','r') as licDict: + licenses=json.loads(licDict.read()) + + if project not in licenses.keys(): + print('we do not have the license') + licenses[project]=info + print(licenses) + def main(): - updateJson(licenseInfo(args.project)) + project=args.project + info=licenseInfo(project) + updateJson(project,info) # repo="SINGROUP/SOAPLite" # print(gitHUBLicenses("SINGROUP/SOAPLite")) # pypiLicenses("easybuild") From ac7a1877c0e83d7c07bfab0c04449981cead5924 Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Tue, 23 Jan 2024 11:13:45 +0100 Subject: [PATCH 04/10] WIP add license to license.json --- licenses/ingest.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/licenses/ingest.py b/licenses/ingest.py index 9e46e72092..775d049519 100644 --- a/licenses/ingest.py +++ b/licenses/ingest.py @@ -25,7 +25,7 @@ def github(repo): url="https://api.github.com/repos/"+repo+"/license" headers = { "Accept": "application/vnd.github+json", - "Authorization" : "Bearer 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", + "Authorization" : "Bearer TOKEN", "X-GitHub-Api-Version": "2022-11-28", } @@ -80,9 +80,11 @@ def updateJson(project,info): if project not in licenses.keys(): print('we do not have the license') - licenses[project]=info - print(licenses) - + licenses[project]=dict(info) + licJson=json.dumps(licenses, indent=4) + + with open('licenses.json','w') as licFile: + licFile.write(licJson) def main(): project=args.project From 8db14fef18af54f98906ad6290b6644eb08942cc Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Tue, 30 Jan 2024 12:47:04 +0100 Subject: [PATCH 05/10] Enable github and spdx license push. It needs some cosmetic changes --- licenses/ingest.py | 69 ++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/licenses/ingest.py b/licenses/ingest.py index 775d049519..f1cd7dc1b9 100644 --- a/licenses/ingest.py +++ b/licenses/ingest.py @@ -2,7 +2,7 @@ # recovering the information # command that do the magic: -#curl -L -H "Accept: application/vnd.github+json" -H "Authorization: Bearer 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" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/SINGROUP/SOAPLite/license | jq '.license|{spdx_id}' +#curl -L -H "Accept: application/vnd.github+json" -H "Authorization: Bearer TOKEN" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/SINGROUP/SOAPLite/license | jq '.license|{spdx_id}' # python traduction: import requests @@ -11,17 +11,16 @@ parser=argparse.ArgumentParser(description='Script to ingest licences') -parser.add_argument('--source', help='Project available in GitHub,pypi,cran') -parser.add_argument('project', help='Project. For GitHub you should specify owner/repo') -parser.add_argument('--manual',required=False) +parser.add_argument('--source', help='Project available in GitHub,pypi,cran or user that push the license') +parser.add_argument('project', help='Project') +parser.add_argument('--spdx',required=False) args=parser.parse_args() -print (args) -def github(repo): +def github(source): """ Function that gets spdx_id from github using his API """ - + repo=source.removeprefix('github:') url="https://api.github.com/repos/"+repo+"/license" headers = { "Accept": "application/vnd.github+json", @@ -29,11 +28,11 @@ def github(repo): "X-GitHub-Api-Version": "2022-11-28", } - test=requests.get(url, headers=headers) - if test==200: - return(test.json()['license']['spdx_id']) + r=requests.get(url, headers=headers) + if r.status_code != 200: + return "not found" else: - return('no available') + return(r.json()['license']['spdx_id']) def pypi(project): """ @@ -41,7 +40,10 @@ def pypi(project): """ url = "https://pypi.org/pypi/" r = requests.get(url + project + "/json").json() - return(r['info']['license']) + if r.status_code != 200: + return "not found" + else: + return(r['info']['license']) def cran(project): """ @@ -49,26 +51,31 @@ def cran(project): """ url = "http://crandb.r-pkg.org/" r = requests.get(url + project).json() - return(r['License']) - -# if r.status_code != 200: -# return "not found" -# else: -# return r.json()['Licence'] + if r.status_code != 200: + return "not found" + else: + return(r['License']) def repology(project): url="https://repology.org//api/v1/" r = requests.get(url + project).json() - return(r['License']) + if r.status_code != 200: + return "not found" + else: + return(r['License']) def licenseInfo(project): """ - Function that create the project dict + Function that create the project info """ if args.source=='pypi': lic=pypi(project) - info=[("license",lic), ("source",args.source)] - print(project,info) + elif "github" in args.source: + lic=github(args.source) + elif args.spdx: + lic=args.spdx + + info=[("license",lic), ("source",args.source)] return info def updateJson(project,info): @@ -78,23 +85,19 @@ def updateJson(project,info): with open('licenses.json','r') as licDict: licenses=json.loads(licDict.read()) - if project not in licenses.keys(): - print('we do not have the license') + if project in licenses.keys(): + print('project in licenses.json') + else: + print('we do not have the license, adding into licenses.json') licenses[project]=dict(info) - licJson=json.dumps(licenses, indent=4) + licJson=json.dumps(licenses, indent=4) - with open('licenses.json','w') as licFile: - licFile.write(licJson) + with open('licenses.json','w') as licFile: + licFile.write(licJson) def main(): project=args.project info=licenseInfo(project) updateJson(project,info) -# repo="SINGROUP/SOAPLite" -# print(gitHUBLicenses("SINGROUP/SOAPLite")) -# pypiLicenses("easybuild") -# CRANLicenses('mirai') - main() - From 4d674a20f605e3af6455a8368cd782c555170576 Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Tue, 30 Jan 2024 12:51:20 +0100 Subject: [PATCH 06/10] fix identation --- licenses/ingest.py | 128 ++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/licenses/ingest.py b/licenses/ingest.py index f1cd7dc1b9..cef775d1fb 100644 --- a/licenses/ingest.py +++ b/licenses/ingest.py @@ -17,87 +17,87 @@ args=parser.parse_args() def github(source): - """ - Function that gets spdx_id from github using his API - """ - repo=source.removeprefix('github:') - url="https://api.github.com/repos/"+repo+"/license" - headers = { - "Accept": "application/vnd.github+json", - "Authorization" : "Bearer TOKEN", - "X-GitHub-Api-Version": "2022-11-28", - } - - r=requests.get(url, headers=headers) - if r.status_code != 200: - return "not found" - else: - return(r.json()['license']['spdx_id']) + """ + Function that gets spdx_id from github using his API + """ + repo=source.removeprefix('github:') + url="https://api.github.com/repos/"+repo+"/license" + headers = { + "Accept": "application/vnd.github+json", + "Authorization" : "Bearer TOKEN", + "X-GitHub-Api-Version": "2022-11-28", + } + + r=requests.get(url, headers=headers) + if r.status_code != 200: + return "not found" + else: + return(r.json()['license']['spdx_id']) def pypi(project): - """ - Function that retrives licence from PiPy - """ - url = "https://pypi.org/pypi/" - r = requests.get(url + project + "/json").json() - if r.status_code != 200: - return "not found" - else: - return(r['info']['license']) + """ + Function that retrives licence from PiPy + """ + url = "https://pypi.org/pypi/" + r = requests.get(url + project + "/json").json() + if r.status_code != 200: + return "not found" + else: + return(r['info']['license']) def cran(project): """ - Function that retrieves licence from CRAN - """ + Function that retrieves licence from CRAN + """ url = "http://crandb.r-pkg.org/" r = requests.get(url + project).json() - if r.status_code != 200: - return "not found" - else: - return(r['License']) + if r.status_code != 200: + return "not found" + else: + return(r['License']) def repology(project): url="https://repology.org//api/v1/" r = requests.get(url + project).json() - if r.status_code != 200: - return "not found" - else: - return(r['License']) + if r.status_code != 200: + return "not found" + else: + return(r['License']) def licenseInfo(project): - """ - Function that create the project info - """ - if args.source=='pypi': - lic=pypi(project) - elif "github" in args.source: - lic=github(args.source) - elif args.spdx: - lic=args.spdx - - info=[("license",lic), ("source",args.source)] - return info + """ + Function that create the project info + """ + if args.source=='pypi': + lic=pypi(project) + elif "github" in args.source: + lic=github(args.source) + elif args.spdx: + lic=args.spdx + + info=[("license",lic), ("source",args.source)] + return info def updateJson(project,info): - """ - Function that updates json file - """ - with open('licenses.json','r') as licDict: - licenses=json.loads(licDict.read()) - - if project in licenses.keys(): - print('project in licenses.json') - else: - print('we do not have the license, adding into licenses.json') - licenses[project]=dict(info) - licJson=json.dumps(licenses, indent=4) + """ + Function that updates json file + """ + with open('licenses.json','r') as licDict: + licenses=json.loads(licDict.read()) + + if project in licenses.keys(): + print('project in licenses.json') + else: + print('we do not have the license, adding into licenses.json') + licenses[project]=dict(info) + licJson=json.dumps(licenses, indent=4) - with open('licenses.json','w') as licFile: - licFile.write(licJson) + with open('licenses.json','w') as licFile: + licFile.write(licJson) def main(): - project=args.project - info=licenseInfo(project) - updateJson(project,info) + project=args.project + info=licenseInfo(project) + updateJson(project,info) main() From b66e4c3fb694e555916e44b98c4ca88f7d791969 Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Tue, 30 Jan 2024 12:54:57 +0100 Subject: [PATCH 07/10] Harmonize calls --- licenses/ingest.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/licenses/ingest.py b/licenses/ingest.py index cef775d1fb..6b8ac3b49f 100644 --- a/licenses/ingest.py +++ b/licenses/ingest.py @@ -39,22 +39,22 @@ def pypi(project): Function that retrives licence from PiPy """ url = "https://pypi.org/pypi/" - r = requests.get(url + project + "/json").json() + r = requests.get(url + project + "/json") if r.status_code != 200: return "not found" else: - return(r['info']['license']) + return(r.json()['info']['license']) def cran(project): """ Function that retrieves licence from CRAN """ url = "http://crandb.r-pkg.org/" - r = requests.get(url + project).json() + r = requests.get(url + project) if r.status_code != 200: return "not found" else: - return(r['License']) + return(r.json()['License']) def repology(project): url="https://repology.org//api/v1/" From 8cbb50c13bde1afdb244381b395270d058ddb550 Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Tue, 30 Jan 2024 12:57:13 +0100 Subject: [PATCH 08/10] rename ingest into add_license_info.py --- licenses/{ingest.py => add_license_info.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename licenses/{ingest.py => add_license_info.py} (100%) diff --git a/licenses/ingest.py b/licenses/add_license_info.py similarity index 100% rename from licenses/ingest.py rename to licenses/add_license_info.py From 15423fa9330d631f11a2c36f3aee854edfd379e7 Mon Sep 17 00:00:00 2001 From: Danilo Gonzalez Date: Mon, 12 Feb 2024 16:58:12 +0100 Subject: [PATCH 09/10] Add ecosyste.ms support. Replace GitHub/pipy calls for ecosyste.ms --- licenses/add_license_info.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/licenses/add_license_info.py b/licenses/add_license_info.py index 6b8ac3b49f..f2834b4234 100644 --- a/licenses/add_license_info.py +++ b/licenses/add_license_info.py @@ -64,14 +64,38 @@ def repology(project): else: return(r['License']) +def ecosysteDotms_pypi(project): + """ + Function that retrieves license info from ecosystem.ms for pipy packages + """ + url = "https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/" + r = requests.get(url + project) + if r.status_code != 200: + return "not found" + else: + return(r.json()['licenses']) +def ecosysteDotms_github(source): + """ + Function that retrieves license info from ecosystem.ms for github repos + """ + repo=source.removeprefix('github:') + url= "https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/" + r = requests.get(url + repo) + if r.status_code != 200: + return "not found" + else: + return(r.json()['license']) + def licenseInfo(project): """ Function that create the project info """ if args.source=='pypi': - lic=pypi(project) + lic=ecosysteDotms_pypi(project) +# lic=pypi(project) elif "github" in args.source: - lic=github(args.source) + lic=ecosysteDotms_github(args.source) +# lic=github(args.source) elif args.spdx: lic=args.spdx From 09d39e5d13d029f824dbf49bc0a851995b865334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danilo=20Gonz=C3=A1lez?= Date: Tue, 2 Apr 2024 14:50:22 +0200 Subject: [PATCH 10/10] Switch spdx to manual As suggested in 02/04/2023 meeting. Previously, spdx argument has been used to manually feeded licenses. --- licenses/add_license_info.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/licenses/add_license_info.py b/licenses/add_license_info.py index f2834b4234..9f2ab5e346 100644 --- a/licenses/add_license_info.py +++ b/licenses/add_license_info.py @@ -13,7 +13,7 @@ parser.add_argument('--source', help='Project available in GitHub,pypi,cran or user that push the license') parser.add_argument('project', help='Project') -parser.add_argument('--spdx',required=False) +parser.add_argument('--manual',required=False) args=parser.parse_args() def github(source): @@ -96,8 +96,8 @@ def licenseInfo(project): elif "github" in args.source: lic=ecosysteDotms_github(args.source) # lic=github(args.source) - elif args.spdx: - lic=args.spdx + elif args.manual: + lic=args.manual info=[("license",lic), ("source",args.source)] return info