-
Notifications
You must be signed in to change notification settings - Fork 3
Fix: Python license file has the wrong contents (#7372) #7701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix: Python license file has the wrong contents (#7372) #7701
Conversation
Codecov Report
✅ All modified and coverable lines are covered by tests.
Additional details and impacted files
@@ Coverage Diff @@
## develop #7701 +/- ##
========================================
Coverage 84.82% 84.82%
========================================
Files 157 157
Lines 23060 23060
========================================
Hits 19561 19561
Misses 3499 3499
☔ View full report in Codecov by Sentry.
🚀 New features to boost your workflow:
|
3da4fea to
75a2415
Compare
achave11-ucsc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
diff --git a/scripts/fetch_requirement_licences.py b/scripts/fetch_requirement_licences.py
index f09ad779f..8ac226e3f 100644
--- a/scripts/fetch_requirement_licences.py
+++ b/scripts/fetch_requirement_licences.py
@@ -49,7 +49,7 @@ log = logging.getLogger(__name__)
class FetchLicenses:
destination_path = f'{config.project_root}/docs/licenses/python/'
- license_file_names = [
+ file_names = [
'LICENSE',
'LICENSE.txt',
'LICENSE.rst',
@@ -61,82 +61,16 @@ class FetchLicenses:
'LICENCE.md'
]
- def main(self, argv):
- parser = argparse.ArgumentParser(description=__doc__,
- formatter_class=AzulArgumentHelpFormatter)
- parser.add_argument('--package', '-p',
- help='Optionally specify one or more packages to '
- 'download from. If not specified, licenses from '
- 'all Python dependencies will be downloaded.',
- nargs='+',
- metavar='PACKAGE',
- )
- parser.add_argument('--debug',
- action='store_true',
- help='Log debugging information')
- args = parser.parse_args(argv)
-
- packages = []
- failures = []
-
- if args.package:
- packages = [p for p in args.package]
- else:
- with open(f'{config.project_root}/requirements.all.txt', 'r') as f:
- packages = [p.split('==')[0] for p in f.readlines()]
-
- for package in packages:
- if package:
- pypi_url = f'https://pypi.org/pypi/{package}'
- response = self.get_response(f'{pypi_url}/json')
- assert isinstance(response, HTTPResponse)
- found = False
- if response.status == 200:
- urls = json.loads(response.data)['info']['project_urls']
- if urls:
- if args.debug:
- log.debug('%s urls: %s', package, urls)
- for url in self.github_urls(urls.values()):
- url_raw = furl(url)
- if len(url_raw.path.segments) > 2:
- if url_raw.path.segments[2] in ('blob', 'tree'):
- url_raw.path.segments[2] = 'raw'
- else:
- url_raw.path.segments.extend(['raw', 'HEAD'])
- url_blob = url_raw.copy()
- url_blob.path.segments[2] = 'blob'
- for filename in self.license_file_names:
- response = self.get_response(f'{url_raw}/{filename}')
- assert isinstance(response, HTTPResponse)
- if response.status == 200:
- if args.debug:
- log.debug('Found %s/%s', url_raw, filename)
- file_path = f'{self.destination_path}{package}.txt'
- with open(file_path, 'wb') as f:
- f.write(f'{url_blob}/{filename}\n\n'.encode('ascii'))
- f.write(response.data)
- log.info('%s... SUCCESS', package)
- found = True
- break
- if found:
- break
- if not found:
- failures.append(package)
- log.info('%s... FAIL (%s)', package, pypi_url)
-
- if failures:
- log.error('Failed to fetch licenses for packages: %s', failures)
-
@cached_property
def http(self) -> HttpClient:
return http_client()
- def get_response(self, url: str) -> HTTPResponse:
+ def fetch(self, url: str) -> HTTPResponse:
while True:
response = self.http.request('GET', url)
if response.status in [301, 302]:
url = response.get_redirect_location()
- retry_after = response.headers.get('Retry-After')
+ retry_after = response.headers.fetch('Retry-After')
if retry_after is not None:
print('Sleeping %.3fs to honor Retry-After property' % retry_after)
time.sleep(retry_after)
@@ -155,13 +89,10 @@ class FetchLicenses:
url_.path.segments.pop()
if url_.netloc == 'github.com' and url_.path.segments:
last_segment = url_.path.segments[-1]
- # Remove '.git' from URL
- # https://github.com/USER/PACKAGE.git
+ # REVIEW: Consider removing comments expressible with code
if last_segment.endswith('.git'):
- url_.path.segments[-1] = last_segment[:-4]
- # Remove README file from path
- # https://github.com/USER/README.md
- elif last_segment.endswith('.md') or last_segment.endswith('.rst'):
+ url_.path.segments[-1] = last_segment.removesuffix('.git')
+ elif last_segment.startswith('README'):
url_.path.segments.pop()
# Remove extra segment from path
# https://github.com/USER/PACKAGE/issues
@@ -176,6 +107,81 @@ class FetchLicenses:
return sorted(urls_)
+def main(argv):
+ # REVIEW: Code structure is difficult to follow in the first commit. Consider
+ # breaking it up to make it more transparent. Separating the fixes from
+ # modifications/alterations may help.
+ # Also, why not continue using main() and defining the script's business
+ # logic in it, like in other scripts? It seems you've been introducing a
+ # slightly different invocation pattern in some of these scripts.
+ #
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=AzulArgumentHelpFormatter)
+ parser.add_argument('--package', '-p',
+ help='Optionally specify one or more packages to '
+ 'download from. If not specified, licenses from '
+ 'all Python dependencies will be downloaded.',
+ nargs='+',
+ metavar='PACKAGE',
+ )
+ parser.add_argument('--debug',
+ action='store_true',
+ help='Log debugging information')
+ args = parser.parse_args(argv)
+
+ packages = []
+ failures = []
+
+ if args.package:
+ packages = [p for p in args.package]
+ else:
+ with open(f'{config.project_root}/requirements.all.txt', 'r') as f:
+ packages = [p.split('==')[0] for p in f.readlines()]
+
+ licenses = FetchLicenses()
+ for package in packages:
+ if package:
+ pypi_url = f'https://pypi.org/pypi/{package}'
+ response = licenses.fetch(f'{pypi_url}/json')
+ assert isinstance(response, HTTPResponse)
+ found = False
+ if response.status == 200:
+ urls = json.loads(response.data)['info']['project_urls']
+ if urls:
+ if args.debug:
+ log.debug('%s urls: %s', package, urls)
+ for url in licenses.github_urls(urls.values()):
+ url_raw = furl(url)
+ if len(url_raw.path.segments) > 2:
+ if url_raw.path.segments[2] in ('blob', 'tree'):
+ url_raw.path.segments[2] = 'raw'
+ else:
+ url_raw.path.segments.extend(['raw', 'HEAD'])
+ url_blob = url_raw.copy()
+ url_blob.path.segments[2] = 'blob'
+ for filename in licenses.file_names:
+ response = licenses.fetch(f'{url_raw}/{filename}')
+ assert isinstance(response, HTTPResponse)
+ if response.status == 200:
+ if args.debug:
+ log.debug('Found %s/%s', url_raw, filename)
+ file_path = f'{licenses.destination_path}{package}.txt'
+ with open(file_path, 'wb') as f:
+ f.write(f'{url_blob}/{filename}\n\n'.encode('ascii'))
+ f.write(response.data)
+ log.info('%s... SUCCESS', package)
+ found = True
+ break
+ if found:
+ break
+ if not found:
+ failures.append(package)
+ log.info('%s... FAIL (%s)', package, pypi_url)
+
+ if failures:
+ log.error('Failed to fetch licenses for packages: %s', failures)
+
+
if __name__ == '__main__':
configure_script_logging(log)
- FetchLicenses().main(sys.argv[1:])
+ main(sys.argv[1:])
75a2415 to
d4ce788
Compare
achave11-ucsc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM ✅
Note, the class abstraction of your previous version was also good, but the business logic of the script (the main() method) didn't seem appropriate to the class.
| urls_.add(str(url_)) | ||
| return urls_ | ||
|
|
||
| http = http_client() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you allow this side effect of importing the script, you may as well not have the if __name__ … stanza at the bottom.
You may need to define a Main class and make this an attribute thereof. There is plenty of precedent for this technique.
d4ce788 to
03018ea
Compare
Security design review
|
03018ea to
bf094a6
Compare
Linked issues: #7372
Checklist
Author
develop
issues/<GitHub handle of author>/<issue#>-<slug>
1 when the issue title describes a problem, the corresponding PR title is "Fix: " followed by the issue title
Author (partiality)
ptag to titles of partial commitspartialor completely resolves all linked issuespartiallabelAuthor (reindex)
rtag to commit title or the changes introduced by this PR will not require reindexing of any deploymentreindex:devor the changes introduced by it will not require reindexing ofdevreindex:anvildevor the changes introduced by it will not require reindexing ofanvildevreindex:anvilprodor the changes introduced by it will not require reindexing ofanvilprodreindex:prodor the changes introduced by it will not require reindexing ofprodreindex:partialand its description documents the specific reindexing procedure fordev,anvildev,anvilprodandprodor requires a full reindex or carries none of the labelsreindex:dev,reindex:anvildev,reindex:anvilprodandreindex:prodAuthor (API changes)
APIor this PR does not modify a REST APIa(A) tag to commit title for backwards (in)compatible changes or this PR does not modify a REST APIapp.pyor this PR does not modify a REST APIAuthor (upgrading deployments)
make docker_images.jsonand committed the resulting changes or this PR does not modifyazul_docker_images, or any other variables referenced in the definition of that variableutag to commit title or this PR does not require upgrading deploymentsupgradeor does not require upgrading deploymentsdeploy:sharedor does not modifydocker_images.json, and does not require deploying thesharedcomponent for any other reasondeploy:gitlabor does not require deploying thegitlabcomponentdeploy:runneror does not require deploying therunnerimageAuthor (hotfixes)
Ftag to main commit title or this PR does not include permanent fix for a temporary hotfixanvilprodandprod) have temporary hotfixes for any of the issues linked to this PRAuthor (before every review)
develop, squashed fixups from prior reviewsmake requirements_updateor this PR does not modifyDockerfile,environment,requirements*.txt,common.mk,Makefileorenvironment.bootRtag to commit title or this PR does not modifyrequirements*.txtreqsor does not modifyrequirements*.txtmake integration_testpasses in personal deployment or this PR does not modify functionality that could affect the IT outcomePeer reviewer (after approval)
Note that after requesting changes, the PR must be assigned to only the author.
System administrator (after approval)
demoorno demono demono sandboxN reviewslabel is accurateOperator
reindex:…labels andrcommit title tagno demodevelopOperator (deploy
.sharedand.gitlabcomponents)_select dev.shared && CI_COMMIT_REF_NAME=develop make -C terraform/shared apply_keep_unusedor this PR is not labeleddeploy:shared_select dev.gitlab && CI_COMMIT_REF_NAME=develop make -C terraform/gitlab applyor this PR is not labeleddeploy:gitlab_select anvildev.shared && CI_COMMIT_REF_NAME=develop make -C terraform/shared apply_keep_unusedor this PR is not labeleddeploy:shared_select anvildev.gitlab && CI_COMMIT_REF_NAME=develop make -C terraform/gitlab applyor this PR is not labeleddeploy:gitlabdeploy:gitlabdeploy:gitlabSystem administrator (post-deploy of
.gitlabcomponent)dev.gitlabare complete or this PR is not labeleddeploy:gitlabanvildev.gitlabare complete or this PR is not labeleddeploy:gitlabOperator (deploy runner image)
_select dev.gitlab && make -C terraform/gitlab/runneror this PR is not labeleddeploy:runner_select anvildev.gitlab && make -C terraform/gitlab/runneror this PR is not labeleddeploy:runnerOperator (sandbox build)
sandboxlabel or PR is labeledno sandboxdevor PR is labeledno sandboxanvildevor PR is labeledno sandboxsandboxdeployment or PR is labeledno sandboxanvilboxdeployment or PR is labeledno sandboxsandboxdeployment or PR is labeledno sandboxanvilboxdeployment or PR is labeledno sandboxsandboxor this PR does not remove catalogs or otherwise causes unreferenced indices insandboxanvilboxor this PR does not remove catalogs or otherwise causes unreferenced indices inanvilboxsandboxor this PR is not labeledreindex:devanvilboxor this PR is not labeledreindex:anvildevsandboxor this PR is not labeledreindex:devanvilboxor this PR is not labeledreindex:anvildevOperator (merge the branch)
pif the PR is also labeledpartialOperator (main build)
devanvildevdevdevanvildevanvildev_select dev.shared && make -C terraform/shared applyor this PR is not labeleddeploy:shared_select anvildev.shared && make -C terraform/shared applyor this PR is not labeleddeploy:shareddevanvildevOperator (reindex)
devor this PR is neither labeledreindex:partialnorreindex:devanvildevor this PR is neither labeledreindex:partialnorreindex:anvildevdevor this PR is neither labeledreindex:partialnorreindex:devanvildevor this PR is neither labeledreindex:partialnorreindex:anvildevdevor this PR is neither labeledreindex:partialnorreindex:devanvildevor this PR is neither labeledreindex:partialnorreindex:anvildevdevor this PR does not require reindexingdevanvildevor this PR does not require reindexinganvildevdevor this PR does not require reindexingdevanvildevor this PR does not require reindexinganvildevdevor this PR does not require reindexingdevanvildevor this PR does not require reindexinganvildevdevor this PR does not require reindexingdevdevor this PR does not require reindexingdevdeploy_browserjob in the GitLab pipeline for this PR indevor this PR does not require reindexingdevanvildevor this PR does not require reindexinganvildevdeploy_browserjob in the GitLab pipeline for this PR inanvildevor this PR does not require reindexinganvildevOperator (mirroring)
devor this PR does not require mirroringdevanvildevor this PR does not require mirroringanvildevdevor this PR does not require mirroringdevanvildevor this PR does not require mirroringanvildevdevor this PR does not require mirroringdevanvildevor this PR does not require mirroringanvildevOperator
deploy:shared,deploy:gitlab,deploy:runner,API,reindex:partial,reindex:anvilprodandreindex:prodlabels to the next promotion PRs or this PR carries none of these labelsdeploy:shared,deploy:gitlab,deploy:runner,API,reindex:partial,reindex:anvilprodandreindex:prodlabels, from the description of this PR to that of the next promotion PRs or this PR carries none of these labelsShorthand for review comments
L line is too long
W line wrapping is wrong
Q bad quotes
F other formatting problem