|
20 | 20 | # ScanCode.io is a free software code scanning tool from nexB Inc. and others. |
21 | 21 | # Visit https://github.com/aboutcode-org/scancode.io for support and download. |
22 | 22 |
|
| 23 | +import re |
23 | 24 | from collections import Counter |
24 | 25 | from collections import defaultdict |
25 | 26 | from contextlib import suppress |
@@ -659,7 +660,10 @@ def _match_purldb_resources( |
659 | 660 |
|
660 | 661 | for to_resource in progress.iter(resource_iterator): |
661 | 662 | resources_by_sha1[to_resource.sha1].append(to_resource) |
662 | | - if to_resource.path.endswith(".map"): |
| 663 | + if ( |
| 664 | + to_resource.path.endswith(".map") |
| 665 | + and "json" in to_resource.file_type.lower() |
| 666 | + ): |
663 | 667 | for js_sha1 in js.source_content_sha1_list(to_resource): |
664 | 668 | resources_by_sha1[js_sha1].append(to_resource) |
665 | 669 | processed_resources_count += 1 |
@@ -1620,21 +1624,20 @@ def match_purldb_resources_post_process(project, logger=None): |
1620 | 1624 | map_count = 0 |
1621 | 1625 |
|
1622 | 1626 | for directory in progress.iter(resource_iterator): |
1623 | | - map_count += _match_purldb_resources_post_process( |
1624 | | - directory, to_extract_directories, to_resources |
1625 | | - ) |
| 1627 | + map_count += _match_purldb_resources_post_process(directory, to_resources) |
1626 | 1628 |
|
1627 | 1629 | logger(f"{map_count:,d} resource processed") |
1628 | 1630 |
|
1629 | 1631 |
|
1630 | | -def _match_purldb_resources_post_process( |
1631 | | - directory_path, to_extract_directories, to_resources |
1632 | | -): |
| 1632 | +def _match_purldb_resources_post_process(directory, to_resources): |
| 1633 | + # Escape special characters in the directory path so the regex below matches them literally. |
| 1634 | + escaped_directory_path = re.escape(directory.path) |
| 1635 | + |
1633 | 1636 | # Exclude the content of nested archives. |
1634 | 1637 | interesting_codebase_resources = ( |
1635 | | - to_resources.filter(path__startswith=directory_path) |
| 1638 | + to_resources.filter(path__startswith=directory.path) |
1636 | 1639 | .filter(status=flag.MATCHED_TO_PURLDB_RESOURCE) |
1637 | | - .exclude(path__regex=rf"^{directory_path}.*-extract\/.*$") |
| 1640 | + .exclude(path__regex=rf"^{escaped_directory_path}.*-extract\/.*$") |
1638 | 1641 | ) |
1639 | 1642 |
|
1640 | 1643 | if not interesting_codebase_resources: |
|
0 commit comments