@@ -51,6 +51,7 @@ def __init__(self, results: List[PathScanResult] = []) -> None:
5151 self .paths_containing_personal_data : List [PathScanResult ] = []
5252 self .paths_skipped : List [PathScanResult ] = []
5353 self .paths_excluded : List [PathScanResult ] = []
54+ self .paths_errored : List [PathScanResult ] = []
5455 self .add_path_scan_results (results )
5556
5657 def add_path_scan_results (self , scan_results : List [PathScanResult ]):
@@ -70,6 +71,9 @@ def add_path_scan_result(self, scan_result: PathScanResult):
7071 if scan_result .status == PathScanStatus .SKIPPED :
7172 self .paths_skipped .append (scan_result )
7273
74+ if scan_result .status == PathScanStatus .ERRORED :
75+ self .paths_errored .append (scan_result )
76+
7377 def __str__ (self ) -> str :
7478 with StringIO () as output_buffer :
7579 output_buffer .write ("--------PERSONAL DATA SCAN SUMMARY--------" )
@@ -94,6 +98,13 @@ def __str__(self) -> str:
9498 paths_without_issues_table .add_row ([valid_path .path ])
9599 output_buffer .write (str (paths_without_issues_table ))
96100
101+ if self .paths_errored :
102+ output_buffer .write ("\n \n FILES ERRORED\n " )
103+ errored_paths_table = PrettyTable (["Path" ])
104+ for errored_path in self .paths_errored :
105+ errored_paths_table .add_row ([errored_path .path ])
106+ output_buffer .write (str (errored_paths_table ))
107+
97108 if self .paths_containing_personal_data :
98109 output_buffer .write ("\n \n FILES CONTAINING PERSONAL DATA\n " )
99110
@@ -155,29 +166,33 @@ def _scan_content(self, analyzer: AnalyzerEngine, entities: List[str], content:
async def _scan_path(
    self, analyzer: AnalyzerEngine, entities: List[str], file_path: str, exclusions: List[re.Pattern[str]]
) -> PathScanResult:
    """Scan a single file for personal data and report the outcome.

    The path is first handed to ``PathFilter._check_is_path_invalid``; when
    that returns a status (anything other than ``None``) the file is not
    opened and the status is returned as-is. Otherwise the file is opened
    as UTF-8 text and passed to ``_scan_content`` either line by line (for
    extensions listed in ``LINE_BY_LINE_FILE_EXTENSIONS``) or as one string.

    Args:
        analyzer: Engine forwarded unchanged to ``_scan_content``.
        entities: Entity names forwarded unchanged to ``_scan_content``.
        file_path: Path of the file to scan.
        exclusions: Compiled patterns forwarded to the path filter.

    Returns:
        A ``PathScanResult`` whose status is the filter's status, ``PASSED``
        when no detections were collected, ``FAILED`` when at least one was,
        or ``ERRORED`` when any ``Exception`` was raised along the way.
    """
    try:
        path_filter = PathFilter()
        rejection_status = await path_filter._check_is_path_invalid(file_path, exclusions)
        if rejection_status is not None:
            return PathScanResult(file_path, rejection_status)

        suffix = Path(file_path).suffix.lower()
        async with await open_file(file_path, "r", encoding="utf-8") as handle:
            detections: List[PersonalDataDetection] = []
            if suffix not in self.LINE_BY_LINE_FILE_EXTENSIONS:
                # Whole-file mode: read everything first, then scan it once.
                text = await handle.read()
                logger.debug("Scanning file %s by reading all contents", file_path)
                detections.extend(self._scan_content(analyzer, entities, text))
            else:
                # Line mode: trailing whitespace is stripped before scanning.
                logger.debug("Scanning file %s line by line", file_path)
                async for raw_line in handle:
                    detections.extend(self._scan_content(analyzer, entities, raw_line.rstrip()))

        if detections:
            outcome = PathScanStatus.FAILED
        else:
            outcome = PathScanStatus.PASSED
        return PathScanResult(file_path, status=outcome, results=detections)
    except Exception:
        # One unreadable or unscannable file must not abort the whole run:
        # record the traceback and surface the path as ERRORED instead.
        logger.exception("The file scanner failed to read file %s", file_path, stack_info=True)
        return PathScanResult(file_path, status=PathScanStatus.ERRORED)
182197 async def scan (
183198 self ,
0 commit comments