测试集中没有rel和json文件夹,可是以下代码会检查rel和json文件夹是否存在,不存在则报错,要怎么解决?
def load_dataset(self, dataset_dir, with_cells, trim=None, debug=False, exts=None):
    """Load one ``Data`` record per file listed by ``get_sub_paths``.

    Args:
        dataset_dir: Root directory that holds one sub-directory per entry
            of ``exts``.
        with_cells: When true, the ``json`` sub-directory is read as well
            and each file's parsed JSON is stored in ``cells``; otherwise
            ``cells`` is ``None``.
        trim: Forwarded unchanged to ``get_sub_paths``.
        debug: When true, stop once the sample index exceeds 50.
        exts: Sub-directory names passed to ``get_sub_paths``. The first
            entry is treated as the chunk directory. Defaults to
            ``['chunk', 'rel']``. For a dataset that has no relation
            files (e.g. a test split), pass ``exts=['chunk']`` with
            ``with_cells=False``: only the chunk directory is then
            required and ``relations`` is ``None``.

    Returns:
        A list of ``Data`` objects, one per sample.
    """
    # Work on a copy so the caller's list is never mutated by the append.
    exts = ['chunk', 'rel'] if exts is None else list(exts)
    if with_cells and 'json' not in exts:
        exts.append('json')

    # Locate the optional inputs by name instead of by fixed position, so a
    # missing 'rel' entry does not shift (or break) the 'json' lookup.
    rel_idx = exts.index('rel') if 'rel' in exts else None
    cell_idx = exts.index('json') if with_cells else None

    dataset = []
    sub_paths = self.get_sub_paths(dataset_dir, exts, trim=trim)
    for i, paths in enumerate(sub_paths):
        if debug and i > 50:
            break
        chunk_path = paths[0]
        chunks = self.load_chunks(chunk_path)
        # TODO handle big tables (a disabled filter skipped samples with
        # more than 100 chunks or with none).
        relations = None
        if rel_idx is not None:
            relations = self.load_relations(paths[rel_idx])
        # NOTE: the self.clean_chunk_rel(chunks, relations) step is disabled.
        cell_json = None
        if cell_idx is not None:
            # JSON text is UTF-8 by specification; do not rely on the
            # platform default encoding.
            with open(paths[cell_idx], encoding='utf-8') as f:
                cell_json = json.load(f)
        dataset.append(Data(
            chunks=chunks,
            relations=relations,
            cells=cell_json,
            path=chunk_path,
        ))
    return dataset
def get_sub_paths(self, root_dir: str, sub_names: List[str], trim=None):
    """Collect, per sample, the matching file path in every sub-directory.

    The files of the first sub-directory define the samples. For every
    other name in ``sub_names`` the sibling file is expected at
    ``<root_dir>/<name>/<stem>.<name>``, where ``stem`` is the sample's
    file name without its extension.

    Args:
        root_dir: Directory containing one sub-directory per name.
        sub_names: Sub-directory names; each name after the first is also
            used as the file extension of its files.
        trim: If truthy, only the first ``trim`` file names (in sorted
            order) are kept.

    Returns:
        A list with one inner list per sample; the inner list holds the
        sample's path in each sub-directory, in ``sub_names`` order.

    Raises:
        AssertionError: If ``root_dir`` or a sub-directory is not a
            directory, or a sibling file does not exist.
    """
    # Raise explicitly (keeping the original exception type) so the checks
    # still run when Python is started with -O, which strips ``assert``.
    if not os.path.isdir(root_dir):
        raise AssertionError('"%s" is not dir.' % root_dir)

    sub_dirs = []
    for sub_name in sub_names:
        sub_dir = os.path.join(root_dir, sub_name)
        if not os.path.isdir(sub_dir):
            raise AssertionError('"%s" is not dir.' % sub_dir)
        sub_dirs.append(sub_dir)

    # os.listdir returns entries in arbitrary order; sort so that the
    # result, and the subset selected by ``trim``, is reproducible.
    file_names = sorted(os.listdir(sub_dirs[0]))
    if trim:
        file_names = file_names[:trim]

    paths = []
    for file_name in file_names:
        stem = os.path.splitext(file_name)[0]
        sample_paths = [os.path.join(sub_dirs[0], file_name)]
        for sub_name, sub_dir in zip(sub_names[1:], sub_dirs[1:]):
            sub_path = os.path.join(sub_dir, stem + '.' + sub_name)
            if not os.path.exists(sub_path):
                raise AssertionError('"%s" does not exist.' % sub_path)
            sample_paths.append(sub_path)
        paths.append(sample_paths)
    return paths
测试集中没有rel和json文件夹,可是以下代码会检查rel和json文件夹是否存在,不存在则报错,要怎么解决?
def load_dataset(self, dataset_dir, with_cells, trim=None, debug=False, exts=None):
# Builds the list of sub-directory names, asks get_sub_paths for the
# per-sample path lists, and reads the chunk and relation path of each one.
# NOTE(review): in the visible text this copy stops after reading the two
# paths -- nothing is loaded, appended to `dataset`, or returned. It looks
# like a truncated excerpt of a longer function; confirm before relying on it.
dataset, cells = [], []
# Default sub-directories when the caller does not supply any.
if exts is None:
exts = ['chunk','rel']
# 'json' is appended in place, so a caller-supplied list is modified too.
if with_cells:
exts.append('json')
sub_paths = self.get_sub_paths(dataset_dir, exts, trim=trim)
for i, paths in enumerate(sub_paths):
# In debug mode, stop once the sample index exceeds 50.
if debug and i > 50:
break
# Positional lookup: entry 0 is taken as the chunk path and entry 1 as the
# relation path, so `exts` must contain at least two names.
chunk_path = paths[0]
relation_path = paths[1]
def get_sub_paths(self, root_dir: str, sub_names: List[str], trim=None):
# Check the existence of directories
assert os.path.isdir(root_dir)
# TODO: sub_dirs redundancy
sub_dirs = []
for sub_name in sub_names:
sub_dir = os.path.join(root_dir, sub_name)
assert os.path.isdir(sub_dir), '"%s" is not dir.' % sub_dir
sub_dirs.append(sub_dir)