Skip to content

Commit 7d32ea1

Browse files
Fix object type and remove legacy external tables
- Add save_template() method to Config for creating datajoint.json templates - Add default_store setting to ObjectStorageSettings - Fix get_store_backend() to use default object storage when no store specified - Fix StorageBackend._full_path() to prepend location for all protocols - Fix StorageBackend.open() to create parent directories for write mode - Fix ObjectType to support tuple (extension, data) format for streams - Fix ObjectType to pass through pre-computed metadata for staged inserts - Fix staged_insert.py path handling (use relative paths consistently) - Fix table.py __make_placeholder to handle None values for adapter types - Update schema_object.py to use <object> syntax (angle brackets required) - Remove legacy external table support (Table.external property) - Remove legacy external tests (test_filepath, test_external_class, test_s3) - Add tests for save_template() method Test results: 471 passed, 2 skipped 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent d51c16e commit 7d32ea1

File tree

12 files changed

+280
-396
lines changed

12 files changed

+280
-396
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,4 @@ dj_local_conf.json
187187
!.vscode/launch.json
188188
# pixi environments
189189
.pixi
190+
_content/

src/datajoint/builtin_types.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,23 +345,42 @@ def encode(
345345
field = key.pop("_field", "data")
346346
primary_key = {k: v for k, v in key.items() if not k.startswith("_")}
347347

348+
# Check for pre-computed metadata (from staged insert)
349+
if isinstance(value, dict) and "path" in value:
350+
# Already encoded, pass through
351+
return value
352+
348353
# Determine content type and extension
349354
is_dir = False
350355
ext = None
351356
size = None
357+
item_count = None
358+
source_path = None
352359

353360
if isinstance(value, bytes):
354361
content = value
355362
size = len(content)
363+
elif isinstance(value, tuple) and len(value) == 2:
364+
# Tuple format: (extension, data) where data is bytes or file-like
365+
ext, data = value
366+
if hasattr(data, "read"):
367+
content = data.read()
368+
else:
369+
content = data
370+
size = len(content)
356371
elif isinstance(value, (str, Path)):
357372
source_path = Path(value)
358373
if not source_path.exists():
359-
raise FileNotFoundError(f"Source path does not exist: {source_path}")
374+
from .errors import DataJointError
375+
376+
raise DataJointError(f"Source path not found: {source_path}")
360377
is_dir = source_path.is_dir()
361378
ext = source_path.suffix if not is_dir else None
362379
if is_dir:
363380
# For directories, we'll upload later
364381
content = None
382+
# Count items in directory
383+
item_count = sum(1 for _ in source_path.rglob("*") if _.is_file())
365384
else:
366385
content = source_path.read_bytes()
367386
size = len(content)
@@ -398,6 +417,7 @@ def encode(
398417
"size": size,
399418
"ext": ext,
400419
"is_dir": is_dir,
420+
"item_count": item_count,
401421
"timestamp": timestamp.isoformat(),
402422
}
403423

@@ -422,12 +442,19 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any:
422442
return ObjectRef.from_json(stored, backend=backend)
423443

424444
def validate(self, value: Any) -> None:
425-
"""Validate that value is bytes or a valid path."""
445+
"""Validate that value is bytes, path, dict metadata, or (extension, data) tuple."""
426446
from pathlib import Path
427447

428448
if isinstance(value, bytes):
429449
return
430450
if isinstance(value, (str, Path)):
451+
# Could be a path or pre-encoded JSON string
452+
return
453+
if isinstance(value, tuple) and len(value) == 2:
454+
# Tuple format: (extension, data)
455+
return
456+
if isinstance(value, dict) and "path" in value:
457+
# Pre-computed metadata dict (from staged insert)
431458
return
432459
raise TypeError(f"<object> expects bytes or path, got {type(value).__name__}")
433460

src/datajoint/content_registry.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,19 +55,17 @@ def get_store_backend(store_name: str | None = None) -> StorageBackend:
5555
Get a StorageBackend for content storage.
5656
5757
Args:
58-
store_name: Name of the store to use. If None, uses the default store.
58+
store_name: Name of the store to use. If None, uses the default object storage
59+
configuration or the configured default_store.
5960
6061
Returns:
6162
StorageBackend instance
6263
"""
63-
if store_name is None:
64-
# Use default store from object_storage settings
64+
# If store_name is None, check for configured default_store
65+
if store_name is None and config.object_storage.default_store:
6566
store_name = config.object_storage.default_store
66-
if store_name is None:
67-
raise DataJointError(
68-
"No default store configured. Set object_storage.default_store " "or specify a store name explicitly."
69-
)
7067

68+
# get_object_store_spec handles None by returning default object_storage config
7169
spec = config.get_object_store_spec(store_name)
7270
return StorageBackend(spec)
7371

src/datajoint/settings.py

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ class ObjectStorageSettings(BaseSettings):
212212
secure: bool = Field(default=True, description="Use HTTPS")
213213

214214
# Optional settings
215+
default_store: str | None = Field(default=None, description="Default store name when not specified")
215216
partition_pattern: str | None = Field(default=None, description="Path pattern with {attribute} placeholders")
216217
token_length: int = Field(default=8, ge=4, le=16, description="Random suffix length for filenames")
217218

@@ -647,6 +648,118 @@ def override(self, **kwargs: Any) -> Iterator["Config"]:
647648
group, attr = key_parts
648649
setattr(getattr(self, group), attr, original)
649650

651+
@staticmethod
652+
def save_template(
653+
path: str | Path = "datajoint.json",
654+
minimal: bool = True,
655+
create_secrets_dir: bool = True,
656+
) -> Path:
657+
"""
658+
Create a template datajoint.json configuration file.
659+
660+
Credentials should NOT be stored in datajoint.json. Instead, use either:
661+
- Environment variables (DJ_USER, DJ_PASS, DJ_HOST, etc.)
662+
- The .secrets/ directory (created alongside datajoint.json)
663+
664+
Args:
665+
path: Where to save the template. Defaults to 'datajoint.json' in current directory.
666+
minimal: If True (default), create a minimal template with just database settings.
667+
If False, create a full template with all available settings.
668+
create_secrets_dir: If True (default), also create a .secrets/ directory
669+
with template files for credentials.
670+
671+
Returns:
672+
Path to the created config file.
673+
674+
Raises:
675+
FileExistsError: If config file already exists (won't overwrite).
676+
677+
Example:
678+
>>> import datajoint as dj
679+
>>> dj.config.save_template() # Creates minimal template + .secrets/
680+
>>> dj.config.save_template("full-config.json", minimal=False)
681+
"""
682+
filepath = Path(path)
683+
if filepath.exists():
684+
raise FileExistsError(f"File already exists: {filepath}. Remove it first or choose a different path.")
685+
686+
if minimal:
687+
template = {
688+
"database": {
689+
"host": "localhost",
690+
"port": 3306,
691+
},
692+
}
693+
else:
694+
template = {
695+
"database": {
696+
"host": "localhost",
697+
"port": 3306,
698+
"reconnect": True,
699+
"use_tls": None,
700+
},
701+
"connection": {
702+
"init_function": None,
703+
"charset": "",
704+
},
705+
"display": {
706+
"limit": 12,
707+
"width": 14,
708+
"show_tuple_count": True,
709+
},
710+
"object_storage": {
711+
"project_name": None,
712+
"protocol": None,
713+
"location": None,
714+
"bucket": None,
715+
"endpoint": None,
716+
"secure": True,
717+
"partition_pattern": None,
718+
"token_length": 8,
719+
},
720+
"stores": {},
721+
"loglevel": "INFO",
722+
"safemode": True,
723+
"fetch_format": "array",
724+
"enable_python_native_blobs": True,
725+
"cache": None,
726+
"query_cache": None,
727+
"download_path": ".",
728+
}
729+
730+
with open(filepath, "w") as f:
731+
json.dump(template, f, indent=2)
732+
f.write("\n")
733+
734+
logger.info(f"Created template configuration at {filepath.absolute()}")
735+
736+
# Create .secrets/ directory with template files
737+
if create_secrets_dir:
738+
secrets_dir = filepath.parent / SECRETS_DIRNAME
739+
secrets_dir.mkdir(exist_ok=True)
740+
741+
# Create placeholder secret files
742+
secret_templates = {
743+
"database.user": "your_username",
744+
"database.password": "your_password",
745+
}
746+
for secret_name, placeholder in secret_templates.items():
747+
secret_file = secrets_dir / secret_name
748+
if not secret_file.exists():
749+
secret_file.write_text(placeholder)
750+
751+
# Create .gitignore to prevent committing secrets
752+
gitignore_path = secrets_dir / ".gitignore"
753+
if not gitignore_path.exists():
754+
gitignore_path.write_text("# Never commit secrets\n*\n!.gitignore\n")
755+
756+
logger.info(
757+
f"Created {SECRETS_DIRNAME}/ directory with credential templates. "
758+
f"Edit the files in {secrets_dir.absolute()}/ to set your credentials."
759+
)
760+
761+
return filepath.absolute()
762+
650763
# Dict-like access for convenience
651764
def __getitem__(self, key: str) -> Any:
652765
"""Get setting by dot-notation key (e.g., 'database.host')."""
@@ -724,7 +837,7 @@ def _create_config() -> Config:
724837
else:
725838
warnings.warn(
726839
f"No {CONFIG_FILENAME} found. Using defaults and environment variables. "
727-
f"Create {CONFIG_FILENAME} in your project root to configure DataJoint.",
840+
f"Run `dj.config.save_template()` to create a template configuration.",
728841
stacklevel=2,
729842
)
730843

src/datajoint/staged_insert.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,8 @@ def _get_storage_path(self, field: str, ext: str = "") -> str:
114114
spec = config.get_object_storage_spec()
115115
partition_pattern = spec.get("partition_pattern")
116116
token_length = spec.get("token_length", 8)
117-
location = spec.get("location", "")
118117

119-
# Build storage path
118+
# Build storage path (relative - StorageBackend will add location prefix)
120119
relative_path, token = build_object_path(
121120
schema=self._table.database,
122121
table=self._table.class_name,
@@ -127,18 +126,14 @@ def _get_storage_path(self, field: str, ext: str = "") -> str:
127126
token_length=token_length,
128127
)
129128

130-
# Full path with location prefix
131-
full_path = f"{location}/{relative_path}" if location else relative_path
132-
133-
# Store staged object info
129+
# Store staged object info (all paths are relative, backend adds location)
134130
self._staged_objects[field] = {
135131
"relative_path": relative_path,
136-
"full_path": full_path,
137132
"ext": ext if ext else None,
138133
"token": token,
139134
}
140135

141-
return full_path
136+
return relative_path
142137

143138
def store(self, field: str, ext: str = "") -> fsspec.FSMap:
144139
"""
@@ -180,11 +175,12 @@ def _compute_metadata(self, field: str) -> dict:
180175
JSON-serializable metadata dict
181176
"""
182177
info = self._staged_objects[field]
183-
full_path = info["full_path"]
178+
relative_path = info["relative_path"]
184179
ext = info["ext"]
185180

186181
# Check if it's a directory (multiple files) or single file
187-
full_remote_path = self._backend._full_path(full_path)
182+
# _full_path adds the location prefix
183+
full_remote_path = self._backend._full_path(relative_path)
188184

189185
try:
190186
is_dir = self._backend.fs.isdir(full_remote_path)
@@ -218,11 +214,11 @@ def _compute_metadata(self, field: str) -> dict:
218214
}
219215

220216
# Write manifest alongside folder
221-
manifest_path = f"{full_path}.manifest.json"
217+
manifest_path = f"{relative_path}.manifest.json"
222218
self._backend.put_buffer(json.dumps(manifest, indent=2).encode(), manifest_path)
223219

224220
metadata = {
225-
"path": info["relative_path"],
221+
"path": relative_path,
226222
"size": total_size,
227223
"hash": None,
228224
"ext": ext,
@@ -233,12 +229,12 @@ def _compute_metadata(self, field: str) -> dict:
233229
else:
234230
# Single file
235231
try:
236-
size = self._backend.size(full_path)
232+
size = self._backend.size(relative_path)
237233
except Exception:
238234
size = 0
239235

240236
metadata = {
241-
"path": info["relative_path"],
237+
"path": relative_path,
242238
"size": size,
243239
"hash": None,
244240
"ext": ext,
@@ -261,8 +257,8 @@ def _finalize(self):
261257
# Process each staged object
262258
for field in list(self._staged_objects.keys()):
263259
metadata = self._compute_metadata(field)
264-
# Store JSON metadata in the record
265-
self._rec[field] = json.dumps(metadata)
260+
# Store metadata dict in the record (ObjectType.encode handles it)
261+
self._rec[field] = metadata
266262

267263
# Insert the record
268264
self._table.insert1(self._rec)
@@ -275,15 +271,15 @@ def _cleanup(self):
275271
return
276272

277273
for field, info in self._staged_objects.items():
278-
full_path = info["full_path"]
274+
relative_path = info["relative_path"]
279275
try:
280276
# Check if it's a directory
281-
full_remote_path = self._backend._full_path(full_path)
277+
full_remote_path = self._backend._full_path(relative_path)
282278
if self._backend.fs.exists(full_remote_path):
283279
if self._backend.fs.isdir(full_remote_path):
284-
self._backend.remove_folder(full_path)
280+
self._backend.remove_folder(relative_path)
285281
else:
286-
self._backend.remove(full_path)
282+
self._backend.remove(relative_path)
287283
except Exception:
288284
pass # Best effort cleanup
289285

src/datajoint/storage.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def _create_filesystem(self) -> fsspec.AbstractFileSystem:
279279

280280
def _full_path(self, path: str | PurePosixPath) -> str:
281281
"""
282-
Construct full path including bucket for cloud storage.
282+
Construct full path including location/bucket prefix.
283283
284284
Args:
285285
path: Relative path within the storage location
@@ -290,12 +290,21 @@ def _full_path(self, path: str | PurePosixPath) -> str:
290290
path = str(path)
291291
if self.protocol == "s3":
292292
bucket = self.spec["bucket"]
293+
location = self.spec.get("location", "")
294+
if location:
295+
return f"{bucket}/{location}/{path}"
293296
return f"{bucket}/{path}"
294297
elif self.protocol in ("gcs", "azure"):
295298
bucket = self.spec.get("bucket") or self.spec.get("container")
299+
location = self.spec.get("location", "")
300+
if location:
301+
return f"{bucket}/{location}/{path}"
296302
return f"{bucket}/{path}"
297303
else:
298-
# Local filesystem - path is already absolute or relative to cwd
304+
# Local filesystem - prepend location if specified
305+
location = self.spec.get("location", "")
306+
if location:
307+
return str(Path(location) / path)
299308
return path
300309

301310
def put_file(self, local_path: str | Path, remote_path: str | PurePosixPath, metadata: dict | None = None):
@@ -446,6 +455,11 @@ def open(self, remote_path: str | PurePosixPath, mode: str = "rb"):
446455
File-like object
447456
"""
448457
full_path = self._full_path(remote_path)
458+
459+
# For write modes on local filesystem, ensure parent directory exists
460+
if self.protocol == "file" and "w" in mode:
461+
Path(full_path).parent.mkdir(parents=True, exist_ok=True)
462+
449463
return self.fs.open(full_path, mode)
450464

451465
def put_folder(self, local_path: str | Path, remote_path: str | PurePosixPath) -> dict:

0 commit comments

Comments (0)