diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index 9a317f3c68..2a8c8fbf28 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -64,7 +64,7 @@ EXTERNL int NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp); EXTERNL int ncz_splitkey(const char* path, NClist* segments); EXTERNL int ncz_nctypedecode(const char* snctype, nc_type* nctypep); EXTERNL int ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr,int len, char** dnamep); -EXTERNL int ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp); +EXTERNL int ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp, const char **unitp); EXTERNL int NCZ_inferattrtype(const NCjson* value, nc_type typehint, nc_type* typeidp); EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative); EXTERNL int ncz_fill_value_sort(nc_type nctype, int*); diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index 85adaf667d..8349674841 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -988,7 +988,7 @@ computeattrinfo(const char* name, const NCjson* jtypes, nc_type typehint, int pu if(strcmp(NCJstring(akey),name)==0) { const NCjson* avalue = NULL; NCJdictget(jtypes,NCJstring(akey),&avalue); - if((stat = ncz_dtype2nctype(NCJstring(avalue),typehint,purezarr,&typeid,NULL,NULL))) goto done; + if((stat = ncz_dtype2nctype(NCJstring(avalue),typehint,purezarr,&typeid,NULL,NULL,NULL))) goto done; // if((stat = ncz_nctypedecode(atype,&typeid))) goto done; break; } @@ -1460,7 +1460,7 @@ define_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* varname) int endianness; if((stat = NCJdictget(jvar,"dtype",&jvalue))<0) {stat = NC_EINVAL; goto done;} /* Convert dtype to nc_type + endianness */ - if((stat = ncz_dtype2nctype(NCJstring(jvalue),NC_NAT,purezarr,&vtype,&endianness,&vtypelen))) + if((stat = ncz_dtype2nctype(NCJstring(jvalue),NC_NAT,purezarr,&vtype,&endianness,&vtypelen, NULL))) goto done; 
/**
 * @internal Translate a datetime/timedelta unit code into its unit name.
 *
 * The unit code is the text found between '[' and ']' in a time dtype
 * string (for example the "ns" in "<M8[ns]").  Exactly `len` bytes are
 * examined, so `unit` does not need to be NUL-terminated at `unit + len`;
 * callers can pass a pointer into the middle of a longer dtype string.
 *
 * The whole code must match: a recognised first letter followed by extra
 * bytes (e.g. "Dx" or "hrs") is rejected instead of being read as the
 * single-letter unit.
 *
 * Recognised codes:
 *   1 byte : Y M W D h m s
 *   2 bytes: ms us ns ps fs as
 *   3 bytes: UTF-8 "μs" (Greek small mu or micro sign followed by 's')
 *
 * @param unit - [in] first byte of the unit code; may be NULL
 * @param len  - [in] number of bytes that make up the unit code
 *
 * @return a pointer to a constant string naming the unit (never to be
 *         freed or modified by the caller), or NULL when the code is not
 *         recognised.
 */
const char*
datetime_unit_code_to_meaning(const char* unit, size_t len)
{
    if(unit == NULL)
        return NULL;

    if(len == 1) {
        switch (unit[0]) {
        case 'Y': return "year";
        case 'M': return "month";
        case 'W': return "week";
        case 'D': return "day";
        case 'h': return "hour";
        case 'm': return "minute";
        case 's': return "second";
        default:  return NULL;
        }
    }

    if(len == 2) {
        /* Every two-byte code is an SI prefix followed by 's'. */
        if(unit[1] != 's')
            return NULL;
        switch (unit[0]) {
        case 'm': return "millisecond";
        case 'u': return "microsecond";
        case 'n': return "nanosecond";
        case 'p': return "picosecond";
        case 'f': return "femtosecond";
        case 'a': return "attosecond";
        default:  return NULL;
        }
    }

    if(len == 3) {
        /* Compare as unsigned bytes: plain char may be signed, and the
           UTF-8 lead bytes below are all >= 0x80. */
        const unsigned char* b = (const unsigned char*)unit;
        int greek_mu   = (b[0] == 0xCE && b[1] == 0xBC); /* U+03BC */
        int micro_sign = (b[0] == 0xC2 && b[1] == 0xB5); /* U+00B5 */
        if(b[2] == 's' && (greek_mu || micro_sign))
            return "microsecond";
        return NULL;
    }

    /* Empty or over-long unit code. */
    return NULL;
}
@param nctype - [in] dtype the dtype to convert @@ -501,7 +532,7 @@ ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr, int len, char** d */ int -ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp) +ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp, const char ** unitp) { int stat = NC_NOERR; int typelen = 0; @@ -511,8 +542,10 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct int endianness = -1; const char* p; int n; + const char *unit_name = NULL; - if(endianp) *endianp = NC_ENDIAN_NATIVE; + if (endianp) + *endianp = NC_ENDIAN_NATIVE; if(nctypep) *nctypep = NC_NAT; if(dtype == NULL) goto zerr; @@ -529,65 +562,74 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct if(count == 0) goto zerr; p += n; - /* Short circuit fixed length strings */ - if(tchar == 'S') { - /* Fixed length string */ - switch (typelen) { - case 1: - nctype = (endianness == NC_ENDIAN_BIG ? 
NC_CHAR : NC_STRING); - if(purezarr) nctype = NC_STRING; /* Zarr has no NC_CHAR type */ - break; - default: - nctype = NC_STRING; - break; - } - /* String/char have no endianness */ - endianness = NC_ENDIAN_NATIVE; - } else { - switch(typelen) { - case 1: - switch (tchar) { - case 'i': nctype = NC_BYTE; break; - case 'u': nctype = NC_UBYTE; break; - default: goto zerr; - } - break; - case 2: - switch (tchar) { - case 'i': nctype = NC_SHORT; break; - case 'u': nctype = NC_USHORT; break; - default: goto zerr; - } - break; - case 4: - switch (tchar) { - case 'i': nctype = NC_INT; break; - case 'u': nctype = NC_UINT; break; - case 'f': nctype = NC_FLOAT; break; - default: goto zerr; - } - break; - case 8: - switch (tchar) { - case 'i': nctype = NC_INT64; break; - case 'u': nctype = NC_UINT64; break; - case 'f': nctype = NC_DOUBLE; break; - default: goto zerr; - } - break; - default: goto zerr; - } + switch (tchar) { + case 'S': + /* Fixed length string */ + /* String/char have no endianness */ + switch (typelen) { + case 1: + nctype = (endianness == NC_ENDIAN_BIG ? 
NC_CHAR : NC_STRING); + if(purezarr) nctype = NC_STRING; /* Zarr has no NC_CHAR type */ + break; + default: + nctype = NC_STRING; + break; + } + endianness = NC_ENDIAN_NATIVE; + break; + case 'm': //timedelta + case 'M': //datetime + if (*p != '[') { + nclog(NCLOGERR, "Malformed dtype %s, time datatypes MUST have units (%s)", dtype,p); + goto zerr; + } + const char *units = ++p; + const char * end = strchr(p,']'); + if (end == NULL) { + nclog(NCLOGERR, "Malformed dtype %s, expected units to be within `[]`", dtype); + goto zerr; + } + // NULL if unable to convert + unit_name = datetime_unit_code_to_meaning(units, (size_t)(end - units)); + if( unit_name == NULL ){ + nclog(NCLOGERR, "Wrong unit value in dtype %s", dtype); + goto zerr; + } + break; + case 'i': // both are int internaly + switch(typelen) { + case 1: nctype = NC_BYTE; break; + case 2: nctype = NC_SHORT; break; + case 4: nctype = NC_INT; break; + case 8: nctype = NC_INT64; break; + default: goto zerr; + } + break; + case 'u': + switch(typelen) { + case 1: nctype = NC_UBYTE; break; + case 2: nctype = NC_USHORT; break; + case 4: nctype = NC_UINT; break; + case 8: nctype = NC_UINT64; break; + default: goto zerr; + } + break; + case 'f': + switch(typelen) { + case 4: nctype = NC_FLOAT; break; + case 8: nctype = NC_DOUBLE; break; + default: goto zerr; + } + break; + default: + goto zerr; + break; } -#if 0 - /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */ - if(endianness == NC_ENDIAN_NATIVE) - endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); -#endif - if(nctypep) *nctypep = nctype; if(typelenp) *typelenp = typelen; if(endianp) *endianp = endianness; + if(unitp) *unitp = unit_name; done: return stat;