diff --git a/eng/versioning/external_dependencies.txt b/eng/versioning/external_dependencies.txt index cfc734894b77..d8b17d2900fd 100644 --- a/eng/versioning/external_dependencies.txt +++ b/eng/versioning/external_dependencies.txt @@ -25,6 +25,7 @@ com.github.spotbugs:spotbugs;4.8.3 com.github.spotbugs:spotbugs-annotations;4.8.3 com.github.spotbugs:spotbugs-maven-plugin;4.8.3.1 com.google.code.gson:gson;2.14.0 +com.google.flatbuffers:flatbuffers-java;24.3.25 com.google.guava:guava;33.6.0-jre com.h2database:h2;2.2.220 com.h3xstream.findsecbugs:findsecbugs-plugin;1.9.0 @@ -162,6 +163,10 @@ commons-io:commons-io;2.17.0 net.java.dev.jna:jna-platform;5.17.0 net.oneandone.reflections8:reflections8;0.11.7 net.jonathangiles.tools:dependencyChecker-maven-plugin;1.0.6 +org.apache.arrow:arrow-format;19.0.0 +org.apache.arrow:arrow-memory-core;19.0.0 +org.apache.arrow:arrow-memory-unsafe;19.0.0 +org.apache.arrow:arrow-vector;19.0.0 org.apache.commons:commons-collections4;4.4 org.apache.commons:commons-text;1.10.0 org.apache.maven:maven-plugin-api;3.9.11 @@ -186,6 +191,7 @@ org.apache.maven.plugins:maven-shade-plugin;3.6.0 org.apache.maven.plugins:maven-site-plugin;3.21.0 org.apache.maven.plugins:maven-source-plugin;3.3.1 org.apache.maven.plugins:maven-surefire-plugin;3.5.3 +org.checkerframework:checker-qual;3.42.0 org.codehaus.mojo:animal-sniffer-maven-plugin;1.24 org.codehaus.mojo:build-helper-maven-plugin;3.6.1 org.codehaus.mojo:exec-maven-plugin;3.5.1 diff --git a/sdk/parents/azure-client-sdk-parent/pom.xml b/sdk/parents/azure-client-sdk-parent/pom.xml index ae363a524be1..14c21d9be22f 100644 --- a/sdk/parents/azure-client-sdk-parent/pom.xml +++ b/sdk/parents/azure-client-sdk-parent/pom.xml @@ -602,6 +602,13 @@ com.google.code.findbugs:jsr305:[3.0.2] + + + com.google.flatbuffers:flatbuffers-java:[24.3.25] diff --git a/sdk/storage/azure-storage-blob/assets.json b/sdk/storage/azure-storage-blob/assets.json index 3949125ccf26..99770bd61507 100644 --- 
a/sdk/storage/azure-storage-blob/assets.json +++ b/sdk/storage/azure-storage-blob/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "java", "TagPrefix": "java/storage/azure-storage-blob", - "Tag": "java/storage/azure-storage-blob_2eefea69c0" + "Tag": "java/storage/azure-storage-blob_c4cdc68ef6" } diff --git a/sdk/storage/azure-storage-blob/pom.xml b/sdk/storage/azure-storage-blob/pom.xml index 4c591a7d5304..5d94fb7059b5 100644 --- a/sdk/storage/azure-storage-blob/pom.xml +++ b/sdk/storage/azure-storage-blob/pom.xml @@ -56,12 +56,35 @@ --add-reads com.azure.storage.blob=com.azure.core.http.okhttp --add-reads com.azure.storage.blob=com.azure.core.http.jdk.httpclient --add-reads com.azure.storage.blob=com.azure.core.http.vertx + --add-opens java.base/java.nio=ALL-UNNAMED concurrent false + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + base-testCompile + + false + + + + + + + com.azure @@ -138,6 +161,44 @@ 1.17.7 test + + + + + com.google.flatbuffers + flatbuffers-java + 24.3.25 + + + org.apache.arrow + arrow-vector + 19.0.0 + test + + + org.apache.arrow + arrow-memory-core + 19.0.0 + test + + + org.apache.arrow + arrow-memory-unsafe + 19.0.0 + test + + + org.checkerframework + checker-qual + 3.42.0 + test + + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerAsyncClient.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerAsyncClient.java index b86fe4e76b2f..6d5b4dd29249 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerAsyncClient.java +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerAsyncClient.java @@ -28,6 +28,8 @@ import com.azure.storage.blob.implementation.models.EncryptionScope; import com.azure.storage.blob.implementation.models.ListBlobsFlatSegmentResponse; import 
com.azure.storage.blob.implementation.models.ListBlobsHierarchySegmentResponse; +import com.azure.storage.blob.implementation.util.ArrowBlobListDeserializer; +import com.azure.storage.blob.implementation.util.ArrowBlobListDeserializer.ArrowListBlobsResult; import com.azure.storage.blob.implementation.util.BlobConstants; import com.azure.storage.blob.implementation.util.BlobSasImplUtil; import com.azure.storage.blob.implementation.util.ModelHelper; @@ -42,6 +44,7 @@ import com.azure.storage.blob.models.CustomerProvidedKey; import com.azure.storage.blob.models.ListBlobsIncludeItem; import com.azure.storage.blob.models.ListBlobsOptions; +import com.azure.storage.blob.models.StorageResponseSerializationFormat; import com.azure.storage.blob.models.PublicAccessType; import com.azure.storage.blob.models.StorageAccountInfo; import com.azure.storage.blob.models.TaggedBlobItem; @@ -1118,18 +1121,35 @@ PagedFlux listBlobsFlatWithOptionalTimeout(ListBlobsOptions options, S Duration timeout) { BiFunction>> func = (marker, pageSize) -> { ListBlobsOptions finalOptions; + /* + If pageSize was not set in a .byPage(int) method, the page size from options will be preserved. + Otherwise, prefer the new value. 
+ */ if (pageSize != null) { if (options == null) { finalOptions = new ListBlobsOptions().setMaxResultsPerPage(pageSize); } else { + // Note that this prefers the value passed to .byPage(int) over the value on the options finalOptions = new ListBlobsOptions().setMaxResultsPerPage(pageSize) .setPrefix(options.getPrefix()) - .setDetails(options.getDetails()); + .setDetails(options.getDetails()) + .setStartFrom(options.getStartFrom()); + if (ModelHelper.resolveSerializationFormat(options.getStorageResponseSerializationFormat()) + == StorageResponseSerializationFormat.ARROW) { + finalOptions.setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setEndBefore(options.getEndBefore()); + } } } else { finalOptions = options; } + if (finalOptions != null + && ModelHelper.resolveSerializationFormat(finalOptions.getStorageResponseSerializationFormat()) + == StorageResponseSerializationFormat.ARROW) { + return listBlobsFlatSegmentArrow(marker, finalOptions, timeout); + } + return listBlobsFlatSegment(marker, finalOptions, timeout).map(response -> { List value = response.getValue().getSegment() == null ? Collections.emptyList() @@ -1177,6 +1197,61 @@ PagedFlux listBlobsFlatWithOptionalTimeout(ListBlobsOptions options, S timeout); } + private Mono> listBlobsFlatSegmentArrow(String marker, ListBlobsOptions options, + Duration timeout) { + options = options == null ? new ListBlobsOptions() : options; + + ArrayList include + = options.getDetails().toList().isEmpty() ? 
null : options.getDetails().toList(); + + ListBlobsOptions finalOptions = options; + return StorageImplUtils.applyOptionalTimeout(this.azureBlobStorage.getContainers() + .listBlobFlatSegmentApacheArrowWithResponseAsync(containerName, finalOptions.getPrefix(), marker, + finalOptions.getMaxResultsPerPage(), include, null, finalOptions.getStartFrom(), + finalOptions.getEndBefore(), null, Context.NONE), + timeout).flatMap(response -> { + String contentType = response.getHeaders().getValue(com.azure.core.http.HttpHeaderName.CONTENT_TYPE); + + return FluxUtil.collectBytesInByteBufferStream(response.getValue()).map(bytes -> { + java.io.ByteArrayInputStream inputStream = new java.io.ByteArrayInputStream(bytes); + + if (contentType != null && contentType.contentEquals("application/vnd.apache.arrow.stream")) { + ArrowListBlobsResult arrowResult = ArrowBlobListDeserializer.deserialize(inputStream); + + List value = arrowResult.getBlobItems() + .stream() + .map(ModelHelper::populateBlobItem) + .collect(Collectors.toList()); + + return (PagedResponse) new PagedResponseBase<>(response.getRequest(), + response.getStatusCode(), response.getHeaders(), value, arrowResult.getNextMarker(), + response.getDeserializedHeaders()); + } else { + // XML fallback + try { + ListBlobsFlatSegmentResponse xmlResponse + = ListBlobsFlatSegmentResponse.fromXml(com.azure.xml.XmlReader.fromStream(inputStream)); + + List value = xmlResponse.getSegment() == null + ? 
Collections.emptyList() + : xmlResponse.getSegment() + .getBlobItems() + .stream() + .map(ModelHelper::populateBlobItem) + .collect(Collectors.toList()); + + return (PagedResponse) new PagedResponseBase<>(response.getRequest(), + response.getStatusCode(), response.getHeaders(), value, xmlResponse.getNextMarker(), + null); + } catch (javax.xml.stream.XMLStreamException e) { + throw LOGGER + .logExceptionAsError(new RuntimeException("Failed to parse XML fallback response", e)); + } + } + }); + }); + } + /** * Returns a reactive Publisher emitting all the blobs and directories (prefixes) under the given directory * (prefix). Directories will have {@link BlobItem#isPrefix()} set to true. @@ -1302,10 +1377,22 @@ PagedFlux listBlobsHierarchyWithOptionalTimeout(String delimiter, List .setPrefix(options.getPrefix()) .setDetails(options.getDetails()) .setStartFrom(options.getStartFrom()); + if (ModelHelper.resolveSerializationFormat(options.getStorageResponseSerializationFormat()) + == StorageResponseSerializationFormat.ARROW) { + finalOptions.setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setEndBefore(options.getEndBefore()); + } } } else { finalOptions = options; } + + if (finalOptions != null + && ModelHelper.resolveSerializationFormat(finalOptions.getStorageResponseSerializationFormat()) + == StorageResponseSerializationFormat.ARROW) { + return listBlobsHierarchySegmentArrow(marker, delimiter, finalOptions, timeout); + } + return listBlobsHierarchySegment(marker, delimiter, finalOptions, timeout).map(response -> { BlobHierarchyListSegment segment = response.getValue().getSegment(); List value; @@ -1344,6 +1431,70 @@ PagedFlux listBlobsHierarchyWithOptionalTimeout(String delimiter, List timeout); } + private Mono> listBlobsHierarchySegmentArrow(String marker, String delimiter, + ListBlobsOptions options, Duration timeout) { + options = options == null ? 
new ListBlobsOptions() : options; + if (options.getDetails().getRetrieveSnapshots()) { + throw LOGGER.logExceptionAsError( + new UnsupportedOperationException("Including snapshots in a hierarchical listing is not supported.")); + } + + ArrayList include + = options.getDetails().toList().isEmpty() ? null : options.getDetails().toList(); + + ListBlobsOptions finalOptions = options; + return StorageImplUtils + .applyOptionalTimeout(this.azureBlobStorage.getContainers() + .listBlobHierarchySegmentApacheArrowWithResponseAsync(containerName, delimiter, + finalOptions.getPrefix(), marker, finalOptions.getMaxResultsPerPage(), include, null, + finalOptions.getStartFrom(), finalOptions.getEndBefore(), null, Context.NONE), + timeout) + .flatMap(response -> { + String contentType = response.getHeaders().getValue(com.azure.core.http.HttpHeaderName.CONTENT_TYPE); + + return FluxUtil.collectBytesInByteBufferStream(response.getValue()).map(bytes -> { + java.io.ByteArrayInputStream inputStream = new java.io.ByteArrayInputStream(bytes); + + if (contentType != null && contentType.contentEquals("application/vnd.apache.arrow.stream")) { + ArrowListBlobsResult arrowResult = ArrowBlobListDeserializer.deserialize(inputStream); + + List value = arrowResult.getBlobItems() + .stream() + .map(ModelHelper::populateBlobItem) + .collect(Collectors.toList()); + + return (PagedResponse) new PagedResponseBase<>(response.getRequest(), + response.getStatusCode(), response.getHeaders(), value, arrowResult.getNextMarker(), + response.getDeserializedHeaders()); + } else { + // XML fallback + try { + ListBlobsHierarchySegmentResponse xmlResponse = ListBlobsHierarchySegmentResponse + .fromXml(com.azure.xml.XmlReader.fromStream(inputStream)); + + BlobHierarchyListSegment segment = xmlResponse.getSegment(); + List value = new ArrayList<>(); + if (segment != null) { + segment.getBlobItems() + .forEach(item -> value.add(BlobItemConstructorProxy.create(item))); + segment.getBlobPrefixes() + .forEach(prefix -> 
value + .add(new BlobItem().setName(ModelHelper.toBlobNameString(prefix.getName())) + .setIsPrefix(true))); + } + + return (PagedResponse) new PagedResponseBase<>(response.getRequest(), + response.getStatusCode(), response.getHeaders(), value, xmlResponse.getNextMarker(), + null); + } catch (javax.xml.stream.XMLStreamException e) { + throw LOGGER + .logExceptionAsError(new RuntimeException("Failed to parse XML fallback response", e)); + } + } + }); + }); + } + /** * Returns a reactive Publisher emitting the blobs in this container whose tags match the query expression. For more * information, including information on the query syntax, see the Azure Docs. diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerClient.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerClient.java index 64de81617f9c..e59b2508f71a 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerClient.java +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobContainerClient.java @@ -25,12 +25,16 @@ import com.azure.storage.blob.implementation.models.ContainersGetAccessPolicyHeaders; import com.azure.storage.blob.implementation.models.ContainersGetAccountInfoHeaders; import com.azure.storage.blob.implementation.models.ContainersGetPropertiesHeaders; +import com.azure.storage.blob.implementation.models.ContainersListBlobFlatSegmentApacheArrowHeaders; import com.azure.storage.blob.implementation.models.ContainersListBlobFlatSegmentHeaders; +import com.azure.storage.blob.implementation.models.ContainersListBlobHierarchySegmentApacheArrowHeaders; import com.azure.storage.blob.implementation.models.ContainersListBlobHierarchySegmentHeaders; import com.azure.storage.blob.implementation.models.EncryptionScope; import com.azure.storage.blob.implementation.models.FilterBlobSegment; import com.azure.storage.blob.implementation.models.ListBlobsFlatSegmentResponse; import 
com.azure.storage.blob.implementation.models.ListBlobsHierarchySegmentResponse; +import com.azure.storage.blob.implementation.util.ArrowBlobListDeserializer; +import com.azure.storage.blob.implementation.util.ArrowBlobListDeserializer.ArrowListBlobsResult; import com.azure.storage.blob.implementation.util.BlobConstants; import com.azure.storage.blob.implementation.util.BlobSasImplUtil; import com.azure.storage.blob.implementation.util.ModelHelper; @@ -47,6 +51,7 @@ import com.azure.storage.blob.models.ListBlobsOptions; import com.azure.storage.blob.models.PublicAccessType; import com.azure.storage.blob.models.StorageAccountInfo; +import com.azure.storage.blob.models.StorageResponseSerializationFormat; import com.azure.storage.blob.models.TaggedBlobItem; import com.azure.storage.blob.models.UserDelegationKey; import com.azure.storage.blob.options.BlobContainerCreateOptions; @@ -57,6 +62,12 @@ import com.azure.storage.common.implementation.SasImplUtils; import com.azure.storage.common.implementation.StorageImplUtils; +import com.azure.xml.XmlReader; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import javax.xml.stream.XMLStreamException; import java.net.URI; import java.time.Duration; import java.time.OffsetDateTime; @@ -1029,6 +1040,10 @@ public PagedIterable listBlobs(ListBlobsOptions options, String contin .setStartFrom(options.getStartFrom()) .setDetails(options.getDetails()); + if (options.getStorageResponseSerializationFormat() == StorageResponseSerializationFormat.ARROW) { + finalOptions.setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + } + } /* If pageSize was not set in a .byPage(int) method, the page size from options will be preserved. @@ -1040,26 +1055,78 @@ public PagedIterable listBlobs(ListBlobsOptions options, String contin ArrayList include = finalOptions.getDetails().toList().isEmpty() ? 
null : finalOptions.getDetails().toList(); - Callable> operation - = () -> this.azureBlobStorage.getContainers() - .listBlobFlatSegmentWithResponse(containerName, finalOptions.getPrefix(), nextMarker, - finalOptions.getMaxResultsPerPage(), include, finalOptions.getStartFrom(), null, null, - Context.NONE); - - ResponseBase response - = StorageImplUtils.sendRequest(operation, timeout, BlobStorageException.class); - - List value = response.getValue().getSegment() == null - ? Collections.emptyList() - : response.getValue() - .getSegment() - .getBlobItems() - .stream() - .map(ModelHelper::populateBlobItem) - .collect(Collectors.toList()); - - return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), response.getHeaders(), - value, response.getValue().getNextMarker(), response.getDeserializedHeaders()); + if (finalOptions.getStorageResponseSerializationFormat() == StorageResponseSerializationFormat.ARROW) { + Callable> operation + = () -> this.azureBlobStorage.getContainers() + .listBlobFlatSegmentApacheArrowWithResponse(containerName, finalOptions.getPrefix(), nextMarker, + finalOptions.getMaxResultsPerPage(), include, null, finalOptions.getStartFrom(), + finalOptions.getEndBefore(), null, Context.NONE); + ResponseBase response + = StorageImplUtils.sendRequest(operation, timeout, BlobStorageException.class); + + String contentType = response.getHeaders().getValue(com.azure.core.http.HttpHeaderName.CONTENT_TYPE); + + // The response body is an InputStream backed by the network buffer. It must be closed to release the + // underlying buffer, otherwise the transport (e.g. Netty) will report a resource leak. 
+ try (InputStream responseBody = response.getValue()) { + if (contentType != null && contentType.contentEquals("application/vnd.apache.arrow.stream")) { + // Arrow response — parse with Arrow parser entrypoint + ArrowListBlobsResult arrowResult = ArrowBlobListDeserializer.deserialize(responseBody); + + List value = arrowResult.getBlobItems() + .stream() + .map(ModelHelper::populateBlobItem) + .collect(Collectors.toList()); + + return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), + response.getHeaders(), value, arrowResult.getNextMarker(), + response.getDeserializedHeaders()); + } else { + // XML fallback — service returned XML instead of Arrow + try { + ListBlobsFlatSegmentResponse xmlResponse + = ListBlobsFlatSegmentResponse.fromXml(XmlReader.fromStream(responseBody)); + + List value = xmlResponse.getSegment() == null + ? Collections.emptyList() + : xmlResponse.getSegment() + .getBlobItems() + .stream() + .map(ModelHelper::populateBlobItem) + .collect(Collectors.toList()); + + return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), + response.getHeaders(), value, xmlResponse.getNextMarker(), null); + } catch (XMLStreamException e) { + throw LOGGER + .logExceptionAsError(new RuntimeException("Failed to parse XML fallback response", e)); + } + } + } catch (IOException e) { + throw LOGGER + .logExceptionAsError(new UncheckedIOException("Failed to close ListBlobs response stream.", e)); + } + } else { + Callable> operation + = () -> this.azureBlobStorage.getContainers() + .listBlobFlatSegmentWithResponse(containerName, finalOptions.getPrefix(), nextMarker, + finalOptions.getMaxResultsPerPage(), include, finalOptions.getStartFrom(), null, null, + Context.NONE); + ResponseBase response + = StorageImplUtils.sendRequest(operation, timeout, BlobStorageException.class); + + List value = response.getValue().getSegment() == null + ? 
Collections.emptyList() + : response.getValue() + .getSegment() + .getBlobItems() + .stream() + .map(ModelHelper::populateBlobItem) + .collect(Collectors.toList()); + + return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), response.getHeaders(), + value, response.getValue().getNextMarker(), response.getDeserializedHeaders()); + } }; return new PagedIterable<>(pageSize -> retriever.apply(continuationToken, pageSize), retriever); @@ -1164,6 +1231,11 @@ public PagedIterable listBlobsByHierarchy(String delimiter, ListBlobsO .setPrefix(options.getPrefix()) .setDetails(options.getDetails()) .setStartFrom(options.getStartFrom()); + if (ModelHelper.resolveSerializationFormat(options.getStorageResponseSerializationFormat()) + == StorageResponseSerializationFormat.ARROW) { + finalOptions.setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setEndBefore(options.getEndBefore()); + } } /* If pageSize was not set in a .byPage(int) method, the page size from options will be preserved. @@ -1186,25 +1258,79 @@ private PagedResponse listBlobsHierarchySegment(String marker, String ArrayList include = options.getDetails().toList().isEmpty() ? 
null : options.getDetails().toList(); - Callable> operation - = () -> azureBlobStorage.getContainers() - .listBlobHierarchySegmentWithResponse(containerName, delimiter, options.getPrefix(), marker, - options.getMaxResultsPerPage(), include, options.getStartFrom(), null, null, Context.NONE); + if (ModelHelper.resolveSerializationFormat(options.getStorageResponseSerializationFormat()) + == StorageResponseSerializationFormat.ARROW) { + Callable> operation + = () -> azureBlobStorage.getContainers() + .listBlobHierarchySegmentApacheArrowWithResponse(containerName, delimiter, options.getPrefix(), + marker, options.getMaxResultsPerPage(), include, null, options.getStartFrom(), + options.getEndBefore(), null, Context.NONE); + ResponseBase response + = StorageImplUtils.sendRequest(operation, timeout, BlobStorageException.class); + + String contentType = response.getHeaders().getValue(com.azure.core.http.HttpHeaderName.CONTENT_TYPE); + + // The response body is an InputStream backed by the network buffer. It must be closed to release the + // underlying buffer, otherwise the transport (e.g. Netty) will report a resource leak. 
+ try (InputStream responseBody = response.getValue()) { + if (contentType != null && contentType.contentEquals("application/vnd.apache.arrow.stream")) { + ArrowListBlobsResult arrowResult = ArrowBlobListDeserializer.deserialize(responseBody); + + List value = arrowResult.getBlobItems() + .stream() + .map(ModelHelper::populateBlobItem) + .collect(Collectors.toList()); + + return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), + response.getHeaders(), value, arrowResult.getNextMarker(), response.getDeserializedHeaders()); + } else { + // XML fallback — service returned XML instead of Arrow + try { + ListBlobsHierarchySegmentResponse xmlResponse + = ListBlobsHierarchySegmentResponse.fromXml(XmlReader.fromStream(responseBody)); + + BlobHierarchyListSegment segment = xmlResponse.getSegment(); + List value = new ArrayList<>(); + if (segment != null) { + segment.getBlobItems().forEach(item -> value.add(BlobItemConstructorProxy.create(item))); + segment.getBlobPrefixes() + .forEach(prefix -> value + .add(new BlobItem().setName(ModelHelper.toBlobNameString(prefix.getName())) + .setIsPrefix(true))); + } + + return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), + response.getHeaders(), value, xmlResponse.getNextMarker(), null); + } catch (XMLStreamException e) { + throw LOGGER + .logExceptionAsError(new RuntimeException("Failed to parse XML fallback response", e)); + } + } + } catch (IOException e) { + throw LOGGER + .logExceptionAsError(new UncheckedIOException("Failed to close ListBlobs response stream.", e)); + } + } else { + Callable> operation + = () -> azureBlobStorage.getContainers() + .listBlobHierarchySegmentWithResponse(containerName, delimiter, options.getPrefix(), marker, + options.getMaxResultsPerPage(), include, options.getStartFrom(), null, null, Context.NONE); - ResponseBase response - = StorageImplUtils.sendRequest(operation, timeout, BlobStorageException.class); + ResponseBase response + = 
StorageImplUtils.sendRequest(operation, timeout, BlobStorageException.class); - BlobHierarchyListSegment segment = response.getValue().getSegment(); - List value = new ArrayList<>(); - if (segment != null) { - segment.getBlobItems().forEach(item -> value.add(BlobItemConstructorProxy.create(item))); - segment.getBlobPrefixes() - .forEach(prefix -> value - .add(new BlobItem().setName(ModelHelper.toBlobNameString(prefix.getName())).setIsPrefix(true))); - } + BlobHierarchyListSegment segment = response.getValue().getSegment(); + List value = new ArrayList<>(); + if (segment != null) { + segment.getBlobItems().forEach(item -> value.add(BlobItemConstructorProxy.create(item))); + segment.getBlobPrefixes() + .forEach(prefix -> value + .add(new BlobItem().setName(ModelHelper.toBlobNameString(prefix.getName())).setIsPrefix(true))); + } - return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), response.getHeaders(), value, - response.getValue().getNextMarker(), response.getDeserializedHeaders()); + return new PagedResponseBase<>(response.getRequest(), response.getStatusCode(), response.getHeaders(), + value, response.getValue().getNextMarker(), response.getDeserializedHeaders()); + } } /** diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/ContainersImpl.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/ContainersImpl.java index 7fd2af96e4df..af54bda3cb4b 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/ContainersImpl.java +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/ContainersImpl.java @@ -37,7 +37,9 @@ import com.azure.storage.blob.implementation.models.ContainersGetAccessPolicyHeaders; import com.azure.storage.blob.implementation.models.ContainersGetAccountInfoHeaders; import com.azure.storage.blob.implementation.models.ContainersGetPropertiesHeaders; +import 
com.azure.storage.blob.implementation.models.ContainersListBlobFlatSegmentApacheArrowHeaders; import com.azure.storage.blob.implementation.models.ContainersListBlobFlatSegmentHeaders; +import com.azure.storage.blob.implementation.models.ContainersListBlobHierarchySegmentApacheArrowHeaders; import com.azure.storage.blob.implementation.models.ContainersListBlobHierarchySegmentHeaders; import com.azure.storage.blob.implementation.models.ContainersReleaseLeaseHeaders; import com.azure.storage.blob.implementation.models.ContainersRenameHeaders; @@ -853,6 +855,56 @@ Response listBlobFlatSegmentNoCustomHeadersSync(@H @HeaderParam("x-ms-version") String version, @HeaderParam("x-ms-client-request-id") String requestId, @HeaderParam("Accept") String accept, Context context); + @Get("/{containerName}") + @ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + Mono>> + listBlobFlatSegmentApacheArrow(@HostParam("url") String url, + @PathParam("containerName") String containerName, @QueryParam("restype") String restype, + @QueryParam("comp") String comp, @HeaderParam("Accept") String accept, + @QueryParam("prefix") String prefix, @QueryParam("marker") String marker, + @QueryParam("maxresults") Integer maxresults, @QueryParam("include") String include, + @QueryParam("timeout") Integer timeout, @QueryParam("startFrom") String startFrom, + @QueryParam("endBefore") String endBefore, @HeaderParam("x-ms-version") String version, + @HeaderParam("x-ms-client-request-id") String requestId, Context context); + + @Get("/{containerName}") + @ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + Mono listBlobFlatSegmentApacheArrowNoCustomHeaders(@HostParam("url") String url, + @PathParam("containerName") String containerName, @QueryParam("restype") String restype, + @QueryParam("comp") String comp, @HeaderParam("Accept") String accept, @QueryParam("prefix") String prefix, + @QueryParam("marker") String 
marker, @QueryParam("maxresults") Integer maxresults, + @QueryParam("include") String include, @QueryParam("timeout") Integer timeout, + @QueryParam("startFrom") String startFrom, @QueryParam("endBefore") String endBefore, + @HeaderParam("x-ms-version") String version, @HeaderParam("x-ms-client-request-id") String requestId, + Context context); + + @Get("/{containerName}") + @ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + ResponseBase listBlobFlatSegmentApacheArrowSync( + @HostParam("url") String url, @PathParam("containerName") String containerName, + @QueryParam("restype") String restype, @QueryParam("comp") String comp, + @HeaderParam("Accept") String accept, @QueryParam("prefix") String prefix, + @QueryParam("marker") String marker, @QueryParam("maxresults") Integer maxresults, + @QueryParam("include") String include, @QueryParam("timeout") Integer timeout, + @QueryParam("startFrom") String startFrom, @QueryParam("endBefore") String endBefore, + @HeaderParam("x-ms-version") String version, @HeaderParam("x-ms-client-request-id") String requestId, + Context context); + + @Get("/{containerName}") + @ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + Response listBlobFlatSegmentApacheArrowNoCustomHeadersSync(@HostParam("url") String url, + @PathParam("containerName") String containerName, @QueryParam("restype") String restype, + @QueryParam("comp") String comp, @HeaderParam("Accept") String accept, @QueryParam("prefix") String prefix, + @QueryParam("marker") String marker, @QueryParam("maxresults") Integer maxresults, + @QueryParam("include") String include, @QueryParam("timeout") Integer timeout, + @QueryParam("startFrom") String startFrom, @QueryParam("endBefore") String endBefore, + @HeaderParam("x-ms-version") String version, @HeaderParam("x-ms-client-request-id") String requestId, + Context context); + @Get("/{containerName}") @ExpectedResponses({ 200 }) 
@UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) @@ -903,6 +955,58 @@ Response listBlobHierarchySegmentNoCustomHead @HeaderParam("x-ms-version") String version, @HeaderParam("x-ms-client-request-id") String requestId, @HeaderParam("Accept") String accept, Context context); + @Get("/{containerName}") + @ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + Mono>> + listBlobHierarchySegmentApacheArrow(@HostParam("url") String url, + @PathParam("containerName") String containerName, @QueryParam("restype") String restype, + @QueryParam("comp") String comp, @HeaderParam("Accept") String accept, + @QueryParam("prefix") String prefix, @QueryParam("delimiter") String delimiter, + @QueryParam("marker") String marker, @QueryParam("maxresults") Integer maxresults, + @QueryParam("include") String include, @QueryParam("timeout") Integer timeout, + @QueryParam("startFrom") String startFrom, @QueryParam("endBefore") String endBefore, + @HeaderParam("x-ms-version") String version, @HeaderParam("x-ms-client-request-id") String requestId, + Context context); + + @Get("/{containerName}") + @ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + Mono listBlobHierarchySegmentApacheArrowNoCustomHeaders(@HostParam("url") String url, + @PathParam("containerName") String containerName, @QueryParam("restype") String restype, + @QueryParam("comp") String comp, @HeaderParam("Accept") String accept, @QueryParam("prefix") String prefix, + @QueryParam("delimiter") String delimiter, @QueryParam("marker") String marker, + @QueryParam("maxresults") Integer maxresults, @QueryParam("include") String include, + @QueryParam("timeout") Integer timeout, @QueryParam("startFrom") String startFrom, + @QueryParam("endBefore") String endBefore, @HeaderParam("x-ms-version") String version, + @HeaderParam("x-ms-client-request-id") String requestId, Context context); + + @Get("/{containerName}") + 
@ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + ResponseBase + listBlobHierarchySegmentApacheArrowSync(@HostParam("url") String url, + @PathParam("containerName") String containerName, @QueryParam("restype") String restype, + @QueryParam("comp") String comp, @HeaderParam("Accept") String accept, + @QueryParam("prefix") String prefix, @QueryParam("delimiter") String delimiter, + @QueryParam("marker") String marker, @QueryParam("maxresults") Integer maxresults, + @QueryParam("include") String include, @QueryParam("timeout") Integer timeout, + @QueryParam("startFrom") String startFrom, @QueryParam("endBefore") String endBefore, + @HeaderParam("x-ms-version") String version, @HeaderParam("x-ms-client-request-id") String requestId, + Context context); + + @Get("/{containerName}") + @ExpectedResponses({ 200 }) + @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) + Response listBlobHierarchySegmentApacheArrowNoCustomHeadersSync(@HostParam("url") String url, + @PathParam("containerName") String containerName, @QueryParam("restype") String restype, + @QueryParam("comp") String comp, @HeaderParam("Accept") String accept, @QueryParam("prefix") String prefix, + @QueryParam("delimiter") String delimiter, @QueryParam("marker") String marker, + @QueryParam("maxresults") Integer maxresults, @QueryParam("include") String include, + @QueryParam("timeout") Integer timeout, @QueryParam("startFrom") String startFrom, + @QueryParam("endBefore") String endBefore, @HeaderParam("x-ms-version") String version, + @HeaderParam("x-ms-client-request-id") String requestId, Context context); + @Get("/{containerName}") @ExpectedResponses({ 200 }) @UnexpectedResponseExceptionType(BlobStorageExceptionInternal.class) @@ -6073,12 +6177,10 @@ public Response listBlobFlatSegmentNoCustomHeaders } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. 
+ * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. - * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6091,36 +6193,35 @@ public Response listBlobFlatSegmentNoCustomHeaders * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). 
* @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs along with {@link ResponseBase} on successful completion of {@link Mono}. + * @return the response body along with {@link ResponseBase} on successful completion of {@link Mono}. */ @ServiceMethod(returns = ReturnType.SINGLE) - public Mono> - listBlobHierarchySegmentWithResponseAsync(String containerName, String delimiter, String prefix, String marker, - Integer maxresults, List include, String startFrom, Integer timeout, + public Mono>> + listBlobFlatSegmentApacheArrowWithResponseAsync(String containerName, String prefix, String marker, + Integer maxresults, List include, Integer timeout, String startFrom, String endBefore, String requestId) { return FluxUtil - .withContext(context -> listBlobHierarchySegmentWithResponseAsync(containerName, delimiter, prefix, marker, - maxresults, include, startFrom, timeout, requestId, context)) + .withContext(context -> listBlobFlatSegmentApacheArrowWithResponseAsync(containerName, prefix, marker, + maxresults, include, timeout, startFrom, endBefore, requestId, context)) .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. 
- * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6133,45 +6234,45 @@ public Response listBlobFlatSegmentNoCustomHeaders * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @param context The context to associate with this operation. 
* @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs along with {@link ResponseBase} on successful completion of {@link Mono}. + * @return the response body along with {@link ResponseBase} on successful completion of {@link Mono}. */ @ServiceMethod(returns = ReturnType.SINGLE) - public Mono> - listBlobHierarchySegmentWithResponseAsync(String containerName, String delimiter, String prefix, String marker, - Integer maxresults, List include, String startFrom, Integer timeout, String requestId, - Context context) { + public Mono>> + listBlobFlatSegmentApacheArrowWithResponseAsync(String containerName, String prefix, String marker, + Integer maxresults, List include, Integer timeout, String startFrom, String endBefore, + String requestId, Context context) { final String restype = "container"; final String comp = "list"; - final String accept = "application/xml"; + final String accept = "application/vnd.apache.arrow.stream,application/xml"; String includeConverted = (include == null) ? null : include.stream() .map(paramItemValue -> Objects.toString(paramItemValue, "")) .collect(Collectors.joining(",")); return service - .listBlobHierarchySegment(this.client.getUrl(), containerName, restype, comp, prefix, delimiter, marker, - maxresults, includeConverted, startFrom, timeout, this.client.getVersion(), requestId, accept, context) + .listBlobFlatSegmentApacheArrow(this.client.getUrl(), containerName, restype, comp, accept, prefix, marker, + maxresults, includeConverted, timeout, startFrom, endBefore, this.client.getVersion(), requestId, + context) .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. 
+ * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. - * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6184,35 +6285,34 @@ public Response listBlobFlatSegmentNoCustomHeaders * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). 
* @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs on successful completion of {@link Mono}. + * @return the response. */ @ServiceMethod(returns = ReturnType.SINGLE) - public Mono listBlobHierarchySegmentAsync(String containerName, String delimiter, - String prefix, String marker, Integer maxresults, List include, String startFrom, - Integer timeout, String requestId) { - return listBlobHierarchySegmentWithResponseAsync(containerName, delimiter, prefix, marker, maxresults, include, - startFrom, timeout, requestId) + public Flux listBlobFlatSegmentApacheArrowAsync(String containerName, String prefix, String marker, + Integer maxresults, List include, Integer timeout, String startFrom, String endBefore, + String requestId) { + return listBlobFlatSegmentApacheArrowWithResponseAsync(containerName, prefix, marker, maxresults, include, + timeout, startFrom, endBefore, requestId) .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException) - .flatMap(res -> Mono.justOrEmpty(res.getValue())); + .flatMapMany(fluxByteBufferResponse -> fluxByteBufferResponse.getValue()); } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. 
- * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6225,36 +6325,35 @@ public Mono listBlobHierarchySegmentAsync(Str * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @param context The context to associate with this operation. 
* @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs on successful completion of {@link Mono}. + * @return the response. */ @ServiceMethod(returns = ReturnType.SINGLE) - public Mono listBlobHierarchySegmentAsync(String containerName, String delimiter, - String prefix, String marker, Integer maxresults, List include, String startFrom, - Integer timeout, String requestId, Context context) { - return listBlobHierarchySegmentWithResponseAsync(containerName, delimiter, prefix, marker, maxresults, include, - startFrom, timeout, requestId, context) + public Flux listBlobFlatSegmentApacheArrowAsync(String containerName, String prefix, String marker, + Integer maxresults, List include, Integer timeout, String startFrom, String endBefore, + String requestId, Context context) { + return listBlobFlatSegmentApacheArrowWithResponseAsync(containerName, prefix, marker, maxresults, include, + timeout, startFrom, endBefore, requestId, context) .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException) - .flatMap(res -> Mono.justOrEmpty(res.getValue())); + .flatMapMany(fluxByteBufferResponse -> fluxByteBufferResponse.getValue()); } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. 
- * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6267,35 +6366,34 @@ public Mono listBlobHierarchySegmentAsync(Str * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @throws IllegalArgumentException thrown if parameters fail the validation. 
* @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs along with {@link Response} on successful completion of {@link Mono}. + * @return the response body on successful completion of {@link Mono}. */ @ServiceMethod(returns = ReturnType.SINGLE) - public Mono> listBlobHierarchySegmentNoCustomHeadersWithResponseAsync( - String containerName, String delimiter, String prefix, String marker, Integer maxresults, - List include, String startFrom, Integer timeout, String requestId) { + public Mono listBlobFlatSegmentApacheArrowNoCustomHeadersWithResponseAsync(String containerName, + String prefix, String marker, Integer maxresults, List include, Integer timeout, + String startFrom, String endBefore, String requestId) { return FluxUtil - .withContext(context -> listBlobHierarchySegmentNoCustomHeadersWithResponseAsync(containerName, delimiter, - prefix, marker, maxresults, include, startFrom, timeout, requestId, context)) + .withContext(context -> listBlobFlatSegmentApacheArrowNoCustomHeadersWithResponseAsync(containerName, + prefix, marker, maxresults, include, timeout, startFrom, endBefore, requestId, context)) .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. - * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. 
The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6308,45 +6406,44 @@ public Mono> listBlobHierarchySegmen * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @param context The context to associate with this operation. * @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. 
- * @return an enumeration of blobs along with {@link Response} on successful completion of {@link Mono}. + * @return the response body on successful completion of {@link Mono}. */ @ServiceMethod(returns = ReturnType.SINGLE) - public Mono> listBlobHierarchySegmentNoCustomHeadersWithResponseAsync( - String containerName, String delimiter, String prefix, String marker, Integer maxresults, - List include, String startFrom, Integer timeout, String requestId, Context context) { + public Mono listBlobFlatSegmentApacheArrowNoCustomHeadersWithResponseAsync(String containerName, + String prefix, String marker, Integer maxresults, List include, Integer timeout, + String startFrom, String endBefore, String requestId, Context context) { final String restype = "container"; final String comp = "list"; - final String accept = "application/xml"; + final String accept = "application/vnd.apache.arrow.stream,application/xml"; String includeConverted = (include == null) ? null : include.stream() .map(paramItemValue -> Objects.toString(paramItemValue, "")) .collect(Collectors.joining(",")); return service - .listBlobHierarchySegmentNoCustomHeaders(this.client.getUrl(), containerName, restype, comp, prefix, - delimiter, marker, maxresults, includeConverted, startFrom, timeout, this.client.getVersion(), - requestId, accept, context) + .listBlobFlatSegmentApacheArrowNoCustomHeaders(this.client.getUrl(), containerName, restype, comp, accept, + prefix, marker, maxresults, includeConverted, timeout, startFrom, endBefore, this.client.getVersion(), + requestId, context) .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. 
* * @param containerName The container name. - * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6359,48 +6456,47 @@ public Mono> listBlobHierarchySegmen * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. 
* @param context The context to associate with this operation. * @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs along with {@link ResponseBase}. + * @return the response body along with {@link ResponseBase}. */ @ServiceMethod(returns = ReturnType.SINGLE) - public ResponseBase - listBlobHierarchySegmentWithResponse(String containerName, String delimiter, String prefix, String marker, - Integer maxresults, List include, String startFrom, Integer timeout, String requestId, - Context context) { + public ResponseBase + listBlobFlatSegmentApacheArrowWithResponse(String containerName, String prefix, String marker, + Integer maxresults, List include, Integer timeout, String startFrom, String endBefore, + String requestId, Context context) { try { final String restype = "container"; final String comp = "list"; - final String accept = "application/xml"; + final String accept = "application/vnd.apache.arrow.stream,application/xml"; String includeConverted = (include == null) ? 
null : include.stream() .map(paramItemValue -> Objects.toString(paramItemValue, "")) .collect(Collectors.joining(",")); - return service.listBlobHierarchySegmentSync(this.client.getUrl(), containerName, restype, comp, prefix, - delimiter, marker, maxresults, includeConverted, startFrom, timeout, this.client.getVersion(), - requestId, accept, context); + return service.listBlobFlatSegmentApacheArrowSync(this.client.getUrl(), containerName, restype, comp, + accept, prefix, marker, maxresults, includeConverted, timeout, startFrom, endBefore, + this.client.getVersion(), requestId, context); } catch (BlobStorageExceptionInternal internalException) { throw ModelHelper.mapToBlobStorageException(internalException); } } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. - * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. The operation returns the NextMarker value within the response body if the listing operation @@ -6413,37 +6509,36 @@ public Mono> listBlobHierarchySegmen * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. 
* @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs. + * @return the response. 
*/ @ServiceMethod(returns = ReturnType.SINGLE) - public ListBlobsHierarchySegmentResponse listBlobHierarchySegment(String containerName, String delimiter, - String prefix, String marker, Integer maxresults, List include, String startFrom, - Integer timeout, String requestId) { + public InputStream listBlobFlatSegmentApacheArrow(String containerName, String prefix, String marker, + Integer maxresults, List include, Integer timeout, String startFrom, String endBefore, + String requestId) { try { - return listBlobHierarchySegmentWithResponse(containerName, delimiter, prefix, marker, maxresults, include, - startFrom, timeout, requestId, Context.NONE).getValue(); + return listBlobFlatSegmentApacheArrowWithResponse(containerName, prefix, marker, maxresults, include, + timeout, startFrom, endBefore, requestId, Context.NONE).getValue(); } catch (BlobStorageExceptionInternal internalException) { throw ModelHelper.mapToBlobStorageException(internalException); } } /** - * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache + * Arrow use case so response is returned as raw to be deserialized by the client. * * @param containerName The container name. - * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the - * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the - * appearance of the delimiter character. The delimiter may be a single character or a string. * @param prefix Filters the results to return only containers whose name begins with the specified prefix. * @param marker A string value that identifies the portion of the list of containers to be returned with the next * listing operation. 
The operation returns the NextMarker value within the response body if the listing operation @@ -6456,35 +6551,892 @@ public ListBlobsHierarchySegmentResponse listBlobHierarchySegment(String contain * the remainder of the results. For this reason, it is possible that the service will return fewer results than * specified by maxresults, or than the default of 5000. * @param include Include this parameter to specify one or more datasets to include in the response. - * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is - * supported; For recursive list, multiple entity levels are supported. (Inclusive). * @param timeout The timeout parameter is expressed in seconds. For more information, see <a * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the * analytics logs when storage analytics logging is enabled. * @param context The context to associate with this operation. * @throws IllegalArgumentException thrown if parameters fail the validation. * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. - * @return an enumeration of blobs along with {@link Response}. + * @return the response body along with {@link Response}. 
*/ @ServiceMethod(returns = ReturnType.SINGLE) - public Response listBlobHierarchySegmentNoCustomHeadersWithResponse( - String containerName, String delimiter, String prefix, String marker, Integer maxresults, - List include, String startFrom, Integer timeout, String requestId, Context context) { + public Response listBlobFlatSegmentApacheArrowNoCustomHeadersWithResponse(String containerName, + String prefix, String marker, Integer maxresults, List include, Integer timeout, + String startFrom, String endBefore, String requestId, Context context) { try { final String restype = "container"; final String comp = "list"; - final String accept = "application/xml"; + final String accept = "application/vnd.apache.arrow.stream,application/xml"; String includeConverted = (include == null) ? null : include.stream() .map(paramItemValue -> Objects.toString(paramItemValue, "")) .collect(Collectors.joining(",")); - return service.listBlobHierarchySegmentNoCustomHeadersSync(this.client.getUrl(), containerName, restype, - comp, prefix, delimiter, marker, maxresults, includeConverted, startFrom, timeout, - this.client.getVersion(), requestId, accept, context); + return service.listBlobFlatSegmentApacheArrowNoCustomHeadersSync(this.client.getUrl(), containerName, + restype, comp, accept, prefix, marker, maxresults, includeConverted, timeout, startFrom, endBefore, + this.client.getVersion(), requestId, context); + } catch (BlobStorageExceptionInternal internalException) { + throw ModelHelper.mapToBlobStorageException(internalException); + } + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. 
The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. 
+ * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs along with {@link ResponseBase} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> + listBlobHierarchySegmentWithResponseAsync(String containerName, String delimiter, String prefix, String marker, + Integer maxresults, List include, String startFrom, Integer timeout, + String requestId) { + return FluxUtil + .withContext(context -> listBlobHierarchySegmentWithResponseAsync(containerName, delimiter, prefix, marker, + maxresults, include, startFrom, timeout, requestId, context)) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. 
+ * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs along with {@link ResponseBase} on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> + listBlobHierarchySegmentWithResponseAsync(String containerName, String delimiter, String prefix, String marker, + Integer maxresults, List include, String startFrom, Integer timeout, String requestId, + Context context) { + final String restype = "container"; + final String comp = "list"; + final String accept = "application/xml"; + String includeConverted = (include == null) + ? null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service + .listBlobHierarchySegment(this.client.getUrl(), containerName, restype, comp, prefix, delimiter, marker, + maxresults, includeConverted, startFrom, timeout, this.client.getVersion(), requestId, accept, context) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. 
+ * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono listBlobHierarchySegmentAsync(String containerName, String delimiter, + String prefix, String marker, Integer maxresults, List include, String startFrom, + Integer timeout, String requestId) { + return listBlobHierarchySegmentWithResponseAsync(containerName, delimiter, prefix, marker, maxresults, include, + startFrom, timeout, requestId) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException) + .flatMap(res -> Mono.justOrEmpty(res.getValue())); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. 
Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono listBlobHierarchySegmentAsync(String containerName, String delimiter, + String prefix, String marker, Integer maxresults, List include, String startFrom, + Integer timeout, String requestId, Context context) { + return listBlobHierarchySegmentWithResponseAsync(containerName, delimiter, prefix, marker, maxresults, include, + startFrom, timeout, requestId, context) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException) + .flatMap(res -> Mono.justOrEmpty(res.getValue())); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. 
Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs along with {@link Response} on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> listBlobHierarchySegmentNoCustomHeadersWithResponseAsync( + String containerName, String delimiter, String prefix, String marker, Integer maxresults, + List include, String startFrom, Integer timeout, String requestId) { + return FluxUtil + .withContext(context -> listBlobHierarchySegmentNoCustomHeadersWithResponseAsync(containerName, delimiter, + prefix, marker, maxresults, include, startFrom, timeout, requestId, context)) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. 
Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs along with {@link Response} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono> listBlobHierarchySegmentNoCustomHeadersWithResponseAsync( + String containerName, String delimiter, String prefix, String marker, Integer maxresults, + List include, String startFrom, Integer timeout, String requestId, Context context) { + final String restype = "container"; + final String comp = "list"; + final String accept = "application/xml"; + String includeConverted = (include == null) + ? 
null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service + .listBlobHierarchySegmentNoCustomHeaders(this.client.getUrl(), containerName, restype, comp, prefix, + delimiter, marker, maxresults, includeConverted, startFrom, timeout, this.client.getVersion(), + requestId, accept, context) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. 
For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs along with {@link ResponseBase}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public ResponseBase + listBlobHierarchySegmentWithResponse(String containerName, String delimiter, String prefix, String marker, + Integer maxresults, List include, String startFrom, Integer timeout, String requestId, + Context context) { + try { + final String restype = "container"; + final String comp = "list"; + final String accept = "application/xml"; + String includeConverted = (include == null) + ? 
null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service.listBlobHierarchySegmentSync(this.client.getUrl(), containerName, restype, comp, prefix, + delimiter, marker, maxresults, includeConverted, startFrom, timeout, this.client.getVersion(), + requestId, accept, context); + } catch (BlobStorageExceptionInternal internalException) { + throw ModelHelper.mapToBlobStorageException(internalException); + } + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. 
For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public ListBlobsHierarchySegmentResponse listBlobHierarchySegment(String containerName, String delimiter, + String prefix, String marker, Integer maxresults, List include, String startFrom, + Integer timeout, String requestId) { + try { + return listBlobHierarchySegmentWithResponse(containerName, delimiter, prefix, marker, maxresults, include, + startFrom, timeout, requestId, Context.NONE).getValue(); + } catch (BlobStorageExceptionInternal internalException) { + throw ModelHelper.mapToBlobStorageException(internalException); + } + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. + * + * @param containerName The container name. 
+ * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only containers whose name begins with the specified prefix. + * @param marker A string value that identifies the portion of the list of containers to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all containers remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. 
+ * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return an enumeration of blobs along with {@link Response}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response listBlobHierarchySegmentNoCustomHeadersWithResponse( + String containerName, String delimiter, String prefix, String marker, Integer maxresults, + List include, String startFrom, Integer timeout, String requestId, Context context) { + try { + final String restype = "container"; + final String comp = "list"; + final String accept = "application/xml"; + /* A null include list stays null; otherwise each item is stringified (null items become empty strings) and comma-joined. */ + String includeConverted = (include == null) + ? null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service.listBlobHierarchySegmentNoCustomHeadersSync(this.client.getUrl(), containerName, restype, + comp, prefix, delimiter, marker, maxresults, includeConverted, startFrom, timeout, + this.client.getVersion(), requestId, accept, context); + } catch (BlobStorageExceptionInternal internalException) { + /* Rethrow the internal exception as whatever ModelHelper.mapToBlobStorageException produces for it. */ + throw ModelHelper.mapToBlobStorageException(internalException); + } + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. 
+ * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. + * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of blobs to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). 
+ * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response body along with {@link ResponseBase} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono>> + listBlobHierarchySegmentApacheArrowWithResponseAsync(String containerName, String delimiter, String prefix, + String marker, Integer maxresults, List include, Integer timeout, String startFrom, + String endBefore, String requestId) { + /* Delegates to the Context-accepting overload, using the context handed to the lambda by FluxUtil.withContext. */ + return FluxUtil + .withContext(context -> listBlobHierarchySegmentApacheArrowWithResponseAsync(containerName, delimiter, + prefix, marker, maxresults, include, timeout, startFrom, endBefore, requestId, context)) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. 
+ * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of blobs to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. 
+ * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response body along with {@link ResponseBase} on successful completion of {@link Mono}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono>> + listBlobHierarchySegmentApacheArrowWithResponseAsync(String containerName, String delimiter, String prefix, + String marker, Integer maxresults, List include, Integer timeout, String startFrom, + String endBefore, String requestId, Context context) { + final String restype = "container"; + final String comp = "list"; + /* The Accept value names the Arrow stream media type first, followed by XML. */ + final String accept = "application/vnd.apache.arrow.stream,application/xml"; + /* A null include list stays null; otherwise each item is stringified (null items become empty strings) and comma-joined. */ + String includeConverted = (include == null) + ? null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service + .listBlobHierarchySegmentApacheArrow(this.client.getUrl(), containerName, restype, comp, accept, prefix, + delimiter, marker, maxresults, includeConverted, timeout, startFrom, endBefore, + this.client.getVersion(), requestId, context) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. 
+ * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of blobs to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. 
+ * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Flux listBlobHierarchySegmentApacheArrowAsync(String containerName, String delimiter, + String prefix, String marker, Integer maxresults, List include, Integer timeout, + String startFrom, String endBefore, String requestId) { + /* Calls the WithResponseAsync overload and flattens the value of its response into the returned Flux. NOTE(review): that overload already applies the same onErrorMap, so the mapping here looks redundant - confirm. */ + return listBlobHierarchySegmentApacheArrowWithResponseAsync(containerName, delimiter, prefix, marker, + maxresults, include, timeout, startFrom, endBefore, requestId) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException) + .flatMapMany(fluxByteBufferResponse -> fluxByteBufferResponse.getValue()); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. + * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. 
+ * @param maxresults Specifies the maximum number of blobs to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Flux listBlobHierarchySegmentApacheArrowAsync(String containerName, String delimiter, + String prefix, String marker, Integer maxresults, List include, Integer timeout, + String startFrom, String endBefore, String requestId, Context context) { + /* Calls the Context-accepting WithResponseAsync overload and flattens the value of its response into the returned Flux. NOTE(review): that overload already applies the same onErrorMap, so the mapping here looks redundant - confirm. */ + return listBlobHierarchySegmentApacheArrowWithResponseAsync(containerName, delimiter, prefix, marker, + maxresults, include, timeout, startFrom, endBefore, requestId, context) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException) + .flatMapMany(fluxByteBufferResponse -> fluxByteBufferResponse.getValue()); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. + * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of blobs to return. 
If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response body on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono listBlobHierarchySegmentApacheArrowNoCustomHeadersWithResponseAsync( + String containerName, String delimiter, String prefix, String marker, Integer maxresults, + List include, Integer timeout, String startFrom, String endBefore, String requestId) { + /* Delegates to the Context-accepting overload, using the context handed to the lambda by FluxUtil.withContext. */ + return FluxUtil + .withContext(context -> listBlobHierarchySegmentApacheArrowNoCustomHeadersWithResponseAsync(containerName, + delimiter, prefix, marker, maxresults, include, timeout, startFrom, endBefore, requestId, context)) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. + * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of blobs to return. 
If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response body on successful completion of {@link Mono}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Mono listBlobHierarchySegmentApacheArrowNoCustomHeadersWithResponseAsync( + String containerName, String delimiter, String prefix, String marker, Integer maxresults, + List include, Integer timeout, String startFrom, String endBefore, String requestId, + Context context) { + final String restype = "container"; + final String comp = "list"; + /* The Accept value names the Arrow stream media type first, followed by XML. */ + final String accept = "application/vnd.apache.arrow.stream,application/xml"; + /* A null include list stays null; otherwise each item is stringified (null items become empty strings) and comma-joined. */ + String includeConverted = (include == null) + ? null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service + .listBlobHierarchySegmentApacheArrowNoCustomHeaders(this.client.getUrl(), containerName, restype, comp, + accept, prefix, delimiter, marker, maxresults, includeConverted, timeout, startFrom, endBefore, + this.client.getVersion(), requestId, context) + .onErrorMap(BlobStorageExceptionInternal.class, ModelHelper::mapToBlobStorageException); + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. + * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. 
The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of blobs to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. 
+ * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response body along with {@link ResponseBase}. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public ResponseBase + listBlobHierarchySegmentApacheArrowWithResponse(String containerName, String delimiter, String prefix, + String marker, Integer maxresults, List include, Integer timeout, String startFrom, + String endBefore, String requestId, Context context) { + try { + final String restype = "container"; + final String comp = "list"; + /* The Accept value names the Arrow stream media type first, followed by XML. */ + final String accept = "application/vnd.apache.arrow.stream,application/xml"; + /* A null include list stays null; otherwise each item is stringified (null items become empty strings) and comma-joined. */ + String includeConverted = (include == null) + ? null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service.listBlobHierarchySegmentApacheArrowSync(this.client.getUrl(), containerName, restype, comp, + accept, prefix, delimiter, marker, maxresults, includeConverted, timeout, startFrom, endBefore, + this.client.getVersion(), requestId, context); + } catch (BlobStorageExceptionInternal internalException) { + /* Rethrow the internal exception as whatever ModelHelper.mapToBlobStorageException produces for it. */ + throw ModelHelper.mapToBlobStorageException(internalException); + } + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. 
+ * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. + * @param maxresults Specifies the maximum number of blobs to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; for recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. 
+ * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response. + */ + @ServiceMethod(returns = ReturnType.SINGLE) + public InputStream listBlobHierarchySegmentApacheArrow(String containerName, String delimiter, String prefix, + String marker, Integer maxresults, List include, Integer timeout, String startFrom, + String endBefore, String requestId) { + try { + /* Calls the WithResponse overload with Context.NONE and returns only its value. NOTE(review): that overload already catches and maps BlobStorageExceptionInternal, so the catch below looks redundant - confirm. */ + return listBlobHierarchySegmentApacheArrowWithResponse(containerName, delimiter, prefix, marker, maxresults, + include, timeout, startFrom, endBefore, requestId, Context.NONE).getValue(); + } catch (BlobStorageExceptionInternal internalException) { + throw ModelHelper.mapToBlobStorageException(internalException); + } + } + + /** + * [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is + * for the Apache Arrow use case, so the response is returned raw to be deserialized by the client. + * + * @param containerName The container name. + * @param delimiter When the request includes this parameter, the operation returns a BlobPrefix element in the + * response body that acts as a placeholder for all blobs whose names begin with the same substring up to the + * appearance of the delimiter character. The delimiter may be a single character or a string. + * @param prefix Filters the results to return only blobs whose names begin with the specified prefix. + * @param marker A string value that identifies the portion of the list of blobs to be returned with the next + * listing operation. The operation returns the NextMarker value within the response body if the listing operation + * did not return all blobs remaining to be listed with the current page. The NextMarker value can be used as + * the value for the marker parameter in a subsequent call to request the next page of list items. The marker value + * is opaque to the client. 
+ * @param maxresults Specifies the maximum number of containers to return. If the request does not specify + * maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the + * listing operation crosses a partition boundary, then the service will return a continuation token for retrieving + * the remainder of the results. For this reason, it is possible that the service will return fewer results than + * specified by maxresults, or than the default of 5000. + * @param include Include this parameter to specify one or more datasets to include in the response. + * @param timeout The timeout parameter is expressed in seconds. For more information, see <a + * href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting + * Timeouts for Blob Service Operations.</a>. + * @param startFrom Specifies the relative path to list paths from. For non-recursive list, only one entity level is + * supported; For recursive list, multiple entity levels are supported. (Inclusive). + * @param endBefore Specifies the relative path to end before list paths. (Exclusive). + * @param requestId Provides a client-generated, opaque value with a 1 KB character limit that is recorded in the + * analytics logs when storage analytics logging is enabled. + * @param context The context to associate with this operation. + * @throws IllegalArgumentException thrown if parameters fail the validation. + * @throws BlobStorageExceptionInternal thrown if the request is rejected by server. + * @throws RuntimeException all other wrapped checked exceptions if the request fails to be sent. + * @return the response body along with {@link Response}. 
+ */ + @ServiceMethod(returns = ReturnType.SINGLE) + public Response listBlobHierarchySegmentApacheArrowNoCustomHeadersWithResponse(String containerName, + String delimiter, String prefix, String marker, Integer maxresults, List include, + Integer timeout, String startFrom, String endBefore, String requestId, Context context) { + try { + final String restype = "container"; + final String comp = "list"; + final String accept = "application/vnd.apache.arrow.stream,application/xml"; + String includeConverted = (include == null) + ? null + : include.stream() + .map(paramItemValue -> Objects.toString(paramItemValue, "")) + .collect(Collectors.joining(",")); + return service.listBlobHierarchySegmentApacheArrowNoCustomHeadersSync(this.client.getUrl(), containerName, + restype, comp, accept, prefix, delimiter, marker, maxresults, includeConverted, timeout, startFrom, + endBefore, this.client.getVersion(), requestId, context); } catch (BlobStorageExceptionInternal internalException) { throw ModelHelper.mapToBlobStorageException(internalException); } diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/BlobListArrowParseException.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/BlobListArrowParseException.java new file mode 100644 index 000000000000..6d2465d26bff --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/BlobListArrowParseException.java @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.models; + +/** + * Exception thrown when parsing Arrow ListBlobs payloads fails or encounters unsupported content. + */ +public final class BlobListArrowParseException extends RuntimeException { + private static final long serialVersionUID = 1L; + + /** + * Creates an exception with a message. 
+ * + * @param message parse failure details. + */ + public BlobListArrowParseException(String message) { + super(message); + } + + /** + * Creates an exception with a message and cause. + * + * @param message parse failure details. + * @param cause originating cause. + */ + public BlobListArrowParseException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/BlobsDownloadHeaders.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/BlobsDownloadHeaders.java index 7dd424fe666c..1d409a5a4cdd 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/BlobsDownloadHeaders.java +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/BlobsDownloadHeaders.java @@ -295,30 +295,6 @@ public final class BlobsDownloadHeaders { @Generated private Long xMsStructuredContentLength; - /* - * The x-ms-access-tier property. - */ - @Generated - private String xMsAccessTier; - - /* - * The x-ms-access-tier-inferred property. - */ - @Generated - private Boolean xMsAccessTierInferred; - - /* - * The x-ms-access-tier-change-time property. - */ - @Generated - private DateTimeRfc1123 xMsAccessTierChangeTime; - - /* - * The x-ms-smart-access-tier property. - */ - @Generated - private String xMsSmartAccessTier; - /* * The x-ms-content-crc64 property. 
*/ @@ -391,16 +367,6 @@ public final class BlobsDownloadHeaders { private static final HttpHeaderName X_MS_STRUCTURED_CONTENT_LENGTH = HttpHeaderName.fromString("x-ms-structured-content-length"); - private static final HttpHeaderName X_MS_ACCESS_TIER = HttpHeaderName.fromString("x-ms-access-tier"); - - private static final HttpHeaderName X_MS_ACCESS_TIER_INFERRED - = HttpHeaderName.fromString("x-ms-access-tier-inferred"); - - private static final HttpHeaderName X_MS_ACCESS_TIER_CHANGE_TIME - = HttpHeaderName.fromString("x-ms-access-tier-change-time"); - - private static final HttpHeaderName X_MS_SMART_ACCESS_TIER = HttpHeaderName.fromString("x-ms-smart-access-tier"); - private static final HttpHeaderName X_MS_CONTENT_CRC64 = HttpHeaderName.fromString("x-ms-content-crc64"); // HttpHeaders containing the raw property values. @@ -563,20 +529,6 @@ public BlobsDownloadHeaders(HttpHeaders rawHeaders) { } else { this.xMsStructuredContentLength = null; } - this.xMsAccessTier = rawHeaders.getValue(X_MS_ACCESS_TIER); - String xMsAccessTierInferred = rawHeaders.getValue(X_MS_ACCESS_TIER_INFERRED); - if (xMsAccessTierInferred != null) { - this.xMsAccessTierInferred = Boolean.parseBoolean(xMsAccessTierInferred); - } else { - this.xMsAccessTierInferred = null; - } - String xMsAccessTierChangeTime = rawHeaders.getValue(X_MS_ACCESS_TIER_CHANGE_TIME); - if (xMsAccessTierChangeTime != null) { - this.xMsAccessTierChangeTime = new DateTimeRfc1123(xMsAccessTierChangeTime); - } else { - this.xMsAccessTierChangeTime = null; - } - this.xMsSmartAccessTier = rawHeaders.getValue(X_MS_SMART_ACCESS_TIER); String xMsContentCrc64 = rawHeaders.getValue(X_MS_CONTENT_CRC64); if (xMsContentCrc64 != null) { this.xMsContentCrc64 = Base64.getDecoder().decode(xMsContentCrc64); @@ -1632,101 +1584,6 @@ public BlobsDownloadHeaders setXMsStructuredContentLength(Long xMsStructuredCont return this; } - /** - * Get the xMsAccessTier property: The x-ms-access-tier property. 
- * - * @return the xMsAccessTier value. - */ - @Generated - public String getXMsAccessTier() { - return this.xMsAccessTier; - } - - /** - * Set the xMsAccessTier property: The x-ms-access-tier property. - * - * @param xMsAccessTier the xMsAccessTier value to set. - * @return the BlobsDownloadHeaders object itself. - */ - @Generated - public BlobsDownloadHeaders setXMsAccessTier(String xMsAccessTier) { - this.xMsAccessTier = xMsAccessTier; - return this; - } - - /** - * Get the xMsAccessTierInferred property: The x-ms-access-tier-inferred property. - * - * @return the xMsAccessTierInferred value. - */ - @Generated - public Boolean isXMsAccessTierInferred() { - return this.xMsAccessTierInferred; - } - - /** - * Set the xMsAccessTierInferred property: The x-ms-access-tier-inferred property. - * - * @param xMsAccessTierInferred the xMsAccessTierInferred value to set. - * @return the BlobsDownloadHeaders object itself. - */ - @Generated - public BlobsDownloadHeaders setXMsAccessTierInferred(Boolean xMsAccessTierInferred) { - this.xMsAccessTierInferred = xMsAccessTierInferred; - return this; - } - - /** - * Get the xMsAccessTierChangeTime property: The x-ms-access-tier-change-time property. - * - * @return the xMsAccessTierChangeTime value. - */ - @Generated - public OffsetDateTime getXMsAccessTierChangeTime() { - if (this.xMsAccessTierChangeTime == null) { - return null; - } - return this.xMsAccessTierChangeTime.getDateTime(); - } - - /** - * Set the xMsAccessTierChangeTime property: The x-ms-access-tier-change-time property. - * - * @param xMsAccessTierChangeTime the xMsAccessTierChangeTime value to set. - * @return the BlobsDownloadHeaders object itself. 
- */ - @Generated - public BlobsDownloadHeaders setXMsAccessTierChangeTime(OffsetDateTime xMsAccessTierChangeTime) { - if (xMsAccessTierChangeTime == null) { - this.xMsAccessTierChangeTime = null; - } else { - this.xMsAccessTierChangeTime = new DateTimeRfc1123(xMsAccessTierChangeTime); - } - return this; - } - - /** - * Get the xMsSmartAccessTier property: The x-ms-smart-access-tier property. - * - * @return the xMsSmartAccessTier value. - */ - @Generated - public String getXMsSmartAccessTier() { - return this.xMsSmartAccessTier; - } - - /** - * Set the xMsSmartAccessTier property: The x-ms-smart-access-tier property. - * - * @param xMsSmartAccessTier the xMsSmartAccessTier value to set. - * @return the BlobsDownloadHeaders object itself. - */ - @Generated - public BlobsDownloadHeaders setXMsSmartAccessTier(String xMsSmartAccessTier) { - this.xMsSmartAccessTier = xMsSmartAccessTier; - return this; - } - /** * Get the xMsContentCrc64 property: The x-ms-content-crc64 property. * diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/ContainersListBlobFlatSegmentApacheArrowHeaders.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/ContainersListBlobFlatSegmentApacheArrowHeaders.java new file mode 100644 index 000000000000..d137f165b8de --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/ContainersListBlobFlatSegmentApacheArrowHeaders.java @@ -0,0 +1,186 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. 
+ +package com.azure.storage.blob.implementation.models; + +import com.azure.core.annotation.Fluent; +import com.azure.core.annotation.Generated; +import com.azure.core.http.HttpHeaderName; +import com.azure.core.http.HttpHeaders; +import com.azure.core.util.DateTimeRfc1123; +import java.time.OffsetDateTime; + +/** + * The ContainersListBlobFlatSegmentApacheArrowHeaders model. + */ +@Fluent +public final class ContainersListBlobFlatSegmentApacheArrowHeaders { + /* + * The Content-Type property. + */ + @Generated + private String contentType; + + /* + * The x-ms-client-request-id property. + */ + @Generated + private String xMsClientRequestId; + + /* + * The x-ms-request-id property. + */ + @Generated + private String xMsRequestId; + + /* + * The x-ms-version property. + */ + @Generated + private String xMsVersion; + + /* + * The Date property. + */ + @Generated + private DateTimeRfc1123 date; + + private static final HttpHeaderName X_MS_VERSION = HttpHeaderName.fromString("x-ms-version"); + + // HttpHeaders containing the raw property values. + /** + * Creates an instance of ContainersListBlobFlatSegmentApacheArrowHeaders class. + * + * @param rawHeaders The raw HttpHeaders that will be used to create the property values. + */ + public ContainersListBlobFlatSegmentApacheArrowHeaders(HttpHeaders rawHeaders) { + this.contentType = rawHeaders.getValue(HttpHeaderName.CONTENT_TYPE); + this.xMsClientRequestId = rawHeaders.getValue(HttpHeaderName.X_MS_CLIENT_REQUEST_ID); + this.xMsRequestId = rawHeaders.getValue(HttpHeaderName.X_MS_REQUEST_ID); + this.xMsVersion = rawHeaders.getValue(X_MS_VERSION); + String date = rawHeaders.getValue(HttpHeaderName.DATE); + if (date != null) { + this.date = new DateTimeRfc1123(date); + } else { + this.date = null; + } + } + + /** + * Get the contentType property: The Content-Type property. + * + * @return the contentType value. 
+ */ + @Generated + public String getContentType() { + return this.contentType; + } + + /** + * Set the contentType property: The Content-Type property. + * + * @param contentType the contentType value to set. + * @return the ContainersListBlobFlatSegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobFlatSegmentApacheArrowHeaders setContentType(String contentType) { + this.contentType = contentType; + return this; + } + + /** + * Get the xMsClientRequestId property: The x-ms-client-request-id property. + * + * @return the xMsClientRequestId value. + */ + @Generated + public String getXMsClientRequestId() { + return this.xMsClientRequestId; + } + + /** + * Set the xMsClientRequestId property: The x-ms-client-request-id property. + * + * @param xMsClientRequestId the xMsClientRequestId value to set. + * @return the ContainersListBlobFlatSegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobFlatSegmentApacheArrowHeaders setXMsClientRequestId(String xMsClientRequestId) { + this.xMsClientRequestId = xMsClientRequestId; + return this; + } + + /** + * Get the xMsRequestId property: The x-ms-request-id property. + * + * @return the xMsRequestId value. + */ + @Generated + public String getXMsRequestId() { + return this.xMsRequestId; + } + + /** + * Set the xMsRequestId property: The x-ms-request-id property. + * + * @param xMsRequestId the xMsRequestId value to set. + * @return the ContainersListBlobFlatSegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobFlatSegmentApacheArrowHeaders setXMsRequestId(String xMsRequestId) { + this.xMsRequestId = xMsRequestId; + return this; + } + + /** + * Get the xMsVersion property: The x-ms-version property. + * + * @return the xMsVersion value. + */ + @Generated + public String getXMsVersion() { + return this.xMsVersion; + } + + /** + * Set the xMsVersion property: The x-ms-version property. 
+ * + * @param xMsVersion the xMsVersion value to set. + * @return the ContainersListBlobFlatSegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobFlatSegmentApacheArrowHeaders setXMsVersion(String xMsVersion) { + this.xMsVersion = xMsVersion; + return this; + } + + /** + * Get the date property: The Date property. + * + * @return the date value. + */ + @Generated + public OffsetDateTime getDate() { + if (this.date == null) { + return null; + } + return this.date.getDateTime(); + } + + /** + * Set the date property: The Date property. + * + * @param date the date value to set. + * @return the ContainersListBlobFlatSegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobFlatSegmentApacheArrowHeaders setDate(OffsetDateTime date) { + if (date == null) { + this.date = null; + } else { + this.date = new DateTimeRfc1123(date); + } + return this; + } +} diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/ContainersListBlobHierarchySegmentApacheArrowHeaders.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/ContainersListBlobHierarchySegmentApacheArrowHeaders.java new file mode 100644 index 000000000000..8c18650a8be5 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/models/ContainersListBlobHierarchySegmentApacheArrowHeaders.java @@ -0,0 +1,186 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// Code generated by Microsoft (R) AutoRest Code Generator. 
+ +package com.azure.storage.blob.implementation.models; + +import com.azure.core.annotation.Fluent; +import com.azure.core.annotation.Generated; +import com.azure.core.http.HttpHeaderName; +import com.azure.core.http.HttpHeaders; +import com.azure.core.util.DateTimeRfc1123; +import java.time.OffsetDateTime; + +/** + * The ContainersListBlobHierarchySegmentApacheArrowHeaders model. + */ +@Fluent +public final class ContainersListBlobHierarchySegmentApacheArrowHeaders { + /* + * The Content-Type property. + */ + @Generated + private String contentType; + + /* + * The x-ms-client-request-id property. + */ + @Generated + private String xMsClientRequestId; + + /* + * The x-ms-request-id property. + */ + @Generated + private String xMsRequestId; + + /* + * The x-ms-version property. + */ + @Generated + private String xMsVersion; + + /* + * The Date property. + */ + @Generated + private DateTimeRfc1123 date; + + private static final HttpHeaderName X_MS_VERSION = HttpHeaderName.fromString("x-ms-version"); + + // HttpHeaders containing the raw property values. + /** + * Creates an instance of ContainersListBlobHierarchySegmentApacheArrowHeaders class. + * + * @param rawHeaders The raw HttpHeaders that will be used to create the property values. + */ + public ContainersListBlobHierarchySegmentApacheArrowHeaders(HttpHeaders rawHeaders) { + this.contentType = rawHeaders.getValue(HttpHeaderName.CONTENT_TYPE); + this.xMsClientRequestId = rawHeaders.getValue(HttpHeaderName.X_MS_CLIENT_REQUEST_ID); + this.xMsRequestId = rawHeaders.getValue(HttpHeaderName.X_MS_REQUEST_ID); + this.xMsVersion = rawHeaders.getValue(X_MS_VERSION); + String date = rawHeaders.getValue(HttpHeaderName.DATE); + if (date != null) { + this.date = new DateTimeRfc1123(date); + } else { + this.date = null; + } + } + + /** + * Get the contentType property: The Content-Type property. + * + * @return the contentType value. 
+ */ + @Generated + public String getContentType() { + return this.contentType; + } + + /** + * Set the contentType property: The Content-Type property. + * + * @param contentType the contentType value to set. + * @return the ContainersListBlobHierarchySegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobHierarchySegmentApacheArrowHeaders setContentType(String contentType) { + this.contentType = contentType; + return this; + } + + /** + * Get the xMsClientRequestId property: The x-ms-client-request-id property. + * + * @return the xMsClientRequestId value. + */ + @Generated + public String getXMsClientRequestId() { + return this.xMsClientRequestId; + } + + /** + * Set the xMsClientRequestId property: The x-ms-client-request-id property. + * + * @param xMsClientRequestId the xMsClientRequestId value to set. + * @return the ContainersListBlobHierarchySegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobHierarchySegmentApacheArrowHeaders setXMsClientRequestId(String xMsClientRequestId) { + this.xMsClientRequestId = xMsClientRequestId; + return this; + } + + /** + * Get the xMsRequestId property: The x-ms-request-id property. + * + * @return the xMsRequestId value. + */ + @Generated + public String getXMsRequestId() { + return this.xMsRequestId; + } + + /** + * Set the xMsRequestId property: The x-ms-request-id property. + * + * @param xMsRequestId the xMsRequestId value to set. + * @return the ContainersListBlobHierarchySegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobHierarchySegmentApacheArrowHeaders setXMsRequestId(String xMsRequestId) { + this.xMsRequestId = xMsRequestId; + return this; + } + + /** + * Get the xMsVersion property: The x-ms-version property. + * + * @return the xMsVersion value. + */ + @Generated + public String getXMsVersion() { + return this.xMsVersion; + } + + /** + * Set the xMsVersion property: The x-ms-version property. 
+ * + * @param xMsVersion the xMsVersion value to set. + * @return the ContainersListBlobHierarchySegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobHierarchySegmentApacheArrowHeaders setXMsVersion(String xMsVersion) { + this.xMsVersion = xMsVersion; + return this; + } + + /** + * Get the date property: The Date property. + * + * @return the date value. + */ + @Generated + public OffsetDateTime getDate() { + if (this.date == null) { + return null; + } + return this.date.getDateTime(); + } + + /** + * Set the date property: The Date property. + * + * @param date the date value to set. + * @return the ContainersListBlobHierarchySegmentApacheArrowHeaders object itself. + */ + @Generated + public ContainersListBlobHierarchySegmentApacheArrowHeaders setDate(OffsetDateTime date) { + if (date == null) { + this.date = null; + } else { + this.date = new DateTimeRfc1123(date); + } + return this; + } +} diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/ArrowBlobListDeserializer.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/ArrowBlobListDeserializer.java new file mode 100644 index 000000000000..58f661a81b14 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/ArrowBlobListDeserializer.java @@ -0,0 +1,336 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.storage.blob.implementation.util; + +import com.azure.storage.blob.implementation.models.BlobItemInternal; +import com.azure.storage.blob.implementation.models.BlobItemPropertiesInternal; +import com.azure.storage.blob.implementation.models.BlobListArrowParseException; +import com.azure.storage.blob.implementation.models.BlobName; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.Batch; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.BoolColumn; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.Column; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.IntColumn; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.MapColumn; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.Parsed; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.StringColumn; +import com.azure.storage.blob.implementation.util.BlobListArrowStreamReader.TimestampColumn; +import com.azure.storage.blob.models.AccessTier; +import com.azure.storage.blob.models.ArchiveStatus; +import com.azure.storage.blob.models.BlobImmutabilityPolicyMode; +import com.azure.storage.blob.models.BlobType; +import com.azure.storage.blob.models.CopyStatusType; +import com.azure.storage.blob.models.LeaseDurationType; +import com.azure.storage.blob.models.LeaseStateType; +import com.azure.storage.blob.models.LeaseStatusType; +import com.azure.storage.blob.models.RehydratePriority; + +import java.io.InputStream; +import java.time.Instant; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.Map; + +/** + * Deserializes an Apache Arrow IPC stream from the ListBlobs response into a list of {@link BlobItemInternal} objects. 
+ */ +public final class ArrowBlobListDeserializer { + + /** + * Result of deserializing an Arrow ListBlobs response. + */ + public static final class ArrowListBlobsResult { + private final List blobItems; + private final String nextMarker; + private final Integer numberOfRecords; + + /** + * Creates an ArrowListBlobsResult. + * + * @param blobItems the deserialized blob items + * @param nextMarker the continuation token for the next page, or null if this is the last page + * @param numberOfRecords the total number of records reported by the service, or null if not present + */ + public ArrowListBlobsResult(List blobItems, String nextMarker, Integer numberOfRecords) { + this.blobItems = blobItems; + this.nextMarker = nextMarker; + this.numberOfRecords = numberOfRecords; + } + + /** + * @return the deserialized blob items + */ + public List getBlobItems() { + return blobItems; + } + + /** + * @return the continuation token for the next page, or null if this is the last page + */ + public String getNextMarker() { + return nextMarker; + } + + /** + * @return the total number of records reported by the service, or null if not present + */ + public Integer getNumberOfRecords() { + return numberOfRecords; + } + } + + private ArrowBlobListDeserializer() { + } + + /** + * Deserializes an Arrow IPC stream into blob items and pagination metadata. 
+ * + * @param arrowStream the Arrow IPC input stream from the service response + * @return the deserialized result containing blob items and next marker + * @throws BlobListArrowParseException if deserialization fails + */ + public static ArrowListBlobsResult deserialize(InputStream arrowStream) { + if (arrowStream == null) { + throw new BlobListArrowParseException("ListBlobs Arrow parse failure: input stream is null."); + } + + List results = new ArrayList<>(); + String nextMarker = null; + Integer numberOfRecords = null; + + Parsed parsed = BlobListArrowStreamReader.read(arrowStream); + + Map schemaMetadata = parsed.getSchemaMetadata(); + if (schemaMetadata != null) { + nextMarker = schemaMetadata.get("NextMarker"); + if (nextMarker != null && nextMarker.isEmpty()) { + nextMarker = null; + } + + String numberOfRecordsStr = schemaMetadata.get("NumberOfRecords"); + if (numberOfRecordsStr != null && !numberOfRecordsStr.isEmpty()) { + try { + numberOfRecords = Integer.parseInt(numberOfRecordsStr); + } catch (NumberFormatException e) { + throw new BlobListArrowParseException( + "ListBlobs Arrow parse failure: schema metadata 'NumberOfRecords' isn't a valid integer.", e); + } + } + } + + for (Batch batch : parsed.getBatches()) { + int rowCount = batch.getRowCount(); + for (int i = 0; i < rowCount; i++) { + results.add(readRow(batch, i)); + } + } + + return new ArrowListBlobsResult(results, nextMarker, numberOfRecords); + } + + private static BlobItemInternal readRow(Batch batch, int index) { + BlobItemInternal item = new BlobItemInternal(); + + String name = getVarChar(batch, "Name", index); + if (name != null) { + item.setName(new BlobName().setContent(name)); + } + + String resourceType = getVarChar(batch, "ResourceType", index); + if ("blobprefix".equals(resourceType)) { + item.setIsPrefix(true); + return item; + } + + BlobItemPropertiesInternal properties = new BlobItemPropertiesInternal(); + + Boolean deleted = getBit(batch, "Deleted", index); + if (deleted != 
null) { + item.setDeleted(deleted); + } + + item.setSnapshot(getVarChar(batch, "Snapshot", index)); + item.setVersionId(getVarChar(batch, "VersionId", index)); + item.setIsCurrentVersion(getBit(batch, "IsCurrentVersion", index)); + item.setHasVersionsOnly(getBit(batch, "HasVersionsOnly", index)); + + Map metadata = getMap(batch, "Metadata", index); + if (metadata != null) { + item.setMetadata(metadata); + } + + Map orMetadata = getMap(batch, "OrMetadata", index); + if (orMetadata != null) { + item.setObjectReplicationMetadata(orMetadata); + } + + Map tags = getMap(batch, "Tags", index); + if (tags != null) { + item.setBlobTags(ModelHelper.toBlobTags(tags)); + } + + properties.setCreationTime(getTimestamp(batch, "Creation-Time", index)); + properties.setLastModified(getTimestamp(batch, "Last-Modified", index)); + properties.setETag(getVarChar(batch, "Etag", index)); + properties.setContentLength(getUInt64(batch, "Content-Length", index)); + properties.setContentType(getVarChar(batch, "Content-Type", index)); + properties.setContentEncoding(getVarChar(batch, "Content-Encoding", index)); + properties.setContentLanguage(getVarChar(batch, "Content-Language", index)); + properties.setContentDisposition(getVarChar(batch, "Content-Disposition", index)); + properties.setCacheControl(getVarChar(batch, "Cache-Control", index)); + + String contentMd5 = getVarChar(batch, "Content-MD5", index); + if (contentMd5 != null) { + properties.setContentMd5(Base64.getDecoder().decode(contentMd5)); + } + + String blobType = getVarChar(batch, "BlobType", index); + if (blobType != null) { + properties.setBlobType(BlobType.fromString(blobType)); + } + + String accessTier = getVarChar(batch, "AccessTier", index); + if (accessTier != null) { + properties.setAccessTier(AccessTier.fromString(accessTier)); + } + properties.setAccessTierInferred(getBit(batch, "AccessTierInferred", index)); + properties.setAccessTierChangeTime(getTimestamp(batch, "AccessTierChangeTime", index)); + + String 
smartAccessTier = getVarChar(batch, "SmartAccessTier", index); + if (smartAccessTier != null) { + properties.setSmartAccessTier(AccessTier.fromString(smartAccessTier)); + } + + String leaseStatus = getVarChar(batch, "LeaseStatus", index); + if (leaseStatus != null) { + properties.setLeaseStatus(LeaseStatusType.fromString(leaseStatus)); + } + String leaseState = getVarChar(batch, "LeaseState", index); + if (leaseState != null) { + properties.setLeaseState(LeaseStateType.fromString(leaseState)); + } + String leaseDuration = getVarChar(batch, "LeaseDuration", index); + if (leaseDuration != null) { + properties.setLeaseDuration(LeaseDurationType.fromString(leaseDuration)); + } + + properties.setServerEncrypted(getBit(batch, "ServerEncrypted", index)); + properties.setCustomerProvidedKeySha256(getVarChar(batch, "CustomerProvidedKeySha256", index)); + properties.setEncryptionScope(getVarChar(batch, "EncryptionScope", index)); + properties.setIncrementalCopy(getBit(batch, "IncrementalCopy", index)); + + properties.setCopyId(getVarChar(batch, "CopyId", index)); + String copyStatus = getVarChar(batch, "CopyStatus", index); + if (copyStatus != null) { + properties.setCopyStatus(CopyStatusType.fromString(copyStatus)); + } + properties.setCopySource(getVarChar(batch, "CopySource", index)); + properties.setCopyProgress(getVarChar(batch, "CopyProgress", index)); + properties.setCopyCompletionTime(getTimestamp(batch, "CopyCompletionTime", index)); + properties.setCopyStatusDescription(getVarChar(batch, "CopyStatusDescription", index)); + properties.setDestinationSnapshot(getVarChar(batch, "CopyDestinationSnapshot", index)); + + properties.setBlobSequenceNumber(getUInt64(batch, "x-ms-blob-sequence-number", index)); + + properties.setIsSealed(getBit(batch, "Sealed", index)); + properties.setLegalHold(getBit(batch, "LegalHold", index)); + properties.setDeletedTime(getTimestamp(batch, "DeletedTime", index)); + properties.setLastAccessedOn(getTimestamp(batch, "LastAccessTime", 
index)); + properties.setImmutabilityPolicyExpiresOn(getTimestamp(batch, "ImmutabilityPolicyUntilDate", index)); + + String immutabilityMode = getVarChar(batch, "ImmutabilityPolicyMode", index); + if (immutabilityMode != null) { + properties.setImmutabilityPolicyMode(BlobImmutabilityPolicyMode.fromString(immutabilityMode)); + } + + String archiveStatus = getVarChar(batch, "ArchiveStatus", index); + if (archiveStatus != null) { + properties.setArchiveStatus(ArchiveStatus.fromString(archiveStatus)); + } + + String rehydratePriority = getVarChar(batch, "RehydratePriority", index); + if (rehydratePriority != null) { + properties.setRehydratePriority(RehydratePriority.fromString(rehydratePriority)); + } + + Long tagCount = getUInt64(batch, "TagCount", index); + if (tagCount != null) { + properties.setTagCount(tagCount.intValue()); + } + Long remainingRetentionDays = getUInt64(batch, "RemainingRetentionDays", index); + if (remainingRetentionDays != null) { + properties.setRemainingRetentionDays(remainingRetentionDays.intValue()); + } + + item.setProperties(properties); + return item; + } + + // region Arrow helpers + + private static String getVarChar(Batch batch, String name, int index) { + Column column = batch.getColumn(name); + if (column == null || column.isNull(index)) { + return null; + } + if (!(column instanceof StringColumn)) { + throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + name + + "' has unsupported string column type '" + column.getClass().getSimpleName() + "'."); + } + return ((StringColumn) column).get(index); + } + + private static Long getUInt64(Batch batch, String name, int index) { + Column column = batch.getColumn(name); + if (column == null || column.isNull(index)) { + return null; + } + if (!(column instanceof IntColumn)) { + throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + name + + "' has unsupported integer column type '" + column.getClass().getSimpleName() + "'."); + } + return 
((IntColumn) column).get(index); + } + + private static Boolean getBit(Batch batch, String name, int index) { + Column column = batch.getColumn(name); + if (column == null || column.isNull(index)) { + return null; + } + if (!(column instanceof BoolColumn)) { + throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + name + + "' has unsupported boolean column type '" + column.getClass().getSimpleName() + "'."); + } + return ((BoolColumn) column).get(index); + } + + private static OffsetDateTime getTimestamp(Batch batch, String name, int index) { + Column column = batch.getColumn(name); + if (column == null || column.isNull(index)) { + return null; + } + if (!(column instanceof TimestampColumn)) { + throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + name + + "' has unsupported timestamp column type '" + column.getClass().getSimpleName() + "'."); + } + long epochSeconds = ((TimestampColumn) column).getEpochSeconds(index); + return OffsetDateTime.ofInstant(Instant.ofEpochSecond(epochSeconds), ZoneOffset.UTC); + } + + private static Map getMap(Batch batch, String name, int index) { + Column column = batch.getColumn(name); + if (column == null || column.isNull(index)) { + return null; + } + if (!(column instanceof MapColumn)) { + throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + name + + "' has unsupported map column type '" + column.getClass().getSimpleName() + "'."); + } + return ((MapColumn) column).get(index); + } + + //endregion +} diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReader.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReader.java new file mode 100644 index 000000000000..c1fb68b7c5ec --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReader.java @@ -0,0 
+1,620 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.storage.blob.implementation.util;
+
+import com.azure.storage.blob.implementation.models.BlobListArrowParseException;
+import com.azure.storage.blob.implementation.util.arrow.Buffer;
+import com.azure.storage.blob.implementation.util.arrow.Endianness;
+import com.azure.storage.blob.implementation.util.arrow.Field;
+import com.azure.storage.blob.implementation.util.arrow.FieldNode;
+import com.azure.storage.blob.implementation.util.arrow.Int;
+import com.azure.storage.blob.implementation.util.arrow.KeyValue;
+import com.azure.storage.blob.implementation.util.arrow.Message;
+import com.azure.storage.blob.implementation.util.arrow.MessageHeader;
+import com.azure.storage.blob.implementation.util.arrow.RecordBatch;
+import com.azure.storage.blob.implementation.util.arrow.Schema;
+import com.azure.storage.blob.implementation.util.arrow.TimeUnit;
+import com.azure.storage.blob.implementation.util.arrow.Timestamp;
+import com.azure.storage.blob.implementation.util.arrow.Type;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Minimal Apache Arrow IPC stream reader scoped to the needs of the ListBlobs Arrow response.
+ * <p>
+ * This reader intentionally supports only the subset of the Arrow IPC format emitted by the Storage ListBlobs
+ * endpoint: a single schema message followed by zero or more uncompressed, little-endian record batches whose
+ * columns are UTF-8 strings, booleans, integers, second-precision timestamps, and {@code map<string,string>}.
+ * Anything outside that subset (dictionaries, compression, big-endian, unsupported types) fails fast with
+ * {@link BlobListArrowParseException}.
+ * <p>
+ * Metadata is decoded through the flatbuffer accessors in the {@code arrow} subpackage and record batch bodies
+ * are read directly from the stream bytes, so the {@code arrow-vector} runtime is not required.
+ */
+final class BlobListArrowStreamReader {
+
+    private static final int CONTINUATION_MARKER = 0xFFFFFFFF;
+
+    private BlobListArrowStreamReader() {
+    }
+
+    /**
+     * The decoded contents of an Arrow IPC stream.
+     */
+    static final class Parsed {
+        private final Map<String, String> schemaMetadata;
+        private final List<Batch> batches;
+
+        Parsed(Map<String, String> schemaMetadata, List<Batch> batches) {
+            this.schemaMetadata = schemaMetadata;
+            this.batches = batches;
+        }
+
+        Map<String, String> getSchemaMetadata() {
+            return schemaMetadata;
+        }
+
+        List<Batch> getBatches() {
+            return batches;
+        }
+    }
+
+    /**
+     * A single decoded record batch: a row count and columns addressable by field name.
+     */
+    static final class Batch {
+        private final int rowCount;
+        private final Map<String, Column> columns;
+
+        Batch(int rowCount, Map<String, Column> columns) {
+            this.rowCount = rowCount;
+            this.columns = columns;
+        }
+
+        int getRowCount() {
+            return rowCount;
+        }
+
+        Column getColumn(String name) {
+            return columns.get(name);
+        }
+    }
+
+    /**
+     * Reads and decodes an Arrow IPC stream.
+     *
+     * @param stream the Arrow IPC stream; it is read to the end but not closed.
+     * @return the decoded schema metadata and record batches.
+     * @throws BlobListArrowParseException if the stream is malformed or uses an unsupported feature.
+     */
+    static Parsed read(InputStream stream) {
+        byte[] bytes;
+        try {
+            bytes = readAll(stream);
+        } catch (IOException e) {
+            throw new BlobListArrowParseException("ListBlobs Arrow parse failure: unable to read IPC stream.", e);
+        }
+
+        ByteBuffer body = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN);
+
+        Map<String, String> schemaMetadata = null;
+        List<ArrowField> fields = null;
+        List<Batch> batches = new ArrayList<>();
+
+        int pos = 0;
+        int length = bytes.length;
+        // Compare against the remaining byte count so an oversized length cannot overflow the int arithmetic.
+        while (length - pos >= 4) {
+            int marker = body.getInt(pos);
+            pos += 4;
+
+            int metadataLength;
+            if (marker == CONTINUATION_MARKER) {
+                if (length - pos < 4) {
+                    break;
+                }
+                metadataLength = body.getInt(pos);
+                pos += 4;
+            } else {
+                // Pre-0.15 streams used a bare length prefix without the continuation marker.
+                metadataLength = marker;
+            }
+
+            if (metadataLength == 0) {
+                // End-of-stream marker.
+                break;
+            }
+            if (metadataLength < 0 || metadataLength > length - pos) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: message metadata length is out of bounds.");
+            }
+
+            ByteBuffer messageBuffer
+                = ByteBuffer.wrap(bytes, pos, metadataLength).slice().order(ByteOrder.LITTLE_ENDIAN);
+            Message message = Message.getRootAsMessage(messageBuffer);
+            pos += metadataLength;
+
+            long bodyLength = message.bodyLength();
+            if (bodyLength < 0 || bodyLength > length - pos) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: message body length is out of bounds.");
+            }
+            int bodyStart = pos;
+            pos += (int) bodyLength;
+
+            byte headerType = message.headerType();
+            if (headerType == MessageHeader.SCHEMA) {
+                Schema schema = (Schema) message.header(new Schema());
+                if (schema == null) {
+                    throw new BlobListArrowParseException(
+                        "ListBlobs Arrow parse failure: schema message header is missing.");
+                }
+                if (schema.endianness() != Endianness.LITTLE) {
+                    throw new BlobListArrowParseException(
+                        "ListBlobs Arrow parse failure: only little-endian streams are supported.");
+                }
+                schemaMetadata = readKeyValueMetadata(schema);
+                fields = readFields(schema);
+            } else if (headerType == MessageHeader.RECORD_BATCH) {
+                if (fields == null) {
+                    throw new BlobListArrowParseException(
+                        "ListBlobs Arrow parse failure: record batch encountered before schema.");
+                }
+                RecordBatch recordBatch = (RecordBatch) message.header(new RecordBatch());
+                if (recordBatch == null) {
+                    throw new BlobListArrowParseException(
+                        "ListBlobs Arrow parse failure: record batch message header is missing.");
+                }
+                if (recordBatch.compression() != null) {
+                    throw new BlobListArrowParseException(
+                        "ListBlobs Arrow parse failure: compressed record batches are not supported.");
+                }
+                batches.add(buildBatch(fields, recordBatch, body, bodyStart, bodyLength));
+            } else if (headerType == MessageHeader.DICTIONARY_BATCH) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: dictionary-encoded streams are not supported.");
+            }
+            // Other header types (Tensor, SparseTensor) are not expected and are ignored.
+        }
+
+        if (fields == null) {
+            throw new BlobListArrowParseException("ListBlobs Arrow parse failure: stream contained no schema.");
+        }
+
+        return new Parsed(schemaMetadata == null ? new HashMap<>() : schemaMetadata, batches);
+    }
+
+    /**
+     * Decodes one record batch by walking the schema fields in order and consuming nodes and buffers from a cursor.
+     */
+    private static Batch buildBatch(List<ArrowField> fields, RecordBatch recordBatch, ByteBuffer body, int bodyStart,
+        long bodyLength) {
+        BatchCursor cursor = new BatchCursor(recordBatch, body, bodyStart, bodyLength);
+        Map<String, Column> columns = new LinkedHashMap<>();
+        for (ArrowField field : fields) {
+            columns.put(field.name, buildColumn(field, cursor));
+        }
+        return new Batch(toCount(recordBatch.length(), "record batch length"), columns);
+    }
+
+    /**
+     * Narrows a 64-bit count from the wire to an {@code int}, rejecting negative or oversized values instead of
+     * silently truncating them.
+     */
+    private static int toCount(long value, String description) {
+        if (value < 0 || value > Integer.MAX_VALUE) {
+            throw new BlobListArrowParseException(
+                "ListBlobs Arrow parse failure: " + description + " is out of range: " + value + ".");
+        }
+        return (int) value;
+    }
+
+    /**
+     * Decodes the column for {@code field}, consuming its field node and buffers (and those of any children) from
+     * the cursor in pre-order.
+     */
+    private static Column buildColumn(ArrowField field, BatchCursor cursor) {
+        FieldNode node = cursor.nextNode();
+        int valueCount = toCount(node.length(), "value count of field '" + field.name + "'");
+
+        switch (field.typeType) {
+            case Type.UTF8:
+            case Type.BINARY: {
+                BufferRegion validity = cursor.nextBuffer();
+                BufferRegion offsets = cursor.nextBuffer();
+                BufferRegion data = cursor.nextBuffer();
+                return new StringColumn(valueCount, validity, offsets, data, cursor.body, cursor.bodyStart);
+            }
+
+            case Type.BOOL: {
+                BufferRegion validity = cursor.nextBuffer();
+                BufferRegion data = cursor.nextBuffer();
+                return new BoolColumn(valueCount, validity, data, cursor.body, cursor.bodyStart);
+            }
+
+            case Type.INT: {
+                BufferRegion validity = cursor.nextBuffer();
+                BufferRegion data = cursor.nextBuffer();
+                return new IntColumn(valueCount, validity, data, field.bitWidth, field.signed, cursor.body,
+                    cursor.bodyStart);
+            }
+
+            case Type.TIMESTAMP: {
+                BufferRegion validity = cursor.nextBuffer();
+                BufferRegion data = cursor.nextBuffer();
+                return new TimestampColumn(valueCount, validity, data, cursor.body, cursor.bodyStart);
+            }
+
+            case Type.MAP: {
+                BufferRegion validity = cursor.nextBuffer();
+                BufferRegion offsets = cursor.nextBuffer();
+                // Map has a single Struct child ("entries") with key and value children. Check that shape up
+                // front so a malformed schema surfaces as a parse failure rather than an index or cast error.
+                ArrowField entries = field.children.isEmpty() ? null : field.children.get(0);
+                if (entries == null || entries.typeType != Type.STRUCT || entries.children.size() < 2) {
+                    throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + field.name
+                        + "' map must have a struct child with key and value fields.");
+                }
+                StructColumn struct = (StructColumn) buildColumn(entries, cursor);
+                Column keyColumn = struct.children.get(0);
+                Column valueColumn = struct.children.get(1);
+                if (!(keyColumn instanceof StringColumn) || !(valueColumn instanceof StringColumn)) {
+                    throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + field.name
+                        + "' map entries must be string keys and values.");
+                }
+                return new MapColumn(valueCount, validity, offsets, (StringColumn) keyColumn,
+                    (StringColumn) valueColumn, cursor.body, cursor.bodyStart);
+            }
+
+            case Type.STRUCT: {
+                cursor.nextBuffer(); // struct validity buffer
+                List<Column> children = new ArrayList<>(field.children.size());
+                for (ArrowField child : field.children) {
+                    children.add(buildColumn(child, cursor));
+                }
+                return new StructColumn(children);
+            }
+
+            default:
+                throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + field.name
+                    + "' has unsupported Arrow type '" + Type.name(field.typeType) + "'.");
+        }
+    }
+
+    /**
+     * Converts the top-level fields of the schema into {@link ArrowField} descriptors, preserving their order.
+     */
+    private static List<ArrowField> readFields(Schema schema) {
+        int count = schema.fieldsLength();
+        List<ArrowField> fields = new ArrayList<>(count);
+        for (int i = 0; i < count; i++) {
+            fields.add(readField(schema.fields(i)));
+        }
+        return fields;
+    }
+
+    /**
+     * Converts one schema field and, recursively, its children. Integer width and signedness are captured;
+     * timestamp fields with a unit other than seconds are rejected.
+     */
+    private static ArrowField readField(Field field) {
+        ArrowField arrowField = new ArrowField();
+        arrowField.name = field.name();
+        arrowField.typeType = field.typeType();
+
+        if (arrowField.typeType == Type.INT) {
+            Int intType = (Int) field.type(new Int());
+            if (intType != null) {
+                arrowField.bitWidth = intType.bitWidth();
+                arrowField.signed = intType.isSigned();
+            }
+        } else if (arrowField.typeType == Type.TIMESTAMP) {
+            Timestamp timestamp = (Timestamp) field.type(new Timestamp());
+            if (timestamp != null && timestamp.unit() != TimeUnit.SECOND) {
+                throw new BlobListArrowParseException("ListBlobs Arrow parse failure: field '" + arrowField.name
+                    + "' uses an unsupported timestamp unit '" + TimeUnit.name(timestamp.unit()) + "'.");
+            }
+        }
+
+        int childCount = field.childrenLength();
+        arrowField.children = new ArrayList<>(childCount);
+        for (int i = 0; i < childCount; i++) {
+            arrowField.children.add(readField(field.children(i)));
+        }
+        return arrowField;
+    }
+
+    /**
+     * Copies the schema's custom key/value metadata into a map; the map is empty when there is none.
+     */
+    private static Map<String, String> readKeyValueMetadata(Schema schema) {
+        int count = schema.customMetadataLength();
+        Map<String, String> metadata = new HashMap<>();
+        for (int i = 0; i < count; i++) {
+            KeyValue keyValue = schema.customMetadata(i);
+            metadata.put(keyValue.key(), keyValue.value());
+        }
+        return metadata;
+    }
+
+    /**
+     * Drains the stream into a byte array. The stream is not closed here.
+     */
+    private static byte[] readAll(InputStream stream) throws IOException {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+        byte[] chunk = new byte[8192];
+        int read;
+        while ((read = stream.read(chunk)) != -1) {
+            buffer.write(chunk, 0, read);
+        }
+        return buffer.toByteArray();
+    }
+
+    /**
+     * A parsed schema field with the minimal type information required for decoding.
+     */
+    private static final class ArrowField {
+        private String name;
+        private byte typeType;
+        private int bitWidth;
+        private boolean signed;
+        private List<ArrowField> children = new ArrayList<>();
+    }
+
+    /**
+     * Sequentially hands out the field nodes and buffers of a record batch in pre-order.
+     */
+    private static final class BatchCursor {
+        private final RecordBatch recordBatch;
+        private final ByteBuffer body;
+        private final int bodyStart;
+        private final long bodyLength;
+        private int nodeIndex;
+        private int bufferIndex;
+
+        BatchCursor(RecordBatch recordBatch, ByteBuffer body, int bodyStart, long bodyLength) {
+            this.recordBatch = recordBatch;
+            this.body = body;
+            this.bodyStart = bodyStart;
+            this.bodyLength = bodyLength;
+        }
+
+        FieldNode nextNode() {
+            if (nodeIndex >= recordBatch.nodesLength()) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: record batch is missing expected field nodes.");
+            }
+            return recordBatch.nodes(nodeIndex++);
+        }
+
+        BufferRegion nextBuffer() {
+            if (bufferIndex >= recordBatch.buffersLength()) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: record batch is missing expected buffers.");
+            }
+            Buffer buffer = recordBatch.buffers(bufferIndex++);
+            long offset = buffer.offset();
+            long length = buffer.length();
+            // Every buffer must lie inside this message's body; after this check the int casts used by the
+            // column accessors cannot truncate.
+            if (offset < 0 || length < 0 || offset > bodyLength - length) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: record batch buffer is outside the message body.");
+            }
+            return new BufferRegion(offset, length);
+        }
+    }
+
+    /**
+     * Offset and length of a single buffer within the record batch body.
+     */
+    private static final class BufferRegion {
+        private final long offset;
+        private final long length;
+
+        BufferRegion(long offset, long length) {
+            this.offset = offset;
+            this.length = length;
+        }
+    }
+
+    /**
+     * Base class for decoded columns.
+     */
+    abstract static class Column {
+        final int valueCount;
+        final BufferRegion validity;
+        final ByteBuffer body;
+        final int bodyStart;
+
+        Column(int valueCount, BufferRegion validity, ByteBuffer body, int bodyStart) {
+            this.valueCount = valueCount;
+            this.validity = validity;
+            this.body = body;
+            this.bodyStart = bodyStart;
+        }
+
+        /**
+         * Tests the validity bitmap for the slot at {@code index}. An absent or empty validity buffer is treated as
+         * "no nulls".
+         */
+        boolean isNull(int index) {
+            if (validity == null || validity.length == 0) {
+                return false;
+            }
+            int bytePosition = bodyStart + (int) validity.offset + (index >> 3);
+            int bit = (body.get(bytePosition) >> (index & 7)) & 1;
+            return bit == 0;
+        }
+    }
+
+    /**
+     * UTF-8 string column (Arrow Utf8/Binary).
+     */
+    static final class StringColumn extends Column {
+        private final BufferRegion offsets;
+        private final BufferRegion data;
+
+        StringColumn(int valueCount, BufferRegion validity, BufferRegion offsets, BufferRegion data, ByteBuffer body,
+            int bodyStart) {
+            super(valueCount, validity, body, bodyStart);
+            this.offsets = offsets;
+            this.data = data;
+        }
+
+        String get(int index) {
+            int offsetsBase = bodyStart + (int) offsets.offset;
+            int start = body.getInt(offsetsBase + index * 4);
+            int end = body.getInt(offsetsBase + (index + 1) * 4);
+            // Reject corrupt offsets before allocating; a negative length would otherwise escape as a JDK exception.
+            if (start < 0 || end < start || end > data.length) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: string column value offsets are out of bounds.");
+            }
+            int dataStart = bodyStart + (int) data.offset + start;
+            byte[] valueBytes = new byte[end - start];
+            for (int i = 0; i < valueBytes.length; i++) {
+                valueBytes[i] = body.get(dataStart + i);
+            }
+            return new String(valueBytes, StandardCharsets.UTF_8);
+        }
+    }
+
+    /**
+     * Boolean column stored as a bitmap (Arrow Bool).
+     */
+    static final class BoolColumn extends Column {
+        private final BufferRegion data;
+
+        BoolColumn(int valueCount, BufferRegion validity, BufferRegion data, ByteBuffer body, int bodyStart) {
+            super(valueCount, validity, body, bodyStart);
+            this.data = data;
+        }
+
+        boolean get(int index) {
+            int bytePosition = bodyStart + (int) data.offset + (index >> 3);
+            int bit = (body.get(bytePosition) >> (index & 7)) & 1;
+            return bit == 1;
+        }
+    }
+
+    /**
+     * Integer column (Arrow Int) of width 8/16/32/64, signed or unsigned, returned as a long. The 64-bit case returns
+     * the stored bits unchanged, so unsigned values above {@code Long.MAX_VALUE} appear negative.
+     */
+    static final class IntColumn extends Column {
+        private final BufferRegion data;
+        private final int bitWidth;
+        private final boolean signed;
+
+        IntColumn(int valueCount, BufferRegion validity, BufferRegion data, int bitWidth, boolean signed,
+            ByteBuffer body, int bodyStart) {
+            super(valueCount, validity, body, bodyStart);
+            this.data = data;
+            this.bitWidth = bitWidth;
+            this.signed = signed;
+        }
+
+        long get(int index) {
+            int base = bodyStart + (int) data.offset;
+            switch (bitWidth) {
+                case 64:
+                    return body.getLong(base + index * 8);
+
+                case 32: {
+                    int value = body.getInt(base + index * 4);
+                    return signed ? value : (value & 0xFFFFFFFFL);
+                }
+
+                case 16: {
+                    short value = body.getShort(base + index * 2);
+                    return signed ? value : (value & 0xFFFF);
+                }
+
+                case 8: {
+                    byte value = body.get(base + index);
+                    return signed ? value : (value & 0xFF);
+                }
+
+                default:
+                    throw new BlobListArrowParseException(
+                        "ListBlobs Arrow parse failure: unsupported integer bit width '" + bitWidth + "'.");
+            }
+        }
+    }
+
+    /**
+     * Second-precision timestamp column (Arrow Timestamp, SECOND unit).
+     */
+    static final class TimestampColumn extends Column {
+        private final BufferRegion data;
+
+        TimestampColumn(int valueCount, BufferRegion validity, BufferRegion data, ByteBuffer body, int bodyStart) {
+            super(valueCount, validity, body, bodyStart);
+            this.data = data;
+        }
+
+        long getEpochSeconds(int index) {
+            return body.getLong(bodyStart + (int) data.offset + index * 8);
+        }
+    }
+
+    /**
+     * Struct column holding ordered child columns (used internally for map entries).
+     */
+    static final class StructColumn extends Column {
+        private final List<Column> children;
+
+        StructColumn(List<Column> children) {
+            super(0, null, null, 0);
+            this.children = children;
+        }
+    }
+
+    /**
+     * {@code Map<string,string>} column (Arrow Map of {@code Struct<key:utf8,value:utf8>}).
+     */
+    static final class MapColumn extends Column {
+        private final BufferRegion offsets;
+        private final StringColumn keys;
+        private final StringColumn values;
+
+        MapColumn(int valueCount, BufferRegion validity, BufferRegion offsets, StringColumn keys, StringColumn values,
+            ByteBuffer body, int bodyStart) {
+            super(valueCount, validity, body, bodyStart);
+            this.offsets = offsets;
+            this.keys = keys;
+            this.values = values;
+        }
+
+        /**
+         * Gets the entries of the map at {@code index}, or {@code null} when that map has no entries.
+         */
+        Map<String, String> get(int index) {
+            int offsetsBase = bodyStart + (int) offsets.offset;
+            int start = body.getInt(offsetsBase + index * 4);
+            int end = body.getInt(offsetsBase + (index + 1) * 4);
+            // Entry offsets index into the key and value columns, so they must stay within both.
+            if (start < 0 || end < start || end > keys.valueCount || end > values.valueCount) {
+                throw new BlobListArrowParseException(
+                    "ListBlobs Arrow parse failure: map column entry offsets are out of bounds.");
+            }
+            Map<String, String> map = new HashMap<>();
+            for (int entry = start; entry < end; entry++) {
+                map.put(keys.get(entry), values.get(entry));
+            }
+            return map.isEmpty() ? null : map;
+        }
+    }
+}
diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/ModelHelper.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/ModelHelper.java index cb859dfa595d..6bd5deb8cc55 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/ModelHelper.java +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/ModelHelper.java @@ -47,6 +47,7 @@ import com.azure.storage.blob.models.PageBlobCopyIncrementalRequestConditions; import com.azure.storage.blob.models.PageRange; import com.azure.storage.blob.models.ParallelTransferOptions; +import com.azure.storage.blob.models.StorageResponseSerializationFormat; import com.azure.storage.blob.models.TaggedBlobItem; import com.azure.storage.common.Utility; import com.azure.storage.common.implementation.Constants; @@ -85,6 +86,14 @@ public final class ModelHelper { */ public static final int PAGE_BYTES = 512; + /** + * The format that {@link StorageResponseSerializationFormat#AUTO} currently resolves to on the + * wire.
Changing this constant is a behavioral change (and should be noted in the CHANGELOG) + * but it is not a public API change. + */ + private static final StorageResponseSerializationFormat DEFAULT_SERIALIZATION_FORMAT + = StorageResponseSerializationFormat.XML; + /** * Determines whether the passed authority is IP style, that is, it is of the format {@code :}. * @@ -663,6 +672,22 @@ public static BlobStorageException mapToBlobStorageException(BlobStorageExceptio internal.getResponse(), code, headerName), internal.getResponse(), internal.getValue()); } + /** + * Resolves a user-supplied {@link StorageResponseSerializationFormat} to the concrete value + * to send on the wire. Treats {@code null} and {@link StorageResponseSerializationFormat#AUTO} + * identically — both yield {@link #DEFAULT_SERIALIZATION_FORMAT}. + * + * @param format the format requested by the caller, or {@code null} if unset. + * @return the concrete {@link StorageResponseSerializationFormat} to send on the wire. + */ + public static StorageResponseSerializationFormat + resolveSerializationFormat(StorageResponseSerializationFormat format) { + if (format == null || format == StorageResponseSerializationFormat.AUTO) { + return DEFAULT_SERIALIZATION_FORMAT; + } + return format; + } + private ModelHelper() { } } diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/BodyCompression.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/BodyCompression.java new file mode 100644 index 000000000000..1def5fa220bd --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/BodyCompression.java @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code BodyCompression} table. + *

+ * The ListBlobs reader only needs to detect the presence of this table to reject compressed record batches, so no + * fields are exposed. + */ +public final class BodyCompression extends Table { + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public BodyCompression __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Buffer.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Buffer.java new file mode 100644 index 000000000000..2fc6c28447ce --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Buffer.java @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Struct; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code Buffer} struct (offset/length of a buffer within a record batch body). + */ +public final class Buffer extends Struct { + /** + * Positions this accessor at the given struct offset. + * + * @param i the struct offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given struct offset. + * + * @param i the struct offset. + * @param bb the backing buffer. + * @return this accessor. 
+ */ + public Buffer __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the byte offset of the buffer relative to the start of the record batch body. + * + * @return the buffer offset. + */ + public long offset() { + return bb.getLong(bb_pos); + } + + /** + * Gets the length, in bytes, of the buffer. + * + * @return the buffer length. + */ + public long length() { + return bb.getLong(bb_pos + 8); + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Endianness.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Endianness.java new file mode 100644 index 000000000000..91c30efe2edb --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Endianness.java @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +/** + * Values for the Arrow IPC {@code Endianness} enum. + */ +public final class Endianness { + private Endianness() { + } + + /** Little-endian byte order. */ + public static final short LITTLE = 0; + /** Big-endian byte order. */ + public static final short BIG = 1; +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Field.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Field.java new file mode 100644 index 000000000000..f121785d16e1 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Field.java @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code Field} table describing a single column. + */ +public final class Field extends Table { + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public Field __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the field name. + * + * @return the field name, or {@code null} when absent. + */ + public String name() { + int o = __offset(4); + return o != 0 ? __string(o + bb_pos) : null; + } + + /** + * Gets the discriminator identifying the field's {@code type} union (see {@link Type}). + * + * @return the type union discriminator, or {@code 0} when absent. + */ + public byte typeType() { + int o = __offset(8); + return o != 0 ? bb.get(o + bb_pos) : 0; + } + + /** + * Resolves the {@code type} union value into the supplied accessor. + * + * @param obj the accessor to assign to the union value. + * @return the assigned accessor, or {@code null} when absent. + */ + public Table type(Table obj) { + int o = __offset(10); + return o != 0 ? __union(obj, o + bb_pos) : null; + } + + /** + * Gets the child field at the given index. + * + * @param j the child index. + * @return the child field accessor. + */ + public Field children(int j) { + return children(new Field(), j); + } + + /** + * Gets the child field at the given index into the supplied accessor. + * + * @param obj the accessor to assign. + * @param j the child index. + * @return the assigned accessor, or {@code null} when absent. 
+ */ + public Field children(Field obj, int j) { + int o = __offset(14); + return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; + } + + /** + * Gets the number of child fields. + * + * @return the child field count. + */ + public int childrenLength() { + int o = __offset(14); + return o != 0 ? __vector_len(o) : 0; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/FieldNode.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/FieldNode.java new file mode 100644 index 000000000000..bc44e0d0ccb6 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/FieldNode.java @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Struct; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code FieldNode} struct (per-column metadata within a record batch). + */ +public final class FieldNode extends Struct { + /** + * Positions this accessor at the given struct offset. + * + * @param i the struct offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given struct offset. + * + * @param i the struct offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public FieldNode __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the number of value slots in the column. + * + * @return the value count. + */ + public long length() { + return bb.getLong(bb_pos); + } + + /** + * Gets the number of null value slots in the column. + * + * @return the null count. 
+ */ + public long nullCount() { + return bb.getLong(bb_pos + 8); + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Int.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Int.java new file mode 100644 index 000000000000..efbddb99e3ff --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Int.java @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code Int} type table. + */ +public final class Int extends Table { + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public Int __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the bit width of the integer (8, 16, 32, or 64). + * + * @return the bit width, or {@code 0} when absent. + */ + public int bitWidth() { + int o = __offset(4); + return o != 0 ? bb.getInt(o + bb_pos) : 0; + } + + /** + * Gets whether the integer is signed. + * + * @return {@code true} if signed, otherwise {@code false}. 
+ */ + public boolean isSigned() { + int o = __offset(6); + return o != 0 && 0 != bb.get(o + bb_pos); + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/KeyValue.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/KeyValue.java new file mode 100644 index 000000000000..c70a275a3278 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/KeyValue.java @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code KeyValue} table (a single custom metadata entry). + */ +public final class KeyValue extends Table { + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public KeyValue __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the metadata key. + * + * @return the key, or {@code null} when absent. + */ + public String key() { + int o = __offset(4); + return o != 0 ? __string(o + bb_pos) : null; + } + + /** + * Gets the metadata value. + * + * @return the value, or {@code null} when absent. + */ + public String value() { + int o = __offset(6); + return o != 0 ? 
__string(o + bb_pos) : null; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Message.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Message.java new file mode 100644 index 000000000000..f68b14f57281 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Message.java @@ -0,0 +1,90 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Accessor for the Arrow IPC {@code Message} table (root of every encapsulated IPC message). + */ +public final class Message extends Table { + /** + * Reads the {@code Message} located at the root offset of the supplied buffer. + * + * @param bb the little-endian buffer positioned at the start of the message. + * @return the {@code Message} accessor. + */ + public static Message getRootAsMessage(ByteBuffer bb) { + return getRootAsMessage(bb, new Message()); + } + + /** + * Reads the {@code Message} located at the root offset of the supplied buffer into {@code obj}. + * + * @param bb the buffer positioned at the start of the message. + * @param obj the accessor instance to assign. + * @return the assigned {@code Message} accessor. + */ + public static Message getRootAsMessage(ByteBuffer bb, Message obj) { + bb.order(ByteOrder.LITTLE_ENDIAN); + return obj.__assign(bb.getInt(bb.position()) + bb.position(), bb); + } + + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. 
+ * @return this accessor. + */ + public Message __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the discriminator identifying the type of the {@code header} union (see {@link MessageHeader}). + * + * @return the header union type, or {@code 0} when absent. + */ + public byte headerType() { + int o = __offset(6); + return o != 0 ? bb.get(o + bb_pos) : 0; + } + + /** + * Resolves the {@code header} union value into the supplied accessor. + * + * @param obj the accessor to assign to the union value. + * @return the assigned accessor, or {@code null} when the header is absent. + */ + public Table header(Table obj) { + int o = __offset(8); + return o != 0 ? __union(obj, o + bb_pos) : null; + } + + /** + * Gets the length, in bytes, of the message body that follows the metadata. + * + * @return the body length, or {@code 0} when absent. + */ + public long bodyLength() { + int o = __offset(10); + return o != 0 ? bb.getLong(o + bb_pos) : 0L; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/MessageHeader.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/MessageHeader.java new file mode 100644 index 000000000000..25e8c69da309 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/MessageHeader.java @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +/** + * Discriminator values for the Arrow IPC {@code MessageHeader} union. + */ +public final class MessageHeader { + private MessageHeader() { + } + + /** No header. */ + public static final byte NONE = 0; + /** A {@link Schema} header. */ + public static final byte SCHEMA = 1; + /** A dictionary batch header. 
*/ + public static final byte DICTIONARY_BATCH = 2; + /** A {@link RecordBatch} header. */ + public static final byte RECORD_BATCH = 3; + /** A tensor header. */ + public static final byte TENSOR = 4; + /** A sparse tensor header. */ + public static final byte SPARSE_TENSOR = 5; +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/RecordBatch.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/RecordBatch.java new file mode 100644 index 000000000000..7afa0f3cde36 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/RecordBatch.java @@ -0,0 +1,130 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code RecordBatch} table. + */ +public final class RecordBatch extends Table { + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public RecordBatch __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the number of rows in the record batch. + * + * @return the row count, or {@code 0} when absent. + */ + public long length() { + int o = __offset(4); + return o != 0 ? bb.getLong(o + bb_pos) : 0L; + } + + /** + * Gets the field node at the given index. + * + * @param j the node index. + * @return the field node accessor. 
+ */ + public FieldNode nodes(int j) { + return nodes(new FieldNode(), j); + } + + /** + * Gets the field node at the given index into the supplied accessor. + * + * @param obj the accessor to assign. + * @param j the node index. + * @return the assigned accessor, or {@code null} when absent. + */ + public FieldNode nodes(FieldNode obj, int j) { + int o = __offset(6); + return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; + } + + /** + * Gets the number of field nodes. + * + * @return the field node count. + */ + public int nodesLength() { + int o = __offset(6); + return o != 0 ? __vector_len(o) : 0; + } + + /** + * Gets the buffer region at the given index. + * + * @param j the buffer index. + * @return the buffer accessor. + */ + public Buffer buffers(int j) { + return buffers(new Buffer(), j); + } + + /** + * Gets the buffer region at the given index into the supplied accessor. + * + * @param obj the accessor to assign. + * @param j the buffer index. + * @return the assigned accessor, or {@code null} when absent. + */ + public Buffer buffers(Buffer obj, int j) { + int o = __offset(8); + return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; + } + + /** + * Gets the number of buffers. + * + * @return the buffer count. + */ + public int buffersLength() { + int o = __offset(8); + return o != 0 ? __vector_len(o) : 0; + } + + /** + * Gets the optional body compression descriptor. + * + * @return the body compression accessor, or {@code null} when the batch is uncompressed. + */ + public BodyCompression compression() { + return compression(new BodyCompression()); + } + + /** + * Gets the optional body compression descriptor into the supplied accessor. + * + * @param obj the accessor to assign. + * @return the assigned accessor, or {@code null} when the batch is uncompressed. + */ + public BodyCompression compression(BodyCompression obj) { + int o = __offset(10); + return o != 0 ? 
obj.__assign(__indirect(o + bb_pos), bb) : null; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Schema.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Schema.java new file mode 100644 index 000000000000..1990ea7d8556 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Schema.java @@ -0,0 +1,110 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code Schema} table. + */ +public final class Schema extends Table { + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public Schema __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the byte order of the schema's buffers (see {@link Endianness}). + * + * @return the endianness, or {@code 0} ({@link Endianness#LITTLE}) when absent. + */ + public short endianness() { + int o = __offset(4); + return o != 0 ? bb.getShort(o + bb_pos) : 0; + } + + /** + * Gets the field at the given index. + * + * @param j the field index. + * @return the field accessor. + */ + public Field fields(int j) { + return fields(new Field(), j); + } + + /** + * Gets the field at the given index into the supplied accessor. + * + * @param obj the accessor to assign. + * @param j the field index. + * @return the assigned accessor, or {@code null} when absent. 
+ */ + public Field fields(Field obj, int j) { + int o = __offset(6); + return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; + } + + /** + * Gets the number of top-level fields in the schema. + * + * @return the field count. + */ + public int fieldsLength() { + int o = __offset(6); + return o != 0 ? __vector_len(o) : 0; + } + + /** + * Gets the custom metadata entry at the given index. + * + * @param j the entry index. + * @return the key/value accessor. + */ + public KeyValue customMetadata(int j) { + return customMetadata(new KeyValue(), j); + } + + /** + * Gets the custom metadata entry at the given index into the supplied accessor. + * + * @param obj the accessor to assign. + * @param j the entry index. + * @return the assigned accessor, or {@code null} when absent. + */ + public KeyValue customMetadata(KeyValue obj, int j) { + int o = __offset(8); + return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; + } + + /** + * Gets the number of custom metadata entries. + * + * @return the entry count. + */ + public int customMetadataLength() { + int o = __offset(8); + return o != 0 ? __vector_len(o) : 0; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/TimeUnit.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/TimeUnit.java new file mode 100644 index 000000000000..347e85337f9b --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/TimeUnit.java @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +/** + * Values for the Arrow IPC {@code TimeUnit} enum. + */ +public final class TimeUnit { + private TimeUnit() { + } + + /** Second resolution. */ + public static final short SECOND = 0; + /** Millisecond resolution. 
*/ + public static final short MILLISECOND = 1; + /** Microsecond resolution. */ + public static final short MICROSECOND = 2; + /** Nanosecond resolution. */ + public static final short NANOSECOND = 3; + + private static final String[] NAMES = { "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND" }; + + /** + * Gets the canonical Arrow name for a {@code TimeUnit} value, for diagnostic messages. + * + * @param e the time unit value. + * @return the canonical Arrow time unit name. + */ + public static String name(int e) { + return NAMES[e]; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Timestamp.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Timestamp.java new file mode 100644 index 000000000000..4be2b069813e --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Timestamp.java @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +import com.google.flatbuffers.Table; + +import java.nio.ByteBuffer; + +/** + * Accessor for the Arrow IPC {@code Timestamp} type table. + */ +public final class Timestamp extends Table { + /** + * Positions this accessor at the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + */ + public void __init(int i, ByteBuffer bb) { + __reset(i, bb); + } + + /** + * Assigns this accessor to the given table offset. + * + * @param i the table offset. + * @param bb the backing buffer. + * @return this accessor. + */ + public Timestamp __assign(int i, ByteBuffer bb) { + __init(i, bb); + return this; + } + + /** + * Gets the timestamp resolution (see {@link TimeUnit}). + * + * @return the time unit, or {@code 0} ({@link TimeUnit#SECOND}) when absent. 
+ */ + public short unit() { + int o = __offset(4); + return o != 0 ? bb.getShort(o + bb_pos) : 0; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Type.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Type.java new file mode 100644 index 000000000000..615d0d2ce1f5 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/Type.java @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.implementation.util.arrow; + +/** + * Discriminator values for the Arrow IPC {@code Type} union, identifying a field's logical type. + */ +public final class Type { + private Type() { + } + + /** No type. */ + public static final byte NONE = 0; + /** Null type. */ + public static final byte NULL = 1; + /** Integer type (see {@link Int}). */ + public static final byte INT = 2; + /** Floating-point type. */ + public static final byte FLOATING_POINT = 3; + /** Variable-length binary type. */ + public static final byte BINARY = 4; + /** Variable-length UTF-8 string type. */ + public static final byte UTF8 = 5; + /** Boolean type. */ + public static final byte BOOL = 6; + /** Decimal type. */ + public static final byte DECIMAL = 7; + /** Date type. */ + public static final byte DATE = 8; + /** Time-of-day type. */ + public static final byte TIME = 9; + /** Timestamp type (see {@link Timestamp}). */ + public static final byte TIMESTAMP = 10; + /** Interval type. */ + public static final byte INTERVAL = 11; + /** List type. */ + public static final byte LIST = 12; + /** Struct type. */ + public static final byte STRUCT = 13; + /** Union type. */ + public static final byte UNION = 14; + /** Fixed-size binary type. */ + public static final byte FIXED_SIZE_BINARY = 15; + /** Fixed-size list type. 
*/ + public static final byte FIXED_SIZE_LIST = 16; + /** Map type. */ + public static final byte MAP = 17; + /** Duration type. */ + public static final byte DURATION = 18; + /** Large variable-length binary type. */ + public static final byte LARGE_BINARY = 19; + /** Large variable-length UTF-8 string type. */ + public static final byte LARGE_UTF8 = 20; + /** Large list type. */ + public static final byte LARGE_LIST = 21; + /** Run-end encoded type. */ + public static final byte RUN_END_ENCODED = 22; + /** Binary view type. */ + public static final byte BINARY_VIEW = 23; + /** UTF-8 string view type. */ + public static final byte UTF8_VIEW = 24; + /** List view type. */ + public static final byte LIST_VIEW = 25; + /** Large list view type. */ + public static final byte LARGE_LIST_VIEW = 26; + + private static final String[] NAMES = { + "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", "Decimal", "Date", "Time", "Timestamp", + "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", + "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", "Utf8View", "ListView", "LargeListView" + }; + + /** + * Gets the canonical Arrow name for a {@code Type} union discriminator, for diagnostic messages. + * + * @param e the discriminator value. + * @return the canonical Arrow type name. + */ + public static String name(int e) { + return NAMES[e]; + } +} + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/package-info.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/package-info.java new file mode 100644 index 000000000000..ce929799031f --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/implementation/util/arrow/package-info.java @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +/** + * Minimal Apache Arrow IPC FlatBuffer metadata accessors used internally by the Blob Storage ListBlobs Arrow reader. + *

+ * The classes in this package are thin {@link com.google.flatbuffers.Table}/{@link com.google.flatbuffers.Struct} + * accessors over the FlatBuffer-encoded metadata of the Apache Arrow IPC format. They expose only the small subset of + * the {@code org.apache.arrow.flatbuf} schema that {@code BlobListArrowStreamReader} requires, so that the main + * (Java 8 baseline) compile classpath does not depend on the {@code arrow-format} artifact, which ships Java 11 + * bytecode. + *

+ * The field orderings (FlatBuffer vtable slots) and enum values are defined by the public Apache Arrow columnar format + * specification (see {@code Schema.fbs} and {@code Message.fbs} in the Apache Arrow project, licensed under the Apache + * License, Version 2.0) and must match the on-the-wire layout produced by the Storage service. + */ +package com.azure.storage.blob.implementation.util.arrow; + diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/BlobDownloadHeaders.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/BlobDownloadHeaders.java index 6a372f5b2c74..dfd93a535fc6 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/BlobDownloadHeaders.java +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/BlobDownloadHeaders.java @@ -825,97 +825,6 @@ public BlobDownloadHeaders setEncryptionScope(String encryptionScope) { return this; } - /** - * Gets the access tier of the blob. - * - * @return the access tier of the blob. This is only set for Page blobs on a premium storage account or for Block - * blobs on blob storage or general purpose V2 account. - */ - public AccessTier getAccessTier() { - String accessTier = internalHeaders.getXMsAccessTier(); - return accessTier == null ? null : AccessTier.fromString(accessTier); - } - - /** - * Sets the access tier of the blob. - * - * @param accessTier the access tier of the blob. - * @return the BlobDownloadHeaders object itself. - */ - public BlobDownloadHeaders setAccessTier(AccessTier accessTier) { - internalHeaders.setXMsAccessTier(accessTier == null ? null : accessTier.toString()); - return this; - } - - /** - * Gets the status of the tier being inferred for the blob. - * - * @return the status of the tier being inferred for the blob. This is only set for Page blobs on a premium storage - * account or for Block blobs on blob storage or general purpose V2 account. 
- */ - public Boolean isAccessTierInferred() { - return Boolean.TRUE.equals(internalHeaders.isXMsAccessTierInferred()); - } - - /** - * Sets the status of the tier being inferred for the blob. - * - * @param accessTierInferred the status of the tier being inferred for the blob. - * @return the BlobDownloadHeaders object itself. - */ - public BlobDownloadHeaders setAccessTierInferred(Boolean accessTierInferred) { - internalHeaders.setXMsAccessTierInferred(accessTierInferred); - return this; - } - - /** - * Gets the time when the access tier for the blob was last changed. - * - * @return the time when the access tier for the blob was last changed. - */ - public OffsetDateTime getAccessTierChangeTime() { - return internalHeaders.getXMsAccessTierChangeTime(); - } - - /** - * Sets the time when the access tier for the blob was last changed. - * - * @param accessTierChangeTime the time when the access tier for the blob was last changed. - * @return the BlobDownloadHeaders object itself. - */ - public BlobDownloadHeaders setAccessTierChangeTime(OffsetDateTime accessTierChangeTime) { - internalHeaders.setXMsAccessTierChangeTime(accessTierChangeTime); - return this; - } - - /** - * Gets the underlying access tier of the blob when its access tier is {@link AccessTier#SMART}. - *

- * This value is only populated when {@link #getAccessTier()} returns {@link AccessTier#SMART}. In that case, it - * represents the concrete access tier (for example {@link AccessTier#HOT} or {@link AccessTier#COOL}) that the - * service has selected for the blob. For all other access tiers, this property is {@code null} and should be - * ignored. - * - * @return the underlying access tier chosen by the service when the blob's access tier is {@link AccessTier#SMART}, - * or {@code null} if the blob is not using the smart access tier. - */ - public AccessTier getSmartAccessTier() { - String smartAccessTier = internalHeaders.getXMsSmartAccessTier(); - return smartAccessTier == null ? null : AccessTier.fromString(smartAccessTier); - } - - /** - * Sets the underlying access tier of the blob when its access tier is {@link AccessTier#SMART}. - * - * @param smartAccessTier the underlying access tier chosen by the service when the blob's access tier is - * {@link AccessTier#SMART}. - * @return the BlobDownloadHeaders object itself. - */ - public BlobDownloadHeaders setSmartAccessTier(AccessTier smartAccessTier) { - internalHeaders.setXMsSmartAccessTier(smartAccessTier == null ? null : smartAccessTier.toString()); - return this; - } - /** * Get the blobContentMD5 property: If the blob has a MD5 hash, and if request contains range header (Range or * x-ms-range), this response header is returned with the value of the whole blob's MD5 value. 
This value may or may diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/ListBlobsOptions.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/ListBlobsOptions.java index 47f29c2ab2b3..e434681ac6e9 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/ListBlobsOptions.java +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/ListBlobsOptions.java @@ -19,6 +19,8 @@ public final class ListBlobsOptions { private String prefix; private String startFrom; private Integer maxResultsPerPage; + private StorageResponseSerializationFormat storageResponseSerializationFormat; + private String endBefore; /** * Constructs an unpopulated {@link ListBlobsOptions}. @@ -74,7 +76,7 @@ public ListBlobsOptions setPrefix(String prefix) { * This parameter is similar to the prefix filter: it allows listing blobs starting from the specified path, rather than from the beginning of the container. * For non-recursive lists, only one entity level is supported. * - * @return the marker indicating where to start listing blobs + * @return the marker indicating where to start listing blobs (inclusive) */ public String getStartFrom() { return startFrom; @@ -84,7 +86,7 @@ public String getStartFrom() { * Sets an optional parameter that specifies an absolute path within the container. This parameter is similar to the prefix filter: it allows listing blobs starting from the specified path, rather than from the beginning of the container. * For non-recursive lists, only one entity level is supported. 
* - * @param startFrom The marker indicating where to start listing blobs + * @param startFrom The marker indicating where to start listing blobs (inclusive) * @return the updated ListBlobsOptions object */ public ListBlobsOptions setStartFrom(String startFrom) { @@ -92,6 +94,49 @@ public ListBlobsOptions setStartFrom(String startFrom) { return this; } + /** + * Gets the endBefore value. Only supported with Arrow listings. The listing will end before this path (exclusive). + * + * @return the endBefore value. + */ + public String getEndBefore() { + return endBefore; + } + + /** + * Sets the endBefore value. Only supported with Arrow listings. The listing will end before this path (exclusive). + * + * @param endBefore the endBefore value to set. + * @return the updated ListBlobsOptions object. + */ + public ListBlobsOptions setEndBefore(String endBefore) { + this.endBefore = endBefore; + return this; + } + + /** + * Gets the response serialization format the service should use when listing blobs. + * + * @return the {@link StorageResponseSerializationFormat}, or {@code null} if unset + * (equivalent to {@link StorageResponseSerializationFormat#AUTO}). + */ + public StorageResponseSerializationFormat getStorageResponseSerializationFormat() { + return storageResponseSerializationFormat; + } + + /** + * Sets the response serialization format the service should use when listing blobs. + * + * @param storageResponseSerializationFormat the format to request. {@code null} and + * {@link StorageResponseSerializationFormat#AUTO} both let the SDK pick. + * @return the updated {@link ListBlobsOptions} object. + */ + public ListBlobsOptions + setStorageResponseSerializationFormat(StorageResponseSerializationFormat storageResponseSerializationFormat) { + this.storageResponseSerializationFormat = storageResponseSerializationFormat; + return this; + } + /** * Specifies the maximum number of blobs to return, including all BlobPrefix elements. 
If the request does not * specify maxResultsPerPage or specifies a value greater than 5,000, the server will return up to 5,000 items. diff --git a/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/StorageResponseSerializationFormat.java b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/StorageResponseSerializationFormat.java new file mode 100644 index 000000000000..e5017accedbc --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/models/StorageResponseSerializationFormat.java @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.storage.blob.models; + +/** + * Defines the serialization format the service uses for list-blobs responses. + */ +public enum StorageResponseSerializationFormat { + /** + * Let the SDK choose the serialization format that is most appropriate for the request. + *

+ * The exact format selected by {@code AUTO} is an implementation detail and may change + * between SDK releases. Choose {@link #XML} or {@link #ARROW} explicitly if you require + * a specific format. + */ + AUTO, + + /** + * XML response format. + */ + XML, + + /** + * Apache Arrow response format. + */ + ARROW +} diff --git a/sdk/storage/azure-storage-blob/src/main/java/module-info.java b/sdk/storage/azure-storage-blob/src/main/java/module-info.java index 597a417add7f..e6947c9af0c4 100644 --- a/sdk/storage/azure-storage-blob/src/main/java/module-info.java +++ b/sdk/storage/azure-storage-blob/src/main/java/module-info.java @@ -5,6 +5,7 @@ requires transitive com.azure.storage.common; requires com.azure.storage.internal.avro; + requires flatbuffers.java; exports com.azure.storage.blob; exports com.azure.storage.blob.models; diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobApiTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobApiTests.java index 71c474ba295c..50a9eb63ef21 100644 --- a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobApiTests.java +++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobApiTests.java @@ -543,36 +543,6 @@ public void downloadAllNullBinaryData() { // headers.getLastAccessedTime() /* TODO (gapra): re-enable when last access time enabled. 
*/ } - @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-10-06") - @Test - public void downloadSmartAccessTierHeaders() { - ByteArrayOutputStream stream = new ByteArrayOutputStream(); - bc.setAccessTier(AccessTier.SMART); - - BlobDownloadResponse response = bc.downloadStreamWithResponse(stream, null, null, null, false, null, null); - ByteBuffer body = ByteBuffer.wrap(stream.toByteArray()); - - assertEquals(DATA.getDefaultData(), body); - assertSmartAccessTierHeaders(response.getDeserializedHeaders()); - } - - @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-10-06") - @Test - public void downloadContentSmartAccessTierHeaders() { - bc.setAccessTier(AccessTier.SMART); - BlobDownloadContentResponse response = bc.downloadContentWithResponse(null, null, null, null); - - TestUtils.assertArraysEqual(DATA.getDefaultBytes(), response.getValue().toBytes()); - assertSmartAccessTierHeaders(response.getDeserializedHeaders()); - } - - private static void assertSmartAccessTierHeaders(BlobDownloadHeaders headers) { - assertEquals(AccessTier.SMART, headers.getAccessTier()); - assertNotNull(headers.getSmartAccessTier()); - assertFalse(headers.isAccessTierInferred()); - assertNotEquals(OffsetDateTime.now(), headers.getAccessTierChangeTime()); - } - @Test public void downloadEmptyFile() { AppendBlobClient bc = cc.getBlobClient("emptyAppendBlob").getAppendBlobClient(); diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobAsyncApiTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobAsyncApiTests.java index ea01df338d18..049e4254e92a 100644 --- a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobAsyncApiTests.java +++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/BlobAsyncApiTests.java @@ -382,33 +382,6 @@ public void downloadAllNullBinaryData() { .verifyComplete(); } - @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = 
"2026-10-06") - @Test - public void downloadSmartAccessTierHeaders() { - Mono response = bc.setAccessTier(AccessTier.SMART) - .then(bc.downloadStreamWithResponse(null, null, null, false)) - .flatMap(r -> { - assertSmartAccessTierHeaders(r.getDeserializedHeaders()); - return FluxUtil.collectBytesInByteBufferStream(r.getValue()); - }) - .flatMap(r -> { - TestUtils.assertArraysEqual(DATA.getDefaultBytes(), r); - return bc.downloadContentWithResponse(null, null); - }); - - StepVerifier.create(response).assertNext(r -> { - assertSmartAccessTierHeaders(r.getDeserializedHeaders()); - TestUtils.assertArraysEqual(DATA.getDefaultBytes(), r.getValue().toBytes()); - }).verifyComplete(); - } - - private static void assertSmartAccessTierHeaders(BlobDownloadHeaders headers) { - assertEquals(AccessTier.SMART, headers.getAccessTier()); - assertNotNull(headers.getSmartAccessTier()); - assertFalse(headers.isAccessTierInferred()); - assertNotEquals(OffsetDateTime.now(), headers.getAccessTierChangeTime()); - } - @Test public void downloadEmptyFile() { AppendBlobAsyncClient bc = ccAsync.getBlobAsyncClient("emptyAppendBlob").getAppendBlobAsyncClient(); diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerApiTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerApiTests.java index f46116acdbb5..54ae007c7165 100644 --- a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerApiTests.java +++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerApiTests.java @@ -34,6 +34,7 @@ import com.azure.storage.blob.models.PublicAccessType; import com.azure.storage.blob.models.RehydratePriority; import com.azure.storage.blob.models.StorageAccountInfo; +import com.azure.storage.blob.models.StorageResponseSerializationFormat; import com.azure.storage.blob.models.TaggedBlobItem; import com.azure.storage.blob.options.BlobContainerCreateOptions; import 
com.azure.storage.blob.options.BlobParallelUploadOptions; @@ -50,6 +51,10 @@ import com.azure.storage.common.test.shared.extensions.PlaybackOnly; import com.azure.storage.common.test.shared.extensions.RequiredServiceVersion; import com.azure.storage.common.test.shared.policy.InvalidServiceVersionPipelinePolicy; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ipc.ArrowStreamReader; +import org.apache.arrow.vector.VectorSchemaRoot; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -58,7 +63,19 @@ import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; +import com.azure.core.http.HttpPipeline; +import com.azure.storage.blob.implementation.AzureBlobStorageImpl; +import com.azure.storage.blob.implementation.AzureBlobStorageImplBuilder; +import com.azure.storage.blob.implementation.models.ContainersListBlobFlatSegmentApacheArrowHeaders; +import com.azure.storage.blob.implementation.models.ContainersListBlobHierarchySegmentApacheArrowHeaders; +import com.azure.storage.blob.implementation.util.ArrowBlobListDeserializer; +import com.azure.storage.blob.implementation.util.ModelHelper; +import com.azure.storage.blob.models.ListBlobsIncludeItem; +import com.azure.core.http.rest.ResponseBase; + import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.util.ArrayList; import java.net.URL; import java.time.OffsetDateTime; import java.util.Arrays; @@ -2128,4 +2145,313 @@ public void getBlobContainerUrlEncodesContainerName() { // then: // assertThrows(BlobStorageException.class, () -> // } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowBasic() { + // Upload a test blob + String blobName = generateBlobName(); + cc.getBlobClient(blobName).getBlockBlobClient().upload(DATA.getDefaultInputStream(), 
DATA.getDefaultDataSize()); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + List blobs = cc.listBlobs(options, null).stream().collect(Collectors.toList()); + + assertEquals(1, blobs.size()); + assertEquals(blobName, blobs.get(0).getName()); + assertNotNull(blobs.get(0).getProperties()); + assertEquals(DATA.getDefaultDataSize(), blobs.get(0).getProperties().getContentLength()); + assertEquals(BlobType.BLOCK_BLOB, blobs.get(0).getProperties().getBlobType()); + assertNotNull(blobs.get(0).getProperties().getLastModified()); + assertNotNull(blobs.get(0).getProperties().getETag()); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowWithTags() { + // Upload a blob and set tags + String blobName = generateBlobName(); + cc.getBlobClient(blobName).getBlockBlobClient().upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + + Map tags = new HashMap<>(); + tags.put("tagkey", "tagvalue"); + cc.getBlobClient(blobName).setTags(tags); + + // List with Arrow + retrieveTags + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setDetails(new BlobListDetails().setRetrieveTags(true)); + List blobs = cc.listBlobs(options, null).stream().collect(Collectors.toList()); + + assertEquals(1, blobs.size()); + assertEquals(blobName, blobs.get(0).getName()); + assertNotNull(blobs.get(0).getTags()); + assertEquals("tagvalue", blobs.get(0).getTags().get("tagkey")); + } + + @ParameterizedTest + @MethodSource("listBlobsFlatRehydratePrioritySupplier") + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowRehydratePriority(RehydratePriority rehydratePriority) { + String name = generateBlobName(); + BlockBlobClient bc = cc.getBlobClient(name).getBlockBlobClient(); + + 
bc.upload(DATA.getDefaultInputStream(), 7); + + if (rehydratePriority != null) { + bc.setAccessTier(AccessTier.ARCHIVE); + bc.setAccessTierWithResponse(new BlobSetAccessTierOptions(AccessTier.HOT).setPriority(rehydratePriority), + null, null); + } + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + BlobItem item = cc.listBlobs(options, null).iterator().next(); + + assertEquals(rehydratePriority, item.getProperties().getRehydratePriority()); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowWithMetadata() { + String blobName = generateBlobName(); + Map metadata = new HashMap<>(); + metadata.put("testkey", "testvalue"); + cc.getBlobClient(blobName) + .getBlockBlobClient() + .uploadWithResponse(DATA.getDefaultInputStream(), DATA.getDefaultDataSize(), null, metadata, null, null, + null, null, null); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setDetails(new BlobListDetails().setRetrieveMetadata(true)); + List blobs = cc.listBlobs(options, null).stream().collect(Collectors.toList()); + + assertEquals(1, blobs.size()); + assertNotNull(blobs.get(0).getMetadata()); + assertEquals("testvalue", blobs.get(0).getMetadata().get("testkey")); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowPagination() { + // Upload 4 blobs + for (int i = 0; i < 4; i++) { + cc.getBlobClient("blob" + i) + .getBlockBlobClient() + .upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + } + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setMaxResultsPerPage(1); + List allBlobs = new ArrayList<>(); + for (PagedResponse page : cc.listBlobs(options, null).iterableByPage()) { + 
assertTrue(page.getValue().size() <= 1); + allBlobs.addAll(page.getValue()); + } + + cc.listBlobs().iterableByPage(2).forEach(page -> { + assertEquals(2, page.getValue().size()); + }); + + assertEquals(4, allBlobs.size()); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowNullUseArrowUsesXml() { + // storageResponseSerializationFormat defaults to null — should use XML path without error + String blobName = generateBlobName(); + cc.getBlobClient(blobName).getBlockBlobClient().upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + + ListBlobsOptions options = new ListBlobsOptions(); + assertNull(options.getStorageResponseSerializationFormat()); + + List blobs = cc.listBlobs(options, null).stream().collect(Collectors.toList()); + assertEquals(1, blobs.size()); + assertEquals(blobName, blobs.get(0).getName()); + } + + @LiveOnly + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowEncryptedBlob() { + // Upload a blob with CPK (customer-provided key) + String blobName = generateBlobName(); + CustomerProvidedKey cpk = new CustomerProvidedKey(Base64.getEncoder().encodeToString(getRandomKey())); + BlobClient cpkClient = cc.getBlobClient(blobName).getCustomerProvidedKeyClient(cpk); + cpkClient.getBlockBlobClient().upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + List blobs = cc.listBlobs(options, null).stream().collect(Collectors.toList()); + + assertEquals(1, blobs.size()); + assertEquals(blobName, blobs.get(0).getName()); + // CPK blob should have server-encrypted = true + assertTrue(blobs.get(0).getProperties().isServerEncrypted()); + // Metadata should be null (no metadata was set) + assertNull(blobs.get(0).getMetadata()); + } + + @Test + @RequiredServiceVersion(clazz = 
BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowDeserializer() throws Exception { + String blobName = generateBlobName(); + Map metadata = new HashMap<>(); + metadata.put("testkey", "testvalue"); + cc.getBlobClient(blobName) + .getBlockBlobClient() + .uploadWithResponse(DATA.getDefaultInputStream(), 7, null, metadata, null, null, null, null, null); + + AzureBlobStorageImpl impl = new AzureBlobStorageImplBuilder().pipeline(cc.getHttpPipeline()) + .url(cc.getAccountUrl()) + .version(BlobServiceVersion.getLatest().getVersion()) + .buildClient(); + + // Call the Arrow endpoint + ArrayList include = new ArrayList<>(); + include.add(ListBlobsIncludeItem.METADATA); + + ResponseBase response = impl.getContainers() + .listBlobFlatSegmentApacheArrowWithResponse(containerName, null, null, null, include, null, null, null, + null, com.azure.core.util.Context.NONE); + + // Verify Content-Type is Arrow + String contentType = response.getDeserializedHeaders().getContentType(); + assertTrue(contentType.contains("application/vnd.apache.arrow.stream"), + "Expected Arrow content type but got: " + contentType); + + // Deserialize using ArrowBlobListDeserializer + ArrowBlobListDeserializer.ArrowListBlobsResult result + = ArrowBlobListDeserializer.deserialize(response.getValue()); + + // Verify pagination — single blob, no next page + assertNull(result.getNextMarker()); + + // Verify we got exactly one blob + assertEquals(1, result.getBlobItems().size()); + + com.azure.storage.blob.implementation.models.BlobItemInternal item = result.getBlobItems().get(0); + + // Name + assertNotNull(item.getName()); + assertEquals(blobName, item.getName().getContent()); + + // Properties + assertNotNull(item.getProperties()); + assertEquals(7L, (long) item.getProperties().getContentLength()); + assertEquals("application/octet-stream", item.getProperties().getContentType()); + assertNotNull(item.getProperties().getETag()); + assertNotNull(item.getProperties().getLastModified()); 
+ assertNotNull(item.getProperties().getCreationTime()); + assertEquals(BlobType.BLOCK_BLOB, item.getProperties().getBlobType()); + assertEquals(AccessTier.HOT, item.getProperties().getAccessTier()); + assertTrue(item.getProperties().isAccessTierInferred()); + assertTrue(item.getProperties().isServerEncrypted()); + assertEquals(LeaseStateType.AVAILABLE, item.getProperties().getLeaseState()); + assertEquals(LeaseStatusType.UNLOCKED, item.getProperties().getLeaseStatus()); + assertNotNull(item.getProperties().getContentMd5()); + + // Metadata + assertNotNull(item.getMetadata()); + assertEquals("testvalue", item.getMetadata().get("testkey")); + + // Verify ModelHelper can convert to public BlobItem + BlobItem publicItem = ModelHelper.populateBlobItem(item); + assertEquals(blobName, publicItem.getName()); + assertEquals(7L, (long) publicItem.getProperties().getContentLength()); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsByHierarchyArrowBasic() { + // Upload blobs in a directory structure + cc.getBlobClient("dir/blob1") + .getBlockBlobClient() + .upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + cc.getBlobClient("dir/blob2") + .getBlockBlobClient() + .upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + cc.getBlobClient("topblob") + .getBlockBlobClient() + .upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + List items = cc.listBlobsByHierarchy("/", options, null).stream().collect(Collectors.toList()); + + // Root level: one prefix "dir/" and one blob "topblob" + assertEquals(2, items.size()); + + BlobItem prefixItem = items.stream().filter(BlobItem::isPrefix).findFirst().orElse(null); + BlobItem blobItem = items.stream().filter(i -> !i.isPrefix()).findFirst().orElse(null); + + assertNotNull(prefixItem); + 
assertEquals("dir/", prefixItem.getName()); + assertTrue(prefixItem.isPrefix()); + + assertNotNull(blobItem); + assertEquals("topblob", blobItem.getName()); + assertFalse(blobItem.isPrefix()); + assertNotNull(blobItem.getProperties()); + assertEquals(DATA.getDefaultDataSize(), blobItem.getProperties().getContentLength()); + assertEquals(BlobType.BLOCK_BLOB, blobItem.getProperties().getBlobType()); + assertNotNull(blobItem.getProperties().getLastModified()); + assertNotNull(blobItem.getProperties().getETag()); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsByHierarchyArrowWithMetadata() { + String blobName = generateBlobName(); + Map metadata = new HashMap<>(); + metadata.put("testkey", "testvalue"); + cc.getBlobClient("dir/" + blobName) + .getBlockBlobClient() + .uploadWithResponse(DATA.getDefaultInputStream(), DATA.getDefaultDataSize(), null, metadata, null, null, + null, null, null); + cc.getBlobClient("topblob") + .getBlockBlobClient() + .upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setPrefix("dir/") + .setDetails(new BlobListDetails().setRetrieveMetadata(true)); + List blobs = cc.listBlobsByHierarchy("/", options, null).stream().collect(Collectors.toList()); + + assertEquals(1, blobs.size()); + assertFalse(blobs.get(0).isPrefix()); + assertNotNull(blobs.get(0).getMetadata()); + assertEquals("testvalue", blobs.get(0).getMetadata().get("testkey")); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsByHierarchyArrowPagination() { + // Upload blobs across multiple directories + for (int i = 0; i < 3; i++) { + cc.getBlobClient("dir" + i + "/blob") + .getBlockBlobClient() + .upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + } + cc.getBlobClient("topblob") + 
.getBlockBlobClient() + .upload(DATA.getDefaultInputStream(), DATA.getDefaultDataSize()); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setMaxResultsPerPage(1); + List allItems = new ArrayList<>(); + for (PagedResponse page : cc.listBlobsByHierarchy("/", options, null).iterableByPage()) { + assertTrue(page.getValue().size() <= 1); + allItems.addAll(page.getValue()); + } + + // 3 prefixes + 1 blob = 4 items + assertEquals(4, allItems.size()); + } + } diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerAsyncApiTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerAsyncApiTests.java index 04ebc06dc2b6..6e574cda0e27 100644 --- a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerAsyncApiTests.java +++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/ContainerAsyncApiTests.java @@ -10,8 +10,14 @@ import com.azure.core.test.TestMode; import com.azure.core.test.utils.MockTokenCredential; import com.azure.core.util.Context; +import com.azure.core.util.FluxUtil; import com.azure.core.util.polling.PollerFlux; import com.azure.identity.DefaultAzureCredentialBuilder; +import com.azure.storage.blob.implementation.AzureBlobStorageImpl; +import com.azure.storage.blob.implementation.AzureBlobStorageImplBuilder; +import com.azure.storage.blob.implementation.models.BlobItemInternal; +import com.azure.storage.blob.implementation.util.ArrowBlobListDeserializer; +import com.azure.storage.blob.implementation.util.ModelHelper; import com.azure.storage.blob.models.*; import com.azure.storage.blob.options.BlobContainerCreateOptions; import com.azure.storage.blob.options.BlobParallelUploadOptions; @@ -41,6 +47,7 @@ import reactor.util.function.Tuple2; import java.net.URL; +import java.io.ByteArrayInputStream; import java.time.Duration; import java.time.OffsetDateTime; import 
java.util.*; @@ -2142,4 +2149,334 @@ public void getBlobContainerUrlEncodesContainerName() { assertTrue(containerClient.getBlobContainerUrl().contains("my%20container")); } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowBasic() { + // Upload a test blob + String blobName = generateBlobName(); + BlockBlobAsyncClient bc = ccAsync.getBlobAsyncClient(blobName).getBlockBlobAsyncClient(); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + + StepVerifier + .create( + bc.upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize()).thenMany(ccAsync.listBlobs(options, null))) + .assertNext(item -> { + assertEquals(blobName, item.getName()); + assertNotNull(item.getProperties()); + assertEquals(DATA.getDefaultDataSize(), item.getProperties().getContentLength()); + assertEquals(BlobType.BLOCK_BLOB, item.getProperties().getBlobType()); + assertNotNull(item.getProperties().getLastModified()); + assertNotNull(item.getProperties().getETag()); + }) + .verifyComplete(); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowWithMetadata() { + String blobName = generateBlobName(); + Map metadata = new HashMap<>(); + metadata.put("testkey", "testvalue"); + BlockBlobAsyncClient bc = ccAsync.getBlobAsyncClient(blobName).getBlockBlobAsyncClient(); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setDetails(new BlobListDetails().setRetrieveMetadata(true)); + + StepVerifier.create( + bc.uploadWithResponse(DATA.getDefaultFlux(), DATA.getDefaultDataSize(), null, metadata, null, null, null) + .thenMany(ccAsync.listBlobs(options, null))) + .assertNext(item -> { + assertNotNull(item.getMetadata()); + assertEquals("testvalue", item.getMetadata().get("testkey")); + }) + .verifyComplete(); + 
} + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowPagination() { + // Upload 4 blobs + Flux uploads = Flux.range(0, 4) + .flatMap(i -> ccAsync.getBlobAsyncClient("blob" + i) + .getBlockBlobAsyncClient() + .upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize())); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setMaxResultsPerPage(1); + + Mono> result = uploads.then(ccAsync.listBlobs(options, null).byPage().doOnNext(page -> { + assertTrue(page.getValue().size() <= 1); + }).flatMap(page -> Flux.fromIterable(page.getValue())).collectList()); + + StepVerifier.create(result).assertNext(allBlobs -> assertEquals(4, allBlobs.size())).verifyComplete(); + + // Mirror the sync test's secondary assertion: requesting page size 2 yields exactly 2 blobs per page. + StepVerifier.create(ccAsync.listBlobs().byPage(2)).thenConsumeWhile(page -> { + assertEquals(2, page.getValue().size()); + return true; + }).verifyComplete(); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowNullUseArrowUsesXml() { + // storageResponseSerializationFormat defaults to null — should use XML path without error + String blobName = generateBlobName(); + BlockBlobAsyncClient bc = ccAsync.getBlobAsyncClient(blobName).getBlockBlobAsyncClient(); + + ListBlobsOptions options = new ListBlobsOptions(); + assertNull(options.getStorageResponseSerializationFormat()); + + StepVerifier + .create( + bc.upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize()).thenMany(ccAsync.listBlobs(options, null))) + .assertNext(item -> assertEquals(blobName, item.getName())) + .verifyComplete(); + } + + @LiveOnly + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowEncryptedBlob() { + // Upload a blob with CPK (customer-provided key) + String blobName = 
generateBlobName(); + CustomerProvidedKey cpk = new CustomerProvidedKey(Base64.getEncoder().encodeToString(getRandomKey())); + BlockBlobAsyncClient cpkClient + = ccAsync.getBlobAsyncClient(blobName).getCustomerProvidedKeyAsyncClient(cpk).getBlockBlobAsyncClient(); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + + StepVerifier.create(cpkClient.upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize()) + .thenMany(ccAsync.listBlobs(options, null))).assertNext(item -> { + assertEquals(blobName, item.getName()); + // CPK blob should have server-encrypted = true + assertTrue(item.getProperties().isServerEncrypted()); + // Metadata should be null (no metadata was set) + assertNull(item.getMetadata()); + }).verifyComplete(); + } + + @ParameterizedTest + @MethodSource("listBlobsFlatRehydratePrioritySupplier") + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowRehydratePriority(RehydratePriority rehydratePriority) { + String name = generateBlobName(); + BlockBlobAsyncClient bc = ccAsync.getBlobAsyncClient(name).getBlockBlobAsyncClient(); + + Mono> rehydrate = Mono.empty(); + + if (rehydratePriority != null) { + rehydrate = bc.setAccessTier(AccessTier.ARCHIVE) + .then(bc.setAccessTierWithResponse( + new BlobSetAccessTierOptions(AccessTier.HOT).setPriority(rehydratePriority))); + } + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + + Flux response = bc.upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize()) + .then(rehydrate) + .thenMany(ccAsync.listBlobs(options, null)); + + StepVerifier.create(response) + .assertNext(r -> assertEquals(rehydratePriority, r.getProperties().getRehydratePriority())) + .verifyComplete(); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void 
listBlobsArrowDeserializer() { + String blobName = generateBlobName(); + Map metadata = new HashMap<>(); + metadata.put("testkey", "testvalue"); + + BlockBlobAsyncClient bc = ccAsync.getBlobAsyncClient(blobName).getBlockBlobAsyncClient(); + + AzureBlobStorageImpl impl = new AzureBlobStorageImplBuilder().pipeline(ccAsync.getHttpPipeline()) + .url(ccAsync.getAccountUrl()) + .version(BlobServiceVersion.getLatest().getVersion()) + .buildClient(); + + List include = new ArrayList<>(); + include.add(ListBlobsIncludeItem.METADATA); + + Mono testMono + = bc.uploadWithResponse(DATA.getDefaultFlux(), 7, null, metadata, null, null, null) + .then(impl.getContainers() + .listBlobFlatSegmentApacheArrowWithResponseAsync(containerName, null, null, null, include, null, + null, null, null)) + .flatMap(response -> { + // Verify Content-Type is Arrow + String contentType = response.getDeserializedHeaders().getContentType(); + assertTrue(contentType.contains("application/vnd.apache.arrow.stream"), + "Expected Arrow content type but got: " + contentType); + + // Collect the Flux body into a byte[] and feed it to the deserializer. 
+ return FluxUtil.collectBytesInByteBufferStream(response.getValue()) + .map(bytes -> ArrowBlobListDeserializer.deserialize(new ByteArrayInputStream(bytes))); + }); + + StepVerifier.create(testMono).assertNext(result -> { + // Verify pagination — single blob, no next page + assertNull(result.getNextMarker()); + + // Verify we got exactly one blob + assertEquals(1, result.getBlobItems().size()); + + BlobItemInternal item = result.getBlobItems().get(0); + + // Name + assertNotNull(item.getName()); + assertEquals(blobName, item.getName().getContent()); + + // Properties + assertNotNull(item.getProperties()); + assertEquals(7L, (long) item.getProperties().getContentLength()); + assertEquals("application/octet-stream", item.getProperties().getContentType()); + assertNotNull(item.getProperties().getETag()); + assertNotNull(item.getProperties().getLastModified()); + assertNotNull(item.getProperties().getCreationTime()); + assertEquals(BlobType.BLOCK_BLOB, item.getProperties().getBlobType()); + assertEquals(AccessTier.HOT, item.getProperties().getAccessTier()); + assertTrue(item.getProperties().isAccessTierInferred()); + assertTrue(item.getProperties().isServerEncrypted()); + assertEquals(LeaseStateType.AVAILABLE, item.getProperties().getLeaseState()); + assertEquals(LeaseStatusType.UNLOCKED, item.getProperties().getLeaseStatus()); + assertNotNull(item.getProperties().getContentMd5()); + + // Metadata + assertNotNull(item.getMetadata()); + assertEquals("testvalue", item.getMetadata().get("testkey")); + + // Verify ModelHelper can convert to public BlobItem + BlobItem publicItem = ModelHelper.populateBlobItem(item); + assertEquals(blobName, publicItem.getName()); + assertEquals(7L, (long) publicItem.getProperties().getContentLength()); + }).verifyComplete(); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsByHierarchyArrowBasic() { + // Upload blobs in a directory structure + Flux uploads = Flux.concat( + 
ccAsync.getBlobAsyncClient("dir/blob1") + .getBlockBlobAsyncClient() + .upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize()), + ccAsync.getBlobAsyncClient("dir/blob2") + .getBlockBlobAsyncClient() + .upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize()), + ccAsync.getBlobAsyncClient("topblob") + .getBlockBlobAsyncClient() + .upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize())); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW); + + Mono> items + = uploads.then(ccAsync.listBlobsByHierarchy("/", options).collect(Collectors.toList())); + + StepVerifier.create(items).assertNext(list -> { + // Root level: one prefix "dir/" and one blob "topblob" + assertEquals(2, list.size()); + + BlobItem prefixItem = list.stream().filter(BlobItem::isPrefix).findFirst().orElse(null); + BlobItem blobItem = list.stream().filter(i -> !i.isPrefix()).findFirst().orElse(null); + + assertNotNull(prefixItem); + assertEquals("dir/", prefixItem.getName()); + assertTrue(prefixItem.isPrefix()); + + assertNotNull(blobItem); + assertEquals("topblob", blobItem.getName()); + assertFalse(blobItem.isPrefix()); + assertNotNull(blobItem.getProperties()); + assertEquals(DATA.getDefaultDataSize(), blobItem.getProperties().getContentLength()); + assertEquals(BlobType.BLOCK_BLOB, blobItem.getProperties().getBlobType()); + assertNotNull(blobItem.getProperties().getLastModified()); + assertNotNull(blobItem.getProperties().getETag()); + }).verifyComplete(); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsByHierarchyArrowWithMetadata() { + String blobName = generateBlobName(); + Map metadata = new HashMap<>(); + metadata.put("testkey", "testvalue"); + + Mono uploads = ccAsync.getBlobAsyncClient("dir/" + blobName) + .getBlockBlobAsyncClient() + .uploadWithResponse(DATA.getDefaultFlux(), DATA.getDefaultDataSize(), null, metadata, null, null, null) + 
.then(ccAsync.getBlobAsyncClient("topblob") + .getBlockBlobAsyncClient() + .upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize())); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setPrefix("dir/") + .setDetails(new BlobListDetails().setRetrieveMetadata(true)); + + StepVerifier.create(uploads.thenMany(ccAsync.listBlobsByHierarchy("/", options))).assertNext(item -> { + assertFalse(item.isPrefix()); + assertNotNull(item.getMetadata()); + assertEquals("testvalue", item.getMetadata().get("testkey")); + }).verifyComplete(); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsByHierarchyArrowPagination() { + // Upload blobs across multiple directories + Flux uploads = Flux.concat( + Flux.range(0, 3) + .flatMap(i -> ccAsync.getBlobAsyncClient("dir" + i + "/blob") + .getBlockBlobAsyncClient() + .upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize())), + ccAsync.getBlobAsyncClient("topblob") + .getBlockBlobAsyncClient() + .upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize())); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setMaxResultsPerPage(1); + + Mono> result + = uploads.then(ccAsync.listBlobsByHierarchy("/", options).byPage().doOnNext(page -> { + assertTrue(page.getValue().size() <= 1); + }).flatMap(page -> Flux.fromIterable(page.getValue())).collectList()); + + // 3 prefixes + 1 blob = 4 items + StepVerifier.create(result).assertNext(allItems -> assertEquals(4, allItems.size())).verifyComplete(); + } + + @Test + @RequiredServiceVersion(clazz = BlobServiceVersion.class, min = "2026-06-06") + public void listBlobsArrowWithTags() { + // Upload a blob and set tags + String blobName = generateBlobName(); + BlockBlobAsyncClient bc = ccAsync.getBlobAsyncClient(blobName).getBlockBlobAsyncClient(); + + Map tags = new 
HashMap<>(); + tags.put("tagkey", "tagvalue"); + + ListBlobsOptions options + = new ListBlobsOptions().setStorageResponseSerializationFormat(StorageResponseSerializationFormat.ARROW) + .setDetails(new BlobListDetails().setRetrieveTags(true)); + + Mono upload = bc.upload(DATA.getDefaultFlux(), DATA.getDefaultDataSize()) + .then(ccAsync.getBlobAsyncClient(blobName).setTags(tags)); + + StepVerifier.create(upload.thenMany(ccAsync.listBlobs(options, null))).assertNext(item -> { + assertEquals(blobName, item.getName()); + assertNotNull(item.getTags()); + assertEquals("tagvalue", item.getTags().get("tagkey")); + }).verifyComplete(); + } } diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/ArrowBlobListDeserializerTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/ArrowBlobListDeserializerTests.java new file mode 100644 index 000000000000..e23bf2cbfae0 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/ArrowBlobListDeserializerTests.java @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.storage.blob.implementation.util; + +import com.azure.storage.blob.implementation.models.BlobListArrowParseException; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class ArrowBlobListDeserializerTests { + @Test + public void parseNullStreamFailsFast() { + BlobListArrowParseException exception + = assertThrows(BlobListArrowParseException.class, () -> ArrowBlobListDeserializer.deserialize(null)); + + assertTrue(exception.getMessage().startsWith("ListBlobs Arrow parse failure:")); + } + + @Test + public void parseInvalidPayloadFailsFast() { + ByteArrayInputStream invalidPayload + = new ByteArrayInputStream("not-an-arrow-stream".getBytes(StandardCharsets.UTF_8)); + + BlobListArrowParseException exception = assertThrows(BlobListArrowParseException.class, + () -> ArrowBlobListDeserializer.deserialize(invalidPayload)); + + assertTrue(exception.getMessage().startsWith("ListBlobs Arrow parse failure:")); + } +} diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowAccessorParityTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowAccessorParityTests.java new file mode 100644 index 000000000000..a59a2e215723 --- /dev/null +++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowAccessorParityTests.java @@ -0,0 +1,298 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +package com.azure.storage.blob.implementation.util; + +import com.azure.storage.blob.implementation.util.arrow.Buffer; +import com.azure.storage.blob.implementation.util.arrow.Field; +import com.azure.storage.blob.implementation.util.arrow.Int; +import com.azure.storage.blob.implementation.util.arrow.KeyValue; +import com.azure.storage.blob.implementation.util.arrow.Message; +import com.azure.storage.blob.implementation.util.arrow.MessageHeader; +import com.azure.storage.blob.implementation.util.arrow.RecordBatch; +import com.azure.storage.blob.implementation.util.arrow.Schema; +import com.azure.storage.blob.implementation.util.arrow.Timestamp; +import com.azure.storage.blob.implementation.util.arrow.Type; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeStampSecVector; +import org.apache.arrow.vector.UInt1Vector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.util.Text; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Differential drift detector for the vendored Arrow FlatBuffer 
accessors.
+ * <p>
+ * This test builds a real Arrow IPC stream with the official {@code arrow-vector} writer, then decodes the same bytes
+ * twice: once with Apache Arrow's own generated {@code org.apache.arrow.flatbuf.*} accessors (the upstream source of
+ * truth, available at test scope via {@code arrow-vector} → {@code arrow-format}) and once with the vendored
+ * accessors in {@code com.azure.storage.blob.implementation.util.arrow}. It asserts every accessor the ListBlobs reader
+ * relies on returns identical results from both implementations.
+ * <p>
+ * Why this exists: the vendored accessors hardcode FlatBuffer vtable offsets and struct sizes taken
+ * from a specific Arrow schema revision. When the Arrow test dependency is upgraded (for example to 19.x), re-running
+ * this test re-validates the vendored copy against the new upstream: if Arrow renumbers a field, resizes a struct, or
+ * changes an accessor's semantics in a way that affects how ListBlobs payloads are parsed, this test fails and points
+ * at the exact accessor that diverged, so the vendored copy can be brought back in sync. A compatible upgrade that does
+ * not touch these structures leaves the test passing (no false positives).
+ */
+public class BlobListArrowAccessorParityTests {
+
+    private static final int CONTINUATION_MARKER = 0xFFFFFFFF;
+
+    @Test
+    public void vendoredAccessorsMatchArrowAccessors() throws Exception {
+        byte[] stream;
+        try (BufferAllocator allocator = new RootAllocator()) {
+            stream = buildRepresentativePayload(allocator);
+        }
+
+        List<byte[]> messages = extractMessageMetadata(stream);
+        boolean sawSchema = false;
+        boolean sawRecordBatch = false;
+
+        for (byte[] metadata : messages) {
+            org.apache.arrow.flatbuf.Message arrowMessage
+                = org.apache.arrow.flatbuf.Message.getRootAsMessage(littleEndian(metadata));
+            Message vendoredMessage = Message.getRootAsMessage(littleEndian(metadata));
+
+            assertEquals(arrowMessage.headerType(), vendoredMessage.headerType(), "headerType");
+            assertEquals(arrowMessage.bodyLength(), vendoredMessage.bodyLength(), "bodyLength");
+
+            if (vendoredMessage.headerType() == MessageHeader.SCHEMA) {
+                sawSchema = true;
+                org.apache.arrow.flatbuf.Schema arrowSchema
+                    = (org.apache.arrow.flatbuf.Schema) arrowMessage.header(new org.apache.arrow.flatbuf.Schema());
+                Schema vendoredSchema = (Schema) vendoredMessage.header(new Schema());
+                assertNotNull(arrowSchema, "arrow schema header");
+                assertNotNull(vendoredSchema, "vendored schema header");
+                compareSchema(arrowSchema, vendoredSchema);
+            } else if (vendoredMessage.headerType() == MessageHeader.RECORD_BATCH) {
+                sawRecordBatch = true;
+                org.apache.arrow.flatbuf.RecordBatch arrowBatch = (org.apache.arrow.flatbuf.RecordBatch) arrowMessage
+                    .header(new org.apache.arrow.flatbuf.RecordBatch());
+                RecordBatch vendoredBatch = (RecordBatch) vendoredMessage.header(new RecordBatch());
+                assertNotNull(arrowBatch, "arrow record batch header");
+                assertNotNull(vendoredBatch, "vendored record batch header");
+                compareRecordBatch(arrowBatch, vendoredBatch);
+            }
+        }
+
+        assertTrue(sawSchema, "expected a Schema message in the stream");
+        assertTrue(sawRecordBatch, "expected a RecordBatch message in the stream");
+    }
+
+    private static void compareSchema(org.apache.arrow.flatbuf.Schema arrow, Schema vendored) {
+        assertEquals(arrow.endianness(), vendored.endianness(), "schema.endianness");
+        assertEquals(arrow.fieldsLength(), vendored.fieldsLength(), "schema.fieldsLength");
+        for (int i = 0; i < arrow.fieldsLength(); i++) {
+            compareField(arrow.fields(i), vendored.fields(i));
+        }
+        assertEquals(arrow.customMetadataLength(), vendored.customMetadataLength(), "schema.customMetadataLength");
+        for (int i = 0; i < arrow.customMetadataLength(); i++) {
+            org.apache.arrow.flatbuf.KeyValue arrowKv = arrow.customMetadata(i);
+            KeyValue vendoredKv = vendored.customMetadata(i);
+            assertEquals(arrowKv.key(), vendoredKv.key(), "keyValue.key");
+            assertEquals(arrowKv.value(), vendoredKv.value(), "keyValue.value");
+        }
+    }
+
+    private static void compareField(org.apache.arrow.flatbuf.Field arrow, Field vendored) {
+        assertEquals(arrow.name(), vendored.name(), "field.name");
+        assertEquals(arrow.typeType(), vendored.typeType(), "field.typeType for " + vendored.name());
+        assertEquals(arrow.childrenLength(), vendored.childrenLength(), "field.childrenLength for " + vendored.name());
+
+        if (vendored.typeType() == Type.INT) {
+            org.apache.arrow.flatbuf.Int arrowInt
+                = (org.apache.arrow.flatbuf.Int) arrow.type(new org.apache.arrow.flatbuf.Int());
+            Int vendoredInt = (Int) vendored.type(new Int());
+            assertNotNull(arrowInt, "arrow Int type for " + vendored.name());
+            assertNotNull(vendoredInt, "vendored Int type for " + vendored.name());
+            assertEquals(arrowInt.bitWidth(), vendoredInt.bitWidth(), "int.bitWidth for " + vendored.name());
+            assertEquals(arrowInt.isSigned(), vendoredInt.isSigned(), "int.isSigned for " + vendored.name());
+        } else if (vendored.typeType() == Type.TIMESTAMP) {
+            org.apache.arrow.flatbuf.Timestamp arrowTs
+                = (org.apache.arrow.flatbuf.Timestamp) arrow.type(new org.apache.arrow.flatbuf.Timestamp());
+            Timestamp vendoredTs = (Timestamp) vendored.type(new Timestamp());
+            assertNotNull(arrowTs, "arrow Timestamp type for " + vendored.name());
+            assertNotNull(vendoredTs, "vendored Timestamp type for " + vendored.name());
+            assertEquals(arrowTs.unit(), vendoredTs.unit(), "timestamp.unit for " + vendored.name());
+        }
+
+        for (int i = 0; i < arrow.childrenLength(); i++) {
+            compareField(arrow.children(i), vendored.children(i));
+        }
+    }
+
+    private static void compareRecordBatch(org.apache.arrow.flatbuf.RecordBatch arrow, RecordBatch vendored) {
+        assertEquals(arrow.length(), vendored.length(), "recordBatch.length");
+
+        assertEquals(arrow.nodesLength(), vendored.nodesLength(), "recordBatch.nodesLength");
+        for (int i = 0; i < arrow.nodesLength(); i++) {
+            assertEquals(arrow.nodes(i).length(), vendored.nodes(i).length(), "fieldNode.length at " + i);
+        }
+
+        assertEquals(arrow.buffersLength(), vendored.buffersLength(), "recordBatch.buffersLength");
+        for (int i = 0; i < arrow.buffersLength(); i++) {
+            Buffer vendoredBuffer = vendored.buffers(i);
+            assertEquals(arrow.buffers(i).offset(), vendoredBuffer.offset(), "buffer.offset at " + i);
+            assertEquals(arrow.buffers(i).length(), vendoredBuffer.length(), "buffer.length at " + i);
+        }
+
+        assertEquals(arrow.compression() == null, vendored.compression() == null, "recordBatch.compression presence");
+    }
+
+    // region helpers
+
+    private static ByteBuffer littleEndian(byte[] metadata) {
+        return ByteBuffer.wrap(metadata).order(ByteOrder.LITTLE_ENDIAN);
+    }
+
+    /**
+     * Splits an Arrow IPC stream into the metadata FlatBuffer of each encapsulated message, mirroring the framing the
+     * production reader performs (continuation marker + metadata length prefix, body skipped).
+     */
+    private static List<byte[]> extractMessageMetadata(byte[] stream) {
+        List<byte[]> messages = new ArrayList<>();
+        ByteBuffer buffer = ByteBuffer.wrap(stream).order(ByteOrder.LITTLE_ENDIAN);
+        int pos = 0;
+        int length = stream.length;
+        while (pos + 4 <= length) {
+            int marker = buffer.getInt(pos);
+            pos += 4;
+
+            int metadataLength;
+            if (marker == CONTINUATION_MARKER) {
+                if (pos + 4 > length) {
+                    break;
+                }
+                metadataLength = buffer.getInt(pos);
+                pos += 4;
+            } else {
+                metadataLength = marker;
+            }
+
+            if (metadataLength == 0) {
+                break;
+            }
+
+            byte[] metadata = new byte[metadataLength];
+            System.arraycopy(stream, pos, metadata, 0, metadataLength);
+            messages.add(metadata);
+            pos += metadataLength;
+
+            long bodyLength = Message.getRootAsMessage(littleEndian(metadata)).bodyLength();
+            pos += Math.toIntExact(bodyLength); // fail loudly instead of silently truncating an oversized body length
+        }
+        return messages;
+    }
+
+    /**
+     * Builds an Arrow IPC stream with a representative ListBlobs schema (string; signed and unsigned integers of
+     * several bit widths; boolean; second-precision timestamp; and a map&lt;string,string&gt; column) plus schema-level
+     * metadata, so the differential comparison exercises every vendored accessor — including {@code Int.bitWidth}
+     * / {@code Int.isSigned} across multiple widths and both signedness values, and the nested map → struct →
+     * key/value fields.
+     */
+    private static byte[] buildRepresentativePayload(BufferAllocator allocator) throws Exception {
+        VarCharVector name = new VarCharVector("Name", allocator);
+        UInt1Vector uint8 = new UInt1Vector("U8", allocator);
+        SmallIntVector int16 = new SmallIntVector("I16", allocator);
+        UInt4Vector uint32 = new UInt4Vector("U32", allocator);
+        BigIntVector contentLength = new BigIntVector("Content-Length", allocator);
+        BitVector deleted = new BitVector("Deleted", allocator);
+        TimeStampSecVector creationTime = new TimeStampSecVector("Creation-Time", allocator);
+        MapVector metadata = MapVector.empty("Metadata", allocator, false);
+
+        name.allocateNew();
+        uint8.allocateNew();
+        int16.allocateNew();
+        uint32.allocateNew();
+        contentLength.allocateNew();
+        deleted.allocateNew();
+        creationTime.allocateNew();
+
+        name.setSafe(0, "blob1".getBytes(StandardCharsets.UTF_8));
+        uint8.setSafe(0, 200);
+        int16.setSafe(0, -5);
+        uint32.setSafe(0, 42);
+        contentLength.setSafe(0, 7L);
+        deleted.setSafe(0, 0);
+        creationTime.setSafe(0, 1000L);
+
+        name.setValueCount(1);
+        uint8.setValueCount(1);
+        int16.setValueCount(1);
+        uint32.setValueCount(1);
+        contentLength.setValueCount(1);
+        deleted.setValueCount(1);
+        creationTime.setValueCount(1);
+
+        UnionMapWriter mapWriter = metadata.getWriter();
+        mapWriter.setPosition(0);
+        mapWriter.startMap();
+        mapWriter.startEntry();
+        mapWriter.key().varChar().writeVarChar(new Text("k1"));
+        mapWriter.value().varChar().writeVarChar(new Text("v1"));
+        mapWriter.endEntry();
+        mapWriter.endMap();
+        metadata.setValueCount(1);
+
+        List<FieldVector> vectors = new ArrayList<>();
+        vectors.add(name);
+        vectors.add(uint8);
+        vectors.add(int16);
+        vectors.add(uint32);
+        vectors.add(contentLength);
+        vectors.add(deleted);
+        vectors.add(creationTime);
+        vectors.add(metadata);
+
+        List<org.apache.arrow.vector.types.pojo.Field> fields = new ArrayList<>();
+        for (FieldVector vector : vectors) {
+            fields.add(vector.getField());
+        }
+
+        Map<String, String> schemaMetadata = new LinkedHashMap<>();
+        schemaMetadata.put("NextMarker", "nextPage");
+        schemaMetadata.put("NumberOfRecords", "1");
+        org.apache.arrow.vector.types.pojo.Schema schema
+            = new org.apache.arrow.vector.types.pojo.Schema(fields, schemaMetadata);
+
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        try (VectorSchemaRoot root = new VectorSchemaRoot(schema, vectors, 1);
+            ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) {
+            writer.start();
+            writer.writeBatch();
+            writer.end();
+        }
+        return out.toByteArray();
+    }
+
+    //endregion
+}
+
+
diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowFlatbufConstantsTest.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowFlatbufConstantsTest.java
new file mode 100644
index 000000000000..3ed329140545
--- /dev/null
+++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowFlatbufConstantsTest.java
@@ -0,0 +1,114 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.storage.blob.implementation.util;
+
+import com.azure.storage.blob.implementation.util.arrow.Endianness;
+import com.azure.storage.blob.implementation.util.arrow.MessageHeader;
+import com.azure.storage.blob.implementation.util.arrow.TimeUnit;
+import com.azure.storage.blob.implementation.util.arrow.Type;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Fidelity tests that pin the vendored Arrow FlatBuffer metadata constants
+ * (in {@code com.azure.storage.blob.implementation.util.arrow}) to the values defined by the official
+ * {@code org.apache.arrow.flatbuf} classes shipped in {@code arrow-format} (pulled in transitively at test scope via
+ * {@code arrow-vector}).
+ * <p>
+ * {@link BlobListArrowStreamReaderTests} validates decoding behavior against payloads written by the real
+ * Arrow writer, but only exercises the enum values that a representative ListBlobs response happens to use. These tests
+ * close that gap by asserting every enum ordinal and name our reader relies on matches Apache Arrow exactly,
+ * including values that live on error/rejection paths (for example dictionary batches). If a future {@code arrow}
+ * version bump renumbers or extends one of these enums, these tests fail loudly so the vendored copy can be reviewed.
+ */
+public class BlobListArrowFlatbufConstantsTest {
+
+    @Test
+    public void messageHeaderUnionMatchesArrow() {
+        assertEquals(org.apache.arrow.flatbuf.MessageHeader.NONE, MessageHeader.NONE);
+        assertEquals(org.apache.arrow.flatbuf.MessageHeader.Schema, MessageHeader.SCHEMA);
+        assertEquals(org.apache.arrow.flatbuf.MessageHeader.DictionaryBatch, MessageHeader.DICTIONARY_BATCH);
+        assertEquals(org.apache.arrow.flatbuf.MessageHeader.RecordBatch, MessageHeader.RECORD_BATCH);
+        assertEquals(org.apache.arrow.flatbuf.MessageHeader.Tensor, MessageHeader.TENSOR);
+        assertEquals(org.apache.arrow.flatbuf.MessageHeader.SparseTensor, MessageHeader.SPARSE_TENSOR);
+    }
+
+    @Test
+    public void endiannessMatchesArrow() {
+        assertEquals(org.apache.arrow.flatbuf.Endianness.Little, Endianness.LITTLE);
+        assertEquals(org.apache.arrow.flatbuf.Endianness.Big, Endianness.BIG);
+    }
+
+    @Test
+    public void timeUnitMatchesArrow() {
+        assertEquals(org.apache.arrow.flatbuf.TimeUnit.SECOND, TimeUnit.SECOND);
+        assertEquals(org.apache.arrow.flatbuf.TimeUnit.MILLISECOND, TimeUnit.MILLISECOND);
+        assertEquals(org.apache.arrow.flatbuf.TimeUnit.MICROSECOND, TimeUnit.MICROSECOND);
+        assertEquals(org.apache.arrow.flatbuf.TimeUnit.NANOSECOND, TimeUnit.NANOSECOND);
+
+        // The vendored name table must match Arrow's, element-for-element and in length.
+        assertEquals(org.apache.arrow.flatbuf.TimeUnit.names.length, namesLength(TimeUnit::name),
+            "Arrow TimeUnit enum changed size; review the vendored TimeUnit.");
+        for (int i = 0; i < org.apache.arrow.flatbuf.TimeUnit.names.length; i++) {
+            assertEquals(org.apache.arrow.flatbuf.TimeUnit.name(i), TimeUnit.name(i),
+                "TimeUnit name mismatch at ordinal " + i);
+        }
+    }
+
+    @Test
+    public void typeUnionOrdinalsMatchArrow() {
+        assertEquals(org.apache.arrow.flatbuf.Type.NONE, Type.NONE);
+        assertEquals(org.apache.arrow.flatbuf.Type.Null, Type.NULL);
+        assertEquals(org.apache.arrow.flatbuf.Type.Int, Type.INT);
+        assertEquals(org.apache.arrow.flatbuf.Type.FloatingPoint, Type.FLOATING_POINT);
+        assertEquals(org.apache.arrow.flatbuf.Type.Binary, Type.BINARY);
+        assertEquals(org.apache.arrow.flatbuf.Type.Utf8, Type.UTF8);
+        assertEquals(org.apache.arrow.flatbuf.Type.Bool, Type.BOOL);
+        assertEquals(org.apache.arrow.flatbuf.Type.Decimal, Type.DECIMAL);
+        assertEquals(org.apache.arrow.flatbuf.Type.Date, Type.DATE);
+        assertEquals(org.apache.arrow.flatbuf.Type.Time, Type.TIME);
+        assertEquals(org.apache.arrow.flatbuf.Type.Timestamp, Type.TIMESTAMP);
+        assertEquals(org.apache.arrow.flatbuf.Type.Interval, Type.INTERVAL);
+        assertEquals(org.apache.arrow.flatbuf.Type.List, Type.LIST);
+        assertEquals(org.apache.arrow.flatbuf.Type.Struct_, Type.STRUCT);
+        assertEquals(org.apache.arrow.flatbuf.Type.Union, Type.UNION);
+        assertEquals(org.apache.arrow.flatbuf.Type.FixedSizeBinary, Type.FIXED_SIZE_BINARY);
+        assertEquals(org.apache.arrow.flatbuf.Type.FixedSizeList, Type.FIXED_SIZE_LIST);
+        assertEquals(org.apache.arrow.flatbuf.Type.Map, Type.MAP);
+        assertEquals(org.apache.arrow.flatbuf.Type.Duration, Type.DURATION);
+        assertEquals(org.apache.arrow.flatbuf.Type.LargeBinary, Type.LARGE_BINARY);
+        assertEquals(org.apache.arrow.flatbuf.Type.LargeUtf8, Type.LARGE_UTF8);
+        assertEquals(org.apache.arrow.flatbuf.Type.LargeList, Type.LARGE_LIST);
+        assertEquals(org.apache.arrow.flatbuf.Type.RunEndEncoded, Type.RUN_END_ENCODED);
+        assertEquals(org.apache.arrow.flatbuf.Type.BinaryView, Type.BINARY_VIEW);
+        assertEquals(org.apache.arrow.flatbuf.Type.Utf8View, Type.UTF8_VIEW);
+        assertEquals(org.apache.arrow.flatbuf.Type.ListView, Type.LIST_VIEW);
+        assertEquals(org.apache.arrow.flatbuf.Type.LargeListView, Type.LARGE_LIST_VIEW);
+    }
+
+    @Test
+    public void typeUnionNamesMatchArrow() {
+        // A length mismatch means Arrow added/removed a Type; the vendored Type (and reader's switch) must be reviewed.
+        assertEquals(org.apache.arrow.flatbuf.Type.names.length, namesLength(Type::name),
+            "Arrow Type enum changed size; review the vendored Type and the reader's type switch.");
+        for (int i = 0; i < org.apache.arrow.flatbuf.Type.names.length; i++) {
+            assertEquals(org.apache.arrow.flatbuf.Type.name(i), Type.name(i), "Type name mismatch at ordinal " + i);
+        }
+    }
+
+    /** Counts a vendored name table's length by probing indices until the lookup throws for an out-of-range index. */
+    private static int namesLength(java.util.function.IntFunction<String> nameFn) {
+        int count = 0;
+        while (true) {
+            try {
+                nameFn.apply(count);
+                count++;
+            } catch (ArrayIndexOutOfBoundsException e) {
+                return count;
+            }
+        }
+    }
+}
+
diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReaderRejectionTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReaderRejectionTests.java
new file mode 100644
index 000000000000..f91a9e56ab50
--- /dev/null
+++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReaderRejectionTests.java
@@ -0,0 +1,182 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +package com.azure.storage.blob.implementation.util; + +import com.azure.storage.blob.implementation.models.BlobListArrowParseException; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryEncoder; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.util.Text; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Parity tests for Arrow IPC content the ListBlobs reader intentionally rejects or treats as an error. Where possible + * the payloads are produced with the official {@code arrow-vector} writer (test scope) so the vendored reader's + * rejection paths are validated against genuine Arrow output rather than hand-crafted bytes. The remaining cases cover + * malformed/edge inputs that the writer cannot produce (null and empty streams). + *

+ * Behavioral parity for supported content lives in {@link BlobListArrowStreamReaderTests}; enum/ordinal
+ * fidelity lives in {@link BlobListArrowFlatbufConstantsTest}.
+ */
+public class BlobListArrowStreamReaderRejectionTests {
+
+    @Test
+    public void rejectsNullStream() {
+        BlobListArrowParseException ex = assertThrows(BlobListArrowParseException.class,
+            () -> ArrowBlobListDeserializer.deserialize(null));
+        assertTrue(ex.getMessage().contains("input stream is null"), "Unexpected message: " + ex.getMessage());
+    }
+
+    @Test
+    public void rejectsStreamWithNoSchema() {
+        assertRejected(new byte[0], "stream contained no schema");
+    }
+
+    @Test
+    public void rejectsDictionaryEncodedStreams() throws Exception {
+        byte[] payload;
+        try (BufferAllocator allocator = new RootAllocator()) {
+            payload = buildDictionaryEncodedPayload(allocator);
+        }
+        assertRejected(payload, "dictionary-encoded streams are not supported");
+    }
+
+    @Test
+    public void rejectsUnsupportedColumnType() throws Exception {
+        byte[] payload;
+        try (BufferAllocator allocator = new RootAllocator()) {
+            Float8Vector score = new Float8Vector("Score", allocator);
+            score.allocateNew();
+            score.setSafe(0, 1.5);
+            score.setValueCount(1);
+            payload = writeBatch(allocator, Collections.singletonList(score), 1);
+        }
+        assertRejected(payload, "unsupported Arrow type 'FloatingPoint'");
+    }
+
+    @Test
+    public void rejectsUnsupportedTimestampUnit() throws Exception {
+        byte[] payload;
+        try (BufferAllocator allocator = new RootAllocator()) {
+            TimeStampMilliVector creationTime = new TimeStampMilliVector("Creation-Time", allocator);
+            creationTime.allocateNew();
+            creationTime.setSafe(0, 1000L);
+            creationTime.setValueCount(1);
+            payload = writeBatch(allocator, Collections.singletonList(creationTime), 1);
+        }
+        assertRejected(payload, "unsupported timestamp unit 'MILLISECOND'");
+    }
+
+    @Test
+    public void rejectsMapWithNonStringValues() throws Exception {
+        byte[] payload;
+        try (BufferAllocator allocator = new RootAllocator()) {
+            MapVector metadata = MapVector.empty("Metadata", allocator, false);
+            UnionMapWriter mapWriter = metadata.getWriter();
+            mapWriter.setPosition(0);
+            mapWriter.startMap();
+            mapWriter.startEntry();
+            mapWriter.key().varChar().writeVarChar(new Text("k1"));
+            mapWriter.value().bigInt().writeBigInt(42L);
+            mapWriter.endEntry();
+            mapWriter.endMap();
+            metadata.setValueCount(1);
+            payload = writeBatch(allocator, Collections.singletonList(metadata), 1);
+        }
+        assertRejected(payload, "map entries must be string keys and values");
+    }
+
+    // region helpers
+
+    private static void assertRejected(byte[] payload, String expectedMessageFragment) {
+        InputStream stream = new ByteArrayInputStream(payload);
+        BlobListArrowParseException ex
+            = assertThrows(BlobListArrowParseException.class, () -> ArrowBlobListDeserializer.deserialize(stream));
+        assertTrue(ex.getMessage().contains(expectedMessageFragment), "Unexpected message: " + ex.getMessage());
+    }
+
+    /**
+     * Writes a single-batch Arrow IPC stream from the supplied vectors. The vectors are owned by (and closed with) the
+     * {@link VectorSchemaRoot} this method creates, so callers must not reuse them afterwards.
+     */
+    private static byte[] writeBatch(BufferAllocator allocator, List<FieldVector> vectors, int rowCount)
+        throws Exception {
+        List<Field> fields = new ArrayList<>(vectors.size());
+        for (FieldVector vector : vectors) {
+            fields.add(vector.getField());
+        }
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, rowCount);
+            ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) {
+            writer.start();
+            writer.writeBatch();
+            writer.end();
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Builds an Arrow IPC stream whose single column is dictionary-encoded, which forces the official writer to emit a
+     * {@code DictionaryBatch} message ahead of the record batch.
+     */
+    private static byte[] buildDictionaryEncodedPayload(BufferAllocator allocator) throws Exception {
+        VarCharVector dictVector = new VarCharVector("Name-dict", allocator);
+        VarCharVector name = new VarCharVector("Name", allocator);
+        try {
+            dictVector.allocateNew();
+            dictVector.setSafe(0, "blob1".getBytes(StandardCharsets.UTF_8));
+            dictVector.setSafe(1, "blob2".getBytes(StandardCharsets.UTF_8));
+            dictVector.setValueCount(2);
+            Dictionary dictionary = new Dictionary(dictVector, new DictionaryEncoding(1L, false, null));
+
+            name.allocateNew();
+            name.setSafe(0, "blob1".getBytes(StandardCharsets.UTF_8));
+            name.setSafe(1, "blob2".getBytes(StandardCharsets.UTF_8));
+            name.setValueCount(2);
+
+            DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+            provider.put(dictionary);
+
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+            FieldVector encoded = (FieldVector) DictionaryEncoder.encode(name, dictionary);
+            try (VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(encoded.getField()),
+                Collections.singletonList(encoded), 2);
+                ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, out)) {
+                writer.start();
+                writer.writeBatch();
+                writer.end();
+            }
+            return out.toByteArray();
+        } finally {
+            name.close();
+            dictVector.close();
+        }
+    }
+
+    //endregion
+}
+
diff --git a/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReaderTests.java b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReaderTests.java
new file mode 100644
index 000000000000..b03027eeb841
--- /dev/null
+++ b/sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/implementation/util/BlobListArrowStreamReaderTests.java
@@ -0,0 +1,184 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+ +package com.azure.storage.blob.implementation.util; + +import com.azure.storage.blob.implementation.models.BlobItemInternal; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.TimeStampSecVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.Text; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Parity tests that build a real Arrow IPC payload with the official {@code arrow-vector} writer and validate that the + * internal {@link BlobListArrowStreamReader} / {@link ArrowBlobListDeserializer} decode it identically. 
This exercises the
+ * custom reader against genuine Arrow writer output; it does not establish full parity with the Apache Arrow parser.
+ */
+public class BlobListArrowStreamReaderTests {
+
+    @Test
+    public void parsesRealArrowPayload() throws Exception {
+        byte[] payload;
+        try (BufferAllocator allocator = new RootAllocator()) {
+            payload = buildPayload(allocator);
+        }
+
+        ArrowBlobListDeserializer.ArrowListBlobsResult result
+            = ArrowBlobListDeserializer.deserialize(new ByteArrayInputStream(payload));
+
+        // Schema metadata
+        assertEquals("nextPage", result.getNextMarker());
+        assertEquals(Integer.valueOf(2), result.getNumberOfRecords());
+
+        // Two rows: one blob, one prefix
+        List<BlobItemInternal> items = result.getBlobItems();
+        assertEquals(2, items.size());
+
+        BlobItemInternal blob = items.get(0);
+        assertNotNull(blob.getName());
+        assertEquals("blob1", blob.getName().getContent());
+        assertNull(blob.isPrefix());
+        assertEquals(Boolean.FALSE, blob.isDeleted());
+        assertNotNull(blob.getProperties());
+        assertEquals(7L, (long) blob.getProperties().getContentLength());
+        assertEquals("application/octet-stream", blob.getProperties().getContentType());
+        assertNotNull(blob.getProperties().getCreationTime());
+        assertEquals(1000L, blob.getProperties().getCreationTime().toEpochSecond());
+
+        Map<String, String> metadata = blob.getMetadata();
+        assertNotNull(metadata);
+        assertEquals("v1", metadata.get("k1"));
+        assertEquals("v2", metadata.get("k2"));
+
+        BlobItemInternal prefix = items.get(1);
+        assertNotNull(prefix.getName());
+        assertEquals("dir/", prefix.getName().getContent());
+        assertTrue(prefix.isPrefix());
+    }
+
+    @Test
+    public void parsesEmptyMetadataAsNull() throws Exception {
+        byte[] payload;
+        try (BufferAllocator allocator = new RootAllocator()) {
+            payload = buildPayload(allocator);
+        }
+
+        ArrowBlobListDeserializer.ArrowListBlobsResult result
+            = ArrowBlobListDeserializer.deserialize(new ByteArrayInputStream(payload));
+        assertFalse(result.getBlobItems().isEmpty());
+        // Row 1 (prefix) had no metadata entries; ensure prefix path doesn't surface an (empty) metadata map.
+        assertNull(result.getBlobItems().get(1).getMetadata());
+    }
+
+    /**
+     * Builds an Arrow IPC stream with a representative ListBlobs schema: string, integer, boolean, second-precision
+     * timestamp, content-type string and a map&lt;string,string&gt; metadata column, plus schema-level NextMarker and
+     * NumberOfRecords metadata.
+     */
+    private static byte[] buildPayload(BufferAllocator allocator) throws Exception {
+        VarCharVector name = new VarCharVector("Name", allocator);
+        VarCharVector resourceType = new VarCharVector("ResourceType", allocator);
+        BigIntVector contentLength = new BigIntVector("Content-Length", allocator);
+        VarCharVector contentType = new VarCharVector("Content-Type", allocator);
+        BitVector deleted = new BitVector("Deleted", allocator);
+        TimeStampSecVector creationTime = new TimeStampSecVector("Creation-Time", allocator);
+        MapVector metadata = MapVector.empty("Metadata", allocator, false);
+
+        name.allocateNew();
+        resourceType.allocateNew();
+        contentLength.allocateNew();
+        contentType.allocateNew();
+        deleted.allocateNew();
+        creationTime.allocateNew();
+
+        // Row 0: a real blob.
+        name.setSafe(0, "blob1".getBytes(StandardCharsets.UTF_8));
+        // resourceType[0] left null -> not a prefix.
+        contentLength.setSafe(0, 7L);
+        contentType.setSafe(0, "application/octet-stream".getBytes(StandardCharsets.UTF_8));
+        deleted.setSafe(0, 0);
+        creationTime.setSafe(0, 1000L);
+
+        // Row 1: a virtual directory (prefix).
+        name.setSafe(1, "dir/".getBytes(StandardCharsets.UTF_8));
+        resourceType.setSafe(1, "blobprefix".getBytes(StandardCharsets.UTF_8));
+        // remaining columns null for the prefix row.
+
+        name.setValueCount(2);
+        resourceType.setValueCount(2);
+        contentLength.setValueCount(2);
+        contentType.setValueCount(2);
+        deleted.setValueCount(2);
+        creationTime.setValueCount(2);
+
+        UnionMapWriter mapWriter = metadata.getWriter();
+        mapWriter.setPosition(0);
+        mapWriter.startMap();
+        writeEntry(mapWriter, "k1", "v1");
+        writeEntry(mapWriter, "k2", "v2");
+        mapWriter.endMap();
+        // Row 1 metadata left null.
+        metadata.setValueCount(2);
+
+        List<FieldVector> vectors = new ArrayList<>();
+        vectors.add(name);
+        vectors.add(resourceType);
+        vectors.add(contentLength);
+        vectors.add(contentType);
+        vectors.add(deleted);
+        vectors.add(creationTime);
+        vectors.add(metadata);
+
+        List<Field> fields = new ArrayList<>();
+        for (FieldVector vector : vectors) {
+            fields.add(vector.getField());
+        }
+
+        Map<String, String> schemaMetadata = new LinkedHashMap<>();
+        schemaMetadata.put("NextMarker", "nextPage");
+        schemaMetadata.put("NumberOfRecords", "2");
+        Schema schema = new Schema(fields, schemaMetadata);
+
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        try (VectorSchemaRoot root = new VectorSchemaRoot(schema, vectors, 2);
+            ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) {
+            writer.start();
+            writer.writeBatch();
+            writer.end();
+        }
+
+        return out.toByteArray();
+    }
+
+    private static void writeEntry(UnionMapWriter mapWriter, String key, String value) {
+        mapWriter.startEntry();
+        mapWriter.key().varChar().writeVarChar(new Text(key));
+        mapWriter.value().varChar().writeVarChar(new Text(value));
+        mapWriter.endEntry();
+    }
+}
diff --git a/sdk/storage/azure-storage-blob/swagger/README.md b/sdk/storage/azure-storage-blob/swagger/README.md
index 98afe0c616dc..0019d026a0d0 100644
--- a/sdk/storage/azure-storage-blob/swagger/README.md
+++ b/sdk/storage/azure-storage-blob/swagger/README.md
@@ -16,7 +16,7 @@ autorest
 ### Code generation settings
 ``` yaml
 use: '@autorest/java@4.1.63'
-input-file: 
https://raw.githubusercontent.com/seanmcc-msft/azure-rest-api-specs/eb29a830edf5db50758e7d044160c7f18077f7f7/specification/storage/data-plane/Microsoft.BlobStorage/stable/2026-10-06/blob.json +input-file: https://raw.githubusercontent.com/nickliu-msft/azure-rest-api-specs/f85584d452061985a5fc21a67b8fc0b46b75188a/specification/storage/data-plane/Microsoft.BlobStorage/stable/2026-10-06/blob.json java: true output-folder: ../ namespace: com.azure.storage.blob @@ -591,6 +591,24 @@ directive: delete $["x-ms-pageable"]; ``` +### Delete Container_ListBlobFlatSegment_ApacheArrow x-ms-pageable as response is raw Arrow stream +``` yaml +directive: +- from: swagger-document + where: $["x-ms-paths"]["/{containerName}?restype=container&comp=list&flat&arrow"].get + transform: > + delete $["x-ms-pageable"]; +``` + +### Delete Container_ListBlobHierarchySegment_ApacheArrow x-ms-pageable as response is raw Arrow stream +``` yaml +directive: +- from: swagger-document + where: $["x-ms-paths"]["/{containerName}?restype=container&comp=list&hierarchy&arrow"].get + transform: > + delete $["x-ms-pageable"]; +``` + ### BlobDeleteType expandable string enum ``` yaml directive: @@ -708,4 +726,3 @@ directive: ]; ``` - diff --git a/sdk/storage/azure-storage-common/src/test-shared/java/com/azure/storage/common/test/shared/TestEnvironment.java b/sdk/storage/azure-storage-common/src/test-shared/java/com/azure/storage/common/test/shared/TestEnvironment.java index 5d9bc1c9dfac..6134a8dbf9ee 100644 --- a/sdk/storage/azure-storage-common/src/test-shared/java/com/azure/storage/common/test/shared/TestEnvironment.java +++ b/sdk/storage/azure-storage-common/src/test-shared/java/com/azure/storage/common/test/shared/TestEnvironment.java @@ -107,8 +107,8 @@ private static TestAccount readTestAccountFromEnvironment(String prefix, TestMod + "AccountKey=%s;EndpointSuffix=core.windows.net", name, key); } } - String blobEndpoint = String.format(SCHEME + "://%s.blob.core.windows.net", name); - String 
blobEndpointSecondary = String.format(SCHEME + "://%s-secondary.blob.core.windows.net", name);
+        // NOTE(review): only the blob endpoints point at the "preprod" suffix; the other services below still use
+        // the production suffix. This looks like a temporary test override - confirm before merging.
+        String blobEndpoint = String.format(SCHEME + "://%s.blob.preprod.core.windows.net", name);
+        String blobEndpointSecondary = String.format(SCHEME + "://%s-secondary.blob.preprod.core.windows.net", name);
         String dataLakeEndpoint = String.format(SCHEME + "://%s.dfs.core.windows.net", name);
         String queueEndpoint = String.format(SCHEME + "://%s.queue.core.windows.net", name);
         String fileEndpoint = String.format(SCHEME + "://%s.file.core.windows.net", name);