Skip to content

Commit 73890b1

Browse files
authored
[Enhancement](sdk) Add java SDK as a subproject under sdk/java-doris-sdk (#61626)
### What problem does this PR solve? The previous pull request (#58404) added the Go SDK; this PR adds a Java version modeled on it.
1 parent 08203ba commit 73890b1

28 files changed

Lines changed: 2870 additions & 0 deletions

sdk/java-doris-sdk/README.md

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# 🚀 Doris Java SDK
21+
22+
[![Java Version](https://img.shields.io/badge/Java-%3E%3D%208-blue.svg)](https://www.oracle.com/java/)
23+
[![Thread Safe](https://img.shields.io/badge/Thread%20Safe-✅-brightgreen.svg)](#-thread-safety)
24+
25+
A lightweight Java Stream Load client for Apache Doris — easy to use, high performance, and production-ready. Maintained by the Apache Doris core contributor team.
26+
27+
## ✨ Features
28+
29+
**Easy to Use**: Clean Builder API that encapsulates HTTP configuration, multi-format support, and intelligent retry logic.
30+
31+
**High Performance**: Built-in efficient concurrency and batch loading best practices — buffer once, retry multiple times, compress once for Gzip.
32+
33+
**Production Ready**: Battle-tested in large-scale, high-pressure production environments with full observability.
34+
35+
## 📦 Installation
36+
37+
> **Note**: This SDK has not yet been published to Maven Central. You need to build and install it locally first.
38+
39+
### Step 1: Build and install locally
40+
41+
```bash
42+
git clone https://github.com/apache/doris.git
43+
cd doris/sdk/java-doris-sdk
44+
mvn install -DskipTests
45+
```
46+
47+
### Step 2: Add the dependency to your `pom.xml`
48+
49+
```xml
50+
<dependency>
51+
<groupId>org.apache.doris</groupId>
52+
<artifactId>java-doris-sdk</artifactId>
53+
<version>1.0.0</version>
54+
</dependency>
55+
```
56+
57+
## 🚀 Quick Start
58+
59+
### CSV Load
60+
61+
```java
62+
DorisConfig config = DorisConfig.builder()
63+
.endpoints(Arrays.asList("http://127.0.0.1:8030"))
64+
.user("root")
65+
.password("password")
66+
.database("test_db")
67+
.table("users")
68+
.format(DorisConfig.defaultCsvFormat())
69+
.retry(DorisConfig.defaultRetry())
70+
.groupCommit(GroupCommitMode.ASYNC)
71+
.build();
72+
73+
try (DorisLoadClient client = DorisClient.newClient(config)) {
74+
String data = "1,Alice,25\n2,Bob,30\n3,Charlie,35";
75+
LoadResponse response = client.load(DorisClient.stringStream(data));
76+
77+
if (response.getStatus() == LoadResponse.Status.SUCCESS) {
78+
System.out.println("Loaded rows: " + response.getRespContent().getNumberLoadedRows());
79+
}
80+
}
81+
```
82+
83+
### JSON Load
84+
85+
```java
86+
DorisConfig config = DorisConfig.builder()
87+
.endpoints(Arrays.asList("http://127.0.0.1:8030"))
88+
.user("root")
89+
.password("password")
90+
.database("test_db")
91+
.table("users")
92+
.format(DorisConfig.defaultJsonFormat()) // JSON Lines format
93+
.retry(DorisConfig.defaultRetry())
94+
.groupCommit(GroupCommitMode.ASYNC)
95+
.build();
96+
97+
try (DorisLoadClient client = DorisClient.newClient(config)) {
98+
String jsonData = "{\"id\":1,\"name\":\"Alice\",\"age\":25}\n"
99+
+ "{\"id\":2,\"name\":\"Bob\",\"age\":30}\n"
100+
+ "{\"id\":3,\"name\":\"Charlie\",\"age\":35}\n";
101+
102+
LoadResponse response = client.load(DorisClient.stringStream(jsonData));
103+
}
104+
```
105+
106+
## 🛠️ Configuration
107+
108+
### Basic Configuration
109+
110+
```java
111+
DorisConfig config = DorisConfig.builder()
112+
// Required fields
113+
.endpoints(Arrays.asList(
114+
"http://fe1:8030",
115+
"http://fe2:8030" // Multiple FE nodes supported, randomly load-balanced
116+
))
117+
.user("your_username")
118+
.password("your_password")
119+
.database("your_database")
120+
.table("your_table")
121+
122+
// Optional fields
123+
.labelPrefix("my_app") // label prefix (ignored when Group Commit is enabled)
124+
.label("custom_label_001") // custom label (ignored when Group Commit is enabled)
125+
.format(DorisConfig.defaultCsvFormat())
126+
.retry(DorisConfig.defaultRetry())
127+
.groupCommit(GroupCommitMode.ASYNC)
128+
.options(new HashMap<String, String>() {{
129+
put("timeout", "3600");
130+
put("max_filter_ratio", "0.1");
131+
put("strict_mode", "true");
132+
}})
133+
.build();
134+
```
135+
136+
### Data Format
137+
138+
```java
139+
// 1. Use default formats (recommended)
140+
DorisConfig.defaultJsonFormat() // JSON Lines, read_json_by_line=true
141+
DorisConfig.defaultCsvFormat() // CSV, comma-separated, \n line delimiter
142+
143+
// 2. Custom JSON format
144+
new JsonFormat(JsonFormat.Type.OBJECT_LINE) // JSON Lines
145+
new JsonFormat(JsonFormat.Type.ARRAY) // JSON Array
146+
147+
// 3. Custom CSV format
148+
new CsvFormat("|", "\\n") // pipe-separated
149+
```
150+
151+
### Retry Configuration
152+
153+
```java
154+
// 1. Default retry (recommended)
155+
DorisConfig.defaultRetry() // 6 retries, total time limit 60s
156+
// Backoff sequence: 1s, 2s, 4s, 8s, 16s, 32s (subject to the 60s total time limit)
157+
158+
// 2. Custom retry
159+
RetryConfig.builder()
160+
.maxRetryTimes(3) // max 3 retries
161+
.baseIntervalMs(2000) // base interval 2 seconds
162+
.maxTotalTimeMs(30000) // total time limit 30 seconds
163+
.build()
164+
165+
// 3. Disable retry
166+
.retry(null)
167+
```
168+
169+
### Group Commit Mode
170+
171+
```java
172+
GroupCommitMode.ASYNC // async mode, highest throughput
173+
GroupCommitMode.SYNC // sync mode, immediately visible after return
174+
GroupCommitMode.OFF // disabled, use traditional stream load
175+
```
176+
177+
> ⚠️ **Note**: When Group Commit is enabled, all label configuration is automatically ignored and a warning is logged.
178+
179+
### Gzip Compression
180+
181+
```java
182+
DorisConfig config = DorisConfig.builder()
183+
// ... other config
184+
.enableGzip(true) // SDK compresses the request body automatically
185+
.build();
186+
```
187+
188+
> The SDK transparently compresses the request body before sending and sets the `compress_type=gz` header automatically. Compression runs only once and is reused across retries.
189+
190+
## 🔄 Concurrent Usage
191+
192+
### Basic Concurrent Example
193+
194+
```java
195+
DorisLoadClient client = DorisClient.newClient(config); // thread-safe, share across threads; close it once all tasks have finished
196+
197+
ExecutorService executor = Executors.newFixedThreadPool(10);
198+
for (int i = 0; i < 10; i++) {
199+
final int workerId = i;
200+
executor.submit(() -> {
201+
// Each thread uses its own independent data
202+
String data = generateWorkerData(workerId);
203+
LoadResponse response = client.load(DorisClient.stringStream(data));
204+
205+
if (response.getStatus() == LoadResponse.Status.SUCCESS) {
206+
System.out.printf("Worker %d loaded %d rows%n",
207+
workerId, response.getRespContent().getNumberLoadedRows());
208+
}
209+
});
210+
}
211+
```
212+
213+
### ⚠️ Thread Safety
214+
215+
```java
216+
// ✅ Correct: DorisLoadClient is thread-safe, share one instance across threads
217+
DorisLoadClient client = DorisClient.newClient(config);
218+
for (int i = 0; i < 10; i++) {
219+
executor.submit(() -> {
220+
String data = generateData(); // each thread has its own data
221+
client.load(DorisClient.stringStream(data));
222+
});
223+
}
224+
225+
// ❌ Wrong: never share the same InputStream across threads
226+
InputStream sharedStream = new FileInputStream("data.csv");
227+
for (int i = 0; i < 10; i++) {
228+
executor.submit(() -> client.load(sharedStream)); // concurrent reads on shared stream cause data corruption
229+
}
230+
```
231+
232+
## 📊 Response Handling
233+
234+
```java
235+
LoadResponse response = client.load(data);
236+
237+
if (response.getStatus() == LoadResponse.Status.SUCCESS) {
238+
RespContent resp = response.getRespContent();
239+
System.out.println("Load succeeded!");
240+
System.out.println("Loaded rows: " + resp.getNumberLoadedRows());
241+
System.out.println("Load bytes: " + resp.getLoadBytes());
242+
System.out.println("Load time (ms): " + resp.getLoadTimeMs());
243+
System.out.println("Label: " + resp.getLabel());
244+
} else {
245+
System.out.println("Load failed: " + response.getErrorMessage());
246+
247+
// Get detailed error info
248+
if (response.getRespContent().getErrorUrl() != null) {
249+
System.out.println("Error detail: " + response.getRespContent().getErrorUrl());
250+
}
251+
}
252+
```
253+
254+
## 🛠️ Utilities
255+
256+
### Stream Conversion Helpers
257+
258+
```java
259+
// String to InputStream
260+
InputStream stream = DorisClient.stringStream("1,Alice,25\n2,Bob,30");
261+
262+
// byte array to InputStream
263+
byte[] data = ...;
264+
InputStream stream = DorisClient.bytesStream(data);
265+
266+
// Serialize objects to JSON InputStream (uses Jackson)
267+
List<User> users = Arrays.asList(new User(1, "Alice"), new User(2, "Bob"));
268+
InputStream stream = DorisClient.jsonStream(users);
269+
```
270+
271+
### Default Config Builders
272+
273+
```java
274+
DorisConfig.defaultRetry() // 6 retries, 60s total time
275+
DorisConfig.defaultJsonFormat() // JSON Lines format
276+
DorisConfig.defaultCsvFormat() // standard CSV format
277+
278+
RetryConfig.builder()
279+
.maxRetryTimes(3)
280+
.baseIntervalMs(1000)
281+
.maxTotalTimeMs(30000)
282+
.build()
283+
```
284+
285+
## 📈 Production Examples
286+
287+
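Full production-grade examples are available under `src/main/java/org/apache/doris/sdk/examples/`. The jar produced by `mvn package` does not bundle its dependencies, so when running the commands below, also put the HttpClient, Jackson, and SLF4J jars on the classpath (for example, run `mvn dependency:copy-dependencies` and use `-cp "target/java-doris-sdk-1.0.0.jar:target/dependency/*"`):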
288+
289+
```bash
290+
# Build
291+
mvn package -DskipTests
292+
293+
# Run all examples
294+
java -cp target/java-doris-sdk-1.0.0.jar org.apache.doris.sdk.examples.ExamplesMain all
295+
296+
# Run individual examples
297+
java -cp target/java-doris-sdk-1.0.0.jar org.apache.doris.sdk.examples.ExamplesMain simple # basic JSON load
298+
java -cp target/java-doris-sdk-1.0.0.jar org.apache.doris.sdk.examples.ExamplesMain single # large batch load (100k records)
299+
java -cp target/java-doris-sdk-1.0.0.jar org.apache.doris.sdk.examples.ExamplesMain json # production JSON load (50k records)
300+
java -cp target/java-doris-sdk-1.0.0.jar org.apache.doris.sdk.examples.ExamplesMain concurrent # concurrent load (1M records, 10 threads)
301+
java -cp target/java-doris-sdk-1.0.0.jar org.apache.doris.sdk.examples.ExamplesMain gzip # gzip compressed load
302+
```
303+
304+
## 🤝 Contributing
305+
306+
Pull requests are welcome!
307+
308+
## 🙏 Acknowledgements
309+
310+
Maintained by the Apache Doris core contributor team.

sdk/java-doris-sdk/pom.xml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Licensed to the Apache Software Foundation (ASF) under one
4+
or more contributor license agreements. See the NOTICE file
5+
distributed with this work for additional information
6+
regarding copyright ownership. The ASF licenses this file
7+
to you under the Apache License, Version 2.0 (the
8+
"License"); you may not use this file except in compliance
9+
with the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing,
14+
software distributed under the License is distributed on an
15+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
KIND, either express or implied. See the License for the
17+
specific language governing permissions and limitations
18+
under the License.
19+
-->
20+
<project xmlns="http://maven.apache.org/POM/4.0.0"
21+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
22+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
23+
<modelVersion>4.0.0</modelVersion>
24+
25+
<groupId>org.apache.doris</groupId>
26+
<artifactId>java-doris-sdk</artifactId>
27+
<version>1.0.0</version>
28+
<packaging>jar</packaging>
29+
30+
<name>Apache Doris Java SDK</name>
31+
<description>A lightweight Apache Doris Stream Load client (Java version)</description>
32+
33+
<properties>
34+
<maven.compiler.source>8</maven.compiler.source>
35+
<maven.compiler.target>8</maven.compiler.target>
36+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
37+
</properties>
38+
39+
<dependencies>
40+
<!-- HTTP client -->
41+
<dependency>
42+
<groupId>org.apache.httpcomponents</groupId>
43+
<artifactId>httpclient</artifactId>
44+
<version>4.5.13</version>
45+
</dependency>
46+
47+
<!-- JSON serialization -->
48+
<dependency>
49+
<groupId>com.fasterxml.jackson.core</groupId>
50+
<artifactId>jackson-databind</artifactId>
51+
<version>2.13.5</version>
52+
</dependency>
53+
54+
<!-- Logging: SLF4J binding for Log4j 1.2 (pulls in the slf4j-api facade transitively) -->
55+
<dependency>
56+
<groupId>org.slf4j</groupId>
57+
<artifactId>slf4j-log4j12</artifactId>
58+
<version>1.7.9</version>
59+
</dependency>
60+
61+
<!-- Test: JUnit 5 + Mockito -->
62+
<dependency>
63+
<groupId>org.junit.jupiter</groupId>
64+
<artifactId>junit-jupiter</artifactId>
65+
<version>5.10.2</version>
66+
<scope>test</scope>
67+
</dependency>
68+
<dependency>
69+
<groupId>org.mockito</groupId>
70+
<artifactId>mockito-core</artifactId>
71+
<version>4.11.0</version>
72+
<scope>test</scope>
73+
</dependency>
74+
</dependencies>
75+
76+
<build>
77+
<plugins>
78+
<plugin>
79+
<groupId>org.apache.maven.plugins</groupId>
80+
<artifactId>maven-surefire-plugin</artifactId>
81+
<version>3.2.5</version>
82+
</plugin>
83+
</plugins>
84+
</build>
85+
</project>

0 commit comments

Comments
 (0)