Skip to content

Commit 0bb0ef5

Browse files
authored
Implement CsvDataTableStore.getRows() so that it can read back data it has written (#7256)
1 parent d8adf51 commit 0bb0ef5

File tree

2 files changed

+265
-9
lines changed

2 files changed

+265
-9
lines changed

rewrite-core/src/main/java/org/openrewrite/CsvDataTableStore.java

Lines changed: 120 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
*/
1616
package org.openrewrite;
1717

18+
import com.univocity.parsers.csv.CsvParser;
19+
import com.univocity.parsers.csv.CsvParserSettings;
1820
import com.univocity.parsers.csv.CsvWriter;
1921
import com.univocity.parsers.csv.CsvWriterSettings;
2022
import org.jspecify.annotations.Nullable;
@@ -46,18 +48,27 @@
4648
* // Plain CSV
4749
* new CsvDataTableStore(outputDir)
4850
*
49-
* // GZIP compressed with repository columns
51+
* // GZIP compressed with repository columns (write-only)
5052
* new CsvDataTableStore(outputDir,
5153
* path -> new GZIPOutputStream(Files.newOutputStream(path)),
5254
* ".csv.gz",
53-
* List.of(Map.entry("repositoryOrigin", origin), Map.entry("repositoryPath", path)),
54-
* List.of(Map.entry("org1", orgValue)))
55+
* Map.of("repositoryOrigin", origin, "repositoryPath", path),
56+
* Map.of("org1", orgValue))
57+
*
58+
* // GZIP compressed with read-back support
59+
* new CsvDataTableStore(outputDir,
60+
* path -> new GZIPOutputStream(Files.newOutputStream(path)),
61+
* path -> new GZIPInputStream(Files.newInputStream(path)),
62+
* ".csv.gz",
63+
* Map.of("repositoryOrigin", origin, "repositoryPath", path),
64+
* Map.of("org1", orgValue))
5565
* }</pre>
5666
*/
5767
public class CsvDataTableStore implements DataTableStore, AutoCloseable {
5868

5969
private final Path outputDir;
6070
private final Function<Path, OutputStream> outputStreamFactory;
71+
private final Function<Path, InputStream> inputStreamFactory;
6172
private final String fileExtension;
6273
private final Map<String, String> prefixColumns;
6374
private final Map<String, String> suffixColumns;
@@ -67,27 +78,55 @@ public class CsvDataTableStore implements DataTableStore, AutoCloseable {
6778
* Create a store that writes plain CSV files.
6879
*/
6980
/**
 * Create a store that writes plain CSV files into {@code outputDir},
 * using uncompressed file streams for both writing and reading back.
 */
public CsvDataTableStore(Path outputDir) {
    this(outputDir,
            CsvDataTableStore::defaultOutputStream,
            CsvDataTableStore::defaultInputStream,
            ".csv",
            Collections.emptyMap(),
            Collections.emptyMap());
}
7384

7485
/**
 * Create a store with control over output stream creation, file extension,
 * and additional static columns prepended/appended to each row.
 * <p>
 * Because no input stream factory is supplied, {@link #getRows} always sees
 * empty streams when constructed this way and therefore returns empty results.
 * Prefer the six-argument constructor when read-back support is needed.
 *
 * @param outputDir directory to write files into
 * @param outputStreamFactory creates an output stream for each file path (e.g., wrapping with GZIPOutputStream)
 * @param fileExtension file extension including dot (e.g., ".csv" or ".csv.gz")
 * @param prefixColumns static columns prepended to each row, in insertion order
 * @param suffixColumns static columns appended to each row, in insertion order
 * @deprecated Use the six-argument constructor that accepts an {@code inputStreamFactory}
 */
@Deprecated
public CsvDataTableStore(Path outputDir,
                         Function<Path, OutputStream> outputStreamFactory,
                         String fileExtension,
                         Map<String, String> prefixColumns,
                         Map<String, String> suffixColumns) {
    // An always-empty input stream means readDescriptor() finds no matching
    // table, so getRows() degrades gracefully to an empty result.
    this(outputDir, outputStreamFactory, unused -> new ByteArrayInputStream(new byte[0]),
            fileExtension, prefixColumns, suffixColumns);
}
109+
110+
/**
111+
* Create a store with full control over output and input stream creation, file extension,
112+
* and additional static columns prepended/appended to each row.
113+
*
114+
* @param outputDir directory to write files into
115+
* @param outputStreamFactory creates an output stream for each file path (e.g., wrapping with GZIPOutputStream)
116+
* @param inputStreamFactory creates an input stream for each file path (e.g., wrapping with GZIPInputStream)
117+
* @param fileExtension file extension including dot (e.g., ".csv" or ".csv.gz")
118+
* @param prefixColumns static columns prepended to each row, in insertion order
119+
* @param suffixColumns static columns appended to each row, in insertion order
120+
*/
121+
public CsvDataTableStore(Path outputDir,
122+
Function<Path, OutputStream> outputStreamFactory,
123+
Function<Path, InputStream> inputStreamFactory,
124+
String fileExtension,
125+
Map<String, String> prefixColumns,
126+
Map<String, String> suffixColumns) {
89127
this.outputDir = outputDir;
90128
this.outputStreamFactory = outputStreamFactory;
129+
this.inputStreamFactory = inputStreamFactory;
91130
this.fileExtension = fileExtension;
92131
this.prefixColumns = prefixColumns;
93132
this.suffixColumns = suffixColumns;
@@ -106,6 +145,14 @@ private static OutputStream defaultOutputStream(Path path) {
106145
}
107146
}
108147

148+
private static InputStream defaultInputStream(Path path) {
149+
try {
150+
return Files.newInputStream(path);
151+
} catch (IOException e) {
152+
throw new UncheckedIOException(e);
153+
}
154+
}
155+
109156
@Override
110157
public <Row> void insertRow(DataTable<Row> dataTable, ExecutionContext ctx, Row row) {
111158
String fileKey = fileKey(dataTable);
@@ -115,7 +162,64 @@ public <Row> void insertRow(DataTable<Row> dataTable, ExecutionContext ctx, Row
115162

116163
@Override
117164
public Stream<?> getRows(String dataTableName, @Nullable String group) {
118-
return Stream.empty();
165+
// Flush any open writers for this data table so all rows are on disk
166+
for (BucketWriter writer : writers.values()) {
167+
if (writer.dataTable.getName().equals(dataTableName) &&
168+
Objects.equals(writer.dataTable.getGroup(), group)) {
169+
writer.flush();
170+
}
171+
}
172+
173+
List<String[]> allRows = new ArrayList<>();
174+
//noinspection DataFlowIssue
175+
File[] files = outputDir.toFile().listFiles((dir, name) -> name.endsWith(fileExtension));
176+
if (files == null) {
177+
return Stream.empty();
178+
}
179+
180+
int prefixCount = prefixColumns.size();
181+
int suffixCount = suffixColumns.size();
182+
183+
for (File file : files) {
184+
try (InputStream is = inputStreamFactory.apply(file.toPath())) {
185+
DataTableDescriptor descriptor = readDescriptor(is);
186+
if (descriptor == null ||
187+
!descriptor.getName().equals(dataTableName) ||
188+
!Objects.equals(descriptor.getGroup(), group)) {
189+
continue;
190+
}
191+
// readDescriptor consumed comment lines; now parse the remaining CSV
192+
// (header + data rows). Re-read the full file with CsvParser.
193+
} catch (IOException e) {
194+
continue;
195+
}
196+
197+
try (InputStream is = inputStreamFactory.apply(file.toPath())) {
198+
CsvParserSettings settings = new CsvParserSettings();
199+
settings.setHeaderExtractionEnabled(true);
200+
settings.getFormat().setComment('#');
201+
CsvParser parser = new CsvParser(settings);
202+
parser.beginParsing(new InputStreamReader(is, StandardCharsets.UTF_8));
203+
204+
String[] row;
205+
while ((row = parser.parseNext()) != null) {
206+
// Strip prefix and suffix columns, returning only data table columns
207+
int dataCount = row.length - prefixCount - suffixCount;
208+
if (dataCount <= 0) {
209+
allRows.add(row);
210+
} else {
211+
String[] dataRow = new String[dataCount];
212+
System.arraycopy(row, prefixCount, dataRow, 0, dataCount);
213+
allRows.add(dataRow);
214+
}
215+
}
216+
parser.stopParsing();
217+
} catch (IOException e) {
218+
// Skip unreadable files
219+
}
220+
}
221+
222+
return allRows.stream();
119223
}
120224

121225
@Override
@@ -146,8 +250,7 @@ private BucketWriter createBucketWriter(DataTable<?> dataTable) {
146250
}
147251

148252
// Build headers: prefix + data table columns + suffix
149-
List<String> headers = new ArrayList<>();
150-
headers.addAll(prefixColumns.keySet());
253+
List<String> headers = new ArrayList<>(prefixColumns.keySet());
151254
for (ColumnDescriptor col : descriptor.getColumns()) {
152255
headers.add(col.getName());
153256
}
@@ -223,6 +326,14 @@ synchronized void writeRow(Object row) {
223326
csvWriter.writeRow((Object[]) values);
224327
}
225328

329+
synchronized void flush() {
330+
csvWriter.flush();
331+
try {
332+
os.flush();
333+
} catch (IOException ignored) {
334+
}
335+
}
336+
226337
void close() {
227338
csvWriter.close();
228339
try {

rewrite-core/src/test/java/org/openrewrite/DataTableStoreTest.java

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,17 @@
2020
import org.junit.jupiter.api.io.TempDir;
2121

2222
import java.io.FileInputStream;
23+
import java.io.IOException;
2324
import java.nio.file.Files;
2425
import java.nio.file.Path;
26+
import java.util.LinkedHashMap;
2527
import java.util.List;
28+
import java.util.Map;
2629
import java.util.stream.Collectors;
2730

2831
import static org.assertj.core.api.Assertions.assertThat;
2932

33+
@SuppressWarnings("DataFlowIssue")
3034
class DataTableStoreTest {
3135

3236
static class TestTable extends DataTable<TestTable.Row> {
@@ -155,6 +159,7 @@ void noopStoreDropsInserts() {
155159

156160
@Test
157161
void noopStoreIsSingleton() {
162+
//noinspection EqualsWithItself
158163
assertThat(DataTableStore.noop()).isSameAs(DataTableStore.noop());
159164
}
160165

@@ -246,6 +251,146 @@ void csvStoreReadDescriptor(@TempDir Path tempDir) throws Exception {
246251
}
247252
}
248253

254+
// =========================================================================
255+
// CsvDataTableStore.getRows
256+
// =========================================================================
257+
258+
static class MultiColTable extends DataTable<MultiColTable.Row> {
259+
public MultiColTable(Recipe recipe) {
260+
super(recipe, "Multi-column table", "A table with multiple columns.");
261+
}
262+
263+
@Value
264+
public static class Row {
265+
@Column(displayName = "Position", description = "The index position")
266+
int position;
267+
268+
@Column(displayName = "Text", description = "The text value")
269+
String text;
270+
}
271+
}
272+
273+
@Test
274+
void csvStoreGetRowsReadsBackWrittenData(@TempDir Path tempDir) {
275+
try (CsvDataTableStore store = new CsvDataTableStore(tempDir)) {
276+
TestTable table = new TestTable(Recipe.noop());
277+
store.insertRow(table, ctx(), new TestTable.Row("alice"));
278+
store.insertRow(table, ctx(), new TestTable.Row("bob"));
279+
280+
List<?> rows = store.getRows(table.getName(), null).collect(Collectors.toList());
281+
assertThat(rows).hasSize(2);
282+
assertThat((String[]) rows.get(0)).containsExactly("alice");
283+
assertThat((String[]) rows.get(1)).containsExactly("bob");
284+
}
285+
}
286+
287+
@Test
288+
void csvStoreGetRowsMultipleColumns(@TempDir Path tempDir) {
289+
try (CsvDataTableStore store = new CsvDataTableStore(tempDir)) {
290+
MultiColTable table = new MultiColTable(Recipe.noop());
291+
store.insertRow(table, ctx(), new MultiColTable.Row(1, "hello"));
292+
store.insertRow(table, ctx(), new MultiColTable.Row(2, "world"));
293+
294+
List<?> rows = store.getRows(table.getName(), null).collect(Collectors.toList());
295+
assertThat(rows).hasSize(2);
296+
assertThat((String[]) rows.get(0)).containsExactly("1", "hello");
297+
assertThat((String[]) rows.get(1)).containsExactly("2", "world");
298+
}
299+
}
300+
301+
@Test
302+
void csvStoreGetRowsReturnsEmptyForMissingTable(@TempDir Path tempDir) {
303+
try (CsvDataTableStore store = new CsvDataTableStore(tempDir)) {
304+
TestTable table = new TestTable(Recipe.noop());
305+
store.insertRow(table, ctx(), new TestTable.Row("alice"));
306+
307+
List<?> rows = store.getRows("nonexistent.Table", null).collect(Collectors.toList());
308+
assertThat(rows).isEmpty();
309+
}
310+
}
311+
312+
@Test
313+
void csvStoreGetRowsMatchesByGroup(@TempDir Path tempDir) {
314+
try (CsvDataTableStore store = new CsvDataTableStore(tempDir)) {
315+
TestTable grouped = new TestTable(Recipe.noop()).withGroup("group-a");
316+
TestTable ungrouped = new TestTable(Recipe.noop());
317+
store.insertRow(grouped, ctx(), new TestTable.Row("grouped-row"));
318+
store.insertRow(ungrouped, ctx(), new TestTable.Row("ungrouped-row"));
319+
320+
List<?> groupedRows = store.getRows(grouped.getName(), "group-a").collect(Collectors.toList());
321+
assertThat(groupedRows).hasSize(1);
322+
assertThat((String[]) groupedRows.getFirst()).containsExactly("grouped-row");
323+
324+
List<?> ungroupedRows = store.getRows(ungrouped.getName(), null).collect(Collectors.toList());
325+
assertThat(ungroupedRows).hasSize(1);
326+
assertThat((String[]) ungroupedRows.getFirst()).containsExactly("ungrouped-row");
327+
}
328+
}
329+
330+
@Test
331+
void csvStoreGetRowsStripsPrefixAndSuffixColumns(@TempDir Path tempDir) {
332+
Map<String, String> prefix = new LinkedHashMap<>();
333+
prefix.put("repo", "my-repo");
334+
Map<String, String> suffix = new LinkedHashMap<>();
335+
suffix.put("org", "my-org");
336+
337+
try (CsvDataTableStore store = new CsvDataTableStore(
338+
tempDir, (path) -> {
339+
try {
340+
return Files.newOutputStream(path);
341+
} catch (IOException e) {
342+
throw new RuntimeException(e);
343+
}
344+
}, (path) -> {
345+
try {
346+
return Files.newInputStream(path);
347+
} catch (IOException e) {
348+
throw new RuntimeException(e);
349+
}
350+
}, ".csv", prefix, suffix)) {
351+
TestTable table = new TestTable(Recipe.noop());
352+
store.insertRow(table, ctx(), new TestTable.Row("alice"));
353+
354+
List<?> rows = store.getRows(table.getName(), null).collect(Collectors.toList());
355+
assertThat(rows).hasSize(1);
356+
// Should only contain the data column, not prefix/suffix
357+
assertThat((String[]) rows.getFirst()).containsExactly("alice");
358+
}
359+
}
360+
361+
@Test
362+
void csvStoreGetRowsAfterClose(@TempDir Path tempDir) {
363+
TestTable table = new TestTable(Recipe.noop());
364+
try (CsvDataTableStore store = new CsvDataTableStore(tempDir)) {
365+
store.insertRow(table, ctx(), new TestTable.Row("alice"));
366+
store.insertRow(table, ctx(), new TestTable.Row("bob"));
367+
}
368+
369+
// Read back from a new store instance pointing at the same directory
370+
try (CsvDataTableStore store2 = new CsvDataTableStore(tempDir)) {
371+
List<?> rows = store2.getRows(table.getName(), null).collect(Collectors.toList());
372+
assertThat(rows).hasSize(2);
373+
assertThat((String[]) rows.get(0)).containsExactly("alice");
374+
assertThat((String[]) rows.get(1)).containsExactly("bob");
375+
}
376+
}
377+
378+
@Test
379+
void csvStoreGetRowsHandlesSpecialCharacters(@TempDir Path tempDir) {
380+
try (CsvDataTableStore store = new CsvDataTableStore(tempDir)) {
381+
TestTable table = new TestTable(Recipe.noop());
382+
store.insertRow(table, ctx(), new TestTable.Row("value with, comma"));
383+
store.insertRow(table, ctx(), new TestTable.Row("value with \"quotes\""));
384+
store.insertRow(table, ctx(), new TestTable.Row("value with\nnewline"));
385+
386+
List<?> rows = store.getRows(table.getName(), null).collect(Collectors.toList());
387+
assertThat(rows).hasSize(3);
388+
assertThat((String[]) rows.get(0)).containsExactly("value with, comma");
389+
assertThat((String[]) rows.get(1)).containsExactly("value with \"quotes\"");
390+
assertThat((String[]) rows.get(2)).containsExactly("value with\nnewline");
391+
}
392+
}
393+
249394
// =========================================================================
250395
// CsvDataTableStore.fileKey
251396
// =========================================================================

0 commit comments

Comments
 (0)