Skip to content

Commit 838538d

Browse files
authored
Merge pull request #142 from TGSAI/141-aws-example
141 aws example
2 parents 1049080 + 0b88f2f commit 838538d

File tree

7 files changed

+468
-0
lines changed

7 files changed

+468
-0
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,5 +133,6 @@ endif()
133133

134134
list(APPEND mdio_COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR})
135135

136+
136137
add_subdirectory(mdio)
137138

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
cmake_minimum_required(VERSION 3.24)
2+
project(real_data_example_project VERSION 1.0.0 LANGUAGES CXX)
3+
4+
# Set the C++ standard
5+
set(CMAKE_CXX_STANDARD 17)
6+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
7+
8+
# Include FetchContent module
9+
include(FetchContent)
10+
11+
# Fetch the mdio-cpp library from the specified branch
12+
FetchContent_Declare(
13+
mdio
14+
GIT_REPOSITORY https://github.com/TGSAI/mdio-cpp.git
15+
GIT_TAG main
16+
)
17+
FetchContent_MakeAvailable(mdio)
18+
19+
# Fetch the indicators library
20+
FetchContent_Declare(
21+
indicators
22+
GIT_REPOSITORY https://github.com/p-ranav/indicators.git
23+
GIT_TAG v2.3
24+
)
25+
FetchContent_MakeAvailable(indicators)
26+
27+
# Create an executable target
28+
add_executable(real_data_example src/real_data_example.cc)
29+
30+
# Link the mdio and indicators libraries to the executable
31+
target_link_libraries(real_data_example PRIVATE
32+
mdio
33+
${mdio_INTERNAL_DEPS}
34+
${mdio_INTERNAL_S3_DRIVER_DEPS}
35+
PNG::PNG
36+
indicators
37+
)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Seismic Data Extractor
2+
3+
A C++ tool for extracting and processing 3D seismic data slices from MDIO datasets. The tool will extract a slice of a larger MDIO dataset and output the result to a NumPy array called seismic_slice.npy.
4+
5+
This tool demonstrates working with the Poseidon dataset, a public seismic dataset available on AWS Earth, and is provided under a CC BY 4.0 license.
6+
7+
## Overview
8+
9+
This tool allows users to extract specific slices from 3D seismic data stored in MDIO format, with options to specify ranges for inline, crossline, and depth dimensions. The extracted data can be saved in NumPy format for further analysis.
10+
11+
## Prerequisites
12+
13+
- GCC 11 *or better* or Clang14 *or better*
14+
- MDIO library (for reading seismic data)
15+
- indicators library (for progress bars)
16+
- CMake (for building)
17+
18+
## Installation
19+
20+
[Installation instructions would be valuable here]
21+
22+
## Usage
23+
24+
```bash
25+
./real_data_example [flags]
26+
```
27+
28+
### Flags
29+
30+
- `--dataset_path`: Path to the MDIO dataset (default: "s3://tgs-opendata-poseidon/full_stack_agc.mdio")
31+
- `--inline_range`: Inline range in format {inline,start,end,step} (default: "{inline,700,701,1}")
32+
- `--xline_range`: Crossline range in format {crossline,start,end,step} (default: "{crossline,500,700,1}")
33+
- `--depth_range`: Optional depth range in format {depth,start,end,step}
34+
- `--variable_name`: Name of the seismic variable to extract (default: "seismic")
35+
- `--print_dataset`: Print the dataset URL and return without processing
36+
37+
### Example
38+
39+
To print the contents of the file ...
40+
```
41+
./real_data_example --print_dataset --dataset_path="s3://tgs-opendata-poseidon/full_stack_agc.mdio"
42+
```
43+
44+
To slice the data and write to numpy ...
45+
```
46+
./real_data_example --inline_range="{inline,700,701,1}" --xline_range="{crossline,500,1000,1}" --depth_range="{time, 300, 900,1}"
47+
```
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright 2024 TGS
2+
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <mdio/mdio.h>
18+
19+
using Index = mdio::Index;
20+
21+
template <typename T, mdio::DimensionIndex R, mdio::ArrayOriginKind OriginKind>
22+
float bilinear_interpolate(const mdio::SharedArray<T, R, OriginKind>& accessor,
23+
float x, float y, Index slice_index, Index x_min,
24+
Index x_max, Index y_min, Index y_max) {
25+
// Clamp coordinates to valid range
26+
x = std::max(float(x_min), std::min(float(x_max - 1), x));
27+
y = std::max(float(y_min), std::min(float(y_max - 1), y));
28+
29+
Index x0 = std::floor(x);
30+
Index y0 = std::floor(y);
31+
Index x1 = std::min(x0 + 1, x_max - 1);
32+
Index y1 = std::min(y0 + 1, y_max - 1);
33+
34+
float fx = x - x0;
35+
float fy = y - y0;
36+
37+
float c00 = accessor({slice_index, x0, y0});
38+
float c10 = accessor({slice_index, x1, y0});
39+
float c01 = accessor({slice_index, x0, y1});
40+
float c11 = accessor({slice_index, x1, y1});
41+
42+
return (c00 * (1 - fx) * (1 - fy) + c10 * fx * (1 - fy) +
43+
c01 * (1 - fx) * fy + c11 * fx * fy);
44+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// Copyright 2024 TGS
2+
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <mdio/mdio.h>
18+
19+
#include <chrono>
20+
#include <indicators/cursor_control.hpp>
21+
#include <indicators/indeterminate_progress_bar.hpp>
22+
#include <memory>
23+
#include <sstream>
24+
#include <string>
25+
#include <thread>
26+
27+
namespace mdio {
28+
29+
template <typename T>
30+
class ProgressTracker {
31+
public:
32+
explicit ProgressTracker(const std::string& message = "Loading data...")
33+
: message_(message),
34+
bar_{
35+
indicators::option::BarWidth{40},
36+
indicators::option::Start{"["},
37+
indicators::option::Fill{"·"},
38+
indicators::option::Lead{"<==>"},
39+
indicators::option::End{"]"},
40+
indicators::option::PostfixText{message},
41+
indicators::option::ForegroundColor{indicators::Color::yellow},
42+
indicators::option::FontStyles{std::vector<indicators::FontStyle>{
43+
indicators::FontStyle::bold}},
44+
} {
45+
std::cout << "\033[2K\r" << std::flush;
46+
indicators::show_console_cursor(false);
47+
start_time_ = std::chrono::steady_clock::now();
48+
}
49+
50+
void tick() {
51+
auto current_time = std::chrono::steady_clock::now();
52+
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
53+
current_time - start_time_)
54+
.count();
55+
std::stringstream time_str;
56+
time_str << message_ << " " << elapsed << "s";
57+
bar_.set_option(indicators::option::PostfixText{time_str.str()});
58+
bar_.tick();
59+
}
60+
61+
void complete() {
62+
bar_.mark_as_completed();
63+
bar_.set_option(
64+
indicators::option::ForegroundColor{indicators::Color::green});
65+
bar_.set_option(indicators::option::PostfixText{message_ + " completed"});
66+
indicators::show_console_cursor(true);
67+
}
68+
69+
~ProgressTracker() { indicators::show_console_cursor(true); }
70+
71+
private:
72+
std::string message_;
73+
std::chrono::steady_clock::time_point start_time_;
74+
indicators::IndeterminateProgressBar bar_;
75+
};
76+
77+
template <typename T = void, DimensionIndex R = dynamic_rank,
78+
ArrayOriginKind OriginKind = offset_origin>
79+
Future<VariableData<T, R, OriginKind>> ReadWithProgress(
80+
Variable<T>& variable, const std::string& message = "Loading data...") {
81+
auto tracker = std::make_shared<ProgressTracker<T>>(message);
82+
auto future = variable.Read();
83+
84+
std::thread([tracker, future]() {
85+
while (!future.ready()) {
86+
tracker->tick();
87+
std::this_thread::sleep_for(std::chrono::milliseconds(200));
88+
}
89+
tracker->complete();
90+
}).detach();
91+
92+
return future;
93+
}
94+
95+
} // namespace mdio
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright 2024 TGS
2+
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <mdio/mdio.h>
16+
17+
#include <indicators/cursor_control.hpp>
18+
#include <indicators/indeterminate_progress_bar.hpp>
19+
#include <indicators/progress_bar.hpp>
20+
21+
#include "absl/flags/flag.h"
22+
#include "absl/flags/parse.h"
23+
#include "absl/strings/str_split.h"
24+
#include "interpolation.h"
25+
#include "progress.h"
26+
#include "seismic_numpy.h"
27+
#include "seismic_png.h"
28+
#include "tensorstore/tensorstore.h"
29+
30+
#define MDIO_RETURN_IF_ERROR(...) TENSORSTORE_RETURN_IF_ERROR(__VA_ARGS__)
31+
32+
using Index = mdio::Index;
33+
34+
ABSL_FLAG(std::string, inline_range, "{inline,700,701,1}",
35+
"Inline range in format {inline,start,end,step}");
36+
ABSL_FLAG(std::string, xline_range, "{crossline,500,700,1}",
37+
"Crossline range in format {crossline,start,end,step}");
38+
ABSL_FLAG(std::string, depth_range, "",
39+
"Optional depth range in format {depth,start,end,step}");
40+
ABSL_FLAG(std::string, variable_name, "seismic",
41+
"Name of the seismic variable");
42+
ABSL_FLAG(bool, print_dataset, false, "Print the dataset URL and return");
43+
ABSL_FLAG(std::string, dataset_path,
44+
"s3://tgs-opendata-poseidon/full_stack_agc.mdio",
45+
"The path to the dataset");
46+
47+
// Make Run a template function for both the type and descriptors
48+
template <typename T, typename... Descriptors>
49+
absl::Status Run(const Descriptors... descriptors) {
50+
// New feature to print the dataset if the flag is set
51+
52+
MDIO_ASSIGN_OR_RETURN(
53+
auto dataset,
54+
mdio::Dataset::Open(std::string(absl::GetFlag(FLAGS_dataset_path)),
55+
mdio::constants::kOpen)
56+
.result())
57+
58+
if (absl::GetFlag(FLAGS_print_dataset)) {
59+
std::cout << dataset << std::endl;
60+
return absl::OkStatus(); // Return early if just printing the dataset
61+
}
62+
63+
// slice the dataset
64+
MDIO_ASSIGN_OR_RETURN(auto inline_slice, dataset.isel(descriptors...));
65+
66+
// Get variable with template type T using the variable name from the CLI
67+
MDIO_ASSIGN_OR_RETURN(auto variable, inline_slice.variables.template get<T>(
68+
absl::GetFlag(FLAGS_variable_name)));
69+
70+
if (variable.rank() != 3) {
71+
return absl::InvalidArgumentError("Seismic data must be 3D");
72+
}
73+
74+
MDIO_ASSIGN_OR_RETURN(auto seismic_data, ReadWithProgress(variable).result())
75+
76+
auto seismic_accessor = seismic_data.get_data_accessor();
77+
78+
// Write numpy file
79+
MDIO_RETURN_IF_ERROR(WriteNumpy(seismic_accessor, "seismic_slice.npy"));
80+
81+
return absl::OkStatus();
82+
}
83+
84+
mdio::RangeDescriptor<> ParseRange(std::string_view range) {
85+
// Remove leading/trailing whitespace and braces
86+
if (range.empty()) {
87+
// FIXME - we need a better way to handle this
88+
return {"ignore_me", 0, 1, 1};
89+
}
90+
91+
range.remove_prefix(range.find_first_not_of(" {"));
92+
range.remove_suffix(range.length() - range.find_last_not_of("} ") - 1);
93+
94+
// Split by comma into parts
95+
std::vector<std::string_view> parts = absl::StrSplit(range, ',');
96+
97+
if (parts.size() != 4) {
98+
throw std::runtime_error(
99+
"Invalid range format. Expected {label,start,end,step}");
100+
}
101+
102+
// Clean up the label (first part) by removing quotes and spaces
103+
auto label = parts[0];
104+
label.remove_prefix(std::min(label.find_first_not_of(" \""), label.size()));
105+
label.remove_suffix(
106+
label.length() -
107+
std::min(label.find_last_not_of(" \"") + 1, label.size()));
108+
109+
return {
110+
label, // dimension name
111+
std::stoi(std::string(parts[1])), // start
112+
std::stoi(std::string(parts[2])), // end
113+
std::stoi(std::string(parts[3])) // step
114+
};
115+
}
116+
117+
int main(int argc, char* argv[]) {
118+
absl::ParseCommandLine(argc, argv);
119+
120+
// keep me in memory
121+
auto inline_range = absl::GetFlag(FLAGS_inline_range);
122+
auto crossline_range = absl::GetFlag(FLAGS_xline_range);
123+
auto depth_range = absl::GetFlag(FLAGS_depth_range);
124+
125+
auto desc1 = ParseRange(inline_range);
126+
auto desc2 = ParseRange(crossline_range);
127+
auto desc3 = ParseRange(depth_range);
128+
129+
return Run<float>(desc1, desc2, desc3).ok() ? 0 : 1;
130+
}

0 commit comments

Comments
 (0)