Skip to content

Commit 9b5d30d

Browse files
committed
Checkpoint in debugging flow
1 parent 3b1dd25 commit 9b5d30d

File tree

1 file changed

+218
-45
lines changed

1 file changed

+218
-45
lines changed

mdio/utils/segy_export.h

Lines changed: 218 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@ struct TraceHeaderMapping {
7373
source_type(st), variable_name(vn), field_name_in_var(fin) {}
7474
};
7575

76+
// Structure to hold pre-sliced trace header variable data
77+
struct TraceHeaderVariableData {
78+
std::string variable_name;
79+
TraceHeaderMapping::SourceType source_type;
80+
std::string field_name_in_var; // For structured variables
81+
std::optional<VariableData<>> data; // Pre-sliced data (optional to avoid default constructor issues)
82+
nlohmann::json dtype_spec; // For structured variables, holds the dtype specification
83+
};
84+
7685
class TraceHeaderComposer {
7786
public:
7887
TraceHeaderComposer();
@@ -104,9 +113,11 @@ class TraceHeaderMapper {
104113
// Get all the mappings
105114
const std::vector<TraceHeaderMapping>& GetMappings() const { return mappings_; }
106115

107-
// Populate trace header buffer for a specific trace location
116+
// Populate trace header buffer using pre-sliced variable data
108117
Result<void> PopulateTraceHeader(char* header_buffer,
109-
const std::vector<Index>& trace_coords) const;
118+
const std::vector<Index>& trace_coords,
119+
Index trace_index_in_chunk,
120+
std::vector<TraceHeaderVariableData>& variable_data) const;
110121

111122
private:
112123
const Dataset& dataset_;
@@ -510,53 +521,117 @@ inline std::string TraceHeaderMapper::ConvertToTraceHeaderDType(const std::strin
510521
}
511522

512523
inline Result<void> TraceHeaderMapper::PopulateTraceHeader(
513-
char* header_buffer, const std::vector<Index>& trace_coords) const {
524+
char* header_buffer, const std::vector<Index>& trace_coords,
525+
Index trace_index_in_chunk,
526+
std::vector<TraceHeaderVariableData>& variable_data) const {
527+
528+
std::cout << "Debug: PopulateTraceHeader called with " << variable_data.size() << " variable data entries" << std::endl;
529+
for (const auto& vd : variable_data) {
530+
std::cout << " - Variable: '" << vd.variable_name << "', field: '" << vd.field_name_in_var << "'" << std::endl;
531+
}
532+
514533
for (const auto& mapping : mappings_) {
534+
std::cout << "Debug: Looking for mapping - field: '" << mapping.field_name
535+
<< "', variable: '" << mapping.variable_name
536+
<< "', field_in_var: '" << mapping.field_name_in_var << "'" << std::endl;
537+
515538
try {
539+
// Find the corresponding variable data for this mapping
540+
TraceHeaderVariableData* var_data = nullptr;
541+
for (auto& vd : variable_data) {
542+
if (vd.variable_name == mapping.variable_name &&
543+
vd.source_type == mapping.source_type &&
544+
(mapping.source_type == TraceHeaderMapping::SourceType::DIMENSION_VARIABLE ||
545+
vd.field_name_in_var == mapping.field_name_in_var)) {
546+
var_data = &vd;
547+
std::cout << " -> Found matching variable data!" << std::endl;
548+
break;
549+
}
550+
}
551+
552+
if (!var_data) {
553+
std::cerr << "Warning: No pre-sliced data found for mapping '"
554+
<< mapping.field_name << "'" << std::endl;
555+
continue;
556+
}
557+
558+
if (!var_data->data.has_value()) {
559+
std::cerr << "Warning: No data available for mapping '"
560+
<< mapping.field_name << "'" << std::endl;
561+
continue;
562+
}
563+
516564
if (mapping.source_type == TraceHeaderMapping::SourceType::DIMENSION_VARIABLE) {
517-
// Read from dimension variable - this is the simpler case
518-
MDIO_ASSIGN_OR_RETURN(auto var, dataset_.variables.at(mapping.variable_name));
565+
// For dimension variables, we need to calculate the index within the sliced data
566+
const char* src_ptr = reinterpret_cast<const char*>(var_data->data->get_data_accessor().data());
519567

520-
// For dimension variables, we need to find the coordinate index that matches this variable
521-
auto domain = var.dimensions();
522-
auto labels = domain.labels();
568+
// The data is sliced to match the current chunk
569+
// Use the trace index within the chunk to access the correct data element
570+
auto data_shape = var_data->data->dimensions().shape();
571+
Index data_index = trace_index_in_chunk;
523572

524-
// Find the dimension index that corresponds to this variable
525-
for (size_t dim_idx = 0; dim_idx < labels.size() && dim_idx < trace_coords.size(); ++dim_idx) {
526-
if (labels[dim_idx] == mapping.variable_name) {
527-
// Create a slice descriptor to get the specific coordinate value
528-
std::vector<RangeDescriptor<Index>> descs;
529-
descs.push_back({labels[dim_idx], trace_coords[dim_idx], trace_coords[dim_idx] + 1, 1});
530-
531-
MDIO_ASSIGN_OR_RETURN(auto var_slice, var.slice(descs));
532-
MDIO_ASSIGN_OR_RETURN(auto data, var_slice.Read().result());
533-
534-
// Debug: Print the actual value being copied
535-
const char* src_ptr = reinterpret_cast<const char*>(data.get_data_accessor().data());
536-
ptrdiff_t src_offset = data.get_flattened_offset();
537-
538-
if (mapping.dtype == "<i4") {
539-
int32_t value;
540-
std::memcpy(&value, src_ptr + src_offset, sizeof(value));
541-
} else if (mapping.dtype == "<i2") {
542-
int16_t value;
543-
std::memcpy(&value, src_ptr + src_offset, sizeof(value));
544-
} else if (mapping.dtype == "<u2") {
545-
uint16_t value;
546-
std::memcpy(&value, src_ptr + src_offset, sizeof(value));
547-
} else if (mapping.dtype == "<u4") {
548-
uint32_t value;
549-
std::memcpy(&value, src_ptr + src_offset, sizeof(value));
550-
} else if (mapping.dtype == "<f4") {
551-
float value;
552-
std::memcpy(&value, src_ptr + src_offset, sizeof(value));
553-
}
554-
555-
// Copy the data to trace header buffer
556-
std::memcpy(header_buffer + mapping.offset - 1, src_ptr + src_offset, mapping.size);
557-
573+
// Ensure we don't go out of bounds
574+
if (data_shape.size() > 0 && data_index >= data_shape[0]) {
575+
std::cerr << "Warning: Trace index " << data_index
576+
<< " out of bounds for variable data shape " << data_shape[0] << std::endl;
577+
continue;
578+
}
579+
580+
// Get the data type size and calculate offset
581+
size_t element_size = mapping.size;
582+
ptrdiff_t base_offset = var_data->data->get_flattened_offset();
583+
ptrdiff_t src_offset = base_offset + data_index * element_size;
584+
585+
// Copy the data to trace header buffer
586+
std::memcpy(header_buffer + mapping.offset - 1, src_ptr + src_offset, mapping.size);
587+
588+
} else if (mapping.source_type == TraceHeaderMapping::SourceType::STRUCTURED_VARIABLE) {
589+
// For structured variables, extract the specific field from the binary pack
590+
const char* struct_ptr = reinterpret_cast<const char*>(var_data->data->get_data_accessor().data());
591+
592+
// Use the trace index within the chunk to access the correct structured element
593+
auto data_shape = var_data->data->dimensions().shape();
594+
Index data_index = trace_index_in_chunk;
595+
596+
// Ensure we don't go out of bounds
597+
if (data_shape.size() > 0 && data_index >= data_shape[0]) {
598+
std::cerr << "Warning: Trace index " << data_index
599+
<< " out of bounds for structured variable data shape " << data_shape[0] << std::endl;
600+
continue;
601+
}
602+
603+
// Get the size of one structured element
604+
size_t struct_element_size = var_data->data->dtype().size();
605+
ptrdiff_t base_offset = var_data->data->get_flattened_offset();
606+
ptrdiff_t struct_offset = base_offset + data_index * struct_element_size;
607+
608+
// Find the field offset within the structured dtype using pre-loaded spec
609+
if (!var_data->dtype_spec["metadata"]["dtype"].is_array()) {
610+
continue;
611+
}
612+
613+
size_t field_offset = 0;
614+
bool field_found = false;
615+
616+
for (const auto& field : var_data->dtype_spec["metadata"]["dtype"]) {
617+
if (!field.is_array() || field.size() < 2) continue;
618+
619+
std::string field_name = field[0].get<std::string>();
620+
std::string field_dtype = field[1].get<std::string>();
621+
622+
if (field_name == mapping.field_name_in_var) {
623+
field_found = true;
558624
break;
559625
}
626+
627+
// Add this field's size to the offset for the next field
628+
field_offset += TraceHeaderComposer::DTypeSize(field_dtype);
629+
}
630+
631+
if (field_found) {
632+
// Copy the specific field data from the structured variable to trace header
633+
const char* field_ptr = struct_ptr + struct_offset + field_offset;
634+
std::memcpy(header_buffer + mapping.offset - 1, field_ptr, mapping.size);
560635
}
561636
}
562637

@@ -830,6 +905,104 @@ Result<void> MdioToSegy(
830905
MDIO_ASSIGN_OR_RETURN(auto var_slice, seismic_var.slice(descs));
831906
MDIO_ASSIGN_OR_RETURN(auto data, var_slice.Read().result());
832907

908+
// Read and slice trace header variables for this chunk
909+
std::vector<TraceHeaderVariableData> header_variable_data;
910+
if (!mappings.empty()) {
911+
// Create a set of unique variables to avoid duplicate reads
912+
std::set<std::pair<std::string, TraceHeaderMapping::SourceType>> unique_vars;
913+
for (const auto& mapping : mappings) {
914+
unique_vars.insert({mapping.variable_name, mapping.source_type});
915+
}
916+
917+
// Read each unique variable
918+
for (const auto& [var_name, source_type] : unique_vars) {
919+
try {
920+
MDIO_ASSIGN_OR_RETURN(auto var, ds.variables.at(var_name));
921+
922+
// Create slice descriptors for this variable (same spatial dims as seismic, no time dim)
923+
auto var_domain = var.dimensions();
924+
auto var_labels = var_domain.labels();
925+
std::vector<RangeDescriptor<Index>> var_descs;
926+
927+
// Map the spatial dimensions from seismic to this variable
928+
for (size_t d = 0; d < rank - 1; ++d) { // Skip time dimension
929+
std::string seismic_dim = labels[d];
930+
931+
// Find matching dimension in the variable
932+
bool found_dim = false;
933+
for (size_t vd = 0; vd < var_labels.size(); ++vd) {
934+
if (var_labels[vd] == seismic_dim) {
935+
if (d == chunk_dim) {
936+
var_descs.push_back({var_labels[vd], start, end, 1});
937+
} else {
938+
var_descs.push_back({var_labels[vd], outer_coords[d], outer_coords[d]+1, 1});
939+
}
940+
found_dim = true;
941+
break;
942+
}
943+
}
944+
945+
if (!found_dim) {
946+
// If dimension not found, this variable might not have this spatial dimension
947+
// Skip this variable for now
948+
break;
949+
}
950+
}
951+
952+
// Only proceed if we found all necessary dimensions
953+
if (var_descs.size() == var_labels.size()) {
954+
MDIO_ASSIGN_OR_RETURN(auto var_slice, var.slice(var_descs));
955+
MDIO_ASSIGN_OR_RETURN(auto var_data, var_slice.Read().result());
956+
957+
// Store the variable data
958+
TraceHeaderVariableData vd;
959+
vd.variable_name = var_name;
960+
vd.source_type = source_type;
961+
vd.data = var_data; // var_data is already a VariableData object
962+
963+
// For structured variables, also store the dtype specification
964+
if (source_type == TraceHeaderMapping::SourceType::STRUCTURED_VARIABLE) {
965+
auto spec_result = var.spec();
966+
if (spec_result.ok()) {
967+
auto spec_json_result = spec_result.value().ToJson(IncludeDefaults{});
968+
if (spec_json_result.ok()) {
969+
vd.dtype_spec = spec_json_result.value();
970+
}
971+
}
972+
973+
// For structured variables, we need separate entries for each field
974+
if (vd.dtype_spec.contains("metadata") &&
975+
vd.dtype_spec["metadata"].contains("dtype") &&
976+
vd.dtype_spec["metadata"]["dtype"].is_array()) {
977+
978+
std::cout << "Debug: Creating field entries for structured variable '" << var_name << "':" << std::endl;
979+
for (const auto& field : vd.dtype_spec["metadata"]["dtype"]) {
980+
if (field.is_array() && field.size() >= 2) {
981+
std::string field_name = field[0].get<std::string>();
982+
983+
// Create a separate entry for each field
984+
TraceHeaderVariableData field_vd = vd;
985+
field_vd.field_name_in_var = field_name;
986+
header_variable_data.push_back(field_vd);
987+
988+
std::cout << " - Field: '" << field_name << "'" << std::endl;
989+
}
990+
}
991+
} else {
992+
header_variable_data.push_back(vd);
993+
}
994+
} else {
995+
header_variable_data.push_back(vd);
996+
}
997+
}
998+
999+
} catch (const std::exception& e) {
1000+
std::cerr << "Warning: Failed to read trace header variable '"
1001+
<< var_name << "': " << e.what() << std::endl;
1002+
}
1003+
}
1004+
}
1005+
8331006
const char* ptr = reinterpret_cast<const char*>(data.get_data_accessor().data());
8341007
ptrdiff_t off = data.get_flattened_offset();
8351008
std::vector<char> buffer(size * trace_bytes, 0);
@@ -840,14 +1013,14 @@ Result<void> MdioToSegy(
8401013
// Initialize trace header to zeros
8411014
std::memset(tb, 0, 240);
8421015

843-
// Populate trace header fields from mapped data
1016+
// Populate trace header fields from pre-sliced data
8441017
if (!mappings.empty()) {
8451018
// Calculate the coordinate for this trace
8461019
std::vector<Index> trace_coords = outer_coords;
8471020
trace_coords[chunk_dim] = start + i;
8481021

849-
// Use the header mapper to populate the trace header
850-
auto populate_result = header_mapper.PopulateTraceHeader(tb, trace_coords);
1022+
// Use the header mapper to populate the trace header with pre-sliced data
1023+
auto populate_result = header_mapper.PopulateTraceHeader(tb, trace_coords, i, header_variable_data);
8511024
if (!populate_result.ok()) {
8521025
std::cerr << "Warning: Failed to populate trace header for trace at coordinates [";
8531026
for (size_t c = 0; c < trace_coords.size(); ++c) {

0 commit comments

Comments
 (0)