2121package oci
2222
2323import (
24+ "archive/tar"
2425 "bytes"
2526 "context"
2627 "crypto/sha256"
2728 "encoding/json"
2829 "errors"
2930 "fmt"
3031 "io"
32+ "path"
3133 "runtime"
34+ "strings"
3235 "sync"
3336 "sync/atomic"
3437
@@ -42,13 +45,15 @@ import (
4245 "golang.org/x/sync/errgroup"
4346 "golang.org/x/sync/singleflight"
4447 "k8s.io/client-go/util/retry"
48+ "sigs.k8s.io/yaml"
4549
4650 "github.com/conforma/cli/internal/fetchers/oci/files"
4751 "github.com/conforma/cli/internal/utils/oci"
4852)
4953
5054const (
5155 ociBlobName = "ec.oci.blob"
56+ ociBlobFilesName = "ec.oci.blob_files"
5257 ociDescriptorName = "ec.oci.descriptor"
5358 ociImageManifestName = "ec.oci.image_manifest"
5459 ociImageManifestsBatchName = "ec.oci.image_manifests"
@@ -320,6 +325,35 @@ func registerOCIImageFiles() {
320325 rego .RegisterBuiltin2 (& decl , ociImageFiles )
321326}
322327
328+ func registerOCIBlobFiles () {
329+ filesObject := types .NewObject (
330+ nil ,
331+ types .NewDynamicProperty (
332+ types .Named ("path" , types .S ).Description ("the full path of the file within the blob" ),
333+ types .Named ("content" , types .A ).Description ("the file contents" ),
334+ ),
335+ )
336+
337+ decl := rego.Function {
338+ Name : ociBlobFilesName ,
339+ Description : "Fetch structured files (YAML or JSON) from within a blob tar archive." ,
340+ Decl : types .NewFunction (
341+ types .Args (
342+ types .Named ("ref" , types .S ).Description ("OCI blob reference" ),
343+ types .Named ("paths" , types .NewArray ([]types.Type {types .S }, nil )).Description ("the list of paths" ),
344+ ),
345+ types .Named ("files" , filesObject ).Description ("object representing the extracted files" ),
346+ ),
347+ // As per the documentation, enable memoization to ensure function evaluation is
348+ // deterministic. But also mark it as non-deterministic because it does rely on external
349+ // entities, i.e. OCI registry. https://www.openpolicyagent.org/docs/latest/extensions/
350+ Memoize : true ,
351+ Nondeterministic : true ,
352+ }
353+
354+ rego .RegisterBuiltin2 (& decl , ociBlobFiles )
355+ }
356+
323357func registerOCIImageIndex () {
324358 platform := types .NewObject (
325359 []* types.StaticProperty {
@@ -381,6 +415,10 @@ func registerOCIImageIndex() {
381415}
382416
383417func ociBlob (bctx rego.BuiltinContext , a * ast.Term ) (* ast.Term , error ) {
418+ return ociBlobInternal (bctx , a , true )
419+ }
420+
421+ func ociBlobInternal (bctx rego.BuiltinContext , a * ast.Term , verifyDigest bool ) (* ast.Term , error ) {
384422 logger := log .WithField ("function" , ociBlobName )
385423
386424 uri , ok := a .Value .(ast.String )
@@ -454,21 +492,32 @@ func ociBlob(bctx rego.BuiltinContext, a *ast.Term) (*ast.Term, error) {
454492 return nil , nil //nolint:nilerr
455493 }
456494
457- sum := fmt .Sprintf ("sha256:%x" , hasher .Sum (nil ))
458- // io.LimitReader truncates the layer if it exceeds its limit. The condition below catches this
459- // scenario in order to avoid unexpected behavior caused by partial data being returned.
460- if sum != ref .DigestStr () {
461- logger .WithFields (log.Fields {
462- "action" : "verify digest" ,
463- "computed_digest" : sum ,
464- "expected_digest" : ref .DigestStr (),
465- }).Error ("computed digest does not match expected digest" )
466- return nil , nil
495+ // io.LimitReader truncates the layer if it exceeds its limit. The
496+ // condition below catches this scenario in order to avoid unexpected
497+ // behavior caused by partial data being returned.
498+ //
499+ // For ociBlobFiles, we skip the digest verification because there's a
500+ // good chance we'd be calculating the digest of the uncompressed layer
501+ // data which would not match. It might be possible to calculate the
502+ // checksum on the layer data before it is uncompressed, but I think
503+ // that's not as easy as it sounds, since it may require another
504+ // io.Copy which could be inefficient. For now let's just skip it.
505+ //
506+ if verifyDigest {
507+ sum := fmt .Sprintf ("sha256:%x" , hasher .Sum (nil ))
508+ if sum != ref .DigestStr () {
509+ logger .WithFields (log.Fields {
510+ "action" : "verify digest" ,
511+ "computed_digest" : sum ,
512+ "expected_digest" : ref .DigestStr (),
513+ }).Error ("computed digest does not match expected digest" )
514+ return nil , nil
515+ }
467516 }
468517
469518 logger .WithFields (log.Fields {
470519 "action" : "complete" ,
471- "digest" : sum ,
520+ "digest" : ref . DigestStr () ,
472521 }).Debug ("Successfully retrieved blob" )
473522
474523 term := ast .StringTerm (blob .String ())
@@ -950,6 +999,186 @@ func ociImageFiles(bctx rego.BuiltinContext, refTerm *ast.Term, pathsTerm *ast.T
950999 return result .(* ast.Term ), nil
9511000}
9521001
1002+ func ociBlobFiles (bctx rego.BuiltinContext , refTerm * ast.Term , pathsTerm * ast.Term ) (* ast.Term , error ) {
1003+ logger := log .WithField ("function" , ociBlobFilesName )
1004+
1005+ uri , ok := refTerm .Value .(ast.String )
1006+ if ! ok {
1007+ logger .Error ("input ref is not a string" )
1008+ return nil , nil
1009+ }
1010+ refStr := string (uri )
1011+ logger = logger .WithField ("ref" , refStr )
1012+
1013+ if pathsTerm == nil {
1014+ logger .Error ("paths term is nil" )
1015+ return nil , nil
1016+ }
1017+
1018+ // Build cache key from ref + paths (hash the paths for a stable key)
1019+ pathsHash := fmt .Sprintf ("%x" , sha256 .Sum256 ([]byte (pathsTerm .String ())))[:12 ]
1020+ cacheKey := refStr + ":" + pathsHash
1021+
1022+ // Use component-scoped cache if available, otherwise fall back to global.
1023+ // Blob files data can be substantial and is unique per component.
1024+ cc := componentCacheFromContext (bctx .Context )
1025+
1026+ // Check cache first (fast path)
1027+ if cached , found := cc .imageFilesCache .Load (cacheKey ); found {
1028+ logger .Debug ("Blob files served from cache" )
1029+ return cached .(* ast.Term ), nil
1030+ }
1031+
1032+ // Use singleflight to prevent thundering herd
1033+ result , err , _ := cc .imageFilesFlight .Do (cacheKey , func () (any , error ) {
1034+ // Double-check cache inside singleflight
1035+ if cached , found := cc .imageFilesCache .Load (cacheKey ); found {
1036+ logger .Debug ("Blob files served from cache (after singleflight)" )
1037+ return cached , nil
1038+ }
1039+ logger .Debug ("Starting blob files extraction" )
1040+
1041+ // Get the blob content first (skip digest verification due to compressed/uncompressed mismatch)
1042+ blobTerm , err := ociBlobInternal (bctx , refTerm , false )
1043+ if err != nil || blobTerm == nil {
1044+ logger .WithFields (log.Fields {
1045+ "action" : "fetch blob" ,
1046+ "error" : err ,
1047+ }).Error ("failed to fetch blob content" )
1048+ return nil , nil //nolint:nilerr
1049+ }
1050+
1051+ blobContent , ok := blobTerm .Value .(ast.String )
1052+ if ! ok {
1053+ logger .Error ("blob content is not a string" )
1054+ return nil , nil
1055+ }
1056+
1057+ pathsArray , err := builtins .ArrayOperand (pathsTerm .Value , 1 )
1058+ if err != nil {
1059+ logger .WithFields (log.Fields {
1060+ "action" : "convert paths" ,
1061+ "error" : err ,
1062+ }).Error ("failed to convert paths to array operand" )
1063+ return nil , nil //nolint:nilerr
1064+ }
1065+
1066+ // Collect target paths for exact file matching
1067+ var targetPaths []string
1068+ err = pathsArray .Iter (func (pathTerm * ast.Term ) error {
1069+ pathString , ok := pathTerm .Value .(ast.String )
1070+ if ! ok {
1071+ return fmt .Errorf ("path is not a string: %#v" , pathTerm )
1072+ }
1073+ targetPaths = append (targetPaths , string (pathString ))
1074+ return nil
1075+ })
1076+ if err != nil {
1077+ logger .WithFields (log.Fields {
1078+ "action" : "iterate paths" ,
1079+ "error" : err ,
1080+ }).Error ("failed iterating paths" )
1081+ return nil , nil //nolint:nilerr
1082+ }
1083+
1084+ if len (targetPaths ) == 0 {
1085+ logger .Debug ("No paths specified, returning empty result" )
1086+ term := ast .NewTerm (ast .NewObject ())
1087+ cc .imageFilesCache .Store (cacheKey , term )
1088+ return term , nil
1089+ }
1090+
1091+ // Create a tar reader from the blob content
1092+ blobReader := strings .NewReader (string (blobContent ))
1093+ archive := tar .NewReader (blobReader )
1094+
1095+ // Create a set for fast lookup of target paths
1096+ targetPathSet := make (map [string ]bool )
1097+ for _ , path := range targetPaths {
1098+ targetPathSet [path ] = true
1099+ }
1100+
1101+ extractedFiles := map [string ]json.RawMessage {}
1102+ for {
1103+ header , err := archive .Next ()
1104+ if err != nil {
1105+ if err == io .EOF {
1106+ break
1107+ }
1108+ logger .WithFields (log.Fields {
1109+ "action" : "read tar header" ,
1110+ "error" : err ,
1111+ }).Error ("failed to read tar archive" )
1112+ return nil , nil //nolint:nilerr
1113+ }
1114+
1115+ // Check if this file matches any of our target paths
1116+ if ! targetPathSet [header .Name ] {
1117+ continue
1118+ }
1119+
1120+ // Check if the file has a supported extension or is explicitly requested
1121+ ext := path .Ext (header .Name )
1122+ supportedExt := false
1123+ for _ , e := range []string {".yaml" , ".yml" , ".json" } {
1124+ if strings .EqualFold (ext , e ) {
1125+ supportedExt = true
1126+ break
1127+ }
1128+ }
1129+
1130+ // If no supported extension, only process if file is explicitly in target paths
1131+ // This allows processing files without extensions that contain structured data
1132+ if ! supportedExt {
1133+ logger .WithField ("file" , header .Name ).Debug ("file has no supported extension, attempting to parse anyway since it was explicitly requested" )
1134+ }
1135+
1136+ // Read the file content
1137+ data , err := io .ReadAll (archive )
1138+ if err != nil {
1139+ logger .WithFields (log.Fields {
1140+ "action" : "read file content" ,
1141+ "file" : header .Name ,
1142+ "error" : err ,
1143+ }).Error ("failed to read file content" )
1144+ return nil , nil //nolint:nilerr
1145+ }
1146+
1147+ // Convert YAML to JSON if needed
1148+ data , err = yaml .YAMLToJSON (data )
1149+ if err != nil {
1150+ logger .WithFields (log.Fields {
1151+ "action" : "convert to json" ,
1152+ "file" : header .Name ,
1153+ "error" : err ,
1154+ }).Debug ("unable to read file as JSON or YAML, ignoring" )
1155+ continue
1156+ }
1157+
1158+ extractedFiles [header .Name ] = data
1159+ }
1160+
1161+ filesValue , err := ast .InterfaceToValue (extractedFiles )
1162+ if err != nil {
1163+ logger .WithFields (log.Fields {
1164+ "action" : "convert files" ,
1165+ "error" : err ,
1166+ }).Error ("failed to convert files object to value" )
1167+ return nil , nil //nolint:nilerr
1168+ }
1169+
1170+ logger .WithField ("file_count" , len (extractedFiles )).Debug ("Successfully extracted blob files" )
1171+ term := ast .NewTerm (filesValue )
1172+ cc .imageFilesCache .Store (cacheKey , term )
1173+ return term , nil
1174+ })
1175+
1176+ if err != nil || result == nil {
1177+ return nil , nil
1178+ }
1179+ return result .(* ast.Term ), nil
1180+ }
1181+
9531182func ociImageIndex (bctx rego.BuiltinContext , a * ast.Term ) (* ast.Term , error ) {
9541183 logger := log .WithField ("function" , ociImageIndexName )
9551184
@@ -1129,6 +1358,7 @@ func parseReference(uri string) (name.Reference, error) {
11291358
11301359func init () {
11311360 registerOCIBlob ()
1361+ registerOCIBlobFiles ()
11321362 registerOCIDescriptor ()
11331363 registerOCIImageFiles ()
11341364 registerOCIImageManifest ()
0 commit comments