Skip to content

Commit 12b88a8

Browse files
simonbaird authored and claude committed
Add new rego function for blob files
The goal is to be able to access the task definitions inside a Tekton task bundle, so we can then apply the task-related policy checks to them. Co-authored-by: Claude Code <[email protected]>
1 parent 3a2f6e8 commit 12b88a8

File tree

5 files changed

+470
-11
lines changed

5 files changed

+470
-11
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
= ec.oci.blob_files
2+
3+
Fetch structured files (YAML or JSON) from within a blob tar archive.
4+
5+
== Usage
6+
7+
files = ec.oci.blob_files(ref: string, paths: array<string>)
8+
9+
== Parameters
10+
11+
* `ref` (`string`): OCI blob reference
12+
* `paths` (`array<string>`): the list of paths
13+
14+
== Return
15+
16+
`files` (`object`): object representing the extracted files
17+
18+
The object contains dynamic attributes.
19+
The attributes are of `string` type and represent the full path of the file within the blob.
20+
The values are of `any` type and hold the file contents.
21+

docs/modules/ROOT/pages/rego_builtins.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ information.
1010
|===
1111
|xref:ec_oci_blob.adoc[ec.oci.blob]
1212
|Fetch a blob from an OCI registry.
13+
|xref:ec_oci_blob_files.adoc[ec.oci.blob_files]
14+
|Fetch structured files (YAML or JSON) from within a blob tar archive.
1315
|xref:ec_oci_descriptor.adoc[ec.oci.descriptor]
1416
|Fetch a raw Image from an OCI registry.
1517
|xref:ec_oci_image_files.adoc[ec.oci.image_files]

docs/modules/ROOT/partials/rego_nav.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
* xref:rego_builtins.adoc[Rego Reference]
22
** xref:ec_oci_blob.adoc[ec.oci.blob]
3+
** xref:ec_oci_blob_files.adoc[ec.oci.blob_files]
34
** xref:ec_oci_descriptor.adoc[ec.oci.descriptor]
45
** xref:ec_oci_image_files.adoc[ec.oci.image_files]
56
** xref:ec_oci_image_index.adoc[ec.oci.image_index]

internal/rego/oci/oci.go

Lines changed: 241 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,17 @@
2121
package oci
2222

2323
import (
24+
"archive/tar"
2425
"bytes"
2526
"context"
2627
"crypto/sha256"
2728
"encoding/json"
2829
"errors"
2930
"fmt"
3031
"io"
32+
"path"
3133
"runtime"
34+
"strings"
3235
"sync"
3336
"sync/atomic"
3437

@@ -42,13 +45,15 @@ import (
4245
"golang.org/x/sync/errgroup"
4346
"golang.org/x/sync/singleflight"
4447
"k8s.io/client-go/util/retry"
48+
"sigs.k8s.io/yaml"
4549

4650
"github.com/conforma/cli/internal/fetchers/oci/files"
4751
"github.com/conforma/cli/internal/utils/oci"
4852
)
4953

5054
const (
5155
ociBlobName = "ec.oci.blob"
56+
ociBlobFilesName = "ec.oci.blob_files"
5257
ociDescriptorName = "ec.oci.descriptor"
5358
ociImageManifestName = "ec.oci.image_manifest"
5459
ociImageManifestsBatchName = "ec.oci.image_manifests"
@@ -320,6 +325,35 @@ func registerOCIImageFiles() {
320325
rego.RegisterBuiltin2(&decl, ociImageFiles)
321326
}
322327

328+
func registerOCIBlobFiles() {
329+
filesObject := types.NewObject(
330+
nil,
331+
types.NewDynamicProperty(
332+
types.Named("path", types.S).Description("the full path of the file within the blob"),
333+
types.Named("content", types.A).Description("the file contents"),
334+
),
335+
)
336+
337+
decl := rego.Function{
338+
Name: ociBlobFilesName,
339+
Description: "Fetch structured files (YAML or JSON) from within a blob tar archive.",
340+
Decl: types.NewFunction(
341+
types.Args(
342+
types.Named("ref", types.S).Description("OCI blob reference"),
343+
types.Named("paths", types.NewArray([]types.Type{types.S}, nil)).Description("the list of paths"),
344+
),
345+
types.Named("files", filesObject).Description("object representing the extracted files"),
346+
),
347+
// As per the documentation, enable memoization to ensure function evaluation is
348+
// deterministic. But also mark it as non-deterministic because it does rely on external
349+
// entities, i.e. OCI registry. https://www.openpolicyagent.org/docs/latest/extensions/
350+
Memoize: true,
351+
Nondeterministic: true,
352+
}
353+
354+
rego.RegisterBuiltin2(&decl, ociBlobFiles)
355+
}
356+
323357
func registerOCIImageIndex() {
324358
platform := types.NewObject(
325359
[]*types.StaticProperty{
@@ -381,6 +415,10 @@ func registerOCIImageIndex() {
381415
}
382416

383417
func ociBlob(bctx rego.BuiltinContext, a *ast.Term) (*ast.Term, error) {
418+
return ociBlobInternal(bctx, a, true)
419+
}
420+
421+
func ociBlobInternal(bctx rego.BuiltinContext, a *ast.Term, verifyDigest bool) (*ast.Term, error) {
384422
logger := log.WithField("function", ociBlobName)
385423

386424
uri, ok := a.Value.(ast.String)
@@ -454,21 +492,32 @@ func ociBlob(bctx rego.BuiltinContext, a *ast.Term) (*ast.Term, error) {
454492
return nil, nil //nolint:nilerr
455493
}
456494

457-
sum := fmt.Sprintf("sha256:%x", hasher.Sum(nil))
458-
// io.LimitReader truncates the layer if it exceeds its limit. The condition below catches this
459-
// scenario in order to avoid unexpected behavior caused by partial data being returned.
460-
if sum != ref.DigestStr() {
461-
logger.WithFields(log.Fields{
462-
"action": "verify digest",
463-
"computed_digest": sum,
464-
"expected_digest": ref.DigestStr(),
465-
}).Error("computed digest does not match expected digest")
466-
return nil, nil
495+
// io.LimitReader truncates the layer if it exceeds its limit. The
496+
// condition below catches this scenario in order to avoid unexpected
497+
// behavior caused by partial data being returned.
498+
//
499+
// For ociBlobFiles, we skip the digest verification because there's a
500+
// good chance we'd be calculating the digest of the uncompressed layer
501+
// data which would not match. It might be possible to calculate the
502+
// checksum on the layer data before it is uncompressed, but I think
503+
// that's not as easy as it sounds, since it may require another
504+
// io.Copy which could be inefficient. For now let's just skip it.
505+
//
506+
if verifyDigest {
507+
sum := fmt.Sprintf("sha256:%x", hasher.Sum(nil))
508+
if sum != ref.DigestStr() {
509+
logger.WithFields(log.Fields{
510+
"action": "verify digest",
511+
"computed_digest": sum,
512+
"expected_digest": ref.DigestStr(),
513+
}).Error("computed digest does not match expected digest")
514+
return nil, nil
515+
}
467516
}
468517

469518
logger.WithFields(log.Fields{
470519
"action": "complete",
471-
"digest": sum,
520+
"digest": ref.DigestStr(),
472521
}).Debug("Successfully retrieved blob")
473522

474523
term := ast.StringTerm(blob.String())
@@ -950,6 +999,186 @@ func ociImageFiles(bctx rego.BuiltinContext, refTerm *ast.Term, pathsTerm *ast.T
950999
return result.(*ast.Term), nil
9511000
}
9521001

1002+
func ociBlobFiles(bctx rego.BuiltinContext, refTerm *ast.Term, pathsTerm *ast.Term) (*ast.Term, error) {
1003+
logger := log.WithField("function", ociBlobFilesName)
1004+
1005+
uri, ok := refTerm.Value.(ast.String)
1006+
if !ok {
1007+
logger.Error("input ref is not a string")
1008+
return nil, nil
1009+
}
1010+
refStr := string(uri)
1011+
logger = logger.WithField("ref", refStr)
1012+
1013+
if pathsTerm == nil {
1014+
logger.Error("paths term is nil")
1015+
return nil, nil
1016+
}
1017+
1018+
// Build cache key from ref + paths (hash the paths for a stable key)
1019+
pathsHash := fmt.Sprintf("%x", sha256.Sum256([]byte(pathsTerm.String())))[:12]
1020+
cacheKey := refStr + ":" + pathsHash
1021+
1022+
// Use component-scoped cache if available, otherwise fall back to global.
1023+
// Blob files data can be substantial and is unique per component.
1024+
cc := componentCacheFromContext(bctx.Context)
1025+
1026+
// Check cache first (fast path)
1027+
if cached, found := cc.imageFilesCache.Load(cacheKey); found {
1028+
logger.Debug("Blob files served from cache")
1029+
return cached.(*ast.Term), nil
1030+
}
1031+
1032+
// Use singleflight to prevent thundering herd
1033+
result, err, _ := cc.imageFilesFlight.Do(cacheKey, func() (any, error) {
1034+
// Double-check cache inside singleflight
1035+
if cached, found := cc.imageFilesCache.Load(cacheKey); found {
1036+
logger.Debug("Blob files served from cache (after singleflight)")
1037+
return cached, nil
1038+
}
1039+
logger.Debug("Starting blob files extraction")
1040+
1041+
// Get the blob content first (skip digest verification due to compressed/uncompressed mismatch)
1042+
blobTerm, err := ociBlobInternal(bctx, refTerm, false)
1043+
if err != nil || blobTerm == nil {
1044+
logger.WithFields(log.Fields{
1045+
"action": "fetch blob",
1046+
"error": err,
1047+
}).Error("failed to fetch blob content")
1048+
return nil, nil //nolint:nilerr
1049+
}
1050+
1051+
blobContent, ok := blobTerm.Value.(ast.String)
1052+
if !ok {
1053+
logger.Error("blob content is not a string")
1054+
return nil, nil
1055+
}
1056+
1057+
pathsArray, err := builtins.ArrayOperand(pathsTerm.Value, 1)
1058+
if err != nil {
1059+
logger.WithFields(log.Fields{
1060+
"action": "convert paths",
1061+
"error": err,
1062+
}).Error("failed to convert paths to array operand")
1063+
return nil, nil //nolint:nilerr
1064+
}
1065+
1066+
// Collect target paths for exact file matching
1067+
var targetPaths []string
1068+
err = pathsArray.Iter(func(pathTerm *ast.Term) error {
1069+
pathString, ok := pathTerm.Value.(ast.String)
1070+
if !ok {
1071+
return fmt.Errorf("path is not a string: %#v", pathTerm)
1072+
}
1073+
targetPaths = append(targetPaths, string(pathString))
1074+
return nil
1075+
})
1076+
if err != nil {
1077+
logger.WithFields(log.Fields{
1078+
"action": "iterate paths",
1079+
"error": err,
1080+
}).Error("failed iterating paths")
1081+
return nil, nil //nolint:nilerr
1082+
}
1083+
1084+
if len(targetPaths) == 0 {
1085+
logger.Debug("No paths specified, returning empty result")
1086+
term := ast.NewTerm(ast.NewObject())
1087+
cc.imageFilesCache.Store(cacheKey, term)
1088+
return term, nil
1089+
}
1090+
1091+
// Create a tar reader from the blob content
1092+
blobReader := strings.NewReader(string(blobContent))
1093+
archive := tar.NewReader(blobReader)
1094+
1095+
// Create a set for fast lookup of target paths
1096+
targetPathSet := make(map[string]bool)
1097+
for _, path := range targetPaths {
1098+
targetPathSet[path] = true
1099+
}
1100+
1101+
extractedFiles := map[string]json.RawMessage{}
1102+
for {
1103+
header, err := archive.Next()
1104+
if err != nil {
1105+
if err == io.EOF {
1106+
break
1107+
}
1108+
logger.WithFields(log.Fields{
1109+
"action": "read tar header",
1110+
"error": err,
1111+
}).Error("failed to read tar archive")
1112+
return nil, nil //nolint:nilerr
1113+
}
1114+
1115+
// Check if this file matches any of our target paths
1116+
if !targetPathSet[header.Name] {
1117+
continue
1118+
}
1119+
1120+
// Check if the file has a supported extension or is explicitly requested
1121+
ext := path.Ext(header.Name)
1122+
supportedExt := false
1123+
for _, e := range []string{".yaml", ".yml", ".json"} {
1124+
if strings.EqualFold(ext, e) {
1125+
supportedExt = true
1126+
break
1127+
}
1128+
}
1129+
1130+
// If no supported extension, only process if file is explicitly in target paths
1131+
// This allows processing files without extensions that contain structured data
1132+
if !supportedExt {
1133+
logger.WithField("file", header.Name).Debug("file has no supported extension, attempting to parse anyway since it was explicitly requested")
1134+
}
1135+
1136+
// Read the file content
1137+
data, err := io.ReadAll(archive)
1138+
if err != nil {
1139+
logger.WithFields(log.Fields{
1140+
"action": "read file content",
1141+
"file": header.Name,
1142+
"error": err,
1143+
}).Error("failed to read file content")
1144+
return nil, nil //nolint:nilerr
1145+
}
1146+
1147+
// Convert YAML to JSON if needed
1148+
data, err = yaml.YAMLToJSON(data)
1149+
if err != nil {
1150+
logger.WithFields(log.Fields{
1151+
"action": "convert to json",
1152+
"file": header.Name,
1153+
"error": err,
1154+
}).Debug("unable to read file as JSON or YAML, ignoring")
1155+
continue
1156+
}
1157+
1158+
extractedFiles[header.Name] = data
1159+
}
1160+
1161+
filesValue, err := ast.InterfaceToValue(extractedFiles)
1162+
if err != nil {
1163+
logger.WithFields(log.Fields{
1164+
"action": "convert files",
1165+
"error": err,
1166+
}).Error("failed to convert files object to value")
1167+
return nil, nil //nolint:nilerr
1168+
}
1169+
1170+
logger.WithField("file_count", len(extractedFiles)).Debug("Successfully extracted blob files")
1171+
term := ast.NewTerm(filesValue)
1172+
cc.imageFilesCache.Store(cacheKey, term)
1173+
return term, nil
1174+
})
1175+
1176+
if err != nil || result == nil {
1177+
return nil, nil
1178+
}
1179+
return result.(*ast.Term), nil
1180+
}
1181+
9531182
func ociImageIndex(bctx rego.BuiltinContext, a *ast.Term) (*ast.Term, error) {
9541183
logger := log.WithField("function", ociImageIndexName)
9551184

@@ -1129,6 +1358,7 @@ func parseReference(uri string) (name.Reference, error) {
11291358

11301359
func init() {
11311360
registerOCIBlob()
1361+
registerOCIBlobFiles()
11321362
registerOCIDescriptor()
11331363
registerOCIImageFiles()
11341364
registerOCIImageManifest()

0 commit comments

Comments
 (0)