Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ result = src.Set(config) # truthy on success; result.paths has run artifacts
get_result = src.Get(config)
for provider in get_result.auth_providers:
...
for code_host in get_result.code_hosts:
for code_host_connection in get_result.code_host_connections:
...

# Mapping rules can be passed in memory instead of a maps YAML file -
Expand Down Expand Up @@ -225,7 +225,7 @@ snapshots that make `--apply` reversible.
```

- Queries the Sourcegraph instance for auth providers and code host connections
- Writes generated reference files `auth-providers.yaml` and `code-hosts.yaml` under
- Writes generated reference files `auth-providers.yaml` and `code-host-connections.yaml` under
`src-auth-perms-sync-runs/<src_endpoint>/`
- Creates an empty `maps.yaml` if it doesn't exist

Expand Down Expand Up @@ -327,7 +327,7 @@ Run `src-auth-perms-sync --help` for options
```text
src-auth-perms-sync-runs/<src_endpoint>/
|-- auth-providers.yaml
|-- code-hosts.yaml
|-- code-host-connections.yaml
|-- maps.yaml
`-- runs
`-- timestamp-command
Expand All @@ -341,7 +341,7 @@ src-auth-perms-sync-runs/<src_endpoint>/
- The `src-auth-perms-sync-runs` dir is created under your current working directory
- The `<src_endpoint>` dir is created with the hostname from `SRC_ENDPOINT`
- If `maps.yaml` doesn't exist already, it'll be created for you
- `auth-providers.yaml` and `code-hosts.yaml` are created / replaced by the `get` command,
- `auth-providers.yaml` and `code-host-connections.yaml` are created / replaced by the `get` command,
for you to copy values from, to use in your `maps.yaml`
- Only one `maps.yaml` file can be used at a time per Sourcegraph instance, as each `set --apply`
command resets the state on the Sourcegraph instance to the `maps.yaml` file which was used
Expand Down
2 changes: 1 addition & 1 deletion dev/engineering-requests.md
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ joined to `users` and `repo` for names.

### `GetPermissionSyncDiscovery`

The `get` command writes `code-hosts.yaml` and `auth-providers.yaml`, then uses
The `get` command writes `code-host-connections.yaml` and `auth-providers.yaml`, then uses
the same discovery data for mapping. A single discovery endpoint would simplify
this and avoid multiple round trips.

Expand Down
6 changes: 3 additions & 3 deletions examples/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
# Discover the instance's auth providers and code host connections
discovery = src.Get(config)
for auth_provider in discovery.auth_providers:
print("auth provider:", auth_provider.get("displayName"))
for code_host in discovery.code_hosts:
print("code host:", code_host.get("displayName"))
print("auth provider:", auth_provider["authProvider"].get("displayName"))
for code_host_connection in discovery.code_host_connections:
print("code host:", code_host_connection["codeHostConnection"].get("displayName"))

# Configure your mapping rules
mapping_rules: list[src.MappingRule] = [
Expand Down
2 changes: 1 addition & 1 deletion examples/maps.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# User -> Repo permission mapping rules

# Maintain your maps.yaml file, using the values from auth-providers.yaml and code-hosts.yaml,
# Maintain your maps.yaml file, using the values from auth-providers.yaml and code-host-connections.yaml,
# which are created by the `get` command, under `src-auth-perms-sync-runs/<endpoint>/`

# Schema details:
Expand Down
12 changes: 7 additions & 5 deletions src/src_auth_perms_sync/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,15 +1036,17 @@ def __bool__(self) -> bool:
class GetResult:
"""Outcome of one discovery run, carrying the discovered data in memory.

`auth_providers` and `code_hosts` hold the same dicts written to
`auth-providers.yaml` and `code-hosts.yaml`, so module callers can
assemble mapping rules without re-parsing files.
`auth_providers` and `code_host_connections` hold the same dicts written to
`auth-providers.yaml` and `code-host-connections.yaml`, so module callers can
assemble mapping rules without re-parsing files. Each entry has a
copy-paste-ready selector block (`authProvider` / `codeHostConnection`)
plus an `info` block of read-only, non-matchable context.
"""

succeeded: bool
paths: backups.RunPaths | None = None
auth_providers: tuple[dict[str, Any], ...] = ()
code_hosts: tuple[dict[str, Any], ...] = ()
code_host_connections: tuple[dict[str, Any], ...] = ()
maps_created: bool = False

def __bool__(self) -> bool:
Expand All @@ -1060,7 +1062,7 @@ def Get(config: Config, *, event_sink: src.EventSink | None = None) -> GetResult
succeeded=True,
paths=run_paths,
auth_providers=tuple(command_data.auth_provider_views or ()),
code_hosts=tuple(command_data.code_host_views or ()),
code_host_connections=tuple(command_data.code_host_connection_views or ()),
maps_created=command_data.maps_created,
)

Expand Down
21 changes: 15 additions & 6 deletions src/src_auth_perms_sync/permissions/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def cmd_get(
) -> run_context.CommandData:
"""Refresh the generated discovery YAML files.

`run_paths.code_hosts_path` receives Sourcegraph code host connection
`run_paths.code_host_connections_path` receives Sourcegraph code host connection
configs, `run_paths.auth_providers_path` receives auth provider configs,
and `run_paths.maps_path` names the maps file recorded in the snapshot.

Expand Down Expand Up @@ -320,12 +320,21 @@ def cmd_get(
]

if run_paths.write_files:
permissions_maps.dump_code_hosts_yaml(run_paths.code_hosts_path, services)
permissions_maps.dump_code_host_connections_yaml(
run_paths.code_host_connections_path, services
)
permissions_maps.dump_auth_providers_yaml(run_paths.auth_providers_path, providers)
log.info("Wrote %s and %s", run_paths.code_hosts_path, run_paths.auth_providers_path)
log.info(
"Wrote %s and %s",
run_paths.code_host_connections_path,
run_paths.auth_providers_path,
)
else:
log.info("Skipping code-hosts.yaml and auth-providers.yaml because --no-files is set.")
cmd_event["code_hosts_path"] = str(run_paths.code_hosts_path)
log.info(
"Skipping code-host-connections.yaml and auth-providers.yaml "
"because --no-files is set."
)
cmd_event["code_host_connections_path"] = str(run_paths.code_host_connections_path)
cmd_event["auth_providers_path"] = str(run_paths.auth_providers_path)
cmd_event["maps_path"] = str(run_paths.maps_path)

Expand Down Expand Up @@ -390,7 +399,7 @@ def cmd_get(
auth_providers=raw_providers,
saml_group_users=saml_group_users,
auth_provider_views=providers,
code_host_views=services,
code_host_connection_views=services,
)


Expand Down
151 changes: 85 additions & 66 deletions src/src_auth_perms_sync/permissions/maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,62 +46,67 @@ def auth_provider_to_yaml(
) -> dict[str, Any]:
"""Render an auth provider for the YAML config.

Keys mirror the Sourcegraph site-config schema (`type`, `clientID`,
`displayName`, `configID`). `serviceID` has no direct site-config field
so we use the GraphQL name. `isBuiltin` is dropped (redundant with
`type == "builtin"`). `userCount` is our addition.
Returns a two-block entry:

- `authProvider:` holds exactly the fields a `maps.yaml` rule can
match under `users.authProvider`, so the block is copy-paste ready.
Keys mirror the Sourcegraph site-config schema (`type`, `clientID`,
`displayName`, `configID`); `serviceID` has no direct site-config
field so we use the GraphQL name. `isBuiltin` is dropped (redundant
with `type == "builtin"`).
- `info:` holds read-only context that mapping rules cannot match on
(`userCount`, `samlGroupUserCounts`, and any non-secret
site-config extras).

`site_config_entry`, when provided, is the matching `auth.providers[*]`
JSONC entry (already stripped of redacted/secret fields by
`src/src_auth_perms_sync/shared/site_config.py`). Any
fields it carries that aren't already emitted from GraphQL are
surfaced verbatim, so operators see the full provider config in the
YAML - e.g. `identityProviderMetadataURL`, `serviceProviderIssuer`,
`requireEmailDomain`, `allowSignup`. Order: GraphQL-derived identity
keys first, then site-config extras, then observation-derived metadata.
`src/src_auth_perms_sync/shared/site_config.py`). Any fields it carries
that aren't already matchable go under `info:`, so operators still see
the full provider config - e.g. `identityProviderMetadataURL`,
`serviceProviderIssuer`, `requireEmailDomain`, `allowSignup`.

For SAML providers, `saml_group_user_counts` (group name -> distinct
user count) is ALWAYS surfaced under `samlGroupUserCounts:`, even
when the mapping is empty. The empty case (`{}`) tells the operator
the feature is supported but the IdP didn't release any
user count) is ALWAYS surfaced under `info.samlGroupUserCounts:`, even
when the mapping is empty. Its KEYS are the valid values for an
`authProvider.samlGroup` mapping rule. The empty case (`{}`) tells the
operator the feature is supported but the IdP didn't release any
`groupsAttributeName` (default `groups`) claim in this provider's
assertions - typically because the IdP hasn't been configured to do
so. Operators authoring `authProvider.samlGroup` mapping rules can use this
field to size groups before writing rules, or to learn that they
need to fix their IdP first. Pass `None` (the default for non-SAML
providers) to omit the field entirely.
so. Pass `None` (the default for non-SAML providers) to omit the field.

Empty-string fields are omitted - the builtin provider has no
serviceID / clientID / configID, so those keys would just be noise.
"""
rendered: dict[str, Any] = {"type": provider["serviceType"]}
selector: dict[str, Any] = {"type": provider["serviceType"]}
if provider["serviceID"]:
rendered["serviceID"] = provider["serviceID"]
selector["serviceID"] = provider["serviceID"]
if provider["clientID"]:
rendered["clientID"] = provider["clientID"]
rendered["displayName"] = provider["displayName"]
selector["clientID"] = provider["clientID"]
selector["displayName"] = provider["displayName"]
if provider["configID"]:
rendered["configID"] = provider["configID"]
selector["configID"] = provider["configID"]

info: dict[str, Any] = {}
if site_config_entry is not None:
# Merge in every non-secret site-config field that isn't already
# represented by a GraphQL-derived key above. The GraphQL value
# wins on overlaps (`type`, `displayName`, `clientID`, `configID`)
# since it's the resolved view the server actually uses.
# Surface every non-secret site-config field that isn't already a
# matchable selector key. The GraphQL value wins on overlaps
# (`type`, `displayName`, `clientID`, `configID`) since it's the
# resolved view the server actually uses.
for field_name, value in site_config_entry.items():
if field_name in rendered:
if field_name in selector:
continue
rendered[field_name] = value
rendered["userCount"] = user_count
info[field_name] = value
info["userCount"] = user_count
if saml_group_user_counts is not None:
# Sort by descending count, then group name, so the largest groups
# surface first when an operator skims the file.
rendered["samlGroupUserCounts"] = dict(
info["samlGroupUserCounts"] = dict(
sorted(
saml_group_user_counts.items(),
key=lambda item: (-item[1], item[0]),
)
)
return rendered
return {"authProvider": selector, "info": info}


BUILTIN_PROVIDER_KEY: tuple[str, str, str] = ("builtin", "", "")
Expand Down Expand Up @@ -131,26 +136,45 @@ def count_users_per_provider(


def external_service_to_yaml(service: permission_types.ExternalService) -> dict[str, Any]:
"""Render a code host for the YAML config.
"""Render a code host connection for the YAML config.

Returns a two-block entry:

Keys mirror the human-readable Sourcegraph GraphQL `ExternalService`
fields that maps can match. The opaque GraphQL `id` is omitted;
maps should identify code host connections with `kind`, `displayName`,
`url`, and/or `username`.
- `codeHostConnection:` holds exactly the fields a `maps.yaml` rule
can match under `repos.codeHostConnection` (`kind`, `displayName`,
`url`, and `username` when present), so the block is copy-paste
ready. The opaque GraphQL `id` is omitted.
- `info:` holds read-only context that mapping rules cannot match on
(repo count, timestamps, sync state, creator, etc.).

The JSONC `config` blob is parsed only to lift its top-level
`username` into the read-only discovery YAML. The rest of `config`
is intentionally omitted because maps no longer support matching
code-host connections by arbitrary config subtrees.
`username` into the matchable block. The rest of `config` is
intentionally omitted because maps no longer support matching
code host connections by arbitrary config subtrees.

Optional / nullable fields are omitted when null/empty so the YAML
Optional / nullable info fields are omitted when null/empty so the YAML
stays readable. Booleans are always emitted (true or false) so the
discovered state is explicit.
"""
rendered: dict[str, Any] = {
selector: dict[str, Any] = {
"kind": service["kind"],
"displayName": service["displayName"],
"url": service["url"],
}
raw_config = service.get("config")
if raw_config:
try:
parsed_config = json5.loads(raw_config)
except ValueError:
pass
else:
if isinstance(parsed_config, dict):
config_values = cast(dict[str, Any], parsed_config)
username = config_values.get("username")
if isinstance(username, str) and username:
selector["username"] = username

info: dict[str, Any] = {
"repoCount": service["repoCount"],
"createdAt": service["createdAt"],
"updatedAt": service["updatedAt"],
Expand All @@ -160,53 +184,48 @@ def external_service_to_yaml(service: permission_types.ExternalService) -> dict[
"supportsRepoExclusion": bool(service.get("supportsRepoExclusion")),
}
if service.get("lastSyncAt"):
rendered["lastSyncAt"] = service["lastSyncAt"]
info["lastSyncAt"] = service["lastSyncAt"]
if service.get("nextSyncAt"):
rendered["nextSyncAt"] = service["nextSyncAt"]
info["nextSyncAt"] = service["nextSyncAt"]
if service.get("lastSyncError"):
rendered["lastSyncError"] = service["lastSyncError"]
info["lastSyncError"] = service["lastSyncError"]
if service.get("warning"):
rendered["warning"] = service["warning"]
info["warning"] = service["warning"]
creator = service.get("creator")
if creator and creator.get("username"):
rendered["creator"] = creator["username"]
info["creator"] = creator["username"]
last_updater = service.get("lastUpdater")
if last_updater and last_updater.get("username"):
rendered["lastUpdater"] = last_updater["username"]
raw_config = service.get("config")
if raw_config:
try:
parsed_config = json5.loads(raw_config)
except ValueError:
pass
else:
if isinstance(parsed_config, dict):
config_values = cast(dict[str, Any], parsed_config)
username = config_values.get("username")
if isinstance(username, str) and username:
rendered["username"] = username
return rendered
info["lastUpdater"] = last_updater["username"]
return {"codeHostConnection": selector, "info": info}


def dump_auth_providers_yaml(path: Path, providers: list[dict[str, Any]]) -> None:
header = (
"# Sourcegraph auth provider configs.\n"
"# Generated/refreshed by: src-auth-perms-sync get\n"
"# Use these values when writing maps.yaml rules under `users.authProvider`.\n"
"# Each entry's `authProvider:` block is copy-paste ready: drop it under\n"
"# `users.authProvider` in a maps.yaml rule to match those users.\n"
"# `info:` is read-only context that rules cannot match on. The KEYS of\n"
"# `info.samlGroupUserCounts` are the valid `authProvider.samlGroup` values.\n"
"# This file is read-only reference data; edit maps.yaml, not this file.\n"
)
_dump_readonly_discovery_yaml(path, header, "authProviders", providers)


def dump_code_hosts_yaml(path: Path, code_hosts: list[dict[str, Any]]) -> None:
def dump_code_host_connections_yaml(
path: Path, code_host_connections: list[dict[str, Any]]
) -> None:
header = (
"# Sourcegraph code host connection configs.\n"
"# Generated/refreshed by: src-auth-perms-sync get\n"
"# Use these values when writing maps.yaml rules under `repos.codeHostConnection`.\n"
"# ExternalService.config.username is surfaced as top-level `username` when present.\n"
"# Each entry's `codeHostConnection:` block is copy-paste ready: drop it under\n"
"# `repos.codeHostConnection` in a maps.yaml rule to match those repos.\n"
"# `username` is lifted from ExternalService.config when present.\n"
"# `info:` is read-only context that rules cannot match on.\n"
"# This file is read-only reference data; edit maps.yaml, not this file.\n"
)
_dump_readonly_discovery_yaml(path, header, "codeHostConnections", code_hosts)
_dump_readonly_discovery_yaml(path, header, "codeHostConnections", code_host_connections)


def _dump_readonly_discovery_yaml(
Expand Down Expand Up @@ -244,7 +263,7 @@ def create_maps_yaml_if_missing(path: Path) -> bool:
content = (
"# Auth provider -> code host connection mapping rules\n"
"# Maintain this file, using values from auth-providers.yaml "
"and code-hosts.yaml as references\n"
"and code-host-connections.yaml as references\n"
"\n"
"maps:\n"
"\n"
Expand Down
Loading