Skip to content
12 changes: 12 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class Constant:
DATA_TYPE = "dataType"
DASHBOARD = "dashboard"
DASHBOARDS = "dashboards"
TILES = "tiles"
DASHBOARD_KEY = "dashboardKey"
DESCRIPTION = "description"
OWNERSHIP = "ownership"
Expand Down Expand Up @@ -106,6 +107,7 @@ class Constant:
DASHBOARD_USER_ACCESS_RIGHT = "dashboardUserAccessRight"
GROUP_USER_ACCESS_RIGHT = "groupUserAccessRight"
GRAPH_ID = "graphId"
USERS = "users"
WORKSPACES = "workspaces"
TITLE = "title"
EMBED_URL = "embedUrl"
Expand Down Expand Up @@ -571,6 +573,16 @@ class PowerBiDashboardSourceConfig(
description="Retrieve metadata using PowerBI Admin API only. If this is enabled, then Report Pages will not "
"be extracted. Admin API access is required if this setting is enabled",
)
use_scan_result_only: bool = pydantic.Field(
default=False,
description="When enabled, builds reports, dashboards, tiles, datasets, and their ownership "
"directly from the workspace scan result instead of making separate API calls per entity. "
"This significantly reduces API call volume and avoids Power BI rate limiting for large "
"organizations. Requires admin API access (the scan always uses admin endpoints). "
"Note: report pages and dataset parameters are not available in scan results, "
"and tile titles may be missing for some workspaces. "
"Recommended for use with admin_apis_only=true.",
)
# Extract independent datasets
extract_independent_datasets: bool = pydantic.Field(
default=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,7 @@ def generate_container_for_workspace(
container_key=self.workspace_key,
name=workspace.name,
sub_types=[workspace.type],
external_url=workspace.webUrl,
extra_properties={
"workspace_id": workspace.id,
"workspace_name": workspace.name,
Expand Down Expand Up @@ -1863,10 +1864,13 @@ def get_workspace_workunit(

yield from auto_workunit(self.emit_app(workspace=workspace))

dashboards_from_scan = self.source_config.use_scan_result_only

for dashboard in workspace.dashboards.values():
try:
# Fetch PowerBi users for dashboards
dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
# Skip user API call if users were already parsed from scan result
if not dashboards_from_scan:
dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
# Increase dashboard and tiles count in report
self.reporter.report_dashboards_scanned()
self.reporter.report_charts_scanned(count=len(dashboard.tiles))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class Workspace:
id: str
name: str
type: str # This is used as a subtype of the Container entity.
webUrl: str # External URL for the workspace
dashboards: Dict[str, "Dashboard"] # key = dashboard id
reports: Dict[str, "Report"] # key = report id
datasets: Dict[str, "PowerBIDataset"] # key = dataset id
Expand Down Expand Up @@ -302,7 +303,7 @@ class Report:
name: str
type: ReportType
webUrl: Optional[str]
embedUrl: str
embedUrl: Optional[str]
description: str
dataset_id: Optional[str] # dataset_id is coming from REST API response
dataset: Optional[
Expand Down Expand Up @@ -330,7 +331,7 @@ class CreatedFrom(Enum):

id: str
title: str
embedUrl: str
embedUrl: Optional[str]
dataset_id: Optional[str]
report_id: Optional[str]
createdFrom: CreatedFrom
Expand Down
Loading
Loading