Skip to content

Commit

Permalink
Merge pull request #405 from datacoves/DCV-1833-dbt-coves-generate-do…
Browse files Browse the repository at this point in the history
…cs-that-fixes-links-deferred-models-catalog-json

DCV-1833 new command: generate docs
  • Loading branch information
ssassi authored Oct 16, 2023
2 parents 021b4d1 + 9b56791 commit 9a9ac8f
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 1 deletion.
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ dbt-coves setup precommit
dbt-coves generate <resource>
```

Where _\<resource\>_ could be _sources_, _properties_ or _metadata_.
Where _\<resource\>_ could be _sources_, _properties_, _metadata_ or _docs_.

```console
dbt-coves generate sources
Expand Down Expand Up @@ -301,6 +301,24 @@ You can download a [sample csv file](sample_metadata.csv) as reference
| raw | raw | \_airbyte_raw_country_populations | \_airbyte_data | | variant | Airbyte data columns (VARIANT) in Snowflake |
| raw | raw | \_airbyte_raw_country_populations | \_airbyte_ab_id | | varchar | Airbyte unique identifier used during data load |

### Docs generation arguments

You can use dbt-coves to improve the standard dbt docs generation process. It generates your dbt docs and updates external links so they always open in a new tab. It can also merge the production `catalog.json` into the local environment when running in deferred mode, so you can run [dbt-checkpoint](https://github.com/dbt-checkpoint/dbt-checkpoint) hooks even when the model has not been run locally.

`dbt-coves generate docs` supports the following args:

```console
--merge-deferred
# Merge a deferred catalog.json into your generated one.
# Flag: no value required.
```

```console
--state
# Directory where your production catalog.json is located
# Mandatory when using --merge-deferred
```

## Extract configuration from Airbyte

```console
Expand Down Expand Up @@ -501,6 +519,10 @@ generate:
- TABLE_2
destination: # Where metadata file will be generated, default: 'metadata.csv'
docs:
merge_deferred: true
state: logs/
extract:
airbyte:
path: /config/workspace/load/airbyte # Where json files will be generated
Expand Down
8 changes: 8 additions & 0 deletions dbt_coves/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,16 @@ class GenerateMetadataModel(BaseModel):
no_prompt: Optional[bool] = False


class GenerateDocsModel(BaseModel):
    # Settings for the `generate docs` command (config keys `generate.docs.*`).

    # Whether to merge a deferred catalog.json into the locally generated one.
    merge_deferred: Optional[bool] = False
    # Location of the deferred catalog.json; empty string when not configured.
    state: Optional[str] = ""


class GenerateModel(BaseModel):
    # Groups the settings of every `generate` sub-command; each field defaults
    # to that sub-command's model built with its own defaults.
    sources: Optional[GenerateSourcesModel] = GenerateSourcesModel()
    properties: Optional[GeneratePropertiesModel] = GeneratePropertiesModel()
    metadata: Optional[GenerateMetadataModel] = GenerateMetadataModel()
    docs: Optional[GenerateDocsModel] = GenerateDocsModel()


class ExtractAirbyteModel(BaseModel):
Expand Down Expand Up @@ -165,6 +171,8 @@ class DbtCovesConfig:
"generate.metadata.exclude_relations",
"generate.metadata.destination",
"generate.metadata.no_prompt",
"generate.docs.merge_deferred",
"generate.docs.state",
"extract.airbyte.path",
"extract.airbyte.host",
"extract.airbyte.port",
Expand Down
164 changes: 164 additions & 0 deletions dbt_coves/tasks/generate/docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
from __future__ import nested_scopes

import json
from pathlib import Path
from typing import Any, Dict

from rich.console import Console

from dbt_coves.tasks.base import BaseConfiguredTask
from dbt_coves.tasks.setup.utils import print_row
from dbt_coves.utils.shell import run_and_capture_cwd
from dbt_coves.utils.tracking import trackable

# Module-level rich console used by the docs task to print its status messages.
console = Console()


class DbtCovesGenerateDocsException(Exception):
    """Error raised by the `generate docs` task."""


class GenerateDocsTask(BaseConfiguredTask):
    """
    Task that generates dbt docs and post-processes the generated artifacts.

    It runs ``dbt docs generate``, patches the generated ``index.html`` so
    links open in a new tab and, when requested, merges the nodes and sources
    of a deferred (state) ``catalog.json`` into the local one.
    """

    @classmethod
    def register_parser(cls, sub_parsers, base_subparser):
        """
        Register the ``docs`` sub-command and its arguments.

        Args:
            sub_parsers: sub-parsers collection the ``docs`` parser is added to.
            base_subparser: parent parser whose arguments are inherited.

        Returns:
            The parser created for the ``docs`` sub-command.
        """
        subparser = sub_parsers.add_parser(
            "docs",
            parents=[base_subparser],
            help="Merge models from a catalog.json into another one.",
        )
        subparser.add_argument(
            "--merge-deferred",
            action="store_true",
            help="Flag to merge deferred models and sources into local catalog",
            default=False,
        )
        subparser.add_argument(
            "--state",
            type=str,
            # The value is joined with "catalog.json" in run(), so it is a
            # directory, not the catalog file itself.
            help="Directory containing the catalog.json to use as reference for merging",
        )
        cls.arg_parser = base_subparser
        subparser.set_defaults(cls=cls, which="docs")
        return subparser

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_config_value(self, key):
        """Return the ``generate.docs.<key>`` value from the integrated config."""
        return self.coves_config.integrated["generate"]["docs"][key]

    def _generate_dbt_docs(self):
        """
        Run ``dbt docs generate`` in the project root and report the outcome.

        Raises:
            DbtCovesGenerateDocsException: if the command exits with a
                non-zero return code.
        """
        output = run_and_capture_cwd(["dbt", "docs", "generate"], self.config.project_root)
        succeeded = output.returncode == 0

        if succeeded:
            docs_status = "[green]SUCCESS :heavy_check_mark:[/green]"
        else:
            docs_status = "[red]FAIL :cross_mark:[/red]"
        print_row(
            "dbt docs generate",
            docs_status,
            new_section=True,
        )
        # Any non-zero code is a failure, including negative ones; the
        # previous `> 0` check reported FAIL but carried on in that case.
        if not succeeded:
            raise DbtCovesGenerateDocsException("dbt docs generate error. Check logs.")

    def _fix_dbt_docs_links(self, docs_path: Path):
        """
        Add a ``<base target='_blank'>`` tag to ``docs_path/index.html``.

        Args:
            docs_path: folder that contains the generated ``index.html``.

        Raises:
            DbtCovesGenerateDocsException: if ``index.html`` does not exist.
        """
        dbt_docs_index_path = Path(docs_path, "index.html")
        base_tag = "<base target='_blank'>"

        # Read first, then write: opening with "w+" would truncate the file
        # before its content could be read.
        try:
            html_content = dbt_docs_index_path.read_text(encoding="utf-8")
        except FileNotFoundError as err:
            raise DbtCovesGenerateDocsException(
                f"index.html not found at {dbt_docs_index_path}"
            ) from err

        # Skip files that already carry the tag so it is never duplicated.
        if base_tag not in html_content:
            # str.replace returns a new string; the result must be kept.
            html_content = html_content.replace("</head>", f"{base_tag}</head>")
            dbt_docs_index_path.write_text(html_content, encoding="utf-8")

        console.print(
            "[green]:heavy_check_mark:[/green] dbt docs updated. "
            "External links will now open in a new tab"
        )

    def _get_catalog_json(self, docs_folder: Path) -> Dict[str, Any]:
        """
        Load and return the JSON document at ``docs_folder/catalog.json``.

        Raises:
            DbtCovesGenerateDocsException: if the file does not exist.
        """
        catalog_path = Path(docs_folder, "catalog.json")
        try:
            with open(catalog_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError as err:
            raise DbtCovesGenerateDocsException(
                f"Catalog.json not found at {catalog_path}"
            ) from err

    def _merge_catalogs(
        self, local_catalog: Dict[str, Any], target_catalog: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Merge nodes and sources from the state catalog into the local catalog.

        Only entries whose key is missing from ``local_catalog`` are copied;
        existing local entries are never overwritten. ``local_catalog`` is
        updated in place.

        Args:
            local_catalog: catalog generated locally; receives the new entries.
            target_catalog: state (deferred) catalog to take entries from.

        Returns:
            The updated ``local_catalog``.
        """
        merged_counts = {}
        for section in ("nodes", "sources"):
            # setdefault tolerates a local catalog that lacks the section.
            local_section = local_catalog.setdefault(section, {})
            missing_entries = {
                key: value
                for key, value in target_catalog.get(section, {}).items()
                if key not in local_section
            }
            local_section.update(missing_entries)
            merged_counts[section] = len(missing_entries)

        n_nodes_merged = merged_counts["nodes"]
        n_sources_merged = merged_counts["sources"]
        console.print(
            f"Merged [green]{n_nodes_merged} nodes[/green] and [green]{n_sources_merged} sources[/green] into",
            "your local catalog.json",
        )
        return local_catalog

    def _write_catalog_json(self, catalog: Dict[str, Any], docs_folder: Path):
        """
        Write ``catalog`` as JSON to ``docs_folder/catalog.json``.
        """
        catalog_path = Path(docs_folder, "catalog.json")
        with open(catalog_path, "w", encoding="utf-8") as f:
            json.dump(catalog, f)

    def _merge_dbt_catalogs(self, local_path: Path, state_path: Path):
        """
        Merge the catalog.json from the state folder into the local docs.

        Args:
            local_path: folder holding the locally generated ``catalog.json``;
                the merged result is written back there.
            state_path: folder holding the state ``catalog.json``.
        """
        local_catalog = self._get_catalog_json(local_path)
        target_catalog = self._get_catalog_json(state_path)

        merged_catalog = self._merge_catalogs(local_catalog, target_catalog)

        self._write_catalog_json(merged_catalog, local_path)

    @trackable
    def run(self) -> int:
        """
        Generate the dbt docs, fix their links and optionally merge catalogs.

        Returns:
            0 on success.

        Raises:
            DbtCovesGenerateDocsException: if docs generation fails, or if
                merging is requested without a state folder that contains a
                ``catalog.json``.
        """
        self._generate_dbt_docs()

        local_docs_path = Path(self.config.project_root, "target")
        self._fix_dbt_docs_links(local_docs_path)

        merge_deferred = self.get_config_value("merge_deferred")
        state_location = self.get_config_value("state")

        if merge_deferred:
            # Check the value before building a Path from it: Path(None, ...)
            # raises TypeError instead of the intended error below.
            if not state_location or not Path(state_location, "catalog.json").exists():
                raise DbtCovesGenerateDocsException(
                    "A valid [red][i]--state[/i][/red] argument is required "
                    "when using [yellow]--merge-deferred[/yellow]"
                )

            self._merge_dbt_catalogs(local_docs_path, Path(state_location))
        return 0
2 changes: 2 additions & 0 deletions dbt_coves/tasks/generate/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dbt_coves.tasks.base import BaseConfiguredTask

from .docs import GenerateDocsTask
from .metadata import GenerateMetadataTask
from .properties import GeneratePropertiesTask
from .sources import GenerateSourcesTask
Expand All @@ -23,6 +24,7 @@ class GenerateTask(BaseConfiguredTask):
GenerateSourcesTask,
GenerateMetadataTask,
GenerateTemplatesTask,
GenerateDocsTask,
]

@classmethod
Expand Down
11 changes: 11 additions & 0 deletions dbt_coves/utils/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ def __init__(self, cli_parser: ArgumentParser) -> None:
"destination": None,
"no_prompt": False,
},
"docs": {
"merge_deferred": False,
"state": None,
},
}
self.extract = {
"airbyte": {
Expand Down Expand Up @@ -235,6 +239,13 @@ def parse_args(self, cli_args: List[str] = list()) -> None:
if self.args.no_prompt:
self.generate["metadata"]["no_prompt"] = True

# generate docs
if self.args.cls.__name__ == "GenerateDocsTask":
if self.args.merge_deferred:
self.generate["docs"]["merge_deferred"] = self.args.merge_deferred
if self.args.state:
self.generate["docs"]["state"] = self.args.state

# load airbyte
if self.args.cls.__name__ == "LoadAirbyteTask":
if self.args.path:
Expand Down

0 comments on commit 9a9ac8f

Please sign in to comment.