DCV-1833 new command: generate docs #405

Merged
24 changes: 23 additions & 1 deletion README.md
@@ -95,7 +95,7 @@ dbt-coves setup precommit
dbt-coves generate <resource>
```

Where _\<resource\>_ could be _sources_, _properties_ or _metadata_.
Where _\<resource\>_ could be _sources_, _properties_, _metadata_ or _docs_.

```console
dbt-coves generate sources
@@ -301,6 +301,24 @@ You can download a [sample csv file](sample_metadata.csv) as reference
| raw | raw | \_airbyte_raw_country_populations | \_airbyte_data | | variant | Airbyte data columns (VARIANT) in Snowflake |
| raw | raw | \_airbyte_raw_country_populations | \_airbyte_ab_id | | varchar | Airbyte unique identifier used during data load |

### Docs generation arguments

You can use dbt-coves to improve the standard dbt docs generation process. It generates your dbt docs and updates external links so they always open in a new tab. It can also merge a production `catalog.json` into the local environment when running in deferred mode, so you can run [dbt-checkpoint](https://github.com/dbt-checkpoint/dbt-checkpoint) hooks even when a model has not been run locally.

`dbt-coves generate docs` supports the following arguments:

```console
--merge-deferred
# Merge a deferred catalog.json into your generated one.
# Flag: no value required.
```

```console
--state
# Directory where your production catalog.json is located.
# Mandatory when using --merge-deferred.
```
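
For example, assuming your production run artifacts (including its `catalog.json`) were copied into a local `prod-artifacts/` directory (an illustrative path), a deferred docs generation could look like:

```console
dbt-coves generate docs --merge-deferred --state prod-artifacts/
```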

## Extract configuration from Airbyte

```console
@@ -501,6 +519,10 @@ generate:
- TABLE_2
destination: # Where metadata file will be generated, default: 'metadata.csv'

docs:
merge_deferred: true
state: logs/

extract:
airbyte:
path: /config/workspace/load/airbyte # Where json files will be generated
8 changes: 8 additions & 0 deletions dbt_coves/config/config.py
@@ -48,10 +48,16 @@ class GenerateMetadataModel(BaseModel):
no_prompt: Optional[bool] = False


class GenerateDocsModel(BaseModel):
merge_deferred: Optional[bool] = False
state: Optional[str] = ""


class GenerateModel(BaseModel):
sources: Optional[GenerateSourcesModel] = GenerateSourcesModel()
properties: Optional[GeneratePropertiesModel] = GeneratePropertiesModel()
metadata: Optional[GenerateMetadataModel] = GenerateMetadataModel()
docs: Optional[GenerateDocsModel] = GenerateDocsModel()


class ExtractAirbyteModel(BaseModel):
@@ -165,6 +171,8 @@ class DbtCovesConfig:
"generate.metadata.exclude_relations",
"generate.metadata.destination",
"generate.metadata.no_prompt",
"generate.docs.merge_deferred",
"generate.docs.state",
"extract.airbyte.path",
"extract.airbyte.host",
"extract.airbyte.port",
164 changes: 164 additions & 0 deletions dbt_coves/tasks/generate/docs.py
@@ -0,0 +1,164 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict

from rich.console import Console

from dbt_coves.tasks.base import BaseConfiguredTask
from dbt_coves.tasks.setup.utils import print_row
from dbt_coves.utils.shell import run_and_capture_cwd
from dbt_coves.utils.tracking import trackable

console = Console()


class DbtCovesGenerateDocsException(Exception):
pass


class GenerateDocsTask(BaseConfiguredTask):
"""
    Task that generates dbt docs, fixes external links, and optionally merges a deferred catalog.json into the local one
"""

@classmethod
def register_parser(cls, sub_parsers, base_subparser):
subparser = sub_parsers.add_parser(
"docs",
parents=[base_subparser],
help="Merge models from a catalog.json into another one.",
)
subparser.add_argument(
"--merge-deferred",
action="store_true",
help="Flag to merge deferred models and sources into local catalog",
default=False,
)
subparser.add_argument(
"--state",
type=str,
help="Catalog.json to use as reference for merging",
)
cls.arg_parser = base_subparser
subparser.set_defaults(cls=cls, which="docs")
return subparser

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def get_config_value(self, key):
return self.coves_config.integrated["generate"]["docs"][key]

    def _generate_dbt_docs(self):
        output = run_and_capture_cwd(["dbt", "docs", "generate"], self.config.project_root)

        if output.returncode == 0:
            docs_status = "[green]SUCCESS :heavy_check_mark:[/green]"
        else:
            docs_status = "[red]FAIL :cross_mark:[/red]"
        print_row(
            "dbt docs generate",
            docs_status,
            new_section=True,
        )
        if output.returncode > 0:
            raise DbtCovesGenerateDocsException("dbt docs generate error. Check logs.")

    def _fix_dbt_docs_links(self, docs_path: Path):
        dbt_docs_index_path = Path(docs_path, "index.html")
        # Read the generated index.html, add a <base> tag so external links
        # open in a new tab, and write the modified content back in place.
        with open(dbt_docs_index_path, "r+") as f:
            html_content = f.read()
            html_content = html_content.replace("</head>", "<base target='_blank'></head>")
            f.seek(0)
            f.write(html_content)
            f.truncate()
        console.print(
            "[green]:heavy_check_mark:[/green] dbt docs updated. "
            "External links will now open in a new tab"
        )

def _get_catalog_json(self, docs_folder: Path) -> Dict[str, Any]:
"""
Open json at docs_folder/catalog.json
"""
catalog_path = Path(docs_folder, "catalog.json")
try:
with open(catalog_path, "r") as f:
return json.load(f)
except FileNotFoundError:
raise DbtCovesGenerateDocsException(f"Catalog.json not found at {catalog_path}")

def _merge_catalogs(
self, local_catalog: Dict[str, Any], target_catalog: Dict[str, Any]
) -> Dict[str, Any]:
"""
Merge nodes and sources from the state catalog.json into the local docs.
"""
nodes_diff = set(target_catalog.get("nodes", {}).keys()) - set(
local_catalog.get("nodes", {}).keys()
)
n_nodes_merged = len(nodes_diff)

sources_diff = set(target_catalog.get("sources", {}).keys()) - set(
local_catalog.get("sources", {}).keys()
)
n_sources_merged = len(sources_diff)

local_catalog["nodes"].update(
(key, value)
for key, value in target_catalog.get("nodes", {}).items()
if key not in local_catalog.get("nodes", {})
)
local_catalog["sources"].update(
(key, value)
for key, value in target_catalog.get("sources", {}).items()
if key not in local_catalog.get("sources", {})
)

console.print(
f"Merged [green]{n_nodes_merged} nodes[/green] and [green]{n_sources_merged} sources[/green] into",
"your local catalog.json",
        )
        return local_catalog

def _write_catalog_json(self, catalog: Dict[str, Any], docs_folder: Path):
"""
Write the catalog.json to the docs_folder
"""
catalog_path = Path(docs_folder, "catalog.json")
with open(catalog_path, "w") as f:
json.dump(catalog, f)

def _merge_dbt_catalogs(self, local_path: Path, state_path: Path):
"""
        Merge the catalog.json from the state into the local docs.
"""
# Get the source and target catalogs
local_catalog = self._get_catalog_json(local_path)
target_catalog = self._get_catalog_json(state_path)

# Merge the catalogs
self._merge_catalogs(local_catalog, target_catalog)

        # Write the merged catalog back to the local docs folder
self._write_catalog_json(local_catalog, local_path)

@trackable
def run(self):
self._generate_dbt_docs()

local_docs_path = Path(self.config.project_root, "target")
self._fix_dbt_docs_links(local_docs_path)

merge_deferred = self.get_config_value("merge_deferred")
state_location = self.get_config_value("state")

        if merge_deferred:
            # Validate --state before building any path from it
            if not state_location or not Path(state_location, "catalog.json").exists():
                raise DbtCovesGenerateDocsException(
                    "A valid [red][i]--state[/i][/red] argument is required "
                    "when using [yellow]--merge-deferred[/yellow]"
                )

            self._merge_dbt_catalogs(local_docs_path, Path(state_location))
return 0
2 changes: 2 additions & 0 deletions dbt_coves/tasks/generate/main.py
@@ -2,6 +2,7 @@

from dbt_coves.tasks.base import BaseConfiguredTask

from .docs import GenerateDocsTask
from .metadata import GenerateMetadataTask
from .properties import GeneratePropertiesTask
from .sources import GenerateSourcesTask
@@ -23,6 +24,7 @@ class GenerateTask(BaseConfiguredTask):
GenerateSourcesTask,
GenerateMetadataTask,
GenerateTemplatesTask,
GenerateDocsTask,
]

@classmethod
11 changes: 11 additions & 0 deletions dbt_coves/utils/flags.py
@@ -68,6 +68,10 @@ def __init__(self, cli_parser: ArgumentParser) -> None:
"destination": None,
"no_prompt": False,
},
"docs": {
"merge_deferred": False,
"state": None,
},
}
self.extract = {
"airbyte": {
@@ -235,6 +239,13 @@ def parse_args(self, cli_args: List[str] = list()) -> None:
if self.args.no_prompt:
self.generate["metadata"]["no_prompt"] = True

# generate docs
if self.args.cls.__name__ == "GenerateDocsTask":
if self.args.merge_deferred:
self.generate["docs"]["merge_deferred"] = self.args.merge_deferred
if self.args.state:
self.generate["docs"]["state"] = self.args.state

# load airbyte
if self.args.cls.__name__ == "LoadAirbyteTask":
if self.args.path: