Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add license/copyright as appropriate to .py files and add check-licensing.sh script #715

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/mkdocs_hook.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import logging
import os
import re
Expand Down
7 changes: 7 additions & 0 deletions .github/workflows/test-misc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,10 @@ jobs:
uses: actions/checkout@v4
- name: Make sure all transforms have a test workflow
run: bash scripts/check-workflows.sh
check-licensing:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Make sure repo content includes license headers.
run: bash scripts/check-licensing.sh
13 changes: 12 additions & 1 deletion data-connector-lib/test/dpk_connector/core/test_crawler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
import pytest
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import pytest
from dpk_connector.core.crawler import crawl


Expand Down
12 changes: 12 additions & 0 deletions data-connector-lib/test/dpk_connector/core/test_middlewares.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import pytest
from dpk_connector.core.middlewares import DelayingProtegoRobotParser
from pytest_mock import MockerFixture
Expand Down
20 changes: 14 additions & 6 deletions data-connector-lib/test/dpk_connector/core/test_sitemap_spider.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

from pathlib import Path

import pytest
Expand Down Expand Up @@ -73,9 +85,7 @@ def callback(url: str, body: bytes, headers: dict):
assert body.decode("utf-8") == response_body
assert headers == {"Content-Type": "text/html"}

spider = ConnectorSitemapSpider.from_crawler(
crawler, seed_urls=("http://example.com",), callback=callback
)
spider = ConnectorSitemapSpider.from_crawler(crawler, seed_urls=("http://example.com",), callback=callback)
request = Request(
"http://example.com/index.html",
meta={
Expand All @@ -93,9 +103,7 @@ def callback(url: str, body: bytes, headers: dict):
parsed = spider.parse(response)

item = next(parsed)
assert item == ConnectorItem(
dropped=False, downloaded=True, system_request=False, sitemap=False
)
assert item == ConnectorItem(dropped=False, downloaded=True, system_request=False, sitemap=False)

for next_request in parsed:
assert isinstance(next_request, Request) is True
Expand Down
16 changes: 13 additions & 3 deletions data-connector-lib/test/dpk_connector/core/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-20b-code-instruct-v2

Expand All @@ -20,9 +32,7 @@


def test_get_header_value():
response = Response(
"http://example.com", headers={"Content-Type": "application/json"}
)
response = Response("http://example.com", headers={"Content-Type": "application/json"})
assert get_header_value(response, "Content-Type") == "application/json"


Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import sys
from argparse import ArgumentParser
Expand Down
12 changes: 12 additions & 0 deletions data-processing-lib/python/src/data_processing/utils/multilock.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import abc
import datetime
import fcntl
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################


class UnrecoverableException(Exception):
"""
Raised when a transform wants to cancel overall execution
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os

from data_processing.test_support.data_access import AbstractDataAccessFactoryTests
Expand Down
12 changes: 12 additions & 0 deletions kfp/kfp_ray_components/src/subworkflow.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import sys

from data_processing.utils import ParamsUtils
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import sys

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import json
import os
from typing import Dict
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import sys

Expand Down
13 changes: 13 additions & 0 deletions kfp/pipeline_generator/single-pipeline/pipeline_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################


PIPELINE_TEMPLATE_FILE = "simple_pipeline.py"

Expand All @@ -14,6 +26,7 @@
if __name__ == "__main__":
import argparse
import os

import yaml
from jinja2 import Environment, FileSystemLoader

Expand Down
12 changes: 12 additions & 0 deletions kfp/pipeline_generator/superpipeline/super_pipeline_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import yaml


Expand Down
26 changes: 23 additions & 3 deletions kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
Expand Down Expand Up @@ -82,7 +94,7 @@ def sample_code_ray_orchestrator(
p4_ededup_doc_column: str = "contents",
p4_ededup_hash_cpu: float = 0.5,
p4_ededup_use_snapshot: bool = False,
p4_ededup_snapshot_directory: str = None, # data sampling
p4_ededup_snapshot_directory: str = None, # data sampling
p4_ededup_n_samples: int = 10,
# overriding parameters
p4_overriding_params: str = '{"ray_worker_options": {"image": "'
Expand Down Expand Up @@ -293,13 +305,21 @@ def _set_component(op: dsl.BaseOp, displaied_name: str, prev_op: dsl.BaseOp = No

# header cleanser
header_cleanser = run_header_cleanser_op(
name=p1_orch_header_cleanser_name, prefix="p11_", params=args, host=orch_host, input_folder=license_check.output
name=p1_orch_header_cleanser_name,
prefix="p11_",
params=args,
host=orch_host,
input_folder=license_check.output,
)
_set_component(header_cleanser, "header_cleanser", license_check)

# tokenization
tokenization = run_tokenization_op(
name=p1_orch_tokenization_wf_name, prefix="p10_", params=args, host=orch_host, input_folder=header_cleanser.output
name=p1_orch_tokenization_wf_name,
prefix="p10_",
params=args,
host=orch_host,
input_folder=header_cleanser.output,
)
_set_component(tokenization, "tokenization", header_cleanser)

Expand Down
12 changes: 12 additions & 0 deletions kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
Expand Down
Loading