Skip to content

Commit

Permalink
add license/copyright as appropriate to .py files and add check-licensing.sh script
Browse files Browse the repository at this point in the history

Signed-off-by: David Wood <[email protected]>
  • Loading branch information
daw3rd committed Oct 16, 2024
1 parent bd81cc9 commit 143ba6c
Show file tree
Hide file tree
Showing 48 changed files with 623 additions and 83 deletions.
12 changes: 12 additions & 0 deletions .github/mkdocs_hook.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import logging
import os
import re
Expand Down
13 changes: 12 additions & 1 deletion data-connector-lib/test/dpk_connector/core/test_crawler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
import pytest
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import pytest
from dpk_connector.core.crawler import crawl


Expand Down
12 changes: 12 additions & 0 deletions data-connector-lib/test/dpk_connector/core/test_middlewares.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import pytest
from dpk_connector.core.middlewares import DelayingProtegoRobotParser
from pytest_mock import MockerFixture
Expand Down
25 changes: 16 additions & 9 deletions data-connector-lib/test/dpk_connector/core/test_sitemap_spider.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

from pathlib import Path

import pytest
from dpk_connector.core.item import ConnectorItem
from dpk_connector.core.spiders.sitemap import BaseSitemapSpider, ConnectorSitemapSpider
from scrapy import Request
from scrapy.crawler import Crawler
from scrapy.http import HtmlResponse

from dpk_connector.core.item import ConnectorItem
from dpk_connector.core.spiders.sitemap import BaseSitemapSpider, ConnectorSitemapSpider


@pytest.fixture
def crawler() -> Crawler:
Expand Down Expand Up @@ -59,9 +70,7 @@ def callback(url: str, body: bytes, headers: dict):
assert body.decode("utf-8") == response_body
assert headers == {"Content-Type": "text/html"}

spider = ConnectorSitemapSpider.from_crawler(
crawler, seed_urls=("http://example.com",), callback=callback
)
spider = ConnectorSitemapSpider.from_crawler(crawler, seed_urls=("http://example.com",), callback=callback)
request = Request(
"http://example.com/index.html",
meta={
Expand All @@ -79,9 +88,7 @@ def callback(url: str, body: bytes, headers: dict):
parsed = spider.parse(response)

item = next(parsed)
assert item == ConnectorItem(
dropped=False, downloaded=True, system_request=False, sitemap=False
)
assert item == ConnectorItem(dropped=False, downloaded=True, system_request=False, sitemap=False)

for next_request in parsed:
assert isinstance(next_request, Request) is True
Expand Down
16 changes: 13 additions & 3 deletions data-connector-lib/test/dpk_connector/core/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-20b-code-instruct-v2

Expand All @@ -19,9 +31,7 @@


def test_get_header_value():
response = Response(
"http://example.com", headers={"Content-Type": "application/json"}
)
response = Response("http://example.com", headers={"Content-Type": "application/json"})
assert get_header_value(response, "Content-Type") == "application/json"


Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import sys
from argparse import ArgumentParser
Expand Down
12 changes: 12 additions & 0 deletions data-processing-lib/python/src/data_processing/utils/multilock.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import abc
import datetime
import fcntl
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################


class UnrecoverableException(Exception):
"""
Raised when a transform wants to cancel overall execution
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os

from data_processing.test_support.data_access import AbstractDataAccessFactoryTests
Expand Down
12 changes: 12 additions & 0 deletions kfp/kfp_ray_components/src/subworkflow.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import sys

from data_processing.utils import ParamsUtils
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import sys

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import json
import os
from typing import Dict
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import sys

Expand Down
13 changes: 13 additions & 0 deletions kfp/pipeline_generator/single-pipeline/pipeline_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################


PIPELINE_TEMPLATE_FILE = "simple_pipeline.py"

Expand All @@ -14,6 +26,7 @@
if __name__ == "__main__":
import argparse
import os

import yaml
from jinja2 import Environment, FileSystemLoader

Expand Down
12 changes: 12 additions & 0 deletions kfp/pipeline_generator/superpipeline/super_pipeline_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import yaml


Expand Down
26 changes: 23 additions & 3 deletions kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
Expand Down Expand Up @@ -82,7 +94,7 @@ def sample_code_ray_orchestrator(
p4_ededup_doc_column: str = "contents",
p4_ededup_hash_cpu: float = 0.5,
p4_ededup_use_snapshot: bool = False,
p4_ededup_snapshot_directory: str = None, # data sampling
p4_ededup_snapshot_directory: str = None, # data sampling
p4_ededup_n_samples: int = 10,
# overriding parameters
p4_overriding_params: str = '{"ray_worker_options": {"image": "'
Expand Down Expand Up @@ -293,13 +305,21 @@ def _set_component(op: dsl.BaseOp, displaied_name: str, prev_op: dsl.BaseOp = No

# header cleanser
header_cleanser = run_header_cleanser_op(
name=p1_orch_header_cleanser_name, prefix="p11_", params=args, host=orch_host, input_folder=license_check.output
name=p1_orch_header_cleanser_name,
prefix="p11_",
params=args,
host=orch_host,
input_folder=license_check.output,
)
_set_component(header_cleanser, "header_cleanser", license_check)

# tokenization
tokenization = run_tokenization_op(
name=p1_orch_tokenization_wf_name, prefix="p10_", params=args, host=orch_host, input_folder=header_cleanser.output
name=p1_orch_tokenization_wf_name,
prefix="p10_",
params=args,
host=orch_host,
input_folder=header_cleanser.output,
)
_set_component(tokenization, "tokenization", header_cleanser)

Expand Down
12 changes: 12 additions & 0 deletions kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import kfp.compiler as compiler
import kfp.components as comp
import kfp.dsl as dsl
Expand Down
Loading

0 comments on commit 143ba6c

Please sign in to comment.