From ac40d1d001de0f7e8cb943956744c33d41d941f3 Mon Sep 17 00:00:00 2001 From: = <=> Date: Tue, 16 Jul 2024 09:03:44 -0400 Subject: [PATCH 1/8] Removed versioning --- .github/workflows/pr.yaml | 21 --------------------- .github/workflows/push.yaml | 31 ------------------------------- setup.py | 2 +- 3 files changed, 1 insertion(+), 53 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 5a68682..cc39c6e 100755 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -11,27 +11,6 @@ jobs: uses: "lgeiger/black-action@master" with: args: ". -l 79 --check" - check-version: - name: Check version - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Build changelog - run: pip install yaml-changelog>=0.1.7 && make changelog - - name: Make scripts executable - run: chmod -R +x .github/ - - name: Preview changelog update - run: ".github/get-changelog-diff.sh" - - name: Check version number has been properly updated - run: .github/is-version-number-acceptable.sh Test: runs-on: ${{ matrix.os }} continue-on-error: true diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index da0d25f..2a83ac8 100755 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -14,37 +14,6 @@ jobs: uses: "lgeiger/black-action@master" with: args: ". -l 79 --check" - versioning: - name: Update versioning - if: | - (github.repository == 'PolicyEngine/reweight') - && !(github.event.head_commit.message == 'Update reweight') - runs-on: ubuntu-latest - steps: - - name: Checkout repo - uses: actions/checkout@v3 - with: - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - token: ${{ secrets.POLICYENGINE_GITHUB }} - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Build changelog - run: pip install yaml-changelog && make changelog - - name: Make scripts executable - run: chmod -R +x .github/ - - name: Preview changelog update - run: ".github/get-changelog-diff.sh" - - name: Update changelog - uses: EndBug/add-and-commit@v9 - with: - add: "." - committer_name: Github Actions[bot] - author_name: Github Actions[bot] - message: Update reweight - github_token: ${{ secrets.POLICYENGINE_GITHUB }} Test: runs-on: ${{ matrix.os }} if: | diff --git a/setup.py b/setup.py index 882a501..363e940 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="reweight", - version="0.2.0", + version="0.3.0", author="PolicyEngine", author_email="hello@policyengine.org", long_description=readme, From f6ec26c8d94ab6f087c58384110e03f67db61c76 Mon Sep 17 00:00:00 2001 From: = <=> Date: Wed, 17 Jul 2024 10:43:17 -0400 Subject: [PATCH 2/8] Rewrote some comments in the testing notebook --- test.ipynb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test.ipynb b/test.ipynb index 58f43c3..c219280 100644 --- a/test.ipynb +++ b/test.ipynb @@ -86,8 +86,6 @@ " # Initialize a TensorBoard writer\n", " writer = SummaryWriter()\n", "\n", - " #TODO: Write stuff here\n", - "\n", " #Create a Torch tensor of log weights\n", " log_weights = torch.log(household_weights)\n", " log_weights.requires_grad_()\n", @@ -119,7 +117,7 @@ " # Update weights\n", " optimizer.step()\n", "\n", - " # Print loss for every 1000 epochs\n", + " # Print loss for every 100 epochs\n", " if epoch % 100 == 0:\n", " print(f\"Epoch {epoch}, Loss: {loss.item()}\")\n", "\n", From 624214d5b8718c85fa9e6a0be180ee33bcc1e2b5 Mon Sep 17 00:00:00 2001 From: = <=> Date: Wed, 17 Jul 2024 10:43:52 -0400 Subject: [PATCH 3/8] Added (a draft of the) reweight.py function --- reweight/logic/reweight.py | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 reweight/logic/reweight.py diff --git a/reweight/logic/reweight.py b/reweight/logic/reweight.py new file mode 100644 index 0000000..284cc6c --- /dev/null +++ b/reweight/logic/reweight.py @@ -0,0 +1,62 @@ +import pandas as pd +import numpy as np +import torch +from torch.utils.tensorboard import SummaryWriter + +def reweight(initial_weights, estimate_matrix, target_names, target_values, epochs = 1000, epoch_step = 100): + """ + Main reweighting function, suitable for PolicyEngine UK use (PolicyEngine US use and testing TK) + + To avoid the need for equivalisation factors, use relative error: + |predicted - actual|/actual + + Parameters: + household_weights (torch.Tensor): The initial weights given to survey data, which are to be + adjusted by this function. + estimate_matrix (torch.Tensor): A large matrix of estimates, obtained from e.g. a PolicyEngine + Microsimulation instance. + target_names (iterable): The names of a set of target statistics treated as ground truth. + target_values (torch.Tensor): The values of these target statistics. + epochs: The number of iterations that the optimization loop should run for. + epoch_step: The interval at which to print the loss during the optimization loop. + + Returns: + final_weights: a reweighted set of household weights, obtained through an optimization process + over mean squared errors with respect to the target values. + """ + # Initialize a TensorBoard writer + writer = SummaryWriter() + + #Create a Torch tensor of log weights + log_weights = torch.log(initial_weights) + log_weights.requires_grad_() + + # estimate_matrix (cross) exp(log_weights) = target_values + + optimizer = torch.optim.Adam([log_weights]) + + # Training loop + for epoch in range(epochs): + + # Estimate the targets + targets_estimate = torch.exp(log_weights) @ estimate_matrix + # Calculate the loss + loss = torch.mean(((targets_estimate - target_values)/target_values) ** 2) + + writer.add_scalar("Loss/train", loss, epoch) + + optimizer.zero_grad() + + # Perform backpropagation + loss.backward() + + # Update weights + optimizer.step() + + # Print loss whenever the epoch number, when one-indexed, is divisible by epoch_step + if (epoch+1) % epoch_step == 0: + print(f"Epoch {epoch+1}, Loss: {loss.item()}") + + writer.flush() + + return torch.exp(log_weights.detach()) \ No newline at end of file From b4d3bc9d072861278b5286d10f6619646d6d17de Mon Sep 17 00:00:00 2001 From: = <=> Date: Fri, 19 Jul 2024 06:08:48 -0400 Subject: [PATCH 4/8] Added more general pycache specification to gitignore --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a57fb6d..11e066e 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,6 @@ reweight/tests/__pycache__ ############################# docs/_build -# Pycache folder # +# Pycache folders # ################## -reweight/__pycache__ \ No newline at end of file +**/__pycache__/ \ No newline at end of file From 132a0041534c7ea7f31cd7c8b56849ddaa0cfb22 Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 22 Jul 2024 09:55:00 -0400 Subject: [PATCH 5/8] Added a new test for the reweight logic --- reweight/tests/test_uk_prototype.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/reweight/tests/test_uk_prototype.py b/reweight/tests/test_uk_prototype.py index 35ffbce..1265073 100644 --- a/reweight/tests/test_uk_prototype.py +++ b/reweight/tests/test_uk_prototype.py @@ -3,3 +3,25 @@ def test_uk_microsimulation(): # Create a Microsimulation instance sim = Microsimulation() + +def test_uk_prototype(): + from policyengine_uk import Microsimulation + from reweight.logic import reweight + import torch + sim = Microsimulation() + + from policyengine_uk.data import RawFRS_2021_22 + RawFRS_2021_22().download() + + from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables + ( + household_weights, + weight_adjustment, + values_df, + targets, + targets_array, + equivalisation_factors_array + ) = generate_model_variables("frs_2021", 2025) + + sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32) + reweight.reweight(household_weights, sim_matrix, targets, targets_array) \ No newline at end of file From fb081962279a1bb9e45586d547701ea2fbf9d0dd Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 22 Jul 2024 10:10:27 -0400 Subject: [PATCH 6/8] Reformatted --- reweight/logic/reweight.py | 24 +++++++++++++++++------- reweight/tests/test_uk_prototype.py | 14 ++++++++++---- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/reweight/logic/reweight.py b/reweight/logic/reweight.py index 284cc6c..859e962 100644 --- a/reweight/logic/reweight.py +++ b/reweight/logic/reweight.py @@ -3,10 +3,18 @@ import torch from torch.utils.tensorboard import SummaryWriter -def reweight(initial_weights, estimate_matrix, target_names, target_values, epochs = 1000, epoch_step = 100): + +def reweight( + initial_weights, + estimate_matrix, + target_names, + target_values, + epochs=1000, + epoch_step=100, +): """ Main reweighting function, suitable for PolicyEngine UK use (PolicyEngine US use and testing TK) - + To avoid the need for equivalisation factors, use relative error: |predicted - actual|/actual @@ -27,12 +35,12 @@ def reweight(initial_weights, estimate_matrix, target_names, target_values, epoc # Initialize a TensorBoard writer writer = SummaryWriter() - #Create a Torch tensor of log weights + # Create a Torch tensor of log weights log_weights = torch.log(initial_weights) log_weights.requires_grad_() # estimate_matrix (cross) exp(log_weights) = target_values - + optimizer = torch.optim.Adam([log_weights]) # Training loop @@ -41,7 +49,9 @@ def reweight(initial_weights, estimate_matrix, target_names, target_values, epoc # Estimate the targets targets_estimate = torch.exp(log_weights) @ estimate_matrix # Calculate the loss - loss = torch.mean(((targets_estimate - target_values)/target_values) ** 2) + loss = torch.mean( + ((targets_estimate - target_values) / target_values) ** 2 + ) writer.add_scalar("Loss/train", loss, epoch) @@ -54,9 +64,9 @@ def reweight(initial_weights, estimate_matrix, target_names, target_values, epoc optimizer.step() # Print loss whenever the epoch number, when one-indexed, is divisible by epoch_step - if (epoch+1) % epoch_step == 0: + if (epoch + 1) % epoch_step == 0: print(f"Epoch {epoch+1}, Loss: {loss.item()}") writer.flush() - return torch.exp(log_weights.detach()) \ No newline at end of file + return torch.exp(log_weights.detach()) diff --git a/reweight/tests/test_uk_prototype.py b/reweight/tests/test_uk_prototype.py index 1265073..b413f2f 100644 --- a/reweight/tests/test_uk_prototype.py +++ b/reweight/tests/test_uk_prototype.py @@ -4,24 +4,30 @@ def test_uk_microsimulation(): # Create a Microsimulation instance sim = Microsimulation() + def test_uk_prototype(): from policyengine_uk import Microsimulation from reweight.logic import reweight import torch + sim = Microsimulation() - + from policyengine_uk.data import RawFRS_2021_22 + RawFRS_2021_22().download() - from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables + from policyengine_uk.data.datasets.frs.calibration.calibrate import ( + generate_model_variables, + ) + ( household_weights, weight_adjustment, values_df, targets, targets_array, - equivalisation_factors_array + equivalisation_factors_array, ) = generate_model_variables("frs_2021", 2025) sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32) - reweight.reweight(household_weights, sim_matrix, targets, targets_array) \ No newline at end of file + reweight.reweight(household_weights, sim_matrix, targets, targets_array) From 349d70772594c0406b5fef2ee232238ed253d9ae Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 22 Jul 2024 10:58:25 -0400 Subject: [PATCH 7/8] Added init scripts to reweight directories --- changelog_entry.yaml | 1 + reweight/data/__init__.py | 1 + reweight/logic/__init__.py | 1 + reweight/tests/__init__.py | 1 + reweight/tests/test_uk_prototype.py | 2 +- 5 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 reweight/data/__init__.py create mode 100644 reweight/logic/__init__.py create mode 100644 reweight/tests/__init__.py diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..8b13789 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1 @@ + diff --git a/reweight/data/__init__.py b/reweight/data/__init__.py new file mode 100644 index 0000000..badcc11 --- /dev/null +++ b/reweight/data/__init__.py @@ -0,0 +1 @@ +from .reweight import reweight \ No newline at end of file diff --git a/reweight/logic/__init__.py b/reweight/logic/__init__.py new file mode 100644 index 0000000..badcc11 --- /dev/null +++ b/reweight/logic/__init__.py @@ -0,0 +1 @@ +from .reweight import reweight \ No newline at end of file diff --git a/reweight/tests/__init__.py b/reweight/tests/__init__.py new file mode 100644 index 0000000..badcc11 --- /dev/null +++ b/reweight/tests/__init__.py @@ -0,0 +1 @@ +from .reweight import reweight \ No newline at end of file diff --git a/reweight/tests/test_uk_prototype.py b/reweight/tests/test_uk_prototype.py index b413f2f..bb08678 100644 --- a/reweight/tests/test_uk_prototype.py +++ b/reweight/tests/test_uk_prototype.py @@ -7,7 +7,7 @@ def test_uk_microsimulation(): def test_uk_prototype(): from policyengine_uk import Microsimulation - from reweight.logic import reweight + from reweight import reweight import torch sim = Microsimulation() From 7543011bdcce96987fd539a0ab3cccd83c3c9f50 Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 22 Jul 2024 16:11:30 -0400 Subject: [PATCH 8/8] Added __init__ scripts for logic directory --- reweight/__init__.py | 3 +- reweight/data/__init__.py | 1 - reweight/logic/__init__.py | 2 +- reweight/tests/__init__.py | 1 - reweight/tests/test_uk_prototype.py | 2 +- test.ipynb | 48 +++++++++++++++++++++++++++++ 6 files changed, 52 insertions(+), 5 deletions(-) delete mode 100644 reweight/data/__init__.py delete mode 100644 reweight/tests/__init__.py diff --git a/reweight/__init__.py b/reweight/__init__.py index 3dc1f76..28fe9cc 100644 --- a/reweight/__init__.py +++ b/reweight/__init__.py @@ -1 +1,2 @@ -__version__ = "0.1.0" +__version__ = "0.3.0" +from .logic.reweight import reweight diff --git a/reweight/data/__init__.py b/reweight/data/__init__.py deleted file mode 100644 index badcc11..0000000 --- a/reweight/data/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .reweight import reweight \ No newline at end of file diff --git a/reweight/logic/__init__.py b/reweight/logic/__init__.py index badcc11..a9de83b 100644 --- a/reweight/logic/__init__.py +++ b/reweight/logic/__init__.py @@ -1 +1 @@ -from .reweight import reweight \ No newline at end of file +from .reweight import reweight diff --git a/reweight/tests/__init__.py b/reweight/tests/__init__.py deleted file mode 100644 index badcc11..0000000 --- a/reweight/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .reweight import reweight \ No newline at end of file diff --git a/reweight/tests/test_uk_prototype.py b/reweight/tests/test_uk_prototype.py index bb08678..dc69b2b 100644 --- a/reweight/tests/test_uk_prototype.py +++ b/reweight/tests/test_uk_prototype.py @@ -30,4 +30,4 @@ def test_uk_prototype(): ) = generate_model_variables("frs_2021", 2025) sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32) - reweight.reweight(household_weights, sim_matrix, targets, targets_array) + reweight(household_weights, sim_matrix, targets, targets_array) diff --git a/test.ipynb b/test.ipynb index c219280..e346cd3 100644 --- a/test.ipynb +++ b/test.ipynb @@ -76,6 +76,54 @@ "# square error, and then average to get MSE." ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from reweight.logic import reweight" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 100, Loss: 48.30685043334961\n", + "Epoch 200, Loss: 40.58155059814453\n", + "Epoch 300, Loss: 34.585235595703125\n", + "Epoch 400, Loss: 29.832853317260742\n", + "Epoch 500, Loss: 25.99891471862793\n", + "Epoch 600, Loss: 22.858182907104492\n", + "Epoch 700, Loss: 20.250896453857422\n", + "Epoch 800, Loss: 18.061073303222656\n", + "Epoch 900, Loss: 16.202829360961914\n", + "Epoch 1000, Loss: 14.611446380615234\n" + ] + }, + { + "data": { + "text/plain": [ + "tensor([1120.1953, 89.4442, 3851.2649, ..., 730.9640, 832.0632,\n", + " 4155.2686])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)\n", + "\n", + "reweight.reweight(household_weights, sim_matrix, targets, targets_array)" + ] + }, { "cell_type": "code", "execution_count": 29,