Skip to content

Commit

Permalink
Merge branch 'master' of github.com:OpenHumans/open-humans into 20190…
Browse files Browse the repository at this point in the history
…4-feature-datafile-datatypes
  • Loading branch information
madprime committed Apr 10, 2019
2 parents 3020852 + 36c2bc5 commit 70505dc
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 12 deletions.
53 changes: 53 additions & 0 deletions data_import/filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import datetime

import arrow

from rest_framework.filters import BaseFilterBackend


class AccessLogFilter(BaseFilterBackend):
    """
    Used for filtering data returned by the custom API for OHLOG_PROJECT_ID.

    Supported query parameters:

    start_date, end_date
        Inclusive bounds on the log timestamp, parsed with ``arrow.get``.
        A value that cannot be parsed is ignored. When both parse to the
        same instant, the range is widened to the 24 hours starting there.
    datafile_id
        Integer matched against ``serialized_data_file__id``. A value that
        is not an integer is ignored.
    """

    @staticmethod
    def _parse_date(raw_value):
        """
        Return ``raw_value`` parsed into a datetime, or None when it is
        empty or cannot be parsed.
        """
        if not raw_value:
            return None
        try:
            return arrow.get(raw_value).datetime
        except (TypeError, ValueError, arrow.parser.ParserError):
            # In older arrow releases ParserError does not derive from
            # ValueError, so it is listed explicitly; otherwise a malformed
            # date would surface as a server error instead of being ignored.
            return None

    def filter_queryset(self, request, queryset, view):
        """
        Narrow ``queryset`` according to the request's query parameters.

        Returns the queryset with any date-range and datafile filters
        applied; parameters that are absent or invalid leave it unchanged.
        """
        params = request.query_params
        start_date = self._parse_date(params.get("start_date", None))
        end_date = self._parse_date(params.get("end_date", None))

        # Special check if start_date and end_date is the same.
        # If this is the case, assume that a 24 hour period is meant and
        # extend end_date to the last representable instant of that period,
        # so entries in the final second are not dropped.
        if end_date is not None and start_date == end_date:
            end_date = end_date + (
                datetime.timedelta(days=1) - datetime.timedelta(microseconds=1)
            )

        # AWS uses 'time' for the timestamp rather than 'date'
        if queryset.model.__name__ == "AWSDataFileAccessLog":
            timestamp_field = "time"
        else:
            timestamp_field = "date"

        if start_date is not None:
            queryset = queryset.filter(**{timestamp_field + "__gte": start_date})
        if end_date is not None:
            queryset = queryset.filter(**{timestamp_field + "__lte": end_date})

        datafile_id = params.get("datafile_id", None)
        if datafile_id:
            try:
                datafile_id = int(datafile_id)
            except ValueError:
                # Deliberately best-effort: a non-integer ID is ignored
                # rather than rejected.
                pass
            else:
                queryset = queryset.filter(serialized_data_file__id=datafile_id)

        return queryset
21 changes: 21 additions & 0 deletions data_import/migrations/0018_auto_20190402_1947.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generated by Django 2.1.3 on 2019-04-02 19:47

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Alter the ``bytes_sent`` and ``object_size`` fields of
    ``awsdatafileaccesslog`` to nullable ``BigIntegerField`` columns.
    """

    dependencies = [("data_import", "0017_auto_20190329_1638")]

    # Both fields receive the same alteration, in this order.
    operations = [
        migrations.AlterField(
            model_name="awsdatafileaccesslog",
            name=field_name,
            field=models.BigIntegerField(null=True),
        )
        for field_name in ("bytes_sent", "object_size")
    ]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 2.1.7 on 2019-04-02 17:45
# Generated by Django 2.1.7 on 2019-04-10 22:40

import django.contrib.postgres.fields.jsonb
import django.core.validators
Expand All @@ -10,7 +10,7 @@ class Migration(migrations.Migration):

dependencies = [
("open_humans", "0014_member_password_reset_redirect"),
("data_import", "0017_auto_20190329_1638"),
("data_import", "0018_auto_20190402_1947"),
]

operations = [
Expand Down
4 changes: 2 additions & 2 deletions data_import/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,8 @@ class AWSDataFileAccessLog(models.Model):
request_uri = models.CharField(max_length=500, null=True)
status = models.IntegerField(null=True)
error_code = models.CharField(max_length=64, null=True)
bytes_sent = models.IntegerField(null=True)
object_size = models.IntegerField(null=True)
bytes_sent = models.BigIntegerField(null=True)
object_size = models.BigIntegerField(null=True)
total_time = models.IntegerField(null=True)
turn_around_time = models.IntegerField(null=True)
referrer = models.CharField(max_length=500, null=True)
Expand Down
15 changes: 15 additions & 0 deletions data_import/permissions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from django.conf import settings

from rest_framework.permissions import BasePermission


class LogAPIAccessAllowed(BasePermission):
    """
    Return True if the request is from OHLOG_PROJECT_ID.
    """

    def has_permission(self, request, view):
        """
        Grant access only when ``request.auth`` carries the ID configured in
        ``settings.OHLOG_PROJECT_ID``.

        Returns False when the setting is unset/empty, when the request has
        no auth object, or when the IDs differ.
        """
        configured_id = settings.OHLOG_PROJECT_ID
        if not configured_id:
            return False
        # request.auth is None when no authenticator succeeded; read the id
        # defensively so that case is denied instead of raising
        # AttributeError.
        requester_id = getattr(request.auth, "id", None)
        if requester_id is None:
            return False
        return requester_id == int(configured_id)
46 changes: 45 additions & 1 deletion data_import/serializers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from collections import OrderedDict
from urllib.parse import urlparse, parse_qs

from rest_framework import serializers

from private_sharing.models import DataRequestProject

from .models import DataFile, DataType
from .models import AWSDataFileAccessLog, DataFile, DataType, NewDataFileAccessLog


def serialize_datafile_to_dict(datafile):
Expand Down Expand Up @@ -47,6 +48,49 @@ def to_representation(self, instance):
return ret


class NewDataFileAccessLogSerializer(serializers.ModelSerializer):
    """
    Serializer for file access request logs, used by the custom API endpoint
    for OHLOG_PROJECT_ID.
    """

    # Exposed as the related user's id; falls back to None.
    user = serializers.IntegerField(
        source="user.id",
        allow_null=True,
        default=None,
    )
    # Output names 'datafile' and 'key' map onto differently named sources.
    datafile = serializers.JSONField(source="serialized_data_file")
    key = serializers.JSONField(source="data_file_key")

    class Meta:  # noqa: D101
        model = NewDataFileAccessLog
        fields = [
            "date",
            "ip_address",
            "user",
            "datafile",
            "key",
            "aws_url",
        ]


class AWSDataFileAccessLogSerializer(serializers.ModelSerializer):
    """
    Serializer for AWS file access event logs, used by the custom API
    endpoint for OHLOG_PROJECT_ID.
    """

    # Output name 'datafile' maps onto the 'serialized_data_file' source.
    datafile = serializers.JSONField(source="serialized_data_file")

    class Meta:  # noqa: D101
        model = AWSDataFileAccessLog
        fields = [
            # Request identification
            "time",
            "remote_ip",
            "request_id",
            "operation",
            "bucket_key",
            "request_uri",
            # Outcome and transfer figures
            "status",
            "bytes_sent",
            "object_size",
            "total_time",
            "turn_around_time",
            # Client and connection details
            "referrer",
            "user_agent",
            "cipher_suite",
            "host_header",
            # Associated data file
            "datafile",
        ]


class DataTypeSerializer(serializers.ModelSerializer):
"""
Serialize DataTypes
Expand Down
16 changes: 15 additions & 1 deletion data_import/urls.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from django.urls import re_path
from django.urls import path, re_path

from .views import (
AWSDataFileAccessLogView,
DataFileDownloadView,
DataTypesCreateView,
DataTypesDetailView,
DataTypesListView,
DataTypesUpdateView,
NewDataFileAccessLogView,
)

app_name = "data-management"
Expand All @@ -16,6 +18,7 @@
DataFileDownloadView.as_view(),
name="datafile-download",
),
# DataTypes paths
re_path(
r"^datatypes/create/", DataTypesCreateView.as_view(), name="datatypes-create"
),
Expand All @@ -30,4 +33,15 @@
name="datatypes-detail",
),
re_path(r"^datatypes/", DataTypesListView.as_view(), name="datatypes-list"),
# Custom API endpoints for OHLOG_PROJECT_ID
path(
"awsdatafileaccesslog/",
AWSDataFileAccessLogView.as_view(),
name="awsdatafileaccesslog",
),
path(
"newdatafileaccesslog/",
NewDataFileAccessLogView.as_view(),
name="newdatafileaccesslog",
),
]
58 changes: 55 additions & 3 deletions data_import/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,29 @@
from django.urls import reverse
from django.views.generic import CreateView, DetailView, TemplateView, UpdateView, View

from django_filters.rest_framework import DjangoFilterBackend
from ipware.ip import get_ip
from rest_framework.generics import ListAPIView

from common.mixins import NeverCacheMixin, PrivateMixin

from data_import.serializers import (
AWSDataFileAccessLogSerializer,
NewDataFileAccessLogSerializer,
serialize_datafile_to_dict,
)
from private_sharing.api_authentication import CustomOAuth2Authentication
from private_sharing.api_permissions import HasValidProjectToken

from .filters import AccessLogFilter
from .forms import DataTypeForm
from .models import DataFile, DataFileKey, DataType, NewDataFileAccessLog
from data_import.serializers import serialize_datafile_to_dict
from .models import (
AWSDataFileAccessLog,
DataFile,
DataFileKey,
DataType,
NewDataFileAccessLog,
)
from .permissions import LogAPIAccessAllowed

UserModel = get_user_model()

Expand Down Expand Up @@ -81,6 +97,42 @@ def get(self, request, *args, **kwargs):
)


class NewDataFileAccessLogView(NeverCacheMixin, ListAPIView):
    """
    Custom API endpoint that lists file access request logs for
    OHLOG_PROJECT_ID.
    """

    serializer_class = NewDataFileAccessLogSerializer
    authentication_classes = (CustomOAuth2Authentication,)
    permission_classes = (HasValidProjectToken, LogAPIAccessAllowed)
    filter_backends = (AccessLogFilter, DjangoFilterBackend)
    filterset_fields = ("date",)

    def get_queryset(self):
        """
        Return the log entries whose serialized data file has a user_id
        equal to the requesting user's id.
        """
        return NewDataFileAccessLog.objects.filter(
            serialized_data_file__user_id=self.request.user.id
        )


class AWSDataFileAccessLogView(NeverCacheMixin, ListAPIView):
    """
    Custom API endpoint that lists AWS file access event logs for
    OHLOG_PROJECT_ID.
    """

    serializer_class = AWSDataFileAccessLogSerializer
    authentication_classes = (CustomOAuth2Authentication,)
    permission_classes = (HasValidProjectToken, LogAPIAccessAllowed)
    filter_backends = (AccessLogFilter, DjangoFilterBackend)
    filterset_fields = ("time",)

    def get_queryset(self):
        """
        Return the AWS log entries whose serialized data file has a user_id
        equal to the requesting user's id.
        """
        return AWSDataFileAccessLog.objects.filter(
            serialized_data_file__user_id=self.request.user.id
        )


class DataTypesListView(NeverCacheMixin, TemplateView):
"""
List all DataTypes.
Expand Down
4 changes: 3 additions & 1 deletion open_humans/management/commands/vacuum_log_bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def handle(self, *args, **options):
]
for index, field_name in enumerate(fields):
field = aws_log_entry._meta.get_field(field_name)
if field.get_internal_type() == "IntegerField":
if "IntegerField" in field.get_internal_type():
log_item = log[index]
if (log_item == "-") or (log_item == '"-"'):
log_item = 0
Expand Down Expand Up @@ -118,6 +118,8 @@ def handle(self, *args, **options):
# Filter out things we don't care to log
if settings.AWS_STORAGE_BUCKET_NAME in url:
continue
if "GET" not in str(aws_log_entry.operation):
continue
if any(
blacklist_item in url for blacklist_item in AWS_LOG_KEY_BLACKLIST
):
Expand Down
2 changes: 2 additions & 0 deletions open_humans/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,8 @@ def to_bool(env, default="false"):
RECAPTCHA_PUBLIC_KEY = os.getenv("RECAPTCHA_PUBLIC_KEY", "")
RECAPTCHA_PRIVATE_KEY = os.getenv("RECAPTCHA_PRIVATE_KEY", "")

OHLOG_PROJECT_ID = os.getenv("OHLOG_PROJECT_ID", None)

ZAPIER_WEBHOOK_URL = os.getenv("ZAPIER_WEBHOOK_URL")

MAX_UNAPPROVED_MEMBERS = int(os.getenv("MAX_UNAPPROVED_MEMBERS", "20"))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Generated by Django 2.1.7 on 2019-04-02 17:45
# Generated by Django 2.1.7 on 2019-04-10 22:40

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("data_import", "0018_datatype"),
("data_import", "0019_datatype"),
("private_sharing", "0020_auto_20190222_0036"),
]

Expand Down
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Markdown
mock
Pillow # for sorl-thumbnail
pyparsing
python-dateutil
raven
redis
requests
Expand Down

0 comments on commit 70505dc

Please sign in to comment.