Skip to content
This repository has been archived by the owner on Jan 24, 2018. It is now read-only.

626 timestamps #1517

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@
# ga4gh-client==0.1.0
# ga4gh-schemas==0.1.0
#
git+git://github.com/ga4gh/schemas.git@master#egg=ga4gh_schemas
git+git://github.com/ejacox/schemas.git@626_timestamps#egg=ga4gh_schemas
git+git://github.com/ga4gh/ga4gh-client.git@master#egg=ga4gh_client

1 change: 0 additions & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# that are dependencies of libraries listed in the next section
# and are not already listed in requirements.txt

python-dateutil==2.5.1
PyYAML==3.11

### This section is for the actual libraries ###
Expand Down
8 changes: 4 additions & 4 deletions docs/datarepo.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ dataset has a name, which is used to identify it in the repository manager.

.. code-block:: bash

$ ga4gh_repo add-dataset registry.db 1kg -d 'Example dataset using 1000 genomes data'
$ ga4gh_repo add-dataset registry.db 1kg -d 'Example dataset using 1000 genomes data' -c '2007-04-05T12:30-02:00' -u '2008-04-25T18:30Z'

Adds the dataset with the name ``1kg`` and description
``'Example dataset using 1000 genomes data'`` to the
registry database ``registry.db``.
Adds the dataset with the name ``1kg``, description
``'Example dataset using 1000 genomes data'``, and creation and update times
(in ISO 8601 format) to the registry database ``registry.db``.

----------------
add-referenceset
Expand Down
16 changes: 16 additions & 0 deletions ga4gh/server/cli/repomanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ def addDataset(self):
self._openRepo()
dataset = datasets.Dataset(self._args.datasetName)
dataset.setDescription(self._args.description)
dataset.setCreateDateTime(self._args.createDateTime)
dataset.setUpdateDateTime(self._args.updateDateTime)
dataset.setInfo(json.loads(self._args.info))
self._updateRepo(self._repo.insertDataset, dataset)

Expand Down Expand Up @@ -585,6 +587,18 @@ def addDatasetNameArgument(cls, subparser):
subparser.add_argument(
"datasetName", help="the name of the dataset")

@classmethod
def addDatasetCreateDateTimeArgument(cls, subparser):
    """
    Registers the -c/--createDateTime option on the given subparser.

    The value is an optional free-form datetime string; it defaults to
    None when the option is not supplied.
    """
    subparser.add_argument(
        "-c", "--createDateTime",
        help="the creation datetime of the dataset",
        default=None)

@classmethod
def addDatasetUpdateDateTimeArgument(cls, subparser):
    """
    Registers the -u/--updateDateTime option on the given subparser.

    The value is an optional free-form datetime string; it defaults to
    None when the option is not supplied.
    """
    subparser.add_argument(
        "-u", "--updateDateTime",
        help="the update datetime of the dataset",
        default=None)

@classmethod
def addDatasetInfoArgument(cls, subparser):
subparser.add_argument(
Expand Down Expand Up @@ -742,6 +756,8 @@ def getParser(cls):
cls.addRepoArgument(addDatasetParser)
cls.addDatasetNameArgument(addDatasetParser)
cls.addDatasetInfoArgument(addDatasetParser)
cls.addDatasetCreateDateTimeArgument(addDatasetParser)
cls.addDatasetUpdateDateTimeArgument(addDatasetParser)
cls.addDescriptionOption(addDatasetParser, "dataset")

removeDatasetParser = common_cli.addSubparser(
Expand Down
60 changes: 58 additions & 2 deletions ga4gh/server/datamodel/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from __future__ import print_function
from __future__ import unicode_literals

from datetime import datetime

import ga4gh.server.datamodel as datamodel
import ga4gh.server.datamodel.reads as reads
import ga4gh.server.datamodel.sequence_annotations as sequence_annotations
Expand All @@ -15,9 +17,8 @@
import ga4gh.server.datamodel.genotype_phenotype as g2p
import ga4gh.server.datamodel.rna_quantification as rnaQuantification
import json

import google.protobuf.struct_pb2 as struct_pb2

import dateutil.parser as datetimeParser
import ga4gh.schemas.pb as pb


Expand Down Expand Up @@ -59,6 +60,8 @@ def __init__(self, localId):
self._rnaQuantificationSetIdMap = {}
self._rnaQuantificationSetNameMap = {}
self._info = {}
self._createDateTime = None
self._updateDateTime = None

def populateFromRow(self, row):
"""
Expand All @@ -67,13 +70,51 @@ def populateFromRow(self, row):
"""
self._description = row[b'description']
self._info = json.loads(row[b'info'])
self._createDateTime = row[b'createDateTime']
self._updateDateTime = row[b'updateDateTime']

def setDescription(self, description):
    """Stores the given value as this dataset's description."""
    self._description = description

def setCreateDateTime(self, datetimeStr):
    """
    Sets the creation DateTime for this dataset to the specified value.

    The datetime parser accepts a wide variety of time formats, which
    are converted to ISO format before being stored.  A falsy value
    (None or the empty string) is ignored and leaves the current
    creation DateTime unchanged.

    Raises TimestampFormatException if the string cannot be parsed.
    """
    if datetimeStr:
        try:
            datetimeObj = datetimeParser.parse(datetimeStr)
        except (ValueError, OverflowError):
            # dateutil raises ValueError for unparseable strings and
            # OverflowError for out-of-range components; both mean the
            # same thing to callers, so handle them in one clause.
            raise exceptions.TimestampFormatException(
                datetimeStr, self.getLocalId())
        self._createDateTime = datetimeObj.isoformat()

def setUpdateDateTime(self, datetimeStr):
    """
    Sets the update DateTime for this dataset to the specified value.

    The datetime parser accepts a wide variety of time formats, which
    are converted to ISO format before being stored.  A falsy value
    (None or the empty string) is ignored and leaves the current
    update DateTime unchanged.

    Raises TimestampFormatException if the string cannot be parsed.
    """
    if datetimeStr:
        try:
            datetimeObj = datetimeParser.parse(datetimeStr)
        except (ValueError, OverflowError):
            # dateutil raises ValueError for unparseable strings and
            # OverflowError for out-of-range components; both mean the
            # same thing to callers, so handle them in one clause.
            raise exceptions.TimestampFormatException(
                datetimeStr, self.getLocalId())
        self._updateDateTime = datetimeObj.isoformat()

def setInfo(self, info):
"""
Sets the info for this dataset to the specified value.
Expand Down Expand Up @@ -143,6 +184,8 @@ def toProtocolElement(self):
dataset.description = pb.string(self.getDescription())
for key in self.getInfo():
dataset.info[key].values.extend(_encodeValue(self._info[key]))
dataset.createDateTime = pb.string(self.getCreateDateTime())
dataset.updateDateTime = pb.string(self.getUpdateDateTime())
return dataset

def getVariantSets(self):
Expand Down Expand Up @@ -369,6 +412,18 @@ def getDescription(self):
"""
return self._description

def getCreateDateTime(self):
    """
    Returns the stored creation DateTime string for this dataset, or
    None if one was never set.  Values set via setCreateDateTime are
    ISO 8601 formatted; values loaded from the repository row are
    returned as stored.
    """
    return self._createDateTime

def getUpdateDateTime(self):
    """
    Returns the stored update DateTime string for this dataset, or
    None if one was never set.  Values set via setUpdateDateTime are
    ISO 8601 formatted; values loaded from the repository row are
    returned as stored.
    """
    return self._updateDateTime

def getNumRnaQuantificationSets(self):
"""
Returns the number of rna quantification sets in this dataset.
Expand Down Expand Up @@ -422,6 +477,7 @@ def __init__(
numExpressionLevels=2):
super(SimulatedDataset, self).__init__(localId)
self._description = "Simulated dataset {}".format(localId)
self.setCreateDateTime(datetime(2015, 1, 1).isoformat())

for i in range(numPhenotypeAssociationSets):
localId = "simPas{}".format(i)
Expand Down
10 changes: 7 additions & 3 deletions ga4gh/server/datarepo.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,8 @@ def _createDatasetTable(self, cursor):
name TEXT NOT NULL,
description TEXT,
info TEXT,
createDateTime TEXT,
updateDateTime TEXT,
UNIQUE (name)
);
"""
Expand All @@ -818,15 +820,17 @@ def insertDataset(self, dataset):
Inserts the specified dataset into this repository.
"""
sql = """
INSERT INTO Dataset (id, name, description, info)
VALUES (?, ?, ?, ?);
INSERT INTO Dataset (id, name, description, info, createDateTime,
updateDateTime)
VALUES (?, ?, ?, ?, ?, ?);
"""
cursor = self._dbConnection.cursor()
try:
cursor.execute(sql, (
dataset.getId(), dataset.getLocalId(),
dataset.getDescription(),
json.dumps(dataset.getInfo())))
json.dumps(dataset.getInfo()), dataset.getCreateDateTime(),
dataset.getUpdateDateTime()))
except sqlite3.IntegrityError:
raise exceptions.DuplicateNameException(dataset.getLocalId())

Expand Down
9 changes: 9 additions & 0 deletions ga4gh/server/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,15 @@ def __init__(self, filename, message):
filename, message)


class TimestampFormatException(DataException):
    """
    Raised when a datetime string cannot be parsed as a timestamp.
    """
    def __init__(self, timestamp, source):
        # Matches the sibling exceptions in this module, which build
        # self.message directly rather than calling the base __init__.
        template = "Error reading timestamp '{0}' from {1}"
        self.message = template.format(timestamp, source)


class MalformedException(DataException):
"""
A base exception class for exceptions thrown when faulty VCF file
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ requests==2.7.0
oic==0.7.6
pyOpenSSL==0.15.1
lxml==3.4.4
python-dateutil==2.5.1

# We need sphinx-argparse to build on readthedocs.
sphinx-argparse==0.1.15
Expand Down
2 changes: 2 additions & 0 deletions scripts/prepare_compliance_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ def run(self):
dataset = datasets.Dataset("brca1")
# Some info is set, it isn't important what
dataset.setInfo({"version": ga4gh.server.__version__})
dataset.setCreateDateTime('2008-04-25T18:30Z')
dataset.setUpdateDateTime('2016-05-15T17:34-01:00')
self.repo.insertDataset(dataset)

hg00096Individual = biodata.Individual(dataset, "HG00096")
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from __future__ import unicode_literals

import unittest
from datetime import datetime

import ga4gh.server.datamodel.datasets as datasets
import ga4gh.server.exceptions as exceptions


class TestDatasets(unittest.TestCase):
Expand All @@ -18,7 +20,19 @@ def testToProtocolElement(self):
datasetId = 'ds1'
dataset = datasets.SimulatedDataset(datasetId, 1, 2, 3, 4, 5)
dataset.setInfo({"test": "test"})
createDT = '2015-01-01T00:00:00' # set in simulated
updateDT = datetime.utcnow().isoformat()
dataset.setUpdateDateTime(updateDT)
gaDataset = dataset.toProtocolElement()
self.assertIsNotNone(gaDataset.info)
self.assertEqual(gaDataset.info['test'].values[0].string_value, "test")
self.assertEqual(dataset.getId(), gaDataset.id)
self.assertEqual(gaDataset.createDateTime, createDT)
self.assertEqual(gaDataset.updateDateTime, updateDT)

def testBadDateTime(self):
    """A string that is not a parseable datetime must be rejected."""
    dataset = datasets.SimulatedDataset('ds1', 1, 2, 3, 4, 5)
    with self.assertRaises(exceptions.TimestampFormatException):
        dataset.setUpdateDateTime('215010100:00:00')