# Source code for rest.app
"""
.. See the NOTICE file distributed with this work for additional information
regarding copyright ownership.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import print_function
# Required for ReadTheDocs
from functools import wraps # pylint: disable=unused-import
import json
import os
import sys
#import logging
from flask import Flask, request
from flask_restful import Api, Resource
from dmp import dmp
from reader.hdf5_reader import hdf5_reader
from mg_rest_util.mg_auth import authorized
# Flask application object for this module; the REST API resources defined
# below are attached to it at the bottom of the file.
APP = Flask(__name__)
#logging.basicConfig()
def help_usage(error_message, status_code,
               parameters_required, parameters_provided):
    """
    Usage Help

    Build the usage message returned by the end points of the app. The
    message describes the parameters accepted by an end point and echoes back
    any parameters that were provided by the user.

    Parameters
    ----------
    error_message : str | None
        Error message detailing what has gone wrong. If there are no errors
        then None should be passed.
    status_code : int
        HTTP status code. It is only recorded inside the message.
    parameters_required : list
        List of the text names for each parameter used by the end point.
        Names that are not in the table of known parameters are left out of
        the message. An empty list should be provided if there are no
        parameters.
    parameters_provided : dict
        Dictionary of the parameters and the matching values provided by the
        user. An empty dictionary should be passed if there were no
        parameters provided by the user.

    Returns
    -------
    dict
        Status message with the keys ``usage`` and ``status_code``, plus
        ``provided_parameters`` when any parameters were provided and
        ``error`` when an error message was given.
    """
    # Description, type and requirement level of every documented parameter
    known_parameters = {
        'by_user': ['By User [0|1]', 'int', 'OPTIONAL'],
        'file_id': ['File ID', 'str', 'REQUIRED'],
        'region': ['Chromosome:Start:End', 'str:int:int', 'OPTIONAL'],
        'file_type': ['File type (bb, bw, tsv, fasta, fastq, ...)', 'str', 'OPTIONAL'],
        'data_type': ['Data type (chip-seq, rna-seq, wgbs, ...)', 'str', 'OPTIONAL'],
        'assembly': ['Assembly', 'str', 'REQUIRED'],
        'chrom': ['Chromosome', 'str', 'OPTIONAL'],
        'start': ['Start', 'int', 'OPTIONAL'],
        'end': ['End', 'int', 'OPTIONAL'],
        'type': ['add_meta|remove_meta', 'str', 'OPTIONAL'],
        'output': [
            "Default is None. State 'original' to return the original whole file",
            'str', 'OPTIONAL'],
    }

    # Keep only the requested parameters that are actually documented
    described = {}
    for name in parameters_required:
        if name in known_parameters:
            described[name] = known_parameters[name]

    links = {
        '_self' : request.base_url,
        '_parent' : request.url_root + 'mug/api/dmp'
    }

    response = {
        'usage' : {
            '_links' : links,
            'parameters' : described
        },
        'status_code' : status_code
    }

    # The optional sections are only included when there is something to show
    if parameters_provided:
        response['provided_parameters'] = parameters_provided

    if error_message is not None:
        response['error'] = error_message

    return response
def _get_dm_api(user_id=None):
    """
    Create the DM API object for the given user.

    The configuration file is expected at ``mongodb.cnf`` in the same
    directory as this module. The live API is only used when the user is not
    the ``test`` user and that file exists; in every other case the API is
    created with ``test=True``. The selected mode is printed.

    Parameters
    ----------
    user_id : str | None
        Identifier of the user. The value ``test`` forces the test API.

    Returns
    -------
    dmp
        DM API object created from the configuration file location
    """
    config_path = os.path.dirname(os.path.abspath(__file__)) + '/mongodb.cnf'

    if user_id == 'test':
        banner, live = "TEST USER DM API", False
    elif os.path.isfile(config_path):
        banner, live = "LIVE DM API", True
    else:
        banner, live = "TEST DM API", False

    print(banner)

    if live:
        return dmp(config_path)
    return dmp(config_path, test=True)
class EndPoints(Resource):
    """
    Class to handle the http requests for returning information about the end
    points
    """

    def get(self):
        """
        GET list all end points

        List of all of the end points for the current service.

        Returns
        -------
        dict
            Dictionary with a single ``_links`` entry mapping each link name
            to its URL

        Example
        -------
        .. code-block:: none
           :linenos:

           curl -X GET http://localhost:5002/mug/api/dmp
        """
        self_url = request.base_url
        root = request.url_root

        links = {'_self': self_url}
        # NOTE(review): no resource is registered at 'mug/api/dmp/file' in
        # this module (FileMeta is registered at '/mug/api/dmp/file_meta') -
        # confirm that this link target is served elsewhere.
        links['_getFile'] = root + 'mug/api/dmp/file'
        links['_getFiles'] = root + 'mug/api/dmp/files'
        links['_getFileHistory'] = root + 'mug/api/dmp/file_history'
        links['_ping'] = root + 'mug/api/dmp/ping'
        links['_parent'] = root + 'mug/api'

        return {'_links': links}
class FileMeta(Resource):
    """
    Class to handle the http requests for managing the record of a single
    file within the DM API.

    GET returns the record of a file, POST registers a new file, PUT modifies
    the meta data (or a column) of an existing record and DELETE removes a
    record. Every method is wrapped by ``authorized``, which supplies the
    ``user_id`` argument: either None or a mapping that is read with the keys
    ``user_id`` and ``public_id``.
    """

    @staticmethod
    def _json_body():
        """
        Decode the body of the current request as a JSON object.

        Returns
        -------
        dict
            The decoded object. An empty dictionary is returned when the body
            is not valid JSON or is not a JSON object, so that the caller can
            answer with a usage message rather than fail with an exception.
        """
        try:
            payload = json.loads(request.data)
        except (TypeError, ValueError):
            return {}

        return payload if isinstance(payload, dict) else {}

    @authorized
    def get(self, user_id):
        """
        GET the record of a file

        Parameters
        ----------
        file_id : str
            Identifier of the file to retrieve
        public : str
            Optional. When present (with any value) the ``public_id`` of the
            authorised user is used in place of the ``user_id``

        Returns
        -------
        dict
            The value returned by the DM API ``get_file_by_id`` call. When no
            ``file_id`` is given the usage message is returned, and when the
            user is not authorised a ``Forbidden`` message is returned.

        Examples
        --------
        .. code-block:: none
           :linenos:

           curl -X GET http://localhost:5002/mug/api/dmp/file_meta?file_id=test_file
        """
        file_id = request.args.get('file_id')
        public = request.args.get('public')

        # Without a file_id there is nothing to look up: display the
        # parameters available. This is checked before the authorisation, as
        # it always has been, so the usage is visible to everyone.
        if file_id is None:
            return help_usage(None, 200, ['file_id'], {})

        if user_id is None:
            return help_usage('Forbidden', 403, ['file_id'], {})

        selected_user_id = user_id['user_id']
        if public is not None:
            selected_user_id = user_id['public_id']

        dmp_api = _get_dm_api(selected_user_id)
        return dmp_api.get_file_by_id(selected_user_id, file_id)

    @authorized
    def post(self, user_id):
        """
        POST Add a new file to the DM API

        Parameters
        ----------
        This should be passed as the data block with the HTTP request:

        json : dict
            file_path : str
                Location of the file
            file_type : str
                Tag for the file extension. The valid parameters are defined
                within the DM API documentation (mg-dm-api)
            data_type : str
                What type of experiment is this data from. Options include
                RNA-seq, ChIP-seq, MNase-seq, WGBS and HiC
            taxon_id : int
                Taxonomic identifier for a species (Human = 9606)
            source_id : list
                List of file_ids that were used for generating this file. The
                key has to be present and not null for the request to be
                accepted; an empty list passes the check.
            meta_data : dict
                Hash array describing the relevant metadata for the file,
                including the assembly if relevant
            size : int
                Optional. Passed on to the DM API as given
            parent_dir : str
                Optional. Passed on to the DM API as given
            compressed
                Optional. Options of the compression level of the file. If
                file is not compressed then do not include this parameter

        Returns
        -------
        file_id
            The value returned by the DM API ``set_file`` call. If a required
            parameter is missing a ``MissingParameters`` usage message is
            returned along with the parameters that were provided.

        Example
        -------
        .. code-block:: none
           :linenos:

           echo '{
               "data_type": "RNA-seq",
               "file_type": "fastq",
               "meta_data": {
                   "assembly" : "GRCh38"
               },
               "taxon_id": 9606,
               "source_id": [],
               "file_path": "/tmp/test/path/RNA-seq/testing_123.fastq",
               "parent_dir": "/tmp/test/path/RNA-seq/",
               "size": 64000
           }' > data.json

           curl -X POST
               -H "Content-Type: application/json"
               -H "Authorization: Bearer teststring"
               -d @data.json http://localhost:5002/mug/api/dmp/file
        """
        if user_id is None:
            return help_usage('Forbidden', 403, [], {})

        new_track = self._json_body()

        params_required = ['user_id', 'file_path', 'file_type', 'data_type',
                           'taxon_id', 'source_id', 'meta_data']

        # user_id comes from the authorisation, everything else from the body
        body_keys = params_required[1:]
        missing = [key for key in body_keys if new_track.get(key) is None]

        # ERROR - one of the required parameters is NoneType. The provided
        # request parameters are echoed back, never the authorisation details.
        if missing:
            return help_usage('MissingParameters', 400, params_required,
                              new_track)

        dmp_api = _get_dm_api(user_id['user_id'])

        return dmp_api.set_file(
            user_id['user_id'],
            new_track['file_path'],
            new_track['file_type'],
            new_track.get('size'),
            new_track.get('parent_dir'),
            new_track['data_type'],
            new_track['taxon_id'],
            new_track.get('compressed'),
            new_track['source_id'],
            new_track['meta_data']
        )

    @authorized
    def put(self, user_id):
        """
        PUT Update meta data

        Request to update the meta data for a given file. This allows for the
        adding or removal of key-value pairs from the meta data, or for the
        modification of a column value.

        Parameters
        ----------
        This should be passed as the data block with the HTTP request:

        json : dict
            file_id : str
                ID of the stored file
            type : str
                Options are 'add_meta', 'remove_meta' or 'modify_column'.
                Minimum sets of pairs are defined within the DM API
                (mg-dm-api)
            meta_data : dict | list
                For 'add_meta' a dictionary of the key-value pairs that are
                to be added. For 'remove_meta' the list of keys to remove.
            key : str
                For 'modify_column', the column to modify
            value
                For 'modify_column', the new value for the column

        Returns
        -------
        file_id
            The value returned by the last DM API call that was made. If the
            request is incomplete or the type is not recognised a
            ``MissingMetaDataParameters`` usage message is returned.

        Example
        -------
        To add a new key value pair:

        .. code-block:: none
           :linenos:

           echo '{
               "type":"add_meta",
               "file_id":"<file_id>",
               "meta_data":{"citation":"PMID:1234567890"}
           }' > data.json

           curl -X PUT
               -H "Content-Type: application/json"
               -H "Authorization: Bearer teststring"
               -d @data.json http://localhost:5002/mug/api/dmp/file

        To remove a key value pair:

        .. code-block:: none
           :linenos:

           echo '{
               "type":"remove_meta",
               "file_id":"<file_id>",
               "meta_data":["citation"]
           }' > data.json

           curl -X PUT
               -H "Content-Type: application/json"
               -H "Authorization: Bearer teststring"
               -d @data.json http://localhost:5002/mug/api/dmp/file

        To modify a column value (file size):

        .. code-block:: none
           :linenos:

           echo '{
               "type":"modify_column",
               "file_id":"<file_id>",
               "key":"<column_key>",
               "value":"<new_value>"
           }' > data.json

           curl -X PUT
               -H "Content-Type: application/json"
               -H "Authorization: Bearer teststring"
               -d @data.json http://localhost:5002/mug/api/dmp/file
        """
        if user_id is None:
            return help_usage('Forbidden', 403, [], {})

        params_required = ['user_id', 'file_id', 'type']
        data_put = self._json_body()

        file_id = data_put.get('file_id')
        change_type = data_put.get('type')
        meta_data = data_put.get('meta_data')

        # Same message for every incomplete or unrecognised request
        def bad_request():
            return help_usage(
                'MissingMetaDataParameters', 400, params_required,
                {'type' : ['add_meta', 'remove_meta', 'modify_column']})

        if file_id is None:
            return bad_request()

        if change_type == 'add_meta':
            # Needs at least one key-value pair, otherwise there would be no
            # result to return
            if not isinstance(meta_data, dict) or not meta_data:
                return bad_request()
        elif change_type == 'remove_meta':
            # Needs at least one key; a bare string is rejected as it would
            # be iterated character by character
            if not isinstance(meta_data, (list, dict)) or not meta_data:
                return bad_request()
        elif change_type == 'modify_column':
            if 'key' not in data_put or 'value' not in data_put:
                return bad_request()
        else:
            return bad_request()

        dmp_api = _get_dm_api(user_id['user_id'])
        result = None

        if change_type == 'add_meta':
            for key in meta_data:
                result = dmp_api.add_file_metadata(
                    user_id['user_id'], file_id, key, meta_data[key])
        elif change_type == 'remove_meta':
            for key in meta_data:
                result = dmp_api.remove_file_metadata(
                    user_id['user_id'], file_id, key)
        else:
            result = dmp_api.modify_column(
                user_id['user_id'], file_id, data_put['key'], data_put['value']
            )

        return result

    @authorized
    def delete(self, user_id):
        """
        DELETE Remove a file from the DM API

        Function to remove the file from the DM API. This will result in the
        file being removed from the records and therefore not available within
        the VRE or from the RESTful interface

        Parameters
        ----------
        This should be passed as the data block with the HTTP request:

        json : dict
            file_id : str
                ID of the stored file

        Returns
        -------
        file_id
            The value returned by the DM API ``remove_file`` call. If the
            ``file_id`` is missing or empty a ``MissingMetaDataParameters``
            usage message is returned.

        Example
        -------
        .. code-block:: none
           :linenos:

           echo '{
               "file_id":"<file_id>"
           }' > data.json

           curl -X DELETE
               -H "Content-Type: application/json"
               -H "Authorization: Bearer teststring"
               -d @data.json http://localhost:5002/mug/api/dmp/file
        """
        if user_id is None:
            return help_usage('Forbidden', 403, [], {})

        params_required = ['user_id', 'file_id']
        data_delete = self._json_body()

        # A missing key is handled in the same way as an empty value
        file_id = data_delete.get('file_id')
        if not file_id:
            return help_usage('MissingMetaDataParameters', 400, params_required,
                              {})

        dmp_api = _get_dm_api(user_id['user_id'])
        return dmp_api.remove_file(user_id['user_id'], file_id)
class Files(Resource):
    """
    Class to handle the http requests for retrieving the list of files for a
    given user handle
    """

    @staticmethod
    def _is_enabled(flag):
        """
        Test whether a query string flag has been set to 1.

        Parameters
        ----------
        flag : str | None
            Raw value of the query parameter

        Returns
        -------
        bool
            True if the value is an integer equal to 1. Missing or
            non-numeric values count as not set rather than raising.
        """
        if flag is None:
            return False

        try:
            return int(flag) == 1
        except ValueError:
            return False

    @authorized
    def get(self, user_id):
        """
        GET List user tracks

        Function to list the files that are owned by a single user. It is
        possible to filter by assembly, file or data type, or to find track
        files that contain data for a given region

        Parameters
        ----------
        assembly : str
            Genome assembly accession
        region : str
            <chromosome>:<start_pos>:<end_pos>, only used with an assembly
        file_type : str
            Only used with an assembly
        data_type : str
            Only used with an assembly
        by_user : int
            Set to 1 to list the files of the user. Only used when there is
            no assembly
        public : str
            Optional. When present (with any value) the ``public_id`` of the
            authorised user is used in place of the ``user_id``

        Returns
        -------
        dict
            Dictionary with the ``_links`` and the list of ``files``. The
            usage message is returned when the user is not authorised or
            when no usable combination of parameters was given.

        Example
        -------
        .. code-block:: none
           :linenos:

           curl -X GET http://localhost:5002/mug/api/dmp/files?by_user=1
        """
        usage_params = ['region', 'assembly', 'file_type', 'data_type', 'by_user']

        if user_id is None:
            return help_usage(None, 200, usage_params, {})

        region = request.args.get('region')
        assembly = request.args.get('assembly')
        file_type = request.args.get('file_type')
        data_type = request.args.get('data_type')
        by_user = request.args.get('by_user')
        public = request.args.get('public')

        selected_user_id = user_id['user_id']
        if public is not None:
            selected_user_id = user_id['public_id']

        dmp_api = _get_dm_api(selected_user_id)

        if assembly is not None:
            # The first filter that is present wins
            if region is not None:
                files = self._get_all_files_region(
                    dmp_api, selected_user_id, assembly, region)
            elif file_type is not None:
                # NOTE(review): the file_type value is not passed on to the
                # DM API - confirm against the mg-dm-api signature
                files = dmp_api.get_files_by_file_type(selected_user_id)
            elif data_type is not None:
                # NOTE(review): the data_type value is not passed on to the
                # DM API - confirm against the mg-dm-api signature
                files = dmp_api.get_files_by_data_type(selected_user_id)
            else:
                files = dmp_api.get_files_by_assembly(selected_user_id, assembly)
        elif self._is_enabled(by_user):
            files = dmp_api.get_files_by_user(selected_user_id)
        else:
            # Display the parameters available
            return help_usage(None, 200, usage_params, {})

        return {
            '_links': {
                '_self': request.base_url,
                '_parent' : request.url_root + 'mug/api/dmp'
            },
            'files': files
        }

    def _get_all_files_region(self, dmp_api, user_id, assembly, region):
        """
        List the file records that have data within a region.

        Parameters
        ----------
        dmp_api : dmp
            DM API object used to retrieve each file record
        user_id : str | dict
            Identifier of the user whose files are searched. ``get`` passes
            the selected identifier string; a mapping with a ``user_id`` key
            is also accepted.
        assembly : str
            Genome assembly accession
        region : str
            <chromosome>:<start_pos>:<end_pos>

        Returns
        -------
        list
            The values returned by the DM API ``get_file_by_id`` call for
            each file reported by the HDF5 index

        Raises
        ------
        ValueError
            If the region does not have 3 parts separated by ``:`` or the
            start or end are not integers
        """
        # Indexing the identifier string with ['user_id'] raised a TypeError
        if isinstance(user_id, dict):
            user_id = user_id['user_id']

        chrom, start, end = region.split(':')

        h5_idx = hdf5_reader(user_id)
        potential_files = h5_idx.get_regions(assembly, chrom, int(start), int(end))

        files = []
        # Entries 1 and 1000 of the index result are both collected;
        # presumably these are the indexed resolutions - TODO confirm against
        # hdf5_reader.get_regions
        for level in (1, 1000):
            for f_in in potential_files[level]:
                # Same (user_id, file_id) call as used for a single file
                files.append(dmp_api.get_file_by_id(user_id, f_in))

        return files
class FileHistory(Resource):
    """
    Class to handle the http requests for retrieving the list of file history of
    a given file for a given user handle
    """

    @authorized
    def get(self, user_id):
        """
        GET the list of files that were used for generating the defined file

        Parameters
        ----------
        file_id : str
            Identifier of the file whose history is requested

        Returns
        -------
        dict
            Dictionary with the ``_links`` and the ``history_files`` returned
            by the DM API. A ``Forbidden`` message is returned when the user
            is not authorised and a ``MissingParameters`` message when there
            is no ``file_id``.

        Example
        -------
        .. code-block:: none
           :linenos:

           curl -X GET http://localhost:5002/mug/api/dmp/file_history?file_id=<file_id>
        """
        if user_id is None:
            return help_usage('Forbidden', 403, [], {})

        dmp_api = _get_dm_api(user_id['user_id'])
        file_id = request.args.get('file_id')

        # ERROR - the user is known at this point, so file_id is the only
        # parameter that can still be missing
        if file_id is None:
            provided = {'file_id' : file_id}
            return help_usage('MissingParameters', 400, [], provided)

        history = dmp_api.get_file_history(user_id['user_id'], file_id)

        links = {
            '_self': request.base_url,
            '_parent' : request.url_root + 'mug/api/dmp'
        }

        return {
            '_links': links,
            'history_files': history
        }
class Ping(Resource):
    """
    Class to handle the http requests to ping a service
    """

    def get(self):
        """
        GET Status

        List the current status of the service along with the relevant
        information about the version.

        Returns
        -------
        dict
            The status ``ready``, the release details taken from
            ``rest.release`` and the ``_links`` of the end point

        Example
        -------
        .. code-block:: none
           :linenos:

           curl -X GET http://localhost:5002/mug/api/dmp/ping
        """
        # Imported on each call rather than at the top of the module
        import rest.release as release

        status = {"status": "ready"}
        status["version"] = release.__version__
        status["author"] = release.__author__
        status["license"] = release.__license__
        status["name"] = release.__rest_name__
        status["description"] = release.__description__
        status["_links"] = {
            '_self' : request.base_url,
            '_parent' : request.url_root + 'mug/api/dmp'
        }

        return status
#
# For the services where there needs to be an extra layer (adjacency lists),
# then there needs to be a way of forwarding for this. But the majority of
# things can be redirected to the raw files for use as a track.
#
# Location of the auth_meta.json file that sits alongside this module. It is
# stored as an attribute on the ``sys`` module; presumably it is read by the
# ``authorized`` decorator - TODO confirm against mg_rest_util.mg_auth
sys._auth_meta_json = os.path.dirname(os.path.realpath(__file__)) + '/auth_meta.json'

# Define the URIs and their matching methods
REST_API = Api(APP)

# List the available end points for this service
REST_API.add_resource(EndPoints, "/mug/api/dmp", endpoint='dmp_root')

# Get, add, update or remove the record of a single file
REST_API.add_resource(FileMeta, "/mug/api/dmp/file_meta", endpoint='file_meta')

# List the files that belong to a user
REST_API.add_resource(Files, "/mug/api/dmp/files", endpoint='files')

# List file history
REST_API.add_resource(FileHistory, "/mug/api/dmp/file_history", endpoint='file_history')

# Service ping
REST_API.add_resource(Ping, "/mug/api/dmp/ping", endpoint='dmp-ping')

# Initialise the server
if __name__ == "__main__":
    # Only when run as a script: serve on port 5002 with the Flask debug mode
    # switched on and the reloader switched off
    APP.run(port=5002, debug=True, use_reloader=False)