Last active
June 8, 2017 08:20
-
-
Save gus3000/a5d8073718a56c5792e241ae645f13b4 to your computer and use it in GitHub Desktop.
Extracts the hypothes.is annotations of a given group and stores them in a kTBS server
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/python3 | |
| import sys | |
| import requests | |
| import json | |
| import os | |
| import getopt | |
| from datetime import datetime | |
# user variables -- you can modify this !
user = 'gus3000'
group = 'QgXXr5E3'
hypothesis_host = 'hypothes.is'
# HTTPS, not HTTP: the API token is sent as a Bearer header on every
# request to this host and must not travel in clear text.
hypothesis_url = 'https://hypothes.is/'
database_name = 'hypothesis'
stored_trace_name = 'annotations'
ktbs_url = 'http://localhost:8001/'
# end of user variables

# Derived URLs -- computed from the user variables above.
hypothesis_root = hypothesis_url + 'api/'          # root of the hypothes.is API
base_url = ktbs_url + database_name + '/'          # kTBS base (the "database")
trace_url = base_url + stored_trace_name + '/'     # stored trace inside that base
| ############### Translation functions ############### | |
def translateKeys(obj):
    """Rename the keys of one decoded JSON object for use as an obsel.

    The key ``"id"`` becomes ``"@id"``; every other key that does not
    already start with ``"m:"`` gets the ``"m:"`` prefix. Keys that already
    carry the prefix are left alone.

    The function is used as a ``json.loads`` ``object_hook``, so it is
    called once per JSON object, nested objects included.

    :param obj: dictionary to rename; it is modified in place.
    :return: the same dictionary object, after renaming.
    """
    # Iterate over a snapshot of the keys. Renaming entries while walking
    # the live ``obj.keys()`` view makes recent CPython versions raise
    # RuntimeError, and lets older ones re-visit the freshly inserted keys
    # (turning "@id" into "m:@id").
    for key in list(obj):
        if key == "id":
            new_key = "@id"
        elif not key.startswith('m:'):
            new_key = 'm:' + key
        else:
            continue
        obj[new_key] = obj.pop(key)
    return obj
def annotation_to_obsel(annotation):
    """Turn one hypothes.is annotation (a dict) into a kTBS obsel dict.

    The obsel is a key-translated copy of the annotation, typed as
    ``m:SimpleObsel``, whose ``begin`` and ``end`` are both set to the
    annotation's ``created`` date converted by ``rfc3339_to_timestamp``.
    The input dictionary itself is not modified.
    """
    created_at = rfc3339_to_timestamp(annotation['created'])

    # Round-trip through JSON so that translateKeys runs (as object_hook)
    # on every nested object of a fresh copy.
    serialized = json.dumps(annotation)
    translated = json.loads(serialized, object_hook=translateKeys)

    translated.update({
        "@type": "m:SimpleObsel",
        "begin": created_at,
        "end": created_at,
    })
    return translated
def rfc3339_to_timestamp(date_utc):
    """Convert an RFC 3339 date-time string to whole seconds since the Epoch.

    Accepted forms: with or without fractional seconds, and with a ``Z``
    suffix, a numeric ``+HH:MM`` / ``-HH:MM`` offset, or no zone at all
    (then the value is taken to be UTC, as the parameter name says).

    :param date_utc: date-time string, e.g. ``2017-06-08T08:20:00.123456+00:00``.
    :return: seconds since 1970-01-01T00:00:00 UTC, truncated to an ``int``.
    :raises ValueError: if the string matches none of the accepted forms.
    """
    # RFC 3339 allows lower-case "t" and "z"; digits are unaffected.
    text = date_utc.strip().upper()

    # Normalise the zone designator to the colon-less "+HHMM" form, which
    # strptime's %z understands on every Python 3 version.
    if text.endswith('Z'):
        text = text[:-1] + '+0000'
    elif text[-3:-2] == ':' and text[-6:-5] in ('+', '-'):
        text = text[:-3] + text[-2:]

    formats = (
        '%Y-%m-%dT%H:%M:%S.%f%z',
        '%Y-%m-%dT%H:%M:%S%z',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S',
    )
    for fmt in formats:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        break
    else:
        raise ValueError('not an RFC 3339 date-time: %r' % (date_utc,))

    # Bring the value back to naive UTC so it can be compared to the Epoch.
    offset = parsed.utcoffset()
    if offset is not None:
        parsed = parsed.replace(tzinfo=None) - offset

    # Convert UTC datetime to seconds since the Epoch
    return int((parsed - datetime(1970, 1, 1)).total_seconds())
| ############### KTBS related functions ############### | |
def create_database():
    """Create the kTBS base named ``database_name`` unless it already exists.

    Existence is probed with a GET on ``base_url``; when that succeeds the
    function only prints a notice. Otherwise a ``Base`` description is
    POSTed to ``ktbs_url``. A failed creation is reported on stderr; no
    exception is raised for it.
    """
    if requests.get(base_url).ok:
        print('Database', database_name, 'already exists, skipping creation')
        return

    post_request = {
        "@id": database_name + '/',
        "@type": "Base",
        "label": "Hypothes.is corresponding database",
    }
    headers = {'Content-type': 'application/json'}
    ret = requests.post(ktbs_url, data=json.dumps(post_request), headers=headers)
    if not ret.ok:
        # The response used to be discarded, so a refused creation went
        # unnoticed until later requests failed.
        print('Warning : could not create database', database_name,
              '(HTTP status', ret.status_code, ')', file=sys.stderr)
def create_model():
    """Build the JSON-LD description of the annotation trace model.

    NOTE: this function is unfinished. The description is only assembled
    locally; it is neither sent to the kTBS nor returned.
    """
    model = {
        "@context": "http://liris.cnrs.fr/silex/2011/ktbs-jsonld-context",
        "@graph": [
            {
                # Built from base_url. The previous literal kept the text
                # "'+ database_name +'" inside the string, because the
                # concatenation was written inside the double quotes.
                "@id": base_url + "annotationModel",
                "@type": "TraceModel",
                "inBase": "./",
                "hasUnit": "millisecond",
            },
            {
                "@id": "#Annotation",
                "@type": "ObselType",
            },
            # TODO
        ],
    }
def create_trace():
    """Create the stored trace ``stored_trace_name`` unless it already exists.

    Existence is probed with a GET on ``trace_url``; when that succeeds the
    function only prints a notice. Otherwise a ``StoredTrace`` description
    is POSTed to ``base_url``. A failed creation is reported on stderr; no
    exception is raised for it.
    """
    if requests.get(trace_url).ok:
        print('Trace', stored_trace_name, 'already exists, skipping creation')
        return

    post_request = {
        "@id": stored_trace_name + '/',
        "@type": "StoredTrace",
        "hasModel": "http://liris.cnrs.fr/silex/2011/simple-trace-model",
        "origin": "1970-01-01T00:00:00Z",
    }
    headers = {'Content-type': 'application/json'}
    ret = requests.post(base_url, data=json.dumps(post_request), headers=headers)
    if not ret.ok:
        # The response used to be discarded, so a refused creation went
        # unnoticed until the obsel uploads failed.
        print('Warning : could not create trace', stored_trace_name,
              '(HTTP status', ret.status_code, ')', file=sys.stderr)
def add_obsel(obsel):
    """POST one obsel (a JSON-serialisable dict) to the stored trace.

    A failed request is reported on stderr; no exception is raised for it,
    so the remaining obsels are still attempted by the caller.
    """
    headers = {'Content-type': 'application/json'}
    ret = requests.post(trace_url, data=json.dumps(obsel), headers=headers)
    if not ret.ok:
        # The response used to be ignored, so failed obsels vanished silently.
        print('Warning : could not add obsel', obsel.get('@id'),
              '(HTTP status', ret.status_code, ')', file=sys.stderr)
def delete():
    """Send a DELETE request for the stored trace, then for its base."""
    # Order matters: the trace is addressed inside the base, so it goes first.
    for resource_url in (trace_url, base_url):
        requests.delete(resource_url)
| ############### Hypothes.is related functions ############### | |
def fetch_annotations():
    """Fetch every annotation of ``group`` from the hypothes.is search API.

    The search endpoint returns results one page at a time, so a single
    request only yields the first page. This function keeps requesting
    pages (``limit`` / ``offset``) until the ``total`` reported by the
    server is reached or an empty page comes back.

    :return: list of annotation dicts, in ascending order.
    :raises KeyError: if a response carries no ``rows`` entry (for example
        an error payload).
    """
    # NOTE(review): 200 is assumed to be the largest page size the API
    # accepts -- confirm against the hypothes.is API reference.
    page_size = 200
    query = {
        'group': group,
        'order': 'asc',
        'limit': page_size,
        'offset': 0,
    }
    headers = {
        'Host': hypothesis_host,
        'Accept': 'application/json',
        'Authorization': 'Bearer ' + TOKEN,
    }
    url = hypothesis_root + 'search'

    rows = []
    while True:
        ret = requests.get(url, params=query, headers=headers)
        annotations = json.loads(ret.text)
        page = annotations['rows']
        if not page:
            break
        rows.extend(page)
        # Without a usable "total" this stops after the first page, which
        # is what the single-request version returned.
        if len(rows) >= annotations.get('total', 0):
            break
        query['offset'] = len(rows)
    return rows
| ############### Usability functions ############### | |
def print_usage():
    """Print the one-line command synopsis to standard output."""
    synopsis = '{} [-d|--delete]'.format(sys.argv[0])
    print(synopsis)
def handle_options(argv):
    """Parse the command-line options and act on them.

    ``-d`` / ``--delete`` deletes the trace and the base, prints a
    confirmation and exits with status 0. An unknown option prints the
    usage line and exits with status 2. Without options the function
    simply returns.

    :param argv: argument list without the program name.
    """
    try:
        opts, _args = getopt.getopt(argv, "d", ["delete"])
    except getopt.GetoptError:
        print_usage()
        sys.exit(2)

    for opt, _value in opts:
        # getopt reports the long form as '--delete'. Matching only '-d'
        # made '--delete' a silent no-op, after which the script went on
        # to create and upload instead of deleting.
        if opt in ('-d', '--delete'):
            delete()
            print('everything deleted !')
            sys.exit(0)
| ############### MAIN ############### | |
if __name__ == '__main__':
    # Options first: -d deletes everything and exits before any creation.
    handle_options(sys.argv[1:])

    # The API token is read from the environment; fetch_annotations() reads
    # this module-level name.
    TOKEN = os.environ.get('H_DEV_TOKEN')
    if TOKEN is None:
        print("Warning : The environment var 'H_DEV_TOKEN' must contain your API token. Some requests may fail.")
        TOKEN = ''

    print('Creating database', base_url)
    create_database()
    # create_model() is deliberately not called here.

    print("Creating trace '" + stored_trace_name + "'")
    create_trace()

    print('Fetching annotations...')
    annotations = fetch_annotations()

    # Convert everything before uploading anything, so a conversion error
    # aborts the run before the first obsel is sent.
    print('Converting to obsels...')
    obsels = list(map(annotation_to_obsel, annotations))

    print('Adding obsels to the kTBS')
    for obsel in obsels:
        add_obsel(obsel)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment