Skip to content

Instantly share code, notes, and snippets.

@gus3000
Last active June 8, 2017 08:20
Show Gist options
  • Select an option

  • Save gus3000/a5d8073718a56c5792e241ae645f13b4 to your computer and use it in GitHub Desktop.

Select an option

Save gus3000/a5d8073718a56c5792e241ae645f13b4 to your computer and use it in GitHub Desktop.
Extracts the hypothes.is annotations of a given group and stores them in a kTBS server
#!/bin/python3
import sys
import requests
import json
import os
import getopt
from datetime import datetime
# user variables -- you can modify these!
# Hypothes.is account name (not referenced by the code visible in this file).
user = 'gus3000'
# Identifier of the Hypothes.is group whose annotations are fetched.
group = 'QgXXr5E3'
# Value sent as the HTTP ``Host`` header of the Hypothes.is requests.
hypothesis_host = 'hypothes.is'
# HTTPS on purpose: the API token travels in the Authorization header and
# must not be sent in cleartext.
hypothesis_url = 'https://hypothes.is/'
# Name of the kTBS base that receives the trace.
database_name = 'hypothesis'
# Name of the stored trace, inside the base, that receives the obsels.
stored_trace_name = 'annotations'
# Root URL of the kTBS server (must end with a slash).
ktbs_url = 'http://localhost:8001/'
# end of user variables

# Derived URLs -- computed from the settings above, do not edit.
hypothesis_root = hypothesis_url + 'api/'
base_url = ktbs_url + database_name + '/'
trace_url = base_url + stored_trace_name + '/'
############### Translation functions ###############
def translateKeys(obj):
    """Rename the keys of one decoded JSON object for use in an obsel.

    ``"id"`` becomes ``"@id"``; every other key gets an ``"m:"`` prefix
    unless it already carries one.  Values are left untouched.  The
    function is suitable as a ``json.loads`` ``object_hook``.

    Args:
        obj: dict whose keys are strings.  It is modified in place.

    Returns:
        The same dict object, with its keys renamed.
    """
    # Build the renamed mapping separately: adding and deleting keys while
    # iterating over the dict itself either raises RuntimeError or visits
    # freshly renamed keys a second time, depending on the Python version.
    translated = {}
    for key, value in obj.items():
        if key == "id":
            translated["@id"] = value
        elif key.startswith('m:'):
            translated[key] = value
        else:
            translated['m:' + key] = value

    # Swap the content in place so callers holding a reference to ``obj``
    # still observe the change, as they did before.
    obj.clear()
    obj.update(translated)
    return obj
def annotation_to_obsel(annotation):
    """Build the obsel dict that represents one Hypothes.is annotation.

    Args:
        annotation: JSON-serialisable dict holding at least a ``'created'``
            entry that ``rfc3339_to_timestamp`` can parse.

    Returns:
        A new dict: a copy of the annotation whose keys (at every nesting
        level) went through ``translateKeys``, plus ``"@type"``, ``"begin"``
        and ``"end"``.  Begin and end are both the creation time, in seconds
        since the epoch.
    """
    created_at = rfc3339_to_timestamp(annotation['created'])

    # Serialising and decoding again yields an independent copy and lets
    # the JSON decoder apply translateKeys to every nested object.
    serialized = json.dumps(annotation)
    obsel = json.loads(serialized, object_hook=translateKeys)

    obsel.update({
        "@type": "m:SimpleObsel",
        "begin": created_at,
        "end": created_at,
    })
    return obsel
def rfc3339_to_timestamp(date_utc):
    """Convert an RFC 3339 date-time string to seconds since the Unix epoch.

    Accepted forms are ``YYYY-MM-DDTHH:MM:SS`` with an optional fractional
    part, followed by either ``Z`` or a numeric ``+HH:MM`` / ``-HH:MM``
    offset.  The offset is honoured, so the result is always relative to
    1970-01-01T00:00:00 UTC.

    Args:
        date_utc: the date-time string.

    Returns:
        Whole seconds since the epoch as an ``int`` (fraction discarded).

    Raises:
        ValueError: if the string does not match the accepted forms.
    """
    text = date_utc
    # strptime's %z understands "+HHMM" on every Python 3 version, whereas
    # "Z" and "+HH:MM" are only accepted by recent ones -- normalise first.
    if text.endswith(('Z', 'z')):
        text = text[:-1] + '+0000'
    elif len(text) > 6 and text[-6] in '+-' and text[-3] == ':':
        text = text[:-3] + text[-2:]

    if '.' in text:
        fmt = '%Y-%m-%dT%H:%M:%S.%f%z'
    else:
        fmt = '%Y-%m-%dT%H:%M:%S%z'
    aware_dt = datetime.strptime(text, fmt)

    # Shift to UTC and drop the tzinfo so the subtraction below stays
    # between naive datetimes.
    utc_dt = aware_dt.replace(tzinfo=None) - aware_dt.utcoffset()
    return int((utc_dt - datetime(1970, 1, 1)).total_seconds())
############### KTBS related functions ###############
def create_database():
    """Create the kTBS base named ``database_name`` unless it already exists.

    Existence is probed with a GET on ``base_url``.  When the base is
    missing, its JSON description is POSTed to ``ktbs_url``.  A failed
    creation is reported on stderr instead of being silently ignored; no
    exception is raised for it, so the script keeps its best-effort flow.
    """
    if requests.get(base_url).ok:
        print('Database', database_name, 'already exists, skipping creation')
        return

    payload = {
        "@id": database_name + '/',
        "@type": "Base",
        "label": "Hypothes.is corresponding database",
    }
    headers = {'Content-type': 'application/json'}
    ret = requests.post(ktbs_url, data=json.dumps(payload), headers=headers)
    if not ret.ok:
        print('Warning : could not create database', database_name,
              '(HTTP status', str(ret.status_code) + ')', file=sys.stderr)
def create_model():
    """Describe the trace model for annotations (unfinished stub).

    The function currently only builds the JSON-LD description; nothing is
    sent to the kTBS and nothing is returned.  Its only call site in this
    file is commented out.
    """
    model = {
        "@context": "http://liris.cnrs.fr/silex/2011/ktbs-jsonld-context",
        "@graph": [
            {
                # Built from base_url: the previous literal embedded the
                # text "'+ database_name +'" verbatim because of mismatched
                # quotes, and hard-coded the kTBS host.
                "@id": base_url + "annotationModel",
                "@type": "TraceModel",
                "inBase": "./",
                "hasUnit": "millisecond",
            },
            {
                "@id": "#Annotation",
                "@type": "ObselType",
            },
            # TODO
        ],
    }
def create_trace():
    """Create the stored trace ``stored_trace_name`` unless it already exists.

    Existence is probed with a GET on ``trace_url``.  When the trace is
    missing, its JSON description is POSTed to ``base_url``.  A failed
    creation is reported on stderr instead of being silently ignored; no
    exception is raised for it.
    """
    if requests.get(trace_url).ok:
        print('Trace', stored_trace_name, 'already exists, skipping creation')
        return

    payload = {
        "@id": stored_trace_name + '/',
        "@type": "StoredTrace",
        "hasModel": "http://liris.cnrs.fr/silex/2011/simple-trace-model",
        "origin": "1970-01-01T00:00:00Z",
    }
    headers = {'Content-type': 'application/json'}
    ret = requests.post(base_url, data=json.dumps(payload), headers=headers)
    if not ret.ok:
        print('Warning : could not create trace', stored_trace_name,
              '(HTTP status', str(ret.status_code) + ')', file=sys.stderr)
def add_obsel(obsel):
    """POST one obsel, serialised as JSON, to the stored trace at ``trace_url``.

    Args:
        obsel: JSON-serialisable obsel description.

    A rejected obsel is reported on stderr (it used to go unnoticed); no
    exception is raised for it, so the remaining obsels are still sent.
    """
    headers = {'Content-type': 'application/json'}
    ret = requests.post(trace_url, data=json.dumps(obsel), headers=headers)
    if not ret.ok:
        print('Warning : an obsel was rejected by the kTBS',
              '(HTTP status', str(ret.status_code) + ')', file=sys.stderr)
def delete():
    """Send a DELETE request for the stored trace, then for its base.

    The responses are not inspected.
    """
    # Order matters: the trace lives under the base, so it goes first.
    for url in (trace_url, base_url):
        requests.delete(url)
############### Hypothes.is related functions ###############
def fetch_annotations():
    """Fetch every annotation of ``group`` from the Hypothes.is search API.

    The search endpoint returns results one page at a time (20 rows unless
    ``limit`` is given), so a single request silently truncated larger
    groups.  Pages are now requested with ``limit``/``offset`` until the
    server has no more rows.

    The module-level ``TOKEN`` must be set before this function is called.

    Returns:
        list of annotation dicts (the concatenated ``'rows'`` of each page).

    Raises:
        KeyError: if a response carries no ``'rows'`` entry (for instance
            an error payload), as before.
    """
    # Bounds taken from the Hypothes.is API documentation of GET /search.
    page_size = 200    # largest accepted ``limit``
    max_offset = 9800  # largest accepted ``offset``

    headers = {
        'Host': hypothesis_host,
        'Accept': 'application/json',
        'Authorization': 'Bearer ' + TOKEN,
    }
    url = hypothesis_root + 'search'

    rows = []
    while len(rows) <= max_offset:
        query = {
            'group': group,
            'order': 'asc',
            'limit': page_size,
            'offset': len(rows),
        }
        ret = requests.get(url, params=query, headers=headers)
        payload = json.loads(ret.text)
        page = payload['rows']
        if not page:
            break
        rows.extend(page)
        # Spare one round trip when the server states the total count.
        if 'total' in payload and len(rows) >= payload['total']:
            break
    return rows
############### Usability functions ###############
def print_usage():
    """Print the command-line synopsis of the script on standard output."""
    program = sys.argv[0]
    print('{} {}'.format(program, '[-d|--delete]'))
def handle_options(argv):
    """Parse the command-line options and act on them.

    Args:
        argv: the argument list without the program name.

    With ``-d`` or ``--delete`` the trace and the base are deleted and the
    process exits with status 0.  An unknown option prints the usage and
    exits with status 2.  Otherwise the function simply returns.
    """
    try:
        opts, _args = getopt.getopt(argv, "d", ["delete"])
    except getopt.GetoptError:
        print_usage()
        sys.exit(2)

    for opt, _value in opts:
        # getopt reports the long form as '--delete'; testing only '-d'
        # made the advertised long option a silent no-op.
        if opt in ('-d', '--delete'):
            delete()
            print('everything deleted !')
            sys.exit(0)
############### MAIN ###############
if __name__ == '__main__':
    # Options come first: the delete option exits before anything is fetched.
    handle_options(sys.argv[1:])

    # fetch_annotations() reads the module-level TOKEN, so it has to be a
    # string even when the environment variable is absent.
    TOKEN = os.environ.get('H_DEV_TOKEN')
    if TOKEN is None:
        print("Warning : The environment var 'H_DEV_TOKEN' must contain your API token. Some requests may fail.")
        TOKEN = ''

    # Prepare the kTBS side: the base first, then the trace inside it.
    print('Creating database', base_url)
    create_database()
    # create_model()  -- left disabled, the model description is unfinished
    print("Creating trace '" + stored_trace_name + "'")
    create_trace()

    # Pull the annotations, convert all of them, and only then upload.
    print('Fetching annotations...')
    annotations = fetch_annotations()
    print('Converting to obsels...')
    obsels = list(map(annotation_to_obsel, annotations))
    print('Adding obsels to the kTBS')
    for obsel in obsels:
        add_obsel(obsel)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment