Skip to content

Instantly share code, notes, and snippets.

@JakaKokosar
Last active February 25, 2019 15:27
Show Gist options
  • Select an option

  • Save JakaKokosar/741fee5af312d943c7688febc0757777 to your computer and use it in GitHub Desktop.

Select an option

Save JakaKokosar/741fee5af312d943c7688febc0757777 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Orange datasets are hosted on http://datasets.biolab.si/\n",
"\n",
"Folder structure:\n",
"\n",
"\"core\" -> publicly available datasets in Orange\n",
"\"sc \" -> publicly available datasets in scOrange\n",
"\n",
"To read and download datasets we use \"https://github.com/biolab/serverfiles\"\n",
"installable trough \"pip install serverfiles\"\n",
"\n",
"\"\"\"\n",
"\n",
"import serverfiles\n",
"from Orange.data import Table"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('DC_expMatrix_DCnMono.tab.gz',),\n",
" ('DC_expMatrix_deeper.characterization.tab.gz',),\n",
" ('aml-1k.pickle',),\n",
" ('aml-8k.pickle',),\n",
" ('ccp_data_Tcells_normCounts.counts.all_genes.tab.gz',),\n",
" ('ccp_data_Tcells_normCounts.counts.cycle_genes.tab.gz',),\n",
" ('ccp_data_liver.counts.all_genes.tab.gz',),\n",
" ('ccp_data_liver.counts.cycle_genes.tab.gz',),\n",
" ('ccp_data_mESCbulk.counts.all_genes.tab.gz',),\n",
" ('ccp_data_mESCbulk.counts.cycle_genes.tab.gz',),\n",
" ('ccp_normCountsBuettnerEtAl.counts.all_genes.tab.gz',),\n",
" ('ccp_normCountsBuettnerEtAl.counts.cycle_genes.tab.gz',),\n",
" ('ccp_normCounts_mESCquartz.counts.all_genes.tab.gz',),\n",
" ('ccp_normCounts_mESCquartz.counts.cycle_genes.tab.gz',),\n",
" ('cdp_expression_macosko.tab.gz',),\n",
" ('cdp_expression_shekhar.tab.gz',),\n",
" ('dm_proj_neurons_li2017.pkl.gz',),\n",
" ('nestorawa_forcellcycle.pkl.gz',),\n",
" ('pbmc_kang2018_raw_control.pkl.gz',),\n",
" ('pbmc_kang2018_raw_stimulated.pkl.gz',),\n",
" ('pbmc_kang2018_sample.pkl.gz',)]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"single_cell_url = 'http://datasets.biolab.si/sc'\n",
"\n",
"# connect to the HTTP file server\n",
"sf = serverfiles.ServerFiles(server=single_cell_url)\n",
"\n",
"# list all available files\n",
"sf.listfiles()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# setup local folder structure\n",
"lf = serverfiles.LocalFiles('path_to_my_data', serverfiles=sf)\n",
"lf.listfiles()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('aml-1k.pickle',)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Download data from serverfiles to localfiles\n",
"lf.download('aml-1k.pickle')\n",
"lf.listfiles()\n",
"\n",
"# to get the path of downloaded file\n",
"# lf.localpath('aml-1k.pickle')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"path_to_my_data/aml-1k.pickle\n",
"[('aml-1k.pickle',)]\n",
"path_to_my_data/aml-8k.pickle\n",
"[('aml-8k.pickle',), ('aml-1k.pickle',)]\n"
]
}
],
"source": [
"# to get a path of a given file use localpath_download,\n",
"# if the file does not exist it will be downloaded from the\n",
"# serverfiles automatically\n",
"\n",
"aml_1k = lf.localpath_download('aml-1k.pickle')\n",
"print(aml_1k)\n",
"print(lf.listfiles())\n",
"aml_8k = lf.localpath_download('aml-8k.pickle')\n",
"print(aml_8k)\n",
"print(lf.listfiles())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[0.000, 0.000, 5.648, 5.267, 0.000, ... | healthy] {1, 6681b0788fc2b2e21975e26f588cc7a9, ACGGGAGATGACCA-1},\n",
" [0.983, 0.573, 0.000, 0.000, 0.000, ... | AML] {1, 7c1e27874a82e7d5c1c877fa2cba7ba7, GAACAGCTGCTTAG-1},\n",
" [0.000, 0.000, 0.000, 0.000, 4.069, ... | healthy] {2, b9d78fbc8b1bf9aa478e1fbd93ac883c, TCTTACGAAAAAGC-1},\n",
" [0.000, 0.000, 0.000, 0.000, 2.304, ... | healthy] {1, 841c0e79f017a8ece40b2532f82a9c7a, CGAGCCGACTCTAT-1},\n",
" [0.000, 0.000, 0.000, 0.000, 0.000, ... | healthy] {2, 08c51bbdc6bb95fc17cd3965e6d6c4fb, GCAAACTGTTGGCA-1},\n",
" ...\n",
"]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This is useful because now we can do something like this:\n",
"def load(experiment):\n",
" # checks if new version available\n",
" lf.update(experiment)\n",
" # ensures that we have the data localy\n",
" return Table(lf.localpath_download(experiment))\n",
"\n",
"# note that using load method we keep our localfiles and serverfiles in sync.\n",
"# it will always get the latest version from the serverfiles.\n",
"load('aml-1k.pickle')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# if one does not want to work with LocalFiles \n",
"# access data directly trough ServerFiles\n",
"\n",
"sf.download('aml-1k.pickle', target='./my_downloaded_file.pickle')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![alt text](https://user-images.githubusercontent.com/15876321/53346326-16bc9700-3917-11e9-835d-00a5407310ea.png)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment