Last active
February 25, 2019 15:27
-
-
Save JakaKokosar/741fee5af312d943c7688febc0757777 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "\"\"\"\n", | |
| "Orange datasets are hosted on http://datasets.biolab.si/\n", | |
| "\n", | |
| "Folder structure:\n", | |
| "\n", | |
| "\"core\" -> publicly available datasets in Orange\n", | |
| "\"sc\" -> publicly available datasets in scOrange\n", | |
| "\n", | |
| "To read and download datasets we use \"https://github.com/biolab/serverfiles\"\n", | |
| "installable through \"pip install serverfiles\"\n", | |
| "\n", | |
| "\"\"\"\n", | |
| "\n", | |
| "import serverfiles\n", | |
| "from Orange.data import Table" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('DC_expMatrix_DCnMono.tab.gz',),\n", | |
| " ('DC_expMatrix_deeper.characterization.tab.gz',),\n", | |
| " ('aml-1k.pickle',),\n", | |
| " ('aml-8k.pickle',),\n", | |
| " ('ccp_data_Tcells_normCounts.counts.all_genes.tab.gz',),\n", | |
| " ('ccp_data_Tcells_normCounts.counts.cycle_genes.tab.gz',),\n", | |
| " ('ccp_data_liver.counts.all_genes.tab.gz',),\n", | |
| " ('ccp_data_liver.counts.cycle_genes.tab.gz',),\n", | |
| " ('ccp_data_mESCbulk.counts.all_genes.tab.gz',),\n", | |
| " ('ccp_data_mESCbulk.counts.cycle_genes.tab.gz',),\n", | |
| " ('ccp_normCountsBuettnerEtAl.counts.all_genes.tab.gz',),\n", | |
| " ('ccp_normCountsBuettnerEtAl.counts.cycle_genes.tab.gz',),\n", | |
| " ('ccp_normCounts_mESCquartz.counts.all_genes.tab.gz',),\n", | |
| " ('ccp_normCounts_mESCquartz.counts.cycle_genes.tab.gz',),\n", | |
| " ('cdp_expression_macosko.tab.gz',),\n", | |
| " ('cdp_expression_shekhar.tab.gz',),\n", | |
| " ('dm_proj_neurons_li2017.pkl.gz',),\n", | |
| " ('nestorawa_forcellcycle.pkl.gz',),\n", | |
| " ('pbmc_kang2018_raw_control.pkl.gz',),\n", | |
| " ('pbmc_kang2018_raw_stimulated.pkl.gz',),\n", | |
| " ('pbmc_kang2018_sample.pkl.gz',)]" | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "single_cell_url = 'http://datasets.biolab.si/sc'\n", | |
| "\n", | |
| "# connect to the HTTP file server\n", | |
| "sf = serverfiles.ServerFiles(server=single_cell_url)\n", | |
| "\n", | |
| "# list all available files\n", | |
| "sf.listfiles()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[]" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# setup local folder structure\n", | |
| "lf = serverfiles.LocalFiles('path_to_my_data', serverfiles=sf)\n", | |
| "lf.listfiles()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[('aml-1k.pickle',)]" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Download data from serverfiles to localfiles\n", | |
| "lf.download('aml-1k.pickle')\n", | |
| "lf.listfiles()\n", | |
| "\n", | |
| "# to get the path of downloaded file\n", | |
| "# lf.localpath('aml-1k.pickle')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "path_to_my_data/aml-1k.pickle\n", | |
| "[('aml-1k.pickle',)]\n", | |
| "path_to_my_data/aml-8k.pickle\n", | |
| "[('aml-8k.pickle',), ('aml-1k.pickle',)]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# to get a path of a given file use localpath_download,\n", | |
| "# if the file does not exist it will be downloaded from the\n", | |
| "# serverfiles automatically\n", | |
| "\n", | |
| "aml_1k = lf.localpath_download('aml-1k.pickle')\n", | |
| "print(aml_1k)\n", | |
| "print(lf.listfiles())\n", | |
| "aml_8k = lf.localpath_download('aml-8k.pickle')\n", | |
| "print(aml_8k)\n", | |
| "print(lf.listfiles())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[[0.000, 0.000, 5.648, 5.267, 0.000, ... | healthy] {1, 6681b0788fc2b2e21975e26f588cc7a9, ACGGGAGATGACCA-1},\n", | |
| " [0.983, 0.573, 0.000, 0.000, 0.000, ... | AML] {1, 7c1e27874a82e7d5c1c877fa2cba7ba7, GAACAGCTGCTTAG-1},\n", | |
| " [0.000, 0.000, 0.000, 0.000, 4.069, ... | healthy] {2, b9d78fbc8b1bf9aa478e1fbd93ac883c, TCTTACGAAAAAGC-1},\n", | |
| " [0.000, 0.000, 0.000, 0.000, 2.304, ... | healthy] {1, 841c0e79f017a8ece40b2532f82a9c7a, CGAGCCGACTCTAT-1},\n", | |
| " [0.000, 0.000, 0.000, 0.000, 0.000, ... | healthy] {2, 08c51bbdc6bb95fc17cd3965e6d6c4fb, GCAAACTGTTGGCA-1},\n", | |
| " ...\n", | |
| "]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# This is useful because now we can do something like this:\n", | |
| "def load(experiment):\n", | |
| " # checks if new version available\n", | |
| " lf.update(experiment)\n", | |
| " # ensures that we have the data locally\n", | |
| " return Table(lf.localpath_download(experiment))\n", | |
| "\n", | |
| "# note that using load method we keep our localfiles and serverfiles in sync.\n", | |
| "# it will always get the latest version from the serverfiles.\n", | |
| "load('aml-1k.pickle')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# if one does not want to work with LocalFiles \n", | |
| "# access data directly through ServerFiles\n", | |
| "\n", | |
| "sf.download('aml-1k.pickle', target='./my_downloaded_file.pickle')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment