Created
March 30, 2022 15:20
-
-
Save kantale/6a6308b326dd9782b609d30e53b78051 to your computer and use it in GitHub Desktop.
test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "9a9cd5f6", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "C:\\Users\\user\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!cd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "2e3c5b3e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f = open('mitsos.txt')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "5add9f33", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f = open('C:/Users/user/Downloads/gwas_catalog_v1.0-associations_e105_r2022-03-23.tsv')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "855464ef", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "49c835b0", | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "ename": "SyntaxError", | |
| "evalue": "EOL while scanning string literal (1419483448.py, line 1)", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[1;36m Input \u001b[1;32mIn [6]\u001b[1;36m\u001b[0m\n\u001b[1;33m f = open(r'C:\\Users\\user\\Downloads\\')\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m EOL while scanning string literal\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "f = open(r'C:\\Users\\user\\Downloads\\')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "cb74cab6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "c54e245c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<_io.TextIOWrapper name='C:/Users/user/results.txt' mode='r' encoding='cp1253'>" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "f" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "675037ff", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "_io.TextIOWrapper" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "type(f)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "91509d59", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "s = f.read()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "ae481e3b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'this is a fantastic file\\nvery precious data\\nmuch science\\nnobel\\n'" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "s" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "44aa220c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "this is a fantastic file\n", | |
| "very precious data\n", | |
| "much science\n", | |
| "nobel\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (s)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "id": "e2a7a3d9", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "g = f.read()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "id": "d6b24664", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (g)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "id": "9d04f1b0", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(g)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "id": "63623e7d", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "id": "18ebe4db", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "id": "3c274b33", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "g = f.read()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "id": "2180acec", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'this is a fantastic file\\nvery precious data\\nmuch science\\nnobel\\n'" | |
| ] | |
| }, | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "g" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "id": "638275eb", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "id": "5eeb7b1c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "id": "1aec6ac5", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "line = f.readline()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "id": "6cf41a73", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "this is a fantastic file\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (line)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 24, | |
| "id": "72fa5759", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "g = f.read()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "id": "4bc07fa0", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "very precious data\n", | |
| "much science\n", | |
| "nobel\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (g)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "id": "807e280c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "id": "68f4c41b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "id": "b7f017ae", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "a = f.readline()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "id": "b833a5d0", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "b = f.readline()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "id": "7cad2af7", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "very precious data\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (b)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 31, | |
| "id": "28233975", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "much science\n", | |
| "nobel\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (f.read())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "id": "34832a8e", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "10" | |
| ] | |
| }, | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "f.seek(10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "id": "74c46fc6", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'fantastic file\\n'" | |
| ] | |
| }, | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "f.readline()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "id": "146bfadb", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "id": "89dfc799", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "id": "918bf6b4", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "this is a fantastic file\n", | |
| "\n", | |
| "very precious data\n", | |
| "\n", | |
| "much science\n", | |
| "\n", | |
| "nobel\n", | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')\n", | |
| "while True:\n", | |
| " line = f.readline()\n", | |
| " print (line)\n", | |
| " if not line:\n", | |
| " break\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "id": "ca1b40be", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "this is a fantastic file\n", | |
| "\n", | |
| "very precious data\n", | |
| "\n", | |
| "much science\n", | |
| "\n", | |
| "nobel\n", | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')\n", | |
| "while True:\n", | |
| " line = f.readline()\n", | |
| " print (line)\n", | |
| " if len(line) == 0:\n", | |
| " break\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "id": "2e323488", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "this is a fantastic file\n", | |
| "\n", | |
| "very precious data\n", | |
| "\n", | |
| "much science\n", | |
| "\n", | |
| "nobel\n", | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')\n", | |
| "while True:\n", | |
| " line = f.readline()\n", | |
| " print (line)\n", | |
| " if line == '':\n", | |
| " break\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "id": "ed3d5adf", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "S\n", | |
| "D\n", | |
| "A\n", | |
| "F\n", | |
| "G\n", | |
| "F\n", | |
| "D\n", | |
| "S\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for x in 'SDAFGFDS':\n", | |
| " print (x)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "id": "1a6abddf", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "5\n", | |
| "6\n", | |
| "7\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for x in [5,6,7,]:\n", | |
| " print (x)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 70, | |
| "id": "4cf8358b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "this is a fantastic file\n", | |
| "\n", | |
| "very precious data\n", | |
| "\n", | |
| "much science\n", | |
| "\n", | |
| "nobel\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "f = open('C:/Users/user/results.txt')\n", | |
| "for line in f:\n", | |
| " print (line)\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 71, | |
| "id": "71008beb", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "this is a fantastic file\n", | |
| "\n", | |
| "very precious data\n", | |
| "\n", | |
| "much science\n", | |
| "\n", | |
| "nobel\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "with open('C:/Users/user/results.txt') as f:\n", | |
| " for l in f:\n", | |
| " print (l)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 72, | |
| "id": "267081b8", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "l = []" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 73, | |
| "id": "853f918b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "much science\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "with open('C:/Users/user/results.txt') as f:\n", | |
| " lc = 0\n", | |
| " for l in f:\n", | |
| " lc += 1\n", | |
| " if lc<3:\n", | |
| " continue\n", | |
| " print (l)\n", | |
| " break" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 91, | |
| "id": "18fcfe24", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "dyjfjfghjfkhjthjhkhkhjfgfgmgdd.fgjdalkgjshdlfkgjhsdlkfjghsldkfjghsldkjghskldfjghlsdkfjhglskdfjghlskdfjghskldjfghsldkfjghsldkjgh sljgsldkj ghskldjgh sldkgjh skldfjgh skldfjghsldkjghsldkfjgh skldjg hsldfkjghsldkjg hsldfkjgh sldkjgh sldkgjh sldkjg hsdlfkjgh sdfkjg hsldkfjg hsldkfjg hsldkfjg hsldkfjg hsldkfjgh sldkfjg sldkfjgh sldkjgh sldfjgh sldkfjgh sldkfjgh sldkfjg hsldkfjg hsldkfjgh sldkfjgh sldkfjgh fjkldh\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "with open('C:/Users/user/results.txt') as f:\n", | |
| "\n", | |
| " for lc, l in enumerate(f):\n", | |
| " if lc < 6:\n", | |
| " continue\n", | |
| " print (l)\n", | |
| " break" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "0ea53c41", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 79, | |
| "id": "fc41c82e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "my_list = [] \n", | |
| "with open('C:/Users/user/results.txt') as f:\n", | |
| "\n", | |
| " for lc, l in enumerate(f):\n", | |
| " if lc in {1,2}:\n", | |
| " my_list.append(l)\n", | |
| " \n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 78, | |
| "id": "486fa248", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['very precious data\\n', 'much science. bravo!\\n']" | |
| ] | |
| }, | |
| "execution_count": 78, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "my_list" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 80, | |
| "id": "a711106b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with open('C:/Users/user/results.txt') as f:\n", | |
| "\n", | |
| " my_list = [l for lc, l in enumerate(f) if lc in {1,2}]\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 81, | |
| "id": "6be9a275", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['very precious data\\n', 'much science. bravo!\\n']" | |
| ] | |
| }, | |
| "execution_count": 81, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "my_list" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 82, | |
| "id": "d39cab02", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| " my_list = [l for lc, l in enumerate(open('C:/Users/user/results.txt')) if lc in {1,2}]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 83, | |
| "id": "53e6f47c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['very precious data\\n', 'much science. bravo!\\n']" | |
| ] | |
| }, | |
| "execution_count": 83, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "my_list" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 84, | |
| "id": "73dcc947", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "a = 'ghjkgfghj\\n'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 85, | |
| "id": "6ee824de", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'ghjkgfghj\\n'" | |
| ] | |
| }, | |
| "execution_count": 85, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 86, | |
| "id": "667eae1f", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'ghjkgfghj'" | |
| ] | |
| }, | |
| "execution_count": 86, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a.strip()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 87, | |
| "id": "213abd5c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'sfdagsagszdfg'" | |
| ] | |
| }, | |
| "execution_count": 87, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "' sfdagsagszdfg '.strip()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 88, | |
| "id": "a3c4f62b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'sadfasdfasdf'" | |
| ] | |
| }, | |
| "execution_count": 88, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "'sadfasdfasdf\\n'[:-1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 89, | |
| "id": "58c1e766", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'sadfasdfasdf'" | |
| ] | |
| }, | |
| "execution_count": 89, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "'sadfasdfasdf\\n'.replace('\\n', '')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "64d13d07", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "036de9a6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "dc0c76a9", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "id": "12c94bfc", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "mitsos - alex\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print ('mitsos', end=' - ')\n", | |
| "print ('alex')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "id": "30416060", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "bool('sdfsdfg')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "id": "4d55acd0", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "False" | |
| ] | |
| }, | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "bool('')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 92, | |
| "id": "55ebe918", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "488\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "my_list = [] \n", | |
| "with open('C:/Users/user/results.txt') as f:\n", | |
| "\n", | |
| " data = f.read()\n", | |
| "print (len(data))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 93, | |
| "id": "8b524632", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "UnicodeDecodeError", | |
| "evalue": "'charmap' codec can't decode byte 0xd2 in position 16: character maps to <undefined>", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[1;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", | |
| "Input \u001b[1;32mIn [93]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m my_list \u001b[38;5;241m=\u001b[39m [] \n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:/Users/user/alex.docx\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m----> 4\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m (\u001b[38;5;28mlen\u001b[39m(data))\n", | |
| "File \u001b[1;32m~\\miniconda3\\lib\\encodings\\cp1253.py:23\u001b[0m, in \u001b[0;36mIncrementalDecoder.decode\u001b[1;34m(self, input, final)\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecode\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m, final\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m---> 23\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcodecs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcharmap_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43mdecoding_table\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n", | |
| "\u001b[1;31mUnicodeDecodeError\u001b[0m: 'charmap' codec can't decode byte 0xd2 in position 16: character maps to <undefined>" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "my_list = [] \n", | |
| "with open('C:/Users/user/alex.docx') as f:\n", | |
| "\n", | |
| " data = f.read()\n", | |
| "print (len(data))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 107, | |
| "id": "c9197a9f", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "379\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "my_list = [] \n", | |
| "with open('C:/Users/user/alex2.txt') as f:\n", | |
| "\n", | |
| " data = f.read()\n", | |
| "print (len(data))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 108, | |
| "id": "717bbc56", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Xf/lgk jh;flsgkhjsfkj ghsfkldghsldkjghsldkfjg hldakfjgh sldfkj ghsldkfjgh skldfjgh skldfjgh skldfjgh ldskfj ghsldkfj ghsldkfjg sldkfjgh sdljgh sldjgh sldkfjgh sldkjgh sldkjgh sldkfjg sldkfjg hsldkjgh sldkfjg hsldfjkg hsldkjgh ldfjgh sldkfjgh \n", | |
| "1. Xghfg\n", | |
| "2. Hfd\n", | |
| "3. Hfd\n", | |
| "4. Ghfd\n", | |
| "5. Ghd\n", | |
| "6. Fghfg\n", | |
| "7. h\n", | |
| "sldkfjg sldkfjgh slkdfjg sdfklgh skldfgh sdlkjg hsldfjgh sldkfjgh sldj ghlsdkf h\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 94, | |
| "id": "acea5310", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def f(l):\n", | |
| " return sum(l)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 95, | |
| "id": "fc6cf55f", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "60" | |
| ] | |
| }, | |
| "execution_count": 95, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "f([10, 20, 30])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 99, | |
| "id": "8f1e4ff2", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "60" | |
| ] | |
| }, | |
| "execution_count": 99, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "def f(l):\n", | |
| " if not len(l):\n", | |
| " return 0\n", | |
| " return l[0] + f(l[1:])\n", | |
| "\n", | |
| "f([10, 20, 30])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 105, | |
| "id": "38860d07", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "l = [['Helen', 23, 8], ['Kostas', 25, 9], ['Alex', 22, 9], ['Maria', 24, 7]]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 106, | |
| "id": "1bbbfd73", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "s = 'a σρετςερτςρε σδφγ zfdgsdfg'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 101, | |
| "id": "b0da2ea6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "l = [4,5,4,3,4,5,6,7,6,1,5,4]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 102, | |
| "id": "e62a9c2f", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "1" | |
| ] | |
| }, | |
| "execution_count": 102, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "min(l)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 104, | |
| "id": "4f8da98f", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "1" | |
| ] | |
| }, | |
| "execution_count": 104, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "def f(l):\n", | |
| "    \"\"\"Return the smallest element of the list l, found recursively.\n", | |
| "\n", | |
| "    Raises ValueError when l is empty, like the built-in min().\n", | |
| "    \"\"\"\n", | |
| "    if not l:\n", | |
| "        raise ValueError('f() arg is an empty list')\n", | |
| "    if len(l) == 1:\n", | |
| "        # base case: a single element is its own minimum\n", | |
| "        return l[0]\n", | |
| "\n", | |
| "    # keep the smaller of the first two elements and recurse on a list one item shorter\n", | |
| "    if l[0] < l[1]:\n", | |
| "        return f([l[0]] + l[2:])\n", | |
| "    return f([l[1]] + l[2:])\n", | |
| "\n", | |
| "\n", | |
| "f(l)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 110, | |
| "id": "ad7334b4", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "my_list = [] \n", | |
| "with open('C:/Users/user/findings.txt', 'w') as f:\n", | |
| "\n", | |
| " f.write('cvghjklcvbnjkfyucvhjkcvhcgh')\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "1bd94d08", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "983f66ba", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "68e5239a", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 115, | |
| "id": "136095ee", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "FileExistsError", | |
| "evalue": "[Errno 17] File exists: 'C:/Users/user/findings.txt'", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[1;31mFileExistsError\u001b[0m Traceback (most recent call last)", | |
| "Input \u001b[1;32mIn [115]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m my_list \u001b[38;5;241m=\u001b[39m [] \n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mC:/Users/user/findings.txt\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mx\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 4\u001b[0m f\u001b[38;5;241m.\u001b[39mwrite(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSUCH RESEARCH! WOW!!!!\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", | |
| "\u001b[1;31mFileExistsError\u001b[0m: [Errno 17] File exists: 'C:/Users/user/findings.txt'" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "my_list = [] \n", | |
| "with open('C:/Users/user/findings.txt', 'x') as f:\n", | |
| "\n", | |
| " f.write('SUCH RESEARCH! WOW!!!!')\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 118, | |
| "id": "36f6bca0", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "my_list = [] \n", | |
| "with open('C:/Users/user/findings.txt', 'w') as f:\n", | |
| "\n", | |
| " f.write('SUCH RESEARCH! WOW!!!!\\n')\n", | |
| " f.write('cannot get better than this!\\n')\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "cce3e72b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "id": "3b932420", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Progress: 0\n", | |
| "Progress: 10000\n", | |
| "Progress: 20000\n", | |
| "Progress: 30000\n", | |
| "Progress: 40000\n", | |
| "Progress: 50000\n", | |
| "Progress: 60000\n", | |
| "Progress: 70000\n", | |
| "Progress: 80000\n", | |
| "Progress: 90000\n", | |
| "Progress: 100000\n", | |
| "Progress: 110000\n", | |
| "Progress: 120000\n", | |
| "Progress: 130000\n", | |
| "Progress: 140000\n", | |
| "Progress: 150000\n", | |
| "Progress: 160000\n", | |
| "Progress: 170000\n", | |
| " read 175920 lines\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "data = []\n", | |
| "with open('C:/Users/user/Downloads/gwas_catalog_v1.0-associations_e105_r2022-03-23.tsv',\n", | |
| "          encoding='iso-8859-1') as f:\n", | |
| "    # the first line of the file holds the tab-separated column names\n", | |
| "    header = f.readline().strip().split('\\t')\n", | |
| "\n", | |
| "    # Take each record from the loop variable only: an extra f.readline() inside\n", | |
| "    # this loop would consume a second line per pass and drop every other record.\n", | |
| "    for line_counter, line in enumerate(f):\n", | |
| "\n", | |
| "        if line_counter % 10_000 == 0:\n", | |
| "            print (f'Progress: {line_counter}')\n", | |
| "\n", | |
| "        data.append(line.strip().split('\\t'))\n", | |
| "\n", | |
| "# report the number of records actually stored (also safe for a header-only file)\n", | |
| "print (f' read {len(data)} lines')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "026f83b9", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['2019-02-14',\n", | |
| " '29507422',\n", | |
| " 'Hoffmann TJ',\n", | |
| " '2018-03-05',\n", | |
| " 'Nat Genet',\n", | |
| " 'www.ncbi.nlm.nih.gov/pubmed/29507422',\n", | |
| " 'A large electronic-health-record-based genome-wide study of serum lipids.',\n", | |
| " 'High density lipoprotein cholesterol levels',\n", | |
| " '76,627 European ancestry individuals, 7,795 Hispanic individuals, 6,855 East Asian ancestry individuals, 2,958 African American individuals, 439 South Asian ancestry individuals',\n", | |
| " 'NA',\n", | |
| " '2p24.1',\n", | |
| " '2',\n", | |
| " '21041028',\n", | |
| " 'NR',\n", | |
| " 'APOB',\n", | |
| " '',\n", | |
| " '',\n", | |
| " 'ENSG00000084674',\n", | |
| " '',\n", | |
| " '',\n", | |
| " 'rs1367117-G',\n", | |
| " 'rs1367117',\n", | |
| " '0',\n", | |
| " '1367117',\n", | |
| " 'missense_variant',\n", | |
| " '0',\n", | |
| " 'NR',\n", | |
| " '3E-6',\n", | |
| " '5.522878745280337',\n", | |
| " '',\n", | |
| " '0.018',\n", | |
| " 'unit increase',\n", | |
| " 'Affymetrix [at least 7091467] (imputed)',\n", | |
| " 'N']" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "data[100]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 134, | |
| "id": "c3813db4", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['']" | |
| ] | |
| }, | |
| "execution_count": 134, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "''.strip().split('\\t')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "73b52e01", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'DATE ADDED TO CATALOG': '2019-02-14',\n", | |
| " 'PUBMEDID': '29507422',\n", | |
| " 'FIRST AUTHOR': 'Hoffmann TJ',\n", | |
| " 'DATE': '2018-03-05',\n", | |
| " 'JOURNAL': 'Nat Genet',\n", | |
| " 'LINK': 'www.ncbi.nlm.nih.gov/pubmed/29507422',\n", | |
| " 'STUDY': 'A large electronic-health-record-based genome-wide study of serum lipids.',\n", | |
| " 'DISEASE/TRAIT': 'High density lipoprotein cholesterol levels',\n", | |
| " 'INITIAL SAMPLE SIZE': '76,627 European ancestry individuals, 7,795 Hispanic individuals, 6,855 East Asian ancestry individuals, 2,958 African American individuals, 439 South Asian ancestry individuals',\n", | |
| " 'REPLICATION SAMPLE SIZE': 'NA',\n", | |
| " 'REGION': '2p24.1',\n", | |
| " 'CHR_ID': '2',\n", | |
| " 'CHR_POS': '21041028',\n", | |
| " 'REPORTED GENE(S)': 'NR',\n", | |
| " 'MAPPED_GENE': 'APOB',\n", | |
| " 'UPSTREAM_GENE_ID': '',\n", | |
| " 'DOWNSTREAM_GENE_ID': '',\n", | |
| " 'SNP_GENE_IDS': 'ENSG00000084674',\n", | |
| " 'UPSTREAM_GENE_DISTANCE': '',\n", | |
| " 'DOWNSTREAM_GENE_DISTANCE': '',\n", | |
| " 'STRONGEST SNP-RISK ALLELE': 'rs1367117-G',\n", | |
| " 'SNPS': 'rs1367117',\n", | |
| " 'MERGED': '0',\n", | |
| " 'SNP_ID_CURRENT': '1367117',\n", | |
| " 'CONTEXT': 'missense_variant',\n", | |
| " 'INTERGENIC': '0',\n", | |
| " 'RISK ALLELE FREQUENCY': 'NR',\n", | |
| " 'P-VALUE': '3E-6',\n", | |
| " 'PVALUE_MLOG': '5.522878745280337',\n", | |
| " 'P-VALUE (TEXT)': '',\n", | |
| " 'OR or BETA': '0.018',\n", | |
| " '95% CI (TEXT)': 'unit increase',\n", | |
| " 'PLATFORM [SNPS PASSING QC]': 'Affymetrix [at least 7091467] (imputed)',\n", | |
| " 'CNV': 'N'}" | |
| ] | |
| }, | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dict(zip(header, data[100]))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "id": "ef4efef8", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Transpose the row-wise `data` into a column-wise dict:\n", | |
| "# column name -> list with that column's value from every row.\n", | |
| "d = {}\n", | |
| "\n", | |
| "for item in data:\n", | |
| "    for h, v in zip(header, item):\n", | |
| "        # setdefault creates the list the first time a column is seen.\n", | |
| "        d.setdefault(h, []).append(v)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "id": "e137b689", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "123" | |
| ] | |
| }, | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "len(set(d['FIRST AUTHOR']))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "id": "6492bc17", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'Ahmed S',\n", | |
| " 'Allen RJ',\n", | |
| " 'Almgren P',\n", | |
| " 'Almli LM',\n", | |
| " 'Anney RJL',\n", | |
| " 'Arpawong TE',\n", | |
| " 'Astle WJ',\n", | |
| " 'Bei JX',\n", | |
| " 'Benyamin B',\n", | |
| " 'Biernacka JM',\n", | |
| " 'Bonas-Guarch S',\n", | |
| " 'Cha S',\n", | |
| " 'Chan JP',\n", | |
| " 'Chang X',\n", | |
| " 'Chaturvedi S',\n", | |
| " 'Chen H',\n", | |
| " 'Chenoweth MJ',\n", | |
| " 'Christophersen IE',\n", | |
| " 'Clarke TK',\n", | |
| " 'Coleman JRI',\n", | |
| " 'Conti DV',\n", | |
| " 'Cooper JD',\n", | |
| " 'Corre T',\n", | |
| " 'Darlow JM',\n", | |
| " 'Day FR',\n", | |
| " 'Delgado DA',\n", | |
| " 'Dong C',\n", | |
| " 'Dudenkov TM',\n", | |
| " 'Ferreira MA',\n", | |
| " 'Gao B',\n", | |
| " 'Gorski M',\n", | |
| " 'Graff M',\n", | |
| " 'Guo Q',\n", | |
| " 'Hammerschlag AR',\n", | |
| " 'Haryono SJ',\n", | |
| " 'Hinks A',\n", | |
| " 'Hofer P',\n", | |
| " 'Hoffmann TJ',\n", | |
| " 'Hong X',\n", | |
| " 'Hu X',\n", | |
| " 'Ikram MA',\n", | |
| " 'Ilboudo Y',\n", | |
| " 'Jia P',\n", | |
| " 'Jonsson L',\n", | |
| " 'Jun GR',\n", | |
| " 'Justice AE',\n", | |
| " 'Kawaguchi T',\n", | |
| " 'Kerr KF',\n", | |
| " 'Kim KW',\n", | |
| " 'Kim M',\n", | |
| " 'Kimura M',\n", | |
| " 'Konte B',\n", | |
| " 'Kristiansen W',\n", | |
| " 'Kunz M',\n", | |
| " 'Lee MH',\n", | |
| " 'Lee MK',\n", | |
| " 'Lee TH',\n", | |
| " 'Lencer R',\n", | |
| " 'Lessard CJ',\n", | |
| " 'Li C',\n", | |
| " 'Li D',\n", | |
| " 'Li J',\n", | |
| " 'Li M',\n", | |
| " 'Litchfield K',\n", | |
| " 'Liu JZ',\n", | |
| " 'Liu Y',\n", | |
| " 'Lu AT',\n", | |
| " 'Lutz SM',\n", | |
| " 'Lv H',\n", | |
| " 'Mack S',\n", | |
| " 'Magvanjav O',\n", | |
| " 'Marenholz I',\n", | |
| " 'McKay JD',\n", | |
| " 'Michailidou K',\n", | |
| " 'Milne RL',\n", | |
| " 'Miron J',\n", | |
| " 'Moore CB',\n", | |
| " 'Moore KN',\n", | |
| " 'Morris AP',\n", | |
| " 'Morton LM',\n", | |
| " 'Munz M',\n", | |
| " 'Nakada TA',\n", | |
| " 'Ng E',\n", | |
| " 'Nolte IM',\n", | |
| " 'Persad PJ',\n", | |
| " 'Qian DC',\n", | |
| " 'Randall CL',\n", | |
| " 'Ravenhall M',\n", | |
| " 'Ren HY',\n", | |
| " 'Saccone NL',\n", | |
| " 'Sakamoto Y',\n", | |
| " 'Sanchez-Juan P',\n", | |
| " 'Sanchez-Roige S',\n", | |
| " 'Scelo G',\n", | |
| " 'Seyerle AA',\n", | |
| " 'Shah AA',\n", | |
| " 'Shen X',\n", | |
| " 'Sobota RS',\n", | |
| " 'Sud A',\n", | |
| " 'Sugier PE',\n", | |
| " 'Suh Y',\n", | |
| " 'Suhre K',\n", | |
| " 'Sun Y',\n", | |
| " 'Tachmazidou I',\n", | |
| " 'Tapper W',\n", | |
| " 'Thompson AG',\n", | |
| " 'Tian C',\n", | |
| " 'Tomer Y',\n", | |
| " 'Turley P',\n", | |
| " 'Wang Z',\n", | |
| " 'Ward-Caviness CK',\n", | |
| " 'Wattacheril J',\n", | |
| " 'Winkler TW',\n", | |
| " 'Witt SH',\n", | |
| " 'Xu W',\n", | |
| " 'Yashin AI',\n", | |
| " 'Yeo A',\n", | |
| " 'Yin X',\n", | |
| " 'Yucesoy B',\n", | |
| " 'Zai CC',\n", | |
| " 'Zhang Y',\n", | |
| " 'Zhang YB',\n", | |
| " 'Zhou H'}" | |
| ] | |
| }, | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "set(d['FIRST AUTHOR'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "id": "f3190b93", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'Michailidou K', 'Tachmazidou I'}" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Distinct first authors whose name contains the substring 'idou'.\n", | |
| "{x for x in d['FIRST AUTHOR'] if 'idou' in x}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 120, | |
| "id": "49ac0396", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'DATE ADDED TO CATALOG\\tPUBMEDID\\tFIRST AUTHOR\\tDATE\\tJOURNAL\\tLINK\\tSTUDY\\tDISEASE/TRAIT\\tINITIAL SAMPLE SIZE\\tREPLICATION SAMPLE SIZE\\tREGION\\tCHR_ID\\tCHR_POS\\tREPORTED GENE(S)\\tMAPPED_GENE\\tUPSTREAM_GENE_ID\\tDOWNSTREAM_GENE_ID\\tSNP_GENE_IDS\\tUPSTREAM_GENE_DISTANCE\\tDOWNSTREAM_GENE_DISTANCE\\tSTRONGEST SNP-RISK ALLELE\\tSNPS\\tMERGED\\tSNP_ID_CURRENT\\tCONTEXT\\tINTERGENIC\\tRISK ALLELE FREQUENCY\\tP-VALUE\\tPVALUE_MLOG\\tP-VALUE (TEXT)\\tOR or BETA\\t95% CI (TEXT)\\tPLATFORM [SNPS PASSING QC]\\tCNV\\n'" | |
| ] | |
| }, | |
| "execution_count": 120, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "line" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 122, | |
| "id": "908eec77", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['DATE ADDED TO CATALOG',\n", | |
| " 'PUBMEDID',\n", | |
| " 'FIRST AUTHOR',\n", | |
| " 'DATE',\n", | |
| " 'JOURNAL',\n", | |
| " 'LINK',\n", | |
| " 'STUDY',\n", | |
| " 'DISEASE/TRAIT',\n", | |
| " 'INITIAL SAMPLE SIZE',\n", | |
| " 'REPLICATION SAMPLE SIZE',\n", | |
| " 'REGION',\n", | |
| " 'CHR_ID',\n", | |
| " 'CHR_POS',\n", | |
| " 'REPORTED GENE(S)',\n", | |
| " 'MAPPED_GENE',\n", | |
| " 'UPSTREAM_GENE_ID',\n", | |
| " 'DOWNSTREAM_GENE_ID',\n", | |
| " 'SNP_GENE_IDS',\n", | |
| " 'UPSTREAM_GENE_DISTANCE',\n", | |
| " 'DOWNSTREAM_GENE_DISTANCE',\n", | |
| " 'STRONGEST SNP-RISK ALLELE',\n", | |
| " 'SNPS',\n", | |
| " 'MERGED',\n", | |
| " 'SNP_ID_CURRENT',\n", | |
| " 'CONTEXT',\n", | |
| " 'INTERGENIC',\n", | |
| " 'RISK ALLELE FREQUENCY',\n", | |
| " 'P-VALUE',\n", | |
| " 'PVALUE_MLOG',\n", | |
| " 'P-VALUE (TEXT)',\n", | |
| " 'OR or BETA',\n", | |
| " '95% CI (TEXT)',\n", | |
| " 'PLATFORM [SNPS PASSING QC]',\n", | |
| " 'CNV']" | |
| ] | |
| }, | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "line.strip().split('\\t')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "2b60418e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "400f9e13", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "4efb7a4f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "5c29077f", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.9.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment