Created
September 7, 2022 09:24
-
-
Save akash-kd/aff43f225c78aa86f57ecabcb1001fc7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import email
import imaplib
import os
from email.header import decode_header
# --- Credentials -----------------------------------------------------------
# The account password must never live in the source file. It is read from the
# IMAP_PASSWORD environment variable; the address can be overridden with
# IMAP_USERNAME and otherwise defaults to the account this script was written
# for.
# NOTE(review): a password was previously committed in this file - it should
# be treated as compromised and revoked.
username = os.environ.get("IMAP_USERNAME", "webteameced@gmail.com")
password = os.environ.get("IMAP_PASSWORD")
if not password:
    raise SystemExit(
        "IMAP_PASSWORD is not set; export it before running this script."
    )

# Host name of the IMAP server.
imap_url = 'imap.gmail.com'

# Open an SSL-protected IMAP connection and authenticate.
my_mail = imaplib.IMAP4_SSL(imap_url)
my_mail.login(username, password)

# Select the Gmail "Sent Mail" folder (not the Inbox). The inner double quotes
# are part of the IMAP mailbox name because it contains a space.
status, _ = my_mail.select('"[Gmail]/Sent Mail"')
if status != 'OK':
    # Without a selected mailbox the search below would fail with a far less
    # helpful protocol error.
    raise SystemExit('Could not select mailbox "[Gmail]/Sent Mail"')

# Search criterion kept for reference; it is NOT used by the search below,
# which fetches every message in the folder.
# For other keys (criteria): https://gist.github.com/martinrusev/6121028#file-imap-search
key = 'FROM'
value = 'webteameced@gmail.com'

# 'ALL' matches every message in the selected mailbox.
status, data = my_mail.search(None, 'ALL')
if status != 'OK':
    raise SystemExit("IMAP search failed with status {0}".format(status))

# data[0] is a space-separated byte string of message sequence numbers.
mail_id_list = data[0].split()

msgs = []  # placeholder list for collected messages (currently not filled)
def obtain_header(msg):
    """Decode, print and return the Subject and From headers of ``msg``.

    Args:
        msg: A parsed e-mail message (anything offering ``.get(name)``, such
            as ``email.message.Message``).

    Returns:
        A ``(subject, sender)`` tuple of ``str``. A header that is absent from
        the message is returned as an empty string.
    """

    def _decode(raw):
        # Messages may lack a header entirely (e.g. mail sent with no subject);
        # decode_header() cannot handle None.
        if raw is None:
            return ""
        pieces = []
        # A header can consist of several fragments with different charsets
        # (e.g. an encoded display name followed by a plain address), so all
        # of them are decoded and joined instead of only the first one.
        for fragment, charset in decode_header(raw):
            if isinstance(fragment, bytes):
                try:
                    # charset is None for unencoded fragments.
                    fragment = fragment.decode(charset or "utf-8", errors="replace")
                except LookupError:
                    # Unknown charset label: keep the text rather than crash.
                    fragment = fragment.decode("utf-8", errors="replace")
            pieces.append(fragment)
        return "".join(pieces)

    subject = _decode(msg.get("Subject"))
    From = _decode(msg.get("From"))
    print("Subject:", subject)
    print("From:", From)
    return subject, From
def _decode_text(part):
    """Return the decoded text of ``part``, or None when it has no payload."""
    payload = part.get_payload(decode=True)
    if payload is None:
        # Multipart containers carry no payload of their own.
        return None
    # Honour the charset declared by the part instead of assuming UTF-8.
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        # Unknown charset label: fall back rather than lose the body.
        return payload.decode("utf-8", errors="replace")


# Index of the message being processed; used to name the output file.
count = 0
try:
    for num in mail_id_list:
        # RFC822 returns the whole message (BODY would fetch just the body).
        typ, fetched = my_mail.fetch(num, '(RFC822)')
        if typ != 'OK':
            print("Could not fetch message", num)
            continue
        for response in fetched:
            # Only the tuple items of a fetch response carry message bytes.
            if not isinstance(response, tuple):
                continue
            msg = email.message_from_bytes(response[1])
            subject, From = obtain_header(msg)
            if msg.is_multipart():
                # Save every plain-text, non-attachment part to ./data<count>.txt
                # (a later part of the same message overwrites an earlier one).
                for part in msg.walk():
                    content_type = part.get_content_type()
                    content_disposition = str(part.get("Content-Disposition"))
                    if content_type != "text/plain" or "attachment" in content_disposition:
                        continue
                    body = _decode_text(part)
                    if body is None:
                        continue
                    # Explicit encoding so the output does not depend on the
                    # platform default.
                    with open("./data{0}.txt".format(count), "w", encoding="utf-8") as text_file:
                        text_file.write(body)
            else:
                # Single-part message: print the body when it is plain text.
                if msg.get_content_type() == "text/plain":
                    body = _decode_text(msg)
                    if body is not None:
                        print(body)
            print("="*100)
            count = count + 1
finally:
    # Always end the IMAP session, even when processing fails midway.
    my_mail.logout()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment