#! /usr/bin/env python
# Import a mbox file to a Google Group using https://developers.google.com/admin-sdk/groups-migration/index
# You'll need to install https://developers.google.com/api-client-library/python/
# and enable the Groups Migration API; read the prerequisites of the API SDK
from __future__ import print_function
import mailbox
import StringIO
import time
import apiclient
import httplib2
from apiclient import discovery
from oauth2client import client, tools
from oauth2client.file import Storage

try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None

# The email address of the group to import to
groupId = raw_input('Enter groupId: ')

# https://console.developers.google.com/project/mysociety-groups-import/apiui/credential
# Generate a Client ID for Native Application.
# You'll be prompted to complete an auth flow on the first run.
# The user will need to be an admin.
scope = 'https://www.googleapis.com/auth/apps.groups.migration'
storage = Storage('credentials.dat')
credentials = storage.get()
if not credentials or credentials.invalid:
    client_id = raw_input('Enter client_id: ')
    client_secret = raw_input('Enter client_secret: ')
    flow = client.OAuth2WebServerFlow(client_id, client_secret, scope)
    if flags:
        credentials = tools.run_flow(flow, storage, flags)
    else:
        # Needed only for compatibility with Python 2.6
        credentials = tools.run(flow, storage)

http = credentials.authorize(httplib2.Http())
service = discovery.build('groupsmigration', 'v1', http=http)

mbox_path = raw_input('Enter mbox_path: ')
mb = mailbox.mbox(mbox_path)  # The path of the mbox file to import

i = 1
total_messages = len(mb)
for msg in mb:
    stream = StringIO.StringIO()
    stream.write(msg.as_string())
    media = apiclient.http.MediaIoBaseUpload(
        stream, mimetype='message/rfc822')
    response = service.archive().insert(
        groupId=groupId, media_body=media).execute()
    print('Message {} of {}: {}'.format(
        i,
        total_messages,
        response['responseCode'])
    )
    i = i + 1
    time.sleep(1)
print('Done.')
@korchix, I don't think there is; this is a Google limitation.
@pecigonzalo is there a way to ignore mails larger than this limit during the upload, so the upload doesn't crash and continues with the next mail?
@pecigonzalo that sounds promising... can you please help with this? I don't know Python and have no idea how to achieve this.
I think (not tested) you can do the following:

    message_size = msg.as_string().__sizeof__()
    if message_size >= 26214400:
        continue

You have to insert that after the stream.write(msg.as_string()) line and before the media = apiclient.http.MediaIoBaseUpload( line. Or use stream.__sizeof__() instead of msg.as_string().__sizeof__().
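A minimal, untested sketch of how that guard could sit inside the existing upload loop, using len() on the serialized message (which measures the message itself rather than its in-memory object size) and bumping the counter before skipping so the progress output stays in step:

# Untested sketch: size guard placed inside the original upload loop.
for msg in mb:
    msg_string = msg.as_string()
    # Skip messages at or above the Groups Migration upload limit (26214400 bytes).
    if len(msg_string) >= 26214400:
        print('Message {} of {}: skipped (too large)'.format(i, total_messages))
        i = i + 1
        continue
    stream = StringIO.StringIO()
    stream.write(msg_string)
    media = apiclient.http.MediaIoBaseUpload(
        stream, mimetype='message/rfc822')
    response = service.archive().insert(
        groupId=groupId, media_body=media).execute()
    print('Message {} of {}: {}'.format(i, total_messages, response['responseCode']))
    i = i + 1
    time.sleep(1)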
@pecigonzalo thank you very much, much appreciated.
Is it possible to log (for instance) the subject of each message that gets ignored, so I can identify them later?
Something like:

    if message_size >= 26214400:
        print(message.subject)
        continue
@pecigonzalo thank you very much for your help. It works now, like I wanted.
I made these changes:

    ...
    stream.write(msg.as_string())
    message_size = msg.as_string().__sizeof__()
    if message_size >= 26214400:
        print('Message {} - Size {} - Subject: {}'.format(i, message_size, msg['subject']))
        continue
    media = apiclient.http.MediaIoBaseUpload ...

One last question: is the time.sleep(1) call necessary, or is it OK to comment it out to speed up the upload?
you're right, there is a limit: https://developers.google.com/admin-sdk/groups-migration/v1/limits
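If the fixed one-second sleep is removed, a simple backoff around the insert call is one way to survive quota errors. A rough, untested sketch (the helper name and the set of retried status codes are assumptions):

import time
from googleapiclient.errors import HttpError

def insert_with_backoff(service, group_id, media, retries=5):
    # Retry the archive insert with exponential backoff on rate-limit / server errors.
    for attempt in range(retries):
        try:
            return service.archive().insert(
                groupId=group_id, media_body=media).execute()
        except HttpError as e:
            # 429/500/503 are assumed to be transient; anything else is re-raised.
            if e.resp.status in (429, 500, 503) and attempt < retries - 1:
                time.sleep(2 ** attempt)
            else:
                raise

In the loop, response = insert_with_backoff(service, groupId, media) would then replace the direct service.archive().insert(...).execute() call.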
Many thanks @pecigonzalo! Inspired by this I created https://github.com/XaviTorello/mail2gg (with mbox and IMAP fetching support) :)
@Yeikop you probably do not have the API client library installed, as indicated at the start of the file.
I made some improvements to the original code. First, I converted the messages to UTF-8 and replaced characters that couldn't be converted. Some of my messages were not being accepted because they lacked a Message-ID, so I added one to them. With this version, I successfully transferred thousands of messages without any issues.
for msg in mb:
    try:
        stream = StringIO()
        msg_string_with_prefix = f"Message-Id: <backup@groupnametest.com>\n{msg.as_string()}"
        msg_encoded = msg_string_with_prefix.encode('utf-8', errors='replace')
        msg_string = msg_encoded.decode('utf-8')  # Convert bytes back to a string
        stream.write(msg_string)
        media = apiclient.http.MediaIoBaseUpload(
            stream, mimetype='message/rfc822')
        response = service.archive().insert(
            groupId=groupId, media_body=media).execute()
        print('Message {} of {}: {}'.format(
            i,
            total_messages,
            response['responseCode'])
        )
    except Exception as e:
        print(f"Error in processing message {i}: {str(e)}")
    i = i + 1
    time.sleep(0.2)
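A possible refinement to the Message-ID handling (untested; the domain is just the placeholder from the snippet above): only synthesize a Message-ID when one is actually missing, and make it unique per message, since Message-ID values are generally expected to be unique:

from email.utils import make_msgid

# Untested variant: add a Message-ID only when it is missing, one per message.
if 'Message-ID' not in msg:
    msg['Message-ID'] = make_msgid(domain='groupnametest.com')  # placeholder domain
msg_string = msg.as_string().encode('utf-8', errors='replace').decode('utf-8')
stream.write(msg_string)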
Unfortunately, this gist no longer works as Google disabled out-of-band OAuth access.
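To get the authentication working again, the oauth2client flow would have to be replaced. A rough sketch, assuming the newer google-auth-oauthlib and google-api-python-client packages and a client_secret.json downloaded for a Desktop-type OAuth client:

from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

SCOPES = ['https://www.googleapis.com/auth/apps.groups.migration']

# Opens a browser and runs a local web server for the consent screen,
# replacing the removed out-of-band copy/paste flow.
flow = InstalledAppFlow.from_client_secrets_file('client_secret.json', SCOPES)
credentials = flow.run_local_server(port=0)
service = build('groupsmigration', 'v1', credentials=credentials)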
Hi @pecigonzalo, thank you for this gist.
While uploading some mails, I got this error:

    File "/Users/xxxxx/Downloads/pythonClientLibrary/google-api-python-client-1.8.3/googleapiclient/discovery.py", line 861, in method
        raise MediaUploadSizeError("Media larger than: %s" % maxSize)
    googleapiclient.errors.MediaUploadSizeError: Media larger than: 26214400

Is there any way to increase maxSize, or a way to upload mails larger than that limit?