Skip to content

Instantly share code, notes, and snippets.

@goltsevnet
Last active August 23, 2021 14:45
Show Gist options
  • Select an option

  • Save goltsevnet/c6916ebdaa5e8ecba4f0f374c202b0de to your computer and use it in GitHub Desktop.

Select an option

Save goltsevnet/c6916ebdaa5e8ecba4f0f374c202b0de to your computer and use it in GitHub Desktop.
import os
import shutil
import time
import random
from movies.models import *
from tqdm import tqdm
from users.models import User
from model_bakery import baker
# Names of the Genre rows created by gen_genres().
genre_names = [
    'Action',
    'Adventure',
    'Fantasy',
    'Sci-Fi',
    'Drama',
    'Music',
    'Romance',
    'Thriller',
    'Mystery',
    'Comedy',
    'Animation',
    'Family',
    'Biography',
    'Musical',
    'Crime',
    'Short',
    'Western',
    'Documentary',
    'History',
    'War',
    'Game-Show',
    'Reality-TV',
    'Horror',
    'Sport',
    'Talk-Show',
    'News',
]

# Names of the PersonType rows created by gen_person_type().
# gen_persons() pairs these positionally with the weights [95, 3, 2],
# so the order and the length of this list matter.
role_names = [
    'Actor',
    'Producer',
    'Screenwriter',
]
def clear_migrations() -> None:
    """Remove the migrations packages and recreate them empty.

    For ``./movies/migrations`` and ``./users/migrations`` (relative to the
    current working directory) the directory tree is deleted, a fresh
    directory is created and an empty ``__init__.py`` is written into it.
    A failure to delete (for example, the directory does not exist yet) is
    only reported on stdout; the directory is then created anyway.
    """
    for migrations_dir in ('./movies/migrations', './users/migrations'):
        try:
            shutil.rmtree(migrations_dir)
        except OSError as err:
            # Best effort: report and carry on to (re)create the package.
            print('Error: %s : %s' % (migrations_dir, err.strerror))

        os.mkdir(migrations_dir)

        # Opening in write mode is enough to create the empty package marker.
        init_file = migrations_dir + '/__init__.py'
        with open(init_file, 'w'):
            pass
def create_superuser() -> None:
    """Create the fixed superuser account used for generated data.

    The account is created with e-mail ``user@example.com`` and password
    ``Parol123``, then flagged as both staff and superuser and saved.
    NOTE(review): hard-coded credentials — presumably intended for local
    development databases only; confirm before running anywhere shared.
    """
    admin = User.objects.create(email='user@example.com')
    # set_password stores the password via the user model's own API
    # instead of assigning the raw string to a field.
    admin.set_password('Parol123')
    admin.is_staff = True
    admin.is_superuser = True
    admin.save()
def makemigrations_migrate() -> None:
    """Create and apply the initial migrations.

    Runs ``manage.py`` through the shell four times: the ``users`` app is
    migrated first, then everything else. Exit codes of the commands are
    not inspected.
    """
    commands = (
        'python manage.py makemigrations users',
        'python manage.py migrate users',
        'python manage.py makemigrations',
        'python manage.py migrate',
    )
    for command in commands:
        os.system(command)
def gen_users(users: int) -> None:
    """Generate users and store them in the database.

    :param users: number of ``users.User`` rows to create
    :return: None
    """
    # The bar starts at 1 of 2 and is completed once the bulk insert is done.
    progress = tqdm(desc='gen_users:build_batch', total=2, initial=1)
    baker.make(
        'users.User',
        _quantity=users,
        _bulk_create=True,
    )
    progress.update()
    progress.close()
def gen_genres() -> list[Genre]:
    """Create one Genre row per entry of ``genre_names``.

    :return: the created Genre instances, in the order of ``genre_names``
    """
    progress = tqdm(genre_names, desc='gen_genres_and_save')
    # Each genre is saved individually with its own create() call.
    created = [Genre.objects.create(name=genre_name) for genre_name in progress]
    progress.close()
    return created
def gen_person_type() -> list[PersonType]:
    """Create one PersonType row per entry of ``role_names``.

    :return: the created PersonType instances, in the order of ``role_names``
    """
    progress = tqdm(role_names, desc='gen_person_type')
    # Each type is saved individually with its own create() call.
    created = [PersonType.objects.create(type=role_name) for role_name in progress]
    progress.close()
    return created
def gen_person_role(persons) -> list[PersonRole]:
    """Generate PersonRole rows and store them in the database.

    :param persons: number of persons; ``persons * 2`` roles are created
    :return: what ``baker.make`` returns for the batch (annotated as a list
        of PersonRole)
    """
    role_count = persons * 2
    # The bar starts at 1 of 2 and is completed once the bulk insert is done.
    progress = tqdm(desc='gen_person_role:build_batch', total=2, initial=1)
    created_roles = baker.make(
        'movies.PersonRole',
        _quantity=role_count,
        _bulk_create=True,
    )
    progress.update()
    progress.set_description('gen_person_role')
    progress.close()
    return created_roles
def create_filmwork_type_obj() -> tuple[FilmworkType, FilmworkType]:
    """Create the two FilmworkType rows, ``movie`` and ``serial``.

    :return: ``(movie_type, serial_type)`` in that order
    """
    progress = tqdm(desc='create_filmwork_type_obj: movie', total=2)

    movie_type = FilmworkType.objects.create(name='movie')
    progress.update()

    # The description names the type being created at each step.
    progress.set_description('create_filmwork_type_obj: serial')
    serial_type = FilmworkType.objects.create(name='serial')

    progress.set_description('create_filmwork_type_obj')
    progress.update()
    progress.close()

    return movie_type, serial_type
def gen_persons(persons, person_type_instances) -> list[Person]:
    """Generate Person rows and store them in the database.

    :param persons: number of Person rows to create
    :param person_type_instances: PersonType objects to pick each person's
        type from; must contain exactly three entries because they are
        paired with the weights 95 / 3 / 2
    :return: what ``baker.make`` returns for the batch (annotated as a list
        of Person)
    """
    def pick_type():
        # Weighted draw: the first type is chosen far more often than the
        # second and third.
        return random.choices(person_type_instances, [95, 3, 2])[0]

    progress = tqdm(desc='gen_persons', total=2, initial=1)
    created_persons = baker.make(
        'movies.Person',
        type=pick_type,
        _quantity=persons,
        _bulk_create=True,
    )
    progress.update()
    progress.close()
    return created_persons
def filmwork_reg(type_instance, genres, persons, q) -> None:
    """
    Generate one batch of Filmwork objects and link each of them to a genre and a producer.

    The links are written directly into the many-to-many through tables of
    ``Filmwork.genres`` and ``Filmwork.producers``; every film work receives
    exactly one randomly chosen genre and one randomly chosen person.
    Author's note: this could probably be done with a single ``make`` call.
    :param type_instance: FilmworkType assigned to every generated film work
    :param genres: sequence of Genre objects to choose from
    :param persons: sequence of Person objects to choose from
    :param q: number of film works in this batch
    :return: None
    """
    films: list[Filmwork] = baker.make(
        'movies.Filmwork',
        _quantity=q,
        _bulk_create=True,
        type=type_instance,
    )

    genre_link_model = Filmwork.genres.through
    producer_link_model = Filmwork.producers.through

    genre_links = []
    producer_links = []
    # One loop for both link kinds keeps the order of random draws
    # (genre, then person, per film).
    for film in films:
        chosen_genre = random.choice(genres)
        genre_links.append(genre_link_model(filmwork_id=film.id, genre_id=chosen_genre.id))
        chosen_person = random.choice(persons)
        producer_links.append(producer_link_model(filmwork_id=film.id, person_id=chosen_person.id))

    genre_link_model.objects.bulk_create(genre_links)
    producer_link_model.objects.bulk_create(producer_links)
def person_bulk_create(persons: list[Person], roles_instance: list[PersonRole]) -> None:
    """
    Link every person to two roles through the ``Person.roles`` through table.

    Two entries are popped from the end of ``roles_instance`` for each person,
    so the list is consumed by this call; pass a copy if the roles are needed
    afterwards. The collected through rows are inserted with one bulk_create.
    :param persons: saved Person objects (their ``id`` is read)
    :param roles_instance: saved PersonRole objects, at least ``2 * len(persons)``
    :return: None
    :raises IndexError: if ``roles_instance`` runs out of entries
    """
    through_person_roles = Person.roles.through
    through_p_r = []
    for person in tqdm(persons, desc='persons bulk_create'):
        # Two distinct roles per person.
        for _ in range(2):
            role = roles_instance.pop()
            through_p_r.append(through_person_roles(person_id=person.id, personrole_id=role.id))
    # Persist the links; without this the rows built above are discarded.
    through_person_roles.objects.bulk_create(through_p_r)
def gen_movies_or_serials(count: int, f_type: FilmworkType, genres: list[Genre],
                          persons: list[Person], batch_size: int) -> None:
    """
    Generate ``count`` film works of a single type, batch by batch.

    Every batch is delegated to ``filmwork_reg``; all batches hold
    ``batch_size`` items except possibly the last, which holds the rest.
    :param count: total number of film works to generate
    :param f_type: FilmworkType instance assigned to the film works
    :param genres: list of genres [Genre]
    :param persons: list of persons [Person]
    :param batch_size: number of film works per bulk batch
    :return: None
    """
    # Round the number of batches up when count is not a multiple of batch_size.
    full_batches, leftover = divmod(count, batch_size)
    batches_needed = full_batches + (1 if leftover > 0 else 0)

    remaining = count
    with tqdm(total=count, desc=f'Генерация {f_type.name}') as progress:
        for _ in range(batches_needed):
            quantity = min(remaining, batch_size)
            filmwork_reg(f_type, genres, persons, q=quantity)
            remaining -= batch_size
            progress.update(quantity)
def person_role_fk_filmwork(roles_instance, random_filmwork_batch, batch_size):
    """
    Point every PersonRole at a randomly chosen film work and save the change.

    The ``movies`` attribute of each role is set in memory first; all roles
    are then written with a single ``bulk_update`` limited to that field.
    :param roles_instance: PersonRole objects to update
    :param random_filmwork_batch: sequence of Filmwork objects to choose from
    :param batch_size: batch size passed to ``bulk_update``
    :return: None
    """
    assigning = tqdm(
        roles_instance,
        total=len(roles_instance),
        desc='Распределение в PersonRole FK на Filmwork',
    )
    with assigning:
        for person_role in assigning:
            person_role.movies = random.choice(random_filmwork_batch)

    # Second bar: starts at 1 of 2 and completes when the update is written.
    saving = tqdm(total=2, initial=1, desc='PersonRole bulk_update [\'movies\']')
    PersonRole.objects.bulk_update(roles_instance, ['movies'], batch_size=batch_size)
    saving.update()
    saving.close()
def start_gen(users=1000, persons=10000, movies=1000000, serials=200000, batch_size=10000, superuser=True) -> None:
    """
    Prepare the database and run the whole entity generation.

    Author's measurement: about 11 minutes on a 7700k at 4800 MHz.
    :param users: number of model.User rows
    :param persons: number of model.Person rows
    :param movies: number of model.Filmwork rows with type 'movie'
    :param serials: number of model.Filmwork rows with type 'serial'
    :param batch_size: batch size for the bulk operations
    :param superuser: whether to create the superuser user@example.com / Parol123
    :return: None
    """
    makemigrations_migrate()
    if superuser:
        create_superuser()
    # Timing covers data generation only, not the migrations above.
    start_time = time.time()
    gen_users(users)
    genres = gen_genres()
    person_type_instances = gen_person_type()
    roles_instance = gen_person_role(persons)
    # person_bulk_create() pops from the list it is given, so keep a copy
    # of the roles for the FK assignment further down.
    roles_instance_copy = roles_instance.copy()
    movie_type_instance, serial_type_instance = create_filmwork_type_obj()
    persons = gen_persons(persons, person_type_instances)
    # Link persons to PersonRole rows (two per person). Passing the three
    # PersonType objects here would exhaust the list on the second person
    # and would use person-type ids as person-role ids.
    person_bulk_create(persons, roles_instance)
    gen_movies_or_serials(movies, movie_type_instance, genres, persons, batch_size)
    gen_movies_or_serials(serials, serial_type_instance, genres, persons, batch_size)
    # Take as many film works as there are roles to distribute over them.
    random_filmwork_batch = Filmwork.objects.all()[:len(roles_instance_copy)]
    person_role_fk_filmwork(roles_instance_copy, random_filmwork_batch, batch_size)
    print('--- %s seconds --- create objects' % (time.time() - start_time))
    print('--- superuser user@example.com password Parol123 ---')
from django.core.management.base import BaseCommand
from movies.management.commands.generate.generate import start_gen
class Command(BaseCommand):
    """Management command that runs the data generator via ``start``."""

    def handle(self, *args, **options):
        """Run ``start`` and hand back whatever it returns; arguments are ignored."""
        outcome = start()
        return outcome
# NOTE(review): neither name below is referenced by any code visible in this
# file — confirm whether they are still needed.
person_count = 1000
# True division: the result is a float.
multiplex = person_count / 10
def start():
    """Run ``start_gen`` with the preset sizes used by the management command."""
    preset = {
        'users': 1000,
        'persons': 10000,
        'movies': 1000000,
        'serials': 200000,
        'batch_size': 25000,
    }
    start_gen(**preset)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment