# James-Rocker/basic.py

## basic.py
# Without type hints
def add(a, b):
    """Return the result of ``a + b``; deliberately left without type hints."""
    total = a + b
    return total


# Calling the function
# Both arguments are ints, so this evaluates 2 + 3 and binds the result, 5, to `result`.
result = add(2, 3)


# With type hints
def add(a: int, b: int) -> int:
    """
    Return the sum of the two integers ``a`` and ``b``.

    Note on mypy: this file defines ``add`` more than once on purpose. Once a function name has been defined, mypy does
    not expect it to be defined again (the same goes for variables). If you do redefine it, you get
    `error: Name "add" already defined on line 2`
    """
    total: int = a + b
    return total


# Calling the function
# Same arguments as the earlier call, now going through the annotated `add`; the value is still 5.
result_hinted = add(2, 3)


# Failure example
def add(a, b):
    """Untyped re-definition of ``add`` used by the failure example; returns ``a + b`` as-is."""
    outcome = a + b
    return outcome


# At runtime this evaluates 2 + "3". Python raises TypeError for int + str, so running this script stops here and
# `result_fail` is never bound.
result_fail = add(2, "3")

## with_classes.py
from typing import List

"""
If you are using a Python version older than 3.9 and want to annotate the types held inside your containers, you have
to import the generic types from `typing`. This is why you might see `from typing import List` at the top of some
files.

Notice that in `with_pandas_exercise.py` we use DataFrame as a Python type hint. This works because classes can be
used directly as type hints.

Let's play with this concept a bit more by creating a class and using it in another class.
"""


class Person:
    """A named person whose age can be advanced one year at a time."""

    def __init__(self, name: str, age: int) -> None:
        """Store ``name`` and ``age`` on the instance exactly as given; nothing is validated at runtime."""
        self.name: str = name
        self.age: int = age

    def have_birthday(self) -> None:
        """Increase ``age`` by one, in place."""
        self.age += 1

    def __str__(self) -> str:
        """Return the ``"<name> is <age> years old"`` description of this person."""
        description = f"{self.name} is {self.age} years old"
        return description


class PeopleManager:
    """Keeps a list of ``Person`` objects and looks them up by name."""

    def __init__(self) -> None:
        """Start with no people registered."""
        self.people: List[Person] = []

    def add_person(self, person: Person) -> None:
        """Append ``person`` to the end of the managed list."""
        self.people.append(person)

    def get_person(self, name: str) -> Person:
        """
        Return the first managed person whose ``name`` attribute equals ``name``.

        Raises ``ValueError`` when nobody with that name has been added.
        """
        matches = (candidate for candidate in self.people if candidate.name == name)
        found = next(matches, None)
        if found is None:
            raise ValueError(f"Person with name {name} not found")
        return found


# Create instances and use the classes
person1 = Person("Alice", 30)
person2 = Person("Bob", 25)

# And a people manager class to manage them
manager = PeopleManager()
manager.add_person(person1)
manager.add_person(person2)

# These people exist and can be retrieved with no issue
print(manager.get_person("Alice"))
print(manager.get_person("Bob"))

# Let's create another person, this time passing the age as a string ("Thirty") even though `Person.__init__`
# annotates `age` as `int`. `__init__` just stores what it is given, so the object is created without complaint.
person3 = Person("Charlie", "Thirty")
"""
Notice how this is accepted behaviour from the class: it produces no error and looks relatively innocuous. However,
as soon as we do anything with the age, such as having a birthday, it fails:
"""
# `have_birthday` runs `self.age += 1`; with the string age "Thirty" that is str + int, which raises TypeError.
try:
    person3.have_birthday()
except TypeError:
    print(f"{person3.name}, is not a valid age")

# The manager stores whatever it is handed, so the badly-typed person is added and retrieved like the others.
manager.add_person(person3)
print(manager.get_person("Charlie"))

# Print a one-entry {name: age} dict per managed person.
for human in manager.people:
    print({human.name: human.age})

"""
How about when using it within the context of pandas
"""

import pandas as pd

# Create a dataframe with the people
# "people" holds the Person objects themselves; "ages" holds each person's `age` attribute, which at this point mixes
# the ints 30 and 25 with the string "Thirty".
people_dict = {"people": manager.people, "ages": [person.age for person in manager.people]}

df = pd.DataFrame.from_dict(people_dict)

# The mixed "ages" column would need extra clean-up code before it could be treated as numbers. Had the type hints on
# `Person` been checked (e.g. with mypy), the bad age would have been caught before it ever reached the dataframe.
print(df)

## with_pandas_basic.py
import pandas as pd

"""
Notice how mypy doesn't pick up the misuse below: the function has no type hints, so mypy has no idea what the types
should be and the mistake only surfaces (if at all) at run time.
"""


# Look at me, I'm just a simple little function. Please don't misuse my poorly laid out type
def add_new_column(df, column_name, default_value):
    """
    Assign ``default_value`` to ``df[column_name]`` and return ``df``.

    The assignment is made on the object passed in, and that same object is returned. The parameters are left without
    type hints on purpose for this example.
    """
    df[column_name] = default_value
    return df


# Intended usage: a string column name and an int default value
data = {'A': [1, 2, 3]}
data_df = pd.DataFrame(data)
data_df_with_col = add_new_column(data_df, 'B', 0)
print(data_df_with_col)

# Misusing this function by passing a bool (`False`) as the column name and a str ('0') as the default value, because
# nothing is stopping us from doing that
data = {'A': [1, 2, 3]}
data_df = pd.DataFrame(data)
# NOTE(review): the except branch only runs if the assignment inside `add_new_column` raises TypeError — confirm
# whether pandas actually rejects a non-string column label here.
try:
    data_df_with_col = add_new_column(data_df, False, '0')
    print(data_df_with_col)
except TypeError:
    print("You silly goose. Error: Column name must be a string")


# Now with proper type hinting, we can spot this is misused and refactor our code before running anything
def add_new_column_with_check(df: pd.DataFrame, column_name: str, default_value: int) -> pd.DataFrame:
    """
    Assign ``default_value`` to ``df[column_name]`` and return ``df``.

    The "check" is the static one made possible by the annotations; the body itself performs no validation at runtime.
    """
    df[column_name] = default_value
    return df


# Intended usage of the annotated function: a string column name and an int default value.
data = {'A': [1, 2, 3]}
data_df = pd.DataFrame(data)
data_df_with_col = add_new_column_with_check(data_df, 'B', 0)
# Deliberate misuse (bool column name, str default value). Because `add_new_column_with_check` is annotated, a type
# checker can flag this call before the code is ever run.
try:
    data_df_with_col = add_new_column_with_check(data_df, False, '0')
    print(data_df_with_col)
except TypeError:
    print("You silly goose. Error: Column name must be a string")

## with_pandas_exercise.py
import pandas as pd

"""
Here's an example of where you can save yourself some time by using type hints.

Let's say we have a huge dataframe and we want to apply a function to a column. We can use the `apply` function in
pandas. However, what if you are using the incorrect data type? This is where type hints can save you time and prevent
you from waiting for the code to run before you realise you've made a mistake.
"""


# Number of rows used by the example dataframes
size = 100
# The integers 0 .. size - 1; `list(range(...))` builds the list directly, without a pass-through comprehension
list_size = list(range(size))


def apply_function(df_to_apply):
    """
    Add a column ``'C'`` holding each value of column ``'B'`` multiplied by 2, and return ``df_to_apply``.

    The column is assigned on the object passed in, and that same object is returned. Left without type hints on
    purpose for this example.
    """

    def double(value):
        return value * 2

    df_to_apply['C'] = df_to_apply['B'].apply(double)
    return df_to_apply


# Notice this still runs, even though we've passed completely incorrect data types
# ('B' is the single int `size`, not a list of ints like 'A')
data = {'A': list_size, 'B': size}
dummy_df = pd.DataFrame(data)
dummy_df = apply_function(dummy_df)
print(dummy_df)


def reapply_function(df_to_apply: pd.DataFrame) -> pd.DataFrame:
    """
    Add a column ``'C'`` holding each value of column ``'B'`` multiplied by 2, and return ``df_to_apply``.

    The annotations tell a type checker that a ``DataFrame`` goes in and the same kind of object comes out.
    """
    doubled = df_to_apply['B'].apply(lambda x: x * 2)
    df_to_apply['C'] = doubled
    return df_to_apply


# Annotating the input data states the expected shape up front: every column is a list of ints
correct_data: dict[str, list[int]] = {'A': list_size, 'B': list_size}
corrected_dummy_df = pd.DataFrame(correct_data)
corrected_dummy_df = reapply_function(corrected_dummy_df)
print(corrected_dummy_df)
	# Without type hints
	def add(a, b):
	    """Return ``a + b``; deliberately left without type hints."""
	    return a + b


	# Calling the function
	result = add(2, 3)


	# With type hints
	def add(a: int, b: int) -> int:
	    """
	    Return the sum of the two integers ``a`` and ``b``.

	    Note on mypy: once a function name has been defined, mypy does not expect it to be defined again (the same goes
	    for variables). If you do redefine it, you get `error: Name "add" already defined on line 2`
	    """
	    return a + b


	# Calling the function
	result_hinted = add(2, 3)


	# Failure example
	def add(a, b):
	    """Untyped re-definition of ``add`` used by the failure example; returns ``a + b`` as-is."""
	    return a + b


	result_fail = add(2, "3")
	from typing import List

	"""
	If you are using python versions older than 3.9 and want to document the types inside your objects, you had to directly
	import them. This is why you might see `from typing import List` at the top of some files.

	Notice in `with_pandas_exercise.py`, we are using DataFrame as a python type hint. This is because class objects are
	used as type hints.

	Let's play with this concept a bit more by creating a class and using it in another class.
	"""


	class Person:
	    """A named person whose age can be advanced one year at a time."""

	    def __init__(self, name: str, age: int) -> None:
	        """Store ``name`` and ``age`` on the instance exactly as given; nothing is validated at runtime."""
	        self.name: str = name
	        self.age: int = age

	    def have_birthday(self) -> None:
	        """Increase ``age`` by one, in place."""
	        self.age += 1

	    def __str__(self) -> str:
	        """Return the ``"<name> is <age> years old"`` description of this person."""
	        return f"{self.name} is {self.age} years old"


	class PeopleManager:
	    """Keeps a list of ``Person`` objects and looks them up by name."""

	    def __init__(self) -> None:
	        """Start with no people registered."""
	        self.people: List[Person] = []

	    def add_person(self, person: Person) -> None:
	        """Append ``person`` to the end of the managed list."""
	        self.people.append(person)

	    def get_person(self, name: str) -> Person:
	        """
	        Return the first managed person whose ``name`` attribute equals ``name``.

	        Raises ``ValueError`` when nobody with that name has been added.
	        """
	        for person in self.people:
	            if person.name == name:
	                return person
	        raise ValueError(f"Person with name {name} not found")


	# Create instances and use the classes
	person1 = Person("Alice", 30)
	person2 = Person("Bob", 25)

	# And a people manager class to manage them
	manager = PeopleManager()
	manager.add_person(person1)
	manager.add_person(person2)

	# These people exist and can be retrieved with no issue
	print(manager.get_person("Alice"))
	print(manager.get_person("Bob"))

	# Let's birth a human and add them to the manager
	person3 = Person("Charlie", "Thirty")
	"""
	Notice how, this is accepted behaviour from the class, it produces no error and is relatively innocuous. However, when
	we do anything with this
	"""
	# `have_birthday` runs `self.age += 1`; with the string age "Thirty" that is str + int, which raises TypeError.
	try:
	    person3.have_birthday()
	except TypeError:
	    print(f"{person3.name}, is not a valid age")

	# The manager stores whatever it is handed, so the badly-typed person is added and retrieved like the others.
	manager.add_person(person3)
	print(manager.get_person("Charlie"))

	# Print a one-entry {name: age} dict per managed person.
	for human in manager.people:
	    print({human.name: human.age})

	"""
	How about when using it within the context of pandas
	"""

	import pandas as pd

	# Create a dataframe with the people
	people_dict = {"people": manager.people, "ages": [person.age for person in manager.people]}

	df = pd.DataFrame.from_dict(people_dict)

	# Now we have to go back and add additional code to handle this. If we had type hints, we could have avoided this
	print(df)
	import pandas as pd

	"""
	Notice how mypy doesn't pick this error up because it has no idea on the types until run time
	"""


	# Look at me, I'm just a simple little function. Please don't misuse my poorly laid out type
	def add_new_column(df, column_name, default_value):
	    """
	    Assign ``default_value`` to ``df[column_name]`` and return ``df``.

	    The assignment is made on the object passed in, and that same object is returned. The parameters are left
	    without type hints on purpose for this example.
	    """
	    df[column_name] = default_value
	    return df


	data = {'A': [1, 2, 3]}
	data_df = pd.DataFrame(data)
	data_df_with_col = add_new_column(data_df, 'B', 0)
	print(data_df_with_col)

	# Misusing this function by passing a bool (`False`) as the column name and a str ('0') as the default value,
	# because nothing is stopping us from doing that
	data = {'A': [1, 2, 3]}
	data_df = pd.DataFrame(data)
	# NOTE(review): the except branch only runs if the assignment inside `add_new_column` raises TypeError — confirm
	# whether pandas actually rejects a non-string column label here.
	try:
	    data_df_with_col = add_new_column(data_df, False, '0')
	    print(data_df_with_col)
	except TypeError:
	    print("You silly goose. Error: Column name must be a string")


	# Now with proper type hinting, we can spot this is misused and refactor our code before running anything
	def add_new_column_with_check(df: pd.DataFrame, column_name: str, default_value: int) -> pd.DataFrame:
	    """
	    Assign ``default_value`` to ``df[column_name]`` and return ``df``.

	    The "check" is the static one made possible by the annotations; the body itself performs no validation at
	    runtime.
	    """
	    df[column_name] = default_value
	    return df


	# Intended usage of the annotated function: a string column name and an int default value.
	data = {'A': [1, 2, 3]}
	data_df = pd.DataFrame(data)
	data_df_with_col = add_new_column_with_check(data_df, 'B', 0)
	# Deliberate misuse (bool column name, str default value). Because `add_new_column_with_check` is annotated, a
	# type checker can flag this call before the code is ever run.
	try:
	    data_df_with_col = add_new_column_with_check(data_df, False, '0')
	    print(data_df_with_col)
	except TypeError:
	    print("You silly goose. Error: Column name must be a string")
	import pandas as pd

	"""
	Here's an example of where you can save yourself some time by using type hints.

	Lets say we have a huge dataframe and we want to apply a function to a column. We can use the apply function in pandas.
	However, what if you are using the incorrect data type? This is where type hints can save you time and prevent you from
	waiting for the code to run before you realise you've made a mistake.
	"""


	# Number of rows used by the example dataframes
	size = 100
	# The integers 0 .. size - 1; `list(range(...))` builds the list directly, without a pass-through comprehension
	list_size = list(range(size))


	def apply_function(df_to_apply):
	    """
	    Add a column ``'C'`` holding each value of column ``'B'`` multiplied by 2, and return ``df_to_apply``.

	    The column is assigned on the object passed in, and that same object is returned. Left without type hints on
	    purpose for this example.
	    """
	    df_to_apply['C'] = df_to_apply['B'].apply(lambda x: x * 2)
	    return df_to_apply


	# Notice this still runs, even though we've passed completely incorrect data types
	data = {'A': list_size, 'B': size}
	dummy_df = pd.DataFrame(data)
	dummy_df = apply_function(dummy_df)
	print(dummy_df)


	def reapply_function(df_to_apply: pd.DataFrame) -> pd.DataFrame:
	    """
	    Add a column ``'C'`` holding each value of column ``'B'`` multiplied by 2, and return ``df_to_apply``.

	    The annotations tell a type checker that a ``DataFrame`` goes in and the same kind of object comes out.
	    """
	    df_to_apply['C'] = df_to_apply['B'].apply(lambda x: x * 2)
	    return df_to_apply


	correct_data: dict[str, list[int]] = {'A': list_size, 'B': list_size}
	corrected_dummy_df = pd.DataFrame(correct_data)
	corrected_dummy_df = reapply_function(corrected_dummy_df)
	print(corrected_dummy_df)