Last active
July 9, 2024 12:16
-
-
Save James-Rocker/4c4ee0764ae45efb0b14190600bb06f4 to your computer and use it in GitHub Desktop.
Why use mypy - CSC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Without type hints | |
def add(a, b):
    """Return ``a + b``; this version is intentionally written without type hints."""
    combined = a + b
    return combined
| # Calling the function | |
| result = add(2, 3) | |
| # With type hints | |
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``.

    A note on how mypy treats this definition: once a name has been defined,
    mypy does not let you define it again, and the same goes for variables.
    Because ``add`` is already defined earlier in this file, mypy reports
    `error: Name "add" already defined on line 2`.
    """
    total: int = a + b
    return total
| # Calling the function | |
| result_hinted = add(2, 3) | |
| # Failure example | |
def add(a, b):
    """Return ``a + b`` with no type hints (the failure example: nothing restricts the operands)."""
    outcome = a + b
    return outcome
| result_fail = add(2, "3") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import List | |
| """ | |
| If you are using a Python version older than 3.9 and want to document the types inside your objects, you have to import | |
| them directly from `typing`. This is why you might see `from typing import List` at the top of some files. | |
| Notice in `with_pandas_exercise.py`, we are using DataFrame as a Python type hint. This works because classes can be | |
| used as type hints. | |
| Let's play with this concept a bit more by creating a class and using it in another class. | |
| """ | |
class Person:
    """A person with a name and an age that can be incremented.

    The type hints document the intended types only; nothing is validated at
    runtime, so a wrongly typed ``age`` is accepted at construction time and
    only fails later (for example in ``have_birthday``).
    """

    def __init__(self, name: str, age: int) -> None:
        """Store ``name`` and ``age`` exactly as given."""
        self.name: str = name
        self.age: int = age

    def have_birthday(self) -> None:
        """Increase ``age`` by one, in place."""
        self.age += 1

    def __str__(self) -> str:
        """Return a human-readable summary such as ``"Alice is 30 years old"``."""
        return f"{self.name} is {self.age} years old"

    def __repr__(self) -> str:
        """Return an unambiguous representation.

        Without this, instances held in lists or DataFrame cells are displayed
        as opaque ``<... object at 0x...>`` addresses.
        """
        return f"{type(self).__name__}(name={self.name!r}, age={self.age!r})"
class PeopleManager:
    """Holds a list of ``Person`` objects and looks them up by name."""

    def __init__(self) -> None:
        """Start with an empty ``people`` list."""
        self.people: List[Person] = []

    def add_person(self, person: Person) -> None:
        """Append ``person`` to the end of ``people``."""
        self.people.append(person)

    def get_person(self, name: str) -> Person:
        """Return the first stored person whose ``name`` equals ``name``.

        Raises:
            ValueError: if no stored person has that name.
        """
        found = next(
            (candidate for candidate in self.people if candidate.name == name),
            None,
        )
        if found is None:
            raise ValueError(f"Person with name {name} not found")
        return found
| # Create instances and use the classes | |
| person1 = Person("Alice", 30) | |
| person2 = Person("Bob", 25) | |
| # And a people manager class to manage them | |
| manager = PeopleManager() | |
| manager.add_person(person1) | |
| manager.add_person(person2) | |
| # These people exist and can be retrieved with no issue | |
| print(manager.get_person("Alice")) | |
| print(manager.get_person("Bob")) | |
| # Let's birth a human and add them to the manager | |
| person3 = Person("Charlie", "Thirty") | |
| """ | |
| Notice how this is accepted behaviour from the class: it produces no error and looks relatively innocuous. However, | |
| things go wrong as soon as we do anything with this value, as shown below. | |
| """ | |
| try: | |
| person3.have_birthday() | |
| except TypeError: | |
| print(f"{person3.name}, is not a valid age") | |
| manager.add_person(person3) | |
| print(manager.get_person("Charlie")) | |
| for human in manager.people: | |
| print({human.name: human.age}) | |
| """ | |
| How about when using it within the context of pandas? | |
| """ | |
| import pandas as pd | |
| # Create a dataframe with the people | |
| people_dict = {"people": manager.people, "ages": [person.age for person in manager.people]} | |
| df = pd.DataFrame.from_dict(people_dict) | |
| # Now we have to go back and add additional code to handle this. If we had type hints, we could have avoided this | |
| print(df) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| """ | |
| Notice how mypy doesn't pick this error up, because it has no idea of the types until runtime. | |
| """ | |
| # Look at me, I'm just a simple little function. Please don't misuse my poorly laid out type | |
def add_new_column(df, column_name, default_value):
    """Set ``df[column_name]`` to ``default_value`` and hand back the same ``df``.

    Intentionally unannotated: nothing here tells a type checker what the
    arguments are supposed to be.
    """
    df[column_name] = default_value
    return df
| data = {'A': [1, 2, 3]} | |
| data_df = pd.DataFrame(data) | |
| data_df_with_col = add_new_column(data_df, 'B', 0) | |
| print(data_df_with_col) | |
| # Breaking this function by passing a boolean as the column name, because nothing is stopping us from doing that | |
| data = {'A': [1, 2, 3]} | |
| data_df = pd.DataFrame(data) | |
| try: | |
| data_df_with_col = add_new_column(data_df, False, '0') | |
| print(data_df_with_col) | |
| except TypeError: | |
| print("You silly goose. Error: Column name must be a string") | |
| # Now with proper type hinting, we can spot this is misused and refactor our code before running anything | |
def add_new_column_with_check(df: pd.DataFrame, column_name: str, default_value: int) -> pd.DataFrame:
    """Set ``df[column_name]`` to ``default_value`` and return the same frame.

    Every parameter is annotated so that a static type checker can flag a
    misuse (such as a non-string column name) before the code is run. The
    "check" is the static one: no validation is performed at runtime.

    Args:
        df: Frame to modify in place.
        column_name: Name of the column to create or overwrite.
        default_value: Value assigned to the column.

    Returns:
        The same ``df`` object that was passed in.
    """
    df[column_name] = default_value
    return df
| data = {'A': [1, 2, 3]} | |
| data_df = pd.DataFrame(data) | |
| data_df_with_col = add_new_column(data_df, 'B', 0) | |
| try: | |
| data_df_with_col = add_new_column_with_check(data_df, False, '0') | |
| print(data_df_with_col) | |
| except TypeError: | |
| print("You silly goose. Error: Column name must be a string") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| """ | |
| Here's an example of where you can save yourself some time by using type hints. | |
| Let's say we have a huge dataframe and we want to apply a function to a column. We can use the apply function in pandas. | |
| However, what if you are using the incorrect data type? This is where type hints can save you time and prevent you from | |
| waiting for the code to run before you realise you've made a mistake. | |
| """ | |
| size = 100 | |
| list_size = [x for x in range(size)] | |
def apply_function(df_to_apply):
    """Add a column 'C' holding every value of column 'B' multiplied by two.

    The frame is modified in place and the same object is returned. The
    parameter is deliberately left unannotated.
    """
    def _double(value):
        return value * 2

    df_to_apply['C'] = df_to_apply['B'].apply(_double)
    return df_to_apply
| # Notice this still runs, even though we've passed completely incorrect data types | |
| data = {'A': list_size, 'B': size} | |
| dummy_df = pd.DataFrame(data) | |
| dummy_df = apply_function(dummy_df) | |
| print(dummy_df) | |
def reapply_function(df_to_apply: pd.DataFrame) -> pd.DataFrame:
    """Write twice the value of each 'B' entry into a column 'C'.

    The frame is updated in place and the same object is returned.
    """
    doubled_b = df_to_apply['B'].apply(lambda value: value * 2)
    df_to_apply['C'] = doubled_b
    return df_to_apply
| correct_data: dict[str, list[int]] = {'A': list_size, 'B': list_size} | |
| corrected_dummy_df = pd.DataFrame(correct_data) | |
| corrected_dummy_df = reapply_function(corrected_dummy_df) | |
| print(corrected_dummy_df) |
Author
Author
I'd also encourage you to read through https://github.com/typeddjango/awesome-python-typing?tab=readme-ov-file#stub-packages which has a much more comprehensive approach to Python type checking.
Author
For VS Code users, see https://github.com/microsoft/vscode-mypy (for PyCharm users, it's done out of the box).
This is also a good cheat sheet: https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Commands used are
pip install pandas mypy
mypy --ignore-missing-imports --disallow-incomplete-defs