-
-
Save rkhapov/69d251250aa1bb83dae4e41d008575a8 to your computer and use it in GitHub Desktop.
2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| from enum import Enum | |
| from html.parser import HTMLParser | |
class Sex(Enum):
    """Sex attributed to a first name, with a heuristic to guess it."""

    MALE = 1
    FEMALE = 2

    def __hash__(self):
        """Hash a member by its numeric value, consistent with ``__eq__``."""
        return self.value

    def __eq__(self, other):
        """Compare two members by value.

        Returns ``NotImplemented`` for objects that are not ``Sex`` members so
        that comparisons such as ``Sex.MALE == None`` evaluate to ``False``
        instead of raising ``AttributeError`` on the missing ``value``.
        """
        if not isinstance(other, Sex):
            return NotImplemented
        return self.value == other.value

    @staticmethod
    def get_sex_by_name(name):
        """Return ``Sex.MALE`` if *name* looks male, otherwise ``Sex.FEMALE``."""
        if Sex._is_male_name(name):
            return Sex.MALE
        return Sex.FEMALE

    @staticmethod
    def _is_male_name(name):
        """Tell whether *name* is treated as a male name.

        A name counts as male when it is one of the listed exceptions or when
        it does not end with 'а', 'я' or 'ь'.
        """
        name = str(name)
        # Male names that end with one of the otherwise "female" endings.
        # Kept local: a class-level attribute would become an Enum member.
        male_exceptions = ('Илья', 'Лёва', 'Никита', 'Игорь')
        return name in male_exceptions or not name.endswith(('а', 'я', 'ь'))
class HTMLStatisticParser(HTMLParser):
    """Collect per-year name counts from an HTML page.

    Text found inside both ``<td>`` and ``<h3>`` sets the current year; text
    found inside both ``<td>`` and ``<a>`` is read as two whitespace-separated
    words, the second of which is counted as a name. The collected data has
    the shape ``{year: {sex: {name: count}}}``.
    """

    def __init__(self):
        super().__init__()
        # Number of currently open (not yet closed) tags, keyed by tag name.
        self._tag_states = {}
        # Year that subsequently parsed names are attributed to.
        self._current_year = 0
        self._years_statistic = {}

    def get_statistic(self):
        """Return the collected ``{year: {sex: {name: count}}}`` mapping."""
        return self._years_statistic

    def handle_starttag(self, tag, attrs):
        """Record that one more *tag* element is open."""
        dict_name = HTMLStatisticParser._get_dict_name_by_tag(tag, attrs)
        self._tag_states[dict_name] = self._tag_states.get(dict_name, 0) + 1

    def handle_endtag(self, tag):
        """Record that one *tag* element was closed.

        An end tag without a matching open tag is ignored: it must neither
        raise ``KeyError`` nor push the counter below zero, which would hide
        later, properly opened elements.
        """
        dict_name = HTMLStatisticParser._get_dict_name_by_tag(tag, None)
        if self._tag_states.get(dict_name, 0) > 0:
            self._tag_states[dict_name] -= 1

    def handle_data(self, data):
        """Dispatch text to the name or year handler based on open tags."""
        if not self._is_parser_in_tag('td'):
            return
        if self._is_parser_in_tag('a'):
            self._handle_name(data)
        elif self._is_parser_in_tag('h3'):
            self._handle_year(data)

    def _handle_name(self, data):
        """Count the name contained in *data* for the current year.

        *data* is expected to consist of two whitespace-separated words; the
        second one is the name. Text of any other shape (blank text, other
        links) is skipped rather than aborting the whole parse.
        """
        parts = str(data).split()
        if len(parts) != 2:
            return
        name = parts[1]
        sex = Sex.get_sex_by_name(name)
        by_sex = self._years_statistic.setdefault(self._current_year, {})
        names = by_sex.setdefault(sex, {})
        names[name] = names.get(name, 0) + 1

    def _handle_year(self, data):
        """Make the integer in *data* the current year.

        Raises:
            ValueError: if *data* is not an integer literal.
        """
        self._current_year = int(data)

    def _is_parser_in_tag(self, tag_name):
        """Return True while at least one *tag_name* element is open."""
        return self._tag_states.get(tag_name, 0) >= 1

    def error(self, message):
        """Print *message* and terminate the program with exit status 1."""
        print(str(message))
        # Same effect as exit(1), without relying on the helper that the
        # ``site`` module injects into builtins.
        raise SystemExit(1)

    @staticmethod
    def _get_dict_name_by_tag(tag, attrs):
        """Return the key under which *tag* is tracked (the tag name itself)."""
        return tag
class Statistic:
    """Name statistics loaded from an HTML file, with optional result caching."""

    def __init__(self, filename, caching=True):
        """Parse *filename* and prepare the (initially empty) caches.

        Args:
            filename: path of the HTML file to parse.
            caching: when true, computed results are remembered and reused.
        """
        self._stat = self._make_stat(filename)
        self._caching = caching
        self._years_list = None
        self._caching_dict = None

    def get_years(self):
        """Return a tuple of all years present in the data, ascending."""
        if not self._caching:
            return self._make_years_list()
        if self._years_list is None:
            self._years_list = self._make_years_list()
        return self._years_list

    def get_name_statistic(self, year=None, sex=None):
        """Return ``(name, count)`` pairs sorted by descending count.

        Args:
            year: restrict to this year (anything ``int()`` accepts);
                ``None`` means all years.
            sex: restrict to this sex; ``None`` means both sexes.

        Raises:
            KeyError: if *year* is given but absent from the data.
        """
        if not self._caching:
            return self._make_list_for_condition(year, sex)
        if self._caching_dict is None:
            self._caching_dict = {}
        cache_key = (year, sex)
        if cache_key not in self._caching_dict:
            self._caching_dict[cache_key] = self._make_list_for_condition(year, sex)
        return self._caching_dict[cache_key]

    def _make_years_list(self):
        """Build the sorted tuple of years from the parsed data."""
        return tuple(sorted(self._stat.keys()))

    def _make_list_for_condition(self, year=None, sex=None):
        """Aggregate name counts over the selected years and sexes."""
        years_list = self.get_years() if year is None else (int(year),)
        sex_list = (Sex.MALE, Sex.FEMALE) if sex is None else (sex,)
        result_dict = {}
        for y in years_list:
            # An unknown year still raises KeyError, as callers may expect.
            year_stat = self._stat[y]
            for s in sex_list:
                # A year may contain names of only one sex; treat the missing
                # sex as "no names" instead of failing with KeyError.
                for name, count in year_stat.get(s, {}).items():
                    result_dict[name] = result_dict.get(name, 0) + count
        return tuple(sorted(result_dict.items(), key=lambda node: node[1], reverse=True))

    @staticmethod
    def _make_stat(filename):
        """Read *filename* and return the statistic parsed from its HTML."""
        # NOTE(review): the file is decoded with the platform default
        # encoding; confirm the page's actual encoding before pinning one.
        with open(filename, 'r') as file:
            content = file.read()
        parser = HTMLStatisticParser()
        parser.feed(content)
        return parser.get_statistic()
def make_stat(filename):
    """Build and return a ``Statistic`` for the HTML file at *filename*."""
    statistic = Statistic(filename)
    return statistic
def extract_years(stat):
    """Return the years known to *stat* as a list of strings.

    Raises:
        TypeError: if *stat* is not a ``Statistic``.
    """
    if not isinstance(stat, Statistic):
        raise TypeError('Expected Statistic class')
    return [str(year) for year in stat.get_years()]
def extract_general(stat):
    """Return the name statistic of *stat* over all years and both sexes.

    Raises:
        TypeError: if *stat* is not a ``Statistic``.
    """
    if not isinstance(stat, Statistic):
        raise TypeError('Expected Statistic class')
    return stat.get_name_statistic()
def extract_general_male(stat):
    """Return the name statistic of *stat* over all years, male names only.

    Raises:
        TypeError: if *stat* is not a ``Statistic``.
    """
    if not isinstance(stat, Statistic):
        raise TypeError('Expected Statistic class')
    return stat.get_name_statistic(sex=Sex.MALE)
def extract_general_female(stat):
    """Return the name statistic of *stat* over all years, female names only.

    Raises:
        TypeError: if *stat* is not a ``Statistic``.
    """
    if not isinstance(stat, Statistic):
        raise TypeError('Expected Statistic class')
    return stat.get_name_statistic(sex=Sex.FEMALE)
def extract_year(stat, year):
    """Return the name statistic of *stat* for *year*, both sexes.

    Raises:
        TypeError: if *stat* is not a ``Statistic``.
    """
    if not isinstance(stat, Statistic):
        raise TypeError('Expected Statistic class')
    return stat.get_name_statistic(year=year)
def extract_year_male(stat, year):
    """Return the name statistic of *stat* for *year*, male names only.

    Raises:
        TypeError: if *stat* is not a ``Statistic``.
    """
    if not isinstance(stat, Statistic):
        raise TypeError('Expected Statistic class')
    return stat.get_name_statistic(year=year, sex=Sex.MALE)
def extract_year_female(stat, year):
    """Return the name statistic of *stat* for *year*, female names only.

    Raises:
        TypeError: if *stat* is not a ``Statistic``.
    """
    if not isinstance(stat, Statistic):
        raise TypeError('Expected Statistic class')
    return stat.get_name_statistic(year=year, sex=Sex.FEMALE)
if __name__ == "__main__":
    # Demo run: parse the local 'home.html' page, then print the list of
    # years followed by the overall name statistic.
    stat = make_stat('home.html')
    years = extract_years(stat)
    print('Years:', years)
    general = extract_general(stat)
    print('General:', general)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment