Skip to content

Commit 436dce9

Browse files
committed
fixed Player html parsing and scaled back methods; fixes #46
1 parent 5d5cc64 commit 436dce9

File tree

1 file changed

+13
-245
lines changed

1 file changed

+13
-245
lines changed

espncricinfo/player.py

Lines changed: 13 additions & 245 deletions
Original file line number | Diff line number | Diff line change
@@ -7,47 +7,30 @@
77
class Player(object):
88

99
def __init__(self, player_id):
10-
self.url = "https://www.espncricinfo.com/ci/content/player/{0}.html".format(str(player_id))
10+
self.url = "https://www.espncricinfo.com/player/player-name-{0}".format(str(player_id))
1111
self.json_url = "http://core.espnuk.org/v2/sports/cricket/athletes/{0}".format(str(player_id))
1212
self.parsed_html = self.get_html()
1313
self.json = self.get_json()
14-
self.player_information = self._parse_player_information()
1514
self.cricinfo_id = str(player_id)
15+
self.__unicode__ = self._full_name()
16+
self.name = self._name()
17+
self.first_name = self._first_name()
18+
self.full_name = self._full_name()
19+
self.date_of_birth = self._date_of_birth()
20+
self.current_age = self._current_age()
21+
self.playing_role = self._playing_role()
22+
self.batting_style = self._batting_style()
23+
self.bowling_style = self._bowling_style()
24+
1625
if self.parsed_html:
17-
self.__unicode__ = self._full_name()
18-
self.name = self._name()
19-
self.first_name = self._first_name()
20-
self.full_name = self._full_name()
21-
self.date_of_birth = self._date_of_birth()
22-
self.current_age = self._current_age()
2326
self.major_teams = self._major_teams()
24-
self.nickname = self._nickname()
25-
self.playing_role = self._playing_role()
26-
self.batting_style = self._batting_style()
27-
self.bowling_style = self._bowling_style()
28-
self.batting_fielding_averages = self._batting_fielding_averages()
29-
self.bowling_averages = self._bowling_averages()
30-
self.test_debut = self._test_debut()
31-
self.last_test = self._last_test()
32-
self.t20i_debut = self._t20i_debut()
33-
self.last_t20i = self._last_t20i()
34-
self.first_class_debut = self._first_class_debut()
35-
self.last_first_class = self._last_first_class()
36-
self.list_a_debut = self._list_a_debut()
37-
self.last_list_a = self._last_list_a()
38-
self.t20_debut = self._t20_debut()
39-
self.last_t20 = self._last_t20()
40-
self.odi_debut = self._odi_debut()
41-
self.last_odi = self._last_odi()
42-
self.recent_matches = self._recent_matches()
4327

4428
def get_html(self):
4529
r = requests.get(self.url)
4630
if r.status_code == 404:
4731
raise PlayerNotFoundError
4832
else:
49-
soup = BeautifulSoup(r.text, 'html.parser')
50-
return soup.find("div", class_="pnl490M")
33+
return BeautifulSoup(r.text, 'html.parser')
5134

5235
def get_json(self):
5336
r = requests.get(self.json_url)
@@ -56,9 +39,6 @@ def get_json(self):
5639
else:
5740
return r.json()
5841

59-
def _parse_player_information(self):
60-
return self.parsed_html.find_all('p', class_='ciPlayerinformationtxt')
61-
6242
def _name(self):
6343
return self.json['name']
6444

@@ -81,13 +61,7 @@ def _current_age(self):
8161
return self.json['age']
8262

8363
def _major_teams(self):
84-
return next((p.text.replace('Major teams ','').split(', ') for p in self.player_information if p.find('b').text == 'Major teams'), None)
85-
86-
def _nickname(self):
87-
return next((p.find('span').text for p in self.player_information if p.find('b').text == 'Nickname'), None)
88-
89-
def _also_known_as(self):
90-
return next((p.find('span').text for p in self.player_information if p.find('b').text == 'Also known as'), None)
64+
return [x.text for x in self.parsed_html.find('div', class_='overview-teams-grid').find_all('h5')]
9165

9266
def _playing_role(self):
9367
return self.json['position']
@@ -98,212 +72,6 @@ def _batting_style(self):
9872
def _bowling_style(self):
9973
return next((x for x in self.json['style'] if x['type'] == 'bowling'), None)
10074

101-
def _batting_fielding_averages(self):
102-
if len(self.parsed_html.findAll('table', class_='engineTable')) == 4:
103-
headers = ['matches', 'innings', 'not_out', 'runs', 'high_score', 'batting_average', 'balls_faced', 'strike_rate', 'centuries', 'fifties', 'fours', 'sixes', 'catches', 'stumpings']
104-
bat_field = [td.text.strip() for td in self.parsed_html.find('table', class_='engineTable').findAll('td')]
105-
num_formats = int(len(bat_field)/15)
106-
format_positions = [15*x for x in range(num_formats)]
107-
formats = [bat_field[x] for x in format_positions]
108-
avg_starts = [x+1 for x in format_positions[:num_formats]]
109-
avg_finish = [x+14 for x in avg_starts]
110-
format_averages = [bat_field[x:y] for x,y in zip(avg_starts, avg_finish)]
111-
combined = list(zip(formats, format_averages))
112-
l = [{x: dict(zip(headers, y))} for x,y in combined]
113-
return { k: v for d in l for k, v in d.items() }
114-
else:
115-
return None
116-
117-
def _bowling_averages(self):
118-
if len(self.parsed_html.findAll('table', class_='engineTable')) == 4:
119-
headers = ['matches', 'innings', 'balls_delivered', 'runs', 'wickets', 'best_innings', 'best_match', 'bowling_average', 'economy', 'strike_rate', 'four_wickets', 'five_wickets', 'ten_wickets']
120-
bowling = [td.text.strip() for td in self.parsed_html.findAll('table', class_='engineTable')[1].findAll('td')]
121-
num_formats = int(len(bowling)/14)
122-
format_positions = [14*x for x in range(num_formats)]
123-
formats = [bowling[x] for x in format_positions]
124-
avg_starts = [x+1 for x in format_positions[:num_formats]]
125-
avg_finish = [x+13 for x in avg_starts]
126-
format_averages = [bowling[x:y] for x,y in zip(avg_starts, avg_finish)]
127-
combined = list(zip(formats, format_averages))
128-
l = [{x: dict(zip(headers, y))} for x,y in combined]
129-
return { k: v for d in l for k, v in d.items() }
130-
else:
131-
return None
132-
133-
def _debuts_and_lasts(self):
134-
if len(self.parsed_html.findAll('table', class_='engineTable')) == 4:
135-
return self.parsed_html.findAll('table', class_='engineTable')[2]
136-
else:
137-
return None
138-
139-
def _test_debut(self):
140-
if self._debuts_and_lasts() is not None:
141-
test_debut = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Test debut'), None)
142-
if test_debut:
143-
url = 'http://www.espncricinfo.com'+test_debut.find('a')['href']
144-
match_id = int(test_debut.find('a')['href'].split('/', 4)[4].split('.')[0])
145-
title = test_debut.findAll('td')[1].text.replace(' scorecard','')
146-
return {'url': url, 'match_id': match_id, 'title': title}
147-
else:
148-
return None
149-
else:
150-
return None
151-
152-
def _last_test(self):
153-
if self._debuts_and_lasts() is not None:
154-
last_test = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Last Test'), None)
155-
if last_test:
156-
url = 'http://www.espncricinfo.com'+last_test.find('a')['href']
157-
match_id = int(last_test.find('a')['href'].split('/', 4)[4].split('.')[0])
158-
title = last_test.findAll('td')[1].text.replace(' scorecard','')
159-
return {'url': url, 'match_id': match_id, 'title': title}
160-
else:
161-
return None
162-
else:
163-
return None
164-
165-
def _t20i_debut(self):
166-
if self._debuts_and_lasts() is not None:
167-
t20i_debut = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'T20I debut'), None)
168-
if t20i_debut:
169-
url = 'http://www.espncricinfo.com'+t20i_debut.find('a')['href']
170-
match_id = int(t20i_debut.find('a')['href'].split('/', 4)[4].split('.')[0])
171-
title = t20i_debut.findAll('td')[1].text.replace(' scorecard','')
172-
return {'url': url, 'match_id': match_id, 'title': title}
173-
else:
174-
return None
175-
else:
176-
return None
177-
178-
def _last_t20i(self):
179-
if self._debuts_and_lasts() is not None:
180-
last_t20i = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Last T20I'), None)
181-
if last_t20i:
182-
url = 'http://www.espncricinfo.com'+last_t20i.find('a')['href']
183-
match_id = int(last_t20i.find('a')['href'].split('/', 4)[4].split('.')[0])
184-
title = last_t20i.findAll('td')[1].text.replace(' scorecard','')
185-
return {'url': url, 'match_id': match_id, 'title': title}
186-
else:
187-
return None
188-
else:
189-
return None
190-
191-
def _first_class_debut(self):
192-
if self._debuts_and_lasts() is not None:
193-
first_class_debut = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'First-class debut'), None)
194-
if first_class_debut:
195-
try:
196-
url = 'http://www.espncricinfo.com'+first_class_debut.find('a')['href']
197-
match_id = int(first_class_debut.find('a')['href'].split('/', 4)[4].split('.')[0])
198-
title = first_class_debut.findAll('td')[1].text.replace(' scorecard','')
199-
return {'url': url, 'match_id': match_id, 'title': title}
200-
except:
201-
return {'url': None, 'match_id': None, 'title': first_class_debut.findAll('td')[1].text}
202-
else:
203-
return None
204-
else:
205-
return None
206-
207-
def _last_first_class(self):
208-
if self._debuts_and_lasts() is not None:
209-
last_first_class = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Last First-class'), None)
210-
if last_first_class:
211-
url = 'http://www.espncricinfo.com'+last_first_class.find('a')['href']
212-
match_id = int(last_first_class.find('a')['href'].split('/', 4)[4].split('.')[0])
213-
title = last_first_class.findAll('td')[1].text.replace(' scorecard','')
214-
return {'url': url, 'match_id': match_id, 'title': title}
215-
else:
216-
return None
217-
return None
218-
219-
def _list_a_debut(self):
220-
if self._debuts_and_lasts() is not None:
221-
list_a_debut = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'List A debut'), None)
222-
if list_a_debut:
223-
try:
224-
url = 'http://www.espncricinfo.com'+list_a_debut.find('a')['href']
225-
match_id = int(list_a_debut.find('a')['href'].split('/', 4)[4].split('.')[0])
226-
title = list_a_debut.findAll('td')[1].text.replace(' scorecard','')
227-
return {'url': url, 'match_id': match_id, 'title': title}
228-
except:
229-
return {'url': None, 'match_id': None, 'title': list_a_debut.findAll('td')[1].text}
230-
else:
231-
return None
232-
else:
233-
return None
234-
235-
def _last_list_a(self):
236-
if self._debuts_and_lasts() is not None:
237-
last_list_a = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Last List A'), None)
238-
if last_list_a:
239-
url = 'http://www.espncricinfo.com'+last_list_a.find('a')['href']
240-
match_id = int(last_list_a.find('a')['href'].split('/', 4)[4].split('.')[0])
241-
title = last_list_a.findAll('td')[1].text.replace(' scorecard','')
242-
return {'url': url, 'match_id': match_id, 'title': title}
243-
else:
244-
return None
245-
else:
246-
return None
247-
248-
def _t20_debut(self):
249-
if self._debuts_and_lasts() is not None:
250-
t20_debut = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Twenty20 debut'), None)
251-
if t20_debut:
252-
url = 'http://www.espncricinfo.com'+t20_debut.find('a')['href']
253-
match_id = int(t20_debut.find('a')['href'].split('/', 4)[4].split('.')[0])
254-
title = t20_debut.findAll('td')[1].text.replace(' scorecard','')
255-
return {'url': url, 'match_id': match_id, 'title': title}
256-
else:
257-
return None
258-
else:
259-
return None
260-
261-
def _last_t20(self):
262-
if self._debuts_and_lasts() is not None:
263-
last_t20 = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Last Twenty20'), None)
264-
if last_t20:
265-
url = 'http://www.espncricinfo.com'+last_t20.find('a')['href']
266-
match_id = int(last_t20.find('a')['href'].split('/', 4)[4].split('.')[0])
267-
title = last_t20.findAll('td')[1].text.replace(' scorecard','')
268-
return {'url': url, 'match_id': match_id, 'title': title}
269-
else:
270-
return None
271-
else:
272-
return None
273-
274-
def _odi_debut(self):
275-
if self._debuts_and_lasts() is not None:
276-
odi_debut = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'ODI debut'), None)
277-
if odi_debut:
278-
url = 'http://www.espncricinfo.com'+odi_debut.find('a')['href']
279-
match_id = int(odi_debut.find('a')['href'].split('/', 4)[4].split('.')[0])
280-
title = odi_debut.findAll('td')[1].text.replace(' scorecard','')
281-
return {'url': url, 'match_id': match_id, 'title': title}
282-
else:
283-
return None
284-
else:
285-
return None
286-
287-
def _last_odi(self):
288-
if self._debuts_and_lasts() is not None:
289-
last_odi = next((tr for tr in self._debuts_and_lasts().findAll('tr') if tr.find('b').text == 'Last ODI'), None)
290-
if last_odi:
291-
url = 'http://www.espncricinfo.com'+last_odi.find('a')['href']
292-
match_id = int(last_odi.find('a')['href'].split('/', 4)[4].split('.')[0])
293-
title = last_odi.findAll('td')[1].text.replace(' scorecard','')
294-
return {'url': url, 'match_id': match_id, 'title': title}
295-
else:
296-
return None
297-
else:
298-
return None
299-
300-
def _recent_matches(self):
301-
try:
302-
table = self.parsed_html.findAll('table', class_='engineTable')[-1]
303-
return [x.find('a')['href'].split('/', 4)[4].split('.')[0] for x in table.findAll('tr')[1:]]
304-
except:
305-
return None
306-
30775
def in_team_for_match(self, match_id):
30876
m = Match(match_id)
30977
if next((p for p in m.team_1_players if p['object_id'] == self.cricinfo_id), None) or next((p for p in m.team_2_players if p['object_id'] == self.cricinfo_id), None):

0 commit comments

Comments
 (0)