2021-10-16 09:32:33 +08:00
|
|
|
import bs4
|
2019-10-28 04:48:38 +08:00
|
|
|
import requests
|
|
|
|
|
|
|
|
|
2021-10-16 09:32:33 +08:00
|
|
|
def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]:
|
|
|
|
return {
|
|
|
|
"name": soup.h3.a.text,
|
|
|
|
"genre": soup.find("span", class_="genre").text.strip(),
|
|
|
|
"rating": soup.strong.text,
|
|
|
|
"page_link": f"https://www.imdb.com{soup.a.get('href')}",
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def get_imdb_top_movies(num_movies: int = 5) -> tuple:
|
|
|
|
"""Get the top num_movies most highly rated movies from IMDB and
|
|
|
|
return a tuple of dicts describing each movie's name, genre, rating, and URL.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
num_movies: The number of movies to get. Defaults to 5.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
A list of tuples containing information about the top n movies.
|
|
|
|
|
|
|
|
>>> len(get_imdb_top_movies(5))
|
|
|
|
5
|
|
|
|
>>> len(get_imdb_top_movies(-3))
|
|
|
|
0
|
|
|
|
>>> len(get_imdb_top_movies(4.99999))
|
|
|
|
4
|
|
|
|
"""
|
|
|
|
num_movies = int(float(num_movies))
|
|
|
|
if num_movies < 1:
|
|
|
|
return ()
|
2019-11-15 02:59:43 +08:00
|
|
|
base_url = (
|
2021-10-16 09:32:33 +08:00
|
|
|
"https://www.imdb.com/search/title?title_type="
|
|
|
|
f"feature&sort=num_votes,desc&count={num_movies}"
|
|
|
|
)
|
|
|
|
source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser")
|
|
|
|
return tuple(
|
|
|
|
get_movie_data_from_soup(movie)
|
|
|
|
for movie in source.find_all("div", class_="lister-item mode-advanced")
|
2019-11-15 02:59:43 +08:00
|
|
|
)
|
2019-10-28 04:48:38 +08:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
2021-10-16 09:32:33 +08:00
|
|
|
import json
|
|
|
|
|
|
|
|
num_movies = int(input("How many movies would you like to see? "))
|
|
|
|
print(
|
|
|
|
", ".join(
|
|
|
|
json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies)
|
|
|
|
)
|
|
|
|
)
|