In Part 1, we covered
In this part, we'll
Reference: https://dash.plotly.com/
Screenshot from https://plotly.com/python/
In Codespaces, it seems we probably don't need to install anything, but if you're working in another environment...
python3 -m pip install pandas
python3 -m pip install plotly
import plotly.express as px
import csv
def data_prep(filename, encoding="utf-8"):
    """Read a CSV file into a 2D list.

    Args:
        filename: Path of the CSV file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            rather than the platform-dependent locale encoding.

    Returns:
        A list with one entry per CSV row, in file order (a header row, if
        the file has one, is returned as the first entry). Each entry is a
        list of that row's fields as strings.
    """
    # newline="" leaves line-ending handling to the csv module; the csv docs
    # require it so that line breaks inside quoted fields are read correctly.
    with open(filename, newline="", encoding=encoding) as movie_file:
        return list(csv.reader(movie_file))
def convert_column_to_int(data_2d_list, col_num):
    """Convert one column of a 2D list from strings to ints, in place.

    Args:
        data_2d_list: List of rows; each row is a list and is modified
            in place.
        col_num: Index of the column to convert in every row.

    Returns:
        None. The converted values replace the originals inside
        data_2d_list.

    Raises:
        ValueError: If a value in the column cannot be parsed by int().
        IndexError: If a row has no element at col_num.
    """
    for row in data_2d_list:
        row[col_num] = int(row[col_num])
# Load the CSV, then separate the header row from the movie rows.
rows = data_prep("HighestHolywoodGrossingMovies.csv")
header, data = rows[0], rows[1:]

# World sales (column 7) is read in as a string by default, so make it numeric.
convert_column_to_int(data, 7)

# Column 1 holds the movie titles and column 7 the world sales numbers.
fig = px.bar(
    data,
    x=1,
    y=7,
    labels={"1": "Title", "7": "World Sales ($)"},
    title="Movie Sales",
)
fig.show()
Again, most Plotly examples use Pandas dataframes, but you don't have to teach your students Pandas
It works even better with a list of dictionary records (e.g., common with JSON)
Here's a way you can convert CSV data to JSON for your students
import pandas as pd
# Convert the CSV into a JSON file holding one record (dict) per row.
csv_path = "HighestHolywoodGrossingMovies.csv"
json_path = "HighestHolywoodGrossingMovies.json"

df = pd.read_csv(csv_path)
df.to_json(json_path, orient="records")
Here's the file: HighestHolywoodGrossingMovies.json
import json
from pprint import pprint
# Load the movie records and preview the first three.
# JSON text is UTF-8, so decode it explicitly instead of using the locale default.
with open("HighestHolywoodGrossingMovies.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

pprint(data[:3])
[{'Distributor': 'Walt Disney Studios Motion Pictures', 'Domestic Sales (in $)': 936662225, 'Genre': "['Action', 'Adventure', 'Sci-Fi']", 'International Sales (in $)': 1132859475, 'License': 'PG-13', 'Movie Info': 'As a new threat to the galaxy rises, Rey, a desert scavenger, ' 'and Finn, an ex-stormtrooper, must join Han Solo and ' 'Chewbacca to search for the one hope of restoring peace.', 'Movie Runtime': '2 hr 18 min', 'Release Date': 'December 16, 2015', 'Title': 'Star Wars: Episode VII - The Force Awakens (2015)', 'Unnamed: 0': 0, 'World Sales (in $)': 2069521700}, {'Distributor': 'Walt Disney Studios Motion Pictures', 'Domestic Sales (in $)': 858373000, 'Genre': "['Action', 'Adventure', 'Drama', 'Sci-Fi']", 'International Sales (in $)': 1939128328, 'License': 'PG-13', 'Movie Info': 'After the devastating events of Avengers: Infinity War, the ' 'universe is in ruins. With the help of remaining allies, the ' "Avengers assemble once more in order to reverse Thanos' " 'actions and restore balance to the universe.', 'Movie Runtime': '3 hr 1 min', 'Release Date': 'April 24, 2019', 'Title': 'Avengers: Endgame (2019)', 'Unnamed: 0': 1, 'World Sales (in $)': 2797501328}, {'Distributor': 'Twentieth Century Fox', 'Domestic Sales (in $)': 760507625, 'Genre': "['Action', 'Adventure', 'Fantasy', 'Sci-Fi']", 'International Sales (in $)': 2086738578, 'License': 'PG-13', 'Movie Info': 'A paraplegic Marine dispatched to the moon Pandora on a ' 'unique mission becomes torn between following his orders and ' 'protecting the world he feels is his home.', 'Movie Runtime': '2 hr 42 min', 'Release Date': 'December 16, 2009', 'Title': 'Avatar (2009)', 'Unnamed: 0': 2, 'World Sales (in $)': 2847246203}]
import plotly.express as px
import json
# Load the movie records (one dict per movie).
# JSON text is UTF-8, so decode it explicitly instead of using the locale default.
with open("HighestHolywoodGrossingMovies.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

# With dict records, columns are referenced by key rather than by index.
fig = px.bar(data, x="Title", y="World Sales (in $)", title="Movie Sales")
fig.show()
This visualization has too many things on the x axis!
Can you write some code so that it only plots a subset of the data?
Make it your own
One of the really cool things about Dash is that it is designed to work well with Plotly.
There is a Dash component called Graph, which expects a parameter called figure - you can pass that any kind of Plotly figure.
from dash import Dash, html, dcc
import plotly.express as px
import json
# Load the movie records (one dict per movie).
# JSON text is UTF-8, so decode it explicitly instead of using the locale default.
with open("HighestHolywoodGrossingMovies.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

app = Dash(__name__)

# Page layout: a Markdown heading above a bar chart of world sales per title.
app.layout = html.Div(
    children=[
        dcc.Markdown(
            id="title",
            children="## Movie Sales Dashboard",
        ),
        dcc.Graph(
            id="movie_sales_bar_graph",
            figure=px.bar(data, x="Title", y="World Sales (in $)", title="Movie Sales"),
        ),
    ]
)

if __name__ == "__main__":
    # app.run replaces the deprecated app.run_server, which recent Dash
    # releases no longer accept.
    app.run(debug=True)
The figure parameter of a dcc.Graph can be updated by a callback function.
from dash import Dash, html, dcc
from dash.dependencies import Input, Output
import plotly.express as px
import json
def get_movie_titles(data_records):
    """Return the "Title" value of every record, in input order.

    Args:
        data_records: Iterable of dict-like records, each with a "Title" key.

    Returns:
        A list of the titles; empty when data_records is empty.

    Raises:
        KeyError: If a record has no "Title" key.
    """
    return [record["Title"] for record in data_records]
# Load the movie records (one dict per movie).
# JSON text is UTF-8, so decode it explicitly instead of using the locale default.
with open("HighestHolywoodGrossingMovies.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

app = Dash(__name__)

# Page layout: heading, a multi-select dropdown of movie titles, and the bar
# chart whose figure is driven by the dropdown's value.
app.layout = html.Div(
    children=[
        dcc.Markdown(
            id="title",
            children="## Movie Sales Dashboard",
        ),
        dcc.Dropdown(
            id="titles_dropdown",
            options=get_movie_titles(data),
            # Titles selected when the page first loads.
            value=[
                "Star Wars: Episode VII - The Force Awakens (2015)",
                "Avengers: Endgame (2019)",
            ],
            multi=True,
        ),
        dcc.Graph(
            id="movie_sales_bar_graph",
            figure=px.bar(data, x="Title", y="World Sales (in $)", title="Movie Sales"),
        ),
    ]
)
@app.callback(
    Output("movie_sales_bar_graph", "figure"),
    Input("titles_dropdown", "value"),
)
def update_sales_graph(selected_title_list):
    """Rebuild the sales bar chart for the titles picked in the dropdown.

    Args:
        selected_title_list: Titles currently selected in the
            "titles_dropdown" component. An empty list or None is treated
            as "nothing selected".

    Returns:
        A Plotly Express bar figure of "World Sales (in $)" per "Title",
        restricted to the selected movies and kept in the order they appear
        in the module-level data list.
    """
    selected_titles = selected_title_list or []
    records_to_display = [
        movie_record
        for movie_record in data
        if movie_record["Title"] in selected_titles
    ]
    # Hand px.bar explicit columns instead of the record list: an empty list
    # of records has no "Title" column for px.bar to look up, whereas empty
    # columns still describe a valid (empty) chart.
    chart_columns = {
        "Title": [record["Title"] for record in records_to_display],
        "World Sales (in $)": [
            record["World Sales (in $)"] for record in records_to_display
        ],
    }
    return px.bar(chart_columns, x="Title", y="World Sales (in $)", title="Movie Sales")
if __name__ == "__main__":
    # app.run replaces the deprecated app.run_server, which recent Dash
    # releases no longer accept.
    app.run(debug=True)
Here's some data from the WHO on life expectancy
HALElifeExpectancyAtBirth.json
source: https://www.kaggle.com/datasets/utkarshxy/who-worldhealth-statistics-2020-complete
Create a dashboard with this data.
See Plotly chart examples here: https://plotly.com/python/
See map examples at https://plotly.com/python/maps/
import plotly.express as px
import json
def filter_data(data_records, year, sex):
    """Select the records that match a given period and sex category.

    Args:
        data_records: Iterable of dict-like records, each with "Period" and
            "Dim1" keys.
        year: Value a record's "Period" must equal.
        sex: Value a record's "Dim1" must equal.

    Returns:
        A list of the matching records, in input order; empty when nothing
        matches.

    Raises:
        KeyError: If an examined record lacks "Period" (or "Dim1", once its
            "Period" has matched).
    """
    return [
        record
        for record in data_records
        if record["Period"] == year and record["Dim1"] == sex
    ]
# Load the WHO life-expectancy records.
# Decode as UTF-8 explicitly so non-ASCII country names are not garbled on
# platforms whose locale default is a different encoding; the names are used
# to match locations on the map.
with open("HALElifeExpectancyAtBirth.json", encoding="utf-8") as json_file:
    data = json.load(json_file)

# Keep only the 2019 records that cover both sexes.
display_data = filter_data(data, 2019, "Both sexes")

# Colour each country (matched by its name in "Location") by "First Tooltip".
fig = px.choropleth(
    display_data,
    locations="Location",
    locationmode="country names",
    color="First Tooltip",
    labels={"First Tooltip": "Life Expectancy (years)"},
    title="Healthy Life Expectancy at birth, 2019",
)
fig.show()
Dash provides easy-to-follow instructions for deploying on Heroku: https://dash.plotly.com/deployment
You may be able to do it for free on PythonAnywhere: https://towardsdatascience.com/the-easiest-way-to-deploy-your-dash-app-for-free-f92c575bb69e
One of my students (Jae Choi) put together a template for deploying a Dash app on AWS Lambda (some free requests, $0.20 per million requests): https://github.com/Jaethem8y/dash-serverless-template