In [3]:
from dsc106_utils import *
In [4]:
# Load seaborn's 'mpg' example dataset and discard every row that contains
# a missing value, so later group-bys and plots work on complete records.
mpg = (
    sns.load_dataset('mpg')
    .dropna()
)
mpg
Out[4]:
mpg | cylinders | displacement | horsepower | ... | acceleration | model_year | origin | name | |
---|---|---|---|---|---|---|---|---|---|
0 | 18.0 | 8 | 307.0 | 130.0 | ... | 12.0 | 70 | usa | chevrolet chevelle malibu |
1 | 15.0 | 8 | 350.0 | 165.0 | ... | 11.5 | 70 | usa | buick skylark 320 |
2 | 18.0 | 8 | 318.0 | 150.0 | ... | 11.0 | 70 | usa | plymouth satellite |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
395 | 32.0 | 4 | 135.0 | 84.0 | ... | 11.6 | 82 | usa | dodge rampage |
396 | 28.0 | 4 | 120.0 | 79.0 | ... | 18.6 | 82 | usa | ford ranger |
397 | 31.0 | 4 | 119.0 | 82.0 | ... | 19.4 | 82 | usa | chevy s-10 |
392 rows × 9 columns
In [5]:
# Number of cars per origin, most frequent first.
# The column names are set explicitly: pandas >= 2.0 already yields
# 'origin'/'count', but older versions would yield 'index'/'origin'.
(
    mpg['origin']
    .value_counts()
    .rename_axis('origin')
    .reset_index(name='count')
)
Out[5]:
origin | count | |
---|---|---|
0 | usa | 245 |
1 | japan | 79 |
2 | europe | 68 |
Nimble design moves¶
In [6]:
# Bar chart: origin on x, number of cars on y.
px.bar(
    # Explicit column names so 'origin'/'count' exist on every pandas version
    # (pandas < 2.0 would otherwise produce 'index'/'origin').
    mpg['origin'].value_counts().rename_axis('origin').reset_index(name='count'),
    x='origin',
    y='count',
)
In [7]:
# Same bar chart with the axes swapped: counts on x, origin on y.
px.bar(
    # Explicit column names so 'origin'/'count' exist on every pandas version
    # (pandas < 2.0 would otherwise produce 'index'/'origin').
    mpg['origin'].value_counts().rename_axis('origin').reset_index(name='count'),
    y='origin',
    x='count',
)
In [8]:
# Same data drawn with point marks instead of bars: origin on x, count on y.
px.scatter(
    # Explicit column names so 'origin'/'count' exist on every pandas version
    # (pandas < 2.0 would otherwise produce 'index'/'origin').
    mpg['origin'].value_counts().rename_axis('origin').reset_index(name='count'),
    x='origin',
    y='count',
)
In [9]:
# Point marks with the axes swapped: counts on x, origin on y.
px.scatter(
    # Explicit column names so 'origin'/'count' exist on every pandas version
    # (pandas < 2.0 would otherwise produce 'index'/'origin').
    mpg['origin'].value_counts().rename_axis('origin').reset_index(name='count'),
    y='origin',
    x='count',
)
1 Nominal, 1 Quantitative¶
In [10]:
# One nominal variable (origin) against one quantitative variable (count),
# encoded as bars.
px.bar(
    # Explicit column names so 'origin'/'count' exist on every pandas version
    # (pandas < 2.0 would otherwise produce 'index'/'origin').
    mpg['origin'].value_counts().rename_axis('origin').reset_index(name='count'),
    x='origin',
    y='count',
)
In [11]:
# The same nominal/quantitative pair, encoded as points.
px.scatter(
    # Explicit column names so 'origin'/'count' exist on every pandas version
    # (pandas < 2.0 would otherwise produce 'index'/'origin').
    mpg['origin'].value_counts().rename_axis('origin').reset_index(name='count'),
    x='origin',
    y='count',
)
In [12]:
# Should we do this? A line connects neighbouring x values, which suggests an
# order or trend between them, yet `origin` is a nominal (unordered) variable.
px.line(
    # Explicit column names so 'origin'/'count' exist on every pandas version
    # (pandas < 2.0 would otherwise produce 'index'/'origin').
    mpg['origin'].value_counts().rename_axis('origin').reset_index(name='count'),
    x='origin',
    y='count',
)
Encoding 3 variables¶
In [15]:
# Baseline two-variable scatter: horsepower on x, fuel economy (mpg) on y.
px.scatter(data_frame=mpg, x='horsepower', y='mpg')
In [18]:
# Third variable on the color channel.
# Q-ratio: `acceleration` is a quantitative (ratio-scale) column.
px.scatter(
    data_frame=mpg,
    x='horsepower',
    y='mpg',
    color='acceleration',
)
In [19]:
# Third variable on the color channel.
# Nominal: `origin` is an unordered category (usa / japan / europe).
px.scatter(
    data_frame=mpg,
    x='horsepower',
    y='mpg',
    color='origin',
)
In [26]:
# Count cars for every (origin, cylinders) combination that occurs in the data;
# the result has one row per combination with the tally in a 'count' column.
occ = (
    mpg
    .groupby(['origin', 'cylinders'])
    .size()
    .reset_index(name='count')
)
occ
Out[26]:
origin | cylinders | count | |
---|---|---|---|
0 | europe | 4 | 61 |
1 | europe | 5 | 3 |
2 | europe | 6 | 4 |
... | ... | ... | ... |
6 | usa | 4 | 69 |
7 | usa | 6 | 73 |
8 | usa | 8 | 103 |
9 rows × 3 columns
In [30]:
# Bars of car counts per origin, with the cylinder count on the color channel.
# NOTE(review): `cylinders` holds integers, so Plotly Express presumably maps it
# to a continuous color scale; cast it to str if discrete colors are wanted.
# Confirm which is intended here.
px.bar(
    data_frame=occ,
    x='count',
    y='origin',
    color='cylinders',
)
In [36]:
# Count cars for every (origin, model_year) pair; the tally goes in 'count'.
omc = (
    mpg
    .groupby(['origin', 'model_year'])
    .size()
    .reset_index(name='count')
)
omc
Out[36]:
origin | model_year | count | |
---|---|---|---|
0 | europe | 70 | 5 |
1 | europe | 71 | 4 |
2 | europe | 72 | 5 |
... | ... | ... | ... |
36 | usa | 80 | 6 |
37 | usa | 81 | 13 |
38 | usa | 82 | 19 |
39 rows × 3 columns
In [37]:
# One line per origin: number of cars (y) for each model year (x).
px.line(data_frame=omc, x='model_year', y='count', color='origin')
What about 4 channels?¶
In [43]:
# Re-display the cleaned dataset to look for further columns to encode.
mpg
Out[43]:
mpg | cylinders | displacement | horsepower | ... | acceleration | model_year | origin | name | |
---|---|---|---|---|---|---|---|---|---|
0 | 18.0 | 8 | 307.0 | 130.0 | ... | 12.0 | 70 | usa | chevrolet chevelle malibu |
1 | 15.0 | 8 | 350.0 | 165.0 | ... | 11.5 | 70 | usa | buick skylark 320 |
2 | 18.0 | 8 | 318.0 | 150.0 | ... | 11.0 | 70 | usa | plymouth satellite |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
395 | 32.0 | 4 | 135.0 | 84.0 | ... | 11.6 | 82 | usa | dodge rampage |
396 | 28.0 | 4 | 120.0 | 79.0 | ... | 18.6 | 82 | usa | ford ranger |
397 | 31.0 | 4 | 119.0 | 82.0 | ... | 19.4 | 82 | usa | chevy s-10 |
392 rows × 9 columns
In [49]:
# Aggregate mpg for every (origin, model_year) pair.
# NOTE(review): despite the variable name, this takes the *maximum* mpg of each
# group, not the mean. The name is kept because a later cell reads `means`;
# confirm whether `.mean()` or a rename (e.g. `max_mpg`) was intended.
means = (
    mpg
    .groupby(['origin', 'model_year'], as_index=False)
    ['mpg']
    .max()
)
means
Out[49]:
origin | model_year | mpg | |
---|---|---|---|
0 | europe | 70 | 26.0 |
1 | europe | 71 | 30.0 |
2 | europe | 72 | 26.0 |
... | ... | ... | ... |
36 | usa | 80 | 32.1 |
37 | usa | 81 | 39.0 |
38 | usa | 82 | 38.0 |
39 rows × 3 columns
In [53]:
# Four channels: x = model year, y = origin, color = origin, size = aggregated mpg.
px.scatter(
    data_frame=means,
    x='model_year',
    y='origin',
    size='mpg',
    color='origin',  # redundant encoding! origin is already on the y axis
)