SELECT year, month, day, wday, state, is_male, gestation_weeks, mother_age, father_age, apgar_1min, apgar_5min, weight_pounds FROM [publicdata:samples.natality] where year between 1980 and 1984
%%sql
#count how many records are in my table
select count(*)
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
#check out the schema of the table
%bigquery schema --table thematic-lore-110201:Natality_1980_1984.Births_1980_1984
SQL to count births by year and month
%%sql --module birthdata
select year, month, concat(string(month),"/",string(year)) as yearmonth, count(*) as births
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
group by year, month, yearmonth
order by year, month
%chart line --data birthdata --fields yearmonth,births
The birth rates are cyclical! Which month has the most?
%%sql --module birthmonth
select month, count(*) as births
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
group by month
order by month
%chart columns --data birthmonth --fields month,births
width: 500
height: 500
Hmm, not sure how to adjust axis or labeling yet.
%%sql --module birth_by_day
SELECT year, month, day, count(*) AS births,
TIMESTAMP(STRING(year) + '-' + STRING(month) + '-' + STRING(day)) AS timestamp
FROM [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
where year > 1980 and day < 99 --there is a day 99 that is messing up the chart
group by year, month, day, timestamp
ORDER BY year DESC, month DESC, day DESC
%%chart annotation --fields timestamp,births --data birth_by_day
displayZoomButtons: false
%%sql
select wday, count(*) as births
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
group by wday
order by wday
Turns out there's no data in the wday column! Maybe for this range of years.
%%sql --module birthweekday
select dayofweek(TIMESTAMP(TIMESTAMP(STRING(year) + '-' + STRING(month) + '-' + STRING(day)))) as weekday, count(*) as births
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
where year is not null and day is not null and month is not null and day < 99
group by weekday
order by weekday
%chart columns --data birthweekday --fields weekday,births
width: 500
height: 500
Relatively few births on weekends!
%%sql
select min(abs(father_age - mother_age)) as minimum_difference, round(avg(abs(father_age - mother_age)),1) as mean_difference,
round(stddev(abs(father_age - mother_age)),1) as std_dev_difference, max(abs(father_age - mother_age)) as maximum_difference
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
where year = 1982 and month = 1 and day = 20 and father_age < 99
slightly positive trendline? still very small R^2
%%sql
select gestation_weeks
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
group by gestation_weeks --got error that select distinct not currently available
order by gestation_weeks
%%sql --module gestation_weight
select gestation_weeks, weight_pounds
from [thematic-lore-110201:Natality_1980_1984.Births_1980_1984]
where year = 1982 and month = 1 and day = 20
and gestation_weeks < 99 and gestation_weeks is not null
--forgot to bring in live baby variable, so using apgar as proxy
and apgar_1min is not null and apgar_1min < 99
%%chart scatter --data gestation_weight
height: 800
hAxis:
title: Gestation Weeks
vAxis:
title: Birth Weight (lbs)
trendlines:
0:
type: linear
color: green
showR2: true
visibleInLegend: true
hm, still not as high of an R^2 as I would have expected! Lots of variation in the weight even at similar weeks.