Home 00 Dataplay 01 Download And Load 02 Merge Data 03 Map Basics Intake... 04 Nb 2 Html 05 Map Correlation N... 06 Timelapse Data Gi...

Don't Look! I'm changing!

URL Copied

Musical loops

About this Tutorial:

BinderBinderBinderOpen Source Love svg3

NPM LicenseActivePython VersionsGitHub last commit

GitHub starsGitHub watchersGitHub forksGitHub followers

TweetTwitter Follow

Description: This notebook was made to demonstrate how to make a gif map by merging 2 datasets. The first is a dataset containing mappable coordinates, onto which the second dataset maps its information of interest.

This lab is split into two sections.

Input(s):

Output: Files, Gif

*please note

SETUP

Import Modules

%%capture
 !pip install geopandas
 !pip install VitalSigns
import numpy as np
 import pandas as pd
from dataplay import merge
 from dataplay import intaker
 from VitalSigns import acsDownload
import ipywidgets as widgets
 !jupyter nbextension enable --py widgetsnbextension
 %matplotlib inline
#@title Run This Cell: Import Modules
 
 # Once installed we need to import and configure the Widgets
 from IPython.core.interactiveshell import InteractiveShell
 InteractiveShell.ast_node_interactivity = 'all'
 import ipywidgets as widgets
 from ipywidgets import interact, interact_manual
 
 # Used 4 Importing Data
 import urllib.request as urllib
 from urllib.parse import urlencode
 # This Prevents Timeouts when Importing
 import socket
 socket.setdefaulttimeout(10.0)
 
 # Pandas Data Manipulation Libraries
 import pandas as pd
 # Show entire column widths (None disables truncation; the old -1 sentinel
 # is deprecated and rejected by current pandas releases)
 pd.set_option('display.max_colwidth', None)
 # 4 Working with Json Data
 import json
 # 4 Data Processing
 import numpy as np
 # 4 Reading Json Data into Pandas
 from pandas.io.json import json_normalize
 
 # 4 exporting data as CSV
 import csv
 
 from VitalSigns.acsDownload import retrieve_acs_data
 
 from dataplay.merge import mergeDatasets
 
 from dataplay.geoms import readInGeometryData
 from dataplay.geoms import map_points
 from dataplay.geoms import workWithGeometryData
 
 # Geo-Formatting
 # Postgres-Conversion
 import geopandas as gpd
 import psycopg2,pandas,numpy
 from shapely import wkb
 import os
 import sys
 
 # In case file is KML
 import fiona
 fiona.drvsupport.supported_drivers['kml'] = 'rw' # enable KML support which is disabled by default
 fiona.drvsupport.supported_drivers['KML'] = 'rw' # enable KML support which is disabled by default
 
 # https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.2010.html
 # https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2010&layergroup=Census+Tracts
 
 from geopandas import GeoDataFrame
 
 from shapely.wkt import loads
 from pandas import ExcelWriter
 from pandas import ExcelFile
 
 # load libraries
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import geopandas as gpd
 import glob
 
 # Gif
 import imageio
 
 # Pictures
 from PIL import Image
 import requests
 from io import BytesIO

Configure Environment

 # Notebook display settings for pandas and IPython.
 # Keep wide DataFrames on a single line instead of wrapping their repr.
 pd.set_option('display.expand_frame_repr', False)
 # Show floats with two decimal places.
 pd.set_option('display.precision', 2)
 from IPython.core.interactiveshell import InteractiveShell
 # Echo every top-level expression of a cell, not only the last one.
 InteractiveShell.ast_node_interactivity = "all"
 
 # Truncate long cell values to 20 characters. This replaces the
 # max_colwidth value set in the import cell.
 pd.set_option('max_colwidth', 20)
 # To go back to the pandas default: pd.reset_option('max_colwidth')

Convenience Functions

#@title Run This Cell: Misc Function Declarations
# Small column-lookup helpers used in the calculations below.


def getColName(df, col):
    """Return the label of the first column of `df` whose name matches `col`.

    `col` is handed to `Index.str.contains`, so it is treated as a regular
    expression and may match anywhere inside a column name.

    Raises:
        IndexError: if no column name matches.
    """
    return df.columns[df.columns.str.contains(pat=col)][0]


def getColByName(df, col):
    """Return the first column of `df` whose name matches `col`."""
    return df[getColName(df, col)]


def addKey(df, fi, col):
    """Copy the column of `df` matching `col` into `fi` under the same label.

    This is not a crosswalk: the values are assigned as they are. `fi` is
    modified in place and is also returned.
    """
    key = getColName(df, col)
    fi[key] = df[key]
    return fi


def nullIfEqual(df, c1, c2):
    """Return, for every row, the sum of the columns matching `c1` and `c2`.

    A row whose sum compares equal to zero yields the integer 0; every other
    row yields the sum itself.
    """
    # Resolve both labels once instead of on every row.
    first = getColName(df, c1)
    second = getColName(df, c2)

    def rowTotal(row):
        total = row[first] + row[second]
        return total if total != 0 else 0

    return df.apply(rowTotal, axis=1)


# I'm thinking this doesn't need to be a function..
def sumInts(df):
    """Return the per-column sums of the numeric columns of `df`."""
    return df.sum(numeric_only=True)

This next function was created in previous colabs. We are going to recycle it for use in this lab.

Retrieve GIS Data

Import Data of Interest: (HHCHPOV)

# BNIA ArcGIS Homepage: https://data-bniajfi.opendata.arcgis.com/

# Indicator prefix: it names the ArcGIS feature service (capitalized) and is the
# pattern later cells use to pick the yearly columns. No visible cell defines it,
# and it has to be 'hhchpov' to agree with the columns selected below.
regexMatchingColumnsToMakeTheGifWith = 'hhchpov'

# Download the indicator layer and keep the community name, the five yearly
# columns and the geometry.
final = intaker.Intake.getData(
    "https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/"
    + regexMatchingColumnsToMakeTheGifWith.capitalize()
    + "/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson"
)[['CSA2010', 'hhchpov15', 'hhchpov16', 'hhchpov17', 'hhchpov18', 'hhchpov19', 'geometry']]
final.head(1)

MAPPING

Fantastic!

Your data is all together in a single dataset.

now what?

First, let's take the centerpoint of each geometry. This will be where we place text on each geometry.

Lets make a GIF

# Settings read by the GIF-building cells below.
fileNames = []  # receives one image path per frame as the pictures are saved
 labelBounds = True  # when truthy, every geometry is annotated with a label
 specialLabelCol = False # Labels on GEOM Centroids; False labels with the plotted column's own value
 saveGifAs = './test.gif'  # path the animated GIF is written to
 label = 'Household Poverty'  # NOTE(review): not read by any visible cell
 annotation = 'Source: Maryland Vital Statistics; Analysis by: Baltimore Neighborhood Indicators Alliance' 
 fontsize='22'  # font size of each map title

Data was successfully merged across all years and geometry.

Now we want the tractname, geometry, and the specific column we want to make a gif from.

# Work on a copy of `final` whose columns are in alphabetical order.
td = final.copy().reindex(sorted(final.columns), axis=1)

# Columns that will each become one frame of the GIF.
gifCols = td.filter(regex=regexMatchingColumnsToMakeTheGifWith).columns.values

# Missing values become -1 and the frame columns are cast to int32, so the
# numbers are shown as whole digits when displaying the results.
td[gifCols] = td[gifCols].fillna(-1).astype('int32')
td.head()

Data exploration is essential! But not covered in this lab.

Everything is almost ready to start making our gifmap!

Lets just get the minimum and maximum values so that our color ramp will have consistent values on each picture.

# Gather the smallest and largest value of every frame column.
mins, maxs = [], []
for col in td.filter(regex=regexMatchingColumnsToMakeTheGifWith):
    mins += [td[col].min()]
    maxs += [td[col].max()]
print(mins, maxs)

# One shared range for the choropleth colour ramp, so that every picture
# uses the same scale.
vmin = min(mins)
vmax = max(maxs)
print('Smallest Value: ', vmin, ', Max Value:', vmax)
# Draw one choropleth per yearly column of `td`, save each as a picture, then
# stitch the pictures into a GIF and preview them.
for indx, col in enumerate(td.filter(regex="hhchpov").columns):
    print('INDEX', indx)
    print('Col: '+str(col) )
    image_name = col+'.jpg'
    fileNames.append(image_name)

    # create map, UPDATE: added plt.Normalize to keep the legend range the same for all maps
    # NOTE: despite its name, `fig` is used as a matplotlib Axes below.
    fig = td.plot(column=col, cmap='Blues', figsize=(10,10),
        linewidth=0.8, edgecolor='0.8', vmin=vmin, vmax=vmax,
        legend=True, norm=plt.Normalize(vmin=vmin, vmax=vmax)
    )

    # https://stackoverflow.com/questions/38899190/geopandas-label-polygons
    if labelBounds:
        labelColumn = specialLabelCol if specialLabelCol else col
        for _, row in td.iterrows():
            # Rows without a geometry have no centroid to label.
            if row.geometry:
                # The label is passed positionally: the keyword was renamed
                # from `s` to `text` across matplotlib releases.
                fig.annotate(row[labelColumn], xy=row.geometry.centroid.coords[0], ha='center')

    # remove axis off chart and set title
    fig.axis('off')
    fig.set_title(str(col.replace("hhchpov", "Houshold Childhood Poverty 20")), fontdict={'fontsize': fontsize, 'fontweight' : '3'})

    # create an annotation for the data source
    fig.annotate(annotation,
            xy=(0.1, .08), xycoords='figure fraction',
            horizontalalignment='left', verticalalignment='top',
            fontsize=10, color='#555555')

    # Save the figure at 300 dpi next to the notebook, then release it so
    # open figures do not pile up across iterations.
    chart = fig.get_figure()
    chart.savefig(image_name, dpi=300)
    plt.close(chart)

# Read the saved pictures back and write them out as one animated GIF.
images = [imageio.imread(filename) for filename in fileNames]
imageio.mimsave(saveGifAs, images, fps=.5)


# This will print out a picture of each picture in the gifmap.
from PIL import Image
import requests
from io import BytesIO
from IPython.display import display
for filename in fileNames:
    with Image.open(filename) as img:
        size = 328, 328
        # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS.
        img.thumbnail(size, Image.LANCZOS)
        # A bare expression inside a loop is not rendered by the notebook,
        # so the thumbnail is displayed explicitly.
        display(img)

Final Result

import geopandas as gpd
 import numpy as np
 import pandas as pd
 from dataplay import geoms
 
 # Gif
 import imageio
 
 # Pictures
 from PIL import Image
 import requests
 from io import BytesIO
def getMinMax(df):
    """Return `[mins, maxs]`: the per-column minima and maxima of `df`.

    Both entries are the `.values` arrays of `df.min()` and `df.max()`.
    The two arrays are also printed.
    """
    lowest, highest = df.min().values, df.max().values
    print("Min&Max: ", lowest, highest)
    return [lowest, highest]
def getAbsMinMax(df):
    """Return `[lowest, highest]`: the extreme values over all columns of `df`.

    The per-column minima and maxima come from `getMinMax`, which also
    prints them.
    """
    columnMins, columnMaxs = getMinMax(df)
    overallLow = min(columnMins)
    overallHigh = max(columnMaxs)
    return [overallLow, overallHigh]
def createGif(fileNames, saveGifAs, images):
    """Write `images` to `saveGifAs` as an animated GIF.

    Args:
        fileNames: paths of the frame pictures; each one is opened, shrunk to
            fit within 328x328 and its image object is printed.
        saveGifAs: path of the GIF file to write.
        images: the already-loaded frames, in playback order.
    """
    print("CREATING GIF")
    # This will print out a picture of each picture in the gifmap as well.
    for filename in fileNames:
        # The context manager closes the file handle once the preview is done.
        with Image.open(filename) as img:
            size = 328, 328
            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
            # a name that newer Pillow releases no longer provide.
            img.thumbnail(size, Image.LANCZOS)
            print(img)
    # fps=.5 shows each frame for two seconds.
    imageio.mimsave(saveGifAs, images, fps=.5)
    print("GIF CREATED")
def createPicture(df, col, vmin, vmax, labelBounds, title, annotation, fontsize):
    """Draw a choropleth of `df` coloured by `col` and save it as '<col>.png'.

    Args:
        df: frame with a 'geometry' column and a `.plot` method
            (presumably a GeoDataFrame - confirm against the caller).
        col: column to colour the map by; also the stem of the saved file.
        vmin, vmax: limits of the colour scale; passing the same pair for
            every frame keeps the legend range identical across pictures.
        labelBounds: falsy for no labels, True to label each geometry with
            its `col` value, or a column name whose values are the labels.
        title: text that replaces the substring "final" in `col` to form the
            map title.
        annotation: data-source note drawn near the bottom-left of the figure.
        fontsize: font size of the title.

    Returns:
        An empty string.
    """
    print( '~~~~~~~~~~~~~~~~ \r\n createPicture for: ', 'Col: '+str(col))

    # create map, UPDATE: added plt.Normalize to keep the legend range the same for all maps
    # NOTE: despite its name, `fig` is used as a matplotlib Axes below.
    fig = df.plot(column=col, cmap='Blues', figsize=(10,10),
      linewidth=0.8, edgecolor='0.8', vmin=vmin, vmax=vmax,
      legend=True, norm=plt.Normalize(vmin=vmin, vmax=vmax)
    )

    if labelBounds:
        # A plain True means "label with the plotted column itself".
        if isinstance(labelBounds, bool):
            labelBounds = col
        print('Adding Label: ', labelBounds)
        for _, row in df.iterrows():
            # Rows without a geometry have no centroid to label.
            if row.geometry:
                # The label is passed positionally: the keyword was renamed
                # from `s` to `text` across matplotlib releases.
                fig.annotate(row[labelBounds], xy=row['geometry'].centroid.coords[0], ha='center')

    # remove axis off chart and set title
    fig.axis('off')

    print('Setting Title: ', title)
    fig.set_title(str(col.replace("final", title)), fontdict={'fontsize': fontsize, 'fontweight' : '3'})

    print('Setting Data-Source Annotation: ', annotation)
    fig.annotate(annotation, xy=(0.1, .08), xycoords='figure fraction', horizontalalignment='left', verticalalignment='top', fontsize=10, color='#555555')

    # Save the figure as a 300 dpi png in the working directory.
    print('Get Figure: ')
    chart = fig.get_figure()
    print('Save Figure: ')
    chart.savefig( str(col)+".png" , dpi=300)
    print('Saved & Finished.')
    # Release the figure so repeated calls do not pile up open figures.
    plt.close(chart)

    return ''
def createGifMap(df, saveGifAs, labelBounds, title, annotation, fontsize):
    """Create one map picture per "final" column of `df` and merge them into a GIF.

    Every column whose name matches the regex "final" becomes one frame. All
    frames share a single colour range, taken over all of those columns.

    Args:
        df: frame holding the geometry and the "final" columns to animate.
        saveGifAs: path of the GIF file to write.
        labelBounds, title, annotation, fontsize: forwarded unchanged to
            `createPicture` for every frame.
    """
    print('createGifMap')
    # set the min and max range for the choropleth map
    frameData = df.filter(regex="final")
    vmin, vmax = getAbsMinMax(frameData)

    print('Creating Pictures')
    fileNames = []
    images = []
    # For each column
    for col in frameData.columns:
        createPicture(df, col, vmin, vmax, labelBounds, title, annotation, fontsize)
        print('Adding to images list')
        pictureFile = str(col)+".png"
        # Record the path as well, so createGif can preview every frame.
        fileNames.append(pictureFile)
        images.append(imageio.imread(pictureFile))
    print( '~~~~~~~~~~~~~~~~ \r\n Saving images to !')

    createGif(fileNames, saveGifAs, images)
# Parameter cell: download, crosswalk, merge and map settings.
# Changing the geographic reference codes below changes the area being queried.
 
 # NOTE(review): no visible cell reads the download/crosswalk settings below;
 # presumably they are meant for the VitalSigns / dataplay helpers - confirm.
 # The file name further down suggests they build the trav45 indicator.
 
 # Geographic reference codes (presumably FIPS codes; '*' presumably selects
 # every tract - confirm).
 state = '24'
 county = '510'
 tract = '*'
 # Specify the download parameters the ACS download function will receive here
 year = '19'
 years = ['17', '16', '15']
 tableId = 'B08303' 
 saveAcs = True
 
 # Crosswalk Table 
 cwUrl = 'CSA-to-Tract-2010.csv'
 cw_left_col = 'tract'
 cw_right_col= 'TRACTCE10' 
 # NOTE(review): this value is overwritten by merge_how = 'outer' at the end
 # of this cell - confirm which one is intended.
 merge_how= 'CSA2010'
 saveCrosswalked = True
 crosswalkedFileName = False
 
 # With groupBy = False the rows are not grouped; the crosswalked column is
 # carried along through columnsToInclude instead.
 groupBy = False # 'CSA2010'
 aggMethod = 'sum'
 columnsToInclude = ['CSA2010']
 
 # The output name embeds `year`; the date suffix is hard-coded.
 finalFileName = './trav45_20'+year+'_tracts_26July2019.csv' 
 # Alternatively - groupBy = False & columnsToInclude = ['CSA2010']
 
 
 # This lower half is to merge to the geom
 from dataplay import merge 
 
 # Secondary Table 
 right_ds = 'https://services1.arcgis.com/mVFRs7NF4iFitgbY/ArcGIS/rest/services/Hhchpov/FeatureServer/0/query?where=1%3D1&outFields=*&returnGeometry=true&f=pgeojson'
 right_col ='CSA2010'
 
 interactive = True
 merge_how = 'outer'
# Map settings; these reuse the names of the earlier GIF settings and so
# replace their values once this cell runs.
labelBounds = False # 'CSA2010'
 annotation = 'Source: Baltimore Neighborhood Indicators Alliance' 
 title = 'Indicator Name' 
 fontsize='22'
# Rename the indicator columns so that their 'biz1_' part reads 'final', the
# pattern createGifMap selects its frame columns with. 'biz1_' is a literal,
# so str.replace does the job without the `re` module, which this notebook
# never imports.
td = td.rename(columns=lambda x: x.replace('biz1_', 'final'))
# Keep only the frame columns plus the identifier and geometry columns, in
# alphabetical order.
td = td.filter(regex="final|CSA2010|tract|geometry")
td = td.reindex(sorted(td.columns), axis=1)
# Development setup for Google Colab: install nbdev, mount Google Drive and
# change into the dataplay project folder.
# NOTE(review): `t` is not read by any visible cell.
t = """ """
 !pip install nbdev
 from google.colab import drive
 # Makes the Drive contents available under /content/drive.
 drive.mount('/content/drive')
 %cd /content/drive/My Drive/'Software Development Documents'/dataplay/
 
 # !pip install dataplay