{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "import matplotlib.pyplot as plt\n", "from datetime import datetime" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.max_rows" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#pd.options.display.max_rows = 120" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pd.reset_option('display.max_rows')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.max_rows" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ddir = r'C:\\Users\\Glenn\\Documents\\Stats\\Oct 2017'\n", "ssheet1 ='W3 - Occupations of Work Apps Approved (data)_2005-10'\n", "ssheet2 ='W3 - Occupations of Work Apps Approved (data)_2010-16'" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df1 = pd.read_csv(os.path.join(ddir,ssheet1+\".csv\"))\n", "df2 = pd.read_csv(os.path.join(ddir,ssheet2+\".csv\"))\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Application Category 454613\n", " dtype: int64, Application Category 681831\n", " dtype: int64)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(df1[['Application Category']].count(), df2[['Application Category']].count())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Application Category 1136444\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3 = df1.append(df2)\n", "df3[['Application Category']].count()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Number of Applications 2010445\n", "dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3[['Number of Applications']].sum()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df3 = df3[df3['Decision Type']=='Approved']" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Number of Applications 1897192\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3[['Number of Applications']].sum()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "s1=df3.loc[:,'Month Decided'].apply(lambda x:pd.to_datetime(datetime(int(x[:4]),int(x[5:]),1 )))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df3.drop('Month Decided', axis=1, inplace=True)\n", "df3.drop('Occupation Code', axis=1, inplace=True)\n", "df3.index = s1\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Occupation Code** is not particularly useful and erroneously gets mixed up with the sum-groupby results. **Month Decided** removed because it has been moved to the index." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df3 = df3.reindex()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index([u'Number of Applications', u'Application Category', u'Application Type',\n", " u'Application Criteria', u'Application Stream',\n", " u'Application Substream', u'Branch Location', u'Decision Type',\n", " u'Job Offer?', u'Applicant Type', u'Labour Market Check',\n", " u'Nationality', u'Gender', u'Financial Year Decided', u'Occupation',\n", " u'Occupation Standard', u'Standard Major Group',\n", " u'Standard Submajor Group', u'Standard Minor Group',\n", " u'Standard Unit Group', u'Region', u'Period'],\n", " dtype='object')" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.columns" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Number of ApplicationsApplication CategoryApplication TypeApplication CriteriaApplication StreamApplication SubstreamBranch LocationDecision TypeJob Offer?Applicant Type...GenderFinancial Year DecidedOccupationOccupation StandardStandard Major GroupStandard Submajor GroupStandard Minor GroupStandard Unit GroupRegionPeriod
Month Decided
2006-11-012Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2006/07(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
2007-11-014Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2007/08(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
2007-12-011Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2007/08(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
\n", "

3 rows × 22 columns

\n", "
" ], "text/plain": [ " Number of Applications Application Category Application Type \\\n", "Month Decided \n", "2006-11-01 2 Group Visa Work \n", "2007-11-01 4 Group Visa Work \n", "2007-12-01 1 Group Visa Work \n", "\n", " Application Criteria Application Stream \\\n", "Month Decided \n", "2006-11-01 Crew of foreign fishing vessel Work \n", "2007-11-01 Crew of foreign fishing vessel Work \n", "2007-12-01 Crew of foreign fishing vessel Work \n", "\n", " Application Substream Branch Location Decision Type \\\n", "Month Decided \n", "2006-11-01 Crew Offshore Approved \n", "2007-11-01 Crew Offshore Approved \n", "2007-12-01 Crew Offshore Approved \n", "\n", " Job Offer? Applicant Type ... \\\n", "Month Decided ... \n", "2006-11-01 No Job Offer Principal Applicant/ Application ... \n", "2007-11-01 No Job Offer Principal Applicant/ Application ... \n", "2007-12-01 No Job Offer Principal Applicant/ Application ... \n", "\n", " Gender Financial Year Decided Occupation \\\n", "Month Decided \n", "2006-11-01 Male 2006/07 (not recorded) \n", "2007-11-01 Male 2007/08 (not recorded) \n", "2007-12-01 Male 2007/08 (not recorded) \n", "\n", " Occupation Standard Standard Major Group \\\n", "Month Decided \n", "2006-11-01 NaN (not recorded) \n", "2007-11-01 NaN (not recorded) \n", "2007-12-01 NaN (not recorded) \n", "\n", " Standard Submajor Group Standard Minor Group \\\n", "Month Decided \n", "2006-11-01 (not recorded) (not recorded) \n", "2007-11-01 (not recorded) (not recorded) \n", "2007-12-01 (not recorded) (not recorded) \n", "\n", " Standard Unit Group Region Period \n", "Month Decided \n", "2006-11-01 (not recorded) (unknown) 01JUL2010 \n", "2007-11-01 (not recorded) (unknown) 01JUL2010 \n", "2007-12-01 (not recorded) (unknown) 01JUL2010 \n", "\n", "[3 rows x 22 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.head(3)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Glenn\\Anaconda2\\lib\\site-packages\\pandas\\core\\generic.py:1101: PerformanceWarning: \n", "your performance may suffer as PyTables will pickle object types that it cannot\n", "map directly to c-types [inferred_type->mixed,key->block1_values] [items->['Application Category', 'Application Type', 'Application Criteria', 'Application Stream', 'Application Substream', 'Branch Location', 'Decision Type', 'Job Offer?', 'Applicant Type', 'Labour Market Check', 'Nationality', 'Gender', 'Financial Year Decided', 'Occupation', 'Occupation Standard', 'Standard Major Group', 'Standard Submajor Group', 'Standard Minor Group', 'Standard Unit Group', 'Region', 'Period']]\n", "\n", " return pytables.to_hdf(path_or_buf, key, self, **kwargs)\n" ] } ], "source": [ "df3.to_hdf(\"work_visas_2005-2016.hdf\",'table',mode='w')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#df4 = pd.read_hdf('work_visas_2005-2016.hdf','table')\n", "#df4.head(3)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Number of ApplicationsApplication CategoryApplication TypeApplication CriteriaApplication StreamApplication SubstreamBranch LocationDecision TypeJob Offer?Applicant Type...GenderFinancial Year DecidedOccupationOccupation StandardStandard Major GroupStandard Submajor GroupStandard Minor GroupStandard Unit GroupRegionPeriod
Month Decided
2006-11-012Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2006/07(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
2007-11-014Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2007/08(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
2007-12-011Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2007/08(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
2007-12-012Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2007/08(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
2008-05-012Group VisaWorkCrew of foreign fishing vesselWorkCrewOffshoreApprovedNo Job OfferPrincipal Applicant/ Application...Male2007/08(not recorded)NaN(not recorded)(not recorded)(not recorded)(not recorded)(unknown)01JUL2010
\n", "

5 rows × 22 columns

\n", "
" ], "text/plain": [ " Number of Applications Application Category Application Type \\\n", "Month Decided \n", "2006-11-01 2 Group Visa Work \n", "2007-11-01 4 Group Visa Work \n", "2007-12-01 1 Group Visa Work \n", "2007-12-01 2 Group Visa Work \n", "2008-05-01 2 Group Visa Work \n", "\n", " Application Criteria Application Stream \\\n", "Month Decided \n", "2006-11-01 Crew of foreign fishing vessel Work \n", "2007-11-01 Crew of foreign fishing vessel Work \n", "2007-12-01 Crew of foreign fishing vessel Work \n", "2007-12-01 Crew of foreign fishing vessel Work \n", "2008-05-01 Crew of foreign fishing vessel Work \n", "\n", " Application Substream Branch Location Decision Type \\\n", "Month Decided \n", "2006-11-01 Crew Offshore Approved \n", "2007-11-01 Crew Offshore Approved \n", "2007-12-01 Crew Offshore Approved \n", "2007-12-01 Crew Offshore Approved \n", "2008-05-01 Crew Offshore Approved \n", "\n", " Job Offer? Applicant Type ... \\\n", "Month Decided ... \n", "2006-11-01 No Job Offer Principal Applicant/ Application ... \n", "2007-11-01 No Job Offer Principal Applicant/ Application ... \n", "2007-12-01 No Job Offer Principal Applicant/ Application ... \n", "2007-12-01 No Job Offer Principal Applicant/ Application ... \n", "2008-05-01 No Job Offer Principal Applicant/ Application ... \n", "\n", " Gender Financial Year Decided Occupation \\\n", "Month Decided \n", "2006-11-01 Male 2006/07 (not recorded) \n", "2007-11-01 Male 2007/08 (not recorded) \n", "2007-12-01 Male 2007/08 (not recorded) \n", "2007-12-01 Male 2007/08 (not recorded) \n", "2008-05-01 Male 2007/08 (not recorded) \n", "\n", " Occupation Standard Standard Major Group \\\n", "Month Decided \n", "2006-11-01 NaN (not recorded) \n", "2007-11-01 NaN (not recorded) \n", "2007-12-01 NaN (not recorded) \n", "2007-12-01 NaN (not recorded) \n", "2008-05-01 NaN (not recorded) \n", "\n", " Standard Submajor Group Standard Minor Group \\\n", "Month Decided \n", "2006-11-01 (not recorded) (not recorded) \n", "2007-11-01 (not recorded) (not recorded) \n", "2007-12-01 (not recorded) (not recorded) \n", "2007-12-01 (not recorded) (not recorded) \n", "2008-05-01 (not recorded) (not recorded) \n", "\n", " Standard Unit Group Region Period \n", "Month Decided \n", "2006-11-01 (not recorded) (unknown) 01JUL2010 \n", "2007-11-01 (not recorded) (unknown) 01JUL2010 \n", "2007-12-01 (not recorded) (unknown) 01JUL2010 \n", "2007-12-01 (not recorded) (unknown) 01JUL2010 \n", "2008-05-01 (not recorded) (unknown) 01JUL2010 \n", "\n", "[5 rows x 22 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.head(5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df3.tail(3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df3.groupby('Standard Major Group').sum().sort_values('Number of Applications')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "not_recorded = df3[df3['Standard Major Group'] == '(not recorded)']\n", "not_recorded.groupby('Application Criteria').sum().sort_values('Number of Applications', ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "not_recorded.groupby([pd.Grouper(freq='A'),'Application Criteria']).sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 1 }