{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "import matplotlib.pyplot as plt\n", "from datetime import datetime" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.max_rows" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#pd.options.display.max_rows = 120" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pd.reset_option('display.max_rows')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.max_rows" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ddir = r'C:\\Users\\Glenn\\Documents\\Stats\\Oct 2017'\n", "ssheet1 ='W3 - Occupations of Work Apps Approved (data)_2005-10'\n", "ssheet2 ='W3 - Occupations of Work Apps Approved (data)_2010-16'" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df1 = pd.read_csv(os.path.join(ddir,ssheet1+\".csv\"))\n", "df2 = pd.read_csv(os.path.join(ddir,ssheet2+\".csv\"))\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Application Category 454613\n", " dtype: int64, Application Category 681831\n", " dtype: int64)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(df1[['Application Category']].count(), df2[['Application Category']].count())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Application Category 1136444\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3 = df1.append(df2)\n", "df3[['Application Category']].count()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Number of Applications 2010445\n", "dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3[['Number of Applications']].sum()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df3 = df3[df3['Decision Type']=='Approved']" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Number of Applications 1897192\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3[['Number of Applications']].sum()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "s1=df3.loc[:,'Month Decided'].apply(lambda x:pd.to_datetime(datetime(int(x[:4]),int(x[5:]),1 )))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df3.drop('Month Decided', axis=1, inplace=True)\n", "df3.drop('Occupation Code', axis=1, inplace=True)\n", "df3.index = s1\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Occupation Code** is not particularly useful and erroneously gets mixed up with the sum-groupby results. **Month Decided** removed because it has been moved to the index." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df3 = df3.reindex()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index([u'Number of Applications', u'Application Category', u'Application Type',\n", " u'Application Criteria', u'Application Stream',\n", " u'Application Substream', u'Branch Location', u'Decision Type',\n", " u'Job Offer?', u'Applicant Type', u'Labour Market Check',\n", " u'Nationality', u'Gender', u'Financial Year Decided', u'Occupation',\n", " u'Occupation Standard', u'Standard Major Group',\n", " u'Standard Submajor Group', u'Standard Minor Group',\n", " u'Standard Unit Group', u'Region', u'Period'],\n", " dtype='object')" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3.columns" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Number of Applications | \n", "Application Category | \n", "Application Type | \n", "Application Criteria | \n", "Application Stream | \n", "Application Substream | \n", "Branch Location | \n", "Decision Type | \n", "Job Offer? | \n", "Applicant Type | \n", "... | \n", "Gender | \n", "Financial Year Decided | \n", "Occupation | \n", "Occupation Standard | \n", "Standard Major Group | \n", "Standard Submajor Group | \n", "Standard Minor Group | \n", "Standard Unit Group | \n", "Region | \n", "Period | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Month Decided | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2006-11-01 | \n", "2 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2006/07 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
2007-11-01 | \n", "4 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2007/08 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
2007-12-01 | \n", "1 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2007/08 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
3 rows × 22 columns
\n", "\n", " | Number of Applications | \n", "Application Category | \n", "Application Type | \n", "Application Criteria | \n", "Application Stream | \n", "Application Substream | \n", "Branch Location | \n", "Decision Type | \n", "Job Offer? | \n", "Applicant Type | \n", "... | \n", "Gender | \n", "Financial Year Decided | \n", "Occupation | \n", "Occupation Standard | \n", "Standard Major Group | \n", "Standard Submajor Group | \n", "Standard Minor Group | \n", "Standard Unit Group | \n", "Region | \n", "Period | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Month Decided | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2006-11-01 | \n", "2 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2006/07 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
2007-11-01 | \n", "4 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2007/08 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
2007-12-01 | \n", "1 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2007/08 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
2007-12-01 | \n", "2 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2007/08 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
2008-05-01 | \n", "2 | \n", "Group Visa | \n", "Work | \n", "Crew of foreign fishing vessel | \n", "Work | \n", "Crew | \n", "Offshore | \n", "Approved | \n", "No Job Offer | \n", "Principal Applicant/ Application | \n", "... | \n", "Male | \n", "2007/08 | \n", "(not recorded) | \n", "NaN | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(not recorded) | \n", "(unknown) | \n", "01JUL2010 | \n", "
5 rows × 22 columns
\n", "