Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions data-analysis/data_analysis_findings.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@
"import pandas as pd\n",
"\n",
"james_bond_data_html = pd.read_html(\n",
" \"https://en.wikipedia.org/wiki/List_of_James_Bond_novels_and_short_stories\"\n",
" \"https://en.wikipedia.org/wiki/List_of_James_Bond_novels_and_short_stories\",\n",
" storage_options={\"User-Agent\": \"Mozilla/5.0\"},\n",
")\n",
"james_bond_tables = james_bond_data_html[1].convert_dtypes()"
]
Expand Down Expand Up @@ -305,7 +306,7 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" )\n",
Expand All @@ -327,17 +328,17 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" )\n",
Expand Down Expand Up @@ -367,21 +368,21 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n",
" data[\"film_length\"].str.removesuffix(\" mins\").astype(\"Int64\")\n",
" ),\n",
" )\n",
")"
Expand Down Expand Up @@ -442,21 +443,21 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n",
" data[\"film_length\"].str.removesuffix(\" mins\").astype(\"Int64\")\n",
" ),\n",
" release_date=lambda data: pd.to_datetime(\n",
" data[\"release_date\"], format=\"%B, %Y\"\n",
Expand Down Expand Up @@ -529,22 +530,22 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" * 1000\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n",
" data[\"film_length\"].str.removesuffix(\" mins\").astype(\"Int64\")\n",
" ),\n",
" release_date=lambda data: pd.to_datetime(\n",
" data[\"release_date\"], format=\"%B, %Y\"\n",
Expand Down Expand Up @@ -597,22 +598,22 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" * 1000\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n",
" data[\"film_length\"].str.removesuffix(\" mins\").astype(\"Int64\")\n",
" ),\n",
" release_date=lambda data: pd.to_datetime(\n",
" data[\"release_date\"], format=\"%B, %Y\"\n",
Expand Down Expand Up @@ -662,22 +663,22 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" * 1000\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"].str.removesuffix(\"mins\").astype(\"Int64\")\n",
" data[\"film_length\"].str.removesuffix(\" mins\").astype(\"Int64\")\n",
" ),\n",
" release_date=lambda data: pd.to_datetime(\n",
" data[\"release_date\"], format=\"%B, %Y\"\n",
Expand Down Expand Up @@ -738,23 +739,23 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" * 1000\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"]\n",
" .str.removesuffix(\"mins\")\n",
" .str.removesuffix(\" mins\")\n",
" .astype(\"Int64\")\n",
" .replace(1200, 120)\n",
" ),\n",
Expand Down Expand Up @@ -820,23 +821,23 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" * 1000\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"]\n",
" .str.removesuffix(\"mins\")\n",
" .str.removesuffix(\" mins\")\n",
" .astype(\"Int64\")\n",
" .replace(1200, 120)\n",
" ),\n",
Expand Down
8 changes: 4 additions & 4 deletions data-analysis/data_analysis_results.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,23 @@
" .assign(\n",
" income_usa=lambda data: (\n",
" data[\"income_usa\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" income_world=lambda data: (\n",
" data[\"income_world\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" ),\n",
" movie_budget=lambda data: (\n",
" data[\"movie_budget\"]\n",
" .replace(\"[$,]\", \"\", regex=True)\n",
" .replace(r\"[$,\\s]\", \"\", regex=True)\n",
" .astype(\"Float64\")\n",
" * 1000\n",
" ),\n",
" film_length=lambda data: (\n",
" data[\"film_length\"]\n",
" .str.removesuffix(\"mins\")\n",
" .str.removesuffix(\" mins\")\n",
" .astype(\"Int64\")\n",
" .replace(1200, 120)\n",
" ),\n",
Expand Down
52 changes: 26 additions & 26 deletions data-analysis/james_bond_data_cleansed.csv
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
bond_actor,bond_kills,car_manufacturer,film_length,imdb,income_usa,income_world,martinis_consumed,movie_budget,movie_title,release_date,rotten_tomatoes,release_year
Sean Connery,4,Sunbeam,110,7.3,16067035.0,59567035.0,2,1000000.0,Dr. No,1962-06-01,7.7,1962
Sean Connery,11,Bentley,115,7.5,24800000.0,78900000.0,0,2000000.0,From Russia with Love,1963-08-01,8.0,1963
Sean Connery,9,Aston Martin,110,7.8,51100000.0,124900000.0,1,3000000.0,Goldfinger,1964-05-01,8.4,1964
Sean Connery,20,Aston Martin,130,7.0,63600000.0,141200000.0,0,9000000.0,Thunderball,1965-09-01,6.8,1965
Sean Connery,21,Toyota,117,6.9,43100000.0,111600000.0,1,9500000.0,You Only Live Twice,1967-11-01,6.3,1967
George Lazenby,5,Mercury,142,6.8,22800000.0,82000000.0,1,8000000.0,On Her Majesty's Secret Service,1969-07-01,6.7,1969
Sean Connery,7,Ford,120,6.7,43800000.0,116000000.0,0,7200000.0,Diamonds Are Forever,1971-03-01,6.3,1971
Roger Moore,8,AMC,121,6.8,35400000.0,161800000.0,0,7000000.0,Live and Let Die,1973-08-01,5.9,1973
Roger Moore,1,AMC,125,6.7,21000000.0,97600000.0,0,7000000.0,The Man with the Golden Gun,1974-07-01,5.1,1974
Roger Moore,31,Lotus,125,7.1,46800000.0,185400000.0,1,14000000.0,The Spy Who Loved Me,1977-04-01,6.8,1977
Roger Moore,12,Lotus,126,6.2,70300000.0,210300000.0,1,31000000.0,Moonraker,1979-10-01,5.7,1979
Roger Moore,18,Citroen,127,6.8,54800000.0,195300000.0,0,28000000.0,For Your Eyes Only,1981-06-01,6.3,1981
Roger Moore,15,Bajaj,131,6.5,67900000.0,187500000.0,0,27500000.0,Octopussy,1983-03-01,5.3,1983
Roger Moore,5,Rolls Royce,131,6.2,50327960.0,152627960.0,0,30000000.0,A View to a Kill,1985-10-01,4.7,1985
Timothy Dalton,13,Rolls Royce,130,6.7,51185000.0,191200000.0,2,40000000.0,The Living Daylights,1987-05-01,6.3,1987
Timothy Dalton,10,Aston Martin,133,6.5,34667015.0,156167015.0,1,42000000.0,License to Kill,1989-01-01,6.0,1989
Pierce Brosnan,47,BMW,130,7.2,106429941.0,356429941.0,1,60000000.0,GoldenEye,1995-09-01,6.9,1995
Pierce Brosnan,30,Aston Martin,119,6.4,125304276.0,339504276.0,1,110000000.0,Tomorrow Never Dies,1997-07-01,6.0,1997
Pierce Brosnan,27,BMW,128,6.3,126930660.0,361730660.0,1,135000000.0,The World Is Not Enough,1999-06-01,5.7,1999
Pierce Brosnan,31,Aston Martin,133,6.0,160942139.0,431942139.0,2,142000000.0,Die Another Day,2002-08-01,6.1,2002
Daniel Craig,11,Aston Martin,144,7.9,167365000.0,596365000.0,3,102000000.0,Casino Royale,2006-02-01,7.8,2006
Daniel Craig,16,Aston Martin,106,6.7,169368427.0,591692078.0,6,230000000.0,Quantum of Solace,2008-12-01,6.1,2008
Daniel Craig,26,Aston Martin,143,7.8,304360277.0,1108561108.0,1,200000000.0,Skyfall,2012-11-01,8.2,2012
Daniel Craig,30,Aston Martin,148,6.8,200074175.0,879620923.0,1,245000000.0,Spectre,2015-09-01,6.4,2015
Daniel Craig,14,Aston Martin,163,7.3,160891007.0,759959662.0,1,275000000.0,No Time to Die,2021-11-01,7.3,2021
release_date,movie_title,bond_actor,car_manufacturer,income_usa,income_world,movie_budget,film_length,imdb,rotten_tomatoes,martinis_consumed,bond_kills,release_year
1962-06-01,Dr. No,Sean Connery,Sunbeam,16067035.0,59567035.0,1000000.0,110,7.3,7.7,2,4,1962
1963-08-01,From Russia with Love,Sean Connery,Bentley,24800000.0,78900000.0,2000000.0,115,7.5,8.0,0,11,1963
1964-05-01,Goldfinger,Sean Connery,Aston Martin,51100000.0,124900000.0,3000000.0,110,7.8,8.4,1,9,1964
1965-09-01,Thunderball,Sean Connery,Aston Martin,63600000.0,141200000.0,9000000.0,130,7.0,6.8,0,20,1965
1967-11-01,You Only Live Twice,Sean Connery,Toyota,43100000.0,111600000.0,9500000.0,117,6.9,6.3,1,21,1967
1969-07-01,On Her Majesty's Secret Service,George Lazenby,Mercury,22800000.0,82000000.0,8000000.0,142,6.8,6.7,1,5,1969
1971-03-01,Diamonds Are Forever,Sean Connery,Ford,43800000.0,116000000.0,7200000.0,120,6.7,6.3,0,7,1971
1973-08-01,Live and Let Die,Roger Moore,AMC,35400000.0,161800000.0,7000000.0,121,6.8,5.9,0,8,1973
1974-07-01,The Man with the Golden Gun,Roger Moore,AMC,21000000.0,97600000.0,7000000.0,125,6.7,5.1,0,1,1974
1977-04-01,The Spy Who Loved Me,Roger Moore,Lotus,46800000.0,185400000.0,14000000.0,125,7.1,6.8,1,31,1977
1979-10-01,Moonraker,Roger Moore,Lotus,70300000.0,210300000.0,31000000.0,126,6.2,5.7,1,12,1979
1981-06-01,For Your Eyes Only,Roger Moore,Citroen,54800000.0,195300000.0,28000000.0,127,6.8,6.3,0,18,1981
1983-03-01,Octopussy,Roger Moore,Bajaj,67900000.0,187500000.0,27500000.0,131,6.5,5.3,0,15,1983
1985-10-01,A View to a Kill,Roger Moore,Rolls Royce,50327960.0,152627960.0,30000000.0,131,6.2,4.7,0,5,1985
1987-05-01,The Living Daylights,Timothy Dalton,Rolls Royce,51185000.0,191200000.0,40000000.0,130,6.7,6.3,2,13,1987
1989-01-01,License to Kill,Timothy Dalton,Aston Martin,34667015.0,156167015.0,42000000.0,133,6.5,6.0,1,10,1989
1995-09-01,GoldenEye,Pierce Brosnan,BMW,106429941.0,356429941.0,60000000.0,130,7.2,6.9,1,47,1995
1997-07-01,Tomorrow Never Dies,Pierce Brosnan,Aston Martin,125304276.0,339504276.0,110000000.0,119,6.4,6.0,1,30,1997
1999-06-01,The World Is Not Enough,Pierce Brosnan,BMW,126930660.0,361730660.0,135000000.0,128,6.3,5.7,1,27,1999
2002-08-01,Die Another Day,Pierce Brosnan,Aston Martin,160942139.0,431942139.0,142000000.0,133,6.0,6.1,2,31,2002
2006-02-01,Casino Royale,Daniel Craig,Aston Martin,167365000.0,596365000.0,102000000.0,144,7.9,7.8,3,11,2006
2008-12-01,Quantum of Solace,Daniel Craig,Aston Martin,169368427.0,591692078.0,230000000.0,106,6.7,6.1,6,16,2008
2012-11-01,Skyfall,Daniel Craig,Aston Martin,304360277.0,1108561108.0,200000000.0,143,7.8,8.2,1,26,2012
2015-09-01,Spectre,Daniel Craig,Aston Martin,200074175.0,879620923.0,245000000.0,148,6.8,6.4,1,30,2015
2021-11-01,No Time to Die,Daniel Craig,Aston Martin,160891007.0,759959662.0,275000000.0,163,7.3,7.3,1,14,2021
6 changes: 6 additions & 0 deletions data-analysis/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pandas==3.0.3
matplotlib==3.10.9
scikit-learn==1.9.0
openpyxl==3.1.5
pyarrow==24.0.0
lxml==6.1.1
Loading