{ "cells": [ { "cell_type": "markdown", "id": "2713953f-cc8a-4aaa-845d-8a3231c0d04a", "metadata": { "user_expressions": [] }, "source": [ "# Data Preprocessing" ] }, { "cell_type": "code", "execution_count": 1, "id": "ca838dc6-3673-429a-8189-8414dd011b8c", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 2, "id": "63af9c7e-03ce-4b79-a85c-367288c4a50b", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('data/Aemf1.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "b39929a7-378a-4587-a5f8-1cbe0503888a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | City | \n", "Price | \n", "Day | \n", "Room Type | \n", "Shared Room | \n", "Private Room | \n", "Person Capacity | \n", "Superhost | \n", "Multiple Rooms | \n", "Business | \n", "Cleanliness Rating | \n", "Guest Satisfaction | \n", "Bedrooms | \n", "City Center (km) | \n", "Metro Distance (km) | \n", "Attraction Index | \n", "Normalised Attraction Index | \n", "Restraunt Index | \n", "Normalised Restraunt Index | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Amsterdam | \n", "194.033698 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "2.0 | \n", "False | \n", "1 | \n", "0 | \n", "10.0 | \n", "93.0 | \n", "1 | \n", "5.022964 | \n", "2.539380 | \n", "78.690379 | \n", "4.166708 | \n", "98.253896 | \n", "6.846473 | \n", "
1 | \n", "Amsterdam | \n", "344.245776 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "4.0 | \n", "False | \n", "0 | \n", "0 | \n", "8.0 | \n", "85.0 | \n", "1 | \n", "0.488389 | \n", "0.239404 | \n", "631.176378 | \n", "33.421209 | \n", "837.280757 | \n", "58.342928 | \n", "
2 | \n", "Amsterdam | \n", "264.101422 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "2.0 | \n", "False | \n", "0 | \n", "1 | \n", "9.0 | \n", "87.0 | \n", "1 | \n", "5.748312 | \n", "3.651621 | \n", "75.275877 | \n", "3.985908 | \n", "95.386955 | \n", "6.646700 | \n", "
3 | \n", "Amsterdam | \n", "433.529398 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "4.0 | \n", "False | \n", "0 | \n", "1 | \n", "9.0 | \n", "90.0 | \n", "2 | \n", "0.384862 | \n", "0.439876 | \n", "493.272534 | \n", "26.119108 | \n", "875.033098 | \n", "60.973565 | \n", "
4 | \n", "Amsterdam | \n", "485.552926 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "2.0 | \n", "True | \n", "0 | \n", "0 | \n", "10.0 | \n", "98.0 | \n", "1 | \n", "0.544738 | \n", "0.318693 | \n", "552.830324 | \n", "29.272733 | \n", "815.305740 | \n", "56.811677 | \n", "
5 | \n", "Amsterdam | \n", "552.808567 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "3.0 | \n", "False | \n", "0 | \n", "0 | \n", "8.0 | \n", "100.0 | \n", "2 | \n", "2.131420 | \n", "1.904668 | \n", "174.788957 | \n", "9.255191 | \n", "225.201662 | \n", "15.692376 | \n", "
6 | \n", "Amsterdam | \n", "215.124317 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "2.0 | \n", "False | \n", "0 | \n", "0 | \n", "10.0 | \n", "94.0 | \n", "1 | \n", "1.881092 | \n", "0.729747 | \n", "200.167652 | \n", "10.599010 | \n", "242.765524 | \n", "16.916251 | \n", "
7 | \n", "Amsterdam | \n", "2771.307384 | \n", "Weekday | \n", "Entire home/apt | \n", "False | \n", "False | \n", "4.0 | \n", "True | \n", "0 | \n", "0 | \n", "10.0 | \n", "100.0 | \n", "3 | \n", "1.686807 | \n", "1.458404 | \n", "208.808109 | \n", "11.056528 | \n", "272.313823 | \n", "18.975219 | \n", "
8 | \n", "Amsterdam | \n", "1001.804420 | \n", "Weekday | \n", "Entire home/apt | \n", "False | \n", "False | \n", "4.0 | \n", "False | \n", "0 | \n", "0 | \n", "9.0 | \n", "96.0 | \n", "2 | \n", "3.719141 | \n", "1.196112 | \n", "106.226456 | \n", "5.624761 | \n", "133.876202 | \n", "9.328686 | \n", "
9 | \n", "Amsterdam | \n", "276.521454 | \n", "Weekday | \n", "Private room | \n", "False | \n", "True | \n", "2.0 | \n", "False | \n", "1 | \n", "0 | \n", "10.0 | \n", "88.0 | \n", "1 | \n", "3.142361 | \n", "0.924404 | \n", "206.252862 | \n", "10.921226 | \n", "238.291258 | \n", "16.604478 | \n", "