From Single Trees to Forests: Enhancing Real Estate Predictions with Ensembles

By thehemashow

September 23, 2024

0

37

# Import needed libraries for preprocessing

import pandas as pd

from sklearn.pipeline import Pipeline

from sklearn.impute import SimpleImputer

from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, FunctionTransformer

from sklearn.compose import ColumnTransformer

# Load the dataset
Ames = pd.read_csv('Ames.csv')

# Convert the below numeric features to categorical features so they are
# picked up by the 'object' dtype selection further down
Ames['MSSubClass'] = Ames['MSSubClass'].astype('object')
Ames['YrSold'] = Ames['YrSold'].astype('object')
Ames['MoSold'] = Ames['MoSold'].astype('object')

# Exclude 'PID' and 'SalePrice' from features and specifically handle the 'Electrical' column
numeric_features = (
    Ames.select_dtypes(include=['int64', 'float64'])
    .drop(columns=['PID', 'SalePrice'])
    .columns
)
# 'Electrical' gets its own pipeline, so remove it from the generic categorical set
categorical_features = Ames.select_dtypes(include=['object']).columns.difference(['Electrical'])
electrical_feature = ['Electrical']

# Manually specify the categories for ordinal encoding according to the data dictionary.
# Each list is the category order handed to OrdinalEncoder, so a value's position in
# its list becomes its integer code.
ordinal_order = {
    'Electrical': ['Mix', 'FuseP', 'FuseF', 'FuseA', 'SBrkr'],  # Electrical system
    'LotShape': ['IR3', 'IR2', 'IR1', 'Reg'],  # General shape of property
    'Utilities': ['ELO', 'NoSeWa', 'NoSewr', 'AllPub'],  # Type of utilities available
    'LandSlope': ['Sev', 'Mod', 'Gtl'],  # Slope of property
    'ExterQual': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Evaluates the quality of the material on the exterior
    'ExterCond': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Evaluates the present condition of the material on the exterior
    'BsmtQual': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Height of the basement
    'BsmtCond': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # General condition of the basement
    'BsmtExposure': ['None', 'No', 'Mn', 'Av', 'Gd'],  # Walkout or garden level basement walls
    'BsmtFinType1': ['None', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ'],  # Quality of basement finished area
    'BsmtFinType2': ['None', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ'],  # Quality of second basement finished area
    'HeatingQC': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Heating quality and condition
    'KitchenQual': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Kitchen quality
    'Functional': ['Sal', 'Sev', 'Maj2', 'Maj1', 'Mod', 'Min2', 'Min1', 'Typ'],  # Home functionality
    'FireplaceQu': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Fireplace quality
    'GarageFinish': ['None', 'Unf', 'RFn', 'Fin'],  # Interior finish of the garage
    'GarageQual': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Garage quality
    'GarageCond': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Garage condition
    'PavedDrive': ['N', 'P', 'Y'],  # Paved driveway
    'PoolQC': ['None', 'Fa', 'TA', 'Gd', 'Ex'],  # Pool quality
    'Fence': ['None', 'MnWw', 'GdWo', 'MnPrv', 'GdPrv'],  # Fence quality
}

# Extract list of ALL ordinal features from dictionary
ordinal_features = list(ordinal_order.keys())

# List of ordinal features except Electrical
ordinal_except_electrical = [feature for feature in ordinal_features if feature != 'Electrical']

# Helper function to fill 'None' for missing categorical data
def fill_none(X):
    """Return a copy of *X* with every missing value replaced by the string "None".

    X must expose a pandas-style ``fillna`` method; the input itself is not
    modified because ``fillna`` is called without ``inplace``.
    """
    return X.fillna("None")

# Pipeline for 'Electrical': Fill missing value with mode then apply ordinal encoding
electrical_transformer = Pipeline(steps=[
    ('impute_electrical', SimpleImputer(strategy='most_frequent')),
    # categories is a list with one entry per input column; only 'Electrical' is routed here
    ('ordinal_electrical', OrdinalEncoder(categories=[ordinal_order['Electrical']])),
])

# Pipeline for numeric features: Impute missing values using mean
numeric_transformer = Pipeline(steps=[
    ('impute_mean', SimpleImputer(strategy='mean')),
])

# Pipeline for ordinal features: Fill missing values with 'None' then apply ordinal encoding
ordinal_transformer = Pipeline(steps=[
    ('fill_none', FunctionTransformer(fill_none, validate=False)),
    # One category list per column, built from ordinal_except_electrical so the
    # order matches the columns this pipeline receives from the ColumnTransformer
    ('ordinal', OrdinalEncoder(
        categories=[ordinal_order[feature] for feature in ordinal_except_electrical]
    )),
])

# Pipeline for nominal categorical features: Fill missing values with 'None' then apply one-hot encoding
# Nominal features are the categorical columns that have no entry in ordinal_order
nominal_features = [feature for feature in categorical_features if feature not in ordinal_features]

categorical_transformer = Pipeline(steps=[
    ('fill_none', FunctionTransformer(fill_none, validate=False)),
    # handle_unknown='ignore' encodes categories unseen during fit as all zeros
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Combined preprocessor for numeric, ordinal, nominal, and specific electrical data.
# The transformer order here fixes the column order of the transformed output.
preprocessor = ColumnTransformer(
    transformers=[
        ('electrical', electrical_transformer, ['Electrical']),
        ('num', numeric_transformer, numeric_features),
        ('ordinal', ordinal_transformer, ordinal_except_electrical),
        ('nominal', categorical_transformer, nominal_features),
    ]
)

# Apply the preprocessing pipeline to Ames
transformed_data = preprocessor.fit_transform(Ames)

# ColumnTransformer may return either a sparse matrix or a dense array depending on
# the overall density of the output; only sparse results need converting.
if hasattr(transformed_data, 'toarray'):
    transformed_data = transformed_data.toarray()

# Generate column names for the one-hot encoded features
onehot_features = preprocessor.named_transformers_['nominal'].named_steps['onehot'].get_feature_names_out()

# Combine all feature names, in the same order as the transformers in the preprocessor
all_feature_names = (
    ['Electrical']
    + list(numeric_features)
    + list(ordinal_except_electrical)
    + list(onehot_features)
)

# Convert the transformed array to a DataFrame
transformed_df = pd.DataFrame(transformed_data, columns=all_feature_names)

From Single Trees to Forests: Enhancing Real Estate Predictions with Ensembles

Related Articles

How To Drive Google Procuring Development With Solely One Of Every Product

Symbiotic Safety updates its IDE extension to present builders higher insights into insecure code as it’s written

Google Faces EU Expenses Over Alleged DMA Breaches

ABOUT US