# Import needed libraries for preprocessing
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer
# Load the dataset
Ames = pd.read_csv('Ames.csv')

# Convert the following numeric columns to object dtype so that the dtype-based
# selection below places them with the categorical features, not the numeric ones
Ames['MSSubClass'] = Ames['MSSubClass'].astype('object')
Ames['YrSold'] = Ames['YrSold'].astype('object')
Ames['MoSold'] = Ames['MoSold'].astype('object')

# Exclude 'PID' and 'SalePrice' from the features, and keep 'Electrical' out of
# the generic categorical list because it is handled by its own column list
numeric_features = (
    Ames.select_dtypes(include=['int64', 'float64'])
    .drop(columns=['PID', 'SalePrice'])
    .columns
)
categorical_features = Ames.select_dtypes(include=['object']).columns.difference(['Electrical'])
electrical_feature = ['Electrical']
# Manually specify the categories for ordinal encoding based on the data dictionary.
# Each list is ordered from lowest to highest level, so a category's position in
# its list is the integer code it is meant to receive.
ordinal_order = {
    'Electrical': ['Mix', 'FuseP', 'FuseF', 'FuseA', 'SBrkr'],  # Electrical system
    'LotShape': ['IR3', 'IR2', 'IR1', 'Reg'],  # General shape of property
    'Utilities': ['ELO', 'NoSeWa', 'NoSewr', 'AllPub'],  # Type of utilities available
    'LandSlope': ['Sev', 'Mod', 'Gtl'],  # Slope of property
    'ExterQual': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Quality of the exterior material
    'ExterCond': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Present condition of the exterior material
    'BsmtQual': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Height of the basement
    'BsmtCond': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # General condition of the basement
    'BsmtExposure': ['None', 'No', 'Mn', 'Av', 'Gd'],  # Walkout or garden level basement walls
    'BsmtFinType1': ['None', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ'],  # Quality of basement finished area
    'BsmtFinType2': ['None', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ'],  # Quality of second basement finished area
    'HeatingQC': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Heating quality and condition
    'KitchenQual': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Kitchen quality
    'Functional': ['Sal', 'Sev', 'Maj2', 'Maj1', 'Mod', 'Min2', 'Min1', 'Typ'],  # Home functionality
    'FireplaceQu': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Fireplace quality
    'GarageFinish': ['None', 'Unf', 'RFn', 'Fin'],  # Interior finish of the garage
    'GarageQual': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Garage quality
    'GarageCond': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Garage condition
    'PavedDrive': ['N', 'P', 'Y'],  # Paved driveway
    'PoolQC': ['None', 'Fa', 'TA', 'Gd', 'Ex'],  # Pool quality
    'Fence': ['None', 'MnWw', 'GdWo', 'MnPrv', 'GdPrv'],  # Fence quality
}

# Extract the list of ALL ordinal features from the dictionary (insertion order)
ordinal_features = list(ordinal_order)

# List of ordinal features except 'Electrical', which is processed separately
ordinal_except_electrical = [feature for feature in ordinal_features if feature != 'Electrical']
# Helper function to fill 'None' for missing categorical data
def fill_none(X):
    """Return a copy of *X* with every missing value replaced by the string "None".

    Args:
        X: A pandas object exposing ``fillna`` (e.g. a DataFrame of
            categorical columns).

    Returns:
        The result of ``X.fillna("None")``; *X* itself is not modified.
    """
    return X.fillna("None")
# Pipeline for 'Electrical': fill missing values with the mode, then apply ordinal encoding
electrical_transformer = Pipeline(steps=[
    ('impute_electrical', SimpleImputer(strategy='most_frequent')),
    ('ordinal_electrical', OrdinalEncoder(categories=[ordinal_order['Electrical']])),
])

# Pipeline for numeric features: impute missing values using the mean
numeric_transformer = Pipeline(steps=[
    ('impute_mean', SimpleImputer(strategy='mean')),
])

# Pipeline for ordinal features: fill missing values with 'None', then apply ordinal encoding.
# The category lists are built in the order of ordinal_except_electrical so that
# each list lines up with the column of the same position in that list.
ordinal_transformer = Pipeline(steps=[
    ('fill_none', FunctionTransformer(fill_none, validate=False)),
    ('ordinal', OrdinalEncoder(
        categories=[ordinal_order[feature] for feature in ordinal_except_electrical]
    )),
])

# Pipeline for nominal categorical features: fill missing values with 'None', then one-hot encode
nominal_features = [feature for feature in categorical_features if feature not in ordinal_features]
categorical_transformer = Pipeline(steps=[
    ('fill_none', FunctionTransformer(fill_none, validate=False)),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])
# Combined preprocessor for electrical, numeric, ordinal, and nominal data.
# The transformer order here fixes the column order of the output matrix.
preprocessor = ColumnTransformer(
    transformers=[
        ('electrical', electrical_transformer, ['Electrical']),
        ('num', numeric_transformer, numeric_features),
        ('ordinal', ordinal_transformer, ordinal_except_electrical),
        ('nominal', categorical_transformer, nominal_features),
    ])

# Apply the preprocessing pipeline to Ames
transformed_data = preprocessor.fit_transform(Ames)
# ColumnTransformer stacks to a sparse matrix only when the overall density is
# below its sparse_threshold; densify only when a sparse result actually came back
if hasattr(transformed_data, 'toarray'):
    transformed_data = transformed_data.toarray()

# Generate column names for the one-hot encoded features
onehot_features = preprocessor.named_transformers_['nominal'].named_steps['onehot'].get_feature_names_out()

# Combine all feature names, in the same order as the transformers above
all_feature_names = (
    ['Electrical']
    + list(numeric_features)
    + list(ordinal_except_electrical)
    + list(onehot_features)
)

# Convert the transformed array to a DataFrame
transformed_df = pd.DataFrame(transformed_data, columns=all_feature_names)