{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9hcdRjNhDlfa"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.impute import SimpleImputer\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score\n",
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "from sklearn import preprocessing\n",
    "from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
    "from sklearn.feature_selection import VarianceThreshold, SelectFromModel\n",
    "from sklearn.pipeline import Pipeline\n",
    "\n",
    "from sklearn.tree import DecisionTreeRegressor, plot_tree\n",
    "from sklearn.ensemble import BaggingRegressor, RandomForestRegressor\n",
    "from tqdm.auto import trange"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HoPCr-BoDduR"
   },
   "outputs": [],
   "source": [
    "def _drive_download_url(share_url):\n",
    "    \"\"\"Build a direct-download URL from a Google Drive/Docs share link.\n",
    "\n",
    "    The file id is read as the second-to-last '/'-separated segment of\n",
    "    share_url and appended to 'https://drive.google.com/uc?id='.\n",
    "    \"\"\"\n",
    "    return 'https://drive.google.com/uc?id=' + share_url.split('/')[-2]\n",
    "\n",
    "\n",
    "url_a6tf = _drive_download_url(\n",
    "    'https://drive.google.com/file/d/12V3p_jVKzrkMTLImFLxv_1YRymz5qTWG/view?usp=share_link')\n",
    "url_th = _drive_download_url(\n",
    "    'https://drive.google.com/file/d/1XC-fCaBnDiQLm82sWfMuhzPJtIytDfuy/view?usp=share_link')\n",
    "url_tet = _drive_download_url(\n",
    "    'https://docs.google.com/spreadsheets/d/1CkelCtYNUOi2VtMP7bY-XfstuGgWSL1f/edit?usp=share_link&ouid=101749734890205141586&rtpof=true&sd=true')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "0Nu5vaqxDiTZ"
   },
   "outputs": [],
   "source": [
    "# Participant table; its columns are renamed to Gender / Age / Education below.\n",
    "a6tf = pd.read_csv(url_a6tf)\n",
    "a6tf['Participant Private ID'] = a6tf['Participant Private ID'].astype(int)\n",
    "a6tf = a6tf.drop(columns='Participant Public ID').set_index('Participant Private ID')\n",
    "\n",
    "# Impute missing 'DoBRU-year' values with the column median. The result is\n",
    "# assigned back on purpose: fillna(inplace=True) on a6tf['DoBRU-year'] is a\n",
    "# chained in-place update, which warns on pandas 2.x and leaves a6tf\n",
    "# unchanged once Copy-on-Write is active.\n",
    "a6tf['DoBRU-year'] = a6tf['DoBRU-year'].fillna(a6tf['DoBRU-year'].median())\n",
    "# Positional rename: requires exactly three remaining columns; their order is\n",
    "# assumed to match these labels (TODO confirm against the CSV header).\n",
    "a6tf.columns = ['Gender', 'Age', 'Education']\n",
    "\n",
    "\n",
    "# Task table; only its first three columns are kept and relabelled.\n",
    "task_huav = pd.read_csv(url_th)\n",
    "task_huav['Participant Private ID'] = task_huav['Participant Private ID'].astype(int)\n",
    "task_huav = task_huav.drop(columns='Participant Public ID').set_index('Participant Private ID')\n",
    "# Explicit copy so relabelling (and any later edits) never refer back to task_huav.\n",
    "task_huav_con = task_huav[task_huav.columns[:3]].copy()\n",
    "task_huav_con.columns = ['Consent fair offer',\n",
    "                         'Consent conditionally fair offer', 'Consent unfair offer']\n",
    "\n",
    "\n",
    "# Questionnaire scores from the Excel sheet, indexed by the sheet's second column.\n",
    "# 'Unnamed: 0' is presumably a stray saved index column -- verify before relying on it.\n",
    "triada_empathy_tolerance = pd.read_excel(url_tet, index_col=1).drop(columns='Unnamed: 0')\n",
    "\n",
    "# Column-wise concat aligns rows on the index; this assumes the Excel index holds\n",
    "# the same participant IDs as the two CSV tables (TODO confirm).\n",
    "data = pd.concat([a6tf, triada_empathy_tolerance, task_huav_con], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 327
    },
    "id": "s0Qvw9g18Or3",
    "outputId": "36f69508-f09b-4f64-9731-2399c5e34b61"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "
\n", " | Gender | \n", "Age | \n", "Education | \n", "Machiavellianism | \n", "Psychopathy | \n", "Narcissism | \n", "Perspective-taking scale | \n", "Fantasy scale | \n", "Empathic concern scale | \n", "Personal distress scale | \n", "Tolerance to uncertainty | \n", "Intolerance to uncertainty | \n", "Interpersonal intolerance to uncertainty | \n", "Consent fair offer | \n", "Consent conditionally fair offer | \n", "Consent unfair offer | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Participant Private ID | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
6905533 | \n", "2 | \n", "20.0 | \n", "3 | \n", "11 | \n", "5 | \n", "15 | \n", "22 | \n", "35 | \n", "27 | \n", "24 | \n", "50 | \n", "49 | \n", "46 | \n", "6 | \n", "6 | \n", "0 | \n", "
6908178 | \n", "2 | \n", "28.0 | \n", "5 | \n", "15 | \n", "6 | \n", "17 | \n", "25 | \n", "15 | \n", "29 | \n", "17 | \n", "66 | \n", "64 | \n", "30 | \n", "6 | \n", "6 | \n", "10 | \n", "
7079747 | \n", "2 | \n", "19.0 | \n", "5 | \n", "19 | \n", "18 | \n", "14 | \n", "21 | \n", "7 | \n", "7 | \n", "7 | \n", "57 | \n", "69 | \n", "29 | \n", "6 | \n", "3 | \n", "0 | \n", "
7079758 | \n", "2 | \n", "21.0 | \n", "4 | \n", "9 | \n", "10 | \n", "18 | \n", "26 | \n", "27 | \n", "23 | \n", "26 | \n", "62 | \n", "59 | \n", "44 | \n", "5 | \n", "4 | \n", "4 | \n", "
7081909 | \n", "2 | \n", "20.0 | \n", "3 | \n", "10 | \n", "12 | \n", "10 | \n", "24 | \n", "19 | \n", "21 | \n", "21 | \n", "71 | \n", "41 | \n", "36 | \n", "6 | \n", "3 | \n", "0 | \n", "
GridSearchCV(cv=5,\n", " estimator=Pipeline(steps=[('scaler', MinMaxScaler()),\n", " ('variance',\n", " VarianceThreshold(threshold=0.05)),\n", " ('selection',\n", " SelectFromModel(estimator=Lasso(alpha=0.1))),\n", " ('regressor', Ridge(alpha=5.0))]),\n", " param_grid={'regressor__alpha': array([4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5. , 5.1, 5.2,\n", " 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6. ]),\n", " 'selection__estimator__alpha': array([0.01 , 0.0195, 0.029 , 0.0385, 0.048 , 0.0575, 0.067 , 0.0765,\n", " 0.086 , 0.0955, 0.105 , 0.1145, 0.124 , 0.1335, 0.143 , 0.1525,\n", " 0.162 , 0.1715, 0.181 , 0.1905, 0.2 ]),\n", " 'variance__threshold': [0.004, 0.005, 0.0055, 0.0059,\n", " 0.006, 0.065]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5,\n", " estimator=Pipeline(steps=[('scaler', MinMaxScaler()),\n", " ('variance',\n", " VarianceThreshold(threshold=0.05)),\n", " ('selection',\n", " SelectFromModel(estimator=Lasso(alpha=0.1))),\n", " ('regressor', Ridge(alpha=5.0))]),\n", " param_grid={'regressor__alpha': array([4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5. , 5.1, 5.2,\n", " 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6. ]),\n", " 'selection__estimator__alpha': array([0.01 , 0.0195, 0.029 , 0.0385, 0.048 , 0.0575, 0.067 , 0.0765,\n", " 0.086 , 0.0955, 0.105 , 0.1145, 0.124 , 0.1335, 0.143 , 0.1525,\n", " 0.162 , 0.1715, 0.181 , 0.1905, 0.2 ]),\n", " 'variance__threshold': [0.004, 0.005, 0.0055, 0.0059,\n", " 0.006, 0.065]})
Pipeline(steps=[('scaler', MinMaxScaler()),\n", " ('variance', VarianceThreshold(threshold=0.05)),\n", " ('selection', SelectFromModel(estimator=Lasso(alpha=0.1))),\n", " ('regressor', Ridge(alpha=5.0))])
MinMaxScaler()
VarianceThreshold(threshold=0.05)
SelectFromModel(estimator=Lasso(alpha=0.1))
Lasso(alpha=0.1)
Lasso(alpha=0.1)
Ridge(alpha=5.0)
\n", " | max_depth | \n", "MSE | \n", "
---|---|---|
0 | \n", "2 | \n", "15.886932 | \n", "
1 | \n", "3 | \n", "20.552046 | \n", "
2 | \n", "4 | \n", "25.642643 | \n", "
3 | \n", "8 | \n", "25.761934 | \n", "
4 | \n", "6 | \n", "26.589187 | \n", "
\n", " | min_samples_leaf | \n", "MSE | \n", "
---|---|---|
0 | \n", "3 | \n", "15.566753 | \n", "
1 | \n", "17 | \n", "15.621588 | \n", "
2 | \n", "16 | \n", "15.621588 | \n", "
3 | \n", "15 | \n", "15.621588 | \n", "
4 | \n", "14 | \n", "15.621588 | \n", "
\n", " | feature | \n", "importance | \n", "
---|---|---|
0 | \n", "Fantasy scale | \n", "0.394085 | \n", "
1 | \n", "Narcissism | \n", "0.328240 | \n", "
2 | \n", "Machiavellianism | \n", "0.277674 | \n", "
RandomForestRegressor(max_depth=11, n_estimators=7, random_state=0)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor(max_depth=11, n_estimators=7, random_state=0)