"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(10,4))\n",
"\n",
"plt.hist(np.log1p(target['target']), bins = 200);"
]
},
{
"cell_type": "markdown",
"id": "a0b88862-bd0a-4493-9342-38f20407bf78",
"metadata": {},
"source": [
"В распределении очень много нулей, так что стоит смотреть чуть уже:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "a7da6bae-7c27-4cc3-ba62-82d3a606a15f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0gAAAFfCAYAAAB0q+zRAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANAtJREFUeJzt3X9UVPed//HXVGBUCrcCgWFWNDY1xBTiSTELY9No/IGyISQ1Z7WlZ1ZbV5PGaFn1pJqcPSHndMXajaaVjbWuVSOm5OwmpumaToJHJXURf1DZqrHUbjXFE0ZMFgY1dCDmfv/o9X4z/NJBkF/Pxzn3HO6973v53OvFmdd87v2MwzRNUwAAAAAAfa6vGwAAAAAA/QUBCQAAAAAsBCQAAAAAsBCQAAAAAMBCQAIAAAAACwEJAAAAACwEJAAAAACwRPR1A3rLp59+qg8++EAxMTFyOBx93RwAAAAAfcQ0TV26dElut1uf+1zXfUSDNiB98MEHSklJ6etmAAAAAOgnamtrNXr06C5rBm1AiomJkfTXkxAbG9vHrQEAAADQV5qampSSkmJnhK4M2oB07ba62NhYAhIAAACAG3r0hkEaAAAAAMBCQAIAAAAACwEJAAAAACwEJAAAAACwEJAAAAAAwEJAAgAAAAALAQkAAAAALAQkAAAAALAQkAAAAADAQkACAAAAAAsBCQAAAAAsBCQAAAAAsET0dQOGittX7QmZP7f2oT5qCQAAAIDO0IMEAAAAABYCEgAAAABYbiogFRUVyeFwqKCgwF5mmqYKCwvldrs1YsQITZ06VadOnQrZLhgMaunSpUpISFB0dLTy8vJ0/vz5kJqGhgZ5vV4ZhiHDMOT1etXY2HgzzQUAAACALnU7IB09elQ/+9nPdM8994QsX7dundavX6/i4mIdPXpULpdLM2fO1KVLl+yagoIC7d69W6WlpTp48KAuX76s3NxcXb161a7Jz89XdXW1fD6ffD6fqqur5fV6u9tcAAAAALiubgWky5cv61vf+pa2bNmiUaNG2ctN09SLL76oZ599VnPmzFFaWpp27Nihjz/+WK+88ookKRAIaOvWrXrhhRc0Y8YM3XvvvSopKdGJEye0d+9eSdLp06fl8/n07//+7/J4PPJ4PNqyZYv+67/+SzU1NT1w2AAAAADQXrcC0pIlS/TQQw9pxowZIcvPnj0rv9+v7Oxse5nT6dSUKVNUUVEhSaqqqlJra2tIjdvtVlpaml1z6NAhGYahzMxMuyYrK0uGYdg1bQWDQTU1NYVMAAAAABCOsIf5Li0t1W9/+1sdPXq03Tq/3y9JSkpKClmelJSk999/366JiooK6Xm6VnNte7/fr8TExHb7T0xMtGvaKioq0vPPPx/u4QAAAACALawepNraWn3ve99TSUmJhg8f3mmdw+EImTdNs92yttrWdFTf1X5Wr16tQCBgT7W1tV3+PgAAAABoK6yAVFVVpfr6emVkZCgiIkIREREqLy/XT37yE0VERNg9R217eerr6+11LpdLLS0tamho6LLmwoUL7X7/xYsX2/VOXeN0OhUbGxsyAQAAAEA4wgpI06dP14kTJ1RdXW1PkyZN0re+9S1VV1fri1/8olwul8rKyuxtWlpaVF5ersmTJ0uSMjIyFBkZGVJTV1enkydP2jUej0eBQEBHjhyxaw4fPqxAIGDXAAAAAEBPC+sZpJiYGKWlpYUsi46OVnx8vL28oKBAa9as0fjx4zV+/HitWbNGI0eOVH5+viTJMAwtXLhQK1asUHx8vOLi4rRy5Uqlp6fbgz5MmDBBs2fP1qJFi7R582ZJ0uLFi5Wbm6vU1NSbPmgAAAAA6EjYgzRcz9NPP63m5mY9+eSTamhoUGZmpt555x3FxMTYNRs2bFBERITmzp2r5uZmTZ8+Xdu3b9ewYcPsml27dmnZsmX2aHd5eXkqLi7u6eYCAAAAgM1hmqbZ143oDU1NTTIMQ4FAoF88j3T7qj0h8+fWPtRHLQEAAACGlnCyQbe+BwkAAAAABiMCEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgISAAAAABgISABAAAAgIWABAAAAAAWAhIAAAAAWAhIAAAAAGAhIAEAAACAhYAEAAAAABYCEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgISAAAAABgISABAAAAgIWABAAAAAAWAhIAAAAAWAhIAAAAAGAhIAEAAACAhYAEAAAAAJawAtKmTZt0zz33KDY2VrGxsfJ4PPr1r39tr1+wYIEcDkfIlJWVFbKPYDCopUuXKiEhQdHR0crLy9P58+dDahoaGuT1emUYhgzDkNfrVWNjY/ePEgAAAABuQFgBafTo0Vq7dq2OHTumY8eOadq0aXrkkUd06tQpu2b27Nmqq6uzp7feeitkHwUFBdq9e7dKS0t18OBBXb58Wbm5ubp69apdk5+fr+rqavl8Pvl8PlVXV8vr9d7koQIAAABA1yLCKX744YdD5v/lX/5FmzZtUmVlpb785S9LkpxOp1wuV4fbBwIBbd26VTt37tSMGTMkSSUlJUpJSdHevXs1a9YsnT59Wj6fT5WVlcrMzJQkbdmyRR6PRzU1NUpNTe1w38FgUMFg0J5vamoK59AAAAAAoPvPIF29elWlpaW6cuWKPB6PvfzAgQNKTEzUnXfeqUWLFqm+vt5eV1VVpdbWVmVnZ9vL3G630tLSVFFRIUk6dOiQDMOww5EkZWVlyTAMu6YjRUVF9i15hmEoJSWlu4cGAAAAYIgKOyCdOHFCn//85+V0OvXEE09o9+7duvvuuyVJOTk52rVrl/bt26cXXnhBR48e1bRp0+yeHb/fr6ioKI0aNSpkn0lJSfL7/XZNYmJiu9+bmJho13Rk9erVCgQC9lRbWxvuoQEAAAAY4sK6xU6SUlNTVV1drcbGRr322muaP3++ysvLdffdd2vevHl2XVpamiZNmqSxY8dqz549mjNnTqf7NE1TDofDnv/sz53VtOV0OuV0OsM9HAAAAACwhd2DFBUVpS996UuaNGmSioqKNHHiRP34xz/usDY5OVljx47VmTNnJEkul0stLS1qaGgIqauvr1dSUpJdc+HChXb7unjxol0DAAAAAL3hpr8HyTTNkMERPuujjz5SbW2tkpOTJUkZGRmKjIxUWVmZXVNXV6eTJ09q8uTJkiSPx6NAIKAjR47YNYcPH1YgELBrAAAAAKA3hHWL3TPPPKOcnBylpKTo0qVLKi0t1YEDB+Tz+XT58mUVFhbqscceU3Jyss6dO6dnnnlGCQkJ+vrXvy5JMgxDCxcu1IoVKxQfH6+4uDitXLlS6enp9qh2EyZM0OzZs7Vo0SJt3rxZkrR48WLl5uZ2OoIdAAAAAPSEsALShQsX5PV6VVdXJ8MwdM8998jn82nmzJlqbm7WiRMn9PLLL6uxsVHJycl68MEH9eqrryomJsbex4YNGxQREaG5c+equblZ06dP1/bt2zVs2DC7ZteuXVq2bJk92l1eXp6Ki4t76JABAAAAoGMO0zTNvm5Eb2hqapJhGAoEAoqNje3r5uj2VXtC5s+tfaiPWgIAAAAMLeFkg5t+BgkAAAAABgsCEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgISAAAAABgISABAAAAgIWABAAAAAAWAhIAAAAAWAhIAAAAAGAhIAEAAACAhYAEAAAAABYCEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgISAAAAABgISABAAAAgIWABAAAAAAWAhIAAAAAWAhIAAAAAGAhIAEAAACAJayAtGnTJt1zzz2KjY1VbGysPB6Pfv3rX9vrTdNUYWGh3G63RowYoalTp+rUqVMh+wgGg1q6dKkSEhIUHR2tvLw8nT9/PqSmoaFBXq9XhmHIMAx5vV41NjZ2/ygBAAAA4AaEFZBGjx6ttWvX6tixYzp27JimTZumRx55xA5B69at0/r161VcXKyjR4/K5XJp5syZunTpkr2PgoIC7d69W6WlpTp48KAuX76s3NxcXb161a7Jz89XdXW1fD6ffD6fqqur5fV6e+iQAQAAAKBjDtM0zZvZQVxcnH70ox/pO9/5jtxutwoKCvT9739f0l97i5KSkvTDH/5Qjz/+uAKBgG677Tbt3LlT8+bNkyR98MEHSklJ0VtvvaVZs2bp9OnTuvvuu1VZWanMzExJUmVlpTwej37/+98rNTX1htrV1NQkwzAUCAQUGxt7M4fYI25ftSdk/tzah/qoJQAAAMDQEk426PYzSFevXlVpaamuXLkij8ejs2fPyu/3Kzs7265xOp2aMmWKKioqJElVVVVqbW0NqXG73UpLS7NrDh06JMMw7HAkSVlZWTIMw67pSDAYVFNTU8gEAAAAAOEIOyCdOHFCn//85+V0OvXEE09o9+7duvvuu+X3+yVJSUlJIfVJSUn2Or/fr6ioKI0aNarLmsTExHa/NzEx0a7pSFFRkf3MkmEYSklJCffQAAAAAAxxYQek1NRUVVdXq7KyUt/97nc1f/58vffee/Z6h8MRUm+aZrtlbbWt6aj+evtZvXq1AoGAPdXW1t7oIQEAAACApG4EpKioKH3pS1/SpEmTVFRUpIkTJ+rHP/6xXC6XJLXr5amvr7d7lVwul1paWtTQ0NBlzYULF9r93osXL7brnfosp9Npj653bQIAAACAcNz09yCZpqlgMKhx48bJ5XKprKzMXtfS0qLy8nJNnjxZkpSRkaHIyMiQmrq6Op08edKu8Xg8CgQCOnLkiF1z+PBhBQIBuwYAAAAAekNEOMXPPPOMcnJylJKSokuXLqm0tFQHDhyQz+eTw+FQQUGB1qxZo/Hjx2v8+PFas2aNRo4cqfz8fEmSYRhauHChVqxYofj4eMXFxWnlypVKT0/XjBkzJEkTJkzQ7NmztWjRIm3evFmStHjxYuXm5t7wCHYAAAAA0B1hBaQLFy7I6/Wqrq5OhmHonnvukc/n08yZMyVJTz/9tJqbm/Xkk0+qoaFBmZmZeueddxQTE2PvY8OGDYqIiNDcuXPV3Nys6dOna/v27Ro2bJhds2vXLi1btswe7S4vL0/FxcU9cbwAAAAA0Kmb/h6k/orvQQIAAAAg3aLvQQIAAACAwYaABAAAAAAWAhIAAAAAWAhIAAAAAGAhIAEAAACAhYAEAAAAABYCEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgi+roBAAAA+P9uX7UnZP7c2of6qCXA0EQPEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgISAAAAABgISABAAAAgIWABAAAAAAWAhIAAAAAWAhIAAAAAGAhIAEAAACAhYAEAAAAABYCEgAAAABYwgpIRUVFuu+++xQTE6PExEQ9+uijqqmpCalZsGCBHA5HyJSVlRVSEwwGtXTpUiUkJCg6Olp5eXk6f/58SE1DQ4O8Xq8Mw5BhGPJ6vWpsbOzeUQIAAADADQgrIJWXl2vJkiWqrKxUWVmZPvnkE2VnZ+vKlSshdbNnz1ZdXZ09vfXWWyHrCwoKtHv3bpWWlurgwYO6fPmycnNzdfXqVbsmPz9f1dXV8vl88vl8qq6ultfrvYlDBQAAAICuRYRT7PP5Qua3bdumxMREVVVV6YEHHrCXO51OuVyuDvcRCAS0detW7dy5UzNmzJAklZSUKCUlRXv37tWsWbN0+vRp+Xw+VVZWKjMzU5K0ZcsWeTwe1dTUKDU1NayDBAAAAIAbcVPPIAUCAUlSXFxcyPIDBw4oMTFRd955pxYtWqT6+np7XVVVlVpbW5WdnW0vc7vdSktLU0VFhSTp0KFDMgzDDkeSlJWVJcMw7Jq2gsGgmpqaQiYAAAAACEe3A5Jpmlq+fLnuv/9+paWl2ctzcnK0a9cu7du3Ty+88IKOHj2qadOmKRgMSpL8fr+ioqI0atSokP0lJSXJ7/fbNYmJie1+Z2Jiol3TVlFRkf28kmEYSklJ6e6hAQAAABiiwrrF7rOeeuop/e53v9PBgwdDls+bN8/+OS0tTZMmTdLYsWO1Z88ezZkzp9P9maYph8Nhz3/2585qPmv16tVavny5Pd/U1ERIAgAAABCWbvUgLV26VG+++ab279+v0aNHd1mbnJyssWPH6syZM5Ikl8ullpYWNTQ0hNTV19crKSnJrrlw4UK7fV28eNGuacvpdCo2NjZkAgAAAIBwhBWQTNPUU089pddff1379u3TuHHjrrvNRx99pNraWiUnJ0uSMjIyFBkZqbKyMrumrq5OJ0+e1OTJkyVJHo9HgUBAR44csWsOHz6sQCBg1wAAAABATwvrFrslS5bolVde0S9/+UvFxMTYzwMZhqERI0bo8uXLKiws1GOPPabk5GSdO3dOzzzzjBISEvT1r3/drl24cKFWrFih+Ph4xcXFaeXKlUpPT7dHtZswYYJmz56tRYsWafPmzZKkxYsXKzc3lxHsAAAAAPSasALSpk2bJElTp04NWb5t2zYtWLBAw4YN04kTJ/Tyyy+rsbFRycnJevDBB/Xqq68qJibGrt+wYYMiIiI0d+5cNTc3a/r06dq+fbuGDRtm1+zatUvLli2zR7vLy8tTcXFxd48TAAAAAK4rrIBkmmaX60eMGKG33377uvsZPny4Nm7cqI0bN3ZaExcXp5KSknCaBwAAAAA35aa+BwkAAAAABhMCEgAAAABYCEgAAAAAYCEgAQAAAIAlrEEaAAAAhrrbV+2xfz639qE+bAmA3kAPEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgISAAAAABgISABAAAAgIWABAAAAAAWvigWAAD0a5/9YlaJL2cF0LvoQQIAAAAACwEJAAAAACwEJAAAAACwEJAAAAAAwEJAAgAAAAALAQkAAAAALAQkAAAAALAQkAAAAADAQkACAAAAAAsBCQAAAAAsBCQAAAAAsIQVkIqKinTfffcpJiZGiYmJevTRR1VTUxNSY5qmCgsL5Xa7NWLECE2dOlWnTp0KqQkGg1q6dKkSEhIUHR2tvLw8nT9/PqSmoaFBXq9XhmHIMAx5vV41NjZ27ygBAAAA4AaEFZDKy8u1ZMkSVVZWqqysTJ988omys7N15coVu2bdunVav369iouLdfToUblcLs2cOVOXLl2yawoKCrR7926Vlpbq4MGDunz5snJzc3X16lW7Jj8/X9XV1fL5fPL5fKqurpbX6+2BQwYAAACAjkWEU+zz+ULmt23bpsTERFVVVemBBx6QaZp68cUX9eyzz2rOnDmSpB07digpKUmvvPKKHn/8cQUCAW3dulU7d+7UjBkzJEklJSVKSUnR3r17NWvWLJ0+fVo+n0+VlZXKzMyUJG3ZskUej0c1NTVKTU3tiWMHAADo925ftcf++dzah/qwJcDQcFPPIAUCAUlSXFycJOns2bPy+/3Kzs62a5xOp6ZMmaKKigpJUlVVlVpbW0Nq3G630tLS7JpDhw7JMAw7HElSVlaWDMOwa9oKBoNqamoKmQAAAAAgHN0OSKZpavny5br//vuVlpYmSfL7/ZKkpKSkkNqkpCR7nd/vV1RUlEaNGtVlTWJiYrvfmZiYaNe0VVRUZD+vZBiGUlJSuntoAAAAAIaobgekp556Sr/73e/0i1/8ot06h8MRMm+aZrtlbbWt6ai+q/2sXr1agUDAnmpra2/kMAAAAADA1q2AtHTpUr355pvav3+/Ro8ebS93uVyS1K6Xp76+3u5VcrlcamlpUUNDQ5c1Fy5caPd7L1682K536hqn06nY2NiQCQAAAADCEVZAMk1TTz31lF5//XXt27dP48aNC1k/btw4uVwulZWV2ctaWlpUXl6uyZMnS5IyMjIUGRkZUlNXV6eTJ0/aNR6PR4FAQEeOHLFrDh8+rEAgYNcAAAAAQE8LaxS7JUuW6JVXXtEvf/lLxcTE2D1FhmFoxIgRcjgcKigo0Jo1azR+/HiNHz9ea9as0ciRI5Wfn2/XLly4UCtWrFB8fLzi4uK0cuVKpaen26PaTZgwQbNnz9aiRYu0efNmSdLixYuVm5vLCHYAAAAAek1YAWnTpk2SpKlTp4Ys37ZtmxYsWCBJevrpp9Xc3Kwnn3xSDQ0NyszM1DvvvKOYmBi7fsOGDYqIiNDcuXPV3Nys6dOna/v27Ro2bJhds2vXLi1btswe7S4vL0/FxcXdOUYAAAAAuCFhBSTTNK9b43A4VFhYqMLCwk5rhg8fro0bN2rjxo2d1sTFxamkpCSc5gEAAADATQkrIAEAALTFF5kCGEwISH2EFxMAAACg/+n29yABAAAAwGBDDxIAABi0uGMDQLgISIPIZ18EJF4IAAD4LF4nAdwIbrEDAAAAAAsBCQAAAAAsBCQAAAAAsBCQAAAAAMBCQAIAAAAACwEJAAAAACwEJAAAAACwEJAAAAAAwMIXxQIAAHQTXz4LDD70IAEAAACAhR4kAACANj7bMxROr1BX23V3nwBuLQISAAAY8treKgdg6CIg9QPcvwwAAAD0DzyDBAAAAAAWAhIAAAAAWLjFDgAAhIXndQAMZgSkAY4XKQAA+ideo4GBiVvsAAAAAMBCDxIAAEAXbkVPEL1NQP9BDxIAAAAAWMIOSO+++64efvhhud1uORwOvfHGGyHrFyxYIIfDETJlZWWF1ASDQS1dulQJCQmKjo5WXl6ezp8/H1LT0NAgr9crwzBkGIa8Xq8aGxvDPkAAAICO3L5qjz0BwDVh32J35coVTZw4Ud/+9rf12GOPdVgze/Zsbdu2zZ6PiooKWV9QUKBf/epXKi0tVXx8vFasWKHc3FxVVVVp2LBhkqT8/HydP39ePp9PkrR48WJ5vV796le/CrfJAAAA/QqhDOi/wg5IOTk5ysnJ6bLG6XTK5XJ1uC4QCGjr1q3auXOnZsyYIUkqKSlRSkqK9u7dq1mzZun06dPy+XyqrKxUZmamJGnLli3yeDyqqalRampquM0e0D77n+i5tQ/1YUsAAACAwa1XnkE6cOCAEhMTdeedd2rRokWqr6+311VVVam1tVXZ2dn2MrfbrbS0NFVUVEiSDh06JMMw7HAkSVlZWTIMw65pKxgMqqmpKWQCAACDD7fGAehNPR6QcnJytGvXLu3bt08vvPCCjh49qmnTpikYDEqS/H6/oqKiNGrUqJDtkpKS5Pf77ZrExMR2+05MTLRr2ioqKrKfVzIMQykpKT18ZAAAAAAGux4f5nvevHn2z2lpaZo0aZLGjh2rPXv2aM6cOZ1uZ5qmHA6HPf/Znzur+azVq1dr+fLl9nxTU9OgDEl8WgYAGCy4hRxAf9Tr34OUnJyssWPH6syZM5Ikl8ullpYWNTQ0hPQi1dfXa/LkyXbNhQsX2u3r4sWLSkpK6vD3OJ1OOZ3OXjiCW48QBAAAAPSNXv8epI8++ki1tbVKTk6WJGVkZCgyMlJlZWV2TV1dnU6ePGkHJI/Ho0AgoCNHjtg1hw8fViAQsGsAAAAAoKeF3YN0+fJl/fGPf7Tnz549q+rqasXFxSkuLk6FhYV67LHHlJycrHPnzumZZ55RQkKCvv71r0uSDMPQwoULtWLFCsXHxysuLk4rV65Uenq6PardhAkTNHv2bC1atEibN2+W9NdhvnNzc4fcCHYAAAAAbp2wA9KxY8f04IMP2vPXnvuZP3++Nm3apBMnTujll19WY2OjkpOT9eCDD+rVV19VTEyMvc2GDRsUERGhuXPnqrm5WdOnT9f27dvt70CSpF27dmnZsmX2aHd5eXkqLi7u9oECAAAAwPWEHZCmTp0q0zQ7Xf/2229fdx/Dhw/Xxo0btXHjxk5r4uLiVFJSEm7zAAAAAKDbev0ZJAAAAAAYKHp9FDsAAIDe0nbkV4YLB3Cz6EECAAAAAAs9SAAAYNDguwQB3Cx6kAAAAADAQkACAAAAAAsBCQAAAAAsBCQAAAAAsBCQAAAAAMDCKHYAAKDP8X1GAPoLAhIAAIPMZ8MGQQMAwsMtdgAAAABgISABAAAAgIWABAAAAAAWnkECAAA9hsEWAAx09CABAAAAgIWABAAAAAAWbrEDAAAYILiFEeh99CABAAAAgIWABAAAAAAWbrEDAAC95rO3hHE7GICBgIAEAABuibbPz/RULQD0JG6xAwAAAAALAQkAAAAALNxiBwAAMAgwBDjQM8LuQXr33Xf18MMPy+12y+Fw6I033ghZb5qmCgsL5Xa7NWLECE2dOlWnTp0KqQkGg1q6dKkSEhIUHR2tvLw8nT9/PqSmoaFBXq9XhmHIMAx5vV41NjaGfYAAAAAAcKPCDkhXrlzRxIkTVVxc3OH6devWaf369SouLtbRo0flcrk0c+ZMXbp0ya4pKCjQ7t27VVpaqoMHD+ry5cvKzc3V1atX7Zr8/HxVV1fL5/PJ5/OpurpaXq+3G4cIAAAwON2+ao89AegZYd9il5OTo5ycnA7XmaapF198Uc8++6zmzJkjSdqxY4eSkpL0yiuv6PHHH1cgENDWrVu1c+dOzZgxQ5JUUlKilJQU7d27V7NmzdLp06fl8/lUWVmpzMxMSdKWLVvk8XhUU1Oj1NTU7h4vAAAAAHSqRwdpOHv2rPx+v7Kzs+1lTqdTU6ZMUUVFhSSpqqpKra2tITVut1tpaWl2zaFDh2QYhh2OJCkrK0uGYdg1bQWDQTU1NYVMAAAgFD0OANC1Hg1Ifr9fkpSUlBSyPCkpyV7n9/sVFRWlUaNGdVmTmJjYbv+JiYl2TVtFRUX280qGYSglJeWmjwcAAADA0NIrw3w7HI6QedM02y1rq21NR/Vd7Wf16tUKBAL2VFtb242WAwAAABjKejQguVwuSWrXy1NfX2/3KrlcLrW0tKihoaHLmgsXLrTb/8WLF9v1Tl3jdDoVGxsbMgEAAABAOHo0II0bN04ul0tlZWX2spaWFpWXl2vy5MmSpIyMDEVGRobU1NXV6eTJk3aNx+NRIBDQkSNH7JrDhw8rEAjYNQAwkH32ORCeBQHQG/g/BuiesEexu3z5sv74xz/a82fPnlV1dbXi4uI0ZswYFRQUaM2aNRo/frzGjx+vNWvWaOTIkcrPz5ckGYahhQsXasWKFYqPj1dcXJxWrlyp9PR0e1S7CRMmaPbs2Vq0aJE2b94sSVq8eLFyc3MZwQ4AAABArwk7IB07dkwPPvigPb98+XJJ0vz587V9+3Y9/fTTam5u1pNPPqmGhgZlZmbqnXfeUUxMjL3Nhg0bFBERoblz56q5uVnTp0/X9u3bNWzYMLtm165dWrZsmT3aXV5eXqffvQQAAG5eVz0N59Y+dAtbAgB9J+yANHXqVJmm2el6h8OhwsJCFRYWdlozfPhwbdy4URs3buy0Ji4uTiUlJeE2DwAGpM++MeWNKAAAfadXRrEDAAAAgIEo7B4kAAAw9PCgP4ChgoA0iHHLDgDcGm3DQ3/6P5dgAwDh4RY7AAAAALAQkAAAAADAwi12AAAMUdx+BwDtEZAA3BCeaQMAAEMBt9gBAAAAgIUeJAAd4tYbDCb9tQf0Zka/66/HBAADHT1IAAAAAGChBwkA2ujP32kDAAB6Fz1IAAAAAGChBwkAgAGA5wIB4NagBwkAAAAALPQgAQAADHGMigj8fwQkYAjr7mAEQ30QA95I9H9D7Xa0oXa8ANCbCEjAEMKbKGBg4W8WPWWof7AFhIOABADX0dWb1K7edPDmtnP0wgEA+isCEoAhiU9T0R8RqgGg7xGQAEC8MUXvIYwDwMBCQAIGoHDezIfzZmwghoTeOhfd1RPnsDfeUPMmHQCAG0NAGiJ4c4T+jmsU/RHPSgHA0ENAQju8IRhcBmKvUFuEJwAAcKsQkDAo3kCjb3U3VIczOlxP4FoHAADXQ0DCoEHPV//EvwsA9D98YAR07nM9vcPCwkI5HI6QyeVy2etN01RhYaHcbrdGjBihqVOn6tSpUyH7CAaDWrp0qRISEhQdHa28vDydP3++p5sKYIC6fdUeewIAAOhJvdKD9OUvf1l79+6154cNG2b/vG7dOq1fv17bt2/XnXfeqR/84AeaOXOmampqFBMTI0kqKCjQr371K5WWlio+Pl4rVqxQbm6uqqqqQvYFoP8htAD8HQDAQNYrASkiIiKk1+ga0zT14osv6tlnn9WcOXMkSTt27FBSUpJeeeUVPf744woEAtq6dat27typGTNmSJJKSkqUkpKivXv3atasWR3+zmAwqGAwaM83NTX1wpFhoBgMD/UPhmNA7+vqjTjXTP9EeAKA/q1XAtKZM2fkdrvldDqVmZmpNWvW6Itf/KLOnj0rv9+v7Oxsu9bpdGrKlCmqqKjQ448/rqqqKrW2tobUuN1upaWlqaKiotOAVFRUpOeff743DmdQ4gW6fxrs/y6D/fgAYDDgAzoMdT0ekDIzM/Xyyy/rzjvv1IULF/SDH/xAkydP1qlTp+T3+yVJSUlJIdskJSXp/ffflyT5/X5FRUVp1KhR7Wqubd+R1atXa/ny5fZ8U1OTUlJSeuqwhqyu/pPs6/9A+/ObbQYmAAAAGJh6PCDl5OTYP6enp8vj8eiOO+7Qjh07lJWVJUlyOBwh25im2W5ZW9ercTqdcjqdN9Fy3IgbDSV9HZ76s94YEhuDW28E7hu9Na8vbuHjAwYAQF/q9WG+o6OjlZ6erjNnzujRRx+V9NdeouTkZLumvr7e7lVyuVxqaWlRQ0NDSC9SfX29Jk+e3NvNBW4pQg/CxTXTf/BvAQCDU68HpGAwqNOnT+trX/uaxo0bJ5fLpbKyMt17772SpJaWFpWXl+uHP/yhJCkjI0ORkZEqKyvT3LlzJUl1dXU6efKk1q1b19vNxU3o7puFW/EJ9a3+BB7AjbvVf0vd7eHmbx4AhoYeD0grV67Uww8/rDFjxqi+vl4/+MEP1NTUpPnz58vhcKigoEBr1qzR+PHjNX78eK1Zs0YjR45Ufn6+JMkwDC1cuFArVqxQfHy84uLitHLlSqWnp9uj2mHo6E+32vRGAAR6U29fe9xKCwAYjHo8IJ0/f17f/OY39eGHH+q2225TVlaWKisrNXbsWEnS008/rebmZj355JNqaGhQZmam3nnnHfs7kCRpw4YNioiI0Ny5c9Xc3Kzp06dr+/btfAfSIEFgAABg4OhPH1YCt0KPB6TS0tIu1zscDhUWFqqwsLDTmuHDh2vjxo3auHFjD7cOAxnBCrg1+FsDAAxlvf4MEtDfcFsQ0DtuxbN+vfH3yqfjAIDPIiBhyOvqzRGfpAPd01vBpi+GHQcADC0EJNwSAyVoDJR2AgDQF7gLA0MBAQkAMCj0xAccfEgCAPhcXzcAAAAAAPoLepAAAL2OnhlgcGKQEwxG9CABAAAAgIWABAAAAAAWAhIAAAAAWHgGCQAAADeNIcAxWNCDBAAAAAAWAhIAAAAAWLjFDgAAAD2OIcAxUNGDBAAAAAAWAhIAAAAAWLjFDgAAAL2q7Qh3n8Xtd+hv6EECAAAAAAsBCQAAAAAs3GIHAACAfoPR79DXCEgAAADoM109nwT0BW6xAwAAAAALPUgAAADolxj9Dn2BgAQAAIABp214IjChpxCQAAAAMKgQnnAzCEgAAAAY8Lq6HY9b9RCOfj9Iw0svvaRx48Zp+PDhysjI0G9+85u+bhIAAAAGidtX7bEnQOrnPUivvvqqCgoK9NJLL+mrX/2qNm/erJycHL333nsaM2ZMXzcPAAAAg8jNhCR6ogYPh2maZl83ojOZmZn6yle+ok2bNtnLJkyYoEcffVRFRUUhtcFgUMFg0J4PBAIaM2aMamtrFRsbe8va3Jm0597u6yYAAACgnzn5/Ky+bsKQ0NTUpJSUFDU2NsowjC5r+20PUktLi6qqqrRq1aqQ5dnZ2aqoqGhXX1RUpOeff77d8pSUlF5rIwAAAHAzjBf7ugVDy6VLlwZuQPrwww919epVJSUlhSxPSkqS3+9vV7969WotX77cnv/000/1f//3f4qPj5fD4ej19nblWmLtL71ZQw3nv+9w7vsW57/vcO77Fue/b3H++w7nvnOmaerSpUtyu93Xre23AematuHGNM0OA4/T6ZTT6QxZ9oUvfKE3mxa22NhYLtY+xPnvO5z7vsX57zuc+77F+e9bnP++w7nv2PV6jq7pt6PYJSQkaNiwYe16i+rr69v1KgEAAABAT+i3ASkqKkoZGRkqKysLWV5WVqbJkyf3UasAAAAADGb9+ha75cuXy+v1atKkSfJ4PPrZz36mP//5z3riiSf6umlhcTqdeu6559rdAohbg/Pfdzj3fYvz33c4932L89+3OP99h3PfM/r1MN/SX78odt26daqrq1NaWpo2bNigBx54oK+bBQAAAGAQ6vcBCQAAAABulX77DBIAAAAA3GoEJAAAAACwEJAAAAAAwEJAAgAAAAALAamHvPTSSxo3bpyGDx+ujIwM/eY3v+myvry8XBkZGRo+fLi++MUv6qc//ektaungUlRUpPvuu08xMTFKTEzUo48+qpqami63OXDggBwOR7vp97///S1q9eBQWFjY7hy6XK4ut+G67zm33357h9fxkiVLOqznuu++d999Vw8//LDcbrccDofeeOONkPWmaaqwsFBut1sjRozQ1KlTderUqevu97XXXtPdd98tp9Opu+++W7t37+6lIxjYujr/ra2t+v73v6/09HRFR0fL7XbrH/7hH/TBBx90uc/t27d3+Pfwl7/8pZePZuC53vW/YMGCducxKyvruvvl+r++6537jq5hh8OhH/3oR53uk2v/xhCQesCrr76qgoICPfvsszp+/Li+9rWvKScnR3/+8587rD979qz+7u/+Tl/72td0/PhxPfPMM1q2bJlee+21W9zyga+8vFxLlixRZWWlysrK9Mknnyg7O1tXrly57rY1NTWqq6uzp/Hjx9+CFg8uX/7yl0PO4YkTJzqt5brvWUePHg0599e+VPvv//7vu9yO6z58V65c0cSJE1VcXNzh+nXr1mn9+vUqLi7W0aNH5XK5NHPmTF26dKnTfR46dEjz5s2T1+vV//zP/8jr9Wru3Lk6fPhwbx3GgNXV+f/444/129/+Vv/8z/+s3/72t3r99df1hz/8QXl5edfdb2xsbMjfQl1dnYYPH94bhzCgXe/6l6TZs2eHnMe33nqry31y/d+Y6537ttfvz3/+czkcDj322GNd7pdr/waYuGl/+7d/az7xxBMhy+666y5z1apVHdY//fTT5l133RWy7PHHHzezsrJ6rY1DRX19vSnJLC8v77Rm//79piSzoaHh1jVsEHruuefMiRMn3nA9133v+t73vmfecccd5qefftrheq77niHJ3L17tz3/6aefmi6Xy1y7dq297C9/+YtpGIb505/+tNP9zJ0715w9e3bIslmzZpnf+MY3erzNg0nb89+RI0eOmJLM999/v9Oabdu2mYZh9GzjhoCOzv/8+fPNRx55JKz9cP2H70au/UceecScNm1alzVc+zeGHqSb1NLSoqqqKmVnZ4csz87OVkVFRYfbHDp0qF39rFmzdOzYMbW2tvZaW4eCQCAgSYqLi7tu7b333qvk5GRNnz5d+/fv7+2mDUpnzpyR2+3WuHHj9I1vfEN/+tOfOq3luu89LS0tKikp0Xe+8x05HI4ua7nue9bZs2fl9/tDrm2n06kpU6Z0+hogdf730NU2uDGBQEAOh0Nf+MIXuqy7fPmyxo4dq9GjRys3N1fHjx+/NQ0chA4cOKDExETdeeedWrRokerr67us5/rveRcuXNCePXu0cOHC69Zy7V8fAekmffjhh7p69aqSkpJCliclJcnv93e4jd/v77D+k08+0YcffthrbR3sTNPU8uXLdf/99ystLa3TuuTkZP3sZz/Ta6+9ptdff12pqamaPn263n333VvY2oEvMzNTL7/8st5++21t2bJFfr9fkydP1kcffdRhPdd973njjTfU2NioBQsWdFrDdd87rv0/H85rwLXtwt0G1/eXv/xFq1atUn5+vmJjYzutu+uuu7R9+3a9+eab+sUvfqHhw4frq1/9qs6cOXMLWzs45OTkaNeuXdq3b59eeOEFHT16VNOmTVMwGOx0G67/nrdjxw7FxMRozpw5XdZx7d+YiL5uwGDR9lNb0zS7/CS3o/qOluPGPfXUU/rd736ngwcPdlmXmpqq1NRUe97j8ai2tlb/+q//qgceeKC3mzlo5OTk2D+np6fL4/Hojjvu0I4dO7R8+fIOt+G67x1bt25VTk6O3G53pzVc970r3NeA7m6DzrW2tuob3/iGPv30U7300ktd1mZlZYUMJPDVr35VX/nKV7Rx40b95Cc/6e2mDirz5s2zf05LS9OkSZM0duxY7dmzp8s361z/PevnP/+5vvWtb133WSKu/RtDD9JNSkhI0LBhw9p96lFfX9/u05FrXC5Xh/URERGKj4/vtbYOZkuXLtWbb76p/fv3a/To0WFvn5WVxacnNyk6Olrp6emdnkeu+97x/vvva+/evfrHf/zHsLflur9510ZuDOc14Np24W6DzrW2tmru3Lk6e/asysrKuuw96sjnPvc53Xffffw99IDk5GSNHTu2y3PJ9d+zfvOb36impqZbrwNc+x0jIN2kqKgoZWRk2CNIXVNWVqbJkyd3uI3H42lX/84772jSpEmKjIzstbYORqZp6qmnntLrr7+uffv2ady4cd3az/Hjx5WcnNzDrRtagsGgTp8+3el55LrvHdu2bVNiYqIeeuihsLflur9548aNk8vlCrm2W1paVF5e3ulrgNT530NX26Bj18LRmTNntHfv3m594GKapqqrq/l76AEfffSRamtruzyXXP89a+vWrcrIyNDEiRPD3pZrvxN9NTrEYFJaWmpGRkaaW7duNd977z2zoKDAjI6ONs+dO2eapmmuWrXK9Hq9dv2f/vQnc+TIkeY//dM/me+99565detWMzIy0vzP//zPvjqEAeu73/2uaRiGeeDAAbOurs6ePv74Y7um7fnfsGGDuXv3bvMPf/iDefLkSXPVqlWmJPO1117ri0MYsFasWGEeOHDA/NOf/mRWVlaaubm5ZkxMDNf9LXT16lVzzJgx5ve///1267jue86lS5fM48ePm8ePHzclmevXrzePHz9uj5K2du1a0zAM8/XXXzdPnDhhfvOb3zSTk5PNpqYmex9erzdkZNP//u//NocNG2auXbvWPH36tLl27VozIiLCrKysvOXH1991df5bW1vNvLw8c/To0WZ1dXXI60AwGLT30fb8FxYWmj6fz/zf//1f8/jx4+a3v/1tMyIiwjx8+HBfHGK/1tX5v3TpkrlixQqzoqLCPHv2rLl//37T4/GYf/M3f8P13wOu93+PaZpmIBAwR44caW7atKnDfXDtdw8BqYf827/9mzl27FgzKirK/MpXvhIyzPT8+fPNKVOmhNQfOHDAvPfee82oqCjz9ttv7/TCRtckdTht27bNrml7/n/4wx+ad9xxhzl8+HBz1KhR5v3332/u2bPn1jd+gJs3b56ZnJxsRkZGmm6325wzZ4556tQpez3Xfe97++23TUlmTU1Nu3Vc9z3n2hDpbaf58+ebpvnXob6fe+450+VymU6n03zggQfMEydOhOxjypQpdv01//Ef/2GmpqaakZGR5l133UVY7URX5//s2bOdvg7s37/f3kfb819QUGCOGTPGjIqKMm+77TYzOzvbrKiouPUHNwB0df4//vhjMzs727ztttvMyMhIc8yYMeb8+fPNP//5zyH74Prvnuv932Oaprl582ZzxIgRZmNjY4f74NrvHodpWk9JAwAAAMAQxzNIAAAAAGAhIAEAAACAhYAEAAAAABYCEgAAAABYCEgAAAAAYCEgAQAAAICFgAQAAAAAFgISAAAAAFgISAAAAABgISABAAAAgIWABAAAAACW/wcYxtnyre/F7QAAAABJRU5ErkJggg==",
"text/plain": [
"
"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(10,4))\n",
"\n",
"plt.hist(np.log1p(target.loc[target['target'] > 0, 'target']), bins = 200);"
]
},
{
"cell_type": "markdown",
"id": "38caffeb-d66a-4393-bfdc-9ece2ec720ac",
"metadata": {},
"source": [
"Распределение выглядит очень похожим на смесь:\n",
"- Есть клиенты с значениями около нуля\n",
"- Есть компонента смеси с центром в районе 6, то есть ~400 (np.exp(6) - 1)\n",
"- Есть компонента справа, с центров в районе 13, то есть ~440,000\n",
"- И есть еще клиенты с ровно 0, которых мы убрали с графика\n",
"\n",
"Выглядит заманчиво и для ML, и визуализации. Но нас пока интересует только сабмит.\n",
"\n",
"### Catboost\n",
"\n",
"Начнём собирать всё что нам потребуется дя обучения Catboost-а.\n",
"\n",
"- Будем ли мы проверять, что порядок `user_id` полностью совпадает в train и target?\n",
"- Будем ли мы сразу настраивать свою валидацию и делить данные?\n",
"- Или может быть будем что-либо преобразовывать?\n",
"\n",
"Нет, нас интересует atboost сабмит ASAP 🤗️️️️️️ "
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "378306c2-1373-4868-96d2-0e9b54bcde9f",
"metadata": {},
"outputs": [],
"source": [
"train_pool = Pool(data = train, \n",
" label = np.log1p(target['target']), \n",
" cat_features = categorical_features_indices)"
]
},
{
"cell_type": "markdown",
"id": "e35224b7-1329-47ce-8ec6-e69d0ae6a0a9",
"metadata": {},
"source": [
"#### Обучение\n",
"\n",
"Главные настройки, которые нам стоит учесть:\n",
"\n",
"- Так как метрика соревнования это RMSLE, а мы уже логарифмировали (log1p) целевую переменную, оптимизировать мы будем RMSE\n",
"- У нас много пропусков в данных, поэтому нам очень повезло что у Catboost есть настройка nan_mode"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ff688458-d150-42e8-99d1-b8aced7ffd61",
"metadata": {},
"outputs": [],
"source": [
"model = CatBoostRegressor(iterations = 100, \n",
" depth = 6, \n",
" learning_rate = 0.1, \n",
" loss_function = 'RMSE', \n",
" nan_mode = 'Min', \n",
" random_seed = 314,\n",
" verbose = 10)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "3cd7c516-80b2-438c-ac59-f14e181c09ee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0:\tlearn: 5.1242446\ttotal: 128ms\tremaining: 12.7s\n",
"10:\tlearn: 2.7903763\ttotal: 891ms\tremaining: 7.21s\n",
"20:\tlearn: 2.2642536\ttotal: 1.65s\tremaining: 6.21s\n",
"30:\tlearn: 2.1466213\ttotal: 2.33s\tremaining: 5.18s\n",
"40:\tlearn: 2.1033289\ttotal: 3s\tremaining: 4.31s\n",
"50:\tlearn: 2.0807207\ttotal: 3.63s\tremaining: 3.48s\n",
"60:\tlearn: 2.0606817\ttotal: 4.26s\tremaining: 2.72s\n",
"70:\tlearn: 2.0467002\ttotal: 4.91s\tremaining: 2s\n",
"80:\tlearn: 2.0315319\ttotal: 5.6s\tremaining: 1.31s\n",
"90:\tlearn: 2.0204322\ttotal: 6.22s\tremaining: 615ms\n",
"99:\tlearn: 2.0125081\ttotal: 6.84s\tremaining: 0us\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(train_pool)"
]
},
{
"cell_type": "markdown",
"id": "fe974317-8a22-4a11-abf7-298245c3e3a9",
"metadata": {},
"source": [
"Мы успешно обучили модель 🌟️️️️️️\n",
"\n",
"И вправду — зачем нам валидация, если можно ее сразу отправить в соревнование и узнать наш результат на лидерборде? Он же не будет прямо сильно хуже чем в логе обучения? (ведь правда, да?)\n",
"\n",
"### Подготовка сабмита\n",
"\n",
"Посмотрим на пример рабочего бейзлайн решения. \n",
"\n",
"Именно в таком формате платформа ждет от нас решения:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "456c9f2e-e51c-4419-a402-871cfd1322d6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(318451, 2)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample = pd.read_csv('data/task3/sample_submit_naive.csv')\n",
"sample.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "da43b692-9ec7-415d-9c61-42ffaaf6cdf5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
"
\n",
"
\n",
"
user_id
\n",
"
predict
\n",
"
\n",
" \n",
" \n",
"
\n",
"
0
\n",
"
1000008
\n",
"
1.004656e+06
\n",
"
\n",
"
\n",
"
1
\n",
"
1000009
\n",
"
0.000000e+00
\n",
"
\n",
"
\n",
"
2
\n",
"
1000013
\n",
"
5.047758e+02
\n",
"
\n",
"
\n",
"
3
\n",
"
1000016
\n",
"
1.680799e+05
\n",
"
\n",
"
\n",
"
4
\n",
"
1000017
\n",
"
2.222542e+02
\n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user_id predict\n",
"0 1000008 1.004656e+06\n",
"1 1000009 0.000000e+00\n",
"2 1000013 5.047758e+02\n",
"3 1000016 1.680799e+05\n",
"4 1000017 2.222542e+02"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample.head(5)"
]
},
{
"cell_type": "markdown",
"id": "526becca-affc-439c-a378-15aa630705f3",
"metadata": {},
"source": [
"С форматом тоже всё понятно. \n",
"\n",
"Важно заметить, что предсказания от нас ждут без преобразований целевой переменной, так что нужно будет сделать обратные преобразования предсказаний нашей модели.\n",
"\n",
"#### Использование модели \n",
"\n",
"Тестовые данные у нас уже есть, но их нужно подготовить для формата Catboost-а."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "4ba55359-c12f-49af-9ddd-7cc50594f2d8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"