flockonus · March 23, 2024 00:12
diff --git a/deepctr_0026_synthetic_gen_only.ipynb b/deepctr_0026_synthetic_gen_only.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data input Generation\n",
    "\n",
    "Unfortunately not deterministic, would benefit from being.\n",
    "\n",
    "# Model training\n",
    "\n",
    "I'm keeping the last 60 items out of the training set for post benchmarking later.\n",
    "\n",
    "Mind for the split ratio I'm using 0.05 for the test/validation, which is arguably very small.\n",
    "\n",
    "`train, test = train_test_split(ticker_data, test_size=0.05)`\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "def calculate(a: float, b: float, c: float, d: float, e: float) -> float:\n",
    "    \"\"\"\n",
    "    This function takes five floats as input and performs a series of\n",
    "        mathematical operations on them based on certain conditions.\n",
    "    \"\"\"\n",
    "\n",
    "    if abs(a - b) < 0.001:\n",
    "        ret = math.sqrt(abs(c * d + e) + 3.1413)\n",
    "    elif a > b and c > d:\n",
    "        ret = math.log(abs(a * b * c) + 3.1413) + d - e\n",
    "    elif a < 0 and b > 0:\n",
    "        ret = (c + d) / (abs(a * e) + 3.1413)\n",
    "    else:\n",
    "        ret = a - b + math.log10(abs(c * d * e) + 3.1413)\n",
    "    return ret\n",
    "\n",
    "\n",
    "N = 10000\n",
    "LOW = -1\n",
    "HIGH = 7\n",
    "\n",
    "# create 5 arrays of 10 random numbers\n",
    "import numpy as np\n",
    "\n",
    "a = np.random.randint(LOW, HIGH, N)\n",
    "b = np.random.randint(LOW, HIGH, N)\n",
    "c = np.random.randint(LOW, HIGH, N)\n",
    "d = np.random.randint(LOW, HIGH, N)\n",
    "e = np.random.randint(LOW, HIGH, N)\n",
    "\n",
    "# calculate the result for each set of numbers\n",
    "results = [calculate(a[i], b[i], c[i], d[i], e[i]) for i in range(N)]\n",
    "\n",
    "# concatanate all into a dataframe\n",
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({\"a\": a, \"b\": b, \"c\": c, \"d\": d, \"e\": e, \"results\": results})\n",
    "\n",
    "# save to pickle\n",
    "# df.to_pickle(\"./jars/data-0040-synthetic.pkl\")\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "omit_length = 60\n",
    "targets = ['results']\n",
    "\n",
    "use = df.iloc[:-omit_length]\n",
    "omit = df.iloc[-omit_length:]\n",
    "\n",
    "print(\"train\", use.shape, \"omit\", omit.shape)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Data input Generation\n",
	"\n",
	"Unfortunately not deterministic, would benefit from being.\n",
	"\n",
	"# Model training\n",
	"\n",
	"I'm keeping the last 60 items out of the training set for post benchmarking later.\n",
	"\n",
	"Mind for the split ratio I'm using 0.05 for the test/validation, which is arguably very small.\n",
	"\n",
	"`train, test = train_test_split(ticker_data, test_size=0.05)`\n",
	"\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import math\n",
	"\n",
	"def calculate(a: float, b: float, c: float, d: float, e: float) -> float:\n",
	" \"\"\"\n",
	" This function takes five floats as input and performs a series of\n",
	" mathematical operations on them based on certain conditions.\n",
	" \"\"\"\n",
	"\n",
	" if abs(a - b) < 0.001:\n",
	" ret = math.sqrt(abs(c * d + e) + 3.1413)\n",
	" elif a > b and c > d:\n",
	" ret = math.log(abs(a * b * c) + 3.1413) + d - e\n",
	" elif a < 0 and b > 0:\n",
	" ret = (c + d) / (abs(a * e) + 3.1413)\n",
	" else:\n",
	" ret = a - b + math.log10(abs(c * d * e) + 3.1413)\n",
	" return ret\n",
	"\n",
	"\n",
	"N = 10000\n",
	"LOW = -1\n",
	"HIGH = 7\n",
	"\n",
	"# create 5 arrays of 10 random numbers\n",
	"import numpy as np\n",
	"\n",
	"a = np.random.randint(LOW, HIGH, N)\n",
	"b = np.random.randint(LOW, HIGH, N)\n",
	"c = np.random.randint(LOW, HIGH, N)\n",
	"d = np.random.randint(LOW, HIGH, N)\n",
	"e = np.random.randint(LOW, HIGH, N)\n",
	"\n",
	"# calculate the result for each set of numbers\n",
	"results = [calculate(a[i], b[i], c[i], d[i], e[i]) for i in range(N)]\n",
	"\n",
	"# concatanate all into a dataframe\n",
	"import pandas as pd\n",
	"\n",
	"df = pd.DataFrame({\"a\": a, \"b\": b, \"c\": c, \"d\": d, \"e\": e, \"results\": results})\n",
	"\n",
	"# save to pickle\n",
	"# df.to_pickle(\"./jars/data-0040-synthetic.pkl\")\n",
	"\n",
	"df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"omit_length = 60\n",
	"targets = ['results']\n",
	"\n",
	"use = df.iloc[:-omit_length]\n",
	"omit = df.iloc[-omit_length:]\n",
	"\n",
	"print(\"train\", use.shape, \"omit\", omit.shape)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.13"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}