@hhbyyh
Created November 16, 2017 01:41
{
"cells": [
{
"cell_type": "code",
"outputs": [
{
"text": "--2017-11-15 19:19:24-- https://repo1.maven.org/maven2/com/intel/analytics/bigdl/bigdl-SPARK_2.1/0.3.0/bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar\nResolving repo1.maven.org (repo1.maven.org)... 151.101.48.209\nConnecting to repo1.maven.org (repo1.maven.org)|151.101.48.209|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 77227587 (74M) [application/java-archive]\nSaving to: \u2018bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar.5\u2019\n\n100%[======================================>] 77,227,587 98.5MB/s in 0.7s \n\n2017-11-15 19:19:25 (98.5 MB/s) - \u2018bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar.5\u2019 saved [77227587/77227587]\n\n",
"output_type": "stream",
"name": "stdout"
}
],
"metadata": {},
"source": "!(export sv=2.1 bv=0.3.0 ; cd ~/data/libs/ && wget https://repo1.maven.org/maven2/com/intel/analytics/bigdl/bigdl-SPARK_${sv}/${bv}/bigdl-SPARK_${sv}-${bv}-jar-with-dependencies.jar)",
"execution_count": 1
},
{
"cell_type": "code",
"outputs": [
{
"text": "Requirement already satisfied: bigdl==0.3.0 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages\r\nRequirement already satisfied: pyspark>=2.2 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages (from bigdl==0.3.0)\r\nRequirement already satisfied: numpy>=1.7 in /usr/local/src/bluemix_jupyter_bundle.v70/notebook/lib/python2.7/site-packages (from bigdl==0.3.0)\r\nRequirement already satisfied: py4j==0.10.4 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages (from pyspark>=2.2->bigdl==0.3.0)\r\n",
"output_type": "stream",
"name": "stdout"
}
],
"metadata": {},
"source": "!pip install bigdl==0.3.0 | cat",
"execution_count": 2
},
{
"cell_type": "code",
"outputs": [
{
"text": "Prepending /gpfs/fs01/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages/bigdl/share/conf/spark-bigdl.conf to sys.path\n",
"output_type": "stream",
"name": "stdout"
},
{
"text": "/gpfs/fs01/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages/bigdl/util/engine.py:39: UserWarning: Find both SPARK_HOME and pyspark. You may need to check whether they match with each other. SPARK_HOME environment variable is set to: /usr/local/src/spark21master/spark-2.1.0-bin-2.7.3, and pyspark is found in: /gpfs/fs01/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages/pyspark/__init__.pyc. If they are unmatched, please use one source only to avoid conflict. For example, you can unset SPARK_HOME and use pyspark only.\n warnings.warn(warning_msg)\n",
"output_type": "stream",
"name": "stderr"
}
],
"metadata": {},
"source": "from bigdl.nn.layer import *\nfrom bigdl.nn.criterion import *\nfrom bigdl.util.common import *\nfrom pyspark import SparkContext\nimport numpy as np",
"execution_count": 3
},
{
"cell_type": "code",
"outputs": [
{
"text": "creating: createLinear\n{u'Linear328ccbf5': {u'gradWeight': array([[ 0., 0.]], dtype=float32), u'bias': array([-0.01126827], dtype=float32), u'weight': array([[ 0.6098693 , 0.19927095]], dtype=float32), u'gradBias': array([ 0.], dtype=float32)}}\n",
"output_type": "stream",
"name": "stdout"
}
],
"metadata": {},
"source": "linear = Linear(2, 1)\nprint (linear.parameters())",
"execution_count": 4
},
{
"cell_type": "code",
"outputs": [
{
"text": "[ 0.20005913]\n",
"output_type": "stream",
"name": "stdout"
}
],
"metadata": {},
"source": "input = np.array([1,-2])\n# forward to output\noutput = linear.forward(input)\nprint (output)",
"execution_count": 5
},
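{
"cell_type": "markdown",
"metadata": {},
"source": "*Added for illustration, not part of the original gist:* a minimal sketch of the matching backward pass, assuming BigDL's Python `Layer.backward(input, grad_output)` API is available in this version. Pushing a gradient of 1 back through the layer should return the gradient with respect to the input and accumulate the layer's `gradWeight`/`gradBias`."
},
{
"cell_type": "code",
"outputs": [],
"metadata": {},
"source": "# illustrative sketch (assumes Layer.backward is available in this BigDL version)\ngrad_output = np.array([1.0])\n# gradient of the output w.r.t. the input of the layer\ngrad_input = linear.backward(input, grad_output)\nprint (grad_input)\n# gradWeight / gradBias should now be non-zero\nprint (linear.parameters())",
"execution_count": null
},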
{
"cell_type": "code",
"outputs": [
{
"text": "None\nNone\nNone\n1\nNone\n1\n1\n",
"output_type": "stream",
"name": "stdout"
}
],
"metadata": {},
"source": "print (sc.getLocalProperty(\"spark.executor.cores\"))\nsc.getConf().set(\"spark.executor.cores\", \"1\")\nprint (sc.getLocalProperty(\"spark.executor.cores\"))\n\nsc.stop()\nconfCore=create_spark_conf()\nprint (confCore.get(\"spark.executor.cores\"))\nconfCore.set(\"spark.executor.cores\", 1)\nconfCore.set(\"spark.cores.max\", 1)\nprint (confCore.get(\"spark.executor.cores\"))\nsc = SparkContext(appName=\"text_classifier\", conf=confCore)\nprint (sc.getLocalProperty(\"spark.executor.cores\"))\nprint (sc.getConf().get(\"spark.executor.cores\"))\nprint (sc.getConf().get(\"spark.cores.max\"))\ninit_engine()",
"execution_count": 7
},
{
"cell_type": "code",
"outputs": [
{
"text": "('Extracting', 'train-images-idx3-ubyte.gz')\n('Extracting', 'train-labels-idx1-ubyte.gz')\n('Extracting', 't10k-images-idx3-ubyte.gz')\n('Extracting', 't10k-labels-idx1-ubyte.gz')\ncreating: createSequential\ncreating: createReshape\ncreating: createSpatialConvolution\ncreating: createTanh\ncreating: createSpatialMaxPooling\ncreating: createTanh\ncreating: createSpatialConvolution\ncreating: createSpatialMaxPooling\ncreating: createReshape\ncreating: createLinear\ncreating: createTanh\ncreating: createLinear\ncreating: createLogSoftMax\ncreating: createClassNLLCriterion\ncreating: createDefault\ncreating: createSGD\ncreating: createMaxEpoch\ncreating: createOptimizer\ncreating: createEveryEpoch\ncreating: createTop1Accuracy\ntraining finished\n",
"output_type": "stream",
"name": "stdout"
}
],
"metadata": {},
"source": "\nfrom optparse import OptionParser\nfrom bigdl.dataset import mnist\nfrom bigdl.dataset.transformer import *\nfrom bigdl.nn.layer import *\nfrom bigdl.nn.criterion import *\nfrom bigdl.optim.optimizer import *\nfrom bigdl.util.common import *\n\n\ndef build_model(class_num):\n model = Sequential()\n model.add(Reshape([1, 28, 28]))\n model.add(SpatialConvolution(1, 6, 5, 5))\n model.add(Tanh())\n model.add(SpatialMaxPooling(2, 2, 2, 2))\n model.add(Tanh())\n model.add(SpatialConvolution(6, 12, 5, 5))\n model.add(SpatialMaxPooling(2, 2, 2, 2))\n model.add(Reshape([12 * 4 * 4]))\n model.add(Linear(12 * 4 * 4, 100))\n model.add(Tanh())\n model.add(Linear(100, class_num))\n model.add(LogSoftMax())\n return model\n\n\ndef get_mnist(sc, data_type=\"train\", location=\"/tmp/mnist\"):\n \"\"\"\n Get and normalize the mnist data. We would download it automatically\n if the data doesn't present at the specific location.\n :param sc: SparkContext\n :param data_type: training data or testing data\n :param location: Location storing the mnist\n :return: A RDD of (features: Ndarray, label: Ndarray)\n \"\"\"\n (images, labels) = mnist.read_data_sets(location, data_type)\n images = sc.parallelize(images)\n labels = sc.parallelize(labels + 1) # Target start from 1 in BigDL\n record = images.zip(labels)\n return record\n\ndef get_end_trigger():\n return MaxEpoch(10)\n\ntrain_data = get_mnist(sc, \"train\", \"\")\\\n .map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),\n rec_tuple[1]))\\\n .map(lambda t: Sample.from_ndarray(t[0], t[1]))\ntest_data = get_mnist(sc, \"test\", \"\")\\\n .map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TEST_MEAN, mnist.TEST_STD),\n rec_tuple[1]))\\\n .map(lambda t: Sample.from_ndarray(t[0], t[1]))\noptimizer = Optimizer(\n model=build_model(10),\n training_rdd=train_data,\n criterion=ClassNLLCriterion(),\n optim_method=SGD(learningrate=0.01, learningrate_decay=0.0002),\n end_trigger=get_end_trigger(),\n batch_size=128)\noptimizer.set_validation(\n batch_size=128,\n val_rdd=test_data,\n trigger=EveryEpoch(),\n val_method=[Top1Accuracy()]\n)\ntrained_model = optimizer.optimize()\nparameters = trained_model.parameters()\nprint(\"training finished\")",
"execution_count": 13
},
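{
"cell_type": "markdown",
"metadata": {},
"source": "*Added for illustration, not part of the original gist:* a hedged sketch of persisting the trained model, assuming this BigDL version exposes `Layer.save(path, over_write)` and `Model.load(path)` in the Python API. The path below is a placeholder."
},
{
"cell_type": "code",
"outputs": [],
"metadata": {},
"source": "# illustrative sketch (assumes Layer.save / Model.load exist in this BigDL version)\nmodel_path = \"/tmp/lenet5.bigdl\"  # hypothetical location\ntrained_model.save(model_path, True)\nreloaded_model = Model.load(model_path)\nprint (reloaded_model)",
"execution_count": null
},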
{
"cell_type": "code",
"outputs": [
{
"text": "creating: createTop1Accuracy\nEvaluated result: 0.946300029755, total_num: 10000, method: Top1Accuracy\n",
"output_type": "stream",
"name": "stdout"
}
],
"metadata": {},
"source": "# test_data = get_mnist(sc, \"test\").map(\n# normalizer(mnist.TEST_MEAN, mnist.TEST_STD))\nresults = trained_model.evaluate(test_data, 128, [Top1Accuracy()])\nfor result in results:\n print(result)",
"execution_count": 14
},
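{
"cell_type": "markdown",
"metadata": {},
"source": "*Added for illustration, not part of the original gist:* a hedged sketch of inference with the trained model, assuming BigDL's Python `Layer.predict(data_rdd)` API, which should return an RDD of per-sample output activations (log-probabilities here, since the last layer is `LogSoftMax`)."
},
{
"cell_type": "code",
"outputs": [],
"metadata": {},
"source": "# illustrative sketch (assumes Layer.predict is available in this BigDL version)\npredictions = trained_model.predict(test_data)\nfor activation in predictions.take(3):\n    # argmax over log-probabilities; +1 because BigDL labels start from 1\n    print (np.argmax(activation) + 1)",
"execution_count": null
},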
{
"cell_type": "code",
"outputs": [],
"metadata": {
"collapsed": true
},
"source": "",
"execution_count": null
}
],
"nbformat": 4,
"metadata": {
"kernelspec": {
"display_name": "Python 2 with Spark 2.1",
"name": "python2-spark21",
"language": "python"
},
"language_info": {
"mimetype": "text/x-python",
"version": "2.7.11",
"file_extension": ".py",
"name": "python",
"pygments_lexer": "ipython2",
"nbconvert_exporter": "python",
"codemirror_mode": {
"version": 2,
"name": "ipython"
}
}
},
"nbformat_minor": 1
}