{
 "cells": [
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "--2017-11-15 19:19:24-- https://repo1.maven.org/maven2/com/intel/analytics/bigdl/bigdl-SPARK_2.1/0.3.0/bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar\nResolving repo1.maven.org (repo1.maven.org)... 151.101.48.209\nConnecting to repo1.maven.org (repo1.maven.org)|151.101.48.209|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 77227587 (74M) [application/java-archive]\nSaving to: \u2018bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar.5\u2019\n\n100%[======================================>] 77,227,587 98.5MB/s in 0.7s \n\n2017-11-15 19:19:25 (98.5 MB/s) - \u2018bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar.5\u2019 saved [77227587/77227587]\n\n",
     "output_type": "stream",
     "name": "stdout"
    }
   ],
   "metadata": {},
   "source": "!(export sv=2.1 bv=0.3.0 ; cd ~/data/libs/ && wget https://repo1.maven.org/maven2/com/intel/analytics/bigdl/bigdl-SPARK_${sv}/${bv}/bigdl-SPARK_${sv}-${bv}-jar-with-dependencies.jar)",
   "execution_count": 1
  },
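  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "Optional sanity check (a sketch, not part of the original run): list the downloaded jar under the same `~/data/libs/` path used above and confirm the size matches what wget reported (77227587 bytes)."
  },
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {},
   "source": "# sanity check: the glob also catches wget's .jar.N copies from repeated downloads\n!ls -lh ~/data/libs/bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar*",
   "execution_count": null
  },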
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "Requirement already satisfied: bigdl==0.3.0 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages\r\nRequirement already satisfied: pyspark>=2.2 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages (from bigdl==0.3.0)\r\nRequirement already satisfied: numpy>=1.7 in /usr/local/src/bluemix_jupyter_bundle.v70/notebook/lib/python2.7/site-packages (from bigdl==0.3.0)\r\nRequirement already satisfied: py4j==0.10.4 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages (from pyspark>=2.2->bigdl==0.3.0)\r\n",
     "output_type": "stream",
     "name": "stdout"
    }
   ],
   "metadata": {},
   "source": "!pip install bigdl==0.3.0 | cat",
   "execution_count": 2
  },
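  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "To double-check which version pip actually resolved (the cell above pins 0.3.0), `pip show` prints the installed package metadata. A sketch, not part of the original run:"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {},
   "source": "# show the resolved bigdl package: version, location, dependencies\n!pip show bigdl | cat",
   "execution_count": null
  },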
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "Prepending /gpfs/fs01/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages/bigdl/share/conf/spark-bigdl.conf to sys.path\n",
     "output_type": "stream",
     "name": "stdout"
    },
    {
     "text": "/gpfs/fs01/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages/bigdl/util/engine.py:39: UserWarning: Find both SPARK_HOME and pyspark. You may need to check whether they match with each other. SPARK_HOME environment variable is set to: /usr/local/src/spark21master/spark-2.1.0-bin-2.7.3, and pyspark is found in: /gpfs/fs01/user/sec8-2a9b10a0423ed7-afcff0442863/.local/lib/python2.7/site-packages/pyspark/__init__.pyc. If they are unmatched, please use one source only to avoid conflict. For example, you can unset SPARK_HOME and use pyspark only.\n warnings.warn(warning_msg)\n",
     "output_type": "stream",
     "name": "stderr"
    }
   ],
   "metadata": {},
   "source": "from bigdl.nn.layer import *\nfrom bigdl.nn.criterion import *\nfrom bigdl.util.common import *\nfrom pyspark import SparkContext\nimport numpy as np",
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "creating: createLinear\n{u'Linear328ccbf5': {u'gradWeight': array([[ 0., 0.]], dtype=float32), u'bias': array([-0.01126827], dtype=float32), u'weight': array([[ 0.6098693 , 0.19927095]], dtype=float32), u'gradBias': array([ 0.], dtype=float32)}}\n",
     "output_type": "stream",
     "name": "stdout"
    }
   ],
   "metadata": {},
   "source": "# a fully connected layer with 2 inputs and 1 output\nlinear = Linear(2, 1)\n# parameters() returns a dict keyed by an auto-generated layer name,\n# holding the weight, bias and their gradient buffers\nprint (linear.parameters())",
   "execution_count": 4
  },
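  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "A sketch of pulling the raw numpy arrays out of that dict. The layer name (`Linear328ccbf5` above) is auto-generated and differs on every run, so it is looked up rather than hard-coded. Not part of the original run:"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {},
   "source": "params = linear.parameters()\nlayer_name = list(params.keys())[0]  # auto-generated, e.g. u'Linear328ccbf5'\nw = params[layer_name]['weight']     # ndarray of shape (1, 2)\nb = params[layer_name]['bias']       # ndarray of shape (1,)\nprint (w)\nprint (b)",
   "execution_count": null
  },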
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "[ 0.20005913]\n",
     "output_type": "stream",
     "name": "stdout"
    }
   ],
   "metadata": {},
   "source": "input = np.array([1, -2])\n# forward pass: output = weight . input + bias\noutput = linear.forward(input)\nprint (output)",
   "execution_count": 5
  },
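  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "The number above can be reproduced by hand, since a linear layer computes `output = np.dot(w, x) + b` (with `w` and `b` as extracted in the earlier sketch). The matching backward pass is `Layer.backward(input, grad_output)`, assuming the BigDL 0.3.0 Python API. A sketch, not part of the original run:"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {},
   "source": "# reproduce the forward pass with plain numpy: output = W . x + b\nx = np.array([1, -2])  # same values as `input` above\nprint (np.dot(w, x) + b)  # should match linear.forward(input)\n\n# backward pass (assumption: Layer.backward(input, grad_output) as in BigDL 0.3.0):\n# gradient w.r.t. the input for a given gradient of the output\ngrad_output = np.array([1.0])\nprint (linear.backward(x, grad_output))",
   "execution_count": null
  },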
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "None\nNone\nNone\n1\nNone\n1\n1\n",
     "output_type": "stream",
     "name": "stdout"
    }
   ],
   "metadata": {},
   "source": "# setting a property on the conf of a running context has no effect:\n# getLocalProperty only sees job-local properties, so both prints show None\nprint (sc.getLocalProperty(\"spark.executor.cores\"))\nsc.getConf().set(\"spark.executor.cores\", \"1\")\nprint (sc.getLocalProperty(\"spark.executor.cores\"))\n\n# executor cores must be fixed before the context is created,\n# so stop the old context and rebuild the conf with BigDL's helper\nsc.stop()\nconfCore = create_spark_conf()\nprint (confCore.get(\"spark.executor.cores\"))\nconfCore.set(\"spark.executor.cores\", 1)\nconfCore.set(\"spark.cores.max\", 1)\nprint (confCore.get(\"spark.executor.cores\"))\nsc = SparkContext(appName=\"text_classifier\", conf=confCore)\nprint (sc.getLocalProperty(\"spark.executor.cores\"))\nprint (sc.getConf().get(\"spark.executor.cores\"))\nprint (sc.getConf().get(\"spark.cores.max\"))\ninit_engine()",
   "execution_count": 7
  },
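  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "To confirm the settings actually reached the new context, `SparkConf.getAll()` lists every explicitly set property. A sketch, not part of the original run:"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {},
   "source": "# list every property explicitly set on the active context's conf\nfor key, value in sorted(sc.getConf().getAll()):\n    print ('%s=%s' % (key, value))",
   "execution_count": null
  },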
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "('Extracting', 'train-images-idx3-ubyte.gz')\n('Extracting', 'train-labels-idx1-ubyte.gz')\n('Extracting', 't10k-images-idx3-ubyte.gz')\n('Extracting', 't10k-labels-idx1-ubyte.gz')\ncreating: createSequential\ncreating: createReshape\ncreating: createSpatialConvolution\ncreating: createTanh\ncreating: createSpatialMaxPooling\ncreating: createTanh\ncreating: createSpatialConvolution\ncreating: createSpatialMaxPooling\ncreating: createReshape\ncreating: createLinear\ncreating: createTanh\ncreating: createLinear\ncreating: createLogSoftMax\ncreating: createClassNLLCriterion\ncreating: createDefault\ncreating: createSGD\ncreating: createMaxEpoch\ncreating: createOptimizer\ncreating: createEveryEpoch\ncreating: createTop1Accuracy\ntraining finished\n",
     "output_type": "stream",
     "name": "stdout"
    }
   ],
   "metadata": {},
   "source": "from bigdl.dataset import mnist\nfrom bigdl.dataset.transformer import *\nfrom bigdl.nn.layer import *\nfrom bigdl.nn.criterion import *\nfrom bigdl.optim.optimizer import *\nfrom bigdl.util.common import *\n\n\ndef build_model(class_num):\n    # classic LeNet-5: two conv/pool blocks followed by two linear layers\n    model = Sequential()\n    model.add(Reshape([1, 28, 28]))\n    model.add(SpatialConvolution(1, 6, 5, 5))\n    model.add(Tanh())\n    model.add(SpatialMaxPooling(2, 2, 2, 2))\n    model.add(Tanh())\n    model.add(SpatialConvolution(6, 12, 5, 5))\n    model.add(SpatialMaxPooling(2, 2, 2, 2))\n    model.add(Reshape([12 * 4 * 4]))\n    model.add(Linear(12 * 4 * 4, 100))\n    model.add(Tanh())\n    model.add(Linear(100, class_num))\n    model.add(LogSoftMax())\n    return model\n\n\ndef get_mnist(sc, data_type=\"train\", location=\"/tmp/mnist\"):\n    \"\"\"\n    Get and normalize the MNIST data. The data is downloaded automatically\n    if it is not present at the given location.\n    :param sc: SparkContext\n    :param data_type: training data or testing data\n    :param location: location storing the MNIST data\n    :return: an RDD of (features: ndarray, label: ndarray)\n    \"\"\"\n    (images, labels) = mnist.read_data_sets(location, data_type)\n    images = sc.parallelize(images)\n    labels = sc.parallelize(labels + 1)  # targets start from 1 in BigDL\n    record = images.zip(labels)\n    return record\n\n\ndef get_end_trigger():\n    return MaxEpoch(10)\n\n\ntrain_data = get_mnist(sc, \"train\", \"\")\\\n    .map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),\n                            rec_tuple[1]))\\\n    .map(lambda t: Sample.from_ndarray(t[0], t[1]))\ntest_data = get_mnist(sc, \"test\", \"\")\\\n    .map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TEST_MEAN, mnist.TEST_STD),\n                            rec_tuple[1]))\\\n    .map(lambda t: Sample.from_ndarray(t[0], t[1]))\noptimizer = Optimizer(\n    model=build_model(10),\n    training_rdd=train_data,\n    criterion=ClassNLLCriterion(),\n    optim_method=SGD(learningrate=0.01, learningrate_decay=0.0002),\n    end_trigger=get_end_trigger(),\n    batch_size=128)\noptimizer.set_validation(\n    batch_size=128,\n    val_rdd=test_data,\n    trigger=EveryEpoch(),\n    val_method=[Top1Accuracy()]\n)\ntrained_model = optimizer.optimize()\nparameters = trained_model.parameters()\nprint(\"training finished\")",
   "execution_count": 13
  },
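  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "A quick look at what was learned: `parameters` (captured above) maps each layer name to its weight/bias/gradient arrays, so printing the shapes summarizes the trained LeNet. A sketch, not part of the original run:"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {},
   "source": "# print the shape of every trained tensor, layer by layer\nfor layer_name in sorted(parameters.keys()):\n    shapes = dict((k, v.shape) for k, v in parameters[layer_name].items())\n    print (layer_name, shapes)",
   "execution_count": null
  },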
  {
   "cell_type": "code",
   "outputs": [
    {
     "text": "creating: createTop1Accuracy\nEvaluated result: 0.946300029755, total_num: 10000, method: Top1Accuracy\n",
     "output_type": "stream",
     "name": "stdout"
    }
   ],
   "metadata": {},
   "source": "# evaluate the trained model on the held-out test set\nresults = trained_model.evaluate(test_data, 128, [Top1Accuracy()])\nfor result in results:\n    print(result)",
   "execution_count": 14
  },
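  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "Beyond the aggregate accuracy, per-sample predictions can be obtained with `Model.predict` (assuming the BigDL 0.3.0 Python API); each result is the log-softmax output vector, so `argmax + 1` recovers the 1-based label used during training. A sketch, not part of the original run:"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {},
   "source": "# distributed inference (assumption: Model.predict returns an RDD of\n# per-sample output arrays, one log-probability vector per test sample)\npredictions = trained_model.predict(test_data)\nfor p in predictions.take(3):\n    print (np.argmax(p) + 1)  # +1 because BigDL labels start from 1",
   "execution_count": null
  },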
  {
   "cell_type": "code",
   "outputs": [],
   "metadata": {
    "collapsed": true
   },
   "source": "",
   "execution_count": null
  }
 ],
 "nbformat": 4,
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2 with Spark 2.1",
   "name": "python2-spark21",
   "language": "python"
  },
  "language_info": {
   "mimetype": "text/x-python",
   "version": "2.7.11",
   "file_extension": ".py",
   "name": "python",
   "pygments_lexer": "ipython2",
   "nbconvert_exporter": "python",
   "codemirror_mode": {
    "version": 2,
    "name": "ipython"
   }
  }
 },
 "nbformat_minor": 1
} |