{
 "metadata": {
  "kernelspec": {
   "language": "python",
   "display_name": "Python 2 with Spark 2.1",
   "name": "python2-spark21"
  },
  "language_info": {
   "version": "2.7.11",
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "nbconvert_exporter": "python",
   "name": "python",
   "pygments_lexer": "ipython2",
   "codemirror_mode": {
    "version": 2,
    "name": "ipython"
   }
  }
 },
"cells": [ | |
{ | |
"execution_count": 1, | |
"cell_type": "code", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": "--2018-01-19 13:23:16-- https://repo1.maven.org/maven2/com/intel/analytics/bigdl/bigdl-SPARK_2.1/0.3.0/bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar\nResolving repo1.maven.org (repo1.maven.org)... 151.101.48.209\nConnecting to repo1.maven.org (repo1.maven.org)|151.101.48.209|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 77227587 (74M) [application/java-archive]\nSaving to: \u2018bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar.4\u2019\n\n100%[======================================>] 77,227,587 66.3MB/s in 1.1s \n\n2018-01-19 13:23:18 (66.3 MB/s) - \u2018bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar.4\u2019 saved [77227587/77227587]\n\n" | |
} | |
], | |
"source": "!(export sv=2.1 bv=0.3.0 ; cd ~/data/libs/ && wget https://repo1.maven.org/maven2/com/intel/analytics/bigdl/bigdl-SPARK_${sv}/${bv}/bigdl-SPARK_${sv}-${bv}-jar-with-dependencies.jar)" | |
}, | |
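  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "Optional sanity check (an added sketch, not part of the original run): list the target directory to confirm the BigDL jar from the cell above downloaded completely."
  },
  {
   "execution_count": null,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": "# Confirm the jar-with-dependencies file is present and non-empty\n!ls -lh ~/data/libs/bigdl-SPARK_2.1-0.3.0-jar-with-dependencies.jar*"
  },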
  {
   "execution_count": 2,
   "cell_type": "code",
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Requirement already satisfied: bigdl==0.3.0 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/s95e-7e9e9b7c0b9bb4-b210673fb348/.local/lib/python2.7/site-packages\nRequirement already satisfied: pyspark>=2.2 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/s95e-7e9e9b7c0b9bb4-b210673fb348/.local/lib/python2.7/site-packages (from bigdl==0.3.0)\nRequirement already satisfied: numpy>=1.7 in /usr/local/src/bluemix_jupyter_bundle.v77/notebook/lib/python2.7/site-packages (from bigdl==0.3.0)\nRequirement already satisfied: py4j==0.10.4 in /gpfs/global_fs01/sym_shared/YPProdSpark/user/s95e-7e9e9b7c0b9bb4-b210673fb348/.local/lib/python2.7/site-packages (from pyspark>=2.2->bigdl==0.3.0)\n"
    }
   ],
   "source": "!pip install bigdl==0.3.0 | cat"
  },
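  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "Optional: confirm which BigDL build pip resolved; `pip show` prints the installed package metadata, and the version should read 0.3.0 to match the jar above."
  },
  {
   "execution_count": null,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": "# Print installed package metadata for a quick version check\n!pip show bigdl | cat"
  },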
  {
   "execution_count": 1,
   "cell_type": "code",
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Prepending /gpfs/fs01/user/s95e-7e9e9b7c0b9bb4-b210673fb348/.local/lib/python2.7/site-packages/bigdl/share/conf/spark-bigdl.conf to sys.path\n"
    },
    {
     "output_type": "stream",
     "name": "stderr",
     "text": "/gpfs/fs01/user/s95e-7e9e9b7c0b9bb4-b210673fb348/.local/lib/python2.7/site-packages/bigdl/util/engine.py:39: UserWarning: Find both SPARK_HOME and pyspark. You may need to check whether they match with each other. SPARK_HOME environment variable is set to: /usr/local/src/spark21master/spark-2.1.2-bin-2.7.3, and pyspark is found in: /gpfs/fs01/user/s95e-7e9e9b7c0b9bb4-b210673fb348/.local/lib/python2.7/site-packages/pyspark/__init__.pyc. If they are unmatched, please use one source only to avoid conflict. For example, you can unset SPARK_HOME and use pyspark only.\n warnings.warn(warning_msg)\n"
    }
   ],
   "source": "# BigDL layers, loss criteria, and engine helpers, plus Spark and numpy\nfrom bigdl.nn.layer import *\nfrom bigdl.nn.criterion import *\nfrom bigdl.util.common import *\nfrom pyspark import SparkContext\nimport numpy as np"
  },
  {
   "execution_count": 2,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": "# Restart the SparkContext with BigDL's required Spark settings\nsc.stop()\nconfCore = create_spark_conf()\nconfCore.set(\"spark.executor.cores\", 1)\nconfCore.set(\"spark.cores.max\", 1)\nsc = SparkContext(appName=\"Mnist\", conf=confCore)\n# Initialize the BigDL engine; required before building or training models\ninit_engine()"
  },
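  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "Optional check (an added sketch using pyspark's standard `SparkConf.get` accessor): read back the core settings to verify the new context picked them up."
  },
  {
   "execution_count": null,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": "# The values were set as integers above; Spark stores and returns them as strings\nprint(sc.getConf().get(\"spark.executor.cores\"))\nprint(sc.getConf().get(\"spark.cores.max\"))"
  },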
  {
   "execution_count": 3,
   "cell_type": "code",
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "creating: createLinear\n{u'Linearaa5ab3b4': {u'gradWeight': array([[ 0., 0.]], dtype=float32), u'bias': array([ 0.53909093], dtype=float32), u'weight': array([[ 0.22592682, -0.69651681]], dtype=float32), u'gradBias': array([ 0.], dtype=float32)}}\n"
    }
   ],
   "source": "# A fully-connected layer mapping 2 inputs to 1 output;\n# parameters() exposes the weight, bias, and gradient buffers\nlinear = Linear(2, 1)\nprint(linear.parameters())"
  },
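  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "A minimal illustration of using the layer: BigDL's Python `Layer.forward` accepts a numpy array, so a single 2-element input yields one output value (the exact number depends on the random initialization shown above)."
  },
  {
   "execution_count": null,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": "# Forward a single 2-element sample through the untrained layer\nx = np.array([1.0, 2.0])\nprint(linear.forward(x))"
  },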
  {
   "execution_count": 4,
   "cell_type": "code",
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "('Extracting', 'train-images-idx3-ubyte.gz')\n('Extracting', 'train-labels-idx1-ubyte.gz')\n('Extracting', 't10k-images-idx3-ubyte.gz')\n('Extracting', 't10k-labels-idx1-ubyte.gz')\ncreating: createSequential\ncreating: createReshape\ncreating: createSpatialConvolution\ncreating: createTanh\ncreating: createSpatialMaxPooling\ncreating: createTanh\ncreating: createSpatialConvolution\ncreating: createSpatialMaxPooling\ncreating: createReshape\ncreating: createLinear\ncreating: createTanh\ncreating: createLinear\ncreating: createLogSoftMax\ncreating: createClassNLLCriterion\ncreating: createDefault\ncreating: createSGD\ncreating: createMaxEpoch\ncreating: createOptimizer\ncreating: createEveryEpoch\ncreating: createTop1Accuracy\ntraining finished\n"
    }
   ],
   "source": "from bigdl.dataset import mnist\nfrom bigdl.dataset.transformer import *\nfrom bigdl.nn.layer import *\nfrom bigdl.nn.criterion import *\nfrom bigdl.optim.optimizer import *\nfrom bigdl.util.common import *\n\n\ndef build_model(class_num):\n    # LeNet-5-style convnet: two conv/pool blocks followed by two linear layers\n    model = Sequential()\n    model.add(Reshape([1, 28, 28]))\n    model.add(SpatialConvolution(1, 6, 5, 5))\n    model.add(Tanh())\n    model.add(SpatialMaxPooling(2, 2, 2, 2))\n    model.add(Tanh())\n    model.add(SpatialConvolution(6, 12, 5, 5))\n    model.add(SpatialMaxPooling(2, 2, 2, 2))\n    model.add(Reshape([12 * 4 * 4]))\n    model.add(Linear(12 * 4 * 4, 100))\n    model.add(Tanh())\n    model.add(Linear(100, class_num))\n    model.add(LogSoftMax())\n    return model\n\n\ndef get_mnist(sc, data_type=\"train\", location=\"/tmp/mnist\"):\n    \"\"\"\n    Get and normalize the mnist data. It is downloaded automatically\n    if it isn't present at the given location.\n    :param sc: SparkContext\n    :param data_type: training data or testing data\n    :param location: location where the mnist data is stored\n    :return: an RDD of (features: Ndarray, label: Ndarray)\n    \"\"\"\n    (images, labels) = mnist.read_data_sets(location, data_type)\n    images = sc.parallelize(images)\n    labels = sc.parallelize(labels + 1)  # Targets start from 1 in BigDL\n    record = images.zip(labels)\n    return record\n\ndef get_end_trigger():\n    return MaxEpoch(10)\n\ntrain_data = get_mnist(sc, \"train\", \"\")\\\n    .map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),\n                            rec_tuple[1]))\\\n    .map(lambda t: Sample.from_ndarray(t[0], t[1]))\ntest_data = get_mnist(sc, \"test\", \"\")\\\n    .map(lambda rec_tuple: (normalizer(rec_tuple[0], mnist.TEST_MEAN, mnist.TEST_STD),\n                            rec_tuple[1]))\\\n    .map(lambda t: Sample.from_ndarray(t[0], t[1]))\noptimizer = Optimizer(\n    model=build_model(10),\n    training_rdd=train_data,\n    criterion=ClassNLLCriterion(),\n    optim_method=SGD(learningrate=0.01, learningrate_decay=0.0002),\n    end_trigger=get_end_trigger(),\n    batch_size=128)\noptimizer.set_validation(\n    batch_size=128,\n    val_rdd=test_data,\n    trigger=EveryEpoch(),\n    val_method=[Top1Accuracy()]\n)\ntrained_model = optimizer.optimize()\nparameters = trained_model.parameters()\nprint(\"training finished\")"
  },
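  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "With training finished, a minimal prediction sketch (assuming BigDL 0.3's `Layer.predict`, which maps an RDD of Samples to an RDD of output ndarrays). The network ends in `LogSoftMax`, so the argmax of each output, plus 1 for BigDL's 1-based labels, is the predicted digit."
  },
  {
   "execution_count": null,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": "# Predict log-probabilities for the test set and decode a few predictions\npredictions = trained_model.predict(test_data)\nfor p in predictions.take(3):\n    # argmax over 10 log-probabilities; +1 because BigDL labels are 1-based\n    print(np.argmax(p) + 1)"
  },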
  {
   "execution_count": 6,
   "cell_type": "code",
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "creating: createTop1Accuracy\nEvaluated result: 0.951300024986, total_num: 10000, method: Top1Accuracy\n"
    }
   ],
   "source": "results = trained_model.evaluate(test_data, 128, [Top1Accuracy()])\nfor result in results:\n    print(result)"
  },
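  {
   "cell_type": "markdown",
   "metadata": {},
   "source": "Finally, a hedged sketch of persisting the result (assuming BigDL 0.3's `Layer.save` / `Model.load` serialization API; the path below is illustrative, not from the original notebook)."
  },
  {
   "execution_count": null,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": "model_path = \"/tmp/mnist_lenet.bigdl\"  # hypothetical location\n# over_write=True replaces any existing file at that path\ntrained_model.save(model_path, True)\nloaded_model = Model.load(model_path)\nprint(loaded_model)"
  },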
  {
   "execution_count": null,
   "cell_type": "code",
   "metadata": {},
   "outputs": [],
   "source": ""
  }
 ],
 "nbformat": 4,
 "nbformat_minor": 1
}