// API callback
myFunc({"version":"1.0","encoding":"UTF-8","feed":{"xmlns":"http://www.w3.org/2005/Atom","xmlns$openSearch":"http://a9.com/-/spec/opensearchrss/1.0/","xmlns$blogger":"http://schemas.google.com/blogger/2008","xmlns$georss":"http://www.georss.org/georss","xmlns$gd":"http://schemas.google.com/g/2005","xmlns$thr":"http://purl.org/syndication/thread/1.0","id":{"$t":"tag:blogger.com,1999:blog-309796417999444696"},"updated":{"$t":"2013-06-19T08:23:51.845-07:00"},"category":[{"term":"Clojure"},{"term":"je"},{"term":"Frameworks"},{"term":"Java"},{"term":"Maven"},{"term":"APIs"},{"term":"Leiningan"}],"title":{"type":"text","$t":"jayunit100"},"subtitle":{"type":"html","$t":"statelessly urez"},"link":[{"rel":"http://schemas.google.com/g/2005#feed","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/posts/default"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default?alt\u003djson-in-script"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/"},{"rel":"hub","href":"http://pubsubhubbub.appspot.com/"},{"rel":"next","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default?alt\u003djson-in-script\u0026start-index\u003d26\u0026max-results\u003d25"}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"generator":{"version":"7.00","uri":"http://www.blogger.com","$t":"Blogger"},"openSearch$totalResults":{"$t":"55"},"openSearch$startIndex":{"$t":"1"},"openSearch$itemsPerPage":{"$t":"25"},"entry":[{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-5064913066247027743"},"published":{"$t":"2013-06-18T10:22:00.002-07:00"},"updated":{"$t":"2013-06-18T19:11:44.619-07:00"},"title":{"type":"text","$t":"Find class method dependencies in 8 seconds"},"content":{"type":"html","$t":"Scouring through large amounts of source code using just \"grep\" is soooo 1990.\u0026nbsp; With git, pipes, and scripting languages, you can combine the convenience of grep with simpler and more flexible parsing tools.\u0026nbsp;\u0026nbsp; Here's a couple of ways to find dependencies given a java class name, without having to load all your code into an IDE.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eScenario\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eYou want to know which classes reference a particular compilation unit in your code.\u003cbr /\u003eBut more and you want to know which methods in those classes are making the references.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eHow to do it with class info only using grep + perl/python/ettc\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; 
letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003eA way to start is to run:\u0026nbsp;\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003egit grep -l MyClassName | grep \".java\" | perl -pe 's!.*/!!'\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003eThis will give all the matching classes.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eHow to do it with method specific information using \"git -p\"\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eUsing git's code parser, you can run \u003cbr /\u003e\u003cbr /\u003egit grep -p \u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003e MyClassName \u003c/span\u003e| grep java\u003cb\u003e \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eThis will return a several lines per match: One for the match, and another 
specifically for the method which CONTAINS the match.\u0026nbsp; You can parse out the methods by scanning one row above the match itself.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003e\u003cb\u003eMoral of the story \u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003eYou don't have to use sed for everything.\u0026nbsp; Instead, consider\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; 
line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003e1) Piping into python and perl which can do a wonderful job reading from standard in and stream editing a file.\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003e2) Better yet, git comes stock with programmer specific grep utilities that understand the semantics of common programming languages.\u003c/span\u003e\u003cbr /\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: x-large;\"\u003eThat is all :) \u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; 
border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; border-collapse: separate; color: black; font-family: 'Lucida Grande', arial, sans-serif; font-size: 12px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 20px; orphans: 2; text-align: auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/b\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/5064913066247027743/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/06/find-class-dependencies-in-8-seconds.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5064913066247027743"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5064913066247027743"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/06/find-class-dependencies-in-8-seconds.html","title":"Find class method dependencies in 8 seconds"}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-4121646252483479097"},"published":{"$t":"2013-05-22T10:15:00.001-07:00"},"updated":{"$t":"2013-05-30T06:04:04.193-07:00"},"title":{"type":"text","$t":"Debugging HBASE cluster setup "},"content":{"type":"html","$t":"Setting up HBase can be tricky because of the intermediate states of processes which may be running.\u0026nbsp; Here are some important configs I found + an idempotent install script, reproducible hbase deployment which cleans your system and restarts hbase from scratch, so that you can easily update configs until everything works correctly.\u0026nbsp; I also used some log grepping scripts alongside this to quickly and automatically report errors in the setup after running the script. \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e1) The most important thing to get right is /etc/hosts.\u0026nbsp; \"Can't connect to master\" exceptions might ensue if its not right.... 
/etc/hosts It should look something like this:\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e#Note that the loopback NEEDS TO BE 127.0.0.1 (ubuntu deviates from this, so you have to fix it).\u003cbr /\u003e127.0.0.1\u0026nbsp;\u0026nbsp; localhost\u0026nbsp; localhost.localdomain localhost4 localhost4.localdomain4\u003cbr /\u003e#::1\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; localhost localhost.localdomain localhost6 localhost6.localdomain6\u003cbr /\u003e\u003cdiv style\u003d\"background-color: yellow;\"\u003e192.168.122.200 hbase-master\u003c/div\u003e\u003cdiv style\u003d\"background-color: yellow;\"\u003e192.168.122.201 hbase-regionserver1\u003c/div\u003e\u003cdiv style\u003d\"background-color: yellow;\"\u003e192.168.122.202 hbase-regionserver2\u003c/div\u003e\u003cdiv style\u003d\"background-color: yellow;\"\u003e192.168.122.203 hbase-regionserver3\u003c/div\u003e\u003cbr /\u003e(note - hbase-master isn't identified as localhost)... this is important.\u0026nbsp; Also note that the related \"PleaseHoldException\" is related to a failed master - but can be caused by more than just bad hosts.\u0026nbsp; It can be caused , for example, if the Hmaster fails to start due to internal or file system errors.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e2) Make sure zookeeper is running properly, otherwise nothing will work.\u0026nbsp; Each region server AND the master should be defined in a comma delimited string in the \u0026lt;value\u0026gt; tag in the \u003c/b\u003ehbase-site.xml\u003cb\u003e file\u003c/b\u003e:\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u0026lt;property\u0026gt;\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; 
\u0026lt;name\u0026gt;hbase.zookeeper.quorum\u0026lt;/name\u0026gt;\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u0026lt;value\u0026gt;\u003cspan style\u003d\"background-color: yellow;\"\u003ehbase-master,hbase-regionserver1,hbase-regionserver2,hbase-regionserver3\u003c/span\u003e\u0026lt;/value\u0026gt;\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u0026lt;/property\u0026gt;\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e3) If queries fail because no active master is found, you need to (duh) make sure your zookeeper \"parent\" node is running (in my case, its the same as hbase master).\u0026nbsp; For example, given this configuration -- \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u0026lt;property\u0026gt;\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u0026lt;name\u0026gt;zookeeper.znode.parent\u0026lt;/name\u0026gt;\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u0026lt;value\u0026gt;/\u003cspan style\u003d\"background-color: yellow;\"\u003ehbase-master\u003c/span\u003e\u0026lt;/value\u0026gt;\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u0026lt;/property\u0026gt;\u003cbr /\u003e\u003cbr /\u003eYou should see the following on the 
\"hbase-master\" machine.\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e[root@hbase-master]\u0026gt; jps\u003c/blockquote\u003e\u003cblockquote\u003e12388 HQuorumPeer\u003cbr /\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003e14136 HMaster\u003c/span\u003e\u003cbr style\u003d\"background-color: yellow;\" /\u003e2952 JobTracker\u003c/blockquote\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cb\u003e4) Avoid time sync exceptions which will prevent cluster startup --- Install NTP and start it on all nodes - clocks on the nodes need to be synchronized.\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003ehttp://www.cyberciti.biz/faq/howto-install-ntp-to-synchronize-server-clock/\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e5) This script will totally clean out your file system and Hbase daemons... Don't use it in production !\u0026nbsp; Just use it for installation...\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e#Setting up and debugging hbase can be a little tricky - best to automate it\u003cbr /\u003e#with log cleaning in a script like this (run it from your head node on your\u003cbr /\u003e#newly installed test cluster).\u0026nbsp; At the end of this script, a test table is created\u003cbr /\u003e#in debug mode - so you can see any ensuing errors...\u0026nbsp; This script should be\u003cbr /\u003e#idempotent (modify highlighted parts for your cluster).\u003cbr /\u003e\u003cbr /\u003enodes\u003d\u003cspan style\u003d\"background-color: yellow;\"\u003e(hbase-master hbase-regionserver1 hbase-regionserver2 hbase-regionserver3) \u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eecho $nodes\u003cbr /\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003ehbaseinstall/hbase-0.94.7/bin/\u003c/span\u003ehbase-daemon.sh stop master\u003cbr /\u003e\u003cbr /\u003eecho \"WARNING !!!! 
CLEARING OUT ALL OF YOUR HBASE DATA HIT A KEY TO CONTINUE !!!\"\u003cbr /\u003eread\u003cbr /\u003e\u003cbr /\u003eecho \"CLEARING hbase/ IN 5 SECONDS!\"\u003cbr /\u003esleep 5\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003ehadoop fs -rmr hbase/*\u003c/span\u003e\u0026nbsp; #if using other file systems (S3, gluster, etc..), you might modify this line.\u003cbr /\u003e\u003cbr /\u003efor i in \"${nodes[@]}\"\u003cbr /\u003edo\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; echo \"Cleaning $i\"\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; #Get rid of logs, so that after restart/reconfiguring you can easily\u0026nbsp; debug the changes.\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; ssh root@$i rm -rf /tmp/hbase-root/*\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; ssh root@$i rm -rf\u003cspan style\u003d\"background-color: yellow;\"\u003e hbaseinstall/hbase-0.94.7/\u003c/span\u003elogs/*\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; #Reliably kill Zookeeper/RegionServers .\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; ssh root@$i killall -9 java\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; echo \"Done...\"\u003cbr /\u003edone\u003cbr /\u003e\u003cbr /\u003e#############################\u003cbr /\u003esleep 2\u003cbr /\u003e#############################\u003cbr /\u003e\u003cbr /\u003eecho \"restarting hbase\" \u003cbr /\u003ehbaseinstall/hbase-0.94.7/bin/start-hbase.sh\u003cb\u003e\u003c/b\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cb\u003e##############################\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003esleep 2\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: 
small;\"\u003e\u003ccode\u003e#######################################################\u003c/code\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ccode\u003e#Now , invoke a shell in debug mode and create a table.\u0026nbsp; \u003c/code\u003e\u003cbr /\u003e\u003ccode\u003e \u003c/code\u003e\u003ccode\u003e\u003cspan class\u003d\"lit\"\u003ehbaseinstall/hbase-0.94.7/bin/hbase shell -d \u0026lt;\u0026lt;EOF\u003cbr /\u003ecreate 't1','f1' \u003cbr /\u003eEOF\u003c/span\u003e\u003c/code\u003e\u003cbr /\u003e\u003ccode\u003e\u003cspan class\u003d\"lit\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/code\u003e \u003cbr /\u003e\u003ccode\u003e\u003cspan class\u003d\"lit\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/code\u003e "},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/4121646252483479097/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/05/debugging-hbase-installation.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/4121646252483479097"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/4121646252483479097"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/05/debugging-hbase-installation.html","title":"Debugging HBASE cluster setup "}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-1885493081574516028"},"published":{"$t":"2013-05-15T17:56:00.003-07:00"},"updated":{"$t":"2013-05-29T13:09:31.667-07:00"},"title":{"type":"text","$t":"KVM Clusters on the Fly : virt-install + Kickstart with static IPs "},"content":{"type":"html","$t":"\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://2.bp.blogspot.com/_FDipjI8SVMg/TJeiq7V1R0I/AAAAAAAAANg/vTmchqWtiCg/s1600/penguins.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"209\" src\u003d\"http://2.bp.blogspot.com/_FDipjI8SVMg/TJeiq7V1R0I/AAAAAAAAANg/vTmchqWtiCg/s320/penguins.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eAutomated cluster creation in VMs by scriptifying your KVM setups.\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003eAutomatically provisioning VM clusters comes up alot:\u003cbr /\u003e\u003cul\u003e\u003cli\u003eWhen you want to simulate / practice installing a clustered app or utility.  
\u003c/li\u003e\u003cli\u003eWhen you want to confirm that workload distribution is occuring properly.\u003c/li\u003e\u003cli\u003eTo confirm that RPC and other communication intensive apps are configured correctly.\u0026nbsp; \u003c/li\u003e\u003c/ul\u003eA few weeks ago we went through \u003ca href\u003d\"http://jayunit100.blogspot.com/2013/03/a-completely-rebuildable-fedora16.html\"\u003esetting up single node, rebuildable gluster VM directly from source using KVM.\u003c/a\u003e \u003cbr /\u003e\u003cbr /\u003eBut -- the guilt of having to click the Virtual Machine Manager UI just to browse for a path to an ISO was unbearable... So... I finally forced myself how to figure out how to automatically deploy VMs using virt-install. \u003cbr /\u003e\u0026nbsp; \u003cbr /\u003e\u003cb\u003eEnter KVM\u0026nbsp;+ virt-install + Kickstart\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eAfter trying it out - I can honestly say that KVM's virt-install with kickstart on Fedora/RHEL is an excellent alternative to Vagrant (albeit KVM specific). See the vagrant gripes regarding overabstractions here \u003ca href\u003d\"https://news.ycombinator.com/item?id\u003d4406467\"\u003ehttps://news.ycombinator.com/item?id\u003d4406467\u003c/a\u003e, many of which are solved by kickstart's much simpler paradigm.\u0026nbsp;\u003cbr /\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003eHowever, the use of kickstart with KVM requires a little extra research because of its lower level and more powerful feature subsets.\u0026nbsp; In any case, I'm no virtualization expert and I got it working. 
Like most things, once you get a solid template to work from, its easy to incrementally learn and customize.\u003c/blockquote\u003e\u003cbr /\u003e\u003cb\u003eSo here's how to \"scriptify\" Fedora16 VM creation ~ using the virt-install utility (which is like a programmatic version of the Virtual Machine Manager).\u003c/b\u003e \u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003evirt-install --name VM-MyNewVM-1 --hvm --ram 1024 --disk path\u003d/VirtualMachines/VM-MyNewVM-1.img,size\u003d10,size\u003d10 --network network:default --vnc --os-type\u003dlinux --os-variant\u003dfedora16 --location \u003cspan style\u003d\"background-color: yellow;\"\u003ehttp://redhat.download.fedoraproject.org/pub/fedora/linux/releases/16/Fedora/x86_64/os/\u003c/span\u003e -x \"\u003cspan style\u003d\"background-color: lime;\"\u003eks\u003dhttp://pastebin.com/raw.php?i\u003dUUX1qcpa\u003c/span\u003e\"\u003c/span\u003e\u003c/blockquote\u003e\u003cbr /\u003e^^ Yup - thats it.\u0026nbsp; That ONE LINE of shell script created a 1GB VM for you with (if you include the \"pastebin\" script whose contents are below) disk partitions and a static ip.\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e(note, I've since updated this with some snippets from this excellent minimal fedora kickstarter template \u003c/span\u003e\u003ca href\u003d\"https://gist.github.com/bburky/2913219\"\u003ehttps://gist.github.com/bburky/2913219\u003c/a\u003e to make the install leaner)\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e \u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cpre\u003e# CONTENTS OF THE PASTEBIN SCRIPT ABOVE\u003c/pre\u003e\u003cpre\u003e# Kickstart file automatically generated by anaconda.\u003cbr /\u003e#version\u003dDEVEL\u003cbr /\u003einstall\u003cbr /\u003ecdrom\u003cbr /\u003elang en_US.UTF-8\u003cbr /\u003ekeyboard us\u003cbr /\u003enetwork --onboot yes --device eth0 
--bootproto dhcp --noipv6\u003cbr /\u003etimezone --utc America/New_York\u003cbr /\u003erootpw  --iscrypted $6$9bRPXTZZMy0FNl2A$lgY.MS3pZ.0PVg4o3AQeJOydPwGVphdKT07tHlJUmdoRTz4UQQ/L54ny0QHkdubMquqkr4jw37DxmM0FL5kRn1\u003cbr /\u003eselinux --enforcing\u003cbr /\u003eauthconfig --enableshadow --passalgo\u003dsha512\u003cbr /\u003efirewall --service\u003dssh\u003cbr /\u003e# The following is the partition information you requested\u003cbr /\u003e# Note that any partitions you deleted are not expressed\u003cbr /\u003e# here so unless you clear all partitions first, this is\u003cbr /\u003e# not guaranteed to work\u003cbr /\u003e# Uncommented by j\u003cbr /\u003ezerombr\u003cbr /\u003eclearpart --all\u003cbr /\u003eautopart\u003c/pre\u003e\u003cpre\u003e\u0026nbsp;\u003c/pre\u003e\u003cpre\u003e#Static IP address with gateway to the outside world - 192.168.122.1 is the default KVM gateway.\u003c/pre\u003e\u003cpre\u003enetwork --bootproto\u003dstatic --ip\u003d192.168.122.99 --netmask\u003d255.255.255.0 --gateway\u003d192.168.122.1 --nameserver\u003d192.168.122.1\u003cbr /\u003ebootloader --location\u003dmbr --timeout\u003d5 --append\u003d\"rhgb quiet\"\u003cbr /\u003e\u003cbr /\u003e%packages\u003cbr /\u003e@core\u003cbr /\u003e@online-docs\u003cbr /\u003e%end\u003c/pre\u003e\u003cbr /\u003e\u003cb\u003eDayumnnn... 
How does this work?\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003eHere's whats embedded in the action packed \u003ci\u003evirt-install \u003c/i\u003ecommand above:\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cb\u003e--location: \u003c/b\u003eDirect access link  to the to-be guest's OS source code - not an ISO.\u0026nbsp; Get the correct root link to the\u0026nbsp; Fedora source tree.\u0026nbsp; This took more than a couple of google searches (evidently, the tree root is not Fedora/, but rather, Fedora/\u0026lt;arch\u0026gt;/os/.\u003c/li\u003e\u003cli\u003e\u003cb\u003e\"ks\u003d\"\u003c/b\u003e An http accessible(also this can be put on an NFS mount, but I find http / pastebin to be easier) kickstart file by stealing one from an existing fedora OS and putting it in a universally accessible location (either NFS or aURL on the web).\u0026nbsp;\u0026nbsp; See \u003ca href\u003d\"http://www.centos.org/docs/4/html/rhel-sag-en-4/s1-kickstart2-startinginstall.html\"\u003ewww.centos.org/docs/4/html/rhel-sag-en-4/s1-kickstart2-startinginstall.html\u003c/a\u003e for details on different ways to specify kickstarter scripts.\u003c/li\u003e\u003cli\u003e\u003cb\u003e(Inside of the pastebin url, whose contents are below)\u003c/b\u003e: Update the partitioning in your kickstarter template file so that partitioning happens automatically - that is - so that you don't have to do any interactive disk partitioning.\u0026nbsp; For more in depth partitioning and a general understanding of the very powerful partitioning API in kickstart: See \u003ca href\u003d\"http://www.dark.ca/2009/08/03/complex-partitioning-in-kickstart\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003ehttp://www.dark.ca/2009/08/03/complex-partitioning-in-kickstart.\u003c/span\u003e\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003eSo, in summary - to start with, the only parameters you need to modify above are - \u003c/span\u003e\u003cspan 
style\u003d\"font-size: small;\"\u003eYou can change:\u003c/span\u003e\u003cbr /\u003e\u003col\u003e\u003col\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe fedora url, which points to the install tree for the particular OS your using (make sure and get the path right), and\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe \"pastebin\" address above is just silly way to put up a kickstart script.\u0026nbsp; \u003c/span\u003e\u003c/li\u003e\u003c/ol\u003e\u003c/ol\u003e\u003cb\u003eKickstarting your kickstart scripts...\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003eSo -\u0026gt; how do you build a create a kickstart script from scratch ? You don't have to :) ...\u0026nbsp;\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e\u0026nbsp;Fedora/RHEL create a kickstart script for you when you do a manual install, just setup your first VM manually and use its autogenerated kickstart\u003c/b\u003e (which is created based on how you setup the OS, and written to /root/anaconda.cfg).\u003c/span\u003e\u003c/blockquote\u003e\u003cspan style\u003d\"font-size: small;\"\u003eOnce I ran this file, I found it wasn't \u003ci\u003ecompletely\u003c/i\u003e automated, i.e. 
the disk partitions were being requested by Fedora interactively.\u0026nbsp; To squelch this, I added the zerombr and the clearpart commands into the kickstart, In the end, my bare bones kickstart template looked a little like this...\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eNext?\u0026nbsp;\u003c/b\u003e \u003c/span\u003e\u003cbr /\u003e\u003cpre\u003e \u003c/pre\u003eKVM supports post scriplets and \"ks_append\" tags.\u0026nbsp; These essentially will run shell commands for you after the box is setup.\u0026nbsp; For example, you can append this to the end of your file.\u0026nbsp; \u003cbr /\u003e\u003cpre\u003e \u003c/pre\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003e\u0026lt;ks_appends\u0026gt;\u003c/div\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003e\u0026lt;ks_append\u0026gt;\u0026lt;![CDATA[\u003c/div\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003e%post\u003c/div\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003etouch /tmp/file_i_created_after_kickstarting.txt \u003c/div\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003e%end\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003e]]\u0026gt;\u0026lt;/ks_append\u0026gt;\u003c/div\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003e\u0026lt;/ks_appends\u0026gt;\u003c/div\u003e\u003c/blockquote\u003e\u003cpre\u003e\u003cbr /\u003e\u003c/pre\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eTesting that it worked:\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan 
style\u003d\"font-size: small;\"\u003eMake sure you got your static IPS and disks right --\u003cb\u003e\u0026gt;\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e1) \u003ci\u003eifconfig | grep 192\u003c/i\u003e #Confirm that the static ips were assigned properly from the kickstart script.\u0026nbsp;\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e2) \u003ci\u003edf -h /root\u003c/i\u003e #Confirm that /root has several gigs of space in it. For fancier provisioning, replace the autopart (which I found was necessary, along with clearpart --all, to avoid the fedora interactive \"Storage Device Warning\" dialog.\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eParting Words ...\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003eVirt-install can be used with a kickstart script to automate VM creation with static ips, disk partitions, memory, and a whole host of other goodies, right off the bat - and that means that virt-install can set up an entire cluster for you.\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003eAdmittedly, the syntax is not as elegant and its not platform neutral as the similar vagrant tool...\u0026nbsp; But who cares?\u0026nbsp; You can customize it to any degree - both on the hardware side and the user space packages.\u0026nbsp; For example, there are\u0026nbsp; \u003ca href\u003d\"http://linuxgazette.net/issue43/nielsen.kickstart.html\"\u003ekickstarter templates for complex deployments,\u003c/a\u003e \u003ca 
href\u003d\"http://www.outsidaz.org/blog/2009/06/28/minimal-server-installs-with-kickstart/\"\u003eminimal servers\u003c/a\u003e\u003ci\u003e \u003c/i\u003eand \u003ca href\u003d\"http://kaivanov.blogspot.com/2010/09/kickstart-example.html\"\u003ecombining your kickstart script with shell commands to do installations of packages at the end of OS setup\u003c/a\u003e. \u003c/span\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/1885493081574516028/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/05/clusters-on-fly-kvm-libvirt-kickstart.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/1885493081574516028"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/1885493081574516028"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/05/clusters-on-fly-kvm-libvirt-kickstart.html","title":"KVM Clusters on the Fly : virt-install + Kickstart with static IPs "}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://2.bp.blogspot.com/_FDipjI8SVMg/TJeiq7V1R0I/AAAAAAAAANg/vTmchqWtiCg/s72-c/penguins.jpg","height":"72","width":"72"},"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-3268294421274481484"},"published":{"$t":"2013-04-23T12:03:00.003-07:00"},"updated":{"$t":"2013-04-24T15:17:08.560-07:00"},"title":{"type":"text","$t":"The k/v pair salmon run in mapreduce 
-\u003e hdfs. "},"content":{"type":"html","$t":"\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://1.bp.blogspot.com/-ORRW5mH5T4w/UXbSbqHjr_I/AAAAAAAACsg/FM-JBTg9JTw/s1600/chartt.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"640\" src\u003d\"http://1.bp.blogspot.com/-ORRW5mH5T4w/UXbSbqHjr_I/AAAAAAAACsg/FM-JBTg9JTw/s640/chartt.png\" width\u003d\"321\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eThe HDFS write path is lonnnng and hairy.\u0026nbsp; Here's some imagery of it (somewhat raw and undervalidated, so please comment if something looks funny).\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003eHave you ever seen those little salmons that swim ALL THE WAY up the river, into the ocean, just to breed?\u0026nbsp; Well thats kinda how k/v pairs in MapReduce applications work.\u0026nbsp; They have to go a LONG WAY before the finally get to reside somewhere permanently on local disk.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003eThe fact that MapReduce abstracts \"key/value\" pairs as an application level nicety makes the write path for a real file very intriguing.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eFirst off - MapReduce distributes your k/v pairs into partitions:\u0026nbsp;\u003c/b\u003e \u003cbr /\u003e\u003cul\u003e\u003cli\u003eFor a given MapReduce job, you typically have several output files.\u0026nbsp; These are called partitions (part-r-0000, part-r-0001, ...).\u003c/li\u003e\u003cli\u003eEach file in HDFS is broken into BLOCKS.\u0026nbsp;\u0026nbsp;\u003c/li\u003e\u003cli\u003eThe partitions are 
requested by the MapReduce layer - every time a mapper runs, a \"part-****\" file output stream is created.\u0026nbsp; This is done by the FileOutputFormat classes.\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe first \"level\" of buffering that you  control is in the FileOutputFormat - which takes k,v pairs directly.\u0026nbsp;  Although TextOutputFormat\u0026nbsp; doesn't seem to buffer, other output formats  (SequenceFileOutputFormat), actually do.\u0026nbsp;\u0026nbsp; \u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003cblockquote class\u003d\"tr_bq\"\u003eNote : Partitions are a user-level feature - they are the fundamental mechanism for distributing algorithms over a cluster.\u0026nbsp;\u0026nbsp; Since each partition corresponds to a single reducer, you need to be careful that you partition your workloads evenly - otherwise you'll get the \"long-tail\" problem (example: a web crawler with keys as domain roots with default partitioning will be extremely inefficient - because the most common sites will only be crawled in a single reducer). 
\u003c/blockquote\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eNext: The partition files are broken into blocks, and written to the DFS: \u003c/b\u003e\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eBuffering of writes occurs inside of the DataStreamer, which creates a \"blockStream\" for writing.\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe main job of the DFSOutputStream class is to translate bytes into packets that can be written and acknowledged reliably.\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe DFSOutputStream uses its inner DataStreamer class to handle the logic of creating OutputStreams which directly write to, and acknowledge progress, of writing contents to a block.\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eWriting to the DFSOutputStream is fast - no waiting on remote calls\u0026nbsp; synchronously.\u0026nbsp; All acks are asynchronously done (if you're reading this post, though, you probably already know that).\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe DataStreamer picks the packets up off the ackQueue, and once a packet is the \"last\" one in a block, the block is closed for writing, and a new one is created. 
\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThere are thus at least two layers of client side buffering that occur in HDFS - one, the buffering of the output stream which is directly writing over a socket to remote blocks, and two, the buffering which occurs as a natural consequence of the fact that \"Packets\" accumulate a certain amount of bytes in memory before they are put on the write queue.\u0026nbsp; \u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e \u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eInspect the k/v salmon-run write path for yourself :\u003c/b\u003e\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/span\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e \u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThere could be some ambiguities or (gasp) inaccuracies in the diagram above. 
Please do feel free to validate it and comment.\u0026nbsp; The class names correspond directly to those used in the nodes of this graph.\u0026nbsp; The github urls for the corresponding hadoop projects are :\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003ehttps://github.com/apache/hadoop-common\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003ehttps://github.com/apache/hadoop-hdfs\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: small;\"\u003ehttps://github.com/apache/hadoop-mapred\u0026nbsp; \u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eOf course, it would behoove you to scour this code using eclipse, since there are 100s of relevant classes, and you can easily build eclipse projects from the sub projects by running \"mvn eclipse:eclipse\".\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eYou might also want to run the full build.\u0026nbsp; In order to do that you'll have to have protobuf installed : \u003ca href\u003d\"http://stackoverflow.com/questions/15745010/org-apache-maven-plugin-mojoexecutionexception-protoc-failure\"\u003ehttp://stackoverflow.com/questions/15745010/org-apache-maven-plugin-mojoexecutionexception-protoc-failure\u003c/a\u003e. 
\u0026nbsp; \u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eGenerating the graph :\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThis graph can be generated in graphviz using the neato layout, or on erdos http://sandbox.kidstrythisathome.com/erdos/, which can visualize reasonably sized graphviz snippets.\u0026nbsp;\u003cb\u003e \u003c/b\u003e\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003edigraph g{\u003cbr /\u003e\u0026nbsp; node [shape\u003drecord];\u003cbr /\u003e\u0026nbsp; MapOoutputCollector [label\u003d\"\u0026lt;f1\u0026gt; DirectMapOutputCollector|\u0026lt;f2\u0026gt; MapOutputBuffer\"];\u0026nbsp; \u003cbr /\u003e\u0026nbsp; DFSClient -\u0026gt; DFSOutputStream [label\u003d\"writes to\"]; \u003cbr /\u003e\u0026nbsp; DFSOutputStream -\u0026gt; Streamer [label\u003d\"create\"] ; \u003cbr /\u003e\u0026nbsp; DFSOutputStream -\u0026gt; AckQueue [label\u003d\" puts packets\"];\u003cbr /\u003e\u0026nbsp; Streamer -\u0026gt; AckQueue [label\u003d\"take packets\"];\u003cbr /\u003e\u0026nbsp; Streamer -\u0026gt; DataNode [label\u003d\"write packet\"] ;\u003cbr /\u003e\u0026nbsp; Streamer -\u0026gt; Socket [label\u003d\"read ack\"] ; \u003cbr /\u003e\u0026nbsp; DataNode -\u0026gt; Socket [label\u003d\"write ack\"]; \u003cbr /\u003e\u0026nbsp; DistributedFileSystem -\u0026gt; DFSClient [label\u003d\"creates a\"];\u003cbr /\u003e\u0026nbsp; TaskTracker -\u0026gt; MapTask [label\u003d\"creates\"];\u003cbr /\u003e\u0026nbsp; MapTask -\u0026gt; UserMapper [label\u003d\"run(context,rReader,rWriter)\"];\u003cbr /\u003e\u0026nbsp; UserMapper 
-\u0026gt; MapOoutputCollector [label\u003d\"forwards (k,v) writes to\"];\u003cbr /\u003e\u0026nbsp; MapOoutputCollector -\u0026gt; SequenceFileOutputFormat [label\u003d\"writes (k,v) to\"]; \u003cbr /\u003e\u0026nbsp; SequenceFileOutputFormat -\u0026gt; SequenceFileOutputFormat_Writer [label\u003d\"creates inner\"];\u003cbr /\u003e\u0026nbsp; SequenceFileOutputFormat_Writer -\u0026gt; FSDataOutputStream [label\u003d\"writes byes to\"];\u003cbr /\u003e\u0026nbsp; TextOutputFormat_Writer -\u0026gt; FSDataOutputStream [label\u003d\"writes bytes to\"] ;\u003cbr /\u003e\u0026nbsp; FSDataOutputStream -\u0026gt; DistributedFileSystem [label\u003d\"connects to \"];}\u003c/span\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/3268294421274481484/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/04/the-kv-pair-salmon-run-in-mapreduce-hdfs.html#comment-form","title":"1 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/3268294421274481484"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/3268294421274481484"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/04/the-kv-pair-salmon-run-in-mapreduce-hdfs.html","title":"The k/v pair salmon run in mapreduce -\u003e hdfs. 
"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://1.bp.blogspot.com/-ORRW5mH5T4w/UXbSbqHjr_I/AAAAAAAACsg/FM-JBTg9JTw/s72-c/chartt.png","height":"72","width":"72"},"thr$total":{"$t":"1"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-102126417789997416"},"published":{"$t":"2013-04-17T19:24:00.001-07:00"},"updated":{"$t":"2013-04-17T21:31:21.491-07:00"},"title":{"type":"text","$t":"Setting up Archiva to host your Maven Repo in 15 minutes"},"content":{"type":"html","$t":"\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://archiva.apache.org/images/archiva.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" src\u003d\"http://archiva.apache.org/images/archiva.png\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eArchiva is a fully open alternative to Nexus - a simple web-ui for managing and serving up your maven repos.\u0026nbsp; \u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003eSetting up a maven repository is as simple as setting up a web accessible directory structure, for example, you can do this using nothing other than a \u003ca href\u003d\"http://jayunit100.blogspot.com/2012/04/deploy-your-own-maven-repos.html\"\u003epublic github repo\u003c/a\u003e 
.\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eArchiva vs. nexus vs ...\u003c/b\u003e\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003eI'm not going to go into the details here, as there are plenty of lmgtfy resources on this.\u0026nbsp; Nexus is extremely sophisticated, and has a very professional look and feel with great scalability.\u0026nbsp; Archiva is the up-and-coming apache answer to nexus, and is slightly more \"open\".\u0026nbsp; But either maven repository server has essentially the same core features for moderate to small size java projects.\u003cbr /\u003e\u003cbr /\u003eAnyways... here goes - setting up archiva (in this case I've done this on EC2 in a machine with the correctly opened ports).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eDirections for setting up Archiva (fedora/rhel) with open-jdk.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e1) yum install glibc.i686 (fedora/rhel - this might only required for a minute open-jdk issue).\u003cbr /\u003e\u003cbr /\u003e2) Update the conf.xml file (for example, to serve on another port than 8080 if you don't want it to collide with another server).\u003cbr /\u003e\u003cbr /\u003e3) apache-archiva-1.3.6/bin/archiva start\u003cbr /\u003e\u003cbr /\u003e4) Test locally : run \u003ci\u003ewget localhost:80/archiva\u003c/i\u003e , which should return something meaningful after a few seconds of initialization.\u003cbr /\u003e\u003cbr /\u003e5) Now the server is running, but you need to set it up: \u003cbr /\u003e\u003cbr /\u003e6) Setup an admin account, which will log you in.\u0026nbsp;\u0026nbsp; (note, if you screw this up, you can delete it by deleting the data/databases/users directory).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eYou can add a jar to the repo and watch how archiva updates the maven repo for you, \u003c/b\u003e\u003cbr /\u003e8)Go to \"http://\u0026lt;your_server\u0026gt;/archiva/upload!doUpload.action\", and then click \"upload artifact\".\u003cbr 
/\u003e\u003cbr /\u003e9) After you enter the fields in (don't worry, this can be deleted), you will find that the following files have been amended in your installation:\u003cbr /\u003e\u003cbr /\u003e./apache-archiva-1.3.6/data/repositories/snapshots/.indexer/_2.cfs\u003cbr /\u003e./apache-archiva-1.3.6/data/repositories/snapshots/\u0026lt;your group\u0026gt;/1/maven-metadata.xml\u003cbr /\u003e./apache-archiva-1.3.6/data/repositories/snapshots/\u0026lt;your group\u0026gt;/1/1/1-1.JAR\u003cbr /\u003e./apache-archiva-1.3.6/data/repositories/snapshots/\u0026lt;your group\u0026gt;/1/1/maven-metadata.xml\u003cbr /\u003e./apache-archiva-1.3.6/data/databases/archiva/log/log1.dat\u003cbr /\u003e./index.html\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eWhat happened? \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eArchiva has updated the internal maven information for you and created groups/versions/etc...\u003cbr /\u003e\u003cbr /\u003eNow - you can go and \u003ci\u003edelete \u003c/i\u003ethe same repo via the user interface, and try the fancy, automated, command-liney strategy below - which will be more familiar to die-hard maven users who administer their own repos locally:\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eHow to publish manually to a maven repo:\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eYou can also directly publish to the internal archiva repo using standard mvn deploy:deploy syntax: \u003cbr /\u003e\u003cbr /\u003e10) mvn deploy:deploy-file -Dfile\u003d\u0026lt;path_to_jar\u0026gt; -DpomFile\u003d\u0026lt;path_to_pom\u0026gt; -DrepositoryId\u003dInternal -Durl\u003dfile:\u0026lt;path_to_archiva\u0026gt;/data/repositories/internal (deploys to the internal/ repository, as opposed to the snapshot/ repository). \u003cbr /\u003e\u003cbr /\u003e11) Now, wait ! After you do this, you have to UPDATE the archiva database.\u0026nbsp;\u0026nbsp; This allows archiva to see all the unprocessed artifacts that are in your repository. 
You can run these updates by clicking the convenient little buttons at http://\u0026lt;your_ip_address\u0026gt;/archiva/admin/database.action .\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eYay: Now you can browse, deploy, delete, and publish your jars from a single interface. \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eSo the moral of the story?\u0026nbsp; In spite of all the jargon, Maven repos are still really just a bunch of directories - but its the convention-over-configuration part that has turned maven into a universal language for JVM dependencies. "},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/102126417789997416/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/04/maven-repo-management-with-archiva.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/102126417789997416"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/102126417789997416"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/04/maven-repo-management-with-archiva.html","title":"Setting up Archiva to host your Maven Repo in 15 minutes"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-9135915868245997341"},"published":{"$t":"2013-04-07T08:33:00.002-07:00"},"updated":{"$t":"2013-04-07T16:28:46.555-07:00"},"title":{"type":"text","$t":"Turning micro-commits into one 
megacommit"},"content":{"type":"html","$t":"\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://upload.wikimedia.org/wikipedia/commons/4/42/1989_HK_Sheung_Wan_Bonham_Strand_VITA_Distilled_Water.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"240\" src\u003d\"http://upload.wikimedia.org/wikipedia/commons/4/42/1989_HK_Sheung_Wan_Bonham_Strand_VITA_Distilled_Water.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eWhen I first learned about versioning you were told to \"commit often\".\u0026nbsp; On larger open source projects I've been painfully learning that the story is a little different -- you want to squash the granularity of your commits so that there is a 1-1 commit-to-feature ratio.\u003cbr /\u003eSo - you need to distill all your micro commits into a **single** mega commit.\u0026nbsp; \u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003e First you need to know where \"micro-commits\" come from.\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003eMicro commits can come from (at least) two places.\u0026nbsp; In my case - they come from pulls, and from me.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e1) The insidious micro-commit generator lurking within: git pull.\u003c/b\u003e \u003cbr /\u003eWhen you issue:\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: 
yellow;\"\u003e\u003ci\u003egit pull\u003c/i\u003e\u003c/div\u003e\u003cbr /\u003eGit is actually doing TWO things for you:\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e- Pulling down source code from remote\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e- Merging it into your branch \u0026lt;-- this is a source of a commit :(\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e2) Another micro-commit source: you. \u003c/b\u003e\u003cbr /\u003eThe other micro-commits are the ones you deliberately do:\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003egit commit -m \"another teenie-tiny commit with a single semicolon in it\"\u003c/span\u003e\u003c/i\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cb\u003e\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003eNow - you need to know how to squash them. \u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u0026nbsp;\u003c/b\u003eThere are two rules to commit squashing:\u003cbr /\u003e\u003cul\u003e\u003cli\u003eREBASE when you pull from remote ! This squashes your \"pull\" micro commits.\u0026nbsp; \u003c/li\u003e\u003cli\u003eWhen you push to the main branch (or issue pull requests to the main branch), you should do so by CREATING a specific new branch, merging commits from your \"development\" branch by using the \"--squash\" option.\u0026nbsp;\u003c/li\u003e\u003cli\u003eNote that you can automate rebasing in your .git/ config files. \u003c/li\u003e\u003c/ul\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003eExample\u003c/b\u003e\u003ci\u003e\u0026nbsp;\u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e\u0026nbsp;\u003c/i\u003eSay you have \"my_feature_branch\" that you want to merge into \"master\". \u0026nbsp; Here is what a workflow might look like. 
\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e#\u003cb\u003eDo some work\u003c/b\u003e\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit checkout my_feature_branch\u003c/i\u003e\u003cbr /\u003e\u003ci\u003etouch a\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit add a\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit commit -m \"micro commit 1\"\u003cb\u003e\u0026nbsp;\u003c/b\u003e\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e\u003cb\u003e#Maybe you want to pull some stuff down ... heres the cleanest way to do it.\u003c/b\u003e\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit pull origin master --\u003cspan style\u003d\"color: lime; font-size: large;\"\u003erebase\u003c/span\u003e\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e#okay... now lets do some MORE work... \u003c/i\u003e\u003cbr /\u003e\u003ci\u003etouch b\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit add b\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit commit -m \"micro commit 2\" \u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cu\u003e\u003ci\u003e\u003cb\u003eAt this point, you have 2 commits (you would have 3 if you didn't do the rebase\u003c/b\u003e\u003c/i\u003e\u003c/u\u003e).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u003ci\u003e# Now... checkout the master branch, and merge all of micro commits into one mega commit. \u003c/i\u003e\u003c/b\u003e\u003cbr /\u003e\u003ci\u003egit checkout master\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit merge --\u003cspan style\u003d\"color: lime; font-size: large;\"\u003esquash\u003c/span\u003e my_feature_branch\u003c/i\u003e\u003cbr /\u003e\u003ci\u003egit commit -m \"a mega commit with 2 commits worth of new stuff, which is already also merged with latest master\"\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003eYAY ! 
No no more micro commits in your history :).\u003c/b\u003e\u003c/span\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/9135915868245997341/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/04/turning-micro-commits-into-one.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/9135915868245997341"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/9135915868245997341"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/04/turning-micro-commits-into-one.html","title":"Turning micro-commits into one megacommit"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-5561412815645167954"},"published":{"$t":"2013-03-27T19:55:00.001-07:00"},"updated":{"$t":"2013-05-16T11:23:49.963-07:00"},"title":{"type":"text","$t":"A completely rebuildable Fedora16 Gluster development box."},"content":{"type":"html","$t":"\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: center;\"\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca 
href\u003d\"http://upload.wikimedia.org/wikipedia/commons/a/a6/Meat_eater_ant_feeding_on_honey02.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"213\" src\u003d\"http://upload.wikimedia.org/wikipedia/commons/a/a6/Meat_eater_ant_feeding_on_honey02.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003ePlaying with gluster in a simple Fedora  VM gives you a chance to mess with the translator stack, fine tune your  install, and other general aspects of glusterd maintenance.\u0026nbsp; Here's an easy to rebuild and tear down gluster-development-environment on KVM with Fedora 16, without depending on any particular external disk device. \u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003eBefore you start...\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eThis is for developers or people that want to go into the innards of gluster in a nice, happy, safe, non-hazardous enviornment.\u0026nbsp;\u003ci\u003e\u0026nbsp;\u003c/i\u003e\u003c/div\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003ci\u003e\u003cspan style\u003d\"background-color: #ea9999;\"\u003eIf you are a \"real\" gluster user looking for tutorials, then you probably shouldn't be here !!! 
Instead, go to \u003ca href\u003d\"http://community.gluster.org/\"\u003ehttp://community.gluster.org\u003c/a\u003e or \u003ca href\u003d\"http://redhat.com/storage\"\u003ehttp://redhat.com/storage\u003c/a\u003e.\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/i\u003e\u003c/div\u003e\u003c/blockquote\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eThe workflow here is for people who:\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003col\u003e\u003cli\u003eWant to build gluster from source without learning all the dark arts of c building.\u003c/li\u003e\u003cli\u003eWant to be able to edit gluster source code and see the results immediately.\u003c/li\u003e\u003cli\u003eWant to be able to easily restore their gluster environment after ruining it (either by deleting libraries, mucking up the code, or corrupting volume / brick information).\u003c/li\u003e\u003c/ol\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eThis means that you don't have to worry  about ruining or corrupting your precious gluster installation or  corrupting volumes which somehow might affect the overall stability of  gluster services.\u0026nbsp; If you corrupt something, you can rebuild this entire  thing from scratch by running one shell script.\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u003c/div\u003e\u003cbr /\u003e\u003cb\u003ePre-requisites\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u0026nbsp;\u003c/b\u003e \u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eAlmost anyone should be able to follow the steps here to set up a gluster development environment.\u0026nbsp; All you really need is KVM, KVM's virt-manager, and an internet connection.\u0026nbsp; Within 20 minutes (or so) you should have a working gluster file system that has been built directly from source.\u0026nbsp; More importantly - by rerunning the scripts in this post you will be able to \u003ci\u003erebuild 
\u003c/i\u003eyour entire environment from scratch by running the install script, which, at the outset, cleans out any possible remnants of your gluster installation for you.\u003c/div\u003e\u003cbr /\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003eBTW: There is no magic here.\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eTo make a reproducible, fully virtualized, simple to set-up and tear down gluster sandbox environment you need to solve 4 simple problems.\u0026nbsp; It's easy to forget the solution to any one of these, which is the only reason why this post exists.\u003c/div\u003e\u003col style\u003d\"text-align: left;\"\u003e\u003cli\u003e\u003ci\u003eAutomatically, safely, non-manually re-creating brand new brick disks on the fly: \u003c/i\u003eYou can create a loop-back mount as your gluster brick.\u0026nbsp; Thus, you don't need to get all fancy in attaching a device as your mount point.\u0026nbsp; That is, you can simulate a \"disk device\" with the unix \"truncate\" command. 
Thanks to \u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003e\u003ca href\u003d\"http://spinningmatt.wordpress.com/\"\u003ematt farellee\u003c/a\u003e \u003c/b\u003efor showing me this.\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003ci\u003eBuilding gluster from source in a fresh fedora box with no libraries already installed.\u0026nbsp; \u003c/i\u003eYou can easily build gluster from source into RPMs whose state is very easy to manage.\u0026nbsp; \u003cb\u003e Justin Clift has a great article on this (\u003c/b\u003e\u003cb\u003e\u003ca href\u003d\"http://www.gluster.org/community/documentation/index.php/CompilingRPMS\"\u003ehttp://www.gluster.org/community/documentation/index.php/CompilingRPMS\u003c/a\u003e\u003c/b\u003e\u003cb\u003e)\u003c/b\u003e, which is adapted into a shell script here.\u003c/li\u003e\u003cli\u003e\u003ci\u003ePurging old gluster source code and libraries so that you know your build is really and truly fresh: \u003c/i\u003eYou can purge all of gluster's artifacts on your system in a couple of simple commands, just to be safe, as per a recent email thread \u003ca href\u003d\"http://www.mail-archive.com/gluster-devel@nongnu.org/msg09129.html\"\u003ehttp://www.mail-archive.com/gluster-devel@nongnu.org/msg09129.html\u003c/a\u003e.\u0026nbsp; The logic for this is *also* included in this post. \u003c/li\u003e\u003cli\u003e\u003ci\u003eExternal internet access from inside of a KVM virtual network\u003c/i\u003e is done through a default gateway address that you need to get right.\u0026nbsp; This is necessary for building the source from github and acquiring the yum dependencies.\u0026nbsp; I've written this up here \u003cspan class\u003d\"author-p-28714 url\"\u003e\u003ca href\u003d\"http://jayunit100.blogspot.com/2013/03/static-ips-on-minimal-kvm-fedora-16.html\"\u003ehttp://jayunit100.blogspot.com/2013/03/static-ips-on-minimal-kvm-fedora-16.html\u003c/a\u003e. 
\u003c/span\u003e\u003c/li\u003e\u003c/ol\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003eThis could be easier.\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eIdeally however, you would realize an even more automated lifecycle for your gluster sandbox, using a modular configuration/infrastructure driver like puppet.\u0026nbsp;\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eIn fact there has been chatter about using puppet and kvm together on the gluster community site:\u0026nbsp; \u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003ca href\u003d\"http://www.gluster.org/2012/08/how-i-do-vm-management-using-puppet-kvm-and-glusterfs-on-rhelcentos/\"\u003ehttp://www.gluster.org/2012/08/how-i-do-vm-management-using-puppet-kvm-and-glusterfs-on-rhelcentos/\u003c/a\u003e.\u0026nbsp; \u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eSo, yeah... 
It would be great to see the scripts and tricks cobbled together in this post completely puppetized at some point.\u0026nbsp;\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eBut for now, this should be easy enough:\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-weight: bold;\"\u003e1) Download the Fedora 16 ISO (updated: official fedora project link).\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-weight: bold;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003ewget\u003cspan style\u003d\"font-weight: bold;\"\u003e \u003c/span\u003e\u003cspan class\u003d\"author-p-28714\"\u003e\u003c/span\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003ca href\u003d\"http://download.fedoraproject.org/pub/fedora/linux/releases/16/Fedora/x86_64/iso/Fedora-16-x86_64-DVD.iso\"\u003ehttp://download.fedoraproject.org/pub/fedora/linux/releases/16/Fedora/x86_64/iso/Fedora-16-x86_64-DVD.iso\u003c/a\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003e2)  Install a fresh fedora box using KVM\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eYou can do this in the linux virtual machine manager ui, which allows you to create KVM boxes.\u0026nbsp;\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003col\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eClick \"create a new virtual 
machine\"\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eSelect \"local install media\"\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eUse ISO Image, and browse to the above Fedora ISO file ^^.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eAllocate some RAM (a reasonable amount, 1G+, since you're building a complex software application in it).\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eCreate a disk image directly on the drive (or get fancy and attach extra disk devices after).\u0026nbsp; I used 30G.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eWhen the install process starts - you'll see the Fedora splash screens come up.\u0026nbsp; \u003cspan style\u003d\"background-color: lime;\"\u003eChoose \"MINIMAL\" as the installation type - that way your VM is super lean, starts up fast, and doesn't waste any resources on fancy windowing.\u003c/span\u003e\u0026nbsp; You can use the basic terminal that KVM gives you, or else, you can SSH into it from your host to run the install commands.\u003c/span\u003e\u003c/li\u003e\u003c/ol\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003e3) Start the VM installation process \u003c/b\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eThe Fedora installation window will pop up - you can use the default settings, and (suggested) select \"Minimal\" install.\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 
url\"\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003e4)\u003c/b\u003e\u0026nbsp; \u003cb style\u003d\"background-color: lime;\"\u003eSetup\u003c/b\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e \u003c/span\u003e\u003cb style\u003d\"background-color: lime;\"\u003enetworking\u003c/b\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e so that you have a static IP address\u003c/span\u003e, and so that your VM is capable of pinging the outside world.\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cul\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eI've described this in the previous post: \u003ca href\u003d\"http://jayunit100.blogspot.com/2013/03/static-ips-on-minimal-kvm-fedora-16.html\"\u003ehttp://jayunit100.blogspot.com/2013/03/static-ips-on-minimal-kvm-fedora-16.html\u003c/a\u003e.\u0026nbsp; This involves setting the GATEWAY property in the VM's ifcfg-eth0 file (the file that defines the eth0 properties).\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eBefore you install gluster, you've got to make sure you have the GATEWAY field working on KVM.\u0026nbsp; In my case I used KVM's NAT networking, and found out (the hard way) that the\u003cspan style\u003d\"background-color: yellow;\"\u003e\u003cspan style\u003d\"background-color: white;\"\u003e \u003c/span\u003eGATEWAY for KVM boxes is proxied as an internal ip address inside of the guest machine, in my case, the gateway was 192.168.122.1.\u0026nbsp;\u003c/span\u003e\u003c/span\u003e \u003c/li\u003e\u003cul\u003e\u003cli\u003eWhen your guests are in a \"virtual  network\", you bridge to them through libvirtd, and that is done through \"192.168.122.1\".\u0026nbsp; 
\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003eFor RHEL ~ you will set these values using the \"system-config-network\" tool, which is visual (you can install it via \u003ci\u003eyum install system-config-network-tui\u003c/i\u003e).\u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/ul\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003e5)\u0026nbsp; \u003cspan style\u003d\"background-color: lime;\"\u003eInstall a few essentials on your bare bones Fedora box \u003c/span\u003e(if you chose the minimal installation):\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u003ci\u003eyum install openssh-clients #\u0026lt;-- this is \"scp\" \u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003ci\u003e\u0026nbsp; \u0026nbsp; \u0026nbsp; yum install tree #\u0026lt;-- you'll want this to view directories\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u003ci\u003eyum install git #we're pulling gluster from github.\u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003ci\u003e\u0026nbsp; \u0026nbsp; \u0026nbsp; #below: gluster build dependencies: \u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003ci\u003e\u0026nbsp; \u0026nbsp; \u0026nbsp; yum install libtool autoconf automake flex bison openssl openssl-devel libibverbs-devel readline-devel libxml-devel libxml2-devel 
make\u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003ci\u003e\u0026nbsp; \u0026nbsp; \u0026nbsp;\u0026nbsp; #This is a new one...\u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003ci\u003e\u0026nbsp; \u0026nbsp; \u0026nbsp;\u0026nbsp; yum install librdmacm-devel\u003c/i\u003e \u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003e4) Run this script !\u0026nbsp;\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cul\u003e\u003cli\u003e\u003cb\u003eThis script\u003cspan style\u003d\"background-color: lime;\"\u003e builds gluster RPMs and installs them for you locally.\u003c/span\u003e\u003c/b\u003e\u003c/li\u003e\u003cli\u003e\u003cb\u003eIt also REMOVES old rpms and old gluster artifacts that might exist on your system, and pulls fresh from \u003ca href\u003d\"https://github.com/gluster/glusterfs\"\u003e\u003ci\u003ehttps://github.com/gluster/glusterfs\u003c/i\u003e\u003c/a\u003e.\u003c/b\u003e\u003c/li\u003e\u003cli\u003e\u003cb\u003eIts taken largely from a community article on building gluster RPMs\u003c/b\u003e\u003ca href\u003d\"http://www.gluster.org/community/documentation/index.php/CompilingRPMS\"\u003e\u003cb\u003e \u003c/b\u003e\u003c/a\u003e\u003cb\u003e\u003ca href\u003d\"http://www.gluster.org/community/documentation/index.php/CompilingRPMS\"\u003ehttp://www.gluster.org/community/documentation/index.php/CompilingRPMS\u003c/a\u003e\u003c/b\u003e\u003cb\u003e\u003ca href\u003d\"http://www.gluster.org/community/documentation/index.php/CompilingRPMS\"\u003e. 
\u003c/a\u003e\u003c/b\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cpre\u003eread -p \"Purging any trace of gluster from your system, you could even lose data, hit any key to continue.\" \u003cbr /\u003e\u003cbr /\u003ekillall -9 -r gluster\u003cbr /\u003eyum remove $(rpm -qa | grep gluster)\u003cbr /\u003erm -rf /var/lib/glusterd\u003cbr /\u003erm -rf /etc/glusterfs\u003cbr /\u003e\u003cbr /\u003eecho \"Now the really dangerous part is over..., starting the install - getting libraries\"\u003cbr /\u003e\u003cbr /\u003eecho \"Now switching to /tmp\"\u003cbr /\u003ecd /tmp/\u003cbr /\u003esudo yum -y install gcc python-devel python-setuptools\u003cbr /\u003esudo easy_install python-swiftclient\u003cbr /\u003esudo yum -y install http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm\u003cbr /\u003esudo yum -y install python-webob1.0 python-paste-deploy1.5 python-sphinx10\u003cbr /\u003esudo yum -y install http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm\u003cbr /\u003esudo yum -y install git autoconf automake bison dos2unix flex fuse-devel libaio-devel \\\u003cbr /\u003e   libibverbs-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \\\u003cbr /\u003e   python-devel python-eventlet python-netifaces python-paste-deploy python-simplejson \\\u003cbr /\u003e   python-sphinx python-webob pyxattr readline-devel rpm-build systemtap-sdt-devel tar\u003cbr /\u003esudo yum -y install rpcbind\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003esleep 1\u003cbr /\u003eecho \"Cloning down the source\"\u003cbr /\u003egit clone git://git.gluster.org/glusterfs\u0026nbsp;\u003c/pre\u003e\u003cpre\u003eecho \"Checking out 3.4 --- update this branch in the line below (git branch -a to list) or press enter to continue.\"\u003c/pre\u003e\u003cpre\u003eread \u003c/pre\u003e\u003cpre\u003egit checkout 
release-3.4 \u003c/pre\u003e\u003cpre\u003egit pull\u003cbr /\u003e\u003cbr /\u003esleep 1 \u003cbr /\u003eecho \"Now starting the build\" \u003cbr /\u003ecd glusterfs\u003cbr /\u003e./autogen.sh\u003cbr /\u003e./configure --enable-fusermount\u003cbr /\u003e\u003cbr /\u003e#Rather than \"make install\", we make an rpm distribution.  Suggested by\u003cbr /\u003e#Jeff Darcy and others as the right way to install. \u003cbr /\u003emake dist\u003cbr /\u003e\u003cbr /\u003ecd extras/LinuxRPM\u003cbr /\u003emake glusterrpms\u003cbr /\u003e\u003cbr /\u003esleep 2 \u003cbr /\u003e\u003cbr /\u003eecho \"Done building gluster.  Now installing .  \"\u0026nbsp;\u003c/pre\u003e\u003cpre\u003e\u0026nbsp;\u003c/pre\u003e\u003cpre\u003e#Order matters here:\u003c/pre\u003e\u003cpre\u003erpm -ivh glusterfs-*git-1.fc16.x86_64.rpm \u003cbr /\u003erpm -ivh glusterfs-*devel-*git-1.fc16.x86_64.rpm \u003cbr /\u003erpm -ivh glusterfs-*fuse-*git-1.fc16.x86_64.rpm \u003cbr /\u003erpm -ivh glusterfs-*server-*git-1.fc16.x86_64.rpm\u003cbr /\u003e\u003cbr /\u003eecho \"DONE. 
Note, if you get a final error message, it can be ignored for a dev enviornment.\"\u003cbr /\u003e\u003cbr /\u003eecho \"starting glusterd now !\"\u003cbr /\u003eservice glusterd start\u003c/pre\u003e\u003cpre\u003e\u0026nbsp;\u003c/pre\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003e5) Finally, \u003cspan style\u003d\"background-color: lime;\"\u003ecreate a loop-back mount system - a super easy trick for simulating a real \"disk\" device by just allocating a big file using the truncate command.\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003eYou can run this script with the sample inputs in the echo statements below.\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan class\u003d\"author-p-28714 url\"\u003e\u003cb\u003e\u003cspan style\u003d\"background-color: white;\"\u003eNote: The first couple of lines unmount and remove the brick and mount point directory, \u003cspan style\u003d\"background-color: magenta;\"\u003eso please please please don't use this for anything important.\u003c/span\u003e\u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"font-family: \u0026quot;Courier New\u0026quot;,Courier,monospace;\"\u003e\u003cspan class\u003d\"author-p-28714 url\" style\u003d\"font-size: x-small;\"\u003e\u003cspan style\u003d\"background-color: white;\"\u003e#Delete a mount and brick, and recreate them using a loopback.\u003c/span\u003e\u003cbr style\u003d\"background-color: white;\" /\u003e#This script is good for highly simplified development\u003cbr /\u003e\u003cbr 
/\u003eif [ \"$#\" -ne 3 ]\u003cbr /\u003e\u0026nbsp; then\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; echo \"Usage: mount location, brick location.\u0026nbsp; \"\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; echo \"For example : /mnt/glusterfs /mnt/mybrick1 MyVolume\"\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; exit 1 #exit shell script\u003cbr /\u003efi\u003cbr /\u003e\u003cbr /\u003eMNT\u003d$1\u003cbr /\u003eBRICK\u003d$2\u003cbr /\u003eVOL\u003d$3\u003cbr /\u003e\u003cbr /\u003eread -p \"WARNING: DELETING $MNT and $BRICK ... Press a key to continue !\"\u003cbr /\u003eumount $MNT\u003cbr /\u003erm -rf $MNT\u003cbr /\u003emkdir -p $MNT\u003cbr /\u003e\u003cbr /\u003eumount $BRICK\u003cbr /\u003erm -rf $BRICK\u003cbr /\u003emkdir -p $BRICK\u003cbr /\u003e\u003cbr /\u003eecho \"Now creating a file ${BRICK}.raw\"\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: lime;\"\u003etruncate -s 1G ${BRICK}.raw ;\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan class\u003d\"author-p-28714 url\" style\u003d\"font-size: x-small;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan class\u003d\"author-p-28714 url\" style\u003d\"font-size: x-small;\"\u003e#NOTE: ext4 is not ideal... 
SHOULD be mkfs.xfs instead of ext4\u003c/span\u003e\u003cbr /\u003e\u003cspan class\u003d\"author-p-28714 url\" style\u003d\"font-size: x-small;\"\u003e#ext4 is not recommomended by the gluster team !\u003cbr /\u003emkfs.ext4 ${BRICK}.raw ;\u003cbr /\u003e\u003cbr /\u003e#Here is where the magic happens.\u003cbr /\u003e\u003cbr /\u003eecho \"Now mounting the loopback!\"\u003cbr /\u003emount -o loop ${BRICK}.raw ${BRICK} ;\u003cbr /\u003e\u003cbr /\u003eecho \"Now creating the volume which writes to loopback brick\"\u003cbr /\u003egluster volume create $VOL $(hostname):$BRICK\u003cbr /\u003e\u003cbr /\u003eecho \"Now starting the volume...\"\u003cbr /\u003esleep 1\u003cbr /\u003egluster volume start $VOL\u003cbr /\u003e\u003cbr /\u003esleep 1\u003cbr /\u003eecho \"Finally : mounting gluster to $MNT\"\u003cbr /\u003emount -t glusterfs $(hostname):$VOL $MNT\u003c/span\u003e\u003c/div\u003e\u003c/div\u003e\u003cpre\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e \u003c/span\u003e\u003c/pre\u003e\u003cb\u003e\u003c/b\u003e\u003c/div\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/5561412815645167954/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/03/a-completely-rebuildable-fedora16.html#comment-form","title":"1 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5561412815645167954"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5561412815645167954"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/03/a-completely-rebuildable-fedora16.html","title":"A completely rebuildable Fedora16 Gluster development box."}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"1"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-6459472501879841923"},"published":{"$t":"2013-03-26T08:07:00.001-07:00"},"updated":{"$t":"2013-03-26T16:16:21.118-07:00"},"title":{"type":"text","$t":"Static IPs on KVM + (minimal) Fedora 16 installation"},"content":{"type":"html","$t":"\u003cb\u003eGetting networking to work the way you want it to in a VM\u003c/b\u003e \u003cb\u003ecan be scary sometimes.\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u0026nbsp;\u003c/b\u003e\u003cbr /\u003eI recently found it was tricky to have both a STATIC IP as well as internet connectivity inside of KVM provisioned VMs.\u0026nbsp;\u0026nbsp; The ultimate reason was that the \"GATEWAY\" field was not being set right.\u0026nbsp;\u003cbr /\u003e\u003cbr /\u003eThe moral of the story:\u0026nbsp; On KVM boxes, when bridging in a virtual network, the virt managed default bridged IP address needs to be set as the gateway... Thus, when your guests are in a \"virtual  network\" - the \"gateway\" is the internal IP address provided to them by the hypervisor that represents the \"outside\" of their internal network. \u0026nbsp;\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e1) Stop the firewalls in iptables:\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; iptables -F\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e2) Edit /etc/sysconfig/network-scripts/ifcfg-eth0 like so (obviously the \"IPADDR\" can be anything you want as long as its in the right range. 
\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eDEVICE\u003d\"eth0\" \u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eHWADDR\u003d\"52:54:00:94:AF:F4\"\u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"background-color: yellow; font-size: x-small;\"\u003eBOOTPROTO\u003d\"static\" \u0026lt;--- Change this from \"dhcp\" to \"static\"\u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eONBOOT\u003d\"yes\"\u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eNM_CONTROLLED\u003d\"yes\"\u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eIPADDR\u003d192.168.122.171 \u0026lt;--- this is any old arbitrary IP in-range.\u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eNETMASK\u003d255.255.255.0\u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"background-color: yellow; font-size: x-small;\"\u003eGATEWAY\u003d192.168.122.1\u003c/span\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003e \u0026lt;--- this is aliased ipaddress of your host inside of KVM.\u003c/span\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eUSERCTL\u003d\"no\"\u003c/span\u003e\u003c/blockquote\u003e3) Restart networking so that the IP and GATEWAY can be reset. 
\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; service network restart\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e4) Ping somebody and watch the magic unfold: \u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; ping www.yahoo.com\u003cbr /\u003e\u003cbr /\u003eYay! \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eIf you get a \"Destination Host Unreachable\":\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eYour gateway is wrong.\u0026nbsp;\u0026nbsp; Make sure your gateway is the same as your host vm's ipaddress.\u0026nbsp; \u003cbr /\u003e\u0026nbsp;\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003eIf you get an unknown host:\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eYou may not have an assigned IP (IPADDR wrong or out of range).\u003cb\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003ci\u003eA strange note to keep in mind --- somehow, even with a bad gateway (i.e. 
destination host unreachable), I could still resolve hostnames - not sure why.\u0026nbsp; \u003c/i\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/6459472501879841923/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/03/static-ips-on-minimal-kvm-fedora-16.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/6459472501879841923"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/6459472501879841923"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/03/static-ips-on-minimal-kvm-fedora-16.html","title":"Static IPs on KVM + (minimal) Fedora 16 installation"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-2904909814011688684"},"published":{"$t":"2013-03-18T14:32:00.004-07:00"},"updated":{"$t":"2013-03-18T15:36:31.644-07:00"},"title":{"type":"text","$t":"Idiomatic Github development : pull request + fetch remote"},"content":{"type":"html","$t":"\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003c/div\u003e\u003cbr /\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca 
href\u003d\"http://4.bp.blogspot.com/-tmZXcR4kvg0/UUeKhhe5MWI/AAAAAAAACLI/8p3XTZlTlwc/s1600/get_preview.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: 1em; margin-right: 1em;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"320\" src\u003d\"http://4.bp.blogspot.com/-tmZXcR4kvg0/UUeKhhe5MWI/AAAAAAAACLI/8p3XTZlTlwc/s320/get_preview.png\" width\u003d\"280\" /\u003e\u003c/a\u003e\u003c/div\u003e\u0026nbsp; \u003cbr /\u003eWhen getting started with github, its easy to get confused between \"forks\" and \"branches\".\u0026nbsp; In this post we'll go through a simple workflow for pull-request driven development, including the necessary \"trick\" to keep your personal \"forked\" repository in sync with the original github repo.\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e1) Forks are personal - you can \"fork\" a git hub repository as a way to start developing against an existing repoistory without being coupled to it.\u0026nbsp;\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e2) Branches are not necessarily personal : their namespace shared in the same original repository.\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003eUsing branches as a commiter on the primary repo: \u003c/b\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e1) Become a commiter on the repo, write your own branches, and then merge your branches into head.\u003cbr /\u003e\u003cbr /\u003eor\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: 
lime;\"\u003e\u003cb\u003eContribute by using pull-requests as a non-commiter:\u003c/b\u003e \u003c/div\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime; text-align: left;\"\u003e\u003cb\u003e \u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime; text-align: left;\"\u003e2) FORK the original repo, write code anywhere (including on your \"master\" branch), and simply issue pull requests upstream.\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003eThe advantage of the 2nd approach :\u0026nbsp;\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003e1) It doesn't require you to be a registered commiter on a project. \u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003cb\u003e2) It doesn't constrain your workflow (you can write and push changes directly to your master, without worrying about branches).\u0026nbsp;\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003cb\u003e3) It naturally enables the convenient github enabled pull user interface as a methodology for incorporating fixes.\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003eIn any case, the above methodologies will allow you to develop without effecting the \"real\" master, and you can use a pull request to \"simulate\" the insulation provided by branching. 
\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003eHowever \u003c/b\u003eat some point you will want to pull the changes from the main repo into your fork.\u0026nbsp; You can do this by adding a 2nd remote to your fork, as follows (see 6 below):\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eAn example:\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime; text-align: left;\"\u003e1) Fork https://github.com/gluster/hadoop-glusterfs\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime; text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime; text-align: left;\"\u003e2) git clone https://github.com/jayunit100/hadoop-glusterfs \u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003ci\u003e\u003cu\u003e3) git remote add upstream https://github.com/gluster/hadoop-glusterfs\u003c/u\u003e\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e4) Edit some stuff.\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e4) git add -A . 
; git commit -m \"my first commit to my fork\" ; git push \u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e5) Issue a pull request \u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e#A few days later, maybe the original repo changes, and we want to pull those changes down:\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e6) git fetch upstream \u0026lt;-- keep your fork in sync with the original repo.\u003c/div\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/2904909814011688684/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/03/pull-requests-fetch-remote-for.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/2904909814011688684"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/2904909814011688684"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/03/pull-requests-fetch-remote-for.html","title":"Idiomatic Github development : pull request + fetch remote"}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://4.bp.blogspot.com/-tmZXcR4kvg0/UUeKhhe5MWI/AAAAAAAACLI/8p3XTZlTlwc/s72-c/get_preview.png","height":"72","width":"72"},"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-6193495027319035134"},"published":{"$t":"2013-02-25T17:15:00.001-08:00"},"updated":{"$t":"2013-02-26T14:59:54.117-08:00"},"title":{"type":"text","$t":"Setup a jenkins/git build server on EC2/RHEL"},"content":{"type":"html","$t":"\u003cb\u003eFirst, create your AMI and make sure that you've set up \"security groups\" to allow for an open HTTP 8080 port as described \u003ca href\u003d\"http://jayunit100.blogspot.com/2013/02/making-your-ec2-instances-application.html\"\u003ehere.\u003c/a\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eNow, ssh into your AMI, and do the following:\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e#First make sure and install javac - its probably not on the machine from the beggining.\u003c/b\u003e\u003cbr /\u003e\u0026nbsp;yum install java-1.6.0-openjdk-devel\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e#Resize the filesystem to be as big as possible - this is for fresh AMIs.\u0026nbsp; Sometimes, for some reason , the filesystems are small.\u0026nbsp;\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003e#install jenkins \u003cbr /\u003esudo wget -O /etc/yum.repos.d/jenkins.repo http://pkg.jenkins-ci.org/redhat/jenkins.repo\u003cbr /\u003esudo rpm --import http://pkg.jenkins-ci.org/redhat/jenkins-ci.org.key\u003cbr /\u003esudo yum install jenkins\u003cbr /\u003eservice jenkins start\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e#make 
sure its internally accessible/running before testing the external access by wgetting the jenkins home page:\u003c/b\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u003cbr /\u003ewget http://ec2-XX-XXX-XX-XXX.compute-1.amazonaws.com:8080\u003cbr /\u003ecat index.html \u0026lt;-- you should see something here :)\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e#stop iptables so that you can see the jenkins build server pages outside of ec2. \u003c/b\u003e\u003cbr /\u003e/etc/init.d/iptables stop\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cb\u003eNow you should be able to go to \u003c/b\u003e\u003cbr /\u003ehttp://ec2-XX-XXX-XX-XXX.compute-1.amazonaws.com:8080 on your local browser and see the jenkins home page :)\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eNext , set up your accounts permissions so that users can create accounts for themselves (temporary for a couple of minutes, and then deselect the \"allow users to sign up\" checkbox).\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003ehttp://ec2-XX-XXX-XX-XXX.compute-1.amazonaws.com:8080/configureSecurity/? \u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cb\u003eYay ! 
Now you have a jenkins build server with accounts.\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/6193495027319035134/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/02/setup-jenkinsgit-build-server-on-ec2rhel.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/6193495027319035134"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/6193495027319035134"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/02/setup-jenkinsgit-build-server-on-ec2rhel.html","title":"Setup a jenkins/git build server on EC2/RHEL"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-5252447938167280982"},"published":{"$t":"2013-02-25T11:22:00.002-08:00"},"updated":{"$t":"2013-02-25T13:44:47.781-08:00"},"title":{"type":"text","$t":"Making your EC2 instances application friendly"},"content":{"type":"html","$t":"\u003cb\u003eApp-oriented cloud servers like OpenShift, Heroku, and Google AppEngine come \"out of the box\" configured for immediate usability.\u0026nbsp; In constrast, EC2 instances provide you with a higher amount of flexibility at the cost of some initial, upfront configuration.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cdiv 
class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca href\u003d\"http://aws.typepad.com/files/AroundApp.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: 1em; margin-right: 1em;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"320\" src\u003d\"http://aws.typepad.com/files/AroundApp.png\" width\u003d\"317\" /\u003e\u003c/a\u003e\u003c/div\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003eSo, in order to define some of the common modifications you need to set up an EC2 server, lets walk through the set up of a jenkins build server on EC2.\u0026nbsp; Like any server, a build server will require a few non-trivial modifications:\u003cbr /\u003e\u003col\u003e\u003cli\u003eSecurity groups need to be correctly defined, so that ports are unblocked.\u003c/li\u003e\u003cli\u003eElastic ips need to be setup so that you can access your server at the same ip address.\u003c/li\u003e\u003cli\u003eEnabling HTTP ports (i.e. 80, 8080, ...).\u003c/li\u003e\u003c/ol\u003e\u0026nbsp;\u003cb\u003e1) Setting up Security Groups\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u0026nbsp;\u0026nbsp;\u003c/b\u003e \u003cbr /\u003eAt first glance, it appears that the \"Default\" security groups in AWS are wide open to the outside world.\u0026nbsp; However, AWS machine default security settings are very restrictive and generally need to be loosened up for any kind of server:\u003ci\u003e the default settings only allow machines in the *exact same* security group to see one another.\u003c/i\u003e\u0026nbsp;\u0026nbsp; The below image compares opened security group with closed one.\u0026nbsp; The difference is in the definition of the \"Source\" attribute, which specifies a range.\u0026nbsp; 0.0.0.0/0 essentially means \"Any and everyone in the entire universe\".\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv 
class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca href\u003d\"http://2.bp.blogspot.com/-PqbMfDhpmbQ/USuzGfmqHDI/AAAAAAAAB84/JcrGANKsZf0/s1600/secgroups.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: 1em; margin-right: 1em;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"200\" src\u003d\"http://2.bp.blogspot.com/-PqbMfDhpmbQ/USuzGfmqHDI/AAAAAAAAB84/JcrGANKsZf0/s320/secgroups.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/div\u003e\u003cbr /\u003e\u003cb\u003e2) Attaching Elastic IPs to your instances:\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\"Elastic IP\" addresses in AWS are essentially static IP addresses which are bound at the cloud layer, rather than at the machine layer.\u0026nbsp; That is, you associate elastic-ip addresses directly with named machine instances, rather than machines themselves.\u0026nbsp; This allows you to dynamically reroute the traffic to an instance without modifying any files on the AMI instance itself.\u003cbr /\u003e\u003cbr /\u003eThe below window shows an elastic ip, which is bound directly to an instance id.\u0026nbsp; The nice thing about the elastic-ip is that it can be rebound directly from this same interface, with no need of mucking with the individual machine.\u0026nbsp;\u003cbr /\u003e\u003cbr /\u003eUnlike other application oriented cloud-providers (i.e. 
heroku, openshift), AWS doesn't natively support easy-to-remember machine names, so you have to set up your own DNS CNAME records if you want a meaningful named host.\u003cbr /\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca href\u003d\"http://2.bp.blogspot.com/-XJ21oZYA0eI/USu4FHu4wHI/AAAAAAAAB9g/PMSgWbu22_Y/s1600/elasticipsec2.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: 1em; margin-right: 1em;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"71\" src\u003d\"http://2.bp.blogspot.com/-XJ21oZYA0eI/USu4FHu4wHI/AAAAAAAAB9g/PMSgWbu22_Y/s320/elasticipsec2.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003c/div\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e3) Enabling HTTP ports\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eDouble check that you have HTTP access opened up (the below screenshot is taken from \u003ca href\u003d\"http://coenraets.org/\"\u003ehttp://coenraets.org/\u003c/a\u003e) :\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca href\u003d\"http://coenraets.org/blog/wp-content/uploads/2011/11/aws5.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: 1em; margin-right: 1em;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"225\" src\u003d\"http://coenraets.org/blog/wp-content/uploads/2011/11/aws5.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/div\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003eNext, by default, certain AMI instances may have IP tables turned ON (i.e. 
RHEL).\u0026nbsp; To turn off iptables:\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e/etc/init.d/iptables stop \u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eFinally, you should be able to access your services at: \u003c/b\u003e\u003cb\u003ehttp://ec2-\u003cspan style\u003d\"background-color: yellow;\"\u003e[XXXX]\u003c/span\u003e.amazonaws.com:\u003cspan style\u003d\"background-color: yellow;\"\u003e[PORT]\u003c/span\u003e/ (\u003c/b\u003ei.e. http://\u003cspan style\u003d\"background-color: yellow;\"\u003eec2-22-333-44-555\u003c/span\u003e.compute-1.amazonaws.com:\u003cspan style\u003d\"background-color: yellow;\"\u003e8080\u003c/span\u003e/\u003cb\u003e)\u003c/b\u003e.\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003c/div\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/5252447938167280982/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/02/making-your-ec2-instances-application.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5252447938167280982"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5252447938167280982"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/02/making-your-ec2-instances-application.html","title":"Making your EC2 instances application friendly"}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://2.bp.blogspot.com/-PqbMfDhpmbQ/USuzGfmqHDI/AAAAAAAAB84/JcrGANKsZf0/s72-c/secgroups.jpg","height":"72","width":"72"},"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-6228082741760373491"},"published":{"$t":"2013-02-14T17:24:00.002-08:00"},"updated":{"$t":"2013-02-25T15:44:55.261-08:00"},"title":{"type":"text","$t":"Java Management Extensions, Raw profiling, and ThreadMXBeans.  "},"content":{"type":"html","$t":"\u003cspan id\u003d\"goog_1114033238\"\u003e\u003c/span\u003e\u003cspan id\u003d\"goog_1114033239\"\u003e\u003c/span\u003eThe\u0026nbsp;\u003ca href\u003d\"http://en.wikipedia.org/wiki/Java_Management_Extensions\"\u003eJava Management Extensions\u003c/a\u003e\u0026nbsp;are a hidden gem in the JDK that many plain Java developers ignore- probably because of the deceptively enterprisey name. \u0026nbsp;These APIs are not just useful for profiling large Java EE apps. 
\u0026nbsp;Rather - they give you a precise and unbiased view into any snippet of code, by allowing you to look at CPU time and thread usage in a very direct fashion.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThe poor man's JMX profiling might look something like this:\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003elong startTime\u003d\u003cspan style\u003d\"background-color: red;\"\u003eSystem.currentTimeInMilliseconds()\u003c/span\u003e;\u003cbr /\u003edoSomething();\u003cbr /\u003elong mseconds \u003d System.currentTimeInMilliseconds()-mseconds();\u003cbr /\u003eSystem.out.println(\"It took \" + seconds +\" milliseconds\");\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThis might work on a simple machine that is not running any other applications. \u0026nbsp;But in a multiuser (or worse multithreaded) envioronment, the meaning of a raw time stamp before and after a method is lost. \u0026nbsp;A better way to do the same test:\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003elong startTime\u003d\u003cspan style\u003d\"background-color: lime; font-family: Monaco; font-size: 11px;\"\u003eManagementFactory.getThreadMXBean().getCurrentThreadCpuTime()/1000000;\u003c/span\u003e\u003cbr /\u003edoSomething();\u003cbr /\u003elong mseconds \u003d\u003cspan style\u003d\"font-family: Monaco; font-size: 11px;\"\u003eManagementFactory.getThreadMXBean().getCurrentThreadCpuTime()1000000;\u003c/span\u003e\u003cbr /\u003eSystem.out.println(\"It took \" + mseconds + \" cpu milliseconds\");\u003cbr /\u003e\u003cbr /\u003eCPU time, if measured in Milliseconds, can have a HUGE standard deviation because of the fact that we run so many different applications on a machine.\u0026nbsp; The ThreadMXBeans are able to lock down the time, specifically, when your JVM is actually USING the CPU. 
\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eI'm really serious here:\u0026nbsp; Check out how, in a simple test which adds several elements to a vector, we can see that, as we increase the size of the test the CPU time and real time diverge very rapidly ():\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 10000\u003cbr /\u003eraw time:6 milliseconds \u003cbr /\u003ecpu time:5 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 19952\u003cbr /\u003eraw time:6 milliseconds \u003cbr /\u003ecpu time:2 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 39810\u003cbr /\u003eraw time:2 milliseconds \u003cbr /\u003ecpu time:1 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 79432\u003cbr /\u003eraw time:3 milliseconds \u003cbr /\u003ecpu time:3 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 158489\u003cbr /\u003eraw time:6 milliseconds \u003cbr /\u003ecpu time:6 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 316228\u003cbr /\u003eraw time:11 milliseconds \u003cbr /\u003ecpu time:8 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 630959\u003cbr /\u003eraw time:21 milliseconds \u003cbr /\u003ecpu time:16 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 1258929\u003cbr /\u003eraw time:78 milliseconds \u003cbr /\u003ecpu time:33 milliseconds\u003cbr /\u003e\u003cbr /\u003eSIZE OF TEST : 2511895\u003cbr /\u003eraw time:204 milliseconds \u003cbr /\u003ecpu time:69 milliseconds\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThe code (RudolF sandbox repository git@github.com:jayunit100/RudolF.git, or just paste this and run it as a main class). 
\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cpre\u003epackage net.rudolfcode.jvm;\u003cbr /\u003e\u003cbr /\u003eimport java.lang.management.ManagementFactory;\u003cbr /\u003eimport java.util.ArrayList;\u003cbr /\u003eimport java.util.Collection;\u003cbr /\u003eimport java.util.Vector;\u003cbr /\u003e\u003cbr /\u003e/**\u003cbr /\u003e * You can use this class to profile insert times for a collection type\u0026nbsp;\u003c/pre\u003e\u003cpre\u003e(Vector in the example, but swapping it with an ArrayList is 10X faster at large scale).\u003c/pre\u003e\u003cpre\u003e\u0026nbsp;*/\u003cbr /\u003epublic class Example3 {\u003cbr /\u003e\u003cbr /\u003e public static void main(String[] args) throws Exception{\u003cbr /\u003e   profile((int)Math.pow(10, i));\u003cbr /\u003e }\u003cbr /\u003e\u003cbr /\u003e private static void profile(int nums) {\u003cbr /\u003e   \u003cbr /\u003e  ManagementFactory.getThreadMXBean().getCurrentThreadCpuTime();\u003cbr /\u003e    System.out.println(\"SIZE OF TEST : \" +nums);\u003cbr /\u003e    final long cpuStart1\u003dSystem.currentTimeMillis();\u003cbr /\u003e    run(new Vector(),nums);\u003cbr /\u003e    final long cpu1\u003d(System.currentTimeMillis()-cpuStart1);\u003cbr /\u003e    System.out.println(\"raw time:\"+cpu1 + \" milliseconds \");\u003cbr /\u003e\u003cbr /\u003e    final long cpuStart2\u003dManagementFactory.getThreadMXBean().getCurrentThreadCpuTime()/1000000;\u003cbr /\u003e    run(new Vector(),nums);\u003cbr /\u003e    final long cpu2\u003dManagementFactory.getThreadMXBean().getCurrentThreadCpuTime()/1000000;\u003cbr /\u003e    System.out.println(\"cpu time:\"+(cpu2-cpuStart2) +\" milliseconds\");\u003cbr /\u003e    System.out.println();\u003cbr /\u003e }\u003cbr /\u003e \u003cbr /\u003e public static float average(Collection\u0026lt;Long\u0026gt; l){\u003cbr /\u003e  long value\u003d0;\u003cbr /\u003e  for(Long v : l){\u003cbr /\u003e   value+\u003dv;\u003cbr /\u003e  }\u003cbr /\u003e  return 
((float)value)/(float)l.size();\u003cbr /\u003e }\u003cbr /\u003e  \u003cbr /\u003e /**\u003cbr /\u003e  * Add \"size\" elements into a collection .\u003cbr /\u003e  */\u003cbr /\u003e public static void run(Collection impl, int size) {\u003cbr /\u003e  impl.clear();\u003cbr /\u003e  //add a bunch of stuff to a collection\u003cbr /\u003e  for(int x \u003d 10 ; x \u0026lt; size; x++){\u003cbr /\u003e   impl.add((short)x);\u003cbr /\u003e  }\u003cbr /\u003e }\u003cbr /\u003e\u003cbr /\u003e}\u003cbr /\u003e\u003c/pre\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/6228082741760373491/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/02/raw-profiling-vs-threadmxbeans.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/6228082741760373491"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/6228082741760373491"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/02/raw-profiling-vs-threadmxbeans.html","title":"Java Management Extensions, Raw profiling, and ThreadMXBeans.  
"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-4225426239057893249"},"published":{"$t":"2013-01-11T14:09:00.001-08:00"},"updated":{"$t":"2013-05-16T12:45:24.057-07:00"},"title":{"type":"text","$t":"Setting up a Java+Maven on RHEL/Fedora"},"content":{"type":"html","$t":"\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cb\u003eJava Development on RHEL\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e(This post is very very boring)\u003c/div\u003e\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cdiv style\u003d\"background-color: yellow; text-align: left;\"\u003e\u003cb\u003eFirst you need the JDK !\u003c/b\u003e\u003c/div\u003e\u003c/blockquote\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eI never understood the config --alternatives command in linux... I guess its really simple. 
It just configures what symlinks should point to when more than one program is capable of performing a certain function on your machine.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003eBut, since I just got my RedHat laptop, I had to hack it up to use JDK 64.\u0026nbsp; Rather than 32 bit java, and I was afraid to \"uninstall\" 32 bit java, because uninstalling the stock, standard, issued Java seems to have strange consequences sometimes...\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eSo I went about it the \"configure --alternatives\" route:\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eTo update java (RHEL):\u0026nbsp;\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003ci\u003ejvyas\u0026gt; yum install java-1.6.0-sun-devel.x86_64\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003ci\u003ejvyas\u0026gt; su\u003c/i\u003e\u003cbr /\u003e\u003cb\u003eThen, as root:\u0026nbsp; \u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003ci\u003eroot\u0026gt; alternatives --config java\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eThere are 2 programs which provide 'java'.\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eSelection\u0026nbsp;\u0026nbsp;\u0026nbsp; Command\u003cbr /\u003e-----------------------------------------------\u003cbr /\u003e*\u0026nbsp; 1\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; /usr/lib/jvm/jre-1.6.0-sun.x86_64/bin/java\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e+ 2\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; /usr/lib/jvm/jre-1.6.0-sun/bin/java\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv 
style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003eFedora?\u003cb\u003e\u003c/b\u003e\u003c/blockquote\u003e\u003cb\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; To install java on Fedora\u0026nbsp; \u003c/b\u003e(openjdk)\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; jvyas\u0026gt; sudo yum list *java* | grep open | grep devel #\u0026lt;-- sanity check to see what JDKs are\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; available.\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; Hopefully, you should see something like \"java-1.6.0-openjdk-devel.x86_64\".\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; jvyas\u0026gt; sudo yum install java-1.6.0-openjdk-devel.x86_64\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eAnd next, you simply hit (1) to tell your machine to use java.\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eAnd then if you run it a second time, you can see that you are now using the up to date java, with the other one still intact: \u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eroot\u0026gt; alternatives --config java\u003cbr /\u003e\u003cbr /\u003eThere are 2 programs which provide 'java'.\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp; Selection\u0026nbsp;\u0026nbsp;\u0026nbsp; Command\u003cbr 
/\u003e-----------------------------------------------\u003cbr /\u003e*+ 1\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; /usr/lib/jvm/jre-1.6.0-sun.x86_64/bin/java\u003cbr /\u003e\u0026nbsp;\u0026nbsp; 2\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; /usr/lib/jvm/jre-1.6.0-sun/bin/java\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eAnd then theres finally the grand finale: \u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e[root@jvyas usr]# java -version\u003cbr /\u003ejava version \"1.6.0_37\"\u003cbr /\u003eJava(TM) SE Runtime Environment (build 1.6.0_37-b06)\u003cbr /\u003eJava HotSpot(TM) 64-Bit Server VM (build 20.12-b01, mixed mode)\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eYay \u003c/b\u003efor 64 bit JDK :)\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u003cu\u003e\u003cbr /\u003e\u003c/u\u003e\u003c/b\u003e\u003cb\u003e\u003cu\u003eSETTING UP MAVEN\u003c/u\u003e\u003c/b\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\" style\u003d\"background-color: yellow;\"\u003e\u003cb\u003eNow you (probably) need Maven .. (no, you cant just say \"yum install mvn\")?\u0026nbsp; So.. here's how you do it.\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003c/blockquote\u003e\u003cbr /\u003e\u003ci\u003eYou DON'T need a fancy auto-installer for maven !\u0026nbsp; Maven is, after all, just a jar file with a couple of simple binary wrappers as executables. \u0026nbsp; The Maven executables for all operating systems are under the bin/ directory, but they have to run from inside of your maven folder so that they can access the jar resources using relative paths. 
\u0026nbsp; \u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003eManually look up the download link to \"Maven 3.0.5 (Binary tar.gz)\".\u003cbr /\u003eIts always changing (mirrors).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eBut for example when I went to the site it was:\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e1) wget http://mirror.quintex.com/apache/maven/maven-3/3.0.4/binaries/apache-maven-3.0.4-bin.tar.gz\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003ci\u003e2) tar -xvf apache-maven-3.0.4-bin.tar\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e3) Follow the instructions in apache-maven-3.0.4/README.txt\u0026nbsp;\u0026nbsp; (basically, you just add the bin/ directory to your path, and your all set since the lib/ has all the maven required jars).\u0026nbsp;\u0026nbsp;\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e\u003cb\u003eThis part is standard linuxy/profile stuff... \u003c/b\u003e \u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e4) Make sure that you update your .bash_profile to add the maven binaries to your PATH. 
In other words, you should have a line that looks something like this:\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003eecho \"setting the path now..\"\u0026nbsp; \u003c/i\u003e\u003cbr /\u003e\u003ci\u003ePATH\u003d$PATH:$HOME/bin:/home/jvyas/Development/apache-maven-3.0.4/bin\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e5) Now, create a new shell (confirm that your .bash_profile or .bashrc is being called by inspecting it for the \"setting the path now\" message.\u0026nbsp; This tripped me up on my current build because my .bash_profile shell was being called, but simply deferring to .bashrc and then exiting prematurely.\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eNow you should be able to run \"mvn --version\" from anywhere after you open a new terminal, to confirm that your mvn was installed properly. \u003cbr /\u003e\u003ci\u003e\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eYippee ... 
mvn3 + java 6+ \u003d dev productivity :)\u003c/div\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/4225426239057893249/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2013/01/configuring-alternatives.html#comment-form","title":"1 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/4225426239057893249"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/4225426239057893249"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2013/01/configuring-alternatives.html","title":"Setting up a Java+Maven on RHEL/Fedora"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"1"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-121249209999313857"},"published":{"$t":"2012-12-23T10:21:00.002-08:00"},"updated":{"$t":"2013-02-26T14:58:49.644-08:00"},"title":{"type":"text","$t":"Why Functional Programming and Big Data go hand-in-hand"},"content":{"type":"html","$t":"\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://upload.wikimedia.org/wikipedia/commons/f/fe/Detailed_petri_net.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"154\" 
src\u003d\"http://upload.wikimedia.org/wikipedia/commons/f/fe/Detailed_petri_net.png\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eState is the enemy of dynamic computation.\u0026nbsp; Although it cannot be eliminated, it can be avoided by defining data driven workflows in terms of functions, predicates, and tuples (as opposed to defining a workflow in terms of sequential steps).\u0026nbsp; \u003c/td\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003e\u003c/td\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003e\u003c/td\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003c/td\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003c/td\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003c/td\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003e\u003cbr /\u003eBig data workflows involve transforming large amounts of information, often unstructured, into data science driven insights, or highly available data stores used by applications.\u0026nbsp; In order to create a big data store - we often have to define several tasks required to accomplish this transformation.\u003cbr /\u003e\u003cbr /\u003eInitially, many of us might envision such transformations as a flow of states.\u0026nbsp; This sort of thinking, while simplistic, is dangerous.\u0026nbsp; The reason is because (1) state is hard to maintain (2) understanding state requires defining and persisting transformations along side the state itself and (3) when dealing with large data sets, reprocessing is required if algorithms need to change - and that reprocessing is both costly 
from a computational perspective, as well as in terms of raw time wasted waiting for completion of a batch workflow.\u003cbr /\u003e\u003cbr /\u003eFor example, lets say we want to take a list of conversations, and extract human entites (names) from them and output a list of human entites who appear to be linked to one another\u003cb\u003e (\u003c/b\u003ei.e. joe and mary are linked if they have had a recent conversation). If we broke this into a linear flow of tasks, it might take more than a day to process all this information if we have, say, a pedabyte of raw text sitting on a computation cluster of 16 standard machines.\u003cbr /\u003e\u003cbr /\u003eWe can define these transformations imperatively, using a low-level framework like Hadoop's MapReduce.\u0026nbsp; Alternatively, we can define it using flows (i.e. using a framework such as Cascading/Cascalog, which is based off of the declarative programming paradigm).\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cu\u003e\u003cb\u003eThe Imperative Approach : Finite State Machines\u003c/b\u003e\u003c/u\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003eLets consider the above \"finite state machine\" approach (in the real world, this might correspond to a MapReduce application with several jobs).\u0026nbsp; The simplest way to design a workflow for such extraction might involve creating a two step workflow.\u0026nbsp; We start with a large set of text documents.\u0026nbsp; The first step would \"clean\" conversations, outputting plain text words.\u0026nbsp; The second step would involve linking and deduplicated the parties involved by using joins, for example, via MapReduce.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThe advantages\u003c/b\u003e of this - all data is entirely processed when the flow is completed, and it is extremely performant in the best case scenario, because we can stream through large documents very efficiently in a batch workflow, due to the minimization of latency of 
starting and stopping.\u0026nbsp; Its \"easy\" to explain to someone - there is \"step 1\", \"step 2\", and so on.\u0026nbsp; However, the disadvantage is that it is difficult to redirect and modify.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThe disadvantages\u003c/b\u003e of this approach is that its, essentially, all-or-nothing.\u0026nbsp; If something fails during the state transformations, because we are processing all data in batch, we will potentially have to recompile our source code, and restart the workflow from scratch.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cu\u003e\u003cb\u003eThe alternative: Rather than envisioning this problem as a two step transformation, \u003c/b\u003e\u003cb\u003ewe could envision it as a flow of data transformations, or as a single, infintely running work queue.\u003c/b\u003e\u003c/u\u003e\u003cbr /\u003e\u003cu\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/u\u003eOnce we get away from the imperative, state-based view of the problem, we find that the conceptual roots for any solution will be much more well-aligned with declarative, functional programming idioms, rather than imperative ones.\u0026nbsp; Languages such as Erlang and Lisp give us the basis for dealing with long running, evolving systems.\u003cbr /\u003e\u003cbr /\u003eFor example, in a functionally inspired solution to this problem we might design the following architecture:\u003cbr /\u003e\u003cul\u003e\u003cli\u003eDocuments are continuously preprocessed (unimportant words and characters are removed) and put into a key value store.\u0026nbsp; The key here is the document.\u0026nbsp;\u0026nbsp;\u003c/li\u003e\u003cli\u003eDocuments are marked as processed or unprocessed.\u0026nbsp; This can be done, also, using a key value store.\u003c/li\u003e\u003cli\u003eEntity relations are extracted from documents and placed into a graph database in a separate process.\u003c/li\u003e\u003cli\u003eA thread continually listens to document transformation 
events.\u0026nbsp; When a document is transformed, this thread adds its key to the top of a work queue.\u0026nbsp;\u003c/li\u003e\u003cli\u003eWhen the algorithm for entity extraction is updated, the graph database can be deleted, and all documents can be lazily marked as unprocessed.\u0026nbsp;\u0026nbsp;\u003c/li\u003e\u003c/ul\u003e\u003cb\u003eExamples of transformative, rather than iterative big-data workflows :\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003e\u003col\u003e\u003cli\u003eFor an even more sophisticated perspective on real-time, dynamic processing of large data sets, watch Nathan Marz's talk on storm \u003ca href\u003d\"https://www.youtube.com/watch?v\u003dcF8a_FZwULI\"\u003ehttps://www.youtube.com/watch?v\u003dcF8a_FZwULI\u003c/a\u003e.\u0026nbsp; Storm allows for iterative, fault-tolerant processing of streams with real-time groupings and feed splitting, providing an elegant and highly scalable (in terms of computation, as well in terms of throughput).\u003c/li\u003e\u003cli\u003eAnother alternative to imperatively defined data processes is elegantly implemented in berkeley's \"Spark\" platform.\u0026nbsp; The spark platform implements fault tolerance by virtue of the fact that lost nodes can be reclaimed by re-application of the functional definitions originally used to define data transformations (see \u003ca href\u003d\"http://vimeo.com/20757432\"\u003ehttp://vimeo.com/20757432\u003c/a\u003e).\u003c/li\u003e\u003c/ol\u003e\u003cbr /\u003e\u003cb\u003eThe advantage\u003c/b\u003e of the latter approach is that as soon as our system starts, it begins producing data - and if we decide to improve our algorithms while processing, that is very easy to do - because at any given time we can choose to begin reprocessing documents on our work queue (of course, in order to implement this effectively we would need to have logic for avoiding addition of duplicate data to the system).\u0026nbsp;\u0026nbsp; There are a few obvious \u003cb\u003edisadvantages 
\u003c/b\u003eincluding best-case performance drop offs.\u0026nbsp; Overall, however, this sort of message driven architecture proves to be extremely valuable in many industrial-strength big-data applications (for example, chat rooms, high volume websites with a need for asynchronous workflows, bank and financial systems, etc...).\u003cbr /\u003e\u003cbr /\u003eThis highly decoupled architecture allows for continuous improvement without downtime.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eTechnically? \u003c/b\u003eIn a sophisticated, highly dynamic and concurrent scenario, a language such as Erlang becomes extremely relevant.\u0026nbsp; But why?\u0026nbsp; Because a complex, concurrent system with high levels of uptime needs to be fault tolerant and robust (meaning that individual failures don't cause the entire system to go down).\u0026nbsp; How can we make a system robust?\u0026nbsp; By making it dynamic.\u0026nbsp; Adding a dynamic element to a concurrent work queue allows us to continuously improve and fine tune the algorithms we are using to extract entities without causing downtime.\u0026nbsp; It also gaurantees that, at any given moment, data is continually being processed - so we don't waste our large cluster resources by forcing them to sit idle while rebuilding or retooling libraries. \u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cu\u003e\u003cb\u003eWhy is this sort of problem solving less common in imperative langauges?\u003c/b\u003e\u003c/u\u003e\u003cbr /\u003e\u003cbr /\u003eWe've all heard of these new-fangled startups (flightcaster, mixpanel, etc...) who have chosen to use functional languages to solve scalability problems.\u0026nbsp; And we inevitably ask ourselves...\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eCan't you just implement that fancy stateless architecture in Java ? Aren't all programs simply state-machines which can be reduced to the same exact computational model? 
Why do we need a new language?\u0026nbsp;\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003eThe reason why functional languages encourage scalable software architectures is because of the fact that they, from the bottom up, eliminate as much shared state as possible from a language.\u0026nbsp; This leads to scalable \"micro\" components.\u0026nbsp; Those micro components then scale naturally into larger components.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThe idea that micro-scalable application is macro-scalable is not hot-air.\u0026nbsp; Scalability is a bottom-up phenomenon. The largest systems in the world evolved this way, including Life. \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eLife is designed not from the top-down, but rather from the bottom up.\u0026nbsp; Each cell is autonomous.\u0026nbsp; All cellular systems are coordinated by message passing, as opposed to hierarchically.\u0026nbsp; When cells don't communicate effectively, or stop listening to each other, - we get cancer (for example, the kinase and signalling profiles of tumor cells are highly perturbed, where as the internal physiology is conserved).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eBack to programming langauges: What does bottom up scalability give you?\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eFault tolerance means that a system can continue working even through failures.\u0026nbsp; There are two ways to do this:\u003ci\u003e predict failures \u003c/i\u003eand prescribe the desired error handling, o\u003ci\u003er simply build \"ready-to-fail\" logic into an application \u003c/i\u003efrom the beginning.\u0026nbsp; In order to allow for failure, \u003cu\u003eyou have to allow an application to continually restart itself and, importantly, to continue to learn and pick up incremental improvements in source code.\u0026nbsp; This is exceedingly complex in any pre-compiled / non functional language. \u003c/u\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003ci\u003eRobust? 
\u003c/i\u003eYou have to manage memory and state manually.\u0026nbsp; This is great for micro-optimization.\u0026nbsp; But when building large concurrent systems, micro optimizations only provide you with a small constant time speed up, which is offset drastically by the scalability benefits gleaned from transparently scalable, concurrent algorithms.\u0026nbsp; Furthermore : an application with explicit memory management and complex state will require more \"steps\" and increasingly complicated failure handling (stateless operations have gauranteed atomicity properties which are inherent in the language itself).\u003c/li\u003e\u003cli\u003e\u003ci\u003eDynamic? \u003c/i\u003eYou have to write your own framework for converting messages into operations (that is, for example, you can't directly send java classes around as executables at runtime).\u0026nbsp; Languages like clojure and erlang support the passing around of dynamic functionality from the ground up: You can inject hotfixes in a running system much more naturally than you would in a structured language.\u003c/li\u003e\u003c/ul\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/121249209999313857/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/12/why-functional-programming-and-big-data.html#comment-form","title":"4 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/121249209999313857"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/121249209999313857"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/12/why-functional-programming-and-big-data.html","title":"Why Functional Programming and Big Data go hand-in-hand"}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"thr$total":{"$t":"4"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-5103528557771211589"},"published":{"$t":"2012-11-13T13:50:00.001-08:00"},"updated":{"$t":"2013-02-25T15:53:52.725-08:00"},"title":{"type":"text","$t":"A lightning tour of the thingy that runs your code"},"content":{"type":"html","$t":"\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd\u003e\u003ca href\u003d\"http://4.bp.blogspot.com/-resjq2Lz0wA/UJRChHWbEoI/AAAAAAAABRM/BcWsGVinv5Q/s1600/Linux_kernel_map.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"240\" src\u003d\"http://4.bp.blogspot.com/-resjq2Lz0wA/UJRChHWbEoI/AAAAAAAABRM/BcWsGVinv5Q/s320/Linux_kernel_map.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\"\u003e\u003ci\u003eWhat happens when you press \"go\" ?\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003cbr /\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eWe think in abstractions... 
we code in mega-abstractions...\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cb style\u003d\"font-size: medium;\"\u003ebut...\u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eI recently read about \u003c/span\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eAmdahl's law in this awesome article about caching\u003cspan style\u003d\"font-size: small;\"\u003e for high \u003cspan style\u003d\"font-size: small;\"\u003eperformance web sites\u003c/span\u003e:\u003c/span\u003e \u003c/span\u003e\u003ca href\u003d\"http://architects.dzone.com/articles/role-caching-large-scale\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003ehttp://architects.dzone.com/articles/role-caching-large-scale\u003c/span\u003e\u003c/a\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e.\u0026nbsp; The emphasis was on\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e detection of \u003cspan style\u003d\"font-size: small;\"\u003ethe \u003c/span\u003et\u003cspan style\u003d\"font-size: small;\"\u003erue bottleneck in a system\u003c/span\u003e.\u003cspan style\u003d\"font-size: small;\"\u003e\u0026nbsp; T\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003ehe Linux \u003cspan style\u003d\"font-size: small;\"\u003eoperating system's \u003cspan 
style\u003d\"font-size: small;\"\u003etransparent and\u003cspan style\u003d\"font-size: small;\"\u003e universal\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003ely accepted organization\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e lends itself \u003cspan style\u003d\"font-size: small;\"\u003equite well to recursive \u003cspan style\u003d\"font-size: small;\"\u003ebottle\u003cspan style\u003d\"font-size: small;\"\u003eneck analysis.\u0026nbsp; Nevertheless, its never easy\u003cspan style\u003d\"font-size: small;\"\u003e, w\u003cspan style\u003d\"font-size: small;\"\u003ehen given a slow or \u003cspan style\u003d\"font-size: small;\"\u003estalled program, to tell exactly whats wrong.\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003eWhy is this hard?\u003c/b\u003e \u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan 
style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThere is no righ\u003cspan style\u003d\"font-size: small;\"\u003et answer to this question - but \u003cspan style\u003d\"font-size: small;\"\u003epart of the problem is \u003cspan style\u003d\"font-size: small;\"\u003ethat as developers \u003ca href\u003d\"http://www.joelonsoftware.com/articles/LeakyAbstractions.html\"\u003ewe deal in abstractions which do not map d\u003c/a\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003ca href\u003d\"http://www.joelonsoftware.com/articles/LeakyAbstractions.html\"\u003eirectly to \u003c/a\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003ca href\u003d\"http://www.joelonsoftware.com/articles/LeakyAbstractions.html\"\u003ethe work done by a system\u003c/a\u003e.\u0026nbsp; This\u003cspan style\u003d\"font-size: small;\"\u003e is\u003c/span\u003e \u003cspan style\u003d\"font-size: small;\"\u003e(kin\u003cspan style\u003d\"font-size: small;\"\u003ed of) a\u003c/span\u003e\u003c/span\u003e good thing - it frees us to focus on business logic\u003cspan style\u003d\"font-size: small;\"\u003e, \u003c/span\u003emodularity\u003cspan style\u003d\"font-size: small;\"\u003e, and horizontal scalability\u003cspan style\u003d\"font-size: small;\"\u003e.\u003cspan style\u003d\"font-size: small;\"\u003e \u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003c/span\u003eLanguage makers have done an amazing job at separating programmers from machines\u003cspan style\u003d\"font-size: small;\"\u003e, and we should a\u003cspan style\u003d\"font-size: small;\"\u003eppl\u003cspan style\u003d\"font-size: small;\"\u003eaud them wholehear\u003cspan style\u003d\"font-size: small;\"\u003eted\u003cspan style\u003d\"font-size: small;\"\u003ely.\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u0026nbsp; But when software runs into \u003cspan style\u003d\"font-size: small;\"\u003es\u003cspan 
style\u003d\"font-size: small;\"\u003ecalability or performance problems\u003cspan style\u003d\"font-size: small;\"\u003e, abstraction can really \u003cspan style\u003d\"font-size: small;\"\u003ecome back to bi\u003cspan style\u003d\"font-size: small;\"\u003ete you.\u0026nbsp; \u003c/span\u003e\u003c/span\u003e\u003c/span\u003eD\u003cspan style\u003d\"font-size: small;\"\u003eisks might be slow, resources could be scarce, networking performance may be dismal, or simple libraries may be corrupted or missing.\u003cspan style\u003d\"font-size: small;\"\u003e\u0026nbsp; \u003c/span\u003eIn any case - you've got to be able to find out \u003cspan style\u003d\"font-size: small;\"\u003ewhy - and your core programming language\u003cspan style\u003d\"font-size: small;\"\u003e chops won'\u003cspan style\u003d\"font-size: small;\"\u003et always help.\u003c/span\u003e\u0026nbsp;\u003c/span\u003e\u003c/span\u003e \u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"font-size: small;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: small;\"\u003eBut what will help... is an understanding of your operating system. 
So \u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003ethe following is a quick\u003c/span\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003e tour of \u003cspan style\u003d\"font-size: small;\"\u003ethe Linux \u003cspan style\u003d\"font-size: small;\"\u003ei\u003c/span\u003ennards I always \u003cspan style\u003d\"font-size: small;\"\u003efind useful, \u003c/span\u003e\u003cspan style\u003d\"font-size: small;\"\u003efollow\u003cspan style\u003d\"font-size: small;\"\u003eed by\u003cspan style\u003d\"font-size: small;\"\u003e several links which sh\u003cspan style\u003d\"font-size: small;\"\u003eed some much \u003cspan style\u003d\"font-size: small;\"\u003eneeded light on the internals of how programming languages ultimately connect to the kernel.\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/b\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e1)\u0026nbsp;\u003cspan style\u003d\"text-align: left;\"\u003eFirst, \u003cu\u003ethe directory structure in linux\u003c/u\u003e. 
\u0026nbsp;Very basic:\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/bin:\u0026nbsp;\u003c/b\u003eThese are binaries that anybody can run.\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/sbin:\u0026nbsp;\u003c/b\u003eThese are binaries that only root runs (ifup, ifconfig, ...)\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/home:\u0026nbsp;\u003c/b\u003eThis is where all your data goes.\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/tmp:\u0026nbsp;\u003c/b\u003eThis is where all your temporary files go. Its cleared automatically by the OS, periodically.\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/usr:\u0026nbsp;\u003c/b\u003eThis is where user create/built programs typically go. For example, if you compile your own version of a particular C library because it didn't work out of the box on your machine, you would put it in /usr/local.\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/opt:\u0026nbsp;\u003c/b\u003eApplications and programs go here, in particular, the ones that you get from external sources, in one peice. 
For example, an executable version of google chrome would go in /opt.\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/var:\u0026nbsp;\u003c/b\u003eSystem related files go here that are created by programs. Log files (/var/log), mail related stuff, cached (/log/cache/) data from programs that run.\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cb\u003e/etc:\u0026nbsp;\u003c/b\u003eConfiguration files for stuff like networking, disks, etc...\u0026nbsp;\u003cspan style\u003d\"text-align: -webkit-auto;\"\u003eLinux runs several services: When you change something, you need to restart the corresponding service... Otherwise you sill see no results.\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"text-align: -webkit-auto;\"\u003e\u003cb\u003e/dev:\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cspan style\u003d\"text-align: -webkit-auto;\"\u003eThe files in this directory represent devices (audio, keyboards, disk drives, etc..)\u003c/span\u003e\u003c/span\u003e\u003c/div\u003e\u003c/td\u003e\u003ctd class\u003d\"tr-caption\"\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/td\u003e\u003ctd class\u003d\"tr-caption\"\u003e\u003cbr /\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e2) \u003cu\u003eSome\u0026nbsp;quick notes about networking:\u003c/u\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cb\u003ethe dhclient command:\u0026nbsp;\u003c/b\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eThis command will renew your ip address (if its dynamic). \u0026nbsp;Importantly, this command also tells your DHCP server what your computer's name is. 
\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cb\u003e/etc/init.d/networking restart:\u0026nbsp;\u003c/b\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eThis command will restart the networking services, reload configuration files.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cb\u003e(file) /etc/network/interfaces:\u0026nbsp;\u003c/b\u003e\u003cspan style\u003d\"font-size: x-small;\"\u003eThis has incantations that define the network cards, while defining their corresponding attributes - for example, what IP address to assign to it, and what IP address should be used to reach the outside world (the gateway), etc... \u0026nbsp;There is, however *no DNS information* in this file. \u0026nbsp;DNS info is in another file called ...\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cb\u003e(file) /etc/resolv.conf :\u0026nbsp;\u003c/b\u003eThis is where your DNS information resides (its a map of domain names, to ip addresses).\u003c/li\u003e\u003c/ul\u003e\u003cdiv\u003e3) \u003cu\u003eDisks\u003c/u\u003e ~ like any OS, \u003cu\u003elinux mounts and partitions disks for you\u003c/u\u003e. \u0026nbsp;It is these disks, partitions, and ultimately, the filesystems on those partitions, which contain the data you see when you type \"ls\". \u0026nbsp;Some of the basic commands available on Linux to play with this are:\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cb\u003eThe fdisk command:\u0026nbsp;\u003c/b\u003eThis will show you how many disks you have on your machine, including partitions (for example, one partition is for the kernel, another for swap space, etc...).\u003c/li\u003e\u003cli\u003e\u003cb\u003eThe df and du commands for looking at disks/directory usage:\u0026nbsp;\u003c/b\u003eThese commands tell you the filesystem and directory usage statistics. \u0026nbsp;\u003c/li\u003e\u003cli\u003e\u003cb\u003eThe rsync command for synchronization:\u0026nbsp;\u003c/b\u003eThis command synchronizes directories. 
\u0026nbsp;Notably, you can use the -e option to sync files over ssh in different places.\u003c/li\u003e\u003cli\u003e\u003cb\u003e(file) /etc/fstab:\u0026nbsp;\u003c/b\u003eThis file defines the filesystem mounting that occurs when your OS loads.\u003c/li\u003e\u003cli\u003eA revealing and accessible comparison of the linux file system, compared with others:\u0026nbsp;\u003ca href\u003d\"http://www.howtogeek.com/115229/htg-explains-why-linux-doesnt-need-defragmenting/\"\u003ehttp://www.howtogeek.com/115229/htg-explains-why-linux-doesnt-need-defragmenting/\u003c/a\u003e\u003ci\u003e\u0026nbsp;\u003c/i\u003e\u003c/li\u003e\u003c/ul\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003ci\u003eDo disks in the cloud work the same?\u0026nbsp; \u003c/i\u003e\u003c/blockquote\u003e\u003c/div\u003e\u003cblockquote\u003eYes:\u0026nbsp; They are still mounted as devices, and the device abstraction is sufficiently robust to support extremely diverse disk implementations.\u0026nbsp; It's instructive to consider the way, in the cloud, the \"/dev\" directory maps to disks.\u0026nbsp; As an experiment, we can create an AMI instance with no external storage, and run the following command:\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003els /dev/\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eSubsequently, we can attach an EBS (Elastic Block Store - a fancy new-fangled cloud storage solution) drive to that same instance using the AWS console.\u0026nbsp; Shortly thereafter, when running the same command (\u003ci\u003els /dev\u003c/i\u003e), we will see a new entry:\u0026nbsp; This entry represents the raw disk device.\u0026nbsp; By mounting it as a filesystem, we can then write to that detached storage device.\u0026nbsp;\u0026nbsp; \u003c/blockquote\u003e\u003cbr /\u003e4) How your code works in the Linux environment:\u003cbr /\u003e\u003cul\u003e\u003cli\u003eWhen you run your programs, they ultimately call system functions. 
\u0026nbsp;\u003c/li\u003e\u003cli\u003eThose system functions request operations of the kernel OR, they simply write instructions that the CPU can access. \u0026nbsp;\u003c/li\u003e\u003cli\u003e\u003cb\u003eKernel operations access devices or do other privileged tasks:\u0026nbsp;\u003c/b\u003eAccessing a disk, connecting to a socket, etc...\u0026nbsp;\u003c/li\u003e\u003cli\u003e\u003cb\u003eLibrary operations don't require access to the kernel:\u0026nbsp;\u003c/b\u003eSimple calculations can be done without accessing the kernel. \u0026nbsp;Finally, you should know that the Linux operating system is written in C. \u0026nbsp;So when a higher level language requests a service (i.e. opening a file), it ultimately does it by a C binding of one sort or another. \u0026nbsp;\u003c/li\u003e\u003c/ul\u003e\u003cb\u003e5) Finally - a list of articles that will help you to understand your code, in order of increasing abstraction.\u003c/b\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003eThe Kernel: The hyperoptimized code written in C that runs your machine.\u0026nbsp;\u003c/li\u003e\u003cul\u003e\u003cli\u003e\u003ca href\u003d\"http://blog.markloiseau.com/2012/04/hello-world-loadable-kernel-module-tutorial/\"\u003ehttp://blog.markloiseau.com/2012/04/hello-world-loadable-kernel-module-tutorial/\u003c/a\u003e\u003c/li\u003e\u003cli\u003e\u003ca href\u003d\"http://www.tuxradar.com/content/how-linux-kernel-works\"\u003ehttp://www.tuxradar.com/content/how-linux-kernel-works\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003cli\u003eThe C Programming language : the lowest level language that can be sanely used by a conventional developer to an build a real-world, end-user application - C is the ultimate basis for higher languages. 
\u0026nbsp;Directly compiles to assembly.\u0026nbsp;\u003c/li\u003e\u003cul\u003e\u003cli\u003eC is fast - largely due to the fact that it compiles directly to machine code.\u003c/li\u003e\u003cli\u003eC requests services of the kernel by calling system level functions.\u003c/li\u003e\u003cli\u003eHow C assemblers work:\u0026nbsp;\u003ca href\u003d\"http://www.erg.abdn.ac.uk/~gorry/eg2069/comp.html\"\u003ehttp://www.erg.abdn.ac.uk/~gorry/eg2069/comp.html\u003c/a\u003e\u003c/li\u003e\u003cli\u003eCompiling and \"linking\":\u0026nbsp;\u003ca href\u003d\"http://www.tenouk.com/ModuleW.html\"\u003ehttp://www.tenouk.com/ModuleW.html\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003cli\u003ePython programs : written in C, but runs at a very high level. \u0026nbsp;\u003c/li\u003e\u003cul\u003e\u003cli\u003ePython is an interpreted language. \u0026nbsp;\u003c/li\u003e\u003cli\u003ePython code is read in by a C program at runtime, and instructions are executed sequentially. \u0026nbsp;\u003c/li\u003e\u003cli\u003ePython code is implemented, ultimately, by the C language.\u003c/li\u003e\u003cli\u003e\u003ca href\u003d\"http://tech.blog.aknin.name/2010/09/02/pythons-innards-hello-ceval-c-2/\"\u003ehttp://tech.blog.aknin.name/2010/09/02/pythons-innards-hello-ceval-c-2/\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003cli\u003eJava and the JVM programs :\u0026nbsp;\u003c/li\u003e\u003cul\u003e\u003cli\u003eJava programs are run by a C program called \"java\". \u0026nbsp;Java is compiled to \"byte-code\", which is run by the JVM. 
\u0026nbsp;Java is ultimately optimized by the JVM, which is capable of byte-code optimization at runtime by using tricks like inlining (combining separated functions into a single method).\u0026nbsp;\u003c/li\u003e\u003cli\u003eJava is reasonably fast simply because of the statefullness of the JVM - it can remember, optimize, and reorganize loops at runtime.\u0026nbsp;\u003c/li\u003e\u003cli\u003eJava classes:\u0026nbsp;\u003ca href\u003d\"http://onjava.com/pub/a/onjava/2005/01/26/classloading.html\"\u003ehttp://onjava.com/pub/a/onjava/2005/01/26/classloading.html\u003c/a\u003e\u003c/li\u003e\u003cli\u003eThe JVM: \u003ca href\u003d\"http://java.dzone.com/articles/jvm-internals-series-part-1\"\u003ehttp://java.dzone.com/articles/jvm-internals-series-part-1\u003c/a\u003e\u003c/li\u003e\u003cli\u003eThe java memory model:\u0026nbsp;\u003ca href\u003d\"http://www.youtube.com/watch?v\u003dWTVooKLLVT8\"\u003ehttp://www.youtube.com/watch?v\u003dWTVooKLLVT8\u003c/a\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/ul\u003e\u003cdiv\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv\u003eIf you're really adventurous, you can see the way the whole thing fits together here in this clickable visualization of the entire dependency hierarchy of a linux OS. 
\u0026nbsp;Very useful for getting a picture of whats going on under the hood:\u0026nbsp;\u003ca href\u003d\"http://www.makelinux.net/kernel_map/\"\u003ehttp://www.makelinux.net/kernel_map/\u003c/a\u003e.\u003c/div\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/5103528557771211589/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/11/a-lightning-tour-of-thingy-that-runs.html#comment-form","title":"2 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5103528557771211589"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5103528557771211589"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/11/a-lightning-tour-of-thingy-that-runs.html","title":"A lightning tour of the thingy that runs your code"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://4.bp.blogspot.com/-resjq2Lz0wA/UJRChHWbEoI/AAAAAAAABRM/BcWsGVinv5Q/s72-c/Linux_kernel_map.jpg","height":"72","width":"72"},"thr$total":{"$t":"2"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-3544677247949496067"},"published":{"$t":"2012-10-26T03:27:00.000-07:00"},"updated":{"$t":"2012-11-02T06:03:05.321-07:00"},"title":{"type":"text","$t":"Go back in time with \"git blame\" + \"git show\" "},"content":{"type":"html","$t":"\u003cb\u003eThere are no shortage of ways to look at individual changes to individual lines in a Git repository.\u0026nbsp; But what 
happens if you want to see what an entire file looked like, as of a particularly \"long time ago\", in the absence of an exact commit message or date?\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://1.bp.blogspot.com/-4AXpKNQLDe8/UIpm35nvQOI/AAAAAAAABQI/EjHFrKzJuTE/s1600/git.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"320\" src\u003d\"http://1.bp.blogspot.com/-4AXpKNQLDe8/UIpm35nvQOI/AAAAAAAABQI/EjHFrKzJuTE/s320/git.jpg\" width\u003d\"130\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eUnfortunately, a lot of times, we don't know the exact hash of the  commit we want to investigate.\u0026nbsp; In this very brief and embarrassingly simple post I'll outline a  simple strategy for finding the \"last version that probably worked\". 
\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003e**NOTE: If you have to do this alot - there is probably something systemically wrong in your dev process.**\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eLets consider the following scenario:\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e1) You know a file used to work, about 2 or 3 months ago.\u003cbr /\u003e2) You want to see how it was different\u003cbr /\u003e3) Line by line structural changes don't help - the file has since been restructured.\u0026nbsp; Methods were moved around.\u0026nbsp; Objects were refactored out, etc....\u0026nbsp; So you want the ENTIRE file as of a certain state.\u0026nbsp; In order to do this, you need to use \"date\" as a search filter.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003eFirst: You have to find out the \"state\" of the file which you are interested in.\u0026nbsp; In git, this means finding the commit hash.\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThe simple method, if you trust the commiters: \"git log\".\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eThe \"git log\" command will give all commits related to a file:\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003egit log src/myfilethatchanged.clj\u003c/span\u003e\u003c/blockquote\u003e\u0026nbsp;Will yield :\u003cbr /\u003e\u0026nbsp;\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; Author:jayunit100\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; Date:Dec 25 1:1:1 2012 +010\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; Ripping out everything related to this really important function b/c its christmass\u003cbr 
/\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; .......\u003cbr /\u003e\u003cbr /\u003eHowever, this is hardly the case.\u0026nbsp; Rather - commit messages are often (at best) highly granular, or (at worst) non existent.\u0026nbsp; Very rarely will a commit define a possible bug that you can search for by eye using git log.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eIn the (common) case where you don't know quite which commit you are interested in, you can simply use\"git blame | grep \u0026lt;date regex\u0026gt;\" to view several file states around the time you are interested in:\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003egit blame src/myfilethatchanged.clj\u0026nbsp; | grep jayunit100 | grep 2012-[89]\u003c/span\u003e\u003c/blockquote\u003e\u0026nbsp;At this point, you will get a list of git commit ids, followed by the commit contents.\u0026nbsp; Not particularly useful for fine grained debugging.\u0026nbsp; But it narrows it down to the right date!\u003cbr /\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003ea28jcz0a jayunit100                         2012-09-22 18:43:58 +0200 934   //this is a bad idea. 
\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003ee36fb8c9 jayunit100                         2012-09-22 17:13:11 +0200 934 //some other innocuous line of code\u0026nbsp; \u003c/blockquote\u003e\u003cbr /\u003eAh\u0026nbsp; ^^ this looks like the commit which might have been done at the time that everything was working correctly.\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003e\u003c/b\u003e\u003cb\u003eNow, finally, here is how we write the entire file at that state in time out\u003c/b\u003e: \u003c/span\u003e\u003cbr /\u003e\u0026nbsp; \u003cbr /\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp; \u0026nbsp; \u0026nbsp; \u0026nbsp; \u0026nbsp; git show e36fb8c9:src/myfilethatchanged.clj \u0026gt; myfilethatchangedOLD.clj\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eThus, it is super easy to be a source detective by simply using \"git blame\" along with \"git show\" to recreate a \"whole\" version of a working file at the time of an ancient commit, even if you don't know the exact date/hash of the commit.\u003cbr /\u003e\u003cbr /\u003eThe trick is to first use \"git blame\" with, for example, a simple grep to scan for the date range you are interested in.\u0026nbsp; Then, we find some candidate commits of interest, followed by \"git show\" to recover the exact state of the file at a given timepoint.\u0026nbsp; "},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/3544677247949496067/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/10/find-last-working-version-with-git.html#comment-form","title":"2 
Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/3544677247949496067"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/3544677247949496067"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/10/find-last-working-version-with-git.html","title":"Go back in time with \"git blame\" + \"git show\" "}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://1.bp.blogspot.com/-4AXpKNQLDe8/UIpm35nvQOI/AAAAAAAABQI/EjHFrKzJuTE/s72-c/git.jpg","height":"72","width":"72"},"thr$total":{"$t":"2"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-887104668656678158"},"published":{"$t":"2012-10-21T16:35:00.001-07:00"},"updated":{"$t":"2012-10-29T04:16:01.635-07:00"},"title":{"type":"text","$t":"A pure game of life."},"content":{"type":"html","$t":"\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003ePure functional programming is at the heart of scalable software design... Even when your not using a functional language. \u0026nbsp;If you've ever wondered why \"learning a functional language might make you a better developer\", then this post is for you. 
\u0026nbsp;We will take a simple problem: Conway's game of life, and demonstrate a purely functional approach to its implementation.\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e** update: since writing this post, I've discovered an amazing treatment of the exact same subject by Manual Rotter about 2 months ago, which is here :\u0026nbsp;\u003ca href\u003d\"http://programmablelife.blogspot.com/2012/08/conways-game-of-life-in-clojure.html\"\u003ehttp://programmablelife.blogspot.com/2012/08/conways-game-of-life-in-clojure.html\u003c/a\u003e\u0026nbsp;**\u0026nbsp;\u003c/blockquote\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca href\u003d\"http://1.bp.blogspot.com/-_4XlM0UgBI8/UISOOCvq8eI/AAAAAAAABN4/HLeSYM1FlkA/s1600/Glider.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: 1em; margin-right: 1em;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"200\" src\u003d\"http://1.bp.blogspot.com/-_4XlM0UgBI8/UISOOCvq8eI/AAAAAAAABN4/HLeSYM1FlkA/s200/Glider.jpg\" width\u003d\"200\" /\u003e\u003c/a\u003e\u003c/div\u003e\u003cbr /\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eJust because it looks like an array doesn't mean you have to code it that way.\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small; text-align: -webkit-auto;\"\u003eThe simplest and most intuitive implementation of Conway's game of life uses a 2D array of 
cells wherein the cells are mutated to reflect the updated state during each \"turn\" of the game.\u003c/span\u003e\u003cspan style\u003d\"font-size: small; text-align: center;\"\u003e\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cb\u003e\u003cspan style\u003d\"color: red;\"\u003eUsing an array data structure as a crutch for modelling 2 dimensional data, such as that in the game of life, seems like an obvious and direct approach. \u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"color: red;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/b\u003e\u003cb\u003e\u003cspan style\u003d\"color: red;\"\u003eHowever, it comes at a very high cost. \u0026nbsp;By using the physical \"place\" where cell datas are stored, as the mechanism for looking up the state of cells and simulating our world, we constrain and complicate the entire simulation needlessly. \u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"color: red;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/b\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003eYou might be tempted to ask: How could a 2D array possibly be a bad choice for modelling a 2D world ?\u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eA recent talk on \u003ca href\u003d\"http://www.youtube.com/watch?v\u003d-6BsiVyC1kM\"\u003eThe Value of Values\u003c/a\u003e\u0026nbsp;by Rich Hickey sheds light on some of the pitfalls of such \"place\" oriented programming - its an amazing talk, and it really extols the virtues of declarative and functional problem solving in a new light. \u0026nbsp;In particular, we can consider this scenario:\u003cbr /\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003ci\u003e1) Storing the universe in a 2D array means that the game of life's universe size is limited to the amount of memory available. 
\u0026nbsp;We cannot simulate an infinitely large universe.\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003ci\u003e2) The board cannot be dynamically expanded/shrunk at runtime.\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003ci\u003e3) If we want to apply a new feature from the universe (i.e. simulating a \"natural disaster\") we would have to mutate the state of the board, and can thus lead to multiple intermediate states for the universe in a given turn.\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003ci\u003e4) We must take into account the fact that, while updating the cells, you have a board that is in an intermediate state. \u0026nbsp;This might, for example, require us to copying the 2D array into a \"previous\" object, while\u0026nbsp;writing\u0026nbsp;a new 2D array out. \u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eSuddenly it becomes clear that the 2D array is an unscalable model for the game of life !\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003eThe reason for this is that it couples the core feature of the board - which is the simple operation of knowing wether a cell is alive or dead - to a data structure, or, as Rich Hickey might call it: a \"place\". \u0026nbsp;The \"place\" then limits the dynamics of the simulation ~\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eSo lets try our hand at a purely functional solution to the game of life - one which doesn't couple the universe to any particular data structure, while lazily evaluating cell states. \u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003eThe basic premise of this solution is as follows:\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e1) \u003cu\u003eThe board itself is a function\u003c/u\u003e, rather than an array. 
\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: large;\"\u003e2) The \"first\" state of the board is defined as a function as well.\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: large;\"\u003e3) To view the results of play of a \"turn\" of the game, we simply apply this function to a set of coordinates -\u003cu\u003e we never need to calculate or store the entire board\u003c/u\u003e\u0026nbsp;in memory.\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: large;\"\u003e4) To play multiple turns - we can simply \u003cu\u003eapply the board function to itself over and over again\u003c/u\u003e.\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: large;\"\u003e5) For the actual playing of the game (i.e. viewing the whole board), we simply can run Clojure's doseq against a list comprehension of the cells - this can stream the data to stdout.\u003cb\u003e\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003eSo... here's the code! Its a little bit raw at the moment, but it should be self explanatory. \u0026nbsp;This is a first iteration and comments are of course certainly welcome. 
\u0026nbsp;I've put this in a branch of the learn-clojure repo.\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003ePlease not - this is pu\u003cspan style\u003d\"font-size: large;\"\u003erely \u003cspan style\u003d\"font-size: large;\"\u003etheor\u003cspan style\u003d\"font-size: large;\"\u003ee\u003cspan style\u003d\"font-size: large;\"\u003etical\u003cspan style\u003d\"font-size: large;\"\u003e, since it doe\u003cspan style\u003d\"font-size: large;\"\u003es\u003cspan style\u003d\"font-size: large;\"\u003en't use any memoiza\u003cspan style\u003d\"font-size: large;\"\u003etion, the performance hits \u003cspan style\u003d\"font-size: large;\"\u003ecould really hurt you.\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e \u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003eA pure game of life in Clojure.\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cpre style\u003d\"white-space: pre-wrap; word-wrap: break-word;\"\u003e(ns problems.life)\u0026nbsp;\u003c/pre\u003e\u003c/blockquote\u003e\u003cblockquote class\u003d\"tr_bq\"\u003e\u003cpre style\u003d\"white-space: pre-wrap; word-wrap: break-word;\"\u003e;This function determines wether a cell is alive or dead.\u003cbr /\u003e;Its a simplified version of the classic life function.\u003cbr /\u003e; f: the initial board function (f), \u003cbr /\u003e; x, y (the x/y coordinates)\u003cbr /\u003e(defn live [f x y] \u003cbr /\u003e  (let [neighbors\u003cbr /\u003e                  (+ \u003cbr /\u003e                   (max 0 (f x 
(dec y)))\u003cbr /\u003e                   (max 0 (f x (inc y))) \u003cbr /\u003e                   (max 0 (f (inc x) y))\u003cbr /\u003e                   (max 0 (f (dec x) y))\u003cbr /\u003e                   (max 0 (f (dec x) (dec y)))\u003cbr /\u003e                   (max 0 (f (inc x) (inc y)))\u003cbr /\u003e                   (max 0 (f (dec x) (inc y)))\u003cbr /\u003e                   (max 0 (f (inc x) (dec y))))]\u003cbr /\u003e    (cond\u003cbr /\u003e      (\u003d neighbors 0) 0\u003cbr /\u003e      (\u003d neighbors 1) 1\u003cbr /\u003e      (\u003d neighbors 2) 1\u003cbr /\u003e      (\u003d neighbors 3) 0\u003cbr /\u003e      :else 0)))\u003cbr /\u003e\u003cbr /\u003e;The gameboard is a function with -1's for boundaries.\u003cbr /\u003e(defn boardstart [x y]\u003cbr /\u003e  (cond \u003cbr /\u003e     (or (\u0026gt; x 2) (\u0026gt; y 2) (\u0026gt; 0 x) (\u0026gt; 0 y)) -1 ; Out of range\u003cbr /\u003e     (\u003d 0 x) 1 ; Cells on top are alive.\u003cbr /\u003e     :else 0))\u003cbr /\u003e\u003cbr /\u003e;Create a function which will dynamically calculate the state of the board when invoked.\u003cbr /\u003e(defn newboard [f]\u003cbr /\u003e  #(cond \u003cbr /\u003e     (\u0026gt; 0 (f %1 %2)) -1 ; Out of range\u003cbr /\u003e     :else (live f %1 %2)))\u003cbr /\u003e\u003cbr /\u003e;Print the current state of the board\u003cbr /\u003e(defn printstate [board size]\u003cbr /\u003e  (println \"starting state dump\")  \u003cbr /\u003e  (doseq [x (range size) y (range size)] \u003cbr /\u003e      (println x \" \" y \" | \" (board x y))))\u003cbr /\u003e\u003cbr /\u003e;To play, we simply call newboard over and over again. \u003cbr /\u003e;The effect is simply to calculate the gameboard functionally, so \u003cbr /\u003e;the board is recalculated at every call. Next step will be to add a bitcache or something\u003cbr /\u003e;of the sort that is decoupled from the calling of the board.  
\u003cbr /\u003e(defn main1 []\u003cbr /\u003e  (print \"\\n------------\\n\")\u003cbr /\u003e  (printstate boardstart 3)\u003cbr /\u003e  (print \"\\n------------\\n\")\u003cbr /\u003e  (printstate (newboard boardstart) 3)\u003cbr /\u003e  (print \"\\n------------\\n\"))\u003cbr /\u003e  (printstate (newboard (newboard boardstart)) 3)\u003c/pre\u003e\u003c/blockquote\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cscript src\u003d\"https://gist.github.com/3928900.js?file\u003dPureGOL\"\u003e\u003c/script\u003e \u003cscript src\u003d\"https://raw.github.com/moski/gist-Blogger/master/public/gistLoader.js\" type\u003d\"text/javascript\"\u003e\u003c/script\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/887104668656678158/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/10/a-pure-game-of-life.html#comment-form","title":"2 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/887104668656678158"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/887104668656678158"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/10/a-pure-game-of-life.html","title":"A pure game of life."}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://1.bp.blogspot.com/-_4XlM0UgBI8/UISOOCvq8eI/AAAAAAAABN4/HLeSYM1FlkA/s72-c/Glider.jpg","height":"72","width":"72"},"thr$total":{"$t":"2"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-2354437987858982211"},"published":{"$t":"2012-09-12T12:46:00.003-07:00"},"updated":{"$t":"2012-09-28T10:51:58.652-07:00"},"title":{"type":"text","$t":"Envelope-driven development"},"content":{"type":"html","$t":"\u003cb\u003eThis post is about parsimonious planning, back-of-the-envelope engineering, and extreme productivity.\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://4.bp.blogspot.com/-t8aAfUE9F74/UFDY2SX_lAI/AAAAAAAABNQ/1OqKtnIUU68/s1600/envelop.jpg\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"240\" src\u003d\"http://4.bp.blogspot.com/-t8aAfUE9F74/UFDY2SX_lAI/AAAAAAAABNQ/1OqKtnIUU68/s320/envelop.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eBy back of the envelope, we don't mean flow charting.\u0026nbsp;  Rather, we mean real world consideration of the quantitative aspects of  all of the major segments in a system.\u0026nbsp; Notice that there is little mention of accidental aspects of the system (i.e. 
Java, C++, AWS, Java, Heroku, ...).\u0026nbsp; The envelope doesn't document technology - rather, it tells us about the actual problem we are solving in a largely technology independent manner.\u003cbr /\u003e\u0026nbsp; \u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cb\u003eThis post is not about brain teasers:\u0026nbsp; Rather, its about spurring your ability to implement envelope-driven-development in the real world, quickly.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eI'm not a hardware guy, not at all.\u0026nbsp; But in any case, I'm going to (attempt to) show you how to quickly walk through  3 simple estimation problems which test your ability to estimate the  necessary CPU, I/O, etc... resources for maintaining a simple web site.\u0026nbsp; Of course, the example site can't be too simple - otherwise, we wouldn't learn anything.\u0026nbsp; So... we'll use a  \"community forum\" as the template (i.e. the website is dynamic, and grows over time).\u0026nbsp;\u003cbr /\u003e\u003cbr /\u003eBut first, especially if you're new to this: to get better at back-of-the-envelope calculations - I suggest spending some time on these websites: \u003cbr /\u003e\u003cul\u003e\u003cli\u003emothereff.in/byte-counter (A simple and easy get a feel for data size for single records).\u003c/li\u003e\u003cli\u003ehttp://infolab.stanford.edu/~backrub/google.html (the original plan,  from the 1990s... the definitive back of the envelope use case, because  google still hasn't solve this problem)\u003c/li\u003e\u003cli\u003ehttp://aws.amazon.com/whitepapers/ (great case studies here, to get your mind ready)\u003c/li\u003e\u003cli\u003ehttp://www.t1shopper.com/tools/calculate/ (numbers!) 
\u003c/li\u003e\u003cli\u003ehttp://architects.dzone.com/articles/every-programmer-should-know (more numbers)\u003c/li\u003e\u003cli\u003ehttp://www.hypexr.org/ (great for linux tricks for estimating file sizes, copying files, etc..)\u003c/li\u003e\u003c/ul\u003e\u003cb\u003ePrototypes vs Envelopes\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eBefore we begin, lets consider the benefits of envelope-driven development in light of the infamous \"prototype\".\u0026nbsp; We've all built a prototype before, and we have probably learned that prototypes don't always tell us very much about the way our real world application will scale.\u0026nbsp; In addition, they take a while, and involve alot of boilerplate coding that doesn't advance our intuition about the target system we're trying to build.\u0026nbsp; \u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cb\u003ePrototyping vs Back-of-the-envelope architecting \u003c/b\u003e\u003c/div\u003e\u003ctable\u003e  \u003ctbody\u003e\u003ctr\u003e \u003ctd\u003e\u003cbr /\u003e\u003c/td\u003e \u003ctd\u003e\u003cb\u003eEstimate \u003c/b\u003e\u003c/td\u003e \u003ctd\u003e\u003cb\u003ePrototype\u003c/b\u003e \u003c/td\u003e \u003c/tr\u003e\u003ctr\u003e \u003ctd\u003e\u003cu\u003eData Scale\u003c/u\u003e \u003c/td\u003e \u003ctd\u003eTB of data and beyond \u003c/td\u003e \u003ctd\u003eMB of data \u003c/td\u003e \u003c/tr\u003e\u003ctr\u003e \u003ctd\u003e\u003cu\u003eTime\u003c/u\u003e \u003c/td\u003e \u003ctd\u003eDays or less \u003c/td\u003e \u003ctd\u003eWeeks, Months \u003c/td\u003e \u003c/tr\u003e\u003ctr\u003e \u003ctd\u003e\u003cu\u003eDeliverables\u003c/u\u003e \u003c/td\u003e \u003ctd\u003eScalability estimates, hot spots for further research. Quantitative metrics for moving forward. \u003c/td\u003e \u003ctd\u003e\"Working\" code (grain of salt!) 
that usually doesn't really work at all.\u003c/td\u003e \u003c/tr\u003e\u003ctr\u003e \u003ctd\u003e\u003cu\u003e\u0026nbsp;Cost($)\u003c/u\u003e \u003c/td\u003e \u003ctd\u003e100s-1000s \u003c/td\u003e\u003ctd\u003e1000s (novice built), 10K-50K+ (expert built) \u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003eSo there you have it: Our back of the envelope architecture deals with large data sets (by abstracting them), in a shorter period of time, without overpromising.\u0026nbsp; The only real advantage to building a prototype as a first go is if you really, really, really must see working code for some reason (i.e. you are evaluating the competency of the people building your system, rather than focusing on the system itself).\u0026nbsp; If this is the case - this article is not for you.\u0026nbsp; This post is for those who know, from the get-go, that the system they are building is of primary importance, having direct, immediate value\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e... So, here's how this is gonna work:\u0026nbsp; \u003c/span\u003e\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003col\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThese exercises aren't knowledge based!\u0026nbsp; Novices should be able to attempt them. 
\u0026nbsp; \u003c/span\u003e\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eEach exercise will begin with some basic facts, which you might need to answer the question.\u003c/span\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe questions will be highlighted in green, for the impatient. \u003c/span\u003e\u003c/span\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eNone of these questions are particularly difficult.\u0026nbsp; But... If you aren't used to thinking in terms of real, hard numbers for software deployments, they might throw you off a little.\u0026nbsp;\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"font-size: small;\"\u003eThe \"scenario\" here is as follows: You're architecting the deployment for a web-forum, which will need to respond to a reasonable number of requests, grow over time, and feature a search functionality - and you want to know what, if any, bottlenecks you might run into from an i/o, disk usage, server bandwidth perspective. 
\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003e\u003c/b\u003e\u003c/span\u003e\u003c/span\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/b\u003e\u003c/li\u003e\u003c/ol\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003eExercise 1\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cdiv id\u003d\"post_message_2405826\"\u003e\u003cul\u003e\u003cli\u003eThe average packet size is 1 KB.\u003c/li\u003e\u003cli\u003eThere is 1 char per byte in UTF 8 (duh) \u003c/li\u003e\u003cli\u003eThis bullet has under 60 UTF8 bytes in it... precisely: 50.\u003c/li\u003e\u003cli\u003eThere are 1000KB in a megabyte (duh).\u003c/li\u003e\u003cli\u003e10E6 \u003d 1 Million.\u0026nbsp; 10E6 bytes \u003d 1 MB \u003c/li\u003e\u003c/ul\u003e\u003c/div\u003e\u003cb\u003eNow: lets say you are building a web-forum, you know, like http://www.skateboard-city.com/ or something...\u0026nbsp; And you want to  estimate how big your database might need to get, and its growth rate (so that you can decide where to host it, what scalability issues you might have, etc).\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u003cb\u003eAssuming, say 1000 users, with 10 posts a day... What kind of computing  infrastructure do you need to manage the data ?\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eWe can easily simplify the question by bounding this question, by again being parsimonious... 
Lets just put a cap of 100GB on the amount of disk space.\u0026nbsp; It makes the problem extremely tractable.\u0026nbsp; We know that any modern hard drive can easily handle that.\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u0026nbsp;So the question now becomes: will your forum-thingy ever exceed  100GB of data?\u0026nbsp; \u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eIf not, clearly, its data can be contained a small linux box under your  desk.\u0026nbsp;\u0026nbsp; Armed with nothing other than the above data points, we can  safely (and confidently) determine a back-of-the-envelope answer to this question : with no coding required.\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eAssumption: Lets assume that a typical forum post might be a paragraph ~ thats about 4 sentences.\u0026nbsp; Okay... 
So how much data is that?\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003ci\u003e\u003cspan style\u003d\"background-color: white;\"\u003eYour growth rate is about 240 bytes per post, times 10 posts per day, which amounts  to about 2.4 kilobytes per day, which is about 3/1000 Megabytes.\u0026nbsp; So,  it will take you a year to hit the MB mark, and 1000 years to reach the gigabyte mark.\u0026nbsp;\u003c/span\u003e \u003cspan style\u003d\"background-color: yellow;\"\u003eWhich means that you can  save your entire forum on a tiny hard drive, or even, a USB thumb  drive, for years to come (we can safely assume that you also have space to save the basic static content of the site, as well, which won't be growing).\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eThe answer:\u003ci\u003e\u003c/i\u003e The 100GB limit won't be reached for quite some time.\u0026nbsp; You're safe hosting this on a rinky-dink machine that sits underneath your desktop.\u0026nbsp; At least, you're safe in terms of disk space.\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003eExercise 2\u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eNow, let's focus on scaling this burgeoning forum so that it continues to be responsive under expected load. 
\u003c/span\u003e\u003c/div\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eOkay : So the last question was pretty easy.\u0026nbsp; 10 posts a day isn't that much.\u0026nbsp; We know that we can host our fictional forum's core database, which grows reasonably slowly, on a  cheap box under our desktop.\u0026nbsp; But, can we handle the traffic, i.e., the i/o ?\u0026nbsp; \u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eNow, again, here are the freebies:\u0026nbsp;\u003c/span\u003e \u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eYou know you can stream music on your computer.\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eA typical song is 3MB, and you only listen to pop (~3 minutes)\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eYou can upload and send an image using wifi from your iphone in 10 seconds.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eA typical image is 1MB.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eThe amount of time it takes to send a packet (1.5 KB or so) around the world is about 100 milliseconds.\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"background-color: white;\"\u003eThe real question here: Can a home webserver, along with a  mediocre cable connection handle expected traffic, if you are getting  10,000 hits a day on your site?\u0026nbsp; \u003c/span\u003e\u003c/div\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: 
lime;\"\u003eWhat is the cost (in terms of bandwidth), for hosting 10,000 hits ?\u003c/span\u003e\u0026nbsp;  First, lets assume that each page will have 2.4 KB of content.\u0026nbsp; But...  since we know that the data on our site will be an underestimate of a  page's size, we might want to bump that up to 10KB, per page, just to be  safe.\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cb\u003eIs that a good estimate?\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eWe can easily find out by firing up  the terminal and checking:\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003ci\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; find ./ -name *html | xargs ls -altrh\u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eAgain, we've just \"sureyed\" a reasonably sized sample of html pages with no \"real\" coding required.\u0026nbsp; \u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eOn my laptop, most of the HTML files are, in fact, between 10 and 50  KB.\u0026nbsp; So... 
Let's bump it up another order of magnitude, just to be safe.\u0026nbsp;  We will estimate that our forum page sizes are 100KB a pop.\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003ci\u003eSo, spreading 10,000 hits of 100KB each over a period of 24 hours (admittedly, ignoring  peak times), we need an upstream connection speed of 10,000*100KB/24  hours ~ 50,000 KB/ hour \u003d 50 MB / hr.\u0026nbsp;\u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cb\u003eOkay, so we need to stream around 50MB per hour.\u0026nbsp; Can we easily do this on a small machine on a reasonably fast, conventional network?\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eAgain, we can simplify this by being pessimistic: can our home internet connection handle that  sort of load ?\u0026nbsp; Easily.\u0026nbsp; Without googling for \"Megabytes per hour, home  internet\", we can estimate this as well.\u0026nbsp;\u0026nbsp; Since our iPhone is capable  of uploading 1 MB in under a minute, \u003c/span\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003eit's obvious that we can handle 60MB in under an hour.\u0026nbsp; Problem solved.\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: large;\"\u003eExercise 3\u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eLet's say we want to enable a search engine for our little  blog site.\u0026nbsp; \u003cspan style\u003d\"background-color: lime;\"\u003eHow much extra disk space would we need to index all the  text on the site, and how will we host the site?\u0026nbsp; 
\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eThis one is trickier because we need to factor in CPU cycles, Memory,  and the architecture of a search engine.\u0026nbsp; Here's a really awesome freebie for this exercise that you can tuck away:\u003cbr /\u003e\u003cul\u003e\u003cli\u003eThe initial google whitepaper's lexicon of all words spanned 14 million words.\u0026nbsp;\u0026nbsp;\u003c/li\u003e\u003c/ul\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003ci\u003eA quick reality check : \u003c/i\u003e\u003ci\u003eSo: Let's estimate how much memory  this might take up: if each word is  ~8 characters, at 8 bytes/character, we have about 64 bytes per word.\u0026nbsp;  64 bytes/word * 10 million words ~ 640 million bytes, which is around  640 MB.\u0026nbsp; The actual size of the first google search engine's word index  was around 256MB, which means they might have used some sort of  compression to store the words... OR, that the average word length is  less than 8 characters.\u0026nbsp; A quick google search yields 5.10 as the  average word length.\u0026nbsp; What happens if we plug this into\u0026nbsp; our estimate?\u0026nbsp;  We get slightly closer ~400 MB.\u0026nbsp; So... 
maybe words on the web are  smaller.\u0026nbsp; We're certainly close enough for this estimate, and its  encouraging that, by adding more data to our equation, we get almost 50%  closer to the \"right\" answer.\u0026nbsp;\u003c/i\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eAnd, finally, here are the other basic ingredients that we will need\u003ci\u003e \u003c/i\u003eto drum up a badass estimate for our now fictional, searchable forum:\u003c/span\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eSearch engines use \"inverted indexes\" : i.e., they dont index each page  separately with terms, because this would mean that a search would have  to traverse ALL pages.\u0026nbsp; Rather, they index words that point to  documents, and use a hashing function to find the exact location of a  word in constant time.\u0026nbsp; \u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eThe average home computer has about 4G of RAM.\u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003cspan style\u003d\"background-color: white;\"\u003eFinally! The question:\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cb\u003eDo we need to buy extra RAM or buy a multi-core machine CPU to host a performant search-engine\u003c/b\u003e \u003cb\u003efor forum ?\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eAgain, we start by translating the question: Will our search engine be  CPU/Memory intensive? More specifically... 
Will our intel quad-core chip  and 4GB machine be able to handle the load (let's assume 2GB is reserved  for other applications, which means the server is effectively 2GB).\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan style\u003d\"background-color: white;\"\u003eFirst, we can tackle the CPU cycles :\u003c/span\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eWe know these will not be an issue  because looking up an inverted index is a constant time operation.\u0026nbsp;\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan style\u003d\"background-color: white;\"\u003eWe also know that if our server is under severe load, there will  be a lot of CPU cycles spent processing requests which may decrease its  availability for searches.\u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003cbr /\u003e\u003ci\u003eAnother aside, just to be safe - let's estimate the amount of free CPU  time which we expect to have for searching:\u0026nbsp; To estimate if CPU load is  going to be affected by incoming requests, we have: 10,000 (number of  requests daily) / 24 (hour) ~ 400 req/hr.\u0026nbsp; Assuming our web servers can  handle one request in under 100 ms, this means that they, on average,  will be spinning over requests for 40,000ms, per hour, which is 40 seconds,  or\u003cspan style\u003d\"background-color: yellow;\"\u003e 1/90th of an hour, which means there should be more than enough\u003c/span\u003e\u003c/i\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003e CPU bandwidth to go around.\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eAnd as far as \u003cspan style\u003d\"background-color: white;\"\u003ememory\u003c/span\u003e is concerned, that's easy too :)\u003cbr /\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003eWe can guess that  our site will certainly have an in-memory word index of below 500MB, 
\u003cu\u003esince Google's initial search index, which spanned tens of millions of pages, was only 250MB\u003c/u\u003e.\u0026nbsp;\u0026nbsp; \u003c/li\u003e\u003cli\u003eSo, again.... it looks like our tiny linux box will be capable of hosting an internal search engine and index, entirely locally.\u0026nbsp; Yay :)\u003cspan style\u003d\"background-color: yellow;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/li\u003e\u003c/ul\u003e\u003cbr /\u003e\u003cb\u003eSo... what have we learned from our little envelope friend ?\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cul\u003e\u003cli\u003eThe disk space won't exceed a few hundred Gigs for a few centuries, assuming that we don't grow too fast.\u0026nbsp; So no need for any fancy remote hard disk cloud stuff, at least not for storing content.\u0026nbsp; \u003c/li\u003e\u003cli\u003eThe web server will be streaming 50MB or so an hour, which is reasonable and easy to maintain. \u003c/li\u003e\u003cli\u003eThe search engine just needs a few 100 extra MB or ram to do its job... no problem!\u003cb\u003e\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003c/li\u003e\u003cli\u003eWith a little DIY and some creative thinking, its pretty clear that this forum can be done with extreme parsimony !\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/li\u003e\u003c/ul\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003eFinal Thoughts\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThanks google!\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003eI had to practice this stuff for the dreaded google-interview once.\u0026nbsp; Thats how I got started down this masochistic road.\u0026nbsp; I didn't get the job, I think because of the fact that I was struggling to find a recursive solution to the gray code problem... Or something like that... 
But I learned a ton about on-the-fly estimating when preparing for it, and I got to eat ice cream out of some kind of robot-ice-cream-truck thingy, while witnessing my brain ooze out of the sides of my ears.\u0026nbsp; So I think it was a worthwhile experience.\u0026nbsp;  \u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003eBut oddly enough: they never really even asked me any estimation questions.\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eI think estimation questions aren't particularly popular at a lot of software companies... I can't, for the life of me, imagine why.\u0026nbsp; It seems like estimation is the single most important aspect of our day to day lives as developers.\u0026nbsp; I think this might be because most programmers secretly hate non-discretized (i.e. continuous) functions, which tend to be really important when doing back-of-the-envelope calculations.\u0026nbsp; Or maybe most programmers just don't like envelopes?\u0026nbsp; Or maybe we, as developers, just like building new stuff so damn much that we would rather do extra work than be parsimonious about executing things efficiently.\u0026nbsp; In any case, even if you like doing extra work... You can still benefit from parsimony: It frees you up to work on truly interesting problems, by decreasing the amount of time that you waste spinning your wheels on lukewarm, prototypical, not-quite-there-yet code.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eNow it's your turn!!!!!!!!!!!!!!!!!!!!!\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eNow that you've been convinced that back-of-the-envelope engineering is awesome, fun, and valuable, I'm sure you're wondering: Where can I find more problems? Harder ones?\u0026nbsp; How can I become an expert in this stuff?\u003cbr /\u003e\u003cbr /\u003eWell... 
when I was in grad school, I remember a guy named \u003ca href\u003d\"http://www.bio-toolkit.com/\"\u003eMarty Schiller\u003c/a\u003e - he was a protein bioinformatician who did real experiments on real cells... and he used to back-of-the-envelope every experiment we did.\u0026nbsp; Why?\u0026nbsp; Because real experiments cost money, and his wife always wanted him to be home on time.\u0026nbsp; He was a parsimonious bastard!\u0026nbsp; And he got good at parsimony by applying it, every day, to his own work.\u0026nbsp;\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003eSo... Want to be an envelope expert?\u0026nbsp; Be like Marty : Just take 20 minutes every morning to estimate the  amount of CPU, memory, and disk I/O resources that your day's work will  be using.\u0026nbsp; You can even do this if you're not an engineer : just use rough  benchmarks, i.e., assume that actively browsing the web takes up 3% of  the memory on your computer, and that watching a YouTube video consumes  15% of your network download bandwidth, etc... Or better yet, use htop,  along with your task manager, to actually see what kind of resources you  use for common tasks.\u003cbr /\u003e\u003cbr /\u003eThe fringe benefits of this sort of thinking are reaped immediately.\u0026nbsp; You'll be amazed at the effect this has on your work.\u0026nbsp; You will suddenly understand concurrency, non-blocking IO, database internals, and the underlying functionality of your platform much better, without ever having to buy a book about them.\u0026nbsp; Why?\u0026nbsp; Because when you start back-of-the-enveloping, you force yourself to understand the fundamentals of the system you are dealing with. \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eOne last, final warning:\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eDon't let your back of the envelope estimate turn into  an executive summary.\u0026nbsp; Really... 
if the envelope could predict the size of the human genome, convey the architecture of the worlds first laser light generator, and predict the magnitude of the first nuclear explosion (yes, in fact, these are 3 famous, landmark envelope predictions which were all within an order of magnitude) - its probably quite capable of describing the meat of your current software-architecture, regardless of the accidental complexities technolog(ies) involved."},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/2354437987858982211/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/09/envelope-driven-development_12.html#comment-form","title":"2 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/2354437987858982211"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/2354437987858982211"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/09/envelope-driven-development_12.html","title":"Envelope-driven development"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://4.bp.blogspot.com/-t8aAfUE9F74/UFDY2SX_lAI/AAAAAAAABNQ/1OqKtnIUU68/s72-c/envelop.jpg","height":"72","width":"72"},"thr$total":{"$t":"2"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-7829252087299812877"},"published":{"$t":"2012-07-27T17:08:00.001-07:00"},"updated":{"$t":"2012-10-29T10:54:36.718-07:00"},"title":{"type":"text","$t":"The tiny microbe's survival guide 
for Hadoop deployment."},"content":{"type":"html","$t":"\u003cdiv style\u003d\"text-align: center;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://upload.wikimedia.org/wikipedia/commons/4/44/Acrocanthosaurus_size_comparison.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"275\" src\u003d\"http://upload.wikimedia.org/wikipedia/commons/4/44/Acrocanthosaurus_size_comparison.png\" width\u003d\"640\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eScalable code is not enough... you need to know your big data plaform works on the inside.\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003eThis post is about how to switch paradigms. \u0026nbsp;It assumes that you've seen one or two of the thousands of big-data sales pitch videos. \u0026nbsp;Maybe you've even skimmed a few parts of the \u003ca href\u003d\"http://hadoopbook.com/\"\u003eelephant book\u003c/a\u003e. \u0026nbsp;So whats next ? 
\u0026nbsp; \u0026nbsp;\u003c/span\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: yellow;\"\u003eNow - its time to start\u0026nbsp;RUNNING your MapReduce jobs in a real cluster...\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eFrom \"write once, run anywhere\" to \"write once, run EVERYWHERE\".\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eMap/reduce jobs run over several machines at once, and these machines need to play nice with each other. \u0026nbsp;Although you don't have to be a network engineer to setup a hadoop platform - you do have to be ready to think about data and computation quite differently. \u0026nbsp;You can't drag files around, turn off a few firewalls, and pray for the best... Rather: you need to understand Unix, the JVM, threads, and cloud-based file systems like S3. \u003cbr /\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003ci\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"font-size: large;\"\u003eIn particular, you must also understand how these different technologies interplay in order to compose a maintainable, continuously deployable beacon of big-data beauty:\u003c/span\u003e\u003c/i\u003e\u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://2.bp.blogspot.com/-ttaPemdIyFY/UBM7KMWcVQI/AAAAAAAABJw/c20cSkSqTIY/s1600/chart-1.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"320\" src\u003d\"http://2.bp.blogspot.com/-ttaPemdIyFY/UBM7KMWcVQI/AAAAAAAABJw/c20cSkSqTIY/s320/chart-1.png\" width\u003d\"244\" 
/\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eA rough diagram of the way our hadoop deployments work with respect to existing libraries and tools... Its  important to note that even the CODE that Mappers and reducers run off  of is loaded, by indirection into HDFS, as is the data.\u0026nbsp; Thus, you have  to be comfortable with several different technologies in order to run  code, integrate data, and deploy data on HDFS.\u0026nbsp; \u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cb\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"font-size: large;\"\u003eSo : Here goes.\u0026nbsp; The tiny-microbe's simplified Hadoop deployment survival guide !\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eMany of these bullets are directly related to EMR - amazon's dynamic, super-scalable map/reduce computing platform (but they apply equally to any large, distributed computation infrastructure that is based on a hadoop-like paradigm).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e1) The easy stuff: CHECK YOUR \u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: yellow;\"\u003ePLATFORM\u003c/span\u003e.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eGet a birds eye view of hdfs:\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eDo you really have \"big data?\".... There's only one way to find out: \u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003ci\u003ehadoop fs -dus \u003c/i\u003e(total size of all files).\u003ci\u003e \u003c/i\u003e\u003c/li\u003e\u003cli\u003e\u003ci\u003ehadoop fs -du\u003c/i\u003e (get the cumulative file sizes, by directory).\u0026nbsp; \u003c/li\u003e\u003cli\u003e\u003ci\u003ehadoop fs -lsr \u003c/i\u003e(recursively list all files). 
\u003c/li\u003e\u003c/ul\u003e\u003cb\u003eElastic Mapreduce : Know your instances!\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003eOne of the most critical mistakes I made on EMR was that of ignoring existing AMI-instance information.\u0026nbsp; This can easily be ascertained via the EMR elastic-mapreduce ruby client:\u003cbr /\u003e\u003ci\u003eelastic-mapreduce --region eu-west-1 --describe --jobflow XXXXXXX\u0026nbsp;\u003c/i\u003e\u003cbr /\u003eIgnoring most of the boiler plate, we see AmiVersion as well as hadoop version.\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u003ci\u003e\u0026nbsp;\u0026nbsp; \"AmiVersion\": \"latest\"\u003c/i\u003e\u003cbr /\u003e\u003ci\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; \"HadoopVersion\": \"0.20.205\"\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;These are critical : an old \"AmiVersion\" can be the harbinger of doom: your shell configuration scripts, and even the internal mapper/reducer java classpath settings can be completely thrown off if you have old or noncompatible jars or \"obsolete\" resources dominating your executable or classpaths.\u0026nbsp; The simplest way to \"broadly\" avoid such issues is just to make sure you have a sane AmiVersion and HadoopVersion values by running this simple command.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003eI learned this the hard way by using an old version of the amazon ruby elastic-mapreduce client.\u0026nbsp; Old client scripts can lead to old AMI deployments, even if you specify \"latest\" all over the place!\u0026nbsp; So make sure, in addition to checking your cluster after setting it up, that your using the \"right\" client side tools for deploying and setting up your infrastructure.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e2) Custom configuration cannot be avoided.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eHadoop allows you to toggle everything from the amount of memory used in sorting, to default timeouts, the number of total counters, and even the way the 
classpath is built.\u0026nbsp; \u003cu\u003eYOU WILL VERY LIKELY HAVE TO MODIFY THESE\u003c/u\u003e at some point, because every map/reduce job has its own idiosyncrasies. \u0026nbsp;Understanding the configuration of hadoop jobs and clusters (these two configuration tasks are distinct... of course), is an essential part of understanding the basics of how hadoop works, and you \u003ci\u003eneed to understand how hadoop works in order to be confident when running large, expensive map-reduce jobs.\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eI've been using these links alot lately. \u0026nbsp;I'll update this with time:\u003cbr /\u003e\u003cul\u003e\u003cli\u003eFor configuring specific jobs, the best resource is the actual hadoop\u003ca href\u003d\"http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/JobConf.html\"\u003e\u0026nbsp;javadocs\u003c/a\u003e : these are the most up-to-date documentation available. \u0026nbsp;\u003c/li\u003e\u003cli\u003eThe \"official\" \u003ca href\u003d\"http://hadoop.apache.org/common/docs/r0.23.1/hadoop-project-dist/hadoop-common/core-default.xml\"\u003econfiguration docs\u003c/a\u003e: although there might be some typos, these are especially useful for configuring cluster specific parameters.\u003c/li\u003e\u003cli\u003eThe essential\u0026nbsp;\u003ca href\u003d\"http://www.cloudera.com/blog/2009/12/7-tips-for-improving-mapreduce-performance/\"\u003eCloudera tuning basics.\u003c/a\u003e\u0026nbsp; These will effect both the way you run, as well as write, your map/reduce jobs.\u003c/li\u003e\u003cli\u003e\u003ca href\u003d\"http://hadoop.apache.org/mapreduce/docs/r0.21.0/mapred_tutorial.html#Task+JVM+Reuse\"\u003eAnother important \"official\" article on how the JVM gets reused. 
\u003c/a\u003e\u0026nbsp;I found this confusing at first, but its really important to understand, especially for large jobs, when latency can be a big problem.\u003c/li\u003e\u003cli\u003eRemember: changing your configuration parameters (i.e. those in conf/*xml), might require you to restart your name-node / task trackers!\u0026nbsp; \u003c/li\u003e\u003c/ul\u003e\u003cbr /\u003e\u003cb\u003e3) \u003cspan style\u003d\"background-color: yellow;\"\u003eClasspaths\u003c/span\u003e: Something fishy ? Debug them at \u003cu\u003eruntime\u003c/u\u003e. \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eClasspath issues are easy to fix on simple webservers : you can hard code them, you can bundle them...in fact, you can even run a simple webserver right out of your IDE.\u0026nbsp; Obviously, this is a no-no for distributed, large-scale computation.\u0026nbsp; Rather than run away from the all mighty java classpath, embrace it as your friend.\u0026nbsp; Its EASY to debug !\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; //a snippet adopted from http://www.mkyong.com/java/\u003cbr /\u003e\u003cpre class\u003d\"java\" style\u003d\"font-family: monospace;\"\u003e\u003cspan style\u003d\"color: black; font-weight: bold;\"\u003e\u003c/span\u003e   \u003cspan style\u003d\"color: black; font-weight: bold;\"\u003epublic\u003c/span\u003e \u003cspan style\u003d\"color: black; font-weight: bold;\"\u003estatic\u003c/span\u003e \u003cspan style\u003d\"color: #000066; font-weight: bold;\"\u003evoid\u003c/span\u003e dumpClasspath\u003cspan style\u003d\"color: #009900;\"\u003e(\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e)\u003c/span\u003e \u003cspan style\u003d\"color: #009900;\"\u003e{\u003c/span\u003e\u003cbr /\u003e        \u003cspan style\u003d\"color: #003399;\"\u003eClassLoader\u003c/span\u003e cl \u003cspan style\u003d\"color: #339933;\"\u003e\u003d\u003c/span\u003e 
\u003cspan style\u003d\"color: #003399;\"\u003eClassLoader\u003c/span\u003e.\u003cspan style\u003d\"color: #006633;\"\u003egetSystemClassLoader\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e(\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e)\u003c/span\u003e\u003cspan style\u003d\"color: #339933;\"\u003e;\u003c/span\u003e\u003cspan style\u003d\"color: #339933;\"\u003e\u003c/span\u003e\u003cbr /\u003e        \u003cspan style\u003d\"color: black; font-weight: bold;\"\u003efor\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e(\u003c/span\u003e\u003cspan style\u003d\"color: #003399;\"\u003eURL\u003c/span\u003e url\u003cspan style\u003d\"color: #339933;\"\u003e:\u003c/span\u003e urls\u003cspan style\u003d\"color: #009900;\"\u003e)\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e{\u003c/span\u003e\u0026nbsp;\u003c/pre\u003e\u003cpre class\u003d\"java\" style\u003d\"font-family: monospace;\"\u003e\u0026nbsp;         //System.out is a bad idea in the cloud!\u003c/pre\u003e\u003cpre class\u003d\"java\" style\u003d\"font-family: monospace;\"\u003e\u003cspan style\u003d\"color: #009900;\"\u003e          log.info(\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e(\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e(\u003c/span\u003e\u003cspan style\u003d\"color: #003399;\"\u003eURLClassLoader\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e)\u003c/span\u003ecl\u003cspan style\u003d\"color: #009900;\"\u003e)\u003c/span\u003e.\u003cspan style\u003d\"color: #006633;\"\u003egetURLs\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e(\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e)\u003c/span\u003e\u003cspan style\u003d\"color: #009900;\"\u003e)\u003c/span\u003e\u003cspan style\u003d\"color: #339933;\"\u003e;\u003c/span\u003e\u003cbr /\u003e        \u003cspan style\u003d\"color: #009900;\"\u003e}\u003c/span\u003e\u003cbr /\u003e   \u003cspan 
style\u003d\"color: #009900;\"\u003e}\u003c/span\u003e\u003c/pre\u003e\u003cpre class\u003d\"java\" style\u003d\"font-family: monospace;\"\u003e\u003cspan style\u003d\"color: #009900;\"\u003e\u0026nbsp;\u003c/span\u003e\u003c/pre\u003eThe above snippet will dump each and every classpath entry to your console, eliminating the guesswork associated with questions like \"dammit... which jar is it using for comons-xxx....\".\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e3) \u003cspan style\u003d\"background-color: yellow;\"\u003eLogging\u003c/span\u003e : Yes sir ! You can still barf all over the console !\u0026nbsp; \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eSystem.outs don't work in the cloud - the JVMs running your map/reduce jobs are running in a different completely separately from your interface to the NameNode.\u0026nbsp; BUT, hadoop's web interface collects all of your \u003cspan style\u003d\"background-color: lime;\"\u003elogging\u003c/span\u003e information per each mapper/reducer for you...So...\u0026nbsp; stop using System.out, and start using logs, for EVERYTHING that you want to be able to see and debug.\u0026nbsp; We use \u003ca href\u003d\"http://www.slf4j.org/\"\u003ethe slf4j, library independent logger\u003c/a\u003e, which can simply pick a default logger (i.e log4j) off your classpath for you, to eliminate variability.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cu\u003eNow - you can use your logs to debug anything that goes wrong through the hadoop web ui.\u003c/u\u003e The hadoop ui provides a 4KB, 8KB, and FULL view of all logs for EVERY single node, so, when something goes wrong - here's how you track it down:\u003cbr /\u003e\u003cul\u003e\u003cli\u003eGo to your task tracker interface.\u003c/li\u003e\u003cli\u003eLook for the \"failed tasks\" data table.\u0026nbsp;\u003c/li\u003e\u003cli\u003eClick on the \"failed task\"\u003c/li\u003e\u003cli\u003eClick on the \"Logs\" column (in EMR, you do this with the \"links\" commandline web browser, 
and you can download any whole file onto disk through the links interface).\u003c/li\u003e\u003c/ul\u003e\u003cb\u003e4) \u003cspan style\u003d\"background-color: yellow;\"\u003eUnit tests:\u003c/span\u003e Real data, real expectations. \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eAlot of unit tests simply test that your code doesn't crash miserably, for example:\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; Assert.assertTrue(myUrl !\u003d null);\u003cbr /\u003e\u003cbr /\u003eNow - imagine you had to crawl 1,000,000 web pages - based on stringified urls. \u0026nbsp;There are ALOT of bad possible urls that might get sent into any such crawler, for example (\"\", \"http://\u0026lt;your url goes here\u0026gt;\", \"1xxxx\", etc...).\u0026nbsp; A much better test would be, then, to confirm that any URL generator/parser/extractor is ACTUALLY generating valid, useful urls:\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; Assert.assertTrue(\u003cspan style\u003d\"background-color: lime;\"\u003enew Url(myUrl).getHost().length \u0026gt; 1\u003c/span\u003e); \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e5) For production : \u003cspan style\u003d\"background-color: yellow;\"\u003eIntegration tests are more important\u003c/span\u003e than Unit tests ! 
\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eWhen we decide to test a full pipeline of distributed tasks, we need to know that, when run on a reasonable size data set, we are not\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp; 1) overlogging\u003cbr /\u003e\u0026nbsp;\u0026nbsp; 2) creating memory leaks\u003cbr /\u003e\u0026nbsp;\u0026nbsp; 3) filling up the heap with unnecessary objects in global variables\u003cbr /\u003e\u0026nbsp;\u0026nbsp; 4) maintaining state between computations of orthogonal records\u003cbr /\u003e\u003cbr /\u003eThese sorts of bugs are easily seen in a reasonable size integration test, but lost in a small unit tests with mocked data.\u0026nbsp; Unit tests are, of course, an important developer tool for creating maintainable code - but they are more of a diagnostic tool than anything else.\u0026nbsp; When running at scale, you MUST first test your data locally with real integration tests that process large amounts of data (that is, that process an amount of data that local machines can reasonably deal with in 5 to 10 minutes).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e6) Yo : Did I mention the \u003cspan style\u003d\"background-color: yellow;\"\u003eclasspath\u003c/span\u003e?\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eI gotta get back to this : the classpath really is important.\u0026nbsp; Here are 3 things to remember :\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp; a) There are 2 classpaths : the classpath that YOUR CODE has.\u0026nbsp; This is the known as the \"HADOOP_CLASSPATH\"... And\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u0026nbsp;  b) The runtime classpath: the classpath that YOUR MAPPERS/REDUCERS are  run under.\u0026nbsp; Again, you can use a simple runtime dump of the classpath  (you better log it if you want to see it through the hadoop web  interface), to debug any issues. 
When combining a lack of rigor for  dealing with your classpath along with a problem such as (1) above  (laziness in inspecting and understanding your cluster setup), you  expose yourself to a whole host of runtime related ClassNotFound or  method-not-found errors which will be due to the fact that your runtime  mappers/reducers are inundated in irrelevant, obsolete libraries that  precede those which you use for development.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e7) Its not enough to scale your processing ... you also need\u003cspan style\u003d\"background-color: yellow;\"\u003e scalable data views\u003c/span\u003e !\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eCalculating data for a large input set in a short period of time is great, but you need to serve it up to add value to your business.\u0026nbsp; Make damn sure you have a scalable view platform.\u0026nbsp; The type of \"sink\" that you put your hadoop data into will require on the use case.\u0026nbsp; Here is a brief summary of whats I've been playing with lately.\u0026nbsp; Of course, this is but a small sample:\u003cbr /\u003e\u003cul\u003e\u003cli\u003e\u003cb\u003eSOLR\u003c/b\u003e: A search engine, where all fields can be indexed.\u0026nbsp; You can query by indices, regular expressions, and retrieve data as XML or json.\u0026nbsp; SOLR is memory intensive, caching results and to provide high performance indexes over the many attributes of for each tuple.\u0026nbsp; \u003c/li\u003e\u003cli\u003e\u003cb\u003eDYNAMODB\u003c/b\u003e: A key/value store thats rapidly scalable.\u0026nbsp; Amazon has reported up to 250K inserts per second.\u0026nbsp; Lookups are done by keys.\u0026nbsp; DYNAMODB is run entirely off of solid state disks, in the cloud, by amazon, for you :)\u0026nbsp; So... 
you really don't have to worry about the details too much.\u0026nbsp;\u0026nbsp; Great for lazy programmers that don't want to worry about administering a large data store :)\u0026nbsp; The downside is that there are limitations on record sizes (64KB), and the indexing isn't as broad as SOLRs.\u003c/li\u003e\u003cli\u003e\u003cb\u003eColumnar databases like Cassandra / HBase\u003c/b\u003e: These sorts of databases depart from the relational \"table-blocks\" approach by creating column blocks.\u0026nbsp; Columnar databases generally scale better when dealing with large, dynamic data sets. For example, when adding new features to a record (i.e. adding a \"social security column\" to a \"persons\" database), individual records (\"persons\" in this example)\u0026nbsp; don't have to be modified to accomodate the newly introduced data type, since each column is a separate disk block.\u0026nbsp; \u003c/li\u003e\u003cli\u003e\u003cb\u003eRelational systems\u003c/b\u003e may still work... with the added benefit of making your sysadmin happy :) For a few million reasonably sized records, you may not even need any fancy NoSQL databases at all.\u0026nbsp; An unsharded MySQL instance might do the trick.\u0026nbsp; And of course, you have the advantage that \"everyone\" knows SQL.\u003c/li\u003e\u003c/ul\u003e\u003cbr /\u003e\u003cb\u003e8) Deal with dirty data by cleaning and counting ... 
not by ignoring!\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eJust because your processing unstructured text doesn't mean you can hack without validation.\u0026nbsp; Strive for jobs that specialize in handling of clean OR dirty records - but not both.\u0026nbsp; Why?\u0026nbsp; Because you should be able to define attributes and relationships, and \u003cu\u003eknow \u003c/u\u003e\u003cu style\u003d\"background-color: lime;\"\u003ethat those relationships are preserved logically throughout your pipeline\u003c/u\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e.\u003c/span\u003e\u0026nbsp;\u0026nbsp;\u003cul\u003e\u003cli\u003eIf people are defined as having a \"shoe size\", make sure that you can count how many do, and do not have records for their \"shoe sizes\".\u0026nbsp;\u0026nbsp;\u003c/li\u003e\u003cli\u003eIf every individual needs a social security number... throw an exception when you see one that goes missing, and test for that exception exhaustively.\u003c/li\u003e\u003cli\u003e\u0026nbsp;If quality exceptions are hurting you by killing a job : prepend a cleaning job to the beggining of a data set, rather than avoid quality exceptions.\u003c/li\u003e\u003c/ul\u003eRemember : you will be (most-likely) running the same jobs over and  over again, against increasingly larger data sets.\u0026nbsp; You will need to  have a plan for dealing with old data, bad data, recycled data, etc...\u0026nbsp; And of course : your tests should validate that those relationships are preserved down to the last drop.\u0026nbsp; A single missing character or case-inversion can lead to millions or billions of lost data points. 
\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e9) Counters .\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eEffectively evaluating the totality of a job, is difficult when looking at logs, since a typical job might have many 100s of tasks which get logged separately.\u0026nbsp; In contrast, \u003ci\u003ecounters\u003c/i\u003e are easily aggregated into a concise view which can be eyeballed in a matter of seconds.\u0026nbsp; \u003cbr /\u003e\u003cul\u003e\u003cli\u003eIt is easy to overlook \u003cb\u003ethat there are both\u003c/b\u003e \u003cb\u003eTASK as well as JOB specific counters\u003c/b\u003e which are easily accessed through your hadoop job-tracker UI.\u0026nbsp; TASK specific counters can be accessed, for example, at the localhost's job tracker \"taskdetails.jsp\" page, i.e. http://localhost:9100/taskdetails.jsp?tipid\u003d\u0026lt;your_task_id_here\u0026gt;\u003c/li\u003e\u003cul\u003e\u003cli\u003eTASK scoped counters will tell you if a specific task is having any issues.\u0026nbsp; If it is, you can check it out directly by looking at the logs.\u003c/li\u003e\u003cli\u003eJOB scoped counters are the counters hadoop prints out at the end of your job - and are the counters shown in the job tracker web ui.\u0026nbsp; These are, ultimately, the ones which you will rely on the most to evaluate a job's success/failure.\u003c/li\u003e\u003cli\u003eCounters from FAILED jobs are removed from the overall tally by the JobTracker - this is something to beware of when you try to use counters to debug failing or buggy jobs. 
\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u003c/li\u003e\u003c/ul\u003e\u003c/ul\u003e\u003cul\u003e\u003cli\u003e\u003cb\u003eYou can also use d\u003c/b\u003e\u003cb\u003eynamic counters to increase the spectrum of attributes that you talley\u003c/b\u003e \u003cb\u003ewithout overcomplicating your code.\u003c/b\u003e\u0026nbsp;\u003c/li\u003e\u003cul\u003e\u003cli\u003eNote:\u0026nbsp;dynamic counters take up memory and can explode - so don't use too many of them, and make sure they are bounded ! 
\u003c/li\u003e\u003cli\u003econtext.getCounter(\"my_cntr_group\",\"Value_success_\"+isValidValue(x)).increment(1);\u003c/li\u003e\u003cli\u003eUse\u003cb\u003e counters as \"bins\" \u003c/b\u003eof records created :\u003c/li\u003e\u003cul\u003e\u003cli\u003eIf you are outputing numbers from 1-1000, you can have counters which emit the log of these scores, so you have an idea of how many are above 10, versus\u0026nbsp; how many are above 100).\u0026nbsp;\u0026nbsp;\u003c/li\u003e\u003c/ul\u003e\u003c/ul\u003e\u003cul\u003e \u003cul\u003e\u003cli\u003eRemember that hadoop has a\u0026nbsp; host of parameters you can use to increase the # of total allowed counters: \u003ca href\u003d\"http://www.thecloudavenue.com/2011/12/limiting-usage-counters-in-hadoop.html\"\u003ehttp://www.thecloudavenue.com/2011/12/limiting-usage-counters-in-hadoop.html\u003c/a\u003e.\u003ccode class\u003d\"java plain\"\u003e\u0026nbsp;\u003c/code\u003e\u003c/li\u003e\u003cli\u003e\u003ccode class\u003d\"java plain\"\u003econf.setInt(\u003c/code\u003e\u003ccode class\u003d\"java string\"\u003e\"mapreduce.job.counters.limit\"\u003c/code\u003e\u003ccode class\u003d\"java plain\"\u003e, 1000\u003c/code\u003e\u003ccode class\u003d\"java plain\"\u003e);\u003c/code\u003e\u003c/li\u003e\u003c/ul\u003e\u003c/ul\u003e\u003c/ul\u003eI might venture to say that counters get to the very essence of computing... in the cloud as well as in conventional programming.\u0026nbsp; In any case, they are certainly one of the simplest and most efficient debugging tools in the hadoop arsenal.\u0026nbsp; Our good pal \u003ca href\u003d\"http://silverwraith.com/\"\u003emister avleen\u003c/a\u003e might think of this as a real-time, distrubted\u003ci\u003e \"grep ...... 
| wc -l\"\u003c/i\u003e, which continually runs in the background.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e10) When all else fails: Know how to find your \u003cspan style\u003d\"background-color: yellow;\"\u003enodes\u003c/span\u003e.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eThe cloud is like a good baby sitter... it will free you of the mundane aspects of child-care...\u0026nbsp; But in case of emergencies, your gonna have to be on call.\u0026nbsp; Sometimes, things just break, somewhat stochastically.\u0026nbsp; Certain machines can act funny... or certain records can cause a particular reducer to go haywire.\u0026nbsp; In these cases, its nice to know that you can actually go straight to the scene of the crime.\u0026nbsp; These commands, taken from\u003cspan style\u003d\"background-color: lime;\"\u003e https://github.com/Yelp/mrjob/wiki/Accessing-Elastic-MapReduce-slave-nodes-via-SSH\u003c/span\u003e, can be absolute lifesavers in a pinch (again, of course, these are quite important for EMR where the machines are ephemeral) .\u003cbr /\u003e\u003cbr /\u003eGet the ip address of your name node : \u003ccode\u003e\u003cbr /\u003e\u003c/code\u003e\u003cbr /\u003e\u003cpre\u003e\u003ccode\u003e$\u0026gt; elastic-mapreduce --describe j-JOBFLOWID | grep MasterPublicDnsName | cut -d'\"' -f4\u003c/code\u003e\u003c/pre\u003e\u003cbr /\u003eList IPs of slave nodes, so you can ssh into them and hack about :\u003cbr /\u003e\u003cpre\u003e\u003ci\u003e\u003ccode\u003e$\u0026gt; hadoop dfsadmin -report | grep ^Name | cut -f2 -d: | cut -f2 -d' '\u003c/code\u003e\u003c/i\u003e\u003c/pre\u003e\u003cbr /\u003eAlthough hacking about individual nodes is not going to lead to a production deployable big-data software architecture\u003cb\u003e (\u003c/b\u003ei.e. 
any approach to administering hadoop that requires regular node-to-node manual intervention is going to be very difficult to scale), such hackery is essential for certain types of root-cause-analysis.\u0026nbsp; For example, I recently found that going around deleting jar files in a particular directory, and rerunning a job, rapidly demonstrated the cause of a error that was killing one of my jobs.\u0026nbsp; Of course, once you find the problem, its easy enough to add the fix as a new command in your EMR bootstrap script or puppet/chef whatever machine builder. \u003cbr /\u003e\u003cbr /\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/7829252087299812877/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/07/the-survival-guide-of-tiny-microbe.html#comment-form","title":"2 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/7829252087299812877"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/7829252087299812877"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/07/the-survival-guide-of-tiny-microbe.html","title":"The tiny microbe's survival guide for Hadoop deployment."}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://2.bp.blogspot.com/-ttaPemdIyFY/UBM7KMWcVQI/AAAAAAAABJw/c20cSkSqTIY/s72-c/chart-1.png","height":"72","width":"72"},"thr$total":{"$t":"2"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-5337029014719637251"},"published":{"$t":"2012-07-23T16:04:00.000-07:00"},"updated":{"$t":"2012-09-19T19:32:17.688-07:00"},"title":{"type":"text","$t":"Hadoop: Denormalization of many-to-many data by using multiple map keys for a single value."},"content":{"type":"html","$t":"When I first read about \u003cu\u003ereduce side joins\u003c/u\u003e in hadoop, I spent some time walking through a bunch of examples from this\u003ca href\u003d\"http://www.inf.ed.ac.uk/publications/thesis/online/IM100859.pdf\"\u003e\u003ccite\u003e whitepaper by Jairam Chandar\u003c/cite\u003e\u003c/a\u003e on Hadoop join-algorithms.\u003cbr /\u003e\u003cbr /\u003eIn the beggining, everything seemed simple enough - because I was focusing on joins over 1-1 data relations.\u0026nbsp; For example, if\u0026nbsp; we need to join a persons individual \"name\" to their indivudual \"shoesize\", via their  \"id\", we are only joining a finite number of elements (2) into a single outputted data structure. 
\u003cbr /\u003e\u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://4.bp.blogspot.com/-3kZvhuPKVkI/UA25gHVQyPI/AAAAAAAABJE/70__xqx2SVw/s1600/chart-1.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"65\" src\u003d\"http://4.bp.blogspot.com/-3kZvhuPKVkI/UA25gHVQyPI/AAAAAAAABJE/70__xqx2SVw/s640/chart-1.png\" width\u003d\"640\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eOn the far left, we have a file with key-\u0026gt;value pairs (i.e.  typically, these might be tab separated in hadoop).\u0026nbsp; These are read in  by \"trivial\" mappers, which do nothing other than emit the separate  key/values, which are then shuffle/sorted into the same data tuple,  which is then the input to a single reduce function.\u0026nbsp; The join can be  implemented in the reducer very easily, by simply adding the first and  second \"values\" to a concatenated string, or a MapWritable, or any other  data type. 
\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003eThese types of joins can be done in a very straightforward way in hadoop, often involving a little hacking with character separators, or pair objects.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eBut what about Graphs, Relations, and Networks ?\u0026nbsp;\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003eWhen we talk about \"big-data\", we ultimately are going to have to face the fact that some data sets are combinatorial expansive.\u0026nbsp; Social networks are a great example : The amount of interconnections increase geometrically with respect to the number of nodes.\u0026nbsp;\u0026nbsp; When joining data in this paradigm, the simpler strategies for map/reduce joins do not work.\u003cbr /\u003e\u003cbr /\u003eSo lets learn about how we can use many emissions of keys from SINGLE mapper, in order to create an arbitrarily large number of nested view, where each node is a key which can ultimately have many related nodes in its value tuples.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://2.bp.blogspot.com/-38CDxJDTTB4/UA3BxosWKaI/AAAAAAAABJQ/qNdCeQLAgJI/s1600/chart-2.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"38\" src\u003d\"http://2.bp.blogspot.com/-38CDxJDTTB4/UA3BxosWKaI/AAAAAAAABJQ/qNdCeQLAgJI/s640/chart-2.png\" width\u003d\"640\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eThe 2nd job in a two job flow which produces highly-denormalized, accumulated data: On the far left, we depict that each user is related to many  other users, 
for example both \"steve\" and \"laly\" are related to  \"jayunit100\", and the \"\u003cb\u003eMegajoiner\u003c/b\u003e job #1\" captures this information for us, emitting objects which have \"lists\" of related nodes.\u0026nbsp; Thus the input to \u003cb\u003eMegajoiner\u003c/b\u003e job #2 is akin to a pojo (containing a user's data), combined with an adjacency list (which has all the users friends).\u0026nbsp; Now, if we emit steve's data, n times (where n is the number  of friends steve has), with each key being the friend's id (shown above  is 1234, the id of jayunit100), we will now have steve's data mapped  into jayunit100's reducer.\u0026nbsp; The power of this is that, since we are  doing this for ALL users, we have each node's reducer accessing all  related node's data !\u0026nbsp; This gives us a massively denormalized \"summary\"  of all related nodes data for any given node.\u0026nbsp; For example, on the far  right, we can see that our reducer receives a large list of information,  which can then be used to create a \"snapshot\" of all of the shoesizes  and names of individuals who are known to be \u003ci\u003erelated\u003c/i\u003e to the jayunit100  user.\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003e\u003cb\u003eMegajoiner job #1: Join all user information to user interactions: \u003c/b\u003ethus creating an intermediate hadoop output file which contains objects which have (1) self-describing data (i.e. shoesize, id, name) and (2) A adjacency list of id's for all related objects (i.e. id's of friends).\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003eOur first job will scan the entire network of individuals, emitting data about (1) a given individual and (2) ids of all his related nodes (i.e. his friends).\u0026nbsp; So, it will emit the key 1234, with jayunit100's information, and additionally, it can store, inside of jayunit100's information, a list of ids for friends of jayunit100 (i.e. 
4567, 8999).\u0026nbsp; It will also emit 1234-\u0026gt;\u003cb\u003e4567,8999.\u0026nbsp; \u003c/b\u003eThese relations will \"guide\" the next job's mapper - which can emit the same personal information over and over again, each time with a different key. That is , the next job will be able to scan through these relations (4567, 8999), and emit each one as a key which points to the data of jayunit100.\u0026nbsp; \u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e{id:1234 name:\"jayunit100\" shoesize:8.5 {relations:[4567, 8999]}\u003cbr /\u003e\u003cbr /\u003e{id:8999 name:\"steve\" shoesize:6 {relations:[4567, 1234]}\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cb\u003e\u003cb\u003eMegajoiner\u003c/b\u003e job #2: Invert the keys: \u003c/b\u003eUse the embedded adjacency list ids as keys, and emit all of them pointing to the primary data for the node in which they are found.\u0026nbsp; That is, emit a related node as the KEY of the individual record's value.\u0026nbsp; This leads to the same data being sent to many different reducers, like this:\u003cbr /\u003e\u003cbr /\u003e1234-\u0026gt;\u003cbr /\u003e\u0026nbsp;{id:1234 name:\"jayunit100\" shoesize:8.5},\u003cbr /\u003e\u0026nbsp;{id:8999 name:\"steve\" shoesize:6 {relations:[4567, 1234]},\u003cbr /\u003e\u0026nbsp;... \u003cbr /\u003e\u003cb\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: yellow;\"\u003eThis is somewhat counterintuitive, since most of us \"learned\" to create hash data structures by defining keys which uniquely map to values.\u003c/span\u003eThe key thing to realize here, however, is that keys are just a \"routing\" mechanism in hadoop, which allow us to optimize and scale calculations by sending data to a large number of distributed nodes.\u003cbr /\u003e\u003cbr /\u003eSo, again, in this case, we are actually emitting MANY keys for a SINGLE value.  
This way, hadoop's \"shuffle and sort\" will send many different values to a given reducer, so that we can emit a large summary with information from many different indices, allowing for extremely denormalized data outputs, which can be independently utilized without having to do extra table scans or lookups.\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003eThis sort of thing is extremely important in cases where, for example, we want to recover summary data for a webservice in a matter of milliseconds. \u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eTADAAAA !\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eIn Job2 we can now join an ARBITRARY number of records.\u0026nbsp; This is akin to the cross product which we all learned about in discrete mathematics and set theory some time ago.\u0026nbsp; By emitting the same data, over and over again, simply varying keys, we increase the number of reducers which will have access to a given record.\u0026nbsp; We can create super-high performance, denormalized database indices in a massively parallel Map-reduce idiom !\u0026nbsp; This strategy requires only as much memory as any individual final result requires: there is no need to use an intermediate data structure that holds large cache's of personal data, nor is there a need to use a large external query system to iteratively join data over and over again.\u0026nbsp; In other words : \u003cspan style\u003d\"background-color: yellow;\"\u003eThis approach scales (even in extreme scenarios where a given node has many related nodes... 
because the reducer inputs do not need to be stored in memory).\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eA quick aside : You'll notice that I used a richer data abstraction for the join values in these examples...\u0026nbsp; Most of the time, especially in tutorials, we see simpler TextWritable or other, more primitive writable types.\u0026nbsp; You might be wondering how the concrete implementation of such a join would work: and there are many answers.\u0026nbsp; Richer data types abound in real-world MapReduce jobs- we can use pojos that are serialized (avro/thrift/protocolbuffers), or MapWritables to deal with complex value types.\u0026nbsp; Alternatively, we can use a more semantically rich abstraction for data processing, such as that provided by the\u003ca href\u003d\"http://pangool.net/\"\u003e pangool project\u003c/a\u003e, from the \u003ca href\u003d\"http://www.datasalt.com/\"\u003edatasalt folks\u003c/a\u003e which gives you an abstract framework for dealing with the  implementation specific details of dealing with hadoop records which  have many values.\u0026nbsp; Pangool allows you to use relational and predicate logic in defining such map/reduce transformations.\u003ci\u003e\u0026nbsp; \u003cu\u003e\u0026nbsp;\u003c/u\u003e\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e\u003cu\u003eIn any case... No matter what hammer you choose to drive your MapReduce jobs, the core point here is that an understanding of the way we can cleverly use keys to distribute workloads to reducers is part of the beauty of the entire MapReduce paradigm.\u003c/u\u003e\u003ci\u003e\u0026nbsp; \u003c/i\u003e\u003c/i\u003eIn this case, we do so by overloading keys for a given value. 
In other cases, we might do the opposite: we may desire to overload multiple values for a given key."},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/5337029014719637251/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/07/hadoop-denormalization-of-many-to-many.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5337029014719637251"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/5337029014719637251"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/07/hadoop-denormalization-of-many-to-many.html","title":"Hadoop: Denormalization of many-to-many data by using multiple map keys for a single value."}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://4.bp.blogspot.com/-3kZvhuPKVkI/UA25gHVQyPI/AAAAAAAABJE/70__xqx2SVw/s72-c/chart-1.png","height":"72","width":"72"},"thr$total":{"$t":"0"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-3561691168171216673"},"published":{"$t":"2012-07-01T17:58:00.004-07:00"},"updated":{"$t":"2012-10-21T17:29:31.327-07:00"},"title":{"type":"text","$t":"Real world functional iteration : An alternative to mutable intermediate data-structures."},"content":{"type":"html","$t":"\u003ctable cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: 
center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://2.bp.blogspot.com/-Yth9VYsmCTo/UISSyGe0NrI/AAAAAAAABOI/zZze10-GPBw/s1600/Fp_no_destructive_assignment.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" src\u003d\"http://2.bp.blogspot.com/-Yth9VYsmCTo/UISSyGe0NrI/AAAAAAAABOI/zZze10-GPBw/s1600/Fp_no_destructive_assignment.jpg\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"font-size: 13px;\"\u003eFunctional languages replace the need for mutability by providing us with convenient, idiomatic mechanisms for defining transformations without explicitly modifying or assigning variables.\u0026nbsp; This level of decoupling transparently provides us with natural concurrency constructs and scalability, and is in fact the basis for the now famous map-reduce paradigm for processing data at massive scale.\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cspan style\u003d\"font-size: small;\"\u003eIts obvious that simplistic loops can be easily replaced by deterministic, declarative functions in a functional language. 
\u0026nbsp;For example:\u003c/span\u003e\u003c/div\u003e\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cdiv style\u003d\"background-color: #e06666;\"\u003eint sum \u003d 0; int[] numbers\u003d....;\u003c/div\u003e\u003cdiv style\u003d\"background-color: #e06666;\"\u003efor(int i \u003d 0 ; i \u0026lt; numbers.length ; i++)\u003c/div\u003e\u003cdiv style\u003d\"background-color: #e06666;\"\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; sum +\u003d numbers[i]; \u003c/div\u003e\u003cbr /\u003eCan be written as\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: #d9ead3;\"\u003e(reduce + numbers) \u003c/div\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: large;\"\u003e\u003cb\u003eThat was easy: but what about in the real world ?\u0026nbsp;\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eAs problems get more complicated, it becomes easy to throw random data structures at them (sets, hashmaps, treemaps, ....).\u0026nbsp; Domain specific \"objects\" get created, bloating your code, with names like \"SpiderMonkeyRendevouz\" and \"SpiderMonkeyRelationFrequency\"... 
And before you know it - you begin writing code to transform one data structure into another, for no reason whatsover, other than the fact that your \"inital\" data structure did not directly compute the answer to the problem you were trying to solve.\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003eAll that work just to calculate how often two spider monkeys were seen together in the same tree :) \u003c/div\u003e\u003cbr /\u003eSo, without further ado, lets work our way through a case study in the removal of intermediate state by proper use of Clojure's \"reduce\" function (we will completely abandon the SpiderMonkey antics for this exersize).\u0026nbsp; I'm reasonably confident that, if you read this post in its entirety, your ability to avoid such pitfalls will be significantly improved in the future.\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\"Given a string, count the number of instances of each word in that string, and output the result as a map\".\u003cbr /\u003e\u003cbr /\u003e\u0026nbsp;\u003cb\u003eExample 1: A very sub-optimal solution to the word-count problem.\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u0026nbsp;\u003c/b\u003e \u003cbr /\u003e; Here was my first (non-linear performing, as well as ugly) attempt at building a map of words to their counts in a string :\u003cbr /\u003e(defn word-enrichment \u003cbr /\u003e\u0026nbsp; \"input:\u0026nbsp; a string 'a b b'\u003cbr /\u003e\u0026nbsp;\u0026nbsp; output: a map : {'a' 1 'b' 2}\"\u003cbr /\u003e\u0026nbsp; [str_in] \u003cbr /\u003e\u0026nbsp; {:pre [(\u003d (type str_in) (type \"\"))]}\u003cbr /\u003e\u0026nbsp; (let [all \u003cspan style\u003d\"background-color: yellow;\"\u003e(cs/split str_in #\"\\b+\")\u003c/span\u003e] \u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; (into {} ; \u0026lt;- store each emitted word count into this map\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; (for 
[unique_word (set all)]\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; [unique_word \u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u003cspan style\u003d\"background-color: yellow;\"\u003e (count (filter #(\u003d unique_word %) all)\u003c/span\u003e)]))))\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"background-color: white;\"\u003eHow it works : This code first stores all words into a set by splitting a string by word boundaries.\u003c/span\u003e\u0026nbsp; Then, it counts the words by rescanning the string for each word, emitting them into a map (thats what the 'into {}' prefix to the 'for' list comprehension is defining : a map as the collector).\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eImproving the above example by decreasing its use of intermediate state.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003eSo the first red flag here is the unique set of words, which is  obviously unnecessary.\u0026nbsp; It would be more ideal to count each word as we  read it, storing the results in some kind of map.\u0026nbsp; This would be quite  obvious to almost anyone....\u003cbr /\u003e\u003cbr /\u003eHowever, the alternative, in  a language like Clojure eschew's mutable data structures, might evade  you at first : how can we iterate through the words in the text and  build up a map without declaring a global variable?\u003cbr /\u003e\u003cbr /\u003eLets formalize these issues by adding to requirements to our new solution: \u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e1) You can only read through the input string once\u003c/i\u003e\u003ci\u003e.\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003e \u003c/i\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: yellow;\"\u003e\u003ci\u003e\u003cspan 
style\u003d\"font-size: large;\"\u003e2) You can't use any global or mutable variables.\u003c/span\u003e\u003c/i\u003e\u003c/div\u003e\u003cbr /\u003e\u003cb\u003eHow can we count words in a single pass without a mutable data structure ? \u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eThe key here is to think functionally.\u0026nbsp; \u003cu\u003eSince \"for\" emits items, wouldn't it be nice to emit them into a function, for example, a function which merges results, one after another, from each \"for\" emission.\u003c/u\u003e \u003cbr /\u003e\u003cbr /\u003eYes... And in fact, that is what \"reduce\" does !\u003cbr /\u003e\u003cbr /\u003eFirst, lets take a look at the behaviour of the clojure reduce function - I know this is beginners stuff but don't worry, it will get fancy very quickly.\u003cbr /\u003e\u003cbr /\u003e(reduce + [1 1])\u003cbr /\u003e\u0026gt;2\u003cbr /\u003e\u003cbr /\u003eAnd now, we can see that it works with the sequential output of \"for\" just as well.\u003cbr /\u003e\u003cbr /\u003e(reduce + (for [x [1 1]] x))\u003cbr /\u003e\u0026gt;2\u003cbr /\u003e\u003cbr /\u003eAnd now, after a little RTFM i.e. (doc reduce), it becomes clear that we can write our own, 2 argument function implementation in reduce. \u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e(reduce (fn add [a b] (+ a b))\u0026nbsp; [1 1])\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u0026gt;2\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003eSo... 
how does this relate to a word count ?\u0026nbsp; Instead of taking 2 ints, and returning a new one - as above, we can take in a \"map\", and a \"word\", and return a new \"map\" !\u0026nbsp; So, as a template for this sort of function, we combine all these concepts below in a 3 argument call to reduce, which takes advantage of the fact that reduce can take an initial value.\u003c/div\u003e\u003cbr /\u003e\u0026gt; (reduce \u003cspan style\u003d\"background-color: yellow;\"\u003e(fn add [a b] (+ a b)) \u003c/span\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e10\u003c/span\u003e \u003cspan style\u003d\"background-color: cyan;\"\u003e[1 1 1]\u003c/span\u003e)\u003cbr /\u003e13\u003cbr /\u003e\u003cbr /\u003eNow lets try to build our new word count function - first, lets see if we can output something other than a number from each reduce step (i.e. lets output an updated map with a count of 999 for the newly read in word) : \u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u0026gt; (reduce #(assoc %1 %2 999) {} [\"a\" \"a\" \"b\"])\u003cbr /\u003e{\"b\" 999, \"a\" 999}\u003cbr /\u003e\u003cbr /\u003eGood ! 
Our anonymous function as emitted \"words\" as keys, and the number 999 as values, and is clearly getting updated as it goes through the list.\u0026nbsp; Now lets make it smart enough to increment, rather than overwrite, when a word has already been read:\u003cbr /\u003e\u003cbr /\u003e\u0026gt; (reduce \u003cbr /\u003e\u0026nbsp; #(let [v (%1 %2)] \u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; (assoc %1 %2 \u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; (if v (inc v) 1) )) \u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; {} \u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; [\"a\" \"a\" \"b\" \"b\" \"b\"] )\u003cb\u003e\u003cspan style\u003d\"font-size: x-large;\"\u003e\u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"font-size: x-large;\"\u003eExample 2: The final result:\u0026nbsp; \u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eLets do some clean up...\u003cbr /\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003euser\u003d\u0026gt;\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp;(defn word-enrichment \u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp; \"input:\u0026nbsp; a string 'a b b'\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp;\u0026nbsp; output: a map : {'a' 1 'b' 2}\"\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp; [str_in] 
\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp; {:pre [(\u003d (type str_in) (type \"\"))]}\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp; (let [all (clojure.string/split str_in #\"\\b+\")]\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp; (reduce \u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; #(let [v (%1 %2)] \u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; (assoc %1 %2 \u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: white;\"\u003e\u003cspan style\u003d\"background-color: lime;\"\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; (if v (inc v) 1) )) {} all))) \u003c/span\u003e\u003c/div\u003e\u003cbr /\u003eAnd a test: \u003cbr /\u003e\u003cbr /\u003euser\u003d\u0026gt; (word-enrichment \"The practice of stateless programming is the sign of a fine young gentleman .\")\u003cbr /\u003e\u003cbr /\u003e{\"\" 1, \" \" 12, \"a\" 1, \"is\" 1, \"stateless\" 1, \" .\" 1, \"The\" 1, \"the\" 1, \"of\" 2, \"young\" 1, \"programming\" 1, \"fine\" 1, \"practice\" 1, \"gentleman\" 1, \"sign\" 1}\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"font-size: x-large;\"\u003e\u003cb\u003eSummary\u003c/b\u003e\u003c/span\u003e\u003cbr /\u003e\u003cbr /\u003eWe have thus replaced a \"data-structure\" 
driven solution to a problem  (i.e. creating an intermediary set of unique words) with a functional one, which is based around Clojure's \"reduce\"  function.\u0026nbsp;\u0026nbsp; This is particularly relevant in today's map/reduce driven age of functional computation :\u0026nbsp; \u003cu\u003e\u003cb\u003eWhat do you get when you abstracting iteration, and removing intermediate data on a massive scale? Hadoop! \u003c/b\u003e\u003c/u\u003e\u003cbr /\u003e\u003cbr /\u003eSo, although this post's contents are related specifically to how  we can use \"reduce\" to eliminate intermediate state and data in a very simple situation, there is a deeper lesson- functional, side-effect free iteration is a far-superior approach to computation than the typical \"dump-it-in-a-domain-specific-object-and-figure-the-details-out-later\" approach. \u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/3561691168171216673/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/07/reducing-your-state-before-its-too-late.html#comment-form","title":"7 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/3561691168171216673"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/3561691168171216673"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/07/reducing-your-state-before-its-too-late.html","title":"Real world functional iteration : An alternative to mutable intermediate data-structures."}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://2.bp.blogspot.com/-Yth9VYsmCTo/UISSyGe0NrI/AAAAAAAABOI/zZze10-GPBw/s72-c/Fp_no_destructive_assignment.jpg","height":"72","width":"72"},"thr$total":{"$t":"7"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-8874639117919541552"},"published":{"$t":"2012-06-14T18:04:00.002-07:00"},"updated":{"$t":"2012-06-15T12:03:20.819-07:00"},"title":{"type":"text","$t":"(\u003d :heaven (+ (- eclipse projects) counterclockwise)"},"content":{"type":"html","$t":"\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: left;\"\u003e\u003ca href\u003d\"http://1.bp.blogspot.com/-XmTtoB4b7Yg/T9qFLfAW5sI/AAAAAAAABIE/v74S0oQJQmI/s1600/Screen+Shot+2012-06-14+at+8.43.08+PM.png\" imageanchor\u003d\"1\" style\u003d\"background-color: white; margin-left: 1em; margin-right: 1em;\"\u003e\u003cbr /\u003e\u003cimg border\u003d\"0\" height\u003d\"200\" src\u003d\"http://1.bp.blogspot.com/-XmTtoB4b7Yg/T9qFLfAW5sI/AAAAAAAABIE/v74S0oQJQmI/s320/Screen+Shot+2012-06-14+at+8.43.08+PM.png\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/div\u003e\u003cb style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb style\u003d\"background-color: white;\"\u003eThe big, bad Eclipse is great for whipping a massive java source tree into shape, fast. \u0026nbsp;But for Clojure - its a different, more thoughtful development idiom. \u0026nbsp;You don't need to browse complex type hierarchies and fold 1000s of lines of code every other second. 
\u0026nbsp;Debuggers are, although available, not usually necessary due to the functional nature of things : problems can be directly isolated. \u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb style\u003d\"background-color: white;\"\u003eThus - I don't want heavyweight java project management in a dynamic language environment, and I don't any need refactoring fanciness. \u0026nbsp;Here is how to transform Eclipse into a super-productive, lightweight IDE for zen-style Clojure development:\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003col\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cspan class\u003d\"Apple-style-span\"\u003eInstall counterclockwise from the Eclipse Marketplace by searching for it - or just add it via\u0026nbsp;\u003c/span\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px; font-family: arial, sans-serif; font-size: 13px;\"\u003e\u003ca href\u003d\"http://ccw.cgrand.net/updatesite\" rel\u003d\"nofollow\" style\u003d\"color: #0000cc;\"\u003ehttp://ccw.cgrand.net/updatesite\u003c/a\u003e.\u003c/span\u003e\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eGo to window-\u0026gt;Open Perspective-\u0026gt;Remote System Explorer and find your Clojure source code folders. This is a handy trick that is little known in eclipse - you can use your file system as a \"left panel\" for browsing files. \u0026nbsp;Dynamic languages, which don't benefit from IDE managed source code the way C++/Java does, are dominated by a market of \"simple\" editors that provide this sort of lightweight interface. 
\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003c/ol\u003e\u003cdiv\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cb style\u003d\"background-color: white;\"\u003eWhy Eclipse - what about the XXX IDE ?\u003c/b\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eAfter bouts with VI, Coda, BBEdit, and even (briefly) emacs, I finally settled on eclipse for the following reasons :\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003c/div\u003e\u003col\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eIts stable.\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eIt works.\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eIts free.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eThe Counterclockwise plugin is completely self installing.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eIt \"knows\" java - so even if you don't write a lot of java, you can easily have a mock java project for browsing through APIs and method names, just for fun, 
along side your clojure development environment.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eEclipse doesn't have to be heavy weight ! You can use it as a simple text editor if you follow the steps above.\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003c/ol\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cb style\u003d\"background-color: white;\"\u003eWhat about other plugins ?\u0026nbsp;\u003c/b\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eWell... lets see....\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003c/div\u003e\u003col\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cb\u003eNetBeans \u003c/b\u003eboasts the \"Enclojure\" plugin, but it doesn't transparently and easily install in the latest netbeans (1.7). 
\u0026nbsp;I also found that, due to dependency conflicts, the\u003cb\u003e Enclojure plugin \u003c/b\u003edoesn't readily install even in the suggested version, 1.6.9\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cb\u003eIntelliJ\u003c/b\u003e is not 100% free, although La Clojure is.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003eThe beautiful\u003cb\u003e TextMate\u003c/b\u003e is not free, although David Nolen's plugin for Clojure in TextMate is free. \u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cb\u003eCoda 2\u003c/b\u003e is not free, and although Coda used to support a \"Clojure.Modes\" plugin (i.e. fro SubEthaEdit), the new Coda is not backwards compatible. \u0026nbsp;Plus, its over 75$ and its not cross platform !\u0026nbsp;\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cb\u003eEmacs + Slime\u003c/b\u003e ? \u0026nbsp;These are effective and well tested, but difficult to use if you come from the VI world.\u003c/span\u003e\u003c/li\u003e\u003cli\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cb\u003eVIM\u003c/b\u003e ? 
The vim-eclipse plugin is effective -- but do you really want to use it for large projects ?\u003c/span\u003e\u003c/li\u003e\u003c/ol\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/span\u003e\u003cbr /\u003e\u003cb style\u003d\"background-color: white;\"\u003eConclusion\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb style\u003d\"background-color: white;\"\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cspan class\u003d\"Apple-style-span\" style\u003d\"background-color: white;\"\u003e(\u003d :happiness (+ :eclipse :counterclockwise :remote_mode))\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/8874639117919541552/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/06/heaven-eclipse-projects.html#comment-form","title":"2 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/8874639117919541552"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/8874639117919541552"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/06/heaven-eclipse-projects.html","title":"(\u003d :heaven (+ (- eclipse projects) counterclockwise)"}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://1.bp.blogspot.com/-XmTtoB4b7Yg/T9qFLfAW5sI/AAAAAAAABIE/v74S0oQJQmI/s72-c/Screen+Shot+2012-06-14+at+8.43.08+PM.png","height":"72","width":"72"},"thr$total":{"$t":"2"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-802575322793368157"},"published":{"$t":"2012-05-07T15:45:00.000-07:00"},"updated":{"$t":"2012-07-31T12:34:16.906-07:00"},"title":{"type":"text","$t":"How Git, heroku, and your laptop work together to coordinate decentralized code, sites, and deployments."},"content":{"type":"html","$t":"\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cu\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e \u003c/u\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eMy first time playing with heroku was very cool, but \u003ci\u003emystifying\u003c/i\u003e - it wasn't clear how or why it was that I needed to run \"git init\", and why I was \"pushing\" code to heroku.\u0026nbsp; As a java developer, I'm used to setting up a tomcat server, dropping a jar in some random folder, and then praying to see something on localhost:8080.\u0026nbsp; Heroku is much more elegant - it uses the same tested model for commiting code (git), as it does for committing deployments !\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eAt first this seems confusing, because we associate git only as a tool for managing source code (some of us only use git in conjunction with github, and have never fathomed the idea that we can use git to send 
runnable code to the cloud).\u003c/b\u003e \u003cbr /\u003e\u003cbr /\u003eSo, I was forced to decouple my notion of \"git\" as a version control tool from \"git\" as a generic source code commisioning utility, when I decided I  wanted to experiement with deploying a blog (using the excellent 5-minute toto blogging tool : http://www.rubyinside.com/deploy-blog-with-toto-and-heroku-2962.html), that others could help and collaborate on. \u003cbr /\u003e\u003cbr /\u003eI quickly found that I didn't fully understand heroku's model.\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003eSo ... Here is a visual explanation of the way it all works :\u0026nbsp;\u0026nbsp; The commands are labelled edges.\u0026nbsp; The arrows represent the flow of information.\u0026nbsp; When you clone an app, you get its \u003cu\u003econtents\u003c/u\u003e from github.\u0026nbsp; When you create a heroku app, you get a new app name and its associated meta-data from heroku, which is associated with your account (I assume there is a small shred of information stored locally on your machine regarding this, but it is trivial and can easily be re-retrieved from the heroku servers from any other machine, once reinstall the heroku toolbelt and it learns your heroku account credentials).\u0026nbsp; \u003cu\u003eThus, there is very little essential application \"state\" saved on your machine, in fact there is none.\u003c/u\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003eThe mystery about this all is simply that git doesn't care how many repos you associate it with !\u0026nbsp; You can push to github and heroku at the same time.\u0026nbsp; Each repo is independent.\u0026nbsp;\u003c/b\u003e \u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u0026nbsp;So : A web app that is deployed on heroku, but source-hosted at github, might be 
created and maintained in a way that looks like this - where the \"circles\" represent machines, and the \"arrows\" represent the flow of information : \u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca href\u003d\"http://1.bp.blogspot.com/-61KHbKppEpg/T6hR3H2DRsI/AAAAAAAAA6Y/cUqZfCBRzng/s1600/chart.png\" imageanchor\u003d\"1\" style\u003d\"clear: left; float: left; margin-bottom: 1em; margin-right: 1em;\"\u003e\u003cimg alt\u003d\"\" border\u003d\"0\" height\u003d\"190\" src\u003d\"http://1.bp.blogspot.com/-61KHbKppEpg/T6hR3H2DRsI/AAAAAAAAA6Y/cUqZfCBRzng/s640/chart.png\" title\u003d\"The flow of information in a github integrated heroku app. \" width\u003d\"640\" /\u003e\u003c/a\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003ca href\u003d\"http://2.bp.blogspot.com/-rzI9lwc0Fuk/T6hQ4TLb8qI/AAAAAAAAA6Q/uuqX7Ohi9HQ/s1600/chart.png\" imageanchor\u003d\"1\" style\u003d\"margin-left: 1em; margin-right: 1em;\"\u003e\u003cbr /\u003e\u003c/a\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cu\u003e\u003cb\u003eMust-knows for using github, git, and heroku together: \u003c/b\u003e\u003c/u\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cb\u003e1) Everyone knows this already, but just to be complete : \"heroku create\" is the normal 
\"first step\".\u0026nbsp; You create an application.\u0026nbsp; This registers a new app with heroku.\u0026nbsp;\u0026nbsp;\u003c/b\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cb\u003e\u0026nbsp;\u003c/b\u003eHeroku returns back an application URL to your command line prompt.\u0026nbsp; If you already \"have\" an application, this step is unnecessary.\u0026nbsp; See the next step to understand why. \u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cb\u003e2) Once you create an app, you push code to the heroku server's under the git branch of that app.\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eHeroku uses git for code deployment.\u0026nbsp; So... what if your code is already on git ?\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e3) Git lets you have multiple repositories !\u0026nbsp; The \"git push\" command actually can take 2 arguments - a repo, and a branch.\u0026nbsp; Thus, when we deploy\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003egit push heroku master\u003cbr /\u003e\u003cbr /\u003eWe are actually pushing the master branch to heroku.\u0026nbsp; We can also push to git using :\u003cbr /\u003e\u003cbr /\u003egit push origin master, if the origin of source is from git.\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cu\u003e\u003cb\u003eThe punchline : You can easily reproduce your environment for pushing code to heroku, and pulling it from github, on multiple or any machines as follows -\u003c/b\u003e\u003c/u\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e1) use heroku to add your public keys :\u0026nbsp;\u003c/b\u003e\u003cbr 
/\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003ejaylinux@ubuntu:~$ heroku keys:add\u003cbr /\u003eFound the following SSH public keys:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u003cbr /\u003e1) github_rsa.pub\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp
;\u0026nbsp;\u0026nbsp; \u003cbr /\u003e2) id_rsa.pub\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; \u003cbr /\u003eWhich would you like to use with your Heroku account? 
2\u003cbr /\u003e\u003cb\u003e2) git clone your project from the git repo.\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e3) git add your (already existing) heroku app to your git configuration as a 2nd remote repository :\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003egit add remote heroku git@heroku.com:fierce-samurai-6972.git\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e4) Make some changes, push them to heroku :\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003egit push heroku master\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e5) Don't forget to sync your changes with github so you don't lose your work on github !\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003egit push (or 'git push origin master', or 'git push github master')\u0026nbsp;\u003cb\u003e \u003c/b\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cu\u003e\u003cb\u003eSome helpful commands\u0026nbsp; \u003c/b\u003e\u003c/u\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cb\u003eTo see all the apps you've created : \u003c/b\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003ejaylinux@ubuntu:~/Development/$ heroku 
apps\u003c/div\u003equiet-warrior-557\u003cbr /\u003efierce-samurai-6972\u003cbr /\u003egentle-ice-2166\u003cbr /\u003efalling-autumn-9592\u003cbr /\u003esimple-rain-5194\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cb\u003eTo see all the details for an app you're working on :\u0026nbsp; \u003c/b\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003ejaylinux@ubuntu:~/Development/rudolfblog$ heroku info --app fierce-samurai-6972\u003cbr /\u003e\u003d\u003d\u003d fierce-samurai-6972\u003cbr /\u003eAddons:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; Shared Database 5MB\u003cbr /\u003eCollaborators: mfenwick100@gmail.com\u003cbr /\u003e\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; sesanker0@gmail.com\u003cbr /\u003eDomain Name:\u0026nbsp;\u0026nbsp; fierce-samurai-6972.heroku.com\u003cbr /\u003eDynos:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; 1\u003cbr /\u003eGit URL:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; git@heroku.com:fierce-samurai-6972.git\u003cbr /\u003eOwner:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; jayunit100@gmail.com\u003cbr /\u003eRepo Size:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; 500k\u003cbr /\u003eSlug Size:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; 1M\u003cbr /\u003eStack:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; bamboo-mri-1.9.2\u003cbr /\u003eWeb URL:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; http://fierce-samurai-6972.heroku.com/\u003cbr 
/\u003eWorkers:\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp;\u0026nbsp; 0\u003cbr /\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: left;\"\u003e\u003cbr /\u003e\u003c/div\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/802575322793368157/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/05/how-git-heroku-and-your-laptop-work.html#comment-form","title":"5 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/802575322793368157"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/802575322793368157"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/05/how-git-heroku-and-your-laptop-work.html","title":"How Git, heroku, and your laptop work together to coordinate decentralized code, sites, and deployments."}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://1.bp.blogspot.com/-61KHbKppEpg/T6hR3H2DRsI/AAAAAAAAA6Y/cUqZfCBRzng/s72-c/chart.png","height":"72","width":"72"},"thr$total":{"$t":"5"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-4938822612439424945"},"published":{"$t":"2012-04-30T15:37:00.001-07:00"},"updated":{"$t":"2012-10-29T20:12:48.116-07:00"},"title":{"type":"text","$t":"Vagrant : baby steps"},"content":{"type":"html","$t":"\u003cb\u003eVagrant can build, and destroy, your entire dev setup in a matter of minutes. \u0026nbsp;Its a powerful tool for\u0026nbsp;achieving\u0026nbsp;a cleanroom enginerring deployment setup.\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://1.bp.blogspot.com/-boqHnQbjt2c/UI9Fm2vscdI/AAAAAAAABQ0/vb8Z5w0xutQ/s1600/vagrant_chilling.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"320\" src\u003d\"http://1.bp.blogspot.com/-boqHnQbjt2c/UI9Fm2vscdI/AAAAAAAABQ0/vb8Z5w0xutQ/s320/vagrant_chilling.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eVagrant allows you to setup a personalized VM on any machine in a  matter of minutes - and reduces the tedium associated with building a  local virtualbox environment which 
mimics a server. \u0026nbsp;To specify your VM,  you can provision (1) an OS version by name and url (vagrant will fetch  it for you) and (2) A provisioner - (i.e. this could simply be a shell  script which runs after the base box is set up.) \u0026nbsp;The two commands  \"vagrant up\" and \"vagrant destroy\" are then all you need to build and  tear down your development environment in a matter of seconds.\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003e\u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cdiv class\u003d\"separator\" style\u003d\"clear: both; text-align: center;\"\u003e\u003c/div\u003e\u003cbr /\u003e\u003cbr /\u003eI've given up on alot of things in my life, especially including the idea of managing software on my development machine.\u0026nbsp;\u0026nbsp; Im not a sysadmin guy but... vagrant is really cool so I'm forcing myself to start using it.\u0026nbsp; Also DanielKnell told me to... so... Here's how I got a vagrant VM environment running on ubuntu.\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cu\u003e(Vagrant requires a solid Ruby setup - heres what i did to get it working.)\u003c/u\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003eFirst, install ruby and the ruby bundler :\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003esudo apt-get install ruby-bundler \u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eTo do this , create a directory, cd to it, and create a gem file that looks like this. 
\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003ci\u003esource 'http://rubygems.org'\u003cbr /\u003egem 'vagrant', \"~\u0026gt; 0.8.1\"\u003cbr /\u003egem 'veewee', \"~\u0026gt; 0.2.0\"\u0026nbsp;\u0026nbsp;\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e(note that veewee isn't really necessary here). \u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003eTHEN\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cb\u003erun the bundle install via the gemfile :\u0026nbsp; \u003c/b\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003ci\u003e$\u0026gt; sudo bundle install\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003eNow... 
In ubuntu 10, I had to update ruby gems to avoid this crazy error related to date formats :\u0026nbsp;\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003cbr /\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003ci\u003e$\u0026gt;sudo gem install rubygems-update\u003c/i\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: cyan;\"\u003e\u003ci\u003e$\u0026gt;update_rubygems\u003c/i\u003e\u003c/div\u003e\u003cbr /\u003e\u003cb\u003eNow, both veewee and vagrant are installed.\u003c/b\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cb\u003eLets build a VM !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\u003c/b\u003e\u003c/div\u003e\u003cbr /\u003eYou can borrow a full blown vm base box to start : \u003cb\u003e\u003cbr /\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003evagrant box add base https://s3.amazonaws.com/cloudbiolinux/cbl_ubuntu_11_4_32_20110628.box\u0026nbsp;\u0026nbsp;\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eOr, simpler and faster one like this :\u0026nbsp; \u003cbr /\u003e\u003cbr /\u003e\u003ci\u003evagrant box add ubuntu-lucid-32 http://files.vagrantup.com/lucid32.box\u003c/i\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u003cb\u003eNext, before I install it - How do I customize it ?\u0026nbsp;\u003c/b\u003e\u003c/div\u003e\u003cbr /\u003eVagrant has a set \"Vagrantfile\" that is initialized for you. This file is created with the \"vagrant init my-base-box\" command.\u0026nbsp; For example :\u003cbr /\u003e\u003ci\u003e\u003cbr /\u003e\u003c/i\u003e\u003ci\u003evagrant init ubuntu-lucid-32\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eSo, you first can initialize vagrant via the init command, followed by the box name, and then you can edit the contents of the \"Vagrantfile\" to point to a shell script which executes some custom instructions, i.e. 
installing programs.\u003cbr /\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"background-color: lime;\"\u003eNow what ? \u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003eNow, you can get into your vm :\u003cbr /\u003e\u003cbr /\u003e\u003ci\u003evagrant ssh\u003c/i\u003e\u003cbr /\u003e\u003cbr /\u003eYipeeee.\u003cbr /\u003e\u003cbr /\u003eNow, as long as you commit changes to the ssh file associated with your Vagrant , anyone can run this vagrant up command, from anywhere, to recreate your environment.\u003cbr /\u003e\u003cbr /\u003eFinally - if you screw up the environment, you can run \"vagrant destroy\" followed by \"vagrant up\" to refresh it to the original start point. \u003cbr /\u003e\u003cbr /\u003e------- UPDATES --------\u003cbr /\u003e\u003cbr /\u003eJust found got this wonderful piece of advice on irc #vagrant :\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cdiv class\u003d\"envelope highlight\" id\u003d\"YQO2IP48YV3\" style\u003d\"background-color: rgba(0, 0, 0, 0.0976563); font-family: 'Lucida Grande'; margin-bottom: 3px; padding-bottom: 1px; padding-left: 5px; padding-right: 5px; padding-top: 1px;\"\u003e\u003ca class\u003d\"member\" href\u003d\"member:bgy_\" style\u003d\"color: #ff9900; font-weight: bold; margin-right: 0.5ex; text-decoration: none !important; white-space: nowrap;\" title\u003d\"~bgy@prompt-o-log.net\"\u003ebgy_\u003c/a\u003e\u003cspan class\u003d\"hidden\" style\u003d\"left: -900px; position: fixed; top: -900px;\"\u003e:\u003c/span\u003e\u003cspan class\u003d\"message\" style\u003d\"-webkit-line-break: after-white-space; -webkit-nbsp-mode: space; word-wrap: break-word;\"\u003e\u003ca class\u003d\"member highlight\" href\u003d\"member:Jayunit100\" style\u003d\"color: inherit; font-weight: inherit; text-decoration: none !important;\"\u003eJayunit100\u003c/a\u003e: vagrant reload should reboot the box, or you can do it manually by chaining vagrant halt \u0026amp;\u0026amp; vagrant 
up\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"envelope\" id\u003d\"F46W7J58YV3\" style\u003d\"font-family: 'Lucida Grande'; margin-bottom: 3px; padding-bottom: 1px; padding-left: 5px; padding-right: 5px; padding-top: 1px;\"\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e[\u003c/span\u003e\u003cspan class\u003d\"timestamp\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; margin-left: 5px; margin-top: 0px; word-wrap: normal;\"\u003e4:24pm\u003c/span\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e]\u003c/span\u003e\u003ca class\u003d\"member self\" href\u003d\"member:identifier:jayunit100\" style\u003d\"color: #aa2211; font-weight: bold; margin-right: 0.5ex; text-decoration: none !important; white-space: nowrap;\" title\u003d\"Jpeerindex@c-71-235-206-176.hsd1.ct.comcast.net\"\u003eJayunit100\u003c/a\u003e\u003cspan class\u003d\"hidden\" style\u003d\"left: -900px; position: fixed; top: -900px;\"\u003e:\u003c/span\u003e\u003cspan class\u003d\"message\" style\u003d\"-webkit-line-break: after-white-space; -webkit-nbsp-mode: space; word-wrap: break-word;\"\u003eaha thanks !\u003c/span\u003e\u003c/div\u003e\u003cbr /\u003eOh and watch out for this :\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cdiv class\u003d\"envelope\" id\u003d\"N7XY8T98YV3\" style\u003d\"font-family: 'Lucida Grande'; margin-bottom: 3px; padding-bottom: 1px; padding-left: 5px; padding-right: 5px; padding-top: 1px;\"\u003e\u003ca class\u003d\"member self\" href\u003d\"member:identifier:jayunit100\" style\u003d\"color: #aa2211; font-weight: bold; margin-right: 0.5ex; text-decoration: none !important; 
white-space: nowrap;\" title\u003d\"Jpeerindex@c-71-235-206-176.hsd1.ct.comcast.net\"\u003eJayunit100\u003c/a\u003e\u003cspan class\u003d\"hidden\" style\u003d\"left: -900px; position: fixed; top: -900px;\"\u003e:\u003c/span\u003e\u003cspan class\u003d\"message\" style\u003d\"-webkit-line-break: after-white-space; -webkit-nbsp-mode: space; word-wrap: break-word;\"\u003ereload appears to be provisioning ?\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"envelope\" id\u003d\"CHNXKQA8YV3\" style\u003d\"font-family: 'Lucida Grande'; margin-bottom: 3px; padding-bottom: 1px; padding-left: 5px; padding-right: 5px; padding-top: 1px;\"\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e[\u003c/span\u003e\u003cspan class\u003d\"timestamp\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; margin-left: 5px; margin-top: 0px; word-wrap: normal;\"\u003e4:28pm\u003c/span\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e]\u003c/span\u003e\u003ca class\u003d\"member\" href\u003d\"member:adt22\" style\u003d\"color: #ff9900; font-weight: bold; margin-right: 0.5ex; text-decoration: none !important; white-space: nowrap;\" title\u003d\"~amir@n2-94-168.dhcp.drexel.edu\"\u003eadt22\u003c/a\u003e\u003cspan class\u003d\"hidden\" style\u003d\"left: -900px; position: fixed; top: -900px;\"\u003e:\u003c/span\u003e\u003cspan class\u003d\"message\" style\u003d\"-webkit-line-break: after-white-space; -webkit-nbsp-mode: space; word-wrap: break-word;\"\u003eis that a problem?\u003c/span\u003e\u003c/div\u003e\u003cdiv class\u003d\"envelope\" id\u003d\"QDOY11B8YV3\" style\u003d\"font-family: 'Lucida Grande'; 
margin-bottom: 3px; padding-bottom: 1px; padding-left: 5px; padding-right: 5px; padding-top: 1px;\"\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e[\u003c/span\u003e\u003cspan class\u003d\"timestamp\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; margin-left: 5px; margin-top: 0px; word-wrap: normal;\"\u003e4:28pm\u003c/span\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e]\u003c/span\u003e....\u003c/div\u003e\u003cdiv class\u003d\"envelope\" id\u003d\"CYT6KRF8YV3\" style\u003d\"font-family: 'Lucida Grande'; margin-bottom: 3px; padding-bottom: 1px; padding-left: 5px; padding-right: 5px; padding-top: 1px;\"\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e[\u003c/span\u003e\u003cspan class\u003d\"timestamp\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; margin-left: 5px; margin-top: 0px; word-wrap: normal;\"\u003e4:31pm\u003c/span\u003e\u003cspan class\u003d\"timestamp hidden\" style\u003d\"color: #888888; display: block; float: right; font-size: 9px; left: -900px; margin-left: 5px; margin-top: 0px; position: fixed; top: -900px; word-wrap: normal;\"\u003e]\u003c/span\u003e\u003ca class\u003d\"member\" href\u003d\"member:adt22\" style\u003d\"color: #ff9900; font-weight: bold; margin-right: 0.5ex; text-decoration: none !important; white-space: nowrap;\" title\u003d\"~amir@n2-94-168.dhcp.drexel.edu\"\u003eadt22\u003c/a\u003e\u003cspan class\u003d\"hidden\" style\u003d\"left: -900px; 
position: fixed; top: -900px;\"\u003e:\u003c/span\u003e\u003cspan class\u003d\"message\" style\u003d\"-webkit-line-break: after-white-space; -webkit-nbsp-mode: space; word-wrap: break-word;\"\u003eyeah: vagrant reload --no-provision\u003c/span\u003e\u003c/div\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/4938822612439424945/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/04/vagrant.html#comment-form","title":"2 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/4938822612439424945"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/4938822612439424945"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/04/vagrant.html","title":"Vagrant : baby steps"}],"author":[{"name":{"$t":"Jay Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://1.bp.blogspot.com/-boqHnQbjt2c/UI9Fm2vscdI/AAAAAAAABQ0/vb8Z5w0xutQ/s72-c/vagrant_chilling.jpg","height":"72","width":"72"},"thr$total":{"$t":"2"}},{"id":{"$t":"tag:blogger.com,1999:blog-309796417999444696.post-902673115912392142"},"published":{"$t":"2012-04-22T22:31:00.001-07:00"},"updated":{"$t":"2012-10-29T20:16:24.220-07:00"},"title":{"type":"text","$t":"Easy dependency resolution with ivy's exclude tag."},"content":{"type":"html","$t":"\u003cbr /\u003e\u003cb\u003eYour code is only as good as its worst 
library.\u003c/b\u003e\u0026nbsp; \u003cbr /\u003e\u003ctable align\u003d\"center\" cellpadding\u003d\"0\" cellspacing\u003d\"0\" class\u003d\"tr-caption-container\" style\u003d\"margin-left: auto; margin-right: auto; text-align: center;\"\u003e\u003ctbody\u003e\u003ctr\u003e\u003ctd style\u003d\"text-align: center;\"\u003e\u003ca href\u003d\"http://1.bp.blogspot.com/-r6QKkm3QnGI/UI9GVYdTabI/AAAAAAAABQ8/vOii8Ha4Blk/s1600/depsj.jpg\" imageanchor\u003d\"1\" style\u003d\"margin-left: auto; margin-right: auto;\"\u003e\u003cimg border\u003d\"0\" height\u003d\"198\" src\u003d\"http://1.bp.blogspot.com/-r6QKkm3QnGI/UI9GVYdTabI/AAAAAAAABQ8/vOii8Ha4Blk/s320/depsj.jpg\" width\u003d\"320\" /\u003e\u003c/a\u003e\u003c/td\u003e\u003c/tr\u003e\u003ctr\u003e\u003ctd class\u003d\"tr-caption\" style\u003d\"text-align: center;\"\u003eThe lamest thing in the world is getting a \"NoSuchMethodException\"  because you deploy an executable which puts the wrong version of the  right libraries on the classpath... Or alternatively, because the JVM  you are running already has another version of the same library which  supercedes your applications required dependency.\u0026nbsp; The more libraries  you have, and the more environments you run your code in, the more  likely this may be.\u0026nbsp; Solve this problem once and for all by excluding  old dependencies in your less-up-to-date jars.\u003c/td\u003e\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e\u003cbr /\u003e\u003cbr /\u003eIvy is a dependency manager that makes Ant work with maven repositories, transparently managing and accumulating all your jar's for you, under the hood. \u0026nbsp;The awesome thing about these dependency managers is that they manage transitive dependencies for you, so that you don't have to worry about the dependencies of your dependencies. 
\u003cbr /\u003e\u003cbr /\u003e90% of the time this is perfect, --- but what happens when you have a transitive dependency that references an old version of a jar, while your source code needs the same library's updated version ?\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: #ea9999;\"\u003e!! This leads to a compile error !! \u0026nbsp;\u003c/div\u003e\u003cbr /\u003eNot necessarily in your IDE, but quite possibly in Ant.\u0026nbsp; Or maybe vice-versa.\u0026nbsp; In my case, specifically, I had an old version of commons-io referenced from a jar which my source code depended on. \u0026nbsp;This is a \"transitive dependency\" (A referenced B which referenced C, making A transitively dependant on C). \u003cbr /\u003e\u003cbr /\u003e\u003cdiv\u003e\u003cspan style\u003d\"background-color: white;\"\u003eThis is kind of like a codependent relationship, for those of you that know what that is. Its not good at all, it basically means that any issues in the \"old\" jar will effect your shiny new application due to the transitive dependency. 
\u0026nbsp;But java's class loader is smart enough to use the \"new\" jar in place of the old one, if you can just convince ivy and ant to ignore the old jar.\u0026nbsp;\u003c/span\u003e\u003c/div\u003e\u003cdiv style\u003d\"background-color: #e06666;\"\u003e\u003c/div\u003e\u003cbr /\u003e\u003cb\u003eSo how do you solve these sorts of issues ?\u0026nbsp;\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e1) Track down the bad jar version by inspecting the javac classpath in ant.\u0026nbsp; This can be done by running ant in verbose mode (ant -v) and simply grepping for the library name.\u0026nbsp; In may case, this meant doing something like this (an old version of commons-io was messing up my compilation step):\u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003eant -v \u0026gt; /tmp/out\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003egrep 'commons-io' /tmp/out\u0026nbsp;\u003c/div\u003e\u003cbr /\u003e2) Once grep confirmed that, indeed, ant was putting an old jar file (that broke my compile) at the beggining of the classpath, I simply had to find out where that jar was coming from.\u0026nbsp; This, again, is easy if you look at the verbose output of your dependency resolution.\u003cbr /\u003e\u003cbr /\u003e\u003cbr /\u003e3) Finally, in your ivy.xml file, go to the dependency whose transitive dependency is causing the collision.\u0026nbsp; For example, and inside its dependency declaration, add an \"exclude\" block, which will force ivy NOT to download certain dependencies : \u003cbr /\u003e\u003cbr /\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u0026lt;dependency org\u003d\"mydepWithAncientTransitives\" name\u003d... 
\u0026gt;\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u0026nbsp;\u0026nbsp; \u0026lt;exclude name\u003d\"commons-io\"/\u0026gt;\u003c/div\u003e\u003cdiv style\u003d\"background-color: lime;\"\u003e\u0026lt;/dependency\u0026gt;\u0026nbsp;\u003c/div\u003e\u003cbr /\u003e\u003cb\u003e\u003cspan style\u003d\"color: black;\"\u003eVoila\u0026nbsp;\u003c/span\u003e\u003c/b\u003e\u003cbr /\u003e\u003cbr /\u003e\u003cspan style\u003d\"color: black;\"\u003eYou should now be able to re run your resolve/build step in ant, and this time, the \"old\" jar won't stand in the way of your compiler - and you should be able to confirm this by again looking at the output of \u003cspan style\u003d\"background-color: lime;\"\u003e\"ant -v\"\u003c/span\u003e\u003c/span\u003e\u003cb\u003e\u003cspan style\u003d\"color: black;\"\u003e. \u0026nbsp;\u003c/span\u003e\u003c/b\u003e"},"link":[{"rel":"replies","type":"application/atom+xml","href":"http://jayunit100.blogspot.com/feeds/902673115912392142/comments/default","title":"Post Comments"},{"rel":"replies","type":"text/html","href":"http://jayunit100.blogspot.com/2012/04/take-your-jars-by-horn.html#comment-form","title":"0 Comments"},{"rel":"edit","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/902673115912392142"},{"rel":"self","type":"application/atom+xml","href":"http://www.blogger.com/feeds/309796417999444696/posts/default/902673115912392142"},{"rel":"alternate","type":"text/html","href":"http://jayunit100.blogspot.com/2012/04/take-your-jars-by-horn.html","title":"Easy dependency resolution with ivy's exclude tag."}],"author":[{"name":{"$t":"Jay 
Vyas"},"uri":{"$t":"https://plus.google.com/107497856501050642644"},"email":{"$t":"noreply@blogger.com"},"gd$image":{"rel":"http://schemas.google.com/g/2005#thumbnail","width":"32","height":"32","src":"//lh6.googleusercontent.com/-KRDifK6qFm8/AAAAAAAAAAI/AAAAAAAABoI/ALdJ9FcCWmM/s512-c/photo.jpg"}}],"media$thumbnail":{"xmlns$media":"http://search.yahoo.com/mrss/","url":"http://1.bp.blogspot.com/-r6QKkm3QnGI/UI9GVYdTabI/AAAAAAAABQ8/vOii8Ha4Blk/s72-c/depsj.jpg","height":"72","width":"72"},"thr$total":{"$t":"0"}}]}});