<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:georss='http://www.georss.org/georss' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-3867310391951630980</id><updated>2011-11-27T15:16:01.927-08:00</updated><category term='Chart Formatting'/><category term='Twitter'/><category term='GeoLocation'/><category term='World Bank Data'/><category term='Learning Statistics'/><category term='MySQL'/><category term='SQL'/><category term='Guardian Data'/><category term='RODM'/><category term='Statistics'/><category term='Date'/><category term='Economics'/><category term='Data Mining'/><category term='Spreadsheet'/><category term='fractals'/><category term='music'/><category term='XML'/><category term='ggplot2'/><category term='Number Formatting'/><category term='Oracle'/><category term='Google'/><category term='Strings'/><category term='RODBC'/><category term='BP Oil Spill'/><category term='igraph'/><category term='Book Reviews'/><category term='Color'/><category term='Maps'/><category term='GitHub'/><category term='Conferences'/><category term='Financial'/><category term='Programming Languages'/><category term='quantmod'/><category term='iPhone'/><category term='R-Chart'/><category term='Ruby'/><category term='R Environment'/><category term='R Packages'/><category term='Time'/><category term='Cramer'/><category term='JSON'/><category term='Developing R Programs'/><category term='fitness'/><category term='Data Preparation'/><title type='text'>R-Chart</title><subtitle type='html'>The R language as experienced by a web application/database developer...</subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://www.r-chart.com/feeds/posts/default'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default?max-results=100'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>77</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>100</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-6837203126169082016</id><published>2011-09-02T03:37:00.000-07:00</published><updated>2011-09-02T03:37:27.646-07:00</updated><title type='text'>Word Cloud from Blog RSS</title><content type='html'>&lt;a href="http://1.bp.blogspot.com/-iTSzxNlw4V0/TmCwKljIh0I/AAAAAAAAAoc/gXMjeFtBsLs/s1600/r-chart-cloud.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="271" src="http://1.bp.blogspot.com/-iTSzxNlw4V0/TmCwKljIh0I/AAAAAAAAAoc/gXMjeFtBsLs/s400/r-chart-cloud.png" width="400" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Crazy busy &amp;nbsp;- no time to blog recently.  
Time enough for pretty pictures based upon previous words though...(thanks&amp;nbsp;&lt;a href="http://www.wordle.net/"&gt;http://www.wordle.net&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-scOSv1sYTNk/TmCxdTZQcRI/AAAAAAAAAok/XfQS64uUVgo/s1600/r-chart-cloud_tree.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="212" src="http://2.bp.blogspot.com/-scOSv1sYTNk/TmCxdTZQcRI/AAAAAAAAAok/XfQS64uUVgo/s320/r-chart-cloud_tree.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-size: large;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-6837203126169082016?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/6837203126169082016/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2011/09/word-cloud-from-blog-rss.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6837203126169082016'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6837203126169082016'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2011/09/word-cloud-from-blog-rss.html' title='Word Cloud from Blog RSS'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail 
xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-iTSzxNlw4V0/TmCwKljIh0I/AAAAAAAAAoc/gXMjeFtBsLs/s72-c/r-chart-cloud.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-1594521702624711592</id><published>2011-01-01T12:43:00.000-08:00</published><updated>2011-01-01T12:43:17.232-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Statistics'/><category scheme='http://www.blogger.com/atom/ns#' term='fractals'/><category scheme='http://www.blogger.com/atom/ns#' term='World Bank Data'/><title type='text'>Ten News Stories of 2010 - and the Statistics that Made Them</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://www.statsoft.com/Portals/0/blog/World%20Statistics%20Day.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://www.statsoft.com/Portals/0/blog/World%20Statistics%20Day.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-size: large;"&gt;Significance Magazine&lt;/span&gt;&lt;br /&gt;According to&amp;nbsp;&lt;a href="http://www.significancemagazine.org/view/index.html"&gt;Significance&amp;nbsp;Magazine&lt;/a&gt;&amp;nbsp;(jointly published by&amp;nbsp;&lt;a href="http://www.rss.org.uk/main.asp?page=0"&gt;Royal Statistical Society&lt;/a&gt;&amp;nbsp;and&amp;nbsp;the&amp;nbsp;&lt;a href="http://www.amstat.org/"&gt;American Statistical Association&lt;/a&gt;) the following are the top ten stories of 2010.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;1. Progress in the prevention of HIV: &amp;nbsp; &amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Public health studies result in HIV treatment advancements.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;2. 
Drug regulation: restrictions and retractions: &amp;nbsp;&amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Related to&amp;nbsp;breast cancer and type 2 diabetes.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;3. Measuring a teacher's value: &amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;a href="http://projects.latimes.com/value-added/"&gt;LA Times&lt;/a&gt; graded teachers based on standards tests results.&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;4. Political rhetoric finds a helpmeet in statistics:&lt;/b&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;"a statistical recovery and a human recession."&lt;br /&gt;&lt;b&gt;5. Census of Marine Life:&amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;The &lt;a href="http://www.plosone.org/article/info:doi/10.1371/journal.pone.0012110"&gt;first census of the world's seas&lt;/a&gt; completed in 2010.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;6. &lt;a href="http://www.nytimes.com/2010/09/24/business/24jelinek.html?_r=1&amp;amp;scp=2&amp;amp;sq=Jelinek&amp;amp;st=cse"&gt;Death&lt;/a&gt; of &lt;a href="http://www.clsp.jhu.edu/~jelinek/"&gt;Frederick Jelinek&lt;/a&gt;: &amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;a pioneer in speech recognition and statistical methods of NLP.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;7. The genetic key to Shangri-La: &amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Dr. Paola Sebastiani genetics advancements related to longevity.&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;8. Screening saves:&amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;CT Scanning&amp;nbsp;definitively&amp;nbsp;associated with a reduced risk of lung cancer mortality.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;9. 
Fat kills:&amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="apple-style-span"&gt;&lt;span style="color: black;"&gt;Quantitative reviews in various areas of health and nutrition.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="apple-style-span"&gt;&lt;span style="color: black;"&gt;&lt;b&gt;10. Words, words words:&amp;nbsp;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;a href="http://www.culturomics.org/"&gt;Culturomics&lt;/a&gt;&amp;nbsp;project produces the &lt;a href="http://ngrams.googlelabs.com/"&gt;Google Ngram Viewer&lt;/a&gt;.&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The details are parceled out in 5 articles:&amp;nbsp;&lt;a href="http://www.significancemagazine.org/details/webexclusive/971759/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-1.html"&gt;Part 1&lt;/a&gt;&amp;nbsp;|&amp;nbsp;&lt;a href="http://www.significancemagazine.org/details/webexclusive/971905/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-2.html"&gt;Part 2&lt;/a&gt;&amp;nbsp;&amp;nbsp;|&amp;nbsp;&lt;a href="http://www.significancemagazine.org/details/webexclusive/972091/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-3.html"&gt;Part 3&lt;/a&gt;&amp;nbsp;|&amp;nbsp;&lt;a href="http://ten%20news%20stories%20of%202010%20-%20and%20the%20statistics%20that%20made%20them.%20part%204/"&gt;Part 4&lt;/a&gt;&amp;nbsp;|&amp;nbsp;&lt;a href="http://www.significancemagazine.org/details/webexclusive/972741/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-5.html"&gt;Part 5&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;iframe align="left" frameborder="0" marginheight="0" marginwidth="0" scrolling="no" 
src="http://rcm.amazon.com/e/cm?t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=bpl&amp;amp;asins=1148902732&amp;amp;fc1=000000&amp;amp;IS2=1&amp;amp;lt1=_blank&amp;amp;m=amazon&amp;amp;lc1=0000FF&amp;amp;bc1=000000&amp;amp;bg1=FFFFFF&amp;amp;f=ifr" style="align: left; height: 245px; padding-right: 10px; padding-top: 5px; width: 131px;"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: large;"&gt;Other Stories - or my $0.02.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;The following are not exactly in the same category as the listings in significance magazine - but they involve personalities and events that affect many members of the R community and have some sort of analytical/statistical&amp;nbsp;significance.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;&lt;a href="http://unstats.un.org/unsd/wsd/Default.aspx"&gt;World Statistics Day&lt;/a&gt;&lt;/b&gt;&lt;br /&gt;I mean, I missed picking up a greeting card - but the objective of the celebration is pretty worthwhile:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Calibri, Helvetica, Arial; font-size: 14px; font-style: italic; line-height: 16px;"&gt;building support and better understanding for official statistics among the general public and the policy-makers worldwide.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;&lt;a href="http://www.r-bloggers.com/"&gt;R-Bloggers&lt;/a&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;For the R community, R-Bloggers has had a &lt;a href="http://www.r-statistics.com/2011/01/r-bloggers-in-2010-top-14-r-posts-site-statistics-and-invitation-for-sponsors/"&gt;banner year&lt;/a&gt; and provided a great deal of visibility for the R community. 
&amp;nbsp;They are looking for &lt;a href="http://www.r-statistics.com/2011/01/r-bloggers-in-2010-top-14-r-posts-site-statistics-and-invitation-for-sponsors/"&gt;sponsorship&lt;/a&gt; - so please consider supporting them.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;U.S. Economic News&lt;/b&gt;&lt;br /&gt;News involved the use of additional zeros tacked on to the end of numbers. &amp;nbsp;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;The &lt;a href="http://www.recovery.gov/Pages/default.aspx"&gt;recovery.gov&lt;/a&gt; web site has been somewhat underwhelming. &amp;nbsp;&lt;a href="http://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=0003e0&amp;amp;topic_id=1&amp;amp;topic="&gt;Edward Tufte's nomination&lt;/a&gt; to&amp;nbsp;&lt;span class="Apple-style-span" style="font-family: 'times new roman', times, serif;"&gt;serve on the Recovery Independent Advisory Panel was a fascinating development. &amp;nbsp;His emphasis on clear and truthful presentation of information could be a Good Thing.&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;New Era of Data Journalism&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;The World Bank has continued to provide &lt;a href="http://data.worldbank.org/"&gt;more data&lt;/a&gt; on economic and social topics. 
&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;A &lt;a href="http://www.r-chart.com/2010/06/plotting-world-bank-data-with-r.html"&gt;couple&lt;/a&gt; of &lt;a href="http://www.r-chart.com/2010/09/new-world-bank-data-available.html"&gt;blog posts&lt;/a&gt; covered this, and&amp;nbsp;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;an &lt;a href="http://cran.r-project.org/web/packages/WDI/index.html"&gt;R package&lt;/a&gt; is also available to access the World Bank Data API. &amp;nbsp;There has been an increased refinement in &lt;a href="http://www.guardian.co.uk/data"&gt;data journalism&lt;/a&gt;&amp;nbsp;as well as controversy surrounding WikiLeaks during 2010. &amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;b&gt;Data Marketplace&lt;/b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;a href="http://infochimps.com/"&gt;InfoChimps&lt;/a&gt;&amp;nbsp;is pioneering an online marketplace for buying and selling data. 
&amp;nbsp;Seems that they have a plausible idea - they &lt;a href="http://venturebeat.com/2010/11/07/infochimps-funding/"&gt;recently landed 1.2 million dollars&lt;/a&gt; in funding.&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;Benoit Mandelbrot&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Another noteworthy death this year that was not mentioned was the loss of the "Father of Fractals" -&amp;nbsp;&lt;a href="http://www.r-chart.com/2010/10/benoit-mandelbrot-father-of-fractals.html"&gt;Benoit Mandelbrot&lt;/a&gt;. &amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-1594521702624711592?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/1594521702624711592/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2011/01/ten-news-stories-of-2010-and-statistics.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1594521702624711592'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1594521702624711592'/><link rel='alternate' type='text/html' 
href='http://www.r-chart.com/2011/01/ten-news-stories-of-2010-and-statistics.html' title='Ten News Stories of 2010 - and the Statistics that Made Them'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-2226286964020013103</id><published>2010-12-31T14:17:00.000-08:00</published><updated>2010-12-31T14:17:25.314-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='R-Chart'/><title type='text'>R-Chart: Year End Wrap Up</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TR5WOI8cQWI/AAAAAAAAAkU/MnZPvKNyVko/s1600/happyNewYear.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="187" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TR5WOI8cQWI/AAAAAAAAAkU/MnZPvKNyVko/s400/happyNewYear.jpg" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;iframe align="left" frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=bpl&amp;amp;asins=1420068725&amp;amp;fc1=000000&amp;amp;IS2=1&amp;amp;lt1=_blank&amp;amp;m=amazon&amp;amp;lc1=0000FF&amp;amp;bc1=000000&amp;amp;bg1=FFFFFF&amp;amp;f=ifr" style="align: left; height: 245px; padding-right: 10px; padding-top: 5px; width: 131px;"&gt;&lt;/iframe&gt;Thanks to everyone who visited and commented here at R-Chart over the last year! &amp;nbsp;Blogging has forced me to&amp;nbsp;crystallize&amp;nbsp;my thoughts and I hope others have benefited a bit from these meanderings. &amp;nbsp;It is great to interact with the&amp;nbsp;knowledgeable, educated and friendly folks in the R community. 
&lt;br /&gt;&lt;br /&gt;I make no claims to be an expert or authority on statistics, visualization, design or any of the myriad of other topics touched on over the past year. &amp;nbsp;I appreciate all who have provided encouragement, suggestions and corrections. &amp;nbsp;Unlike many of you more scientifically minded types who meticulously verify all conclusions before speaking, I tend to throw ideas out in the blog and make adjustments and corrections based upon feedback. &amp;nbsp;This is really one of the great values of blogging - and so again, thank you for your responsiveness. &amp;nbsp;It was unexpected and very helpful.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Lessons Learned&lt;/b&gt;&lt;br /&gt;In case you blog or are thinking of blogging, I thought you might be interested in how things have worked here at R-Chart to this point.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Make Good Titles&lt;/b&gt;&lt;br /&gt;It was interesting to find out which items were of most interest (based upon the number of hits per page). &amp;nbsp;A great deal seems to be based upon the headline to the blog - never underestimate the value of a well-constructed-sound-byte of a title. &amp;nbsp;This often dictates the future of a posting. &amp;nbsp;Bad title = no response. &amp;nbsp;I really never gave much thought to how important it is to construct a meaningful, attention grabbing title.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Blog Promotion&lt;/b&gt;&lt;br /&gt;Promotion of each article also took more time than I expected. &amp;nbsp;Tal over at &lt;a href="http://www.r-bloggers.com/"&gt;R-Bloggers&lt;/a&gt;&amp;nbsp;really does the R community a service - bloggers who sign up have content aggregated automatically. &amp;nbsp;If you want to draw additional readers you have to do a certain amount of footwork yourself. &amp;nbsp;I get about 15% of total traffic to the site from search engines - which is kind of low. &amp;nbsp;Most of the generic sites that I submitted the blog to didn't send any traffic. 
&amp;nbsp;Content that was of specific interest to a given community ended up resulting in the most traffic.&lt;br /&gt;&lt;br /&gt;The top sites that have sent traffic this way are shown below.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.reddit.com/"&gt;www.reddit.com&lt;/a&gt; &lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;15,218&lt;br /&gt;&lt;a href="http://www.google.com/"&gt;www.google.com&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;8,932&lt;br /&gt;&lt;a href="http://news.ycombinator.com/"&gt;news.ycombinator.com&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;7,211&lt;br /&gt;&lt;a href="http://www.r-bloggers.com/"&gt;www.r-bloggers.com&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;4,885&lt;br /&gt;&lt;a href="http://www.dzone.com/"&gt;www.dzone.com&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;3,682&lt;br /&gt;&lt;a href="http://habrahabr.ru/"&gt;habrahabr.ru&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1,167 &amp;nbsp;(Hi to friends in Russia for this - the highest ranking non-English site)&lt;br /&gt;&lt;a href="http://twitter.com/"&gt;twitter.com&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;689&lt;br /&gt;&lt;a href="http://www.google.co.in/"&gt;www.google.co.in&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;565&lt;br /&gt;&lt;a href="http://www.google.co.uk/"&gt;www.google.co.uk&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp;&amp;nbsp;531&lt;br /&gt;&lt;a href="http://www.rubyflow.com/"&gt;www.rubyflow.com&lt;/a&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;470&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R is International&lt;/b&gt;&lt;br /&gt;I was really amazed at the international response - folks from 164 countries around the world hit the blog since its inception. &amp;nbsp;Germany was the top non-English site in total visits and France was also well represented. &lt;br /&gt;&lt;br /&gt;This probably is of no surprise to many - R has been widely used in academic research and there are a relatively small number of highly specialized professionals around the world using R. &amp;nbsp;It's obvious that the web reaches everywhere - it is not obvious who will end up visiting a given site.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Interest as Indicated by Traffic&lt;/b&gt;&lt;br /&gt;A few other numbers of note:&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;96,928&amp;nbsp;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;R-Chart Pageviews all time history as of 01/31/2010. 
&amp;nbsp;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;620&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Downloads of the &lt;a href="http://www.r-chart.com/2010/07/free-r-chart-iphone-app.html"&gt;free R-Chart iPhone application&lt;/a&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;237&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Total days blogging at blogspot (as.Date('2010-12-31') - as.Date('2010-05-08'))&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;195 &amp;nbsp;&amp;nbsp;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Days blog has lived at r-chart.com (as.Date('2010-12-31') - as.Date('2010-06-19'))&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;158&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Comments on this blog&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;b&gt;Advertising&lt;/b&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;Apologies to folks who are put off by the advertising. &amp;nbsp;I had a goal to dip into this area a bit to come to offset costs and maybe buy a book or two. 
&amp;nbsp;This may happen eventually...&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;$ 42.89&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;AdSense Revenue&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;$ 13.46&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;  &lt;/span&gt;Advertising Revenue through Amazon affiliates&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;Again - thanks to all - and have a Happy New Year&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-2226286964020013103?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/2226286964020013103/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/12/r-chart-year-end-wrap-up.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2226286964020013103'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2226286964020013103'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/12/r-chart-year-end-wrap-up.html' title='R-Chart: Year End Wrap 
Up'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_FsLa1cMTCWU/TR5WOI8cQWI/AAAAAAAAAkU/MnZPvKNyVko/s72-c/happyNewYear.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-1042341025364158802</id><published>2010-12-08T04:00:00.000-08:00</published><updated>2010-12-08T09:52:10.277-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Google'/><title type='text'>Google AI Challenge: Scores/Rank by Language</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9u1mZfPjI/AAAAAAAAAkA/i7m8PEC4_Pk/s1600/googleai2010_facet_by_language.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9u1mZfPjI/AAAAAAAAAkA/i7m8PEC4_Pk/s400/googleai2010_facet_by_language.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;A quick follow up to the &lt;a href="http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html"&gt;previous post&lt;/a&gt;: about the scores in the 2010 Google AI competition relative to programming language. 
&amp;nbsp;The chart above makes each language visible and discrete - and the scales are the same.&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df&amp;lt;- read.csv('googleAI2010.csv',sep=';',header=FALSE)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df$V7 &amp;lt;- NULL&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;names(df)&amp;lt;- c('rank', 'username','country','organization','language','elo_score')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" 
style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;ggplot(data=df, aes(x=rank, y=elo_score, color=language)) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;+ geom_point(size=1) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;+ facet_wrap(~ language) + opts(title='Google AI 2010: Score by Rank for each Language')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: left;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: left;"&gt;It is based upon a simple comparison of rank and score.&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TP9t7pJsyfI/AAAAAAAAAj8/euJnhAeN2f8/s1600/googleai2010_score_by_rank.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TP9t7pJsyfI/AAAAAAAAAj8/euJnhAeN2f8/s400/googleai2010_score_by_rank.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df&amp;lt;- read.csv('googleAI2010.csv',sep=';',header=FALSE)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;"&gt;df$V7 &amp;lt;- NULL&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;names(df)&amp;lt;- c('rank', 'username','country','organization','language','elo_score')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;ggplot(data=df, aes(x=rank, y=elo_score)) + geom_point(size=1) + opts(title='Google AI Score by Rank')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Another approach to viewing this information is a histogram by score (which ignores rank). &amp;nbsp;With a binwidth of 100 (and ignoring low scores of people who signed up but who dropped out relatively early) a (nearly) bimodal distribution appears.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TP9vbsueChI/AAAAAAAAAkE/CXEGheJYM9g/s1600/googleai2010_histogram.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TP9vbsueChI/AAAAAAAAAkE/CXEGheJYM9g/s400/googleai2010_histogram.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;qplot(data=df, x=elo_score, geom='histogram', binwidth=100)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Any ideas about why this is not normal? &amp;nbsp;Is there some aspect of ELO scoring that leads to this shape? &amp;nbsp;Or are there different types of programmers represented?&lt;br /&gt;&lt;br /&gt;This can be broken down by language. 
&amp;nbsp;To avoid difficulty distinguishing colors, the rainbow palette is used and a few languages are not reported (since they were not highly represented in the competition).&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(sqldf)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df2=sqldf("select * from df where language not in ('Groovy','Scala','Go','OCaml')")&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df2$language=factor(df2$language)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;qplot(data=df2, x=elo_score, fill=language, geom='histogram', binwidth=100) + scale_fill_manual(values=rainbow(12))&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9wSRv87sI/AAAAAAAAAkI/9kP8BvDvDNQ/s1600/googleai2010_histogram_by_lang.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="319" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9wSRv87sI/AAAAAAAAAkI/9kP8BvDvDNQ/s320/googleai2010_histogram_by_lang.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;As mentioned in the&amp;nbsp;&lt;a href="http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html"&gt;previous post&lt;/a&gt;, the data is available at GitHub - feel free to post some of your own visualizations of 
this data.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-1042341025364158802?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/1042341025364158802/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/12/google-ai-challenge-scoresrank-by.html#comment-form' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1042341025364158802'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1042341025364158802'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/12/google-ai-challenge-scoresrank-by.html' title='Google AI Challenge: Scores/Rank by Language'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9u1mZfPjI/AAAAAAAAAkA/i7m8PEC4_Pk/s72-c/googleai2010_facet_by_language.png' height='72' width='72'/><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-6168320883529187100</id><published>2010-12-02T16:18:00.000-08:00</published><updated>2010-12-08T04:03:40.307-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Ruby'/><category scheme='http://www.blogger.com/atom/ns#' term='Google'/><title type='text'>Google AI Challenge: Languages Used by the Best Programmers</title><content type='html'>&lt;b&gt;&lt;span class="Apple-style-span" 
style="font-weight: normal;"&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgxWY_QNZI/AAAAAAAAAjc/ORVJjtoDBvg/s1600/program_language_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgxWY_QNZI/AAAAAAAAAjc/ORVJjtoDBvg/s400/program_language_density_plot.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;The &lt;/span&gt;&lt;a href="http://www.zdnet.com/blog/burnette/its-youth-vs-experience-in-the-google-ai-challenge-and-the-kids-are-winning/2123?tag=mantle_skin;content"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Google AI Challenge&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt; recently wrapped up with a &lt;/span&gt;&lt;a href="http://www.zdnet.com/blog/burnette/hungarian-lisp-developer-walks-away-with-google-ai-contest/2131"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Lisp developer from Hungary&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt; as the winner. &amp;nbsp;The competition challenges contestants to create bots that push the limits of AI and game theory. &amp;nbsp;These bots compete against one another, and a &lt;/span&gt;&lt;a href="http://ai-contest.com/rankings.php"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;complete ranking&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt; of competitors is available. 
&amp;nbsp;The big story today is that the winner (&lt;/span&gt;&lt;a href="http://quotenil.com/"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Gábor Melis&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;) used Lisp to beat out over 4000 other contestants around the world using a host of different programming languages. &amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Paul Graham has stated that&amp;nbsp;&lt;/span&gt;&lt;a href="http://www.paulgraham.com/arcll1.html"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Java was designed for "average" programmers&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&amp;nbsp;while other languages (&lt;/span&gt;&lt;a href="http://www.paulgraham.com/avg.html"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;like Lisp&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;) are for good programmers. &amp;nbsp;The fact that the winner of the competition wrote in Lisp seems to support this assertion. &amp;nbsp;Or should we see Mr. 
Melis as an anomaly who happened to use Lisp for this task?&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Programming Languages Usage&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div&gt;Java, C++, Python and C# were heavily used overall.&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: x-small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; language count(*)&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Java &amp;nbsp; &amp;nbsp; 1634&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; C++ &amp;nbsp; &amp;nbsp; 1232&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;3 &amp;nbsp; &amp;nbsp; &amp;nbsp;Python &amp;nbsp; &amp;nbsp; &amp;nbsp;948&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;4 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C# &amp;nbsp; &amp;nbsp; &amp;nbsp;485&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;5 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; PHP &amp;nbsp; &amp;nbsp; &amp;nbsp; 
80&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;6 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Ruby &amp;nbsp; &amp;nbsp; &amp;nbsp; 55&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;7 &amp;nbsp; &amp;nbsp; Haskell &amp;nbsp; &amp;nbsp; &amp;nbsp; 51&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;8 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Perl &amp;nbsp; &amp;nbsp; &amp;nbsp; 42&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;9 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Lisp &amp;nbsp; &amp;nbsp; &amp;nbsp; 33&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;10 Javascript &amp;nbsp; &amp;nbsp; &amp;nbsp; 19&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;11 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C &amp;nbsp; &amp;nbsp; &amp;nbsp; 18&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;12 &amp;nbsp; &amp;nbsp; &amp;nbsp;OCaml &amp;nbsp; &amp;nbsp; &amp;nbsp; 12&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;13 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Go &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;6&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;14 &amp;nbsp; &amp;nbsp; &amp;nbsp;Scala &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;4&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;15 &amp;nbsp; &amp;nbsp; Groovy &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;In the Top 200&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; language count(*)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Java &amp;nbsp; &amp;nbsp; &amp;nbsp; 70&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; C++ &amp;nbsp; &amp;nbsp; &amp;nbsp; 64&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;3 &amp;nbsp; &amp;nbsp; &amp;nbsp;Python &amp;nbsp; &amp;nbsp; &amp;nbsp; 
34&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;4 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C# &amp;nbsp; &amp;nbsp; &amp;nbsp; 17&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;5 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;4&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;6 &amp;nbsp; &amp;nbsp; Haskell &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;3&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;7 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; PHP &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;3&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;8 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Ruby &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;9 &amp;nbsp;Javascript &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;10 &amp;nbsp; 
&amp;nbsp; &amp;nbsp; Lisp &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;11 &amp;nbsp; &amp;nbsp; &amp;nbsp;OCaml &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Top 100&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;1 &amp;nbsp; &amp;nbsp; Java &amp;nbsp; &amp;nbsp; &amp;nbsp; 33&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;2 &amp;nbsp; &amp;nbsp; &amp;nbsp;C++ &amp;nbsp; &amp;nbsp; &amp;nbsp; 32&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;3 &amp;nbsp; Python &amp;nbsp; &amp;nbsp; &amp;nbsp; 20&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;4 &amp;nbsp; &amp;nbsp; &amp;nbsp; C# &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;9&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;5 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;3&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;6 
&amp;nbsp;Haskell &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;7 &amp;nbsp; &amp;nbsp; Lisp &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;8 &amp;nbsp; &amp;nbsp;OCaml &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Top 10&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&amp;nbsp;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&amp;nbsp;language count(*)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;1 &amp;nbsp; &amp;nbsp; Java &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;4&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;2 &amp;nbsp; &amp;nbsp; &amp;nbsp;C++ &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;3&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;3 &amp;nbsp; &amp;nbsp; &amp;nbsp; C# &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', 
Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;4 &amp;nbsp; &amp;nbsp; Lisp &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;The plot above is a bit difficult to discern due to the number of languages represented (and similarity in colors). &amp;nbsp;So here is a breakdown by language.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgyBXF3PhI/AAAAAAAAAjg/M6v-8WEvv98/s1600/lisp_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgyBXF3PhI/AAAAAAAAAjg/M6v-8WEvv98/s400/lisp_density_plot.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;Lisp does appear to be skewed towards higher ranking. &amp;nbsp;But even more striking are the C hippies:&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgyKSc-rvI/AAAAAAAAAjk/5Si6vyFPEvQ/s1600/c_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgyKSc-rvI/AAAAAAAAAjk/5Si6vyFPEvQ/s400/c_density_plot.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;The functional crowd represented with Haskell also ranked on the higher end:&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPg0CBK3W5I/AAAAAAAAAj4/q3iEbQvyMhc/s1600/haskell_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPg0CBK3W5I/AAAAAAAAAj4/q3iEbQvyMhc/s400/haskell_density_plot.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;How about Java? 
&amp;nbsp;There is a trend towards the average - but a significantly larger number of entrants used Java. &amp;nbsp;It also is a language taught in many colleges, and might reflect greater student participation in these languages (although MIT did focus on Lisp back in the day...).&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgybPKcEbI/AAAAAAAAAjs/2THlgOgE-mo/s1600/java_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgybPKcEbI/AAAAAAAAAjs/2THlgOgE-mo/s400/java_density_plot.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;How about representatives from the Microsoft? &amp;nbsp;Einstein and Elvis showed up - Mort was not interested.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgy6h6N5EI/AAAAAAAAAj0/8PXgOSzN8ck/s1600/csharp_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgy6h6N5EI/AAAAAAAAAj0/8PXgOSzN8ck/s400/csharp_density_plot.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;I can post charts of other languages if anyone asks - otherwise, &lt;a href="https://github.com/ezgraphs/R-Programs/tree/master/google-ai-2010/"&gt;download the files for yourself&lt;/a&gt; and draw your own conclusions. 
&amp;nbsp;And congratulations to&amp;nbsp;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;a href="http://quotenil.com/"&gt;Gábor Melis&lt;/a&gt;&amp;nbsp;- I am again feeling the inspiration to delve into the mysteries of Lisp and meander among mountains of parentheses...&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Methodology Used&lt;/b&gt;&lt;br /&gt;No need to proceed further unless you are interested in how the results listed above were derived. &lt;br /&gt;&lt;br /&gt;Basically, I used Ruby to scrape the results from the &lt;a href="http://ai-contest.com/rankings.php"&gt;Google AI Rankings site&lt;/a&gt;. &amp;nbsp;The results were read into R, and ggplot2 and sqldf libraries were used to analyze the results.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Get the Data into R&lt;/b&gt;&lt;br /&gt;So to find out more...I whipped up a&amp;nbsp;&lt;a href="https://github.com/ezgraphs/R-Programs/blob/master/google-ai-2010/googleAI2010.rb"&gt;ruby script&lt;/a&gt;&amp;nbsp;to create a &lt;a href="https://github.com/ezgraphs/R-Programs/blob/master/google-ai-2010/googleAI2010.csv"&gt;delimited file&lt;/a&gt; from the 47 page listing online. &amp;nbsp;(Feel free to get these from their GitHub location and do some additional validation/analysis of your own). 
&amp;nbsp; Read this file into R:&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;df&amp;lt;- read.csv('googleAI2010.csv',sep=';',header=FALSE)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;df$V7 &amp;lt;- NULL&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;names(df)&amp;lt;- c('rank', 'username','country','organization','language','elo_score')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Sanity Check&lt;/b&gt;&lt;br /&gt;Most of this work can be done in idiomatic R (which has some significant Lisp influences) - which might be a better way to honor the winner. &amp;nbsp;However, I find myself using sqlite more and more these days - particularly in mobile development. &amp;nbsp;So I used the sqldf library which uses this database behind the scenes.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://ai-contest.com/country_rankings.php"&gt;Country rankings&lt;/a&gt; are available online, and the following emulates these results. 
&amp;nbsp;Specifically, the number of entrants in the top 200 ranked contestants from each country can be derived as follows:&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;library('sqldf')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;top200=df[df$rank &amp;lt;= 200,]&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;sqldf('select country, count(*) from top200 group by country order by 2 desc')&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" 
style="font-weight: normal;"&gt;&lt;a href="http://ai-contest.com/organization_rankings.php"&gt;Organization rankings&lt;/a&gt;&amp;nbsp;are similar, representing the top organizations within the top 100. &amp;nbsp;There are some anomalies here, the highest ranking "Other" is not shown in the online version for obvious reasons, and most of these have only one entrant in the top 100 and are listed in an arbitrary manner. &amp;nbsp;However, the results are otherwise the same in R.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;t&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;op100=df[df$rank &amp;lt;= 100,]&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;sqldf('select organization, count(*) from top100 group by organization order by 2 desc')&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;R Code&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;The following are additional snippets of R code used to generate the results above.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small; font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman';"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;# Language Usage&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;sqldf('select language, count(*) from df group by language order by 2 desc')&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: 
normal;"&gt;sqldf('select language, count(*) from top200 group by language order by 2 desc')&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;sqldf('select language, count(*) from top100 group by language order by 2 desc')&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;top10=df[df$rank &amp;lt;= 10,]&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;sqldf('select language, count(*) from top10 group by language order by 2 desc')&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&amp;nbsp;If you fiddle enough with the bucket size for histograms, you might be able to draw some conclusions... but the density plot seemed like a nicer option. 
&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: x-small;"&gt;library('ggplot2')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;# Substitute your favorite language of those available for Lisp below&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;qplot(data=df[df$language=='Lisp',], x=rank, geom='histogram', binwidth=1000) + opts(title='Lisp')&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman';"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgoWQmQXrI/AAAAAAAAAjI/9hHaM-lAQ-A/s1600/lisp_ranking_histogram.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="319" 
src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgoWQmQXrI/AAAAAAAAAjI/9hHaM-lAQ-A/s320/lisp_ranking_histogram.png" style="cursor: move;" width="320" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;# The density plot at the top of this posting:&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;ggplot(data=df, aes(rank, fill=language)) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&amp;nbsp;&amp;nbsp;geom_density(alpha = 0.2) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman';"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;div style="margin-bottom: 0px; margin-left: 
0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&amp;nbsp;xlim(0,5000) +&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;/b&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&amp;nbsp;&amp;nbsp;opts(title='2010 Google AI Challenge Rankings')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;ggsave('program_language_density_plot.png')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;# Breakdown by language:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;ggplot(data=df[df$language=='Scala',], aes(rank, fill=language)) + geom_density(alpha = 0.2) + xlim(0,5000) + opts(title='Scala')&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" 
style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;&lt;b&gt;Update:&lt;/b&gt; &amp;nbsp;I have been keeping up with the comments - and sketched out some other ways of looking at the data in &lt;/span&gt;&lt;a href="http://www.r-chart.com/2010/12/google-ai-challenge-scoresrank-by.html"&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;another post&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;.&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-6168320883529187100?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/6168320883529187100/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html#comment-form' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6168320883529187100'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6168320883529187100'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html' title='Google AI Challenge: Languages Used by the Best Programmers'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' 
width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgxWY_QNZI/AAAAAAAAAjc/ORVJjtoDBvg/s72-c/program_language_density_plot.png' height='72' width='72'/><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-1179246055042215866</id><published>2010-11-10T16:24:00.000-08:00</published><updated>2010-11-10T16:24:08.016-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Financial'/><title type='text'>Mortgage Calculator (and Amortization Charts) with R</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNs3fPWMZaI/AAAAAAAAAhk/OM5HulgRhZM/s1600/mortgage_calculator.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="276" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNs3fPWMZaI/AAAAAAAAAhk/OM5HulgRhZM/s320/mortgage_calculator.PNG" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;Mortgage rates have been at historic lows recently. &amp;nbsp;The rates are posted &lt;a href="http://www.bankrate.com/"&gt;various places&lt;/a&gt; online along with simple mortgage calculators. &amp;nbsp;Such calculators illustrate the payment schedule for a mortgage based upon selected terms. But with less than a dozen lines of R code, you can do a far more sophisticated analysis.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Mortgage Calculation Function&lt;/b&gt;&lt;br /&gt;Rather than reinvent the wheel, you can work with &lt;a href="http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/mortgage.R"&gt;this nice R function&lt;/a&gt;&amp;nbsp;by&amp;nbsp;Thomas Girke (Associate Professor of Bioinformatics&amp;nbsp;over at UC Riverside). 
&amp;nbsp;At the R prompt, you can grab it from its home online by calling source:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/mortgage.R")&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;This loads the function and outputs a helpful description of the function:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;The monthly mortgage payments and amortization rates can be calculted with the mortgage() function like this:&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;mortgage(P=500000, I=6, L=30, amort=T, plotData=T)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier 
New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;P = principal (loan amount)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;I = annual interest rate&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;L = length of the loan in years&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;So keep in mind that there is a huge amount of R code available online:&amp;nbsp;&lt;/div&gt;&lt;div&gt;&lt;ul&gt;&lt;li&gt;&lt;a href="http://cran.r-project.org/web/packages/"&gt;CRAN&lt;/a&gt;&amp;nbsp;&lt;/li&gt;&lt;li&gt;&lt;a href="https://github.com/languages/R"&gt;Github&lt;/a&gt;&amp;nbsp;&lt;/li&gt;&lt;li&gt;&lt;a href="http://code.google.com/hosting/search?q=label:R"&gt;Google Code&lt;/a&gt;&amp;nbsp;&lt;/li&gt;&lt;/ul&gt;&lt;/div&gt;&lt;div&gt;are just the beginning. 
&amp;nbsp;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Instant R Graphical User Interfaces&lt;/b&gt;&lt;/div&gt;&lt;br /&gt;Rather than simply calling the function directly, you can quickly construct a GUI input widget using the fgui library.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;library(fgui)&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;gui(mortgage)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;With this trivial invocation, a window pops up.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNssTo9JGWI/AAAAAAAAAhU/L5_IfLDdJi4/s1600/mortgage_fgui.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="186" 
src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNssTo9JGWI/AAAAAAAAAhU/L5_IfLDdJi4/s400/mortgage_fgui.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;Not terribly fancy, but about the simplest way you will ever be able to construct a GUI! &amp;nbsp;In this case a mortgage amount of $90,000 for 10 years at 3.75% is illustrated. 
&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;After entering these values, click OK to actually call the function. &amp;nbsp;This results in a &amp;nbsp;good deal of interesting output. &amp;nbsp;Close the pop up window and look at the R Console:&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;Monthly payment: $900.5512 (stored in monthPay)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Total cost: $108066.1&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;As indicated in this message, an R object named monthPay contains the amount of the monthly payment and can be used in subsequent R commands and calculations. &amp;nbsp;You also are greeted with a graph illustrating annual interest and payments as a stacked bar chart.&lt;/span&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNsuNb_5WdI/AAAAAAAAAhY/Y_BKSSSv9Yg/s1600/mortgage_annual_interest_and_principal.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNsuNb_5WdI/AAAAAAAAAhY/Y_BKSSSv9Yg/s400/mortgage_annual_interest_and_principal.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;Plenty of useful information! 
&amp;nbsp;But that's not all...&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;&lt;b&gt;Beyond the Basics&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;You might have noticed a number of messages regarding data stored in R objects. &amp;nbsp;This is where the power of R exceeds that of any standard mortgage calculator. &amp;nbsp;These objects can serve as input to other calculations or charting operations.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;The aDFmonth object contains amortization data for each month, while aDFyear contains annual information. In the following example, a new data frame is created from the monthly data that does not include the amortization information and is then plotted using ggplot2. 
&amp;nbsp;(The amortization data is on a&amp;nbsp;significantly&amp;nbsp;different scale and is better viewed independently).&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;library(ggplot2)&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;DF=melt(aDFmonth[-1], id.vars='Year')&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;ggplot(DF, aes(x=Year,y=value, group=variable)) + geom_line() + facet_wrap(~ variable, ncol=1)&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNsvbu2PGzI/AAAAAAAAAhc/hgsBQnNFeng/s1600/mortgage_payment_principal_interest.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNsvbu2PGzI/AAAAAAAAAhc/hgsBQnNFeng/s400/mortgage_payment_principal_interest.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;You can quickly manipulate the data frame to view amortization information instead. 
&amp;nbsp;Use the exact same ggplot call (though the facet_wrap is removed below as unnecessary for a single variable) &amp;nbsp;to create a chart scaled to fit the values relevant to the amortization.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;DF=melt(aDFmonth[c(1,5)], id.vars='Year')&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;ggplot(DF, aes(x=Year,y=value, group=variable))+ geom_line()&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNswYk5iz7I/AAAAAAAAAhg/rJEFKPoWUwk/s1600/mortgage_amortization.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNswYk5iz7I/AAAAAAAAAhg/rJEFKPoWUwk/s400/mortgage_amortization.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"&gt;The limits of calculations and visualizations available in a web calculator or Excel are reached pretty quickly. 
&amp;nbsp;R provides the means to create relatively full featured solutions in only a few lines of code.&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-1179246055042215866?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/1179246055042215866/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/11/mortgage-calculator-and-amortization.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1179246055042215866'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1179246055042215866'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/11/mortgage-calculator-and-amortization.html' title='Mortgage Calculator (and Amortization Charts) with R'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/_FsLa1cMTCWU/TNs3fPWMZaI/AAAAAAAAAhk/OM5HulgRhZM/s72-c/mortgage_calculator.PNG' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-8351171166433109175</id><published>2010-11-09T16:50:00.000-08:00</published><updated>2010-11-11T03:44:56.360-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Economics'/><title type='text'>Don't be a Turkey</title><content type='html'>&lt;div 
class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNnmxtYGe7I/AAAAAAAAAhQ/JRqsx6Sx0z4/s1600/turkey.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="298" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNnmxtYGe7I/AAAAAAAAAhQ/JRqsx6Sx0z4/s400/turkey.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: center;"&gt;&lt;span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px; font-family: Verdana, Arial, Helvetica, sans-serif; font-size: small;"&gt;'Indeed, I am moving on: my new project is about methods on how to domesticate the unknown, exploit randomness, figure out&amp;nbsp;&lt;i&gt;how to live in a world we don't understand very well&lt;/i&gt;. 
While most human thought (particularly since the enlightenment) has focused us on how to turn knowledge into decisions, my new mission is to build methods to turn lack of information, lack of understanding, and lack of "knowledge" into decisions—how, as we will see, not to be a "turkey".'&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: center;"&gt;&lt;span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px; font-family: Verdana, Arial, Helvetica, sans-serif; font-size: small;"&gt;-&amp;nbsp;&lt;a href="http://www.edge.org/3rd_culture/taleb08/taleb08_index.html"&gt;Nassim Nicholas Taleb&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;With Thanksgiving on the way, an economic lesson provided by a turkey's statistical department seems appropriate. &amp;nbsp; &amp;nbsp;Our turkey - let's call him auRthur - like most turkeys has a statistical department at his disposal. &amp;nbsp;His&amp;nbsp;department is in fact tracking an index - the Turkey Welfare Index which is a reflection of how much the human race cares about auRthur. &amp;nbsp;Notice the relatively positive trend... 
until &lt;whack&gt; Thanksgiving Day...&lt;/whack&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TNnfcvvyMWI/AAAAAAAAAhM/1T860Uju4n8/s1600/TurkeyWelfareIndex.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TNnfcvvyMWI/AAAAAAAAAhM/1T860Uju4n8/s400/TurkeyWelfareIndex.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;Evidently, our auRthur's statistical department utilized a model that had some flaws - "past performance is not necessarily a predictor of future returns". &amp;nbsp; This is because the harvesting of the turkey is a "rare event." &amp;nbsp;Rare (unprecedented) events are difficult to predict. &amp;nbsp;The story is not terribly amusing to turkeys to begin with - but becomes less amusing to humans when understood as a metaphor of the financial meltdown and statistical modeling in use by banking institutions. &amp;nbsp;Essentially, banking institutions assumed a huge amount of risk because a catastrophic meltdown was simply outside the realm of consideration. &amp;nbsp;It was not represented in most of the models in use.&lt;br /&gt;&lt;br /&gt;A great and vivid illustration. &amp;nbsp;See &lt;a href="http://www.edge.org/3rd_culture/taleb08/taleb08_index.html"&gt;Nassim Nicholas Taleb's essay&lt;/a&gt; where this chart and illustration&amp;nbsp;originally&amp;nbsp;appeared at &lt;a href="http://edge.org/"&gt;edge.org&lt;/a&gt;. 
&amp;nbsp;This article discusses the limits of statistical thinking and is a good springboard to other writings by Taleb - who was a&amp;nbsp;practitioner&amp;nbsp;of risk as he ran a hedge fund for a number of years and saw many of the practices in the financial industry up close and personal.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The chart above was created using R and ggplot2. &amp;nbsp;The data frame named DF was populated with data related to the Turkey Welfare Index.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; DF&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; TWI Day color&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1 &amp;nbsp; 14 &amp;nbsp; 1 black&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2 &amp;nbsp; 15 &amp;nbsp; 2 black&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;3 &amp;nbsp; 16 &amp;nbsp; 3 black&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;4 &amp;nbsp; 17 &amp;nbsp; 4 black&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;5 &amp;nbsp; 18 &amp;nbsp; 5 black&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;6 &amp;nbsp; 19 &amp;nbsp; 6 black&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;7 &amp;nbsp; 20 &amp;nbsp; 7 black&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;8 -100 &amp;nbsp; 8 &amp;nbsp; red&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;UPDATE: &amp;nbsp;This can be entered in a few different ways. 
&amp;nbsp;One is through a grid (which requires that you specify the Day as a factor).&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;DF=edit(data.frame())&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;DF$Day=factor(DF$Day)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Plotted using ggplot2:&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;ggplot(data=DF, aes(x=Day, y=TWI, fill=color)) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;geom_bar() +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;scale_fill_manual(value= c("black", "red")) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;theme_bw() + scale_x_discrete(breaks = NA) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;opts(legend.position='none', axis.title.x=theme_blank(),&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;axis.title.y=theme_blank(),&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; title='Turkey Welfare Index')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;This included a couple of somewhat unusual settings to shut off labels and axes that results in the simple "plain" appearance you see above.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;So - Happy Thanksgiving - understand statistics and don't be a turkey...&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-8351171166433109175?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/8351171166433109175/comments/default' title='Post Comments'/><link 
rel='replies' type='text/html' href='http://www.r-chart.com/2010/11/dont-be-turkey.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/8351171166433109175'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/8351171166433109175'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/11/dont-be-turkey.html' title='Don&apos;t be a Turkey'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/_FsLa1cMTCWU/TNnmxtYGe7I/AAAAAAAAAhQ/JRqsx6Sx0z4/s72-c/turkey.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-6556038199488581490</id><published>2010-11-03T17:16:00.000-07:00</published><updated>2010-11-03T17:16:48.628-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='iPhone'/><title type='text'>iPhone App Store Acceptance Time / Download Results</title><content type='html'>&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNH66yq20fI/AAAAAAAAAgw/6pfNg0qbn8c/s1600/r-iphone.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="200" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNH66yq20fI/AAAAAAAAAgw/6pfNg0qbn8c/s200/r-iphone.png" width="146" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;/td&gt; &lt;td&gt;&amp;nbsp;&amp;nbsp;&lt;/td&gt; &lt;td&gt;&lt;br /&gt;Complaints about the iPhone App Store are not uncommon among developers. 
&amp;nbsp;The submission process is frustrating at best, you can expect arbitrary rejections, and Apple's policies have not always been particularly open or welcoming. &amp;nbsp;If you make it through the process and get an app accepted, it can be essentially buried where it will remain unused unless you dedicate significant energy to marketing it. &lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;And so I figured I would do a small scale experiment to check App Store response in these areas (and App Store user behavior)....and post the results here in the hopes that developers planning to write iPhone apps would benefit from my experience.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Cheaper by the (Half) Dozen&lt;/b&gt;&lt;br /&gt;Having a limited amount of time available, and appreciating the value of quick iterations in development or in any business process, I limited myself to submitting a few apps with similar functionality and different target audiences. &amp;nbsp;What follows are my findings (with a bit of R code used for analysis). &lt;br /&gt;&lt;br /&gt;Six free iPhone apps were submitted for publication during a 21 day period between &amp;nbsp;July 23,&amp;nbsp;2010 and &amp;nbsp;August 13, 2010. &amp;nbsp;Four apps were accepted and two were rejected.&lt;br /&gt;&lt;br /&gt;The four apps that were accepted:&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;a href="http://itunes.apple.com/us/app/r-chart/id384073723?mt=8"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;R-Chart&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&amp;nbsp;(R Programming Community News) has been available 96 days and averaged 4.44 downloads per day. 
&amp;nbsp;(&lt;a href="http://itunes.apple.com/us/app/r-chart/id384073723?mt=8"&gt;Download it&lt;/a&gt; if you want to keep up with this blog and/or R community news).&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a href="http://itunes.apple.com/us/app/frb/id386008821?mt=8"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;FRB&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&amp;nbsp;(U.S. Federal Reserve Board News) has been available 82 days and averaged 4.15 downloads per day.&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a href="http://itunes.apple.com/us/app/duq-news/id384506820?mt=8"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Duq News&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&amp;nbsp;(Duquesne University News) has been available 92 days and averaged 2.52 downloads per day.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a href="http://itunes.apple.com/us/app/visit-the-lehigh-valley/id384370305?mt=8"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Visit the Lehigh Valley&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&amp;nbsp;(Lehigh Valley Tourism Info) has been available 93 &amp;nbsp;days and averaged 1.31 downloads per day.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;Two apps were rejected &amp;nbsp; &lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&amp;nbsp;DeSales U (DeSales University 
News)&lt;/li&gt;&lt;li&gt;&amp;nbsp;Blender Buzz (Blender Software Blog/News )&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;&lt;div&gt;&lt;b&gt;App Functionality and Subject Area&lt;/b&gt;&lt;/div&gt;&lt;div&gt;All apps contained essentially equivalent functionality, but differed by subject area and graphic and styling qualities. &amp;nbsp;The "&lt;a href="http://itunes.apple.com/us/app/r-chart/id384073723?mt=8"&gt;R-Chart&lt;/a&gt;" and "Blender Buzz" apps reference resources of interest to software users and were intended to promote &lt;a href="http://www.r-chart.com/"&gt;this blog&lt;/a&gt; and the &lt;a href="http://blenderbuzz.blogspot.com/"&gt;Blender Buzz blog&lt;/a&gt;. &amp;nbsp;They are topical and not limited by locale or institution. &amp;nbsp;"&lt;a href="http://itunes.apple.com/us/app/duq-news/id384506820?mt=8"&gt;Duq News&lt;/a&gt;" and "DeSales U" provide news from &lt;a href="http://www.desales.edu/"&gt;Duquesne&lt;/a&gt; and &lt;a href="http://www.desales.edu/"&gt;DeSales Universities&lt;/a&gt;. &amp;nbsp;"&lt;a href="http://itunes.apple.com/us/app/visit-the-lehigh-valley/id384370305?mt=8"&gt;Visit the Lehigh Valley&lt;/a&gt;" provides information about places and events for visitors to Eastern Pennsylvania. The "&lt;a href="http://itunes.apple.com/us/app/frb/id386008821?mt=8"&gt;FRB&lt;/a&gt;" app provides latest publicly available news from the U.S. Federal Reserve Board.&lt;/div&gt;&lt;br /&gt;&lt;b&gt;Review Process and Time&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;It took between 7 and 9 days for the App Store to review an app and either accept or reject it. &amp;nbsp;It does not appear that subject area of the app contributed to its acceptance. &amp;nbsp;Both R-Chart and Blender Buzz were directed at programming communities - one was accepted and the other was rejected. &amp;nbsp;Likewise, one of the University apps was accepted, the other was rejected. 
&amp;nbsp;Each app was submitted once only. &amp;nbsp;None were resubmitted after initial rejection.&lt;/div&gt;&lt;div class="separator" style="clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHtcGxrSgI/AAAAAAAAAgs/pW3a-E1g58g/s1600/processing_time.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHtcGxrSgI/AAAAAAAAAgs/pW3a-E1g58g/s400/processing_time.png" style="cursor: move;" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Download Counts&lt;/b&gt;&lt;/div&gt;&lt;div&gt;A total of 1120 have been downloaded. &amp;nbsp;As noted above, the result is between 1 and 4 downloads per day.&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHqECZOZmI/AAAAAAAAAgo/iBLdSIV-WGg/s1600/app_downloads.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHqECZOZmI/AAAAAAAAAgo/iBLdSIV-WGg/s400/app_downloads.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;b&gt;&lt;u&gt;&lt;br /&gt;&lt;/u&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;I have not expended much effort in promotion. &amp;nbsp;I tweeted about the R-Chart App and mentioned it here on this blog but never promoted any of the others. 
&amp;nbsp;So the downloads were the result of folks searching for an app that was of interest to them.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Conclusion...&lt;/b&gt;&lt;br /&gt;So getting an app accepted by the app store is not an insurmountable process - but does require time and planning. &amp;nbsp;It is not an activity that will just take care of itself. &amp;nbsp;And because of&amp;nbsp;inconsistency in the process, you would do better to allot a bit of extra time for the app store process. &amp;nbsp;And unless your app fills a rather unique niche, you will need to do marketing in the same way that you would for a web site or any other resource.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;R Code used in the Analysis is Below&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; # Read in the data&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; df = read.csv('app_stats.txt')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; df&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;App Downloads Submitted_Date Response_Date Response&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; R-Chart &amp;nbsp; &amp;nbsp; &amp;nbsp; 426 &amp;nbsp; &amp;nbsp; 2010-07-23 &amp;nbsp; &amp;nbsp;2010-07-30 Accepted&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; FRB &amp;nbsp; &amp;nbsp; &amp;nbsp; 340 &amp;nbsp; &amp;nbsp; 2010-08-04 &amp;nbsp; &amp;nbsp;2010-08-13 Accepted&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;3 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Duq News &amp;nbsp; &amp;nbsp; &amp;nbsp; 232 &amp;nbsp; &amp;nbsp; 2010-07-26 &amp;nbsp; &amp;nbsp;2010-08-03 Accepted&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;4 Visit the Lehigh Valley &amp;nbsp; &amp;nbsp; &amp;nbsp; 122 &amp;nbsp; &amp;nbsp; 2010-07-25 &amp;nbsp; 
&amp;nbsp;2010-08-02 Accepted&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;5 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; DeSales U &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp; 2010-08-02 &amp;nbsp; &amp;nbsp;2010-08-09 Rejected&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;6 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Blender Buzz &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp; 2010-07-26 &amp;nbsp; &amp;nbsp;2010-08-03 Rejected&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; # Get the total downloads&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; sum(df$Downloads)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', 
Courier, monospace; font-size: small;"&gt;&amp;gt; # Do a plot of downloads&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&amp;gt; ggplot(data=df, aes(x=App, y=Downloads, fill=Response))&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;+ geom_bar() + coord_flip() + theme_bw()&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; ggsave('app_downloads.png')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; ggplot(data=df, aes(x=App, y=as.numeric(Processing_Time), fill=Response)) + geom_bar() + 
coord_flip() + theme_bw()&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; ggsave('processing_time.png')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; # Cast the date columns as such&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;div style="margin-bottom: 0px; 
margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; df$Submitted_Date=as.Date(df$Submitted_Date)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; df$Response_Date=as.Date(df$Response_Date)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; # Find out total days each application was on the market&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; as.Date('2010-11-03') - min(df$Submitted_Date)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;&amp;gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; # Determine total number of days apps were being submitted for review&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&amp;gt; max(df$Response_Date) - min(df$Submitted_Date)&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&amp;gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&amp;gt; # Processing time at the app store&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; df$Processing_Time = df$Response_Date - df$Submitted_Date&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; # Time each app has been available for download&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 
0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; df$days_available = as.Date('2010-11-03') - df$Response_Date&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; # Downloads per day&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; df$downloads_per_day = df$Downloads/as.numeric(df$days_available)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; # Limit view to selected columns&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt;&amp;nbsp;df[c(1,7,8)]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-6556038199488581490?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/6556038199488581490/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/11/iphone-app-store-acceptance-time.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6556038199488581490'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6556038199488581490'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/11/iphone-app-store-acceptance-time.html' title='iPhone App Store Acceptance Time / Download Results'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TNH66yq20fI/AAAAAAAAAgw/6pfNg0qbn8c/s72-c/r-iphone.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-4939968277579407625</id><published>2010-10-18T19:20:00.000-07:00</published><updated>2010-10-18T19:20:16.398-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><title 
type='text'>Hadley on a Postage Stamp?</title><content type='html'>&lt;img src="http://imgur.com/DrIlR.png" /&gt;&lt;br /&gt;&lt;br /&gt;Yet another reason to check out the &lt;a href="http://github.com/hadley/ggplot2/wiki/Crime-in-Downtown-Houston,-Texas-:-Combining-ggplot2-and-Google-Maps"&gt;ggplot2 wiki!&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-4939968277579407625?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/4939968277579407625/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/10/hadley-on-postage-stamp.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/4939968277579407625'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/4939968277579407625'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/10/hadley-on-postage-stamp.html' title='Hadley on a Postage Stamp?'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-7793013875051299367</id><published>2010-10-18T15:12:00.000-07:00</published><updated>2010-10-18T15:14:42.968-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='quantmod'/><category scheme='http://www.blogger.com/atom/ns#' term='Cramer'/><title type='text'>Cramer's Stock Pick Recommendations Analyzed (Part II)</title><content 
type='html'>&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzFo4s-JuI/AAAAAAAAAeg/4y2xqNdiH1c/s1600/R_With_Jim_Cramer2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;br /&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzFo4s-JuI/AAAAAAAAAeg/4y2xqNdiH1c/s1600/R_With_Jim_Cramer2.png" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;td&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/td&gt;&lt;td&gt;This is the second post (previous one&lt;a href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html"&gt; here&lt;/a&gt;) that provides an analysis of Cramer's stock recommendations based upon the &lt;a href="http://www.thestreet.com/mad-money/index.html"&gt;Mad Money Stock Screener&lt;/a&gt; as of 10/15/2010.&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;b&gt;Recommendations by Segment&lt;/b&gt;&lt;br /&gt;As mentioned in the &lt;a href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html"&gt;previous post&lt;/a&gt;, recommendations are referenced either by a number below or by name.&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;# &amp;nbsp; Description&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt; &lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;5 &amp;nbsp; Buy&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;4 &amp;nbsp; Positive&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;3 &amp;nbsp; 
Hold&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;2 &amp;nbsp; Negative&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;1 &amp;nbsp; Sell&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;br /&gt;Referencing calls by number is used to provide average scores and to plot results. &amp;nbsp;Also as noted previously, not all segments of the show are available through the Stock Screener.&lt;br /&gt;&lt;br /&gt;Calls made during the Lightning Round and Mail Bag have an average of less than 4 (positive). &amp;nbsp;Calls driven by questions by the audience have a lower average, while those in interviews are higher.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;b&gt;Segment &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Average Call&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;Discussed &amp;nbsp; &amp;nbsp; &amp;nbsp; 4.387156&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;Featured &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;4.351351&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;Interview &amp;nbsp; &amp;nbsp; &amp;nbsp; 4.895954&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;Lighting Round &amp;nbsp;3.764465&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;Mail Bag &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp;3.605691&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy75miX3cI/AAAAAAAAAd8/1XKHJUZ-uqM/s1600/cramer_average_call_by_segment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy75miX3cI/AAAAAAAAAd8/1XKHJUZ-uqM/s400/cramer_average_call_by_segment.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;b&gt;Calls Trail the Market&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;This is a bit more speculative a representation. &amp;nbsp;The S &amp;amp; P and Dow Jones Industrial Average are used in this comparison. &amp;nbsp;The call (on a scale of 1 to 5) is multiplied by a factor so that a smoothed conditional mean line is generated on the chart. &amp;nbsp;The factor is arbitrary; it just makes the line fit on the chart in a reasonable location, so the directionality of the call line is relevant - not the degree. 
&lt;br /&gt;&lt;br /&gt;The S &amp;amp; P appears as the red line and the average of the calls for each day (times a factor of 3000) appears in blue.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy-6-SnKLI/AAAAAAAAAeE/Z9aFUrdv1xM/s1600/s_and_p_calls.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy-6-SnKLI/AAAAAAAAAeE/Z9aFUrdv1xM/s400/s_and_p_calls.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;The DJI is multiplied by a factor of 3000 in the chart below.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy_sBcpCoI/AAAAAAAAAeI/BO62OLv610g/s1600/dji_calls.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy_sBcpCoI/AAAAAAAAAeI/BO62OLv610g/s400/dji_calls.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;Although it is not completely clear, it appears that stock picks tend to trail the market movement. &amp;nbsp;For this reason they generally sound plausible. &amp;nbsp;Calls tend to be more pessimistic at the time the market has been moving down.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;5 of Cramer's Favorites&lt;/b&gt;&lt;br /&gt;A few of the stocks that Cramer has recommended in the last year qualify in a special way as "favorites." 
&amp;nbsp;They are the stocks that appear the most times in the data with only a buy recommendation.&lt;br /&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Wynn Resorts (WYNN) had a range of $27.00 between the lowest and highest 19 buy recommendations.&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;b&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCmj_6l0I/AAAAAAAAAeM/c21g-67sr8U/s1600/jim_cramer_WYNN.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCmj_6l0I/AAAAAAAAAeM/c21g-67sr8U/s400/jim_cramer_WYNN.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: 
normal;"&gt;Weatherford Int'l (WFT)&amp;nbsp;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;had a range of&amp;nbsp;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;$6.18 between its 16 buy recommendations.&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzCs-f11DI/AAAAAAAAAeQ/Nb6UZLS9RQE/s1600/jim_cramer_WFT.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzCs-f11DI/AAAAAAAAAeQ/Nb6UZLS9RQE/s400/jim_cramer_WFT.png" width="400" 
/&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;b&gt;&lt;b&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;NVIDIA (NVDA) &amp;nbsp;had a range of 9.05 between its 15 buy recommendations.&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCzuy17OI/AAAAAAAAAeU/kiOW-pLhocQ/s1600/jim_cramer_NVDA.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCzuy17OI/AAAAAAAAAeU/kiOW-pLhocQ/s400/jim_cramer_NVDA.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; 
margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Cypress Semiconductor (CY) had a range of 4.58 between its 13 buy recommendations.&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzC3adPLPI/AAAAAAAAAeY/-ZWlQWdDFbs/s1600/jim_cramer_CY.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzC3adPLPI/AAAAAAAAAeY/-ZWlQWdDFbs/s400/jim_cramer_CY.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;/b&gt;&lt;br /&gt;&lt;b&gt;&lt;div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Teva Pharmaceutical (TEVA) had a 17.03 range in its 12 buy recommendations.&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/b&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TLzC9MColUI/AAAAAAAAAec/rirZeq5wsYk/s1600/jim_cramer_TEVA.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TLzC9MColUI/AAAAAAAAAec/rirZeq5wsYk/s400/jim_cramer_TEVA.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;Conclusion&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;We are bombarded with predictions and promises regularly in the news and media. 
Over time, I have grown more and more suspicious of the ability of individuals to consistently predict stock prices by simply having a superficial knowledge of current market motions and a general awareness of current financial news. &amp;nbsp;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt; &lt;/span&gt; &lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&amp;nbsp;I often think about verifying the claims, but often don't have the time... figured I would at least take a cursory look. &amp;nbsp;I hope this sort of thing becomes more common to keep the media honest. &amp;nbsp;&lt;b&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;b&gt;&lt;div style="display: inline !important;"&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;At least Jim Cramer's show is cast somewhat in the realm of entertainment. &amp;nbsp;Again, I'll refer you to &lt;a href="http://online.barrons.com/article/SB118681265755995100.html"&gt;Bill Alpert of Barrons&lt;/a&gt; who has done more extensive analysis and reporting on Cramer's recommendations. 
&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;/b&gt;&lt;/b&gt;&lt;/b&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-7793013875051299367?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/7793013875051299367/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations_18.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7793013875051299367'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7793013875051299367'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations_18.html' title='Cramer&apos;s Stock Pick Recommendations Analyzed (Part II)'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzFo4s-JuI/AAAAAAAAAeg/4y2xqNdiH1c/s72-c/R_With_Jim_Cramer2.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-2241698458412841038</id><published>2010-10-16T15:56:00.000-07:00</published><updated>2010-10-16T15:56:30.551-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><title type='text'>2010 ggplot2 Case Study Competition Winners</title><content type='html'>&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;br /&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" 
src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;amp;bc1=000000&amp;amp;IS2=1&amp;amp;bg1=FFFFFF&amp;amp;fc1=000000&amp;amp;lc1=0000FF&amp;amp;t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;m=amazon&amp;amp;f=ifr&amp;amp;md=10FE9736YVPPT7A0FBG2&amp;amp;asins=0387981403" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt; &lt;td&gt;&lt;br /&gt;The 2010 ggplot2 Case Study Competition Winners &lt;a href="http://github.com/hadley/ggplot2/wiki"&gt;have been announced&lt;/a&gt;! &amp;nbsp;Congratulations to the winners!&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;a href="http://github.com/hadley/ggplot2/wiki/Crime-in-Downtown-Houston,-Texas-:-Combining-ggplot2-and-Google-Maps"&gt;Grand Prize&lt;/a&gt;:&amp;nbsp;David Kahle, Rice University&lt;/li&gt;&lt;li&gt;&lt;a href="http://github.com/hadley/ggplot2/wiki/Mapping-electrical-activity-in-the-human-neocortex"&gt;Finalist&lt;/a&gt;:&amp;nbsp;Michael Lavine, UMass Amherst&amp;nbsp;&lt;/li&gt;&lt;li&gt;&lt;a href="http://github.com/hadley/ggplot2/wiki/Case-Study:-Raman-Spectroscopic-Grading-of-Gliomas"&gt;Finalist&lt;/a&gt;: Claudia Beleites, TU Dresden &amp;amp; Uni. 
Trieste&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;Check out their entries to get a glimpse of what is possible with R and ggplot2.&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-2241698458412841038?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/2241698458412841038/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/10/2010-ggplot2-case-study-competition.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2241698458412841038'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2241698458412841038'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/10/2010-ggplot2-case-study-competition.html' title='2010 ggplot2 Case Study Competition Winners'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-1700837471508690785</id><published>2010-10-16T13:07:00.000-07:00</published><updated>2010-10-18T09:47:18.782-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='quantmod'/><category scheme='http://www.blogger.com/atom/ns#' term='Cramer'/><title type='text'>Cramer's Stock Pick Recommendations Analyzed</title><content type='html'>&lt;div class="MsoNormal"&gt;&lt;div class="separator" style="clear: both; text-align: 
center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLx6DP7sw3I/AAAAAAAAAd4/rL18W-q7TTE/s1600/R_With_Jim_Cramer2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLx6DP7sw3I/AAAAAAAAAd4/rL18W-q7TTE/s1600/R_With_Jim_Cramer2.png" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;Bill Alpert of Barron’s has demonstrated the use of R in financial journalism as he &lt;a href="http://online.barrons.com/article/SB118681265755995100.html"&gt;criticized&lt;/a&gt; the performance of &lt;a href="http://www.cnbc.com/id/15838187/"&gt;Jim Cramer’s&lt;/a&gt; stock picks.&amp;nbsp;&amp;nbsp;Patrick Burns was an &lt;a href="http://burns-stat.com/pages/Working/cramer_vs_pseudocramer.pdf"&gt;advisor for the analysis&lt;/a&gt; done in the article. &amp;nbsp;R was an important tool that allowed them to do their research as indicated by Burns in &lt;a href="http://burns-stat.com/pages/Working/cramer_vs_pseudocramer.pdf"&gt;his article&lt;/a&gt; and by Alpert in &lt;a href="http://www.flickr.com/photos/palewire/4427731028/"&gt;his presentation&lt;/a&gt;&amp;nbsp;and in an &lt;a href="http://cran.r-project.org/doc/Rnews/Rnews_2007-3.pdf"&gt;article&lt;/a&gt; published in R News. &amp;nbsp;Their specific critiques were centered around obtaining objective verification of claims by CNBC that by following Cramer's advice, one could beat the S &amp;amp; P 500 index. 
&amp;nbsp; I refer you to these sources if you are interested in a more comprehensive analysis.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;The following is based upon data available on the Mad Money Stock Screener as of&amp;nbsp;10/15/2010.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Mad Money Stock Screener&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Since Cramer's &lt;a href="http://www.thestreet.com/mad-money/index.html"&gt;Mad Money Stock Screener&lt;/a&gt; is available online there is at least an "unofficial" group of Cramer's recommendations available to analyze. &amp;nbsp;It is apparent that the data is not complete, as several program segments in the drop down are not represented. &amp;nbsp;Selecting any of the following segments results in no records returned:&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;ul&gt;&lt;li&gt;Caller's Stock&lt;/li&gt;&lt;li&gt;Game Plan&lt;/li&gt;&lt;li&gt;Sudden Death&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;In addition, the date provided includes month and day (but not year information). &amp;nbsp;It appears that data from one calendar year is available through the web site. &amp;nbsp;This was borne out by plotting the price at the time of the recommendation on charts for individual equities.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Always A Bull Market Somewhere&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;According to his profile, "Jim Cramer believes that there is always a bull market somewhere, and he wants to help you find it". &amp;nbsp;His optimistic, entertaining and confident approach that he exudes on screen is reflected in his history of stock picks. 
&amp;nbsp;Recommendations are indicated either by a number below or by name:&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;# &amp;nbsp; Description&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;5 &amp;nbsp; Buy&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;4 &amp;nbsp; Positive&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;3 &amp;nbsp; Hold&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;2 &amp;nbsp; Negative&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;1 &amp;nbsp; Sell&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Cramer is functioning as an entertainer with financial knowledge. &amp;nbsp;The show is geared towards providing action oriented advice (buy/sell) and tends to be skewed towards positive actions. &amp;nbsp;This fits with his profile description - if there is always a bull market, there is always something to buy. 
&amp;nbsp;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Analysis of All Recommendations&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;The following jitter demonstrates that a 5 (Buy Recommendation) is most often given, and the smoothing indicates that Cramer is generally positive in his ratings.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyy8EeU6I/AAAAAAAAAdY/uJMGEQY8qzU/s1600/jim_cramer_recommendations_by_segment_jitter.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyy8EeU6I/AAAAAAAAAdY/uJMGEQY8qzU/s400/jim_cramer_recommendations_by_segment_jitter.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;The program segment in use might also shed some light on the recommendations given. 
&amp;nbsp;As mentioned earlier, not every program segment is represented in the data.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyQ4XpPTI/AAAAAAAAAdU/l7LQGWgllac/s1600/jim_cramer_recommendations_by_segment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyQ4XpPTI/AAAAAAAAAdU/l7LQGWgllac/s400/jim_cramer_recommendations_by_segment.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;The specific totals represented above:&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &lt;b&gt;Buy &amp;nbsp; &amp;nbsp; Hold Negative Positive &amp;nbsp; &amp;nbsp; Sell&amp;nbsp;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;2336 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;7 &amp;nbsp; &amp;nbsp; &amp;nbsp;229 &amp;nbsp; &amp;nbsp; &amp;nbsp;422 &amp;nbsp; &amp;nbsp; &amp;nbsp;559&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;The vast majority of the time, Cramer gives a buy recommendation. &amp;nbsp;The second most often provided recommendation is to sell. &amp;nbsp;A clear "Buy" or "Sell" is certainly more entertaining to hear than a "Meh... &amp;nbsp;hold." &amp;nbsp;None of this in and of itself means that Cramer's ratings are bad or inferior to other sources. &amp;nbsp;It simply serves to illustrate that the program is geared towards entertainment. 
&amp;nbsp;At best, one might hope that only the clear winners are topics of conversation on the program. &amp;nbsp;However, further analysis at least calls this into question.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Individual Stock Recommendations&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Not every company has an equal number of recommendations. &amp;nbsp;The top 5 (in terms of total recommendations made) are Apple, Citigroup, Intel, Bank of America and Ford Motor Company. &amp;nbsp;In order to get a sense of a given&amp;nbsp;recommendation&amp;nbsp;in the scope of wider market history, the data from the stock screener can be superimposed on a stock chart.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Apple&lt;/b&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLn5bzqcKKI/AAAAAAAAAdc/7Nuoa1YyX9c/s1600/jim_cramer_AAPL.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLn5bzqcKKI/AAAAAAAAAdc/7Nuoa1YyX9c/s400/jim_cramer_AAPL.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Apple was given a&amp;nbsp;recommendation&amp;nbsp;on 90&amp;nbsp;occasions (more than twice as many as the next most popular companies recommended). The&amp;nbsp;average recommendation was 4.933333 and Cramer recommended Buy 84 times and Positive 6 times. &amp;nbsp;The clear upward trend in Apple's price justifies an optimistic view in recent history. 
&amp;nbsp;None of the remaining stocks in the top 5 recommended fits this pattern though.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Citigroup&lt;/b&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn61ZxthTI/AAAAAAAAAdg/YaglqDPl2BI/s1600/jim_cramer_C.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn61ZxthTI/AAAAAAAAAdg/YaglqDPl2BI/s400/jim_cramer_C.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;Citigroup&amp;nbsp;was given a&amp;nbsp;recommendation&amp;nbsp;on 40&amp;nbsp;occasions and had&amp;nbsp;an average recommendation of&amp;nbsp;4.850000, Cramer recommended Buy 36 times and Positive 3 times and Negative 1 time. &amp;nbsp;The negative rating was on the 12/08/2009 Lightning Round. 
&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Intel&lt;/b&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn-xQJ39lI/AAAAAAAAAdo/EDA97hkhfjo/s1600/jim_cramer__INTC.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn-xQJ39lI/AAAAAAAAAdo/EDA97hkhfjo/s400/jim_cramer__INTC.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Intel was tied for second place with Citigroup with 40 recommendations. &amp;nbsp;It had an average recommendation of 4.900000. &amp;nbsp;He recommended&amp;nbsp;Buy on 36 occasions and&amp;nbsp;Positive on 4 occasions.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Bank of America&lt;/b&gt;&amp;nbsp;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLoAI0ZrHHI/AAAAAAAAAds/QWPm5G8iBrw/s1600/jim_cramer_BAC.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLoAI0ZrHHI/AAAAAAAAAds/QWPm5G8iBrw/s400/jim_cramer_BAC.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Bank of America had an average of 4.820513 in the 39 times it was recommended. 
&amp;nbsp;It was given a Buy recommendation 34 times, a Positive 4 times and a negative once (During the 11/24/2009&amp;nbsp;Lightning Round).&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Ford Motor&amp;nbsp;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Ford Motor Company was given an average of 4.777778 in the 36 times it was recommended (a Buy 28 times and a Positive 6 times).&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;A Negative Example&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;One additional stock that is of interest is British&amp;nbsp;Petroleum&amp;nbsp;(BP), which had a rough ride this year due to the &lt;a href="http://www.r-chart.com/2010/06/plotting-bp-oil-spill-testing-data.html"&gt;Deepwater Horizon Oil Spill&lt;/a&gt;. &amp;nbsp;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLoCljdYhcI/AAAAAAAAAdw/FXa-htDiB5E/s1600/jim_cramer_BP.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLoCljdYhcI/AAAAAAAAAdw/FXa-htDiB5E/s400/jim_cramer_BP.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Cramer issued a Sell&amp;nbsp;recommendation&amp;nbsp;on 10 occasions, a Positive on 1 occasion and a Buy on 6 occasions (a total of 17 recommendations).&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;b&gt;Conclusion&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;I'll leave it to you to draw your own conclusions about how to interpret recommendations given by Jim Cramer. 
&amp;nbsp;For myself, I find him entertaining and well informed about financial news for a wide range of stocks. &amp;nbsp;He also has the track record as a successful hedge fund manager over the course of a number of years. &amp;nbsp;However, I am skeptical about the ability of analysts to&amp;nbsp;consistently&amp;nbsp;predict the direction of the market. &amp;nbsp;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Depending upon the reception of this post, I can provide additional information about the methods used to obtain the data and create the charts above and also show how other stocks recommended on Mad Money have performed. &amp;nbsp;Let me know if you have any interest in the comments.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-1700837471508690785?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/1700837471508690785/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1700837471508690785'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1700837471508690785'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html' title='Cramer&apos;s Stock Pick Recommendations Analyzed'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TLx6DP7sw3I/AAAAAAAAAd4/rL18W-q7TTE/s72-c/R_With_Jim_Cramer2.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-2838026290767710922</id><published>2010-10-16T06:46:00.000-07:00</published><updated>2010-10-16T10:13:32.402-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='fractals'/><title type='text'>Benoit Mandelbrot (the Father of Fractals) dead at 85</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://upload.wikimedia.org/wikipedia/commons/thumb/f/fc/Mandel_zoom_08_satellite_antenna.jpg/800px-Mandel_zoom_08_satellite_antenna.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://upload.wikimedia.org/wikipedia/commons/thumb/f/fc/Mandel_zoom_08_satellite_antenna.jpg/800px-Mandel_zoom_08_satellite_antenna.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.fooledbyrandomness.com/"&gt;Nicholas Taleb&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="http://twitter.com/#search?q=benoit%20mandelbrot"&gt;Recent tweets&lt;/a&gt;&amp;nbsp;indicate that &lt;a href="http://kottke.org/10/10/benoit-mandelbrot-rip"&gt;Benoit Mandelbrot has died&lt;/a&gt; at age 85. &amp;nbsp;Mandelbrot was a French and American mathematician, best known as the father of fractal geometry. 
&amp;nbsp;His official &lt;a href="http://www.math.yale.edu/mandelbrot/"&gt;biography at Yale&lt;/a&gt;&amp;nbsp;does not yet reflect this (it was last updated in March 2010) and his &lt;a href="http://en.wikipedia.org/wiki/Beno%C3%AEt_Mandelbrot"&gt;Wikipedia page&lt;/a&gt; is protected from editing&amp;nbsp;for the moment.&lt;br /&gt;&lt;br /&gt;UPDATE: &amp;nbsp;The &lt;a href="http://www.nytimes.com/2010/10/17/us/17mandelbrot.html?_r=2"&gt;New York Times&lt;/a&gt; has confirmed this.&lt;br /&gt;&lt;br /&gt;He is known for&amp;nbsp;changing the way researchers perceive and characterize the phenomenon of natural growth and for the implications of his work for scientists and mathematicians. &amp;nbsp;But fractals have become iconic to the masses as expressive of the idea that beauty and mathematics are inextricably related. &amp;nbsp;As I &lt;a href="http://www.r-chart.com/2010/08/fractals-in-r.html"&gt;mentioned&amp;nbsp;elsewhere&lt;/a&gt;&amp;nbsp;my brother and I were inspired at an early age by the wonderful designs that could be created by simple mathematical equations. 
&lt;br /&gt;&lt;br /&gt;He will be missed.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-2838026290767710922?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/2838026290767710922/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/10/benoit-mandelbrot-father-of-fractals.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2838026290767710922'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2838026290767710922'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/10/benoit-mandelbrot-father-of-fractals.html' title='Benoit Mandelbrot (the Father of Fractals) dead at 85'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-487357515084765504</id><published>2010-10-06T08:45:00.000-07:00</published><updated>2010-10-06T08:45:15.207-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='fitness'/><title type='text'>Belgian Astronomers and Exercise Machines</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKyZWjjRgxI/AAAAAAAAAdQ/llS8uX5lGeQ/s1600/BMI.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" 
src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKyZWjjRgxI/AAAAAAAAAdQ/llS8uX5lGeQ/s400/BMI.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;In the twisting paths of human discovery, you never quite know what intellectual enterprise is going to result in a world changing discovery. &amp;nbsp;For instance, the mathematical notion of &lt;a href="http://en.wikipedia.org/wiki/Expected_value"&gt;expected value&lt;/a&gt; did not grow up in a sterile, academic environment. &amp;nbsp;&amp;nbsp;In 1654 Blaise Pascal was approached by Chevalier de Méré who was interested in gambling problems. &amp;nbsp;Pascal corresponded with Fermat and thus the mathematical &lt;a href="http://en.wikipedia.org/wiki/Theory_of_probability"&gt;theory of probabilities&lt;/a&gt; was born. &lt;br /&gt;&lt;br /&gt;In recent days reports on economic upheaval have often cast financial industries as institutions based upon greed and power that contribute nothing of value to society. &amp;nbsp;Defenders of the free market are quick to respond with the immediate economic benefits provided by such institutions as they mitigate risk and serve as "middle men" in modern financial markets. &amp;nbsp;What is seldom considered is that discoveries in one area often find application in a separate area of life that was never considered during the initial investigation. &amp;nbsp;And so one day, perhaps Wall Street calculations might be put to non-financial use that benefit mankind in other ways. &amp;nbsp;There is historical precedent. &amp;nbsp;For instance, interests in the insurance industry served to popularize and apply a 19th century Belgian calculation in a manner that is now used on modern exercise machines. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;A Belgian Astronomer: Adolphe Quetelet&lt;/b&gt;&lt;br /&gt;&lt;a href="http://ndt.oxfordjournals.org/content/23/1/47.full"&gt;Adolphe Quetelet&lt;/a&gt; (1796–1874) was a Belgian mathematician, astronomer and statistician. 
&amp;nbsp;While studying astronomical activities in Paris he interacted with Joseph Fourier (1768–1830), Siméon Poisson (1781–1840) and Pierre Laplace (1749–1827). &amp;nbsp; He went on to put his new found appreciation of probability to practical use in the study of the human body (a subject he had initially approached as a painter and sculptor). &amp;nbsp;One calculation he created, dubbed the Quetelet Index, is a number that expresses a relationship between a person's height and weight. &amp;nbsp;Quetelet was not specifically interested in the use of his index for health purposes, but simply for defining the characteristics of "normal" or &lt;a href="http://ndt.oxfordjournals.org/content/23/1/47.full"&gt;"average" man&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;The Financial Industry&lt;/b&gt;&lt;br /&gt;In the mid 20th century actuaries observed increased mortality in overweight policyholders. &amp;nbsp;And so in an effort to construct more accurate mortality tables the relationship between weight and cardiovascular disease became the subject of epidemiological studies. &amp;nbsp;Weight tables were first used to predict life expectancy as far back as 1913. &amp;nbsp;But tables of ideal or desirable weight were developed by the Metropolitan Life Insurance Company in the 1940's. &amp;nbsp;In the 1960s, a small group of insurance industry experts began to use the Quetelet Index. 
&amp;nbsp;But it remained for an American scientist to perform a comparative study of available indices and rename the Quetelet Index to the form that we know it today where it has become a subject related to &lt;a href="http://profiles.nlm.nih.gov/NN/B/C/Q/W/_/nnbcqw.ocr"&gt;health and nutrition&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;An American Oceanographer, Biologist, and Physiologist&lt;/b&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/Ancel_Keys"&gt;Ancel Benjamin Keys&lt;/a&gt; was a scientist who wrote an article for the July 1972 "Journal of Chronic Diseases" that &lt;a href="http://www.ncbi.nlm.nih.gov/pubmed/17890752"&gt;coined the phrase "body mass index"&lt;/a&gt; or &lt;a href="http://en.wikipedia.org/wiki/Body_mass_index"&gt;BMI&lt;/a&gt; as a modern designation for the Quetelet Index. &amp;nbsp;Interestingly enough, Keys' early studies culminated in a B.A. in economics and political science in 1925. &amp;nbsp;His first Ph.D. was in oceanography and biology but his later work was related to his second Ph.D. focused on physiology. &amp;nbsp;He is best known for two dietary contributions - the &lt;a href="http://en.wikipedia.org/wiki/K-ration"&gt;K-Ration&lt;/a&gt; and the &lt;a href="http://en.wikipedia.org/wiki/Mediterranean_diet"&gt;Mediterranean Diet&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;Keys (and &lt;a href="http://www.slate.com/id/2223095/"&gt;others today&lt;/a&gt;) considered the tendency in the insurance industry to equate relative body weight with excess risk of death to be &lt;a href="http://mbbnet.umn.edu/firsts/blackburn_h.html"&gt;somewhat simplistic&lt;/a&gt;. &amp;nbsp;There is worldwide variation according to diet and physical activity habits. In most industrial countries people in the middle range of body weight are healthier than those at an extreme.&lt;br /&gt;&lt;br /&gt;Because of these types of concerns, BMI is often considered along with other concerns that can indicate potential health risks. 
&amp;nbsp;&lt;a href="http://www.win.niddk.nih.gov/publications/tools.htm"&gt;Specifically&lt;/a&gt;:&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;A BMI in the overweight category along with certain diseases&amp;nbsp;&lt;/li&gt;&lt;li&gt;A BMI of less than 25 and a waist size above the standard (35 for women or 40 for men)&amp;nbsp;&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;The actual BMI ranges considered healthy or at risk are still being debated. &amp;nbsp;In 1998, the U.S. National Institutes of Health changed the U.S. definition of normal from 27.8 to 25 to conform to &lt;a href="http://apps.who.int/bmi/index.jsp?introPage=intro_3.html"&gt;World Health Organization Standards&lt;/a&gt;. &amp;nbsp;In addition, other countries in the world are encouraging the upper limit for BMI to be even lower than 25.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;BMI and R&lt;/b&gt;&lt;br /&gt;One’s optimal weight can be derived using the BMI and height as follows:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;optimal_weight = function (height, bmi){&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;round((height**2 * bmi) / 703)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;A grid, similar to one found &lt;a href="http://www.nhlbi.nih.gov/guidelines/obesity/bmi_tbl.htm"&gt;here&lt;/a&gt;&amp;nbsp;and the chart above can be constructed using a &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/bmi.R"&gt;script found at GitHub&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;For example:&lt;br /&gt;&lt;br /&gt;&lt;span 
class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; create_bmi_dataframe(bmi_end=30)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;19 &amp;nbsp;20 &amp;nbsp;21 &amp;nbsp;22 &amp;nbsp;23 &amp;nbsp;24 &amp;nbsp;25 &amp;nbsp;26 &amp;nbsp;27 &amp;nbsp;28 &amp;nbsp;29 &amp;nbsp;30&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;60 &amp;nbsp;97 102 108 113 118 123 128 133 138 143 149 154&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;61 101 106 111 116 122 127 132 138 143 148 153 159&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;62 104 109 115 120 126 131 137 142 148 153 159 164&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;63 107 113 119 124 130 135 141 147 152 158 164 169&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;64 111 117 122 128 134 140 146 151 157 163 169 175&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;65 114 120 126 132 
138 144 150 156 162 168 174 180&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;66 118 124 130 136 143 149 155 161 167 173 180 186&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;67 121 128 134 140 147 153 160 166 172 179 185 192&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;68 125 132 138 145 151 158 164 171 178 184 191 197&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;69 129 135 142 149 156 163 169 176 183 190 196 203&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;70 132 139 146 153 160 167 174 181 188 195 202 209&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;71 136 143 151 158 165 172 179 186 194 201 208 215&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;72 140 147 155 162 170 177 184 192 199 206 214 221&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;73 144 152 159 167 174 182 190 197 205 212 220 
227&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;74 148 156 164 171 179 187 195 203 210 218 226 234&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;75 152 160 168 176 184 192 200 208 216 224 232 240&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;76 156 164 173 181 189 197 205 214 222 230 238 246&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;77 160 169 177 186 194 202 211 219 228 236 245 253&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;78 164 173 182 190 199 208 216 225 234 242 251 260&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;79 169 178 186 195 204 213 222 231 240 249 257 266&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;80 173 182 191 200 209 218 228 237 246 255 264 273&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;There is some variation with the government site – perhaps related to rounding.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' 
src='https://blogger.googleusercontent.com/tracker/3867310391951630980-487357515084765504?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/487357515084765504/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/10/belgian-astronomers-and-exercise.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/487357515084765504'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/487357515084765504'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/10/belgian-astronomers-and-exercise.html' title='Belgian Astronomers and Exercise Machines'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/_FsLa1cMTCWU/TKyZWjjRgxI/AAAAAAAAAdQ/llS8uX5lGeQ/s72-c/BMI.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-571095303318431682</id><published>2010-10-04T16:10:00.000-07:00</published><updated>2010-10-04T16:10:48.086-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Ruby'/><category scheme='http://www.blogger.com/atom/ns#' term='fitness'/><title type='text'>Max Heart Rate Calculations Compared</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpesmcsmpI/AAAAAAAAAdM/Xw11_Hgr95c/s1600/R_Heart.png" imageanchor="1" 
style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpesmcsmpI/AAAAAAAAAdM/Xw11_Hgr95c/s1600/R_Heart.png" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;Physical fitness has become increasingly technical and data driven. &amp;nbsp;I started running a bit in the last few months and have been delving into the prevailing wisdom related to assessing one's health as a baseline for pursuing various fitness goals. &amp;nbsp;Some of the terms related to tracking a heart rate gave me visions of white lab coats, cardiac monitors and sophisticated formulas based upon years of scientific analysis. &amp;nbsp;And while there may be truth to this, the practical reality is quite a bit simpler.&lt;br /&gt;&lt;div style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TKpYJ4Lg1OI/AAAAAAAAAc8/6rmE4n7hdWQ/s1600/max_heart_rate_calculation_methods.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="319" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TKpYJ4Lg1OI/AAAAAAAAAc8/6rmE4n7hdWQ/s320/max_heart_rate_calculation_methods.png" style="cursor: move;" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;In many workout routines, a &lt;a href="http://www.americanheart.org/presenter.jhtml?identifier=4736"&gt;target heart rate&lt;/a&gt; is&amp;nbsp;calculated which is supposed to identify a range (usually in beats per minute) during an exercise routine&amp;nbsp;that will provide optimal cardiovascular value. &amp;nbsp;The basic idea is that you want a work out that is rigorous enough to derive a benefit from the exercise without harming your body. &amp;nbsp;It is appealing in that it provides an objective measure to evaluate your workout. 
&amp;nbsp;And once your workout can be measured, it is possible to set goals and work to improve your health.&lt;br /&gt;&lt;br /&gt;What is implied in the idea of a target heart rate is that there is some upper limit that cannot safely be exceeded. &amp;nbsp;You might think that you need to be hooked up to a bunch of cardiac sensors to find out this value - and although this might be optimal, it is not the technique used by most folks. &amp;nbsp;Instead, there are relatively simple formulas that are used to calculate a maximum heart rate for an individual. &amp;nbsp;They are usually based only upon age (although some calculations consider gender as well).&lt;br /&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;b&gt;Maximum Heart Rate Calculations&lt;/b&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/Heart_rate"&gt;Various formulas&lt;/a&gt;&amp;nbsp;(most of them simple linear formulas at that) have been devised to estimate individual Maximum Heart Rates. &amp;nbsp;However actual maximum heart rates vary significantly&amp;nbsp;between individuals based upon physiology, physical fitness and other factors&amp;nbsp;so the value of the metric is disputed. &amp;nbsp;Nevertheless, I was interested in comparing the available formulas to get a sense of a range (based upon "ensembling" if you will) of what is being reported or suggested by health sites, software and machines that use this value. &lt;br /&gt;&lt;br /&gt;One of the gizmos I have begun using is the Garmin GPS with heart rate monitor. &amp;nbsp;I am impressed with its performance so far. 
&amp;nbsp;It includes its own software that does most of the types of data aggregation and summary that you would like - but I look forward to geeking out and seeing what can be done with the data in R in later posts.&lt;/td&gt;&lt;td&gt;&lt;br /&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;amp;bc1=000000&amp;amp;IS2=1&amp;amp;bg1=FFFFFF&amp;amp;fc1=000000&amp;amp;lc1=0000FF&amp;amp;t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;m=amazon&amp;amp;f=ifr&amp;amp;md=10FE9736YVPPT7A0FBG2&amp;amp;asins=B000CSWCQA" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;b&gt;Method&lt;/b&gt;&lt;br /&gt;A &lt;a href="http://github.com/ezgraphs/Ruby-Scripts/blob/master/heart_rate_max.rb"&gt;ruby script&lt;/a&gt;&amp;nbsp;was used to create a semicolon delimited file with the maximum heart rate from ages 18 through 90 for various calculation methods described in the &lt;a href="http://en.wikipedia.org/wiki/Heart_rate"&gt;Wikipedia article&lt;/a&gt;. &amp;nbsp;The &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/heart_rate_max.csv"&gt;resulting data&lt;/a&gt;&amp;nbsp;can be read into an &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/heart_rate_max.R"&gt;R script&lt;/a&gt; to produce the charts in this blog. &lt;br /&gt;&lt;br /&gt;&amp;nbsp;A summary that combines the calculations does not make a whole lot of sense since two of the calculations in use are for women only and one is for men only. &amp;nbsp;However, all of the techniques fit within a relatively narrow range (since we human beings aren't quite that random). 
&amp;nbsp;Besides, the two calculations for women are among the most divergent presented, and so cancel each other out in part (though they probably pull down the average for the younger and older ends of the spectrum).&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZpS0CO2I/AAAAAAAAAdE/Rn-gUpVBtUE/s1600/all_methods_smoothed.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="319" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZpS0CO2I/AAAAAAAAAdE/Rn-gUpVBtUE/s320/all_methods_smoothed.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;This average is included in the chart below - which is easier to see if you generate it yourself and stretch it to a size suitable for your monitor.&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZpS0CO2I/AAAAAAAAAdE/Rn-gUpVBtUE/s1600/all_methods_smoothed.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;/a&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKpZmegkceI/AAAAAAAAAdA/BV9tWLzRVc4/s1600/all_methods_smoothed_points.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="319" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKpZmegkceI/AAAAAAAAAdA/BV9tWLzRVc4/s320/all_methods_smoothed_points.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;The only input value other than age considered in the calculations is gender - two of which are specific to women and one for 
men. &amp;nbsp;It seems that the most popular calculations don't bother with gender anyway.&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZqxjKanI/AAAAAAAAAdI/sE1y18LsPSc/s1600/women_heart_rate_max.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="319" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZqxjKanI/AAAAAAAAAdI/sE1y18LsPSc/s320/women_heart_rate_max.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;There are a number of possibilities for using R with fitness devices that provide heart rate information, geographic data, time, distance, caloric intake and consumption, etc. &amp;nbsp;I was not able to find much in the way of open source fitness related calculation software APIs &amp;nbsp;- so this could be a new area for R developers to address. &amp;nbsp;(It also provides some balance to the relatively sedentary life of developing and blogging).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-571095303318431682?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/571095303318431682/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/10/max-heart-rate-calculations-compared.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/571095303318431682'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/571095303318431682'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/10/max-heart-rate-calculations-compared.html' title='Max Heart Rate Calculations 
Compared'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpesmcsmpI/AAAAAAAAAdM/Xw11_Hgr95c/s72-c/R_Heart.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-5372288810399868062</id><published>2010-09-24T16:10:00.000-07:00</published><updated>2010-09-24T16:20:13.911-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Data Preparation'/><title type='text'>Find Duplicate Records in a File</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px;"&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJ0xg1VG4QI/AAAAAAAAAcc/qLZWC4C3-m4/s1600/R_duplicate_record.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJ0xg1VG4QI/AAAAAAAAAcc/qLZWC4C3-m4/s1600/R_duplicate_record.PNG" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;In the world of data preparation a common task is to identify duplicate records in a file or data set. &amp;nbsp;A few years ago, I did most development work in Java, and shudder to think of the amount of code required to accomplish this sort of task. &amp;nbsp;Since that time I have been involved in many projects that did not require programming in a specific language, but simply "getting the job done." 
&amp;nbsp;With that in mind, "removal of duplicate records in a file" can be construed as manipulating a data set rather than an exercise in file processing. &amp;nbsp;The following shows how R compares with other technologies when performing this task.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;A file named "file.txt" containing semicolon delimited records appears in the examples below:&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1;abc;123;etc&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1234;qwer;4321;etc&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1235;asdf;4341;etc&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1;abc;123;etc&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1234;qwer;5555;etc&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Identifying rows in which every field is duplicated is relatively straight forward using Unix utilities. 
&amp;nbsp;The file can be outputted using the cat (concatenate) command, the results sorted, and a unique list of results (prefaced by the count of occurrences in the file) can be filtered by a regular expression that indicates any row that has a number of occurrences not equal to one.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&amp;nbsp;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;cat file.txt | sort | uniq -c | grep "^ &amp;nbsp; [^1 ]"&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Often, it is more concise to speak Unix than English. &amp;nbsp;This starts to break down a bit when considering each line in the file as a record with distinguishable fields.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;General purpose programming languages can do the same thing, but are a bit more verbose. &amp;nbsp;In ruby, the file can be read into an array of arrays.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&amp;nbsp;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;a=File.open('file.txt').readlines.map{|l|l.split(';')}&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;A list of unique rows can be outputted using the following one liner:&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;a.uniq.each{|l|puts l.join(';')}&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;And with a bit more effort, you can write a program that will filter the results as needed. &amp;nbsp;This type of processing can also be done declaratively in R where such a file is read in as a data frame.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df=read.csv('file.txt',sep=';',header=FALSE)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; unique(df)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;The duplicated function can also be used to identify the single row that is duplicated.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; df[duplicated(df),]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;That is to say, all fields in all records in the following data frames are equal.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; 
unique(df)==df[!duplicated(df),]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;The situation gets a bit more complicated when you want to only use some of the delimited fields to identify duplicate records. &amp;nbsp;In the data set above, rows 1 and 4 are identical. &amp;nbsp;Consider the requirement to recognize lines 2 and 5 as identical (due to the first two fields matching). &amp;nbsp;In Unix, you could use awk with the -F option to process the delimited fields. &amp;nbsp;In ruby you could store key fields in variables and compare them with each row. &amp;nbsp;If you come from the SQL world, you could use the R sqldf package to treat the data frame as a database table.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; sqldf('select * from df group by V1, V2 having count(*) &amp;gt;1')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;The R way of getting this information is to identify the indices of the duplicated rows.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; df[duplicated(df[c(1,2)]),]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;I enjoy looking at the overlapping aspects of programming languages and utilities (like &lt;a href="http://www.oracle.com/technology/pub/articles/saternos-filtering.html"&gt;this OTN Article&lt;/a&gt; from a few years ago). &amp;nbsp; It is helpful for highlighting the right tool for the right job, and aids in communication with other technical professionals. 
&amp;nbsp;&lt;/div&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-5372288810399868062?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/5372288810399868062/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/09/find-duplicate-records-in-file.html#comment-form' title='7 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/5372288810399868062'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/5372288810399868062'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/09/find-duplicate-records-in-file.html' title='Find Duplicate Records in a File'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TJ0xg1VG4QI/AAAAAAAAAcc/qLZWC4C3-m4/s72-c/R_duplicate_record.PNG' height='72' width='72'/><thr:total>7</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-9014193283880751967</id><published>2010-09-22T16:19:00.000-07:00</published><updated>2010-09-22T16:19:28.493-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='World Bank Data'/><title type='text'>New World Bank Data Available</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJqOTzDi7GI/AAAAAAAAAcM/_GAp7GAkwEA/s1600/totalpopulation.png" imageanchor="1" 
style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="319" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJqOTzDi7GI/AAAAAAAAAcM/_GAp7GAkwEA/s320/totalpopulation.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;Just announced: &amp;nbsp;World Bank Data &lt;a href="http://data.worldbank.org/news/new-features"&gt;features and data&lt;/a&gt; are available. &amp;nbsp;&lt;a href="http://www.r-chart.com/search/label/World%20Bank%20Data"&gt;Previous posts&lt;/a&gt; have demonstrated how to access and plot this data using R (including the use of the R &lt;a href="http://cran.r-project.org/web/packages/WDI/index.html"&gt;WDI package&lt;/a&gt;). &amp;nbsp;The chart above can be created using the following program in R.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="color: #333333; line-height: 18px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;library(ggplot2)&lt;br /&gt;library(WDI)&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="color: #333333; line-height: 18px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;DF &amp;lt;- WDI(country=c("US","FR","DE","GB","CN","RU","IN"), indicator="SP.POP.TOTL", start=1990, end=2008)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;ggplot(DF, aes(year, SP.POP.TOTL, color=country))+geom_line(stat="identity")+theme_bw()+xlab("Year")+opts(title="Total Population")+ylab("")&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="color: #333333; font-family: 'Courier New', Courier, monospace; font-size: small; line-height: 18px;"&gt;&lt;span class="Apple-style-span" style="color: black; font-family: 'Times New Roman'; font-size: medium; line-height: normal;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-9014193283880751967?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/9014193283880751967/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/09/new-world-bank-data-available.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/9014193283880751967'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/9014193283880751967'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/09/new-world-bank-data-available.html' title='New World Bank Data Available'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TJqOTzDi7GI/AAAAAAAAAcM/_GAp7GAkwEA/s72-c/totalpopulation.png' height='72' 
width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-718438522319707387</id><published>2010-09-18T11:29:00.000-07:00</published><updated>2010-09-18T11:29:32.263-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Conferences'/><title type='text'>Elder Research Two Day Course</title><content type='html'>... or what I did on my summer vacation...&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.blogger.com/"&gt;&lt;/a&gt;&lt;span id="goog_183505738"&gt;&lt;/span&gt;&lt;span id="goog_183505739"&gt;&lt;/span&gt;Just got back from the &lt;a href="http://www.datamininglab.com/Training/Annual2DayCourse/tabid/66/Default.aspx"&gt;Elder Research Two Day Course&lt;/a&gt;&amp;nbsp;"&lt;a href="http://www.datamininglab.com/Training/Annual2DayCourse/tabid/66/Default.aspx"&gt;Tools for Discovering Patterns in Data&lt;/a&gt;". &amp;nbsp;It was a great course that (while not R specific) provides a great overview of Data Mining tools and techniques and insight into current applications in a wide variety of industries. &amp;nbsp; &lt;br /&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;Dr. Elder is a coauthor of a book available online (and provided with the course) called "Handbook of Statistical Data Analysis and Data Mining Applications." &amp;nbsp;This book&amp;nbsp;contains a wealth of practical examples and tutorials (most using the Statsoft Statistica software). &amp;nbsp;It has a decidedly practical emphasis that allows you to see how algorithms are used to discern patterns in the data and to evaluate and compare how effective they are with specific data sets. &amp;nbsp;Functional areas covered in the tutorials include aviation safety, movie box office receipts, customer services, credit scoring, automobile brand review, quality control, business administration in a medical industry, psychological evaluation, dentistry and profit analysis. 
&amp;nbsp;This is very helpful for those who prefer to work from the concrete to the general (rather than being provided mathematical abstractions that you then apply to specific situations). &amp;nbsp;They might also be helpful for showing a business user why data mining matters and what value it brings to a business or organization.&lt;/td&gt;&lt;td&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;amp;bc1=000000&amp;amp;IS2=1&amp;amp;bg1=FFFFFF&amp;amp;fc1=000000&amp;amp;lc1=0000FF&amp;amp;t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;m=amazon&amp;amp;f=ifr&amp;amp;md=10FE9736YVPPT7A0FBG2&amp;amp;asins=0123747651" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;br /&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;amp;bc1=000000&amp;amp;IS2=1&amp;amp;bg1=FFFFFF&amp;amp;fc1=000000&amp;amp;lc1=0000FF&amp;amp;t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;m=amazon&amp;amp;f=ifr&amp;amp;md=10FE9736YVPPT7A0FBG2&amp;amp;asins=0321321367" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt;&lt;td&gt;&lt;br /&gt;The conference covered many of the same topics discussed in Introduction to Data Mining by Tan, Steinbach and Kumar. &amp;nbsp;However, there were many more concrete examples and applications of techniques in specific areas of finance, industry, government and education. &amp;nbsp;A section of the book on ensemble methods is included in a larger section simply titled "Classification: Alternative Techniques". &amp;nbsp;Dr. Elder went into greater detail on these topics and demonstrated the effectiveness of combining multiple models into a single model that is usually more accurate than the best of the individual component classifiers. 
&amp;nbsp;It seems that different classifiers "see" certain parts of data sets better than others, and that combining classifiers results in a final analysis in which the best (most accurate) elements of each classification are retained while the worst aspects are largely ignored. &amp;nbsp;By combining classifiers and manipulating the training set and input features a more accurate final model can be obtained.&amp;nbsp;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;br /&gt;More detail about ensemble methods is available in another book coauthored by Dr. Elder entitled Ensemble Methods in Data Mining. &amp;nbsp;This book goes into greater detail about how and when to use&amp;nbsp;ensembling&amp;nbsp;and includes some examples in R. &amp;nbsp;The use of multiple classification techniques raises a number of interesting issues - on the one hand they seem to work in practice, but their use makes it more difficult to trace how a final combined model is constructed from the original data set. &amp;nbsp;This has raised some interesting issues about the definition of complexity and the quest for simple accurate models.&lt;br /&gt;&lt;br /&gt;Dr. Andrew Fast presented on Text Mining and Social Network Analysis - and provided some valuable insights into these rapidly developing fields. 
&amp;nbsp;There were also a number of software demos and time to interact with other members of Elder Research staff and conference participants.&lt;/td&gt; &lt;td&gt;&lt;br /&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;amp;bc1=000000&amp;amp;IS2=1&amp;amp;bg1=FFFFFF&amp;amp;fc1=000000&amp;amp;lc1=0000FF&amp;amp;t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;m=amazon&amp;amp;f=ifr&amp;amp;md=10FE9736YVPPT7A0FBG2&amp;amp;asins=1608452840" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt; &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;The conference took place in Charlottesville VA which is a great setting with many historical and recreational attractions nearby. &lt;br /&gt;&lt;br /&gt;So that's what I did on my summer vacation...&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-718438522319707387?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/718438522319707387/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/09/elder-research-two-day-course.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/718438522319707387'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/718438522319707387'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/09/elder-research-two-day-course.html' title='Elder Research Two Day Course'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-7052403659735605620</id><published>2010-09-07T17:54:00.000-07:00</published><updated>2010-09-07T17:54:39.761-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='music'/><title type='text'>Ah Bach...</title><content type='html'>As announced by David Smith over at&amp;nbsp;&lt;a href="http://blog.revolutionanalytics.com/2010/09/competition-data-visualization-with-ggplot2.html"&gt;Revolution Analytics&lt;/a&gt;,&amp;nbsp;&amp;nbsp;a &lt;a href="http://github.com/hadley/ggplot2/wiki/Case-study-competition"&gt;ggplot2 Case Study Competition&lt;/a&gt; is on...&lt;br /&gt;&lt;br /&gt;&lt;table&gt;&lt;tr&gt;&lt;td&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Johann_Sebastian_Bach.jpg/220px-Johann_Sebastian_Bach.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Johann_Sebastian_Bach.jpg/220px-Johann_Sebastian_Bach.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;/td&gt;&lt;td&gt;&amp;nbsp; &amp;nbsp;&lt;/td&gt;&lt;td&gt;&lt;br /&gt;&lt;br /&gt;Rather than blogging for the last few days, I &lt;a href="http://github.com/hadley/ggplot2/wiki/Bach-2-Part-Invention-in-F-Major-BWV779"&gt;cobbled together an entry&lt;/a&gt;. &amp;nbsp;It is not a particularly mind bending use of ggplot2, but the subject matter is relatively original. &amp;nbsp;It is a brief analysis and visualization of a J.S. Bach 2 Part Invention. &amp;nbsp;And because Bach's music is so well structured, the visualization itself is nice looking and well balanced. 
&amp;nbsp;Perhaps suitable for geeky tee shirts...&lt;br /&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;a href="http://github.com/hadley/ggplot2/wiki/Bach-2-Part-Invention-in-F-Major-BWV779"&gt;Check it out&lt;/a&gt; when you get a chance.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-7052403659735605620?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/7052403659735605620/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/09/ah-bach.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7052403659735605620'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7052403659735605620'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/09/ah-bach.html' title='Ah Bach...'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-6836947123869493862</id><published>2010-09-02T17:34:00.000-07:00</published><updated>2010-09-02T17:34:25.732-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Data Mining'/><title type='text'>Bot Botany - K-Means and ggplot2</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;span class="Apple-style-span" style="font-family: Arial; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 
13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TIBByxcNtzI/AAAAAAAAAcE/SyYEDxjHVs4/s1600/botbotany.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TIBByxcNtzI/AAAAAAAAAcE/SyYEDxjHVs4/s400/botbotany.PNG" width="390" /&gt;&lt;/a&gt;&lt;/div&gt;So if you had a robot that was an expert at botany - would you have a bot botanist? &amp;nbsp;Among other things, it would need to distinguish flowers through vision and image processing, and be able to classify various kinds of plants based upon specific characteristics. &amp;nbsp;What do both of these requirements have in common? &amp;nbsp; They can be done using &lt;a href="http://en.wikipedia.org/wiki/K-means_clustering"&gt;k-means clustering&lt;/a&gt;. &amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Image_segmentation"&gt;Image segmentation&lt;/a&gt;&amp;nbsp;can be used to allow our robot to recognize objects. &amp;nbsp;Based upon petal and sepal size, it could determine say - the species of an iris. &amp;nbsp;The well-known iris data set has been featured in &lt;a href="http://www.r-chart.com/2010/07/thinking-about-graphs.html"&gt;other posts&lt;/a&gt;. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;K-Means in R&lt;/b&gt;&lt;br /&gt;If you look up the k-means algorithm online or in a reference book, you will be met with a flurry of mathematical symbols and formal explanations. &amp;nbsp;The basic principle (informally stated) is rather simple... given a set of observations (picture a scatter plot of points), and a number of groups or clusters that you wish to group them in, the k-means algorithm finds the center of each group and associates observations with the groups with the "closest" center.&lt;br /&gt;&lt;br /&gt;To use k-means in R, call the &lt;b&gt;kmeans &lt;/b&gt;function with a matrix of values and the number of centers. 
&amp;nbsp;The function&amp;nbsp;seeks to partition the points into&amp;nbsp;&lt;i&gt;k&lt;/i&gt;&amp;nbsp;groups (the number of centers) such that the sum of squares from points to the assigned cluster centers is minimized. &amp;nbsp;Each observation (point) belongs to the cluster with the nearest mean.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;How-To&lt;/b&gt;&lt;br /&gt;To start, we will copy the iris data set to a separate data frame. &amp;nbsp;Not strictly speaking necessary, but makes it easier for me to reflexively enter df whenever the data frame is in view. &amp;nbsp;Next we create a matrix object containing only the Petal Length and Width.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df=iris&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;m=as.matrix(cbind(df$Petal.Length, df$Petal.Width),ncol=2)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Now we will do the actual clustering. &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;cl=(kmeans(m,3))&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Simple eh? &amp;nbsp;The cl object contains a number of interesting attributes associated with the model. 
&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;cl$size&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;cl$withinss&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Next we do a bit of data formatting and preparation for subsequent calls to graph the data. &amp;nbsp;Notice that we add the cluster information back to our original data frame. &amp;nbsp;This is a good organization of the data and also a requirement for working with ggplot2 which is designed to use data frames.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df$cluster=factor(cl$cluster)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;centers=as.data.frame(cl$centers)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The following graph color codes the points by cluster. &amp;nbsp;We also add the centers and a semi transparent halo around the center to emphasize the place of the center... 
and its role in classifying the observations into clusters.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;ggplot(data=df, aes(x=Petal.Length, y=Petal.Width, color=cluster )) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;geom_point() +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;geom_point(data=centers, aes(x=V1,y=V2, color='Center')) +&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;geom_point(data=centers, aes(x=V1,y=V2, color='Center'), size=52, alpha=.3, legend=FALSE)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TIAmvNNXSTI/AAAAAAAAAb8/wFveqOH5q8k/s1600/iris_kmeans_cluster.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TIAmvNNXSTI/AAAAAAAAAb8/wFveqOH5q8k/s400/iris_kmeans_cluster.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;This plot is an interesting example of how several different sets of data (in this case the actual observations as well as the centers) in separate data frames can be 
included in a single ggplot2 chart. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;Misclassified&amp;nbsp;Observations&lt;/b&gt;&lt;br /&gt;The model is pretty accurate, but not perfect. &amp;nbsp;The following SQL statement highlights the few misclassified observations:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;sqldf('select Species, cluster, count(*) from df group by Species, Cluster')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; Species cluster count(*)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;1 &amp;nbsp; &amp;nbsp; setosa &amp;nbsp; &amp;nbsp; &amp;nbsp; 2 &amp;nbsp; &amp;nbsp; &amp;nbsp; 50&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;2 versicolor &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; 48&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;3 versicolor &amp;nbsp; &amp;nbsp; &amp;nbsp; 3 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;2&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;"&gt;4 &amp;nbsp;virginica &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;6&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;5 &amp;nbsp;virginica &amp;nbsp; &amp;nbsp; &amp;nbsp; 3 &amp;nbsp; &amp;nbsp; &amp;nbsp; 44&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;So we grab the outliers into their own data frame....&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df2 = sqldf('select * from df where (Species || cluster) in&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; (select Species || cluster from df group by Species, Cluster having count(*) &amp;lt; 10)')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Now we can enhance the previous graph to put a diamond around misclassified points.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;l&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;ast_plot() + &amp;nbsp;geom_point(data=df2, aes(x=Petal_Length, y=Petal_Width, shape=5, alpha=.7, size=4.5), legend=FALSE)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TIAmhPpdihI/AAAAAAAAAb0/P4miPDTDvrU/s1600/iris_kmeans_misclassified.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TIAmhPpdihI/AAAAAAAAAb0/P4miPDTDvrU/s400/iris_kmeans_misclassified.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;And so with a bit of Data Mining knowledge and the R programming language, even our machines can stop and smell the roses...&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-6836947123869493862?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/6836947123869493862/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/09/bot-botany-k-means-and-ggplot2.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6836947123869493862'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6836947123869493862'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/09/bot-botany-k-means-and-ggplot2.html' title='Bot Botany - K-Means and ggplot2'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/_FsLa1cMTCWU/TIBByxcNtzI/AAAAAAAAAcE/SyYEDxjHVs4/s72-c/botbotany.PNG' height='72' 
width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-4802408713335086512</id><published>2010-08-31T17:51:00.000-07:00</published><updated>2010-08-31T17:51:31.701-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Learning Statistics'/><title type='text'>Better than Average</title><content type='html'>&lt;span class="Apple-style-span" style="font-family: Arial; font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-size: 13px;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Arial; font-size: small;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Arial; font-size: small;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Arial; font-size: small;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Arial; font-size: small;"&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TH2hTwRF3pI/AAAAAAAAAbs/LqE0yGJ3F6c/s1600/MovingAverage.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TH2hTwRF3pI/AAAAAAAAAbs/LqE0yGJ3F6c/s400/MovingAverage.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;br /&gt;&lt;br /&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;amp;bc1=000000&amp;amp;IS2=1&amp;amp;bg1=FFFFFF&amp;amp;fc1=000000&amp;amp;lc1=0000FF&amp;amp;t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;m=amazon&amp;amp;f=ifr&amp;amp;md=10FE9736YVPPT7A0FBG2&amp;amp;asins=0387293175" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;&lt;/td&gt;&lt;td&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 
0px; margin-top: 0px;"&gt;The &lt;a href="http://www.nist.gov/index.html"&gt;NIST&lt;/a&gt;'s&amp;nbsp;&lt;a href="http://www.itl.nist.gov/div898/handbook/index.htm"&gt;The Engineering Statistics Handbook&lt;/a&gt;&amp;nbsp;includes&amp;nbsp;an&amp;nbsp;&lt;a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc4.htm"&gt;Introduction to Time Series Analysis&lt;/a&gt;&amp;nbsp;which&amp;nbsp;provides a great way of demonstrating how R can be used to make such calculations. &amp;nbsp;This post replicates the analysis of the data set introduced under&amp;nbsp;&lt;a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc42.htm"&gt;Averaging Methods&lt;/a&gt;&amp;nbsp;using R. &lt;br /&gt;&lt;br /&gt;As you might expect, Time Series Analysis is a broad subject that has been investigated in depth elsewhere. &amp;nbsp;If you need more information, a book such as &lt;a href="http://www.amazon.com/dp/0387293175?tag=rcha-20&amp;amp;camp=213381&amp;amp;creative=390973&amp;amp;linkCode=as4&amp;amp;creativeASIN=0387293175&amp;amp;adid=1541HN2AYC61FXKB3NDG&amp;amp;"&gt;Time Series Analysis and Its Applications&lt;/a&gt;&amp;nbsp;provides a much more in depth look at the mathematical theory involved as well as providing practical examples of the use of R for analysis and forecasting.&lt;br /&gt;&lt;br /&gt;But back to the NIST handbook... the&amp;nbsp;&lt;a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc42.htm"&gt;data set they used&lt;/a&gt;&amp;nbsp;represents supplier deliveries to a warehouse. 
&amp;nbsp;The calculations that follow demonstrate how to perform the calculations they do in this section of the handbook using R.&lt;/div&gt;&lt;br /&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;b&gt;&lt;u&gt;Supplier Amount(in 1000 of $)&lt;/u&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;9&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; 
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;8&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;3&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;9&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" 
style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;4&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;12&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;5&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;9&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;6&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;12&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;7&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;11&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;8&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;7&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;9&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;13&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div 
style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;10&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;9&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;11&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;11&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;12&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; 
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;10&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Simple Average (Mean)&lt;/b&gt;&lt;br /&gt;In R the series can be represented as a vector.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;v=c(9,8,9,12,9,12,11,7,13,9,11,10)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The average of the series is 10.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;mean(v)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The "error" amount by which each entry in the vector differs from the mean can be calculated as follows.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;v - mean(v)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;This value can serve as the basis for a measure to ascertain how well a model fits (Error Squared).&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;(v - mean(v))^2&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Finally, the sum or mean of these results can be used to compute values that represent the overall fit (or amount of error) for the estimate.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;sum((v - mean(v))^2) # SSE is the sum of the squared 
errors.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;mean((v - mean(v))^2) # MSE is the mean of the squared errors.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Now that we have simple values that indicate how good an estimate for a set is, we can test with other values. &lt;br /&gt;Rather than writing out an entire calculation each time, we can create a function in R and apply the function to each value in a vector.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;sse = function(x, series){sum((series - x)^2)}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;mse = function(x, series){mean((series - x)^2)}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;To compare the estimate (10) with 7, 9, and 12:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;sapply(c(7,9,10,12),sse,v)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;sapply(c(7,9,10,12),mse,v)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Analyzing Time Series Data&lt;/b&gt;&lt;br /&gt;A &lt;a href="http://en.wikipedia.org/wiki/Time_series"&gt;time series&lt;/a&gt; is simply a sequence of data points in time. &amp;nbsp;Time series data has unique characteristics which allow it&amp;nbsp;to be processed in a similar manner regardless of the underlying data represented. 
&amp;nbsp;Many disciplines deal with this type of data including&amp;nbsp;statistics, signal processing, econometrics and mathematical finance. &amp;nbsp;Such data appears in business in relationship to sales forecasting, budgetary analysis, yield projections, and in the process / quality control arena.&amp;nbsp;In other blog entries, they are used in relation to &lt;a href="http://www.r-chart.com/2010/06/stock-analysis-using-r.html"&gt;stock market&amp;nbsp;analysis&lt;/a&gt; and &lt;a href="http://www.r-chart.com/2010/06/world-bank-api-r-package-available.html"&gt;economic data&lt;/a&gt;. &amp;nbsp;They are relevant to web sites and are available through tools like Google Analytics. &lt;br /&gt;&lt;br /&gt;So time series data is widely applicable but has common features regardless of its application. &amp;nbsp;It can be &lt;i&gt;analyzed&lt;/i&gt; to identify its characteristics and patterns. &amp;nbsp;This often leads to &lt;i&gt;forecasting &lt;/i&gt;in which a model is used to&lt;br /&gt;predict future events based upon past data.&lt;br /&gt;&lt;br /&gt;All time series data has the following common qualities:&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;a natural temporal ordering&lt;/li&gt;&lt;li&gt;often events that are close together are generally more closely related than those further apart&lt;/li&gt;&lt;li&gt;in most cases, past values are assumed to influence future values (rather than the other way around)&lt;/li&gt;&lt;li&gt;usually spaced at uniform intervals&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;The data set we are working with is a bit odd to consider as a time series - a supplier is not a unit of time. &amp;nbsp;However, it is useful for making the point that a &amp;nbsp;"simple" average (or mean)&amp;nbsp;of all past observations is only a useful estimate for when there are no trends. &amp;nbsp;Not sure what to make of this. &amp;nbsp;I emailed the government and asked for clarification. 
&amp;nbsp;Will post the answer here if I receive a response.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;In R, a vector can be cast to a time series object as follows:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;s=as.ts(c(9,8,9,12,9,12,11,7,13,9,11,10))&lt;/span&gt;&amp;nbsp;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Moving Average&lt;/b&gt;&lt;br /&gt;A &lt;a href="http://en.wikipedia.org/wiki/Moving_average"&gt;moving average&lt;/a&gt; is described in the &lt;a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc421.htm"&gt;NIST Handbook&lt;/a&gt;&amp;nbsp;and is also referred to as "smoothing" - a term that comes up in ggplot2 (geom_smooth). &amp;nbsp;There are a myriad of functions available in R that involve some sort of lagged calculation of a series of numbers. &amp;nbsp;A simple example that almost does the trick involves rollapply:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;rollapply(s, 3, mean)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;This works, but it is not clear that the first two entries were skipped. &amp;nbsp;Better to use a library that has additional checks coded in...&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;library(TTR)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;SMA(s,3)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;If you take a&amp;nbsp;look at the code inside... 
you can get an idea of the additional verification and error checking (which accounts for missing values at the beginning of the list). &amp;nbsp;To view the source, simply input the function name without any parentheses:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;SMA&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;You can drill down into the internally called methods in this case:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;runMean&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;runSum&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;With this method available, we can calculate the Error and the Error Squared:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;s - SMA(s,3) &amp;nbsp; &amp;nbsp; &amp;nbsp;# Error&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;(s - SMA(s,3))^2 &amp;nbsp;# Error Squared&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Note that the calculated mean replaced missing entries as zeroes...&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;x=((s - SMA(s,3))^2)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;x[ is.na(x) ] &amp;lt;- 0&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;mean(x)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Oh - in case you were interested in the plot:&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;library(ggplot2)&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df = as.data.frame(as.ts(v))&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;df$idx = as.numeric(rownames(df))&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;&lt;div&gt;df$x= as.numeric(df$x)&lt;/div&gt;&lt;div&gt;qplot(data=df, idx, x) + geom_line() + geom_smooth()&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: Arial;"&gt;&lt;span class="Apple-style-span" style="font-size: medium;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br 
/&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-4802408713335086512?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/4802408713335086512/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/better-than-average.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/4802408713335086512'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/4802408713335086512'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/better-than-average.html' title='Better than Average'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_FsLa1cMTCWU/TH2hTwRF3pI/AAAAAAAAAbs/LqE0yGJ3F6c/s72-c/MovingAverage.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-7919296186803098682</id><published>2010-08-27T15:21:00.000-07:00</published><updated>2010-08-27T15:21:38.912-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='fractals'/><title type='text'>Fractals in R</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4nXR4RCI/AAAAAAAAAbU/N6EoNqOAPrQ/s1600/mandelbrot1.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="357" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4nXR4RCI/AAAAAAAAAbU/N6EoNqOAPrQ/s400/mandelbrot1.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;a href="http://users.utu.fi/attenka/"&gt;Atte Tenkanen&lt;/a&gt;&amp;nbsp;had&amp;nbsp;&lt;a href="http://fractalswithr.blogspot.com/"&gt;a blog on fractals&lt;/a&gt;&amp;nbsp;using R for a time.&amp;nbsp;Much of his source code is still available online. &amp;nbsp;To produce his version of the Mandelbrot set:&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;source('&lt;a href="http://users.utu.fi/attenka/mandelbrot_set.R"&gt;http://users.utu.fi/attenka/mandelbrot_set.R&lt;/a&gt;')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/Fractal"&gt;Fractals&lt;/a&gt;&amp;nbsp;(such as the&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Mandelbrot_set"&gt;Mandelbrot Set&lt;/a&gt;&amp;nbsp;pictured above) &amp;nbsp;are objects that display self-similarity on all scales. &amp;nbsp;Fractal are mathematical concepts with practical applications. For example, fractal dimensions provide a solution to measuring a coastline - where you come up with different lengths depending upon the length of the ruler you use. 
(the &lt;a href="http://en.wikipedia.org/wiki/Coastline_paradox"&gt;Coastline Paradox&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;They are also pretty and fun to look at....&lt;br /&gt;&lt;br /&gt;Back in the 80's my little brother and I would type in fractal equations into a Radio Shack Color Computer. &amp;nbsp;We would spend half a day typing in a program and debugging it and the other half a day waiting for the image to render. &amp;nbsp;He later improved the process by using a Commodore Amiga and more than two fingers for typing.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4xbToDJI/AAAAAAAAAbc/FGTObobpj_c/s1600/mandelbrot_ggplot2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4xbToDJI/AAAAAAAAAbc/FGTObobpj_c/s400/mandelbrot_ggplot2.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;You can also do this type of plot using ggplot2 - and most of the effort is related to turning off axes and legends. &amp;nbsp;The source is on github and can be run from there.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;source('&lt;a href="http://github.com/ezgraphs/R-Programs/raw/master/mandelbrot.R"&gt;http://github.com/ezgraphs/R-Programs/raw/master/mandelbrot.R&lt;/a&gt;')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman';"&gt;&lt;span class="Apple-style-span" style="font-size: medium;"&gt;So now I can download source code from somewhere out on the interweb thingy and use open source software to render these images in a matter of seconds. 
&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman';"&gt;&lt;span class="Apple-style-span" style="font-size: medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman';"&gt;&lt;span class="Apple-style-span" style="font-size: medium;"&gt;Just like we used to do when we were kids....&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-7919296186803098682?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/7919296186803098682/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/fractals-in-r.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7919296186803098682'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7919296186803098682'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/fractals-in-r.html' title='Fractals in R'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail 
xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4nXR4RCI/AAAAAAAAAbU/N6EoNqOAPrQ/s72-c/mandelbrot1.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-3988954280236540365</id><published>2010-08-24T20:07:00.000-07:00</published><updated>2010-08-24T20:07:01.063-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Data Preparation'/><category scheme='http://www.blogger.com/atom/ns#' term='Financial'/><category scheme='http://www.blogger.com/atom/ns#' term='SQL'/><title type='text'>How Safe is Your Money?</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/THReBRgE6xI/AAAAAAAAAY8/TcZJleHACv0/s1600/totalFDICProblemInstitutionsByYear.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/THReBRgE6xI/AAAAAAAAAY8/TcZJleHACv0/s400/totalFDICProblemInstitutionsByYear.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;The &lt;a href="http://www.fdic.gov/about/learn/symbol/index.html"&gt;FDIC&lt;/a&gt;&amp;nbsp;regularly publishes a &lt;a href="http://www.fdic.gov/bank/individual/failed/banklist.html"&gt;Failed Bank List&lt;/a&gt; and related statistics. &amp;nbsp;This post uses data in the &lt;a href="http://www.fdic.gov/bank/statistical/stats/2010mar/fdic.xls"&gt;original XLS&lt;/a&gt; from the FDIC web site which is formatted for human consumption to produce the charts below using R. &amp;nbsp;Note that 2010 data below is incomplete. &lt;br /&gt;&lt;br /&gt;The chart above is sort of a theme for the analysis. &amp;nbsp;It is interesting to note that there were a large number of "Problem" Banks in 1990-1992 - percentage wise comparable to the last few years. 
&amp;nbsp;However the total number of banks has dropped every year since 1990 (indicated by the color above). &amp;nbsp; This is true of both Commercial Banks and Savings Institutions (Value indicates the number of institutions).&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRe_R8P1-I/AAAAAAAAAZE/Ws2wt8lF7_M/s1600/CommercialBanks.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRe_R8P1-I/AAAAAAAAAZE/Ws2wt8lF7_M/s400/CommercialBanks.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRfEdMproI/AAAAAAAAAZM/7aFJL26bj48/s1600/SavingsInstitutions.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRfEdMproI/AAAAAAAAAZM/7aFJL26bj48/s400/SavingsInstitutions.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;Creation of New Charters has been decreasing and has not opposed this trend.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRfzkqI61I/AAAAAAAAAZU/HndSafZC3uM/s1600/CommercialBanksNewCharters.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRfzkqI61I/AAAAAAAAAZU/HndSafZC3uM/s400/CommercialBanksNewCharters.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRgC4ph2CI/AAAAAAAAAZc/Ca7Xbb-U8NE/s1600/SavingsInstitutionsNewCharters.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" 
src="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRgC4ph2CI/AAAAAAAAAZc/Ca7Xbb-U8NE/s400/SavingsInstitutionsNewCharters.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;Consolidation has been one reason for the decline in the total number of institutions (especially during the 1990s), but these have decreased during the last several years.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRgZogNw1I/AAAAAAAAAZk/m_CrEwj3s_Q/s1600/CommercialBanksMergers.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRgZogNw1I/AAAAAAAAAZk/m_CrEwj3s_Q/s400/CommercialBanksMergers.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRgfaiB8pI/AAAAAAAAAZs/AvHrFhp2ilQ/s1600/SavingsInstitutionsMergers.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRgfaiB8pI/AAAAAAAAAZs/AvHrFhp2ilQ/s400/SavingsInstitutionsMergers.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;The number of Problem Institutions in the early 1990s (and their assets) was even higher than the last few years. 
&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRhKgQJ_FI/AAAAAAAAAZ0/XPGrUnm22KA/s1600/ProblemInstitutionsNumber.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRhKgQJ_FI/AAAAAAAAAZ0/XPGrUnm22KA/s400/ProblemInstitutionsNumber.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;Dollar amounts are expressed in billions (on the following and subsequent charts).&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRhOswQGYI/AAAAAAAAAZ8/LXMw1TED6p4/s1600/ProblemInstitutionsAssets.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRhOswQGYI/AAAAAAAAAZ8/LXMw1TED6p4/s400/ProblemInstitutionsAssets.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Combined Deposit Insurance Fund&lt;/b&gt;&lt;br /&gt;The &lt;a href="http://www.fdic.gov/deposit/insurance/index.html"&gt;Combined Deposit Insurance Fund&lt;/a&gt;&amp;nbsp;is used to guarantee the safety on deposits in member banks. &amp;nbsp;The amount of&amp;nbsp;Insured&amp;nbsp;Deposits has been&amp;nbsp;&amp;nbsp;growing.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRjC7M0ABI/AAAAAAAAAaE/i8zvtM5Z9p0/s1600/CombinedDep.Ins.FundInsuredDeposits.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRjC7M0ABI/AAAAAAAAAaE/i8zvtM5Z9p0/s400/CombinedDep.Ins.FundInsuredDeposits.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;However, the Fund Balance itself (along with the Reserve Ratio)&amp;nbsp;been negative since 2009. 
&amp;nbsp;The balance was also negative in 1991.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRjYXbPgNI/AAAAAAAAAaM/h1JK5M7vyXY/s1600/CombinedDep.Ins.FundFundBalance.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRjYXbPgNI/AAAAAAAAAaM/h1JK5M7vyXY/s400/CombinedDep.Ins.FundFundBalance.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;The Number of Failed Institutions (described in detail on the &lt;a href="http://www.fdic.gov/bank/individual/failed/banklist.html"&gt;Failed Bank List&lt;/a&gt;) is actually less than the early 1990s, but the total assets involved are much higher.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRlIYlyDYI/AAAAAAAAAaU/xNeKVkusoQE/s1600/NumberFailedInstitutions.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRlIYlyDYI/AAAAAAAAAaU/xNeKVkusoQE/s400/NumberFailedInstitutions.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/THRlOJPlLGI/AAAAAAAAAac/eEGG0MSqE60/s1600/FailedAssets.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/THRlOJPlLGI/AAAAAAAAAac/eEGG0MSqE60/s400/FailedAssets.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;The amount of assistance provided has also been greater during the last few years. 
&amp;nbsp;There actually was assistance provided in 1990-1992, but the relative amount is so little that it does not appear in the charts below.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRlz11d5vI/AAAAAAAAAak/xp4K_S3wf0c/s1600/NumberAssistedInstitutions.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/THRlz11d5vI/AAAAAAAAAak/xp4K_S3wf0c/s400/NumberAssistedInstitutions.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRl6SFl7-I/AAAAAAAAAas/_9vwsigNeu0/s1600/AssistedAssets.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRl6SFl7-I/AAAAAAAAAas/_9vwsigNeu0/s400/AssistedAssets.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;Definitions do not appear in the spreadsheet, but on &lt;a href="http://www2.fdic.gov/hsob/help.asp"&gt;other parts of the FDIC site&lt;/a&gt;, the "estimated loss is the difference between the amount disbursed from the Deposit Insurance Fund (DIF) to cover obligations to insured depositors and the amount estimated to be ultimately recovered from the liquidation of the receivership estate". &amp;nbsp;Simply said, the amount paid out by the government that could not be recovered in some way from the failed bank. 
&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRnwrA8MsI/AAAAAAAAAa0/R2HVxJ2zJeA/s1600/EstimatedLossesDIF.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRnwrA8MsI/AAAAAAAAAa0/R2HVxJ2zJeA/s400/EstimatedLossesDIF.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;The "Resolution Receivables" are also not defined in the spreadsheet. &amp;nbsp;But &lt;a href="http://www.fdic.gov/about/strategic/report/2008annualreport/statements_dif_4.html"&gt;elsewhere on the FDIC site&lt;/a&gt;&amp;nbsp;appear to represent&amp;nbsp;"payments made by the DIF to cover obligations to insured depositors, advances to receiverships and conservatorships for working capital, and administrative expenses paid on behalf of receiverships and conservatorships". &lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRoaZHpyRI/AAAAAAAAAa8/0VUv0D8iE20/s1600/ResolutionReceivables.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/THRoaZHpyRI/AAAAAAAAAa8/0VUv0D8iE20/s400/ResolutionReceivables.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Number of Employees&lt;/b&gt;&lt;br /&gt;The number of FDIC employees has been adjusted in some way - and so years prior to 2008 have been revised (according to the note in the spreadsheet) "according to a new full-time equivalent methodology".&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRo-57webI/AAAAAAAAAbM/Piljak67iWw/s1600/NumberofFDICEmployees.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" 
src="http://2.bp.blogspot.com/_FsLa1cMTCWU/THRo-57webI/AAAAAAAAAbM/Piljak67iWw/s400/NumberofFDICEmployees.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;b&gt;Technical Notes&lt;/b&gt;&lt;br /&gt;The&amp;nbsp;&lt;a href="http://www.fdic.gov/bank/statistical/stats/2010mar/fdic.xls"&gt;original XLS&lt;/a&gt;&amp;nbsp;from the FDIC web site was saved as a csv using Excel. &amp;nbsp;This &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/data/fdic.csv"&gt;csv&lt;/a&gt; and the &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/fdic.R"&gt;R Source&lt;/a&gt; used to create the plots above are available on github. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;Data Preparation&lt;/b&gt;&lt;br /&gt;Many texts on data analysis or data mining make some comment about the amount of time used to prepare data. &amp;nbsp;It needs to be "cleansed" or "reshaped" or otherwise transformed into a form that can be manipulated in the intended manner. &amp;nbsp;Because the spreadsheet from the FDIC is directed at a human audience (rather than a computer program) it has a number of irregularities. &amp;nbsp;There is text information in the header and footer, subheadings over related sections, columns added for space, and spaces added to cells. &amp;nbsp;In addition, naming the rows and columns and reshaping the data make it easier to plot. &amp;nbsp;There are a number of mechanisms available using native R and available packages. &amp;nbsp;I ended up using several of them - but because of overlapping functionality, there are many ways this work could be accomplished. 
&amp;nbsp;A few examples are below, the rest can be seen in the &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/fdic.R"&gt;source file&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;To remove empty columns reference data frame column&amp;nbsp;indices&amp;nbsp;as a list of negative numbers.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;df=df[c(-2,-5,-7,-10,-12,-14,-16,-18,-20,-22,-24,-26,-28,-30,-32,-34,-36,-38,-40,-42)]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Rows can be removed in a similar fashion.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;df=df[c(-1,-5,-9,-10,-13,-14,-18,-21,-24, -27),]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;To remove trailing rows reference a negative sequence as the first parameter in the data frame.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"&gt;df=df[-(29:36),]&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;To change the column names, substitute out characters&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;colnames(df)=gsub('X','',colnames(df))&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;or explicitly set column names.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;colnames(df)[1]='Statistic'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I am kind of fuzzy about the specific rules - but columns must be cast as factors in certain&amp;nbsp;contexts and as characters in others to get the desired result. 
&amp;nbsp;If there is a set rule about usage, please let&amp;nbsp;me know. &amp;nbsp;At this point, I am assuming that these types are not handled in a consistent way and you just have&amp;nbsp;to know how a particular function behaves.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;df$Statistic=as.character(df$Statistic)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div&gt;To trim out leading space in all cells of a given column use an appropriate regular expression.&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df$Statistic=gsub('^ *','',df$Statistic)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;To include subheading information include it inline. 
&amp;nbsp;This can be done by explicit naming of rows&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df$Statistic[11]='Problem Institutions Number'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;or in a somewhat generic fashion where the contents of one field are appended to another.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df$Statistic[3]=paste(df$Statistic[2],df$Statistic[3])&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;To pivot data, you can sometimes do a transpose and the result is a matrix.&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;t(df)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;A better result is obtained using the &lt;a href="http://had.co.nz/reshape/"&gt;reshape&lt;/a&gt; package.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(reshape)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df.melted=melt(df, id="Statistic")&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The data is to be treated as numeric. 
&amp;nbsp;To strip out all commas and cast the values to numerics&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df.melted$value=as.numeric(gsub(',','',as.character(df.melted$value)))&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;Again, casting factors as numerics produces funky results in any case.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df.melted$Year=as.numeric(as.character(df.melted$Year))&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Subsets of each series contained in the melted data can be obtained by matching on the Statistic in view.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;fdic_employees = df.melted[df$Statistic==&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; 'Number of FDIC Employees***',c('Year','value')]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;The chart at the top was obtained by combining results from several of these subsets. 
&amp;nbsp;The &lt;a href="http://code.google.com/p/sqldf/"&gt;sqldf&lt;/a&gt; package is used to combine the data frames and do some calculations&lt;/div&gt;&lt;div&gt;&amp;nbsp;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;pin=df.melted[df.melted$Statistic=="Problem Institutions Number",]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;cb= df.melted[df.melted$Statistic=="Commercial Banks",]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;si=df.melted[df.melted$Statistic=="Savings Institutions",]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library('sqldf')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;pct_prob=sqldf('SELECT&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;p.Year Year,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;p.value prob, c.value + s.value "Total Institutions",&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;p.value / (c.value + s.value) * 100 "Percent Problem"&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;FROM pin p&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;JOIN cb c ON c.Year = p.Year&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;JOIN si s ON s.Year = p.Year')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Plotting the Data&lt;/b&gt;&lt;/div&gt;&lt;div&gt;The plotting could be done in individual chunks of code like the following.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;"&gt;p = qplot(data=pct_prob, Year, `Percent Problem`, color=`Total Institutions`, group=1)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;p = p + geom_line()&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;p + opts(axis.text.x=theme_text(angle=-90, hjust=0), title='Total FDIC Problem Institutions by Year')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;ggsave('totalFDICProblemInstitutionsByYear.png')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;However, many of the charts had the exact same format and simply applied to a different series. 
&amp;nbsp;So I created a function that would produce the desired chart for any supplied Statistic.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;fdicPlot=function(statistic){&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;f=df.melted[df.melted$Statistic==statistic,]&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;ggplot(data=f, aes(x=Year, y=value)) + geom_line() + geom_point() + opts(title=statistic)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;img=paste(gsub(" |\\(|\\)|\\*",'',i),'.png',sep='')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;ggsave(img)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;}&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;The function could then be called for each available Statistic.&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;for (i in df$Statistic){fdicPlot(i)}&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;dev.off()&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;All in all, a useful exercise for reviewing FDIC data a bit more closely and seeing how R can be used to munge data into a format&amp;nbsp;conducive&amp;nbsp;to producing charts with ggplot2.&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-3988954280236540365?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/3988954280236540365/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/how-safe-is-your-money.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/3988954280236540365'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/3988954280236540365'/><link rel='alternate' type='text/html' 
href='http://www.r-chart.com/2010/08/how-safe-is-your-money.html' title='How Safe is Your Money?'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_FsLa1cMTCWU/THReBRgE6xI/AAAAAAAAAY8/TcZJleHACv0/s72-c/totalFDICProblemInstitutionsByYear.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-2174285842038769635</id><published>2010-08-21T12:09:00.000-07:00</published><updated>2010-08-21T12:09:26.115-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Maps'/><category scheme='http://www.blogger.com/atom/ns#' term='Ruby'/><category scheme='http://www.blogger.com/atom/ns#' term='GeoLocation'/><title type='text'>Map of Upcoming Ruby Conferences</title><content type='html'>&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;One of the top searches on &lt;a href="http://rubyflow.com/search/conference"&gt;rubyflow&lt;/a&gt;&amp;nbsp;is “&lt;a href="http://rubyflow.com/search/conference"&gt;conference&lt;/a&gt;”. &amp;nbsp;A recent &lt;a href="http://www.r-chart.com/2010/07/maps-geocoding-and-r-user-conference.html"&gt;post &lt;/a&gt;showed how to create a map with the location of the 2010 R User Conference. 
&amp;nbsp;So why not expand on the subject and create a map with numerous conference locations throughout the world?&lt;/td&gt; &lt;td&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/THAgT2_h4_I/AAAAAAAAAYE/VJyvVC2Q39s/s1600/RubyConferencesUS.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/THAgT2_h4_I/AAAAAAAAAYE/VJyvVC2Q39s/s400/RubyConferencesUS.png" width="313" /&gt;&lt;/span&gt;&lt;/a&gt;&lt;/td&gt; &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;This post shows how to create a map of locations (upcoming Ruby conferences) by pulling them straight off the web using Ruby and R.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;The Packages and APIs&lt;/b&gt;&lt;br /&gt;Both R and Ruby have a ton of functionality baked in to accomplish this task. &amp;nbsp;Ruby can scrape the web using &lt;a href="http://github.com/whymirror/hpricot"&gt;Hpricot&lt;/a&gt; and geocode the information using Google. &amp;nbsp;It can call R through &lt;a href="http://www.rforge.net/Rserve/"&gt;Rserve&lt;/a&gt;, and the &lt;a href="http://cran.r-project.org/web/packages/maps/index.html"&gt;maps&lt;/a&gt; and &lt;a href="http://had.co.nz/ggplot2/"&gt;ggplot2&lt;/a&gt;&amp;nbsp;libraries can be used to render the result.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;The Process&lt;/b&gt;&lt;br /&gt;&lt;a href="http://github.com/ezgraphs/Ruby-Scripts/blob/master/upcoming_ruby_conferences.rb"&gt;Ruby code&lt;/a&gt;&amp;nbsp;described here is available on Github. &amp;nbsp;Rserve must be running when the program is run. 
&amp;nbsp;The following block of code is used to create a connection – and if the connection is not available, starts Rserve.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;begin&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;puts "Creating a new Rserve Connection."&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;$c = Connection.new&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;rescue&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;puts "Could not create an Rserve Connection: #{$!}"&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;puts "Trying to start one now..."&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;File.open('tmp.R','w'){|f|f.puts "library(Rserve)\nRserve()"}&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;system('"R.exe" --no-save &amp;lt; tmp.R')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;sleep 3&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;$c = Connection.new&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;puts "Rserve Started."&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Getting a list of conferences involves parsing HTML. &amp;nbsp;In R the XML package includes useful functionality and can be handy when data is in an HTML table. &amp;nbsp;In this case, the data was not in an HTML table. &amp;nbsp;Instead, the Hpricot parser accepts an XPath expression and iterates over the relevant elements. 
&amp;nbsp;The data is extracted and stored in an array of hashes.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;def get_conference_list()&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;u='http://blog.sphereinc.com/2010/08/13-upcoming-ruby-and-rails-conferences-you-dont-want-to-miss'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;doc=Hpricot(open(u))&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;recs=[]&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;(doc/"//div[@id='post-216']/div/p/strong").entries.each_with_index{|e,i| &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; h={}&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; e.inner_text.split("\n").each{|d|&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span 
class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; p=d.split(':')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; unless [nil,''].include?(p[0]) or &amp;nbsp;[nil,''].include?(p[1])&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; # &amp;nbsp;puts "&amp;gt;&amp;gt;#{p[0].strip} = #{p[1].strip}&amp;lt;&amp;lt;"&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; h[p[0].strip]= p[1].strip &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; }&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; recs &amp;lt;&amp;lt; h&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;&amp;nbsp;&amp;nbsp;}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;recs&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Hpricot can parse XML as well as HTML, and so is used to get the latitude and longitude for each location.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;def get_location(str) &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;u=URI.encode(&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;"http://maps.google.com/maps/api/geocode/xml?sensor=false&amp;amp;address=#{str}"&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;loc=(Hpricot.XML(open(u)))/'//location'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;h={}&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;h['lat']=(loc/:lat).inner_text&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;h['lng']=(loc/:lng).inner_text&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;h&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;A &lt;a href="http://github.com/ezgraphs/Ruby-Scripts/blob/master/ruby_conference_locations.txt"&gt;data file&lt;/a&gt; is created which contains semicolon delimited records. &amp;nbsp;This will provide the input to the R program.&lt;br /&gt;&lt;br /&gt;Finally, an &lt;a href="http://github.com/ezgraphs/Ruby-Scripts/blob/master/upcoming_ruby_conferences.R"&gt;R program&lt;/a&gt; is used to plot the data. &amp;nbsp;Since the files in view will be in the current working directory (and Rserve has no reference to this) it is substituted in prior to executing the program.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Challenges&lt;/b&gt;&lt;br /&gt;In many cases ggplot2 creates publication quality graphics with a simple call. 
&amp;nbsp;In this case, data was grouped very unevenly, and attempts to automatically add text (in this case the city names) to the map can result in overlapping and large amounts of wasted space. &lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/THAhJaIIE-I/AAAAAAAAAYU/ZLwMwKwixh8/s1600/RubyConferencesWorld.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="193" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/THAhJaIIE-I/AAAAAAAAAYU/ZLwMwKwixh8/s400/RubyConferencesWorld.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;I thought of a variety of ways to address the problem.&lt;br /&gt;&lt;ul&gt;&lt;li&gt;The simplest was to &lt;b&gt;&lt;i&gt;resize the image&lt;/i&gt;&lt;/b&gt; various ways. &amp;nbsp;This was not successful - but I did later crop the images in a manual post processing step. &amp;nbsp;&lt;/li&gt;&lt;li&gt;Another approach was to &lt;i&gt;&lt;b&gt;modify the scale&lt;/b&gt;&lt;/i&gt; in use in some way (e.g. the use of a log scale with scatter plots as Tal pointed out in &lt;a href="http://www.r-chart.com/2010/08/programming-language-popularity.html"&gt;this post&lt;/a&gt;). &amp;nbsp;The ggplot2 package includes a large number of &lt;a href="http://en.wikipedia.org/wiki/Map_projection"&gt;map projections&lt;/a&gt;&amp;nbsp;(ways of representing a three dimensional sphere in two dimensions). 
&amp;nbsp;Available projections provided with ggplot2 are described in help:&lt;/li&gt;&lt;/ul&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;mapproject&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;None of these solved the problem in and of themselves, but provided some interesting variations. &lt;br /&gt;&lt;ul&gt;&lt;li&gt;I decided instead to simply “&lt;b&gt;&lt;i&gt;zoom in&lt;/i&gt;&lt;/b&gt;” on the relevant part of the chart and &lt;b&gt;&lt;i&gt;split the chart &lt;/i&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;(sort of ad hoc faceting)&lt;/span&gt;&lt;/b&gt;. &amp;nbsp;In some cases this worked perfectly.&lt;/li&gt;&lt;/ul&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/THAg98uwg6I/AAAAAAAAAYM/4bTvqNLiEZ8/s1600/RubyConferencesJapan.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/THAg98uwg6I/AAAAAAAAAYM/4bTvqNLiEZ8/s400/RubyConferencesJapan.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;In other cases the results were suboptimal. &amp;nbsp;Paths were sometimes drawn between points on the border that had been cropped and so extraneous map lines appear in some zooms. &amp;nbsp;I finally decided that a general purpose automatic solution was not readily available (or at least known to me). 
&amp;nbsp;So I cleaned up the final images using image editing software (Gimp).&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/THAiUkuupWI/AAAAAAAAAY0/TMpvJNfzVZA/s1600/RubyConferencesEurope.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/THAiUkuupWI/AAAAAAAAAY0/TMpvJNfzVZA/s400/RubyConferencesEurope.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;If anybody has ideas about how this kind of processing can be used to automatically generate graphs that are "cleaned up" please comment below. &amp;nbsp;Scriptable solutions that create finished products with no manual intervention are preferable.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;The code is available on &lt;a href="http://github.com/ezgraphs/Ruby-Scripts"&gt;Github&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-2174285842038769635?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/2174285842038769635/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/map-of-upcoming-ruby-conferences.html#comment-form' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2174285842038769635'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/2174285842038769635'/><link rel='alternate' type='text/html' 
href='http://www.r-chart.com/2010/08/map-of-upcoming-ruby-conferences.html' title='Map of Upcoming Ruby Conferences'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/_FsLa1cMTCWU/THAgT2_h4_I/AAAAAAAAAYE/VJyvVC2Q39s/s72-c/RubyConferencesUS.png' height='72' width='72'/><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-6446361987433524356</id><published>2010-08-17T14:35:00.000-07:00</published><updated>2010-08-17T15:39:37.008-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='SQL'/><category scheme='http://www.blogger.com/atom/ns#' term='Programming Languages'/><title type='text'>Programming Language Popularity: StackOverflow and Ohloh</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TGrw4W9yy5I/AAAAAAAAAXk/tpPnzaESK-Q/s1600/programming_language_popularity.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TGrw4W9yy5I/AAAAAAAAAXk/tpPnzaESK-Q/s400/programming_language_popularity.png" width="310" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;In the following example, programming language popularity is measured based upon two data sets. 
&amp;nbsp;The first is the number of &amp;nbsp;contributors&amp;nbsp;associated with a language on&amp;nbsp;&lt;/span&gt;&lt;a href="https://www.ohloh.net/languages"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;ohloh.net&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;. &amp;nbsp;The second is&amp;nbsp;&lt;/span&gt;&lt;a href="http://stackoverflow.com/tags"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;tag usage at stackoverflow.com&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;. &amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;b&gt;SQL with no DDL&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;I admit it... in an age of &lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a href="http://en.wikipedia.org/wiki/NoSQL"&gt;NoSQL&lt;/a&gt;...&amp;nbsp;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;I like SQL. &amp;nbsp;I agree that fixed table schemas can be a real pain&amp;nbsp;though... who wants the overhead of defining database tables for&amp;nbsp;a quick comparison of two data sets?&amp;nbsp;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Joining on the language name provides a simple, intuitive way to correlate&amp;nbsp;the results sets. 
&amp;nbsp;Of course there are limitations to this&amp;nbsp;approach - after all SQL was designed for &lt;/span&gt;&lt;i&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;relational &lt;/span&gt;&lt;/b&gt;&lt;/i&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;databases. &amp;nbsp;Since a "join" is being done based upon language and tag name,&amp;nbsp;some languages may be under represented. &amp;nbsp;For instance &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;JQuery - a Javascript library - is a leading tag. &amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Objective C questions might appear under the iPhone tag.&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;C# Questions might appear under .NET, ASP.NET or other&amp;nbsp;Microsoft tags.&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;Another problem that &lt;i&gt;can &lt;/i&gt;occur in this type of comparison is that keyed data might not actually correlate with a matched key. &amp;nbsp; For instance the C programming language might be compared with a record pertaining to the third letter of the alphabet. &amp;nbsp;This is not a problem in the current example because the specific domain of both data sets is programming languages.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;b&gt;Top Languages&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;In this particular analysis, I am really interested in outliers&amp;nbsp;- not the vast majority of the languages that appear in the data set. &amp;nbsp;So the name of each&amp;nbsp;point will be plotted beside it. 
&amp;nbsp;For less popular languages,&amp;nbsp;this chart is impossible to read and madly cluttered... but it is great for focusing on the most popular languages.&amp;nbsp;&amp;nbsp; So rather than coming up with a publication-quality graphic, the chart above provides a "quick-and-dirty" perspective that can lead to helpful discussions for people familiar with the programming language domain.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;In the&amp;nbsp;&lt;a href="http://www.r-chart.com/2010/08/github-stats-on-programming-languages.html"&gt;previous post&lt;/a&gt;, Ruby ranked at the top. &amp;nbsp;This demonstrates the Ruby centric nature of github, which was initially directed towards the ruby community. &amp;nbsp;Similar trends affect the results in the current post (where Ruby ranks&amp;nbsp;12th in tag count and&amp;nbsp;16th in the number of contributors). &amp;nbsp;R is 18th in tag count and 33rd in number of contributors.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;The data was extracted over the last few days and is available on github in&amp;nbsp;&lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/data/ohlo_2010-08-16.txt"&gt;ohlo_2010-08-16.txt&lt;/a&gt;&amp;nbsp;and&amp;nbsp;&lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/data/stackoverflow.txt"&gt;stackoverflow.txt&lt;/a&gt;&amp;nbsp;(warning 400MB file... all tags from stackoverflow are listed in it). 
&amp;nbsp;The process to analyze the files involved the following R Code.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(sqldf)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;SODF=read.csv('stackoverflow.txt',header=TRUE, sep=';')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;OHLODF=read.csv('ohlo_2010-08-16.txt',header=TRUE, sep=';')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;head(OHLODF)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;head(SODF)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df = sqldf('select Name name, Count tag_count, Contributors contributors&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;from OHLODF o&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;join SODF s on LOWER(s.Tag) = LOWER(o.Name) order by 1')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;ggplot(data=df,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; aes(x=tag_count, y=contributors, color=name)) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;geom_point() 
+&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;geom_text(aes(label = name))&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The resulting chart is displayed above. &amp;nbsp;To list the top 10 languages:&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; head(df[order(df$contributors, decreasing=TRUE),],10)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; name tag_count contributors&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;57 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;XML &amp;nbsp; &amp;nbsp; 12374 &amp;nbsp; &amp;nbsp; &amp;nbsp; 133183&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;24 &amp;nbsp; &amp;nbsp; &amp;nbsp; HTML &amp;nbsp; &amp;nbsp; 21936 &amp;nbsp; &amp;nbsp; &amp;nbsp; 106012&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;28 &amp;nbsp; &amp;nbsp; &amp;nbsp; Java &amp;nbsp; &amp;nbsp; 62386 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;78098&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier 
New', Courier, monospace;"&gt;9 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; C &amp;nbsp; &amp;nbsp; 17256 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;78023&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;13 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;CSS &amp;nbsp; &amp;nbsp; 16429 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;72060&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;11 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C++ &amp;nbsp; &amp;nbsp; 38691 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;61831&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;29 JavaScript &amp;nbsp; &amp;nbsp; 46608 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;60677&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;33 &amp;nbsp; &amp;nbsp; &amp;nbsp; Make &amp;nbsp; &amp;nbsp; &amp;nbsp; 537 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;50328&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;44 &amp;nbsp; &amp;nbsp; Python &amp;nbsp; &amp;nbsp; 31852 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;38691&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;39 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;PHP &amp;nbsp; &amp;nbsp; 53884 &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp;36952&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; head(df[order(df$tag_count, decreasing=TRUE),],10)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;name tag_count contributors&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;10 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C# &amp;nbsp; &amp;nbsp;101811 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;22198&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;28 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Java &amp;nbsp; &amp;nbsp; 62386 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;78098&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;39 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; PHP &amp;nbsp; &amp;nbsp; 53884 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;36952&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;29 &amp;nbsp;JavaScript &amp;nbsp; &amp;nbsp; 46608 
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;60677&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;11 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; C++ &amp;nbsp; &amp;nbsp; 38691 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;61831&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;44 &amp;nbsp; &amp;nbsp; &amp;nbsp;Python &amp;nbsp; &amp;nbsp; 31852 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;38691&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;48 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; SQL &amp;nbsp; &amp;nbsp; 25316 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;28069&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;24 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;HTML &amp;nbsp; &amp;nbsp; 21936 &amp;nbsp; &amp;nbsp; &amp;nbsp; 106012&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;9 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C &amp;nbsp; &amp;nbsp; 17256 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;78023&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;37 Objective-C &amp;nbsp; &amp;nbsp; 17250 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 6555&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br 
/&gt;All other things being equal, one might think that the&amp;nbsp;relationship between contributors to projects and tag counts&amp;nbsp;might be roughly linear. &amp;nbsp;As it stands, that is not the case at&amp;nbsp;all. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;Web Oriented Languages&lt;/b&gt;&lt;br /&gt;The languages represented show a significant representation of&amp;nbsp;web-application-related technologies. &amp;nbsp;HTML, CSS, JavaScript&amp;nbsp;and PHP are used almost exclusively for such development, and&amp;nbsp;Ruby, Python, Perl, Java, C#, SQL are also heavily used for web&amp;nbsp;applications (though not exclusively). &amp;nbsp;C, C++, Objective C and&amp;nbsp;Make are related technologies that are geared less towards web&amp;nbsp;development. &lt;br /&gt;&lt;br /&gt;&lt;b&gt;Microsoft&amp;nbsp;&lt;/b&gt;&lt;br /&gt;According to &lt;a href="http://en.wikipedia.org/wiki/Stack_Overflow"&gt;wikipedia&lt;/a&gt;&amp;nbsp;StackOverflow is a Microsoft partner and stackoverflow itself was developed on the Microsoft platform. &amp;nbsp;This might provide some explanation for the high representation of C#.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Simple Languages = Fewer Questions&lt;/b&gt;&lt;br /&gt;XML and HTML are markup languages with relatively simple syntax,&amp;nbsp;hence the relatively small tag count. &amp;nbsp;CSS and&amp;nbsp;Make are also relatively small languages with specific uses&amp;nbsp;rather than general purpose programming languages. &amp;nbsp;The fact&amp;nbsp;that C++ was developed as an enhancement to the C programming&amp;nbsp;language explains why there are more questions (and a larger tag&amp;nbsp;count) for C++ than C. &amp;nbsp;A more speculative suggestion is&amp;nbsp;that&amp;nbsp;Perl's relatively low tag count indicates that the "more than&amp;nbsp;one way to do it" philosophy leads to fewer questions. 
&amp;nbsp;An&amp;nbsp;obvious alternative is that Perl users simply ask questions in&amp;nbsp;other venues.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Conclusion&lt;/b&gt;&lt;br /&gt;All measures of programming language popularity have their limitations. &amp;nbsp;Correlating various sets of data can provide some additional insights into their prevalence and usage. &amp;nbsp;R and sqldf provide a convenient means of making such comparisons. &amp;nbsp;And ggplot2 provides a great way of charting results.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a name='more'&gt;&lt;/a&gt;&lt;/span&gt;&lt;br /&gt;&lt;b&gt;Update&lt;/b&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;A log scale (as suggested by Tal in the comments) provides better insight into the majority of languages that appear clustered in the lower left hand corner of the chart. &amp;nbsp;However, though this site might be considered R rated, the **** was added through later image editing to make it fit for all audiences. 
&amp;nbsp;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;ggplot(data=df,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; aes(x=log(tag_count), y=log(contributors), color=name)) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;geom_point() +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;geom_text(aes(label = name))&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TGsPlUni5aI/AAAAAAAAAX8/klwOW7Iauek/s1600/programming_language_popularity_log.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TGsPlUni5aI/AAAAAAAAAX8/klwOW7Iauek/s400/programming_language_popularity_log.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-6446361987433524356?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/6446361987433524356/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/programming-language-popularity.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6446361987433524356'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6446361987433524356'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/programming-language-popularity.html' title='Programming Language Popularity: StackOverflow and Ohloh'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://4.bp.blogspot.com/_FsLa1cMTCWU/TGrw4W9yy5I/AAAAAAAAAXk/tpPnzaESK-Q/s72-c/programming_language_popularity.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-788296136388684696</id><published>2010-08-09T13:36:00.000-07:00</published><updated>2010-08-09T14:37:49.946-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='GitHub'/><title type='text'>GitHub Stats on Programming Languages</title><content type='html'>GitHub has become a popular site for Open Source Developers to stash code and collaborate on projects. &amp;nbsp;The following are some stats and analysis related to programming languages in use based upon the number of users and&amp;nbsp;repositories. &amp;nbsp;The &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/git_lang_stats.txt"&gt;data &lt;/a&gt;was obtained from GitHub's searches. &amp;nbsp;&amp;nbsp;&lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/git_lang_stats.txt"&gt;It &lt;/a&gt;and the &lt;a href="http://github.com/ezgraphs/R-Programs/blob/master/github_lang_stats.R"&gt;R code&lt;/a&gt; are available in GitHub as well&amp;nbsp;(a lovely recursive relationship I must say).&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TGBlqQEs5yI/AAAAAAAAAXM/HkwNFJnohoI/s1600/top_10_github_languages_by_reps.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TGBlqQEs5yI/AAAAAAAAAXM/HkwNFJnohoI/s400/top_10_github_languages_by_reps.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; df.top_ten_reps&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;Language Repositories Users&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Ruby &amp;nbsp; &amp;nbsp; &amp;nbsp; 104239 23123&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;2 &amp;nbsp; JavaScript &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;44482 10895&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;3 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Perl &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;34232 &amp;nbsp;2178&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;4 &amp;nbsp; &amp;nbsp; &amp;nbsp; Python &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;32150 &amp;nbsp;8775&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;5 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;PHP &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;21685 &amp;nbsp;8872&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;6 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Java &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;17687 &amp;nbsp;6618&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;7 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;16137 &amp;nbsp;5558&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;8 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C++ &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;12521 &amp;nbsp;5595&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;9 &amp;nbsp;Objective-C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 8027 &amp;nbsp;2520&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;10 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C# &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 6061 &amp;nbsp;2706&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Ruby has a commanding lead in terms of the number of repositories with 32.17% - more than the next two (Javascript and Perl) combined. &amp;nbsp;R is ranked 25th with 191 repositories or about 0.06% and only 6 projects behind the D programming language. &amp;nbsp;The top 5 are scripting languages, Java ranks 6th and the C family rounds out the top ten. 
&amp;nbsp;Relatively open languages lead the pack, followed by those with a proprietary focus (Objective-C for Apple and C# for Microsoft).&lt;br /&gt;&lt;br /&gt;When ranked by number of users, the top two remain the same. &amp;nbsp;There is a bit of shuffling with the remainder of the top ten.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TGBluxwEg6I/AAAAAAAAAXU/jrRah0rpsMs/s1600/top_10_github_languages_by_users.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TGBluxwEg6I/AAAAAAAAAXU/jrRah0rpsMs/s400/top_10_github_languages_by_users.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;gt; df.top_ten_users&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;Language Repositories Users&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Ruby &amp;nbsp; &amp;nbsp; &amp;nbsp; 104239 23123&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2 &amp;nbsp; JavaScript &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;44482 10895&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" 
style="font-size: small;"&gt;3 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;PHP &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;21685 &amp;nbsp;8872&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;4 &amp;nbsp; &amp;nbsp; &amp;nbsp; Python &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;32150 &amp;nbsp;8775&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;5 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Java &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;17687 &amp;nbsp;6618&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;6 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C++ &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;12521 &amp;nbsp;5595&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;7 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;16137 &amp;nbsp;5558&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;8 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; C# &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 6061 &amp;nbsp;2706&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;9 &amp;nbsp;Objective-C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 8027 
&amp;nbsp;2520&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;10 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Perl &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;34232 &amp;nbsp;2178&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The most striking change is that Perl drops to 10th place. &amp;nbsp;There are significantly fewer users associated with Perl - particularly for the number of projects. &amp;nbsp;I noticed that there was &lt;a href="http://github.com/blog/276-perl-mirror-on-github"&gt;a migration of Perl language&lt;/a&gt; source code&amp;nbsp;to GitHub - so perhaps modules were migrated as well...but I couldn't find any specific announcements that clarified this. &lt;br /&gt;&lt;br /&gt;All other things being equal, you might expect there to be a relationship between the number of users and the number of repositories. There is to some degree -&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; df.Ratio=df.Repositories / df.Users&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;gt; mean(df[df$Ratio &amp;gt; 0 &amp;amp; df$User &amp;gt; 0 &amp;amp; !is.na(df$Ratio), ])&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;5.064732&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;A linear model suggests a 
slightly lower value (between 4 and 5). &lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TGBmDbPj0VI/AAAAAAAAAXc/KP53gbO-GOE/s1600/github_languages_reps_per_user_all.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TGBmDbPj0VI/AAAAAAAAAXc/KP53gbO-GOE/s400/github_languages_reps_per_user_all.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Here is a plot restricted to the Top 10.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TGBlkorNVWI/AAAAAAAAAXE/rd0fRUnfKnU/s1600/github_languages_reps_per_user_top10.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TGBlkorNVWI/AAAAAAAAAXE/rd0fRUnfKnU/s400/github_languages_reps_per_user_top10.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Some of the lesser-used languages have few users but many repositories per user, such as Io (19 per user) and CoffeeScript (17 per user). 
&amp;nbsp;Perl has a remarkable 15 per user.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;Full data Set&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Language Repositories Users Rep.pct &amp;nbsp; &amp;nbsp; Ratio&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Ruby &amp;nbsp; &amp;nbsp; &amp;nbsp; 104239 23123 &amp;nbsp; 32.17 &amp;nbsp;4.508022&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;2 &amp;nbsp; &amp;nbsp; JavaScript &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;44482 10895 &amp;nbsp; 13.73 &amp;nbsp;4.082790&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;3 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Perl &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;34232 &amp;nbsp;2178 &amp;nbsp; 10.56 15.717172&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;4 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Python &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;32150 &amp;nbsp;8775 &amp;nbsp; &amp;nbsp;9.92 &amp;nbsp;3.663818&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;5 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;PHP &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;21685 &amp;nbsp;8872 &amp;nbsp; &amp;nbsp;6.69 &amp;nbsp;2.444206&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;6 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Java &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;17687 &amp;nbsp;6618 &amp;nbsp; &amp;nbsp;5.46 &amp;nbsp;2.672560&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;7 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;16137 &amp;nbsp;5558 &amp;nbsp; &amp;nbsp;4.98 &amp;nbsp;2.903383&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;8 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;C++ &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;12521 &amp;nbsp;5595 &amp;nbsp; &amp;nbsp;3.86 &amp;nbsp;2.237891&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;9 &amp;nbsp; &amp;nbsp;Objective-C &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 8027 &amp;nbsp;2520 &amp;nbsp; &amp;nbsp;2.48 &amp;nbsp;3.185317&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;10 &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp;C# &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 6061 &amp;nbsp;2706 &amp;nbsp; &amp;nbsp;1.87 &amp;nbsp;2.239837&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;11 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Shell &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 4657 &amp;nbsp;1011 &amp;nbsp; &amp;nbsp;1.44 &amp;nbsp;4.606330&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;12 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;VimL &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 4248 &amp;nbsp;1267 &amp;nbsp; &amp;nbsp;1.31 &amp;nbsp;3.352802&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;13 &amp;nbsp;ActionScript &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2609 &amp;nbsp;1104 &amp;nbsp; &amp;nbsp;0.81 &amp;nbsp;2.363225&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;14 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Erlang &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2520 &amp;nbsp; 532 &amp;nbsp; &amp;nbsp;0.78 &amp;nbsp;4.736842&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;15 &amp;nbsp; &amp;nbsp; &amp;nbsp; Haskell &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2290 &amp;nbsp; 641 &amp;nbsp; &amp;nbsp;0.71 &amp;nbsp;3.572543&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;16 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Scala &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2154 &amp;nbsp; 539 &amp;nbsp; &amp;nbsp;0.66 &amp;nbsp;3.996289&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;17 &amp;nbsp; &amp;nbsp; &amp;nbsp; Clojure &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2082 &amp;nbsp; 481 &amp;nbsp; &amp;nbsp;0.64 &amp;nbsp;4.328482&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;18 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Lua &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1754 &amp;nbsp; 511 &amp;nbsp; &amp;nbsp;0.54 &amp;nbsp;3.432485&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;19 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Groovy &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;870 &amp;nbsp; 261 &amp;nbsp; &amp;nbsp;0.27 &amp;nbsp;3.333333&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;20 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Scheme &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;707 &amp;nbsp; 140 &amp;nbsp; &amp;nbsp;0.22 &amp;nbsp;5.050000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;21 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Go &amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp;398 &amp;nbsp; 103 &amp;nbsp; &amp;nbsp;0.12 &amp;nbsp;3.864078&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;22 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; OCaml &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;382 &amp;nbsp; 121 &amp;nbsp; &amp;nbsp;0.12 &amp;nbsp;3.157025&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;23 &amp;nbsp; Objective-J &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;355 &amp;nbsp; 109 &amp;nbsp; &amp;nbsp;0.11 &amp;nbsp;3.256881&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;24 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; D &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;197 &amp;nbsp; &amp;nbsp;64 &amp;nbsp; &amp;nbsp;0.06 &amp;nbsp;3.078125&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;25 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; R &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;191 &amp;nbsp; &amp;nbsp;69 &amp;nbsp; &amp;nbsp;0.06 &amp;nbsp;2.768116&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;26 &amp;nbsp; &amp;nbsp;ColdFusion &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;180 &amp;nbsp; &amp;nbsp;56 &amp;nbsp; &amp;nbsp;0.06 &amp;nbsp;3.214286&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;27 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Tcl &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;125 &amp;nbsp; &amp;nbsp;39 &amp;nbsp; &amp;nbsp;0.04 &amp;nbsp;3.205128&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;28 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ooc &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;112 &amp;nbsp; &amp;nbsp;11 &amp;nbsp; &amp;nbsp;0.03 10.181818&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;29 &amp;nbsp; &amp;nbsp; &amp;nbsp; FORTRAN &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 93 &amp;nbsp; &amp;nbsp;47 &amp;nbsp; &amp;nbsp;0.03 &amp;nbsp;1.978723&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;30 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ASP &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 88 &amp;nbsp; &amp;nbsp;35 &amp;nbsp; &amp;nbsp;0.03 &amp;nbsp;2.514286&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;31 &amp;nbsp; &amp;nbsp; Smalltalk &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 80 &amp;nbsp; &amp;nbsp;14 &amp;nbsp; &amp;nbsp;0.02 &amp;nbsp;5.714286&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" 
style="font-size: x-small;"&gt;32 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;HaXe &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 75 &amp;nbsp; &amp;nbsp;14 &amp;nbsp; &amp;nbsp;0.02 &amp;nbsp;5.357143&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;33 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;F# &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 74 &amp;nbsp; &amp;nbsp; 5 &amp;nbsp; &amp;nbsp;0.02 14.800000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;34 &amp;nbsp; &amp;nbsp; &amp;nbsp; Verilog &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 74 &amp;nbsp; &amp;nbsp;26 &amp;nbsp; &amp;nbsp;0.02 &amp;nbsp;2.846154&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;35 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;VHDL &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 64 &amp;nbsp; &amp;nbsp;14 &amp;nbsp; &amp;nbsp;0.02 &amp;nbsp;4.571429&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;36 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Io &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 57 &amp;nbsp; &amp;nbsp; 3 &amp;nbsp; &amp;nbsp;0.02 19.000000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;37 SuperCollider &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; 53 &amp;nbsp; &amp;nbsp;11 &amp;nbsp; &amp;nbsp;0.02 &amp;nbsp;4.818182&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;38 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Arc &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 48 &amp;nbsp; &amp;nbsp;15 &amp;nbsp; &amp;nbsp;0.01 &amp;nbsp;3.200000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;39 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Delphi &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 43 &amp;nbsp; &amp;nbsp;16 &amp;nbsp; &amp;nbsp;0.01 &amp;nbsp;2.687500&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;40 &amp;nbsp; &amp;nbsp; &amp;nbsp;Assembly &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 41 &amp;nbsp; &amp;nbsp; 5 &amp;nbsp; &amp;nbsp;0.01 &amp;nbsp;8.200000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;41 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Boo &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 41 &amp;nbsp; &amp;nbsp; 6 &amp;nbsp; &amp;nbsp;0.01 &amp;nbsp;6.833333&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;42 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Nu &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 40 &amp;nbsp; &amp;nbsp; 4 &amp;nbsp; &amp;nbsp;0.01 
10.000000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;43 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Eiffel &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 39 &amp;nbsp; &amp;nbsp;15 &amp;nbsp; &amp;nbsp;0.01 &amp;nbsp;2.600000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;44 &amp;nbsp;CoffeeScript &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 34 &amp;nbsp; &amp;nbsp; 2 &amp;nbsp; &amp;nbsp;0.01 17.000000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;45 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Vala &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 27 &amp;nbsp; &amp;nbsp; 3 &amp;nbsp; &amp;nbsp;0.01 &amp;nbsp;9.000000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;46 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Racket &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 20 &amp;nbsp; &amp;nbsp; 8 &amp;nbsp; &amp;nbsp;0.01 &amp;nbsp;2.500000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;47 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Self &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;7 &amp;nbsp; &amp;nbsp; 3 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp;2.333333&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;48 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Duby &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;4 &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp; &amp;nbsp; &amp;nbsp; Inf&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;49 &amp;nbsp; &amp;nbsp; &amp;nbsp; Max/MSP &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;4 &amp;nbsp; &amp;nbsp; 2 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp;2.000000&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;50 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;sclang &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;2 &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp; &amp;nbsp; &amp;nbsp; Inf&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;51 &amp;nbsp; Common Lisp &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;0 &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp; &amp;nbsp; &amp;nbsp; NaN&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;52 &amp;nbsp; &amp;nbsp;Emacs Lisp &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;0 &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp; &amp;nbsp; &amp;nbsp; NaN&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" 
style="font-size: x-small;"&gt;53 &amp;nbsp; &amp;nbsp; Pure Data &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;0 &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp; &amp;nbsp; &amp;nbsp; NaN&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;54 &amp;nbsp;Visual Basic &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;0 &amp;nbsp; &amp;nbsp; 0 &amp;nbsp; &amp;nbsp;0.00 &amp;nbsp; &amp;nbsp; &amp;nbsp; NaN&lt;/span&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-788296136388684696?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/788296136388684696/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/github-stats-on-programming-languages.html#comment-form' title='13 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/788296136388684696'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/788296136388684696'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/github-stats-on-programming-languages.html' title='GitHub Stats on Programming Languages'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://4.bp.blogspot.com/_FsLa1cMTCWU/TGBlqQEs5yI/AAAAAAAAAXM/HkwNFJnohoI/s72-c/top_10_github_languages_by_reps.png' height='72' width='72'/><thr:total>13</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-7882648119500402627</id><published>2010-08-07T14:11:00.000-07:00</published><updated>2010-08-07T14:52:25.291-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Ruby'/><category scheme='http://www.blogger.com/atom/ns#' term='Data Mining'/><title type='text'>Iris Data Set Visualization Web App in &lt; 100 LOC</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TF3Dku0BncI/AAAAAAAAAWE/W8dXlMOJvVg/s1600/sepal_length_width_species1.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TF3Dku0BncI/AAAAAAAAAWE/W8dXlMOJvVg/s400/sepal_length_width_species1.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;     &lt;td&gt;&lt;br /&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;amp;bc1=000000&amp;amp;IS2=1&amp;amp;bg1=FFFFFF&amp;amp;fc1=000000&amp;amp;lc1=0000FF&amp;amp;t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;m=amazon&amp;amp;f=ifr&amp;amp;md=10FE9736YVPPT7A0FBG2&amp;amp;asins=0321321367" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt;     &lt;td&gt;&lt;br /&gt;The &lt;a href="http://en.wikipedia.org/wiki/Iris_flower_data_set"&gt;iris data set&lt;/a&gt; pops up pretty regularly in statistical literature. &amp;nbsp;It consists of 50 records from each of three species of Iris flowers (Iris setosa, Iris virginica and Iris versicolor). 
&amp;nbsp; I came across it recently while reading &lt;a href="http://www.amazon.com/dp/0321321367?tag=rcha-20&amp;amp;camp=213381&amp;amp;creative=390973&amp;amp;linkCode=as4&amp;amp;creativeASIN=0321321367&amp;amp;adid=1G4GRMHGAPAB51F5PVWH&amp;amp;"&gt;Introduction to Data Mining&lt;/a&gt;. &amp;nbsp; It comes up in several places in the book to demonstrate techniques for visualization and classification. &amp;nbsp;&lt;span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px;"&gt;There have been a number of articles, posts and videos about R and the web in recent times.&amp;nbsp;&lt;span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px;"&gt;&amp;nbsp;This post presents a way of creating some plots for the data set using R and a Ruby Web Application.&lt;/span&gt;&lt;/span&gt;&lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;b&gt;R and the Web&lt;/b&gt;&lt;br /&gt;There are a number of situations where it would make sense to expose a data set and wrap a certain amount of R functionality within a web application. &amp;nbsp;Non-R users might need access to the data. &amp;nbsp;You might want to provide a presentation of findings available through the web. &amp;nbsp;You might even want to collaborate with other R developers by posting an HTML table that they can read in using XML. &amp;nbsp;In time, I expect that some standard web application frameworks will emerge to fill in the gap. &amp;nbsp;And so the current application is some “thinking out loud” on my part in that direction.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Prerequisites&lt;/b&gt;&lt;br /&gt;In order to run this application on your machine, R and Ruby must be installed and functional. &amp;nbsp; The R packages ggplot2, R2HTML and Rserve are used as well as the iris data set. 
&amp;nbsp;This web app was written and tested in Windows, but should run in *nix with small modifications.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Ruby Configuration&lt;/b&gt;&lt;br /&gt;Install the package to allow communication with RServe&lt;br /&gt;&lt;br /&gt;&amp;nbsp;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp; &amp;nbsp;gem install rserve-client&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;On *nix systems folks often sudo to install.&lt;br /&gt;&lt;br /&gt;If for some reason you do not go the normal route of installing a gem (e.g. you downloaded from github at http://github.com/clbustos/Rserve-Ruby-client), make sure that your ruby $LOAD_PATH has the library available when you run the program.&lt;br /&gt;&lt;br /&gt;You can even do this in line in the ruby program by including a line like the following at the beginning of the program:&lt;br /&gt;&lt;br /&gt;$LOAD_PATH&amp;lt;&amp;lt;'C:\clbustos-Rserve-Ruby-client-v0.2.4-2-g47b0da7\lib' &lt;br /&gt;&lt;br /&gt;The application itself is available on &lt;a href="http://github.com/ezgraphs/iris_web_app"&gt;github&lt;/a&gt;. &lt;br /&gt;&lt;b&gt;Run the Web Application&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;Start Rserve. 
&lt;br /&gt;&amp;nbsp;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;C:\Program Files\R\R-2.10.1\library\Rserve&amp;gt; Rserve&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;You should see output like this if it started successfully.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;Rserve: Ok, ready to answer queries.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Start the Ruby web application - specify a port if you like with the -p option.&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;iris_data_set_webapp.rb -p 4445&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;With these steps complete, you should be able to hit the application at http://localhost:4445. &amp;nbsp;Three links are available. &amp;nbsp;The r_version link simply demonstrates that Ruby (through the sinatra framework and Rserve client) can communicate with R. &amp;nbsp;Clicking this link causes the version of R to display in the browser.&lt;br /&gt;&lt;br /&gt;The second link is to the iris data itself. &amp;nbsp;This page displays a formatted HTML table rendered using the R2HTML package. &amp;nbsp;Admittedly, this is a bit of a confusion of concerns (view information being provided by R) but it provided a convenient mechanism to convert a data frame to an HTML table.&lt;br /&gt;&lt;br /&gt;The third link allows you to modify the aesthetics of the plot. &amp;nbsp;Specifically, the x, y and color can be set to any of the available variables. 
&amp;nbsp;The result is a "grammatically&amp;nbsp;correct"&amp;nbsp;chart.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Code Walkthrough and Commentary&lt;/b&gt;&lt;br /&gt;The package declarations could be written out&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; require 'rubygems'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; require 'sinatra'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&amp;nbsp;&amp;nbsp;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt; &amp;nbsp;...&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Instead all of the packages are included in an array (surrounded by brackets). &amp;nbsp;Then each require directive is issued as we iterate through each element in the array.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;['rubygems', 'sinatra', 'rserve','fileutils','haml'].each{|r|require r}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The packages being used are&lt;br /&gt;&lt;br /&gt;&lt;i&gt;rubygems -&amp;nbsp;&lt;/i&gt;the ruby packaging system itself&lt;br /&gt;&lt;i&gt;sinatra -&amp;nbsp;&lt;/i&gt;a minimal web app DSL&lt;br /&gt;&lt;i&gt;rserve -&amp;nbsp;&lt;/i&gt;to integrate with R&lt;br /&gt;&lt;i&gt;fileutils &amp;nbsp;-&amp;nbsp;&lt;/i&gt;some convenience methods for file system access&lt;br /&gt;&lt;i&gt;haml &lt;/i&gt;- well, this one requires some explanation...&lt;br /&gt;&lt;br /&gt;&lt;a href="http://haml-lang.com/"&gt;HAML &lt;/a&gt;is one of the many Ruby mark up/templating languages that is 
in vogue today among Rubyists. &amp;nbsp;It seems to save a few keystrokes from writing straight HTML, but it slows me down since I think in HTML and end up working backwards to writing the HAML. &amp;nbsp;I kind of&amp;nbsp;like the pythonesque interpretation of indentation being meaningful and that the HTML looks pretty.&lt;br /&gt;&lt;br /&gt;Anyway, it is used here but I am still on the fence about it. &lt;br /&gt;&lt;br /&gt;To experiment with haml using irb, just require haml, create an engine and output the results to HTML.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;require 'haml'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Haml::Engine.new('%h3 hello world').to_html&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Back to the web app. &amp;nbsp;Create a global connection to Rserve.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;include Rserve&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;$c = Connection.new&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&amp;nbsp;The following lines kinda-sorta reload Sinatra most of the time which allows &amp;nbsp;you to change code and view the changes without starting and stopping the server. 
&amp;nbsp;Only it does not always work &amp;nbsp;:) …but it works enough for me that I included it and just restart if things are not updating the way I expect.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;configure do&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;Sinatra::Application.reset!&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;use Rack::Reloader&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;This looks at a line that comes from the web app source file itself. &amp;nbsp;Yep, kind of wild (echoes of &lt;a href="http://camping.rubyforge.org/"&gt;camping&lt;/a&gt;). &amp;nbsp;If the line matches the regexp &amp;nbsp;and is one of the get functions below (other than the index itself), we pull out the url path and slap it in an HTML anchor. 
&amp;nbsp;This is a convenient way to have a home screen during development where each get URL can be invoked.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;def anchor(line)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;if line=~/get \'\/([a-z|A-Z])/&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; l=line.split[1].gsub("'",''); &lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; haml "%a{:href =&amp;gt; '#{l}'}&amp;gt; #{l} \n%br" &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;end &lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Return a string of html with a heading that says “Links” and a link to each “get” URL available in the web application. 
&amp;nbsp;The list of links is generated by reading the contents of this file, and creating a hyperlink (if possible) using the “anchor” method above.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;get '/' do&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;html=haml '%h3 Links'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;File.open(__FILE__).readlines.each{|l|html+=anchor(l).to_s}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;html&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;This is a simple example of how integration with R works. &amp;nbsp; The connection to RServe named $c is sent a string of R code to evaluate. 
&amp;nbsp;We expect a single result that we interpret as a string.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;get '/r_version' do &lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;$c.eval("R.version.string").as_string &amp;nbsp; &lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;This method creates an R script, evaluates it and returns a link to an image that will appear in the public directory that is in the same directory with this file. &amp;nbsp;The &amp;lt; &amp;lt; SCRIPT syntax is sometimes called a heredoc. &amp;nbsp;It is just a convenient way to create multi line strings - you could use double quotes in this context as well. 
&amp;nbsp;The variables x, y and color that are passed in are substituted &amp;nbsp;where you see #{x}, #{y}, #{color}.&lt;script a="" called="" is="" sometimes="" syntax=""&gt;&lt;a href="http://en.wikipedia.org/wiki/Here_document"&gt;heredoc&lt;/a&gt;.&lt;span style="mso-spacerun:yes"&gt;&amp;nbsp; &lt;/span&gt;It is just a convenient way to create multiline strings - you could use double quotes in this context as well.&lt;span style="mso-spacerun:yes"&gt;&amp;nbsp; &lt;/span&gt;The variables x, y and color that are passedin are substituted &lt;span style="mso-spacerun:yes"&gt;&amp;nbsp;&lt;/span&gt;where you see #{x},#{y}, #{color}.&lt;script #{color}.="" #{x},="" #{y},="" &amp;nbsp;it="" &amp;nbsp;the="" &amp;nbsp;where="" -="" a="" and="" are="" as="" called="" color="" context="" convenient="" could="" create="" double="" heredoc.="" in="" is="" just="" line="" multi="" passed="" quotes="" see="" sometimes="" strings="" substituted="" syntax="" that="" this="" to="" use="" variables="" way="" well.="" x,="" y="" you=""&gt;&lt;/script&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;def irisplot(x,y,color)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;script= &amp;lt; &amp;lt; SCRIPT&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&amp;nbsp;&amp;nbsp;ggplot(&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;data=iris,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; aes(x=#{x}, y=#{y}, color=#{color})&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; ) + geom_point()&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; ggsave('#{FileUtils.pwd}/public/irisplot.png')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;SCRIPT&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;$c.eval(script)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;" &amp;lt; img src='irisplot.png' width='600', height='600' &amp;gt; "&lt;br /&gt;&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;end&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;&lt;i&gt;Note: The spaces between the less than signs for the HEREDOC are artificial - they were required because blogger was not correctly interpreting them together. &amp;nbsp;Similar problem with the image tag - I just added spaces to prevent rendering issues.&lt;/i&gt;&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;This example demonstrates how to open an independent R script and run it. &amp;nbsp;See the iris.R script itself for more information about what is going on. &amp;nbsp;In general, the R2HTML package is being used to create a file whose handle is returned. &amp;nbsp;We then read the contents&amp;nbsp;of the file in and these are returned as HTML. 
&amp;nbsp;The contents of the file are an HTML table that represents the iris data frame.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;get '/iris_data' do &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;url=$c.eval(File.open('iris.R').readlines.join("\n")).as_string&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp;File.open(url).readlines()&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;This page can be scraped using R and two lines of code. 
&amp;nbsp;You could read this data into R running on another computer on the network:&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;library(XML)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;df=readHTMLTable('http://nameofmachine:4445/iris')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The following creates an iris plot using the parameters passed in.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;post '/plot' do&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;irisplot(params['x'],params['y'],params['color'])&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Finally, returns the form that allows you to input which fields are used to create a plot for the iris data set.&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;get '/iris_plot_input' do&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;# Retrieve the iris data set column names into a ruby class variable.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;# These will be used to populate dropdowns.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;@colnames=$c.eval('data(iris);colnames(iris);').as_strings&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;# WARNING Hard Coded Defaults below. 
&amp;nbsp;I used these so that&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;# we would have reasonable values by default.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;# Put all of the HAML markup in a string&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;html=&amp;lt;&amp;lt;HAML&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;%form{ :action =&amp;gt; "/plot", :method =&amp;gt; "post"}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;%table&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;%tr&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;%td&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%label{:for =&amp;gt; "name"} x:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;%td &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%select{:name=&amp;gt;'x'} &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;= @colnames.each do |col|&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%option{:value=&amp;gt; col, :selected =&amp;gt; (col == 'Sepal.Length')}&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;=col&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;%tr&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;%td &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%label{:for =&amp;gt; "name"} y: &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;%td &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%select{:name=&amp;gt;'y'} &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;= @colnames.each do |col|&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%option{:value=&amp;gt; col, :selected =&amp;gt; (col == 'Sepal.Width')}&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;=col&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;%tr&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;%td &amp;nbsp; &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%label{:for =&amp;gt; "name"} color:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;%td &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%select{:name=&amp;gt;'color'} &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;= @colnames.each do |col|&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%option{:value=&amp;gt; col, :selected =&amp;gt; (col == 'Species')}&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', 
Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;=col&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp;%tr&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;%td&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;%input{:type =&amp;gt; "submit", :value =&amp;gt; "Create Plot"}&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;HAML&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;# Render it with the HAML engine&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;haml html&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br 
/&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;end&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Examples produced by the app:&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TF3VZCGq35I/AAAAAAAAAWs/7UpiPNEkOsA/s1600/sepal_width_length_species2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TF3VZCGq35I/AAAAAAAAAWs/7UpiPNEkOsA/s400/sepal_width_length_species2.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TF3VfHbIHbI/AAAAAAAAAW0/tcyJTDLkYgk/s1600/petal_width_length_species4.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TF3VfHbIHbI/AAAAAAAAAW0/tcyJTDLkYgk/s400/petal_width_length_species4.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TF3Vj3uoqBI/AAAAAAAAAW8/BNuQYv7oXXo/s1600/species_petal_length_width5.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TF3Vj3uoqBI/AAAAAAAAAW8/BNuQYv7oXXo/s400/species_petal_length_width5.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;This application is amazing in that it simply pulls together some of the best programming resources around. &amp;nbsp;In well under 100 lines of code it is simple and easy to maintain. &amp;nbsp;With that in mind, I have been thinking about other directions that could be used to generalize this approach. 
&amp;nbsp;One is simply to bundle Sinatra with R (perhaps using JRuby). &amp;nbsp;Sinatra web apps could then be dynamically based upon data sets (kind of like the current app) or around R functions (kind of like the fgui package). &amp;nbsp;It seems like Hadley Wickham had a similar idea first and has a &lt;a href="http://github.com/hadley/sinartra"&gt;related project on Github&lt;/a&gt;. His approach is to port Sinatra to R so that web apps could be developed in R without the use of another language such as Ruby.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;script&gt;&lt;/script&gt;&lt;/span&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-7882648119500402627?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/7882648119500402627/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/iris-data-set-visualization-web-app-in.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7882648119500402627'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7882648119500402627'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/iris-data-set-visualization-web-app-in.html' title='Iris Data Set Visualization Web App in &lt; 100 LOC'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/_FsLa1cMTCWU/TF3Dku0BncI/AAAAAAAAAWE/W8dXlMOJvVg/s72-c/sepal_length_width_species1.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-3755972341829489455</id><published>2010-08-04T14:59:00.000-07:00</published><updated>2010-08-05T13:50:38.450-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='Maps'/><title type='text'>U.S. Unemployment Data: Animated Choropleth Maps</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFncpRRvDzI/AAAAAAAAAVE/IK2MU9MNrdo/s1600/2010_unemployment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;br /&gt;&lt;img border="0" height="307" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFncpRRvDzI/AAAAAAAAAVE/IK2MU9MNrdo/s400/2010_unemployment.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;I was curious about creating maps with ggplot2 - and was happy to find that great minds have long since tackled this topic. &amp;nbsp;Folks who are interested in how to learn about R (and ggplot2) might be interested in the process. 
&amp;nbsp;(There should be a "greatest hits" listing related to R topics like this - the best I can think of to do is revive them in a forum like this).&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;After looking through the ggplot2 book (which had some simple examples) and the online documentation, I did a few searches and came across the topic on &lt;a href="http://stackoverflow.com/questions/1260965/developing-geographic-thematic-maps-with-r"&gt;stackoverflow&lt;/a&gt;. &amp;nbsp;At the end of this post there is a reference to previous discussions in the R community. &amp;nbsp;It seems that an article on&amp;nbsp;&lt;a href="http://flowingdata.com/2009/11/12/how-to-make-a-us-county-thematic-map-using-free-tools/"&gt;Flowing Data&lt;/a&gt; (using Python) inspired a bit of discussion of implementations of &lt;a href="http://en.wikipedia.org/wiki/Choropleth_map"&gt;choropleth maps&lt;/a&gt; with R. &amp;nbsp;And so a challenge was issued to &lt;a href="http://blog.revolutionanalytics.com/2009/11/choropleth-map-r-challenge.html"&gt;do this in R&lt;/a&gt;. &amp;nbsp;&lt;a href="http://blog.revolutionanalytics.com/2009/11/choropleth-challenge-result.html"&gt;Several solutions&lt;/a&gt; were created and reviewed - and I decided to replicate the first response. &amp;nbsp;Fortunately Hadley Wickham included his solution in his &lt;a href="http://gist.github.com/233134"&gt;github repository&lt;/a&gt;. 
&amp;nbsp;A slightly modified version of his original chart is shown below.&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFncpRRvDzI/AAAAAAAAAVE/IK2MU9MNrdo/s1600/2010_unemployment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFncj8IeAII/AAAAAAAAAU8/KvK9VocDmh8/s1600/2009_unemployment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="307" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFncj8IeAII/AAAAAAAAAU8/KvK9VocDmh8/s400/2009_unemployment.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;br /&gt;My revised version included a title, legend name and call to coord_map. 
&amp;nbsp;I followed the same process as the original challenge - downloaded &lt;a href="http://www.bls.gov/lau/laucntycur14.zip"&gt;the data&lt;/a&gt; from the Bureau of Labor Statistics and saved the relevant sheet as a csv.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;m=ggplot(choropleth, aes(long, lat, group = group)) +&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; geom_polygon(aes(fill = rate_d), colour = alpha("white", 1/2), size = 0.2) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; geom_polygon(data = state_df, colour = "white", fill = NA) +&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; scale_fill_brewer(pal = "PuRd", name="Rate")&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;m + coord_map() + opts(title="US Unemployment Data (2009)")&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Worked like a charm. &amp;nbsp;Note that there are some known problems with this solution. 
&amp;nbsp;Some additional mapping/data cleanup is required to get a complete view of the data set (which explains why&amp;nbsp;Louisiana&amp;nbsp;is missing). &amp;nbsp;I thought it would be neat to animate the results. &amp;nbsp;So I subcontracted out animation using Gimp to a local expert. &amp;nbsp;He created the following - a quick attempt to illustrate the changes over time. &amp;nbsp;It is not ideal; you kind of have to pick a point, stare at it, and watch for changes over time.&lt;br /&gt;&lt;br /&gt;Incidentally, blogger does not support animated gifs directly,&lt;s&gt; so I added the image in Google docs and linked to it from there&lt;/s&gt;. &amp;nbsp;This does not seem to work due to authentication issues. &amp;nbsp;&lt;a href="http://picasaweb.google.com/lh/photo/QPGNui3NE6-YVALn1gJSqms5uul4AZf9bJTIjyrZFWc?feat=directlink"&gt;Uploaded it to Picasa&lt;/a&gt; and you can view it there instead.&lt;br /&gt;&lt;br /&gt;I know that there is an &lt;a href="http://cran.r-project.org/web/packages/animation/index.html"&gt;R animation package&lt;/a&gt;, but was not sure of its applicability in this scenario. 
&amp;nbsp;How would you create an animation that illustrates changes on a map over time?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-3755972341829489455?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/3755972341829489455/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/us-unemployment-data-animated.html#comment-form' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/3755972341829489455'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/3755972341829489455'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/us-unemployment-data-animated.html' title='U.S. Unemployment Data: Animated Choropleth Maps'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_FsLa1cMTCWU/TFncpRRvDzI/AAAAAAAAAVE/IK2MU9MNrdo/s72-c/2010_unemployment.png' height='72' width='72'/><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-7742982902869283942</id><published>2010-08-03T14:52:00.000-07:00</published><updated>2010-08-03T14:52:46.076-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='iPhone'/><title type='text'>Suggestions for R-Chart iPhone App?</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a 
href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNMeDMcNDI/AAAAAAAAATs/FZniE1o50N8/s1600/r-chart-iphone.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNMeDMcNDI/AAAAAAAAATs/FZniE1o50N8/s320/r-chart-iphone.png" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Have any ideas for news sources to include in &amp;nbsp;the new &lt;a href="http://www.r-chart.com/2010/07/free-r-chart-iphone-app.html"&gt;R-Chart iPhone App&lt;/a&gt;? &amp;nbsp;I am hoping to make some revisions and fixes and figured I would get some feedback from anyone who happened to download it. &amp;nbsp;Specifically, I am planning on removing the message button and was thinking of including another RSS feed. &amp;nbsp;What news sources on R would you like to see included? &amp;nbsp;I am considering adding CRAN...although these updates already show up under the Twitter listing. &amp;nbsp;Any other ideas?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-7742982902869283942?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/7742982902869283942/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/suggestions-for-r-chart-iphone-app.html#comment-form' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7742982902869283942'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/7742982902869283942'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/suggestions-for-r-chart-iphone-app.html' title='Suggestions for R-Chart iPhone 
App?'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNMeDMcNDI/AAAAAAAAATs/FZniE1o50N8/s72-c/r-chart-iphone.png' height='72' width='72'/><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-1053317135683326624</id><published>2010-08-03T14:38:00.000-07:00</published><updated>2010-08-03T14:38:55.738-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Oracle'/><category scheme='http://www.blogger.com/atom/ns#' term='RODM'/><title type='text'>RODM Article on the Oracle Technology Network</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFiLrQrYphI/AAAAAAAAAUs/7CF-Vu6AIl4/s1600/rodm_data_miner.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFiLrQrYphI/AAAAAAAAAUs/7CF-Vu6AIl4/s320/rodm_data_miner.PNG" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;A &lt;a href="http://www.oracle.com/technetwork/articles/datawarehouse/saternos-r-161569.html"&gt;new article&lt;/a&gt; over at OTN demonstrates the new &lt;a href="http://cran.fhcrc.org/web/packages/RODM/index.html"&gt;RODM package&lt;/a&gt; that allows you to control Oracle Data Mining within the database itself using R. 
&amp;nbsp;This and the recent announcement over at &lt;a href="http://www.revolutionanalytics.com/news-events/news-room/2010/revolution-analytics-brings-big-data-analysis-to-R.php"&gt;Revolution Analytics&lt;/a&gt;&amp;nbsp;both reveal more solutions for processing large data sets and suggest new applications for the use of R.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-1053317135683326624?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/1053317135683326624/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/08/rodm-article-on-oracle-technology.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1053317135683326624'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/1053317135683326624'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/08/rodm-article-on-oracle-technology.html' title='RODM Article on the Oracle Technology Network'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_FsLa1cMTCWU/TFiLrQrYphI/AAAAAAAAAUs/7CF-Vu6AIl4/s72-c/rodm_data_miner.PNG' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-4289902767273984878</id><published>2010-07-30T16:10:00.000-07:00</published><updated>2010-07-31T09:37:24.961-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' 
term='ggplot2'/><title type='text'>Thinking about Graphs</title><content type='html'>&lt;div class="MsoNormal"&gt;&lt;div style="text-align: center;"&gt;&lt;img src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNaOmNO0XI/AAAAAAAAAUU/kZ-7u_WdzQw/s400/scatterplot_abline_line_best_fit.png" /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;A &lt;a href="http://online.wsj.com/article/SB10001424052748703467304575383131592767868.html?mod=WSJ_LifeStyle_Lifestyle_5"&gt;recent Wall Street Journal article&lt;/a&gt; ruminated about the degree to which language shapes thought (rather than the other way around). &amp;nbsp;This idea has rather profound implications in the more specific domain of programming languages. We initially learn a programming language but later “think” in terms of the language.&amp;nbsp;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;To some degree, we are constrained in our ability to solve problems if we only know a single language. This situation has been recognized in different ways by the programming community. &lt;a href="http://en.wikipedia.org/wiki/Logo_(programming_language)"&gt;The Logo programming language&lt;/a&gt; was built based upon &lt;a href="http://en.wikipedia.org/wiki/Constructionist_learning"&gt;constructionist learning theory&lt;/a&gt; and was intended to provide a “mental model” for children to come to understand mathematical constructs. In recent times, many programmers have committed to being polyglots, learning new languages as a part of professional development. Their concern is not always to learn the latest language that they will need to work, but to find out new ways of conceptualizing problems and structuring solutions. This leads to a more subtle goal of ggplot2. &amp;nbsp;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;The &lt;a href="http://had.co.nz/ggplot2/"&gt;ggplot2 package&lt;/a&gt; is appealing because it makes it possible to quickly create appealing graphs and charts. 
However, it is based upon an underlying “grammar of graphics”. This “Grammar” serves a number of purposes. It provides a structure for the API implementation. The API is designed so that you specify what you want rather than how to create it.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;Another, perhaps more subtle effect is that it also can influence the way that an R programmer thinks about creating a graph. With this in mind, it is helpful to “think through” the process of creating a chart in the terms presented by ggplot2 in a more disciplined fashion.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: xx-large;"&gt;&lt;b&gt;Components of a Plot&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-family: 'Times New Roman'; font-size: 12pt;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;According to&amp;nbsp;Hadley Wickham (the author of ggplot and the&amp;nbsp;&lt;a href="https://www.amazon.com/dp/0387981403?tag=rcha-20&amp;amp;camp=0&amp;amp;creative=0&amp;amp;linkCode=as1&amp;amp;creativeASIN=0387981403&amp;amp;adid=156K0MSSGCDDWNAG53QY&amp;amp;"&gt;ggplot book&lt;/a&gt;), the following components make up a plot:&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;ul&gt;&lt;li&gt;Data&lt;/li&gt;&lt;li&gt;Aesthetic Mappings&lt;/li&gt;&lt;li&gt;Geometric Objects&lt;/li&gt;&lt;li&gt;Statistical Transformations&lt;/li&gt;&lt;li&gt;Position Adjustment&lt;/li&gt;&lt;li&gt;Faceting&lt;/li&gt;&lt;li&gt;Coordinate System&lt;/li&gt;&lt;/ul&gt;&lt;div&gt;The &lt;a href="http://had.co.nz/ggplot2/"&gt;Reference Manual&lt;/a&gt;&amp;nbsp;is also organized around these components:&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;ul&gt;&lt;li&gt;Geoms (Geometric Objects)&lt;/li&gt;&lt;li&gt;Statistics (Statistical Transformations)&lt;/li&gt;&lt;li&gt;Scales&lt;/li&gt;&lt;li&gt;Coordinate System&lt;/li&gt;&lt;li&gt;Faceting&lt;/li&gt;&lt;li&gt;Position 
Adjustment&amp;nbsp;&lt;/li&gt;&lt;/ul&gt;&lt;div&gt;&lt;div class="MsoNormal"&gt;He has organized the material slightly differently in a &lt;a href="http://had.co.nz/vanderbilt-vis/2a-theory.pdf"&gt;presentation at Vanderbilt&lt;/a&gt;.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;ul&gt;&lt;li&gt;Default Data Set&lt;/li&gt;&lt;li&gt;Set of Aesthetic Mappings&lt;/li&gt;&lt;li&gt;Multiple Layers (points, jittered points, box plots, histogram)&lt;/li&gt;&lt;li&gt;Scale for Each Aesthetic&lt;/li&gt;&lt;li&gt;Faceting Specification&lt;/li&gt;&lt;li&gt;Coordinate System&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;In this case a layer comprises several of the elements listed earlier.&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;ul&gt;&lt;li&gt;Data set and Aesthetic Mapping&lt;/li&gt;&lt;li&gt;Geometric Object.&lt;/li&gt;&lt;li&gt;Statistics&lt;/li&gt;&lt;li&gt;Position Adjustment&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Data is not included as a part of ggplot2.&amp;nbsp; In addition, algebra (from a component identified by Wilkinson) is not included as it is in the realm of data transformation rather than actual chart creation.&amp;nbsp;&amp;nbsp;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;The individual components of the grammar are fairly well defined regardless of where they appear on a list. The possible interactions between the components are rather complex. The construction of traditional charts is defined by a distinct combination of components. For example, the combination of geom and a stat is significant. At other times, the coordinate system is a defining factor. 
(A &lt;span id="goog_616165035"&gt;&lt;/span&gt;&lt;a href="http://www.r-chart.com/2010/07/pie-charts-in-ggplot2.html"&gt;pie char&lt;span id="goog_616165036"&gt;&lt;/span&gt;t&lt;/a&gt; is a one column stacked bar chart that is mapped to a polar coordinate system).&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;br /&gt;&lt;b style="mso-bidi-font-weight: normal;"&gt;&lt;u&gt;Chart&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Geom&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Stat&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Coordinate System&lt;o:p&gt;&lt;/o:p&gt;&lt;/u&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Scatterplot&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; point&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;identity&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;cartesian&lt;/div&gt;&lt;div class="MsoNormal"&gt;Histogram&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; bar&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; bin&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cartesian&lt;/div&gt;&lt;div class="MsoNormal"&gt;Pie Chart&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
bar&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; identity&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;polar&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;&lt;b&gt;Iris Data Set&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;The&lt;a href="http://en.wikipedia.org/wiki/Iris_flower_data_set"&gt; iris data set&lt;/a&gt; is a &lt;a href="http://en.wikipedia.org/wiki/Data_set#Classic_data_sets"&gt;well known set&lt;/a&gt; of multivariate data introduced by Ronald Fisher in the 1930s. The first few rows of the set are as follows:&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;Sepal.Length Sepal.Width Petal.Length Petal.Width &amp;nbsp; &amp;nbsp;Species&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 5.1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3.5&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
1.4&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0.2&amp;nbsp; setosa&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 4.9&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3.0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1.4&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0.2&amp;nbsp; setosa&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;3&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 4.7&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3.2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1.3&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0.2&amp;nbsp; setosa&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;4&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 4.6&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3.1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1.5&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0.2&amp;nbsp; setosa&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;5&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 5.0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3.6&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1.4&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0.2&amp;nbsp; setosa&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: x-small;"&gt;6&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 5.4&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3.9&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;1.7&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0.4&amp;nbsp; setosa&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;The&amp;nbsp;charts 
below will use the following components:&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: 16px;"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;Data - iris data set&lt;/li&gt;&lt;li&gt;Mapping to aesthetic&lt;/li&gt;&lt;ul&gt;&lt;li&gt;x - Petal.Length&lt;/li&gt;&lt;li&gt;y - Petal.Width&lt;/li&gt;&lt;li&gt;Color - Species&lt;/li&gt;&lt;/ul&gt;&lt;li&gt;Geometric Object - point&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;A scatterplot that includes these components can be created using qplot. &amp;nbsp;&lt;b&gt;However, see Harlan's comment below (and &lt;a href="http://www.harlan.harris.name/2010/03/ggplot-and-concepts-whats-right-and-whats-wrong/"&gt;his blog&lt;/a&gt;, which I appear to be echoing) - this is probably not the best way to start if one wants to "think" in the grammar rather than simply produce a good looking graph in the smallest number of keystrokes.  
Better to use ggplot as demonstrated later in the post.&lt;/b&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;qplot(Petal.Length, Petal.Width, data=iris, color=Species)&lt;/span&gt;&lt;/span&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFNYGX31ECI/AAAAAAAAAT0/L_TOlzksX8c/s1600/scatterplot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFNYGX31ECI/AAAAAAAAAT0/L_TOlzksX8c/s400/scatterplot.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;The main decision that needed to be made to construct this call was how to map the aesthetics.&amp;nbsp; It is important to consider whether each variable being mapped is discrete or continuous to create a meaningful (and not just grammatically correct) result.&amp;nbsp;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;b style="mso-bidi-font-weight: normal;"&gt;&lt;u&gt;Discrete &amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
Continuous&lt;o:p&gt;&lt;/o:p&gt;&lt;/u&gt;&lt;/b&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;Color&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Distinct color&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Gradient (red to blue)&lt;/div&gt;&lt;div class="MsoNormal"&gt;Size&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Distinct steps&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Radius based on value&lt;/div&gt;&lt;div class="MsoNormal"&gt;Shape&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Distinct Shape&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;N/A&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;We can even include more information by mapping another attribute (sepal area – derived from sepal length times width) to size.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;qplot(Petal.Length,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;Petal.Width,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" 
style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;data=iris,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;size=Sepal.Length * Sepal.Width,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;color=Species)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TFNYr8lK7pI/AAAAAAAAAT8/3oo2mbAkfAY/s1600/scatterplot_color_size_shaper.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TFNYr8lK7pI/AAAAAAAAAT8/3oo2mbAkfAY/s400/scatterplot_color_size_shaper.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;A great deal of information can be encoded using the various data attributes.&amp;nbsp; The plot gives some indication regarding the petal length and width (based upon the position), species (based upon color) and sepal area (based upon size).&amp;nbsp; However, not every value is clearly in view.&amp;nbsp; There are a few changes that might provide an indication that values might overlap.&amp;nbsp; A jitter might be used.&amp;nbsp; A better alternative is to set an alpha value that provides a degree of transparency. 
&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span style="font-family: 'Times New Roman'; font-size: 12pt;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span style="font-family: 'Times New Roman'; font-size: 12pt;"&gt;&lt;span class="Apple-style-span" style="font-size: medium;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;qplot(Petal.Length,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; Petal.Width,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; data=iris,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; size=Sepal.Length * Sepal.Width,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; color=Species,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&amp;nbsp;&amp;nbsp; alpha=0.3)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span style="font-family: 'Times New Roman'; font-size: 12pt;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span style="font-family: 'Times New Roman'; font-size: 12pt;"&gt;&lt;span class="Apple-style-span" style="font-size: medium;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFNY-SCtt4I/AAAAAAAAAUE/MYkRBrcSRYo/s1600/scatterplot_color_size_shaper_alpha.png" imageanchor="1" 
style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFNY-SCtt4I/AAAAAAAAAUE/MYkRBrcSRYo/s400/scatterplot_color_size_shaper_alpha.png" width="400" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;We can be more explicit about what is going on using ggplot rather than qplot.&amp;nbsp; The basic scatterplot can be created and in this case will be stored in a variable.&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p = ggplot(data=iris,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" 
style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; aes(Petal.Length,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; Petal.Width,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; color=Species)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; ) + geom_point()&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;With the original plot in a variable, we can add components and immediately see 
their effect as it is rendered.&amp;nbsp; A line might help discern a trend in the original scatterplot. When applying a stat, you need to – well - &amp;nbsp;think statistically.&amp;nbsp; Consider the following.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p + stat_abline()&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFNZzrw51nI/AAAAAAAAAUM/FYcFGw_8Wvg/s1600/scatterplot_abline_default.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFNZzrw51nI/AAAAAAAAAUM/FYcFGw_8Wvg/s400/scatterplot_abline_default.png" width="400" 
/&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;The line created doesn’t mean much – this is because it is simply a line with a slope of 1 and intercept of zero.&amp;nbsp; A more meaningful line can be created by determining the line of best fit.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;coef(lm(Petal.Width ~ Petal.Length, data=iris))&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;o:p&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/o:p&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;o:p&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;# this returns&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/o:p&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;# (Intercept) Petal.Length &lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;# &amp;nbsp;-0.3630755&amp;nbsp;&amp;nbsp;&amp;nbsp; 0.4157554&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p 
+&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;stat_abline(intercept=-0.363,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;slope=0.416,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;color='purple')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNaOmNO0XI/AAAAAAAAAUU/kZ-7u_WdzQw/s1600/scatterplot_abline_line_best_fit.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNaOmNO0XI/AAAAAAAAAUU/kZ-7u_WdzQw/s400/scatterplot_abline_line_best_fit.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;So calling a given stat did &lt;b style="mso-bidi-font-weight: normal;"&gt;&lt;i style="mso-bidi-font-style: normal;"&gt;something&lt;/i&gt;&lt;/b&gt; in this case.&amp;nbsp; To get it to do something meaningful required 
additional work. &amp;nbsp;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;&lt;b&gt;Distinction between Grammar Components&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;The distinction between a statistic and geometric object is not always clear (at least in terms of the ggplot2 API).&amp;nbsp;&amp;nbsp; A line with a slope and intercept might be thought of as a statistic or a geometric object.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p + geom_abline(intercept=-0.363,slope=0.416, color='purple')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;Likewise a position adjustment (like a jitter) can be thought of in both geometric and positional terms.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;qplot(Petal.Length,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;Petal.Width,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;data=iris,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;position='jitter') +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;geom_abline(intercept=-0.363, slope=0.416)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;qplot(Petal.Length, Petal.Width, data=iris) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;geom_jitter()&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;I point out these idiosyncrasies because – as with many formal abstractions of real world concepts – edge cases exist.&amp;nbsp; For example, in &lt;a href="http://en.wikibooks.org/wiki/Music_Theory/Counterpoint/Species_Counterpoint/In_Two_Voices"&gt;Western music theory&lt;/a&gt;, one dutifully learns the rules of counterpoint&amp;nbsp;only to find out that 
they are not always observed by composers in practice and that certain constructs are not easily classified.&amp;nbsp; This doesn’t eliminate the usefulness of studying music theory. &amp;nbsp;It simply highlights the difficulty in neatly categorizing every aspect of a specific creation in an accurate and meaningful way.&amp;nbsp; And for what it’s worth, I think that Hadley Wickham has done a marvelous job – and appears to have taken an approach of providing an interface to underlying functionality when it appears in more than one category.&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;span style="font-family: 'Times New Roman'; font-size: 12pt;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;&lt;b&gt;Order of Application&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;Note that the order in which geoms and stats are applied matters! &amp;nbsp;For instance:&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p+geom_boxplot()&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFNbLBm_xlI/AAAAAAAAAUc/I6JayI9VSzo/s1600/scatterplot_boxplot_overlay.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFNbLBm_xlI/AAAAAAAAAUc/I6JayI9VSzo/s400/scatterplot_boxplot_overlay.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;The boxplot obscures the original points. 
&amp;nbsp;These can be added back on after applying the boxplot.&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;scap+geom_boxplot()+geom_point()&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFNbUB0GTWI/AAAAAAAAAUk/0FMe4bFrL68/s1600/scatterplot_boxplot_underlay.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TFNbUB0GTWI/AAAAAAAAAUk/0FMe4bFrL68/s400/scatterplot_boxplot_underlay.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;This gives a glimpse of the flexibility and sophistication of the system. &amp;nbsp;The fundamental elements of chart design that comprise the grammar can be combined in new and flexible ways. &amp;nbsp;Not every grammatically correct possibility is aesthetically pleasing or accurate as interpreted by human perception. 
&amp;nbsp;But ggplot2 is worth learning not only for its own sake, but for the insights it can provide into the creative activity of constructing charts and graphs.&lt;/div&gt;&lt;div class="MsoNormal" style="tab-stops: 90.75pt;"&gt;&lt;span style="font-family: 'Times New Roman'; font-size: 12pt;"&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-4289902767273984878?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/4289902767273984878/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/07/thinking-about-graphs.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/4289902767273984878'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/4289902767273984878'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/07/thinking-about-graphs.html' title='Thinking about Graphs'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNaOmNO0XI/AAAAAAAAAUU/kZ-7u_WdzQw/s72-c/scatterplot_abline_line_best_fit.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-6867274962361039198</id><published>2010-07-30T15:05:00.000-07:00</published><updated>2010-07-30T15:05:30.193-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='iPhone'/><title type='text'>Free R Chart iPhone App</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNMeDMcNDI/AAAAAAAAATs/FZniE1o50N8/s1600/r-chart-iphone.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNMeDMcNDI/AAAAAAAAATs/FZniE1o50N8/s400/r-chart-iphone.png" width="213" /&gt;&lt;/a&gt;&lt;/div&gt;Download the free&amp;nbsp;R-Chart iPhone Application&amp;nbsp;for the latest news and updates from top R web sites... 
(and this blog too).&lt;br /&gt;&lt;br /&gt;&lt;a href="itms://itunes.apple.com/us/app/r-chart/id384073723?mt=8"&gt;App Store Link&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Leave a comment if you have any feedback or suggestions!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3867310391951630980-6867274962361039198?l=www.r-chart.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://www.r-chart.com/feeds/6867274962361039198/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.r-chart.com/2010/07/free-r-chart-iphone-app.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6867274962361039198'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3867310391951630980/posts/default/6867274962361039198'/><link rel='alternate' type='text/html' href='http://www.r-chart.com/2010/07/free-r-chart-iphone-app.html' title='Free R Chart iPhone App'/><author><name>C</name><uri>http://www.blogger.com/profile/02893688387818336028</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_FsLa1cMTCWU/TFNMeDMcNDI/AAAAAAAAATs/FZniE1o50N8/s72-c/r-chart-iphone.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3867310391951630980.post-3046803104574035283</id><published>2010-07-29T12:32:00.000-07:00</published><updated>2010-07-29T12:32:06.923-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ggplot2'/><category scheme='http://www.blogger.com/atom/ns#' term='SQL'/><title type='text'>Pie Charts in ggplot2</title><content type='html'>&lt;div 
class="separator" style="clear: both; text-align: left;"&gt;&lt;i&gt;...and other isomorphic data shape presentations...&lt;/i&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;i&gt;&lt;br /&gt;&lt;/i&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TFHJVgmxV2I/AAAAAAAAATk/2-hAhNNk7ns/s1600/pie_chart3.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="173" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TFHJVgmxV2I/AAAAAAAAATk/2-hAhNNk7ns/s400/pie_chart3.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;The &lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Pie_chart"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Pie Chart&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&amp;nbsp;has been widely&amp;nbsp;criticized in recent times by statisticians. &amp;nbsp;&lt;/span&gt;&lt;a href="http://www.edwardtufte.com/tufte/"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Edward Tufte&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt; goes as far as to call this the "&lt;/span&gt;&lt;a href="http://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=00018S"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;prevailing orthodoxy&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;." &amp;nbsp;The reasons generally cited:&lt;/span&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;The relative size of each slice is difficult to interpret. 
&amp;nbsp;Studies have shown that pie charts are hard to read.&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Pie charts require too much space to present too little information.&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;They are frequently rendered in 3D (which makes the previous two issues worse).&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span" style="font-family: times, serif;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&amp;nbsp;There are better visualization alternatives. &amp;nbsp;For example, bar or point charts can display the same data&lt;/span&gt;&lt;span class="Apple-style-span"&gt;.&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;&lt;div&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;asins=0387245448&amp;amp;fc1=000000&amp;amp;IS2=1&amp;amp;lt1=_blank&amp;amp;m=amazon&amp;amp;lc1=0000FF&amp;amp;bc1=000000&amp;amp;bg1=FFFFFF&amp;amp;f=ifr" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt;      &lt;td&gt;The second chapter of &lt;a href="http://www.cs.uic.edu/~wilkinson/"&gt;Leland Wilkinson's&lt;/a&gt; "The Grammar of Graphics" is "How to make a Pie." &amp;nbsp;He is less critical of pie charts and in fact uses one as an example of how his&amp;nbsp;"grammar of graphics" provides a framework that defines the steps and process required&amp;nbsp;to construct one. 
&amp;nbsp;It is an interesting exercise that shows how the decisions&amp;nbsp;required to construct a pie chart can be described in terms of an underlying grammar&amp;nbsp;that also can describe the construction of a wide variety of other visualizations.&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;His pie chart example is based upon a survey that asked the question&amp;nbsp;"How often, if at all, do you think the peer review refereeing system for scholarly journals&amp;nbsp;in your field is biased in favor of the following categories of people?"&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;a href="http://had.co.nz/"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Hadley Wickham's&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt; R package ggplot2 was created based upon Wilkinson's writings. &amp;nbsp;It also incorporates design principles championed by Edward Tufte. &amp;nbsp;&amp;nbsp;Pie charts are created by transforming a stacked bar chart using polar coordinates. &amp;nbsp;&lt;/span&gt;&lt;a href="http://had.co.nz/ggplot2/coord_polar.html"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;Polar coordinates&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt; are also used to create some other circular charts (like bullseye charts). 
&amp;nbsp;&lt;/span&gt;&lt;span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;The final chart created using ggplot2 appears above. &amp;nbsp;In the ggplot2 book the following components are listed that make up a plot:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;Data&lt;/li&gt;&lt;li&gt;Aesthetic Mappings&lt;/li&gt;&lt;li&gt;Geometric Objects&lt;/li&gt;&lt;li&gt;Statistical Transformations&lt;/li&gt;&lt;li&gt;Position Adjustment&lt;/li&gt;&lt;li&gt;Faceting&lt;/li&gt;&lt;li&gt;Coordinate System&lt;/li&gt;&lt;/ul&gt;Each of these categories will be cited below along with its ggplot2 expression.&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;/ul&gt;&lt;br /&gt;&lt;/td&gt;&lt;td&gt;&lt;br /&gt;&lt;iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?t=rcha-20&amp;amp;o=1&amp;amp;p=8&amp;amp;l=as1&amp;amp;asins=0387981403&amp;amp;fc1=000000&amp;amp;IS2=1&amp;amp;lt1=_blank&amp;amp;m=amazon&amp;amp;lc1=0000FF&amp;amp;bc1=000000&amp;amp;bg1=FFFFFF&amp;amp;f=ifr" style="height: 240px; width: 120px;"&gt;&lt;/iframe&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;On page 38 Wilkinson shows the final results in two pie charts. 
&amp;nbsp;A table that replicates the&amp;nbsp;responses that he presented is as follows.&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Summary&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Response&lt;/span&gt;&lt;span class="Apple-style-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Gender&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.08&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.11&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;0.17&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;3&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.32&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;4&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.32&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;5&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.3&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;1&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.15&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.1&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: 
pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;3&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.07&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;4&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: 
small;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;0.38&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;5&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;  &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;2&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: medium;"&gt;&lt;span class="Apple-style-span" style="font-size: large;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;Data&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;The following is the R code required to accomplish this. 
&amp;nbsp;I also used the sqldf package to replace numeric values with corresponding string values. (Not the most R-ish way of approaching the problem)&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;library(ggplot2)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;library(sqldf)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;df = read.csv('data.csv')&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;df=sqldf("select Summary,&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;CASE WHEN Gender==1 THEN 'Female'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; WHEN Gender==2 THEN 'Male'&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;END gender,&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;CASE WHEN Response==1 THEN '1) rarely'&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; WHEN Response==2 THEN '2) infrequently'&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; WHEN Response==3 THEN '3) occasionally'&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; WHEN Response==4 THEN '4) frequently'&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; WHEN Response==5 THEN '5) not sure'&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp;END response from df")&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;&lt;b&gt;Aesthetic Mappings&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;Now that we have a data frame with the data in the desired format, our initial intent is to create a stacked bar chart. 
&amp;nbsp;The chart will have a single column, so the x coordinate will be set to 1. &amp;nbsp;The y coordinate represents the amount reported in the "Summary" column. &amp;nbsp;The color (or fill) is based upon the response column.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p = ggplot(data=df,&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; aes(x=factor(1),&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; y=Summary,&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fill = factor(response)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;),&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;&lt;b&gt;Geometric Objects&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;The geometric object (or geom) in this case will be used to create 
a bar chart. In ggplot2 this is identified as &lt;a href="http://had.co.nz/ggplot2/geom_bar.html"&gt;geom_bar&lt;/a&gt;.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p=p + geom_bar(width = 1)&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;&lt;b&gt;Statistical Transformations&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;A statistical transformation (or stat) is used to transform or summarize the data. &amp;nbsp;We are not using one in this example.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;Position Adjustment&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;A position adjustment is used to modify the position of displayed elements in some way. &amp;nbsp;For example, you can modify a stacked bar chart so that each column is the same height (and sections of a given bar are therefore proportional). &amp;nbsp;Again, these do not come into play in the current example.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: x-large;"&gt;Faceting&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;Faceting is used to represent each value for a given variable in its own chart. &amp;nbsp;In the current example, there will be two charts created - one for male and the other for female.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p=p+facet_grid(facets=. 
~ gender)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;At this point, we can stop and display the chart as it stands.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TFHDNSX2LkI/AAAAAAAAATM/KnGWfMyOdKA/s1600/stacked_bar_chart.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TFHDNSX2LkI/AAAAAAAAATM/KnGWfMyOdKA/s400/stacked_bar_chart.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;So it is evident that there is a close relationship between stacked bar charts and pie charts indicated through the use of the grammar. 
&amp;nbsp;This is not obvious in systems where a pie chart is created by rendering a circle and modifying the image.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p = p + coord_polar(theta="y")&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFHEPAqBghI/AAAAAAAAATU/hApA1ettvNs/s1600/pie_chart1.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TFHEPAqBghI/AAAAAAAAATU/hApA1ettvNs/s400/pie_chart1.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;The final few lines clean up the x and y labels and modify the title for the legend.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;p = p + xlab('') +&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;ylab('') +&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;labs(fill='Response') &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;And this completes the chart 
&amp;nbsp;displayed at the top of this article. &amp;nbsp;You can also create a chart for each gender individually by limiting the data displayed in the frame and removing the faceting:&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;ggplot(data=df[df$gender=='Male',],&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; aes(x=factor(1),&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; y=Summary,&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fill = factor(response))) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;geom_bar(width = 1) +&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;coord_polar(theta="y") +&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;xlab('Males') 
+&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;ylab('') +&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;labs(fill='Response')&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;ggplot(data=df[df$gender=='Female',],&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nb
