This is an automated email from the ASF dual-hosted git repository.

git-site-role pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new da2ed38  Publishing website 2019/09/04 17:00:12 at commit d251702
da2ed38 is described below

commit da2ed38e5fe9402d76d691f1f733a91e9fbc49d0
Author: jenkins <bui...@apache.org>
AuthorDate: Wed Sep 4 17:00:13 2019 +0000

    Publishing website 2019/09/04 17:00:12 at commit d251702
---
 .../{index.html => blog/2019/09/04/gsoc-19.html}   | 299 +++++++--------------
 website/generated-content/blog/index.html          |  32 +++
 website/generated-content/feed.xml                 | 162 ++++++-----
 website/generated-content/index.html               |  10 +-
 4 files changed, 232 insertions(+), 271 deletions(-)

diff --git a/website/generated-content/index.html 
b/website/generated-content/blog/2019/09/04/gsoc-19.html
similarity index 53%
copy from website/generated-content/index.html
copy to website/generated-content/blog/2019/09/04/gsoc-19.html
index 57b28e2..f651080 100644
--- a/website/generated-content/index.html
+++ b/website/generated-content/blog/2019/09/04/gsoc-19.html
@@ -28,9 +28,8 @@
   <meta charset="utf-8">
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
   <meta name="viewport" content="width=device-width, initial-scale=1">
-  <title>Apache Beam</title>
-  <meta name="description" content="Apache Beam is an open source, unified 
model and set of language-specific SDKs for defining and executing data 
processing workflows, and also data ingestion and integration flows, supporting 
Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). 
Dataflow pipelines simplify the mechanics of large-scale batch and streaming 
data processing and can run on a number of runtimes like Apache Flink, Apache 
Spark, and Google Cloud Dataflow  [...]
-">
+  <title>Google Summer of Code &#39;19</title>
+  <meta name="description" content="Google Summer of Code was an amazing 
learning experience for me. I contributed to open source, learned about Apache 
Beam’s internals and worked with the best ...">
   <link href="https://fonts.googleapis.com/css?family=Roboto:100,300,400" 
rel="stylesheet">
   <link rel="stylesheet" href="/css/site.css">
   <script src="https://code.jquery.com/jquery-2.2.4.min.js"></script>
@@ -39,7 +38,7 @@
   <script src="/js/fix-menu.js"></script>
   <script src="/js/section-nav.js"></script>
   <script src="/js/page-nav.js"></script>
-  <link rel="canonical" href="https://beam.apache.org/" data-proofer-ignore>
+  <link rel="canonical" 
href="https://beam.apache.org/blog/2019/09/04/gsoc-19.html" data-proofer-ignore>
   <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
   <link rel="alternate" type="application/rss+xml" title="Apache Beam" 
href="https://beam.apache.org/feed.xml">
   <link rel="stylesheet" 
href="https://use.fontawesome.com/releases/v5.4.1/css/all.css" 
integrity="sha384-5sAR7xN1Nv6T6+dT2mhtzEpVJvfS3NScPQTrOxhwjIuvcA67KV2R5Jz6kr4abQsz"
 crossorigin="anonymous">
@@ -53,7 +52,7 @@
   </script>
 </head>
 
-  <body class="body body--index">
+  <body class="body ">
     <!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -141,7 +140,7 @@
             GitHub links will not resolve until the markdown source is 
available on the master branch.
             New pages would fail validation during development / PR test 
automation.
           -->
-          <a 
href="https://github.com/apache/beam/edit/master/website/src/index.md" 
data-proofer-ignore>
+          <a 
href="https://github.com/apache/beam/edit/master/website/src/_posts/2019-09-04-gsoc-19.md"
 data-proofer-ignore>
             <i class="far fa-edit fa-lg" alt="Edit on GitHub" title="Edit on 
GitHub"></i>
           </a>
         </li>
@@ -151,6 +150,31 @@
 
     <div class="body__contained">
       <!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+
+
+<article class="post" itemscope itemtype="http://schema.org/BlogPosting">
+
+  <header class="post-header">
+    <h1 class="post-title" itemprop="name headline">Google Summer of Code 
'19</h1>
+    <p class="post-meta"><time datetime="2019-09-04T01:00:01-07:00" 
itemprop="datePublished">Sep 4, 2019</time> •
+       Tanay Tummalapalli [<a 
href="https://twitter.com/ttanay100">@ttanay100</a>]
+      
+    </p>
+  </header>
+
+  <div class="post-content" itemprop="articleBody">
+    <!--
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -163,207 +187,80 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
-<div class="hero-bg">
-  <div class="hero section">
-    <div class="hero__cols">
-      <div class="hero__cols__col">
-        <div class="hero__cols__col__content">
-          <div class="hero__title">
-            Apache Beam: An advanced unified programming model
-          </div>
-          <div class="hero__subtitle">
-            Implement batch and streaming data processing jobs that run on any 
execution engine.
-          </div>
-          <div class="hero__ctas hero__ctas--first">
-            <a class="button button--primary" 
href="/get-started/beam-overview/">Learn more</a>
-            <a class="button button--primary" 
href="/get-started/try-apache-beam/">Try Beam</a>
-            <a class="button button--primary" 
href="/get-started/downloads/">Download Beam SDK 2.15.0</a>
-          </div>
-          <div class="hero__ctas">
-            <a class="button" href="/get-started/quickstart-java/">Java 
Quickstart</a>
-            <a class="button" href="/get-started/quickstart-py/">Python 
Quickstart</a>
-            <a class="button" href="/get-started/quickstart-go/">Go 
Quickstart</a>
-          </div>
-        </div>
-      </div>
-      <div class="hero__cols__col">
-        <div class="hero__blog">
-          <div class="hero__blog__title">
-            The latest from the blog
-          </div>
-          <div class="hero__blog__cards">
-            
-            <a class="hero__blog__cards__card" 
href="/blog/2019/08/22/beam-2.15.0.html">
-              <div class="hero__blog__cards__card__title">Apache Beam 
2.15.0</div>
-              <div class="hero__blog__cards__card__date">Aug 22, 2019</div>
-            </a>
-            
-            <a class="hero__blog__cards__card" 
href="/blog/2019/07/31/beam-2.14.0.html">
-              <div class="hero__blog__cards__card__title">Apache Beam 
2.14.0</div>
-              <div class="hero__blog__cards__card__date">Jul 31, 2019</div>
-            </a>
-            
-            <a class="hero__blog__cards__card" 
href="/blog/2019/06/11/looping-timers.html">
-              <div class="hero__blog__cards__card__title">Looping timers in 
Apache Beam</div>
-              <div class="hero__blog__cards__card__date">Jun 11, 2019</div>
-            </a>
-            
-          </div>
-        </div>
-      </div>
-    </div>
-  </div>
-</div>
 
-<div class="pillars section">
-  <div class="pillars__title">
-    All about Apache Beam
-  </div>
-  <div class="pillars__cols">
-    
-    <div class="pillars__cols__col">
-      <div class="pillars__cols__col__title">
-        Unified
-      </div>
-      <div class="pillars__cols__col__body">
-        Use a single programming model for both batch and streaming use cases.
-      </div>
-    </div>
-    
-    <div class="pillars__cols__col">
-      <div class="pillars__cols__col__title">
-        Portable
-      </div>
-      <div class="pillars__cols__col__body">
-        Execute pipelines on multiple execution environments.
-      </div>
-    </div>
-    
-    <div class="pillars__cols__col">
-      <div class="pillars__cols__col__title">
-        Extensible
-      </div>
-      <div class="pillars__cols__col__body">
-        Write and share new SDKs, IO connectors, and transformation libraries.
-      </div>
-    </div>
-    
-  </div>
-</div>
+<p>Google Summer of Code was an amazing learning experience for me.
+I contributed to open source, learned about Apache Beam’s internals and worked 
with the best engineers in the world.</p>
 
-<div class="graphic section">
-<div class="graphic__image">
-<img src="/images/beam_architecture.png" alt="Beam architecture" />
-</div>
-</div>
+<!--more-->
 
-<div class="logos section">
-  <div class="logos__title">
-    Works with
-  </div>
-  <div class="logos__logos">
-    
-    <div class="logos__logos__logo">
-      <a href="http://apex.apache.org"><img src="/images/logo_apex.png" 
alt="APEX" /></a>
-    </div>
-    
-    <div class="logos__logos__logo">
-      <a href="http://flink.apache.org"><img src="/images/logo_flink.png" 
alt="Flink" /></a>
-    </div>
-    
-    <div class="logos__logos__logo">
-      <a href="http://spark.apache.org/"><img src="/images/logo_spark.png" 
alt="Spark" /></a>
-    </div>
-    
-    <div class="logos__logos__logo">
-      <a href="https://cloud.google.com/dataflow/"><img 
src="/images/logo_google_cloud.png" alt="Google Cloud Dataflow" /></a>
-    </div>
-    
-    <div class="logos__logos__logo">
-      <a href="http://gearpump.apache.org/"><img 
src="/images/logo_gearpump.png" alt="Gearpump" /></a>
-    </div>
-    
-    <div class="logos__logos__logo">
-      <a href="http://samza.apache.org/"><img src="/images/logo_samza.png" 
alt="Samza" /></a>
-    </div>
-    
-  </div>
-</div>
+<h2 id="motivation">Motivation</h2>
+<p>Two of my friends had participated in GSoC in 2018. I was intrigued by 
their experience.
+The idea of working on open-source software that could potentially be used by 
developers across the world, while being mentored by the best people in a field 
was exciting!
+So, I decided to give Google Summer of Code a shot this year.</p>
 
-<div class="cards section section--wide">
-  <div class="section__contained">
-    <div class="cards__title">
-      Testimonials
-    </div>
-    <div class="cards__cards">
-      
-      <div class="cards__cards__card">
-        <div class="cards__cards__card__body">
-          A framework that delivers the flexibility and advanced functionality 
our customers need.
-        </div>
-        <div class="cards__cards__card__user">
-          <!-- TODO: Implement icons.
-          <div class="cards__cards__card__user__icon">
-          </div>
-          -->
-          <div class="cards__cards__card__user__name">
-            –Talend
-          </div>
-        </div>
-      </div>
-      
-      <div class="cards__cards__card">
-        <div class="cards__cards__card__body">
-          Apache Beam has powerful semantics that solve real-world challenges 
of stream processing.
-        </div>
-        <div class="cards__cards__card__user">
-          <!-- TODO: Implement icons.
-          <div class="cards__cards__card__user__icon">
-          </div>
-          -->
-          <div class="cards__cards__card__user__name">
-            –PayPal
-          </div>
-        </div>
-      </div>
-      
-      <div class="cards__cards__card">
-        <div class="cards__cards__card__body">
-          Apache Beam represents a principled approach for analyzing data 
streams.
-        </div>
-        <div class="cards__cards__card__user">
-          <!-- TODO: Implement icons.
-          <div class="cards__cards__card__user__icon">
-          </div>
-          -->
-          <div class="cards__cards__card__user__name">
-            –data Artisans
-          </div>
-        </div>
-      </div>
-      
-    </div>
-    <div class="cards__body">
-      Beam is an open source community and contributions are greatly 
appreciated!
-      If you’d like to contribute, please see the <a 
href="/contribute/">Contribute</a> section.
-    </div>
-  </div>
-</div>
+<h2 id="what-is-google-summer-of-code">What is Google Summer of Code?</h2>
+<p><a href="https://summerofcode.withgoogle.com/">Google Summer of Code</a> is 
a global program hosted by Google focused on introducing students to open 
source software development.
+Students work on a 3 month programming project with an open source 
organization during their break from university.</p>
+
+<h2 id="why-apache-beam">Why Apache Beam?</h2>
+<p>While interning at <a href="https://atlan.com/">Atlan</a>, I discovered the 
field of Data Engineering. I found the challenges and the discussions of the 
engineers there interesting. While researching for my internship project, I 
came across the Streaming Systems book. It introduced me to the unified model 
of Apache Beam for Batch and Streaming Systems, which I was fascinated by.
+I wanted to explore Data Engineering, so for GSoC, I wanted to work on a 
project in that field. Towards the end of my internship, I started contributing 
to Apache Airflow(very cool project) and Apache Beam, hoping one of them would 
participate in GSoC. I got lucky!</p>
+
+<p><a href="https://youtu.be/U2eWLb-LD44">Also, Spotify’s Discover Weekly uses 
Apache Beam!</a></p>
+
+<h2 id="preparation">Preparation</h2>
+<p>I had already read the <a href="http://streamingsystems.net/">Streaming 
Systems book</a>. So, I had an idea of the concepts that Beam is built on, but 
had never actually used Beam.
+Before actually submitting a proposal, I went through a bunch of resources to 
make sure I had a concrete understanding of Beam.
+I read the <a 
href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101">Streaming
 101</a> and <a 
href="https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102">Streaming
 102</a> blogs by Tyler Akidau. They are the perfect introduction to Beam’s 
unified model for Batch and Streaming.
+In addition, I watched all Beam talks on YouTube. You can find them on the <a 
href="https://beam.apache.org/documentation/resources/videos-and-podcasts/">Beam
 Website</a>.
+Beam has really good documentation. The <a 
href="https://beam.apache.org/documentation/programming-guide/">Programming 
Guide</a> lays out all of Beam’s concepts really well. <a 
href="https://beam.apache.org/documentation/execution-model/">Beam’s execution 
model</a> is also documented well and is a must-read to understand how Beam 
processes data.
+<a href="https://www.waitingforcode.com/apache-beam">waitingforcode.com</a> 
also has good blog posts about Beam concepts.
+To get a better sense of the Beam codebase, I played around with it and worked 
on some PRs to understand Beam better and got familiar with the test suite and 
workflows.</p>
+
+<h2 id="gsoc-journey">GSoC Journey</h2>
+<p>GSoC has 2 phases. The first is the Community Bonding period in which 
students get familiar with the project and the community. The other is the 
actual Coding Period in which students work on their projects. Since the Coding 
Period has three evaluations spaced out by a month, I divided my project into 
three parts focusing on the implementation, tests, and documentation or 
improvements.</p>
+
+<h3 id="project">Project</h3>
+<p>My project(<a 
href="https://issues.apache.org/jira/browse/BEAM-6611">BEAM-6611</a>) added 
support for File Loads method of inserting data into BigQuery for streaming 
pipelines. It builds on PR - <a 
href="https://github.com/apache/beam/pull/7655">#7655</a> for <a 
href="https://issues.apache.org/jira/browse/BEAM-6553">BEAM-6553</a> that added 
support in the Python SDK for writing to BigQuery using File Loads method for 
Batch pipelines. Streaming pipelines with non-default Windowing, Tri [...]
+You can find my proposal <a 
href="https://docs.google.com/document/d/15Peyd3Z_wu5rvGWw8lMLpZuTyyreM_JOAEFFWvF97YY/edit?usp=sharing">here</a>.</p>
+
+<h3 id="community-bonding">Community Bonding</h3>
+<p>When GSoC started, my semester end exams had not yet finished. As a result, 
I couldn’t get much done. I worked on three PTransforms for the Python SDK - 
Latest, WithKeys and Reify.</p>
+
+<h3 id="coding-period-i">Coding Period I</h3>
+<p>In this period, I wrote some Integration Tests for the BigQuery sink using 
Streaming Inserts in streaming mode. I worked on a failing integration test for 
my project. I also finished the implementation of my project. But, one 
PostCommit test didn’t pass. I realized that the matcher for the Integration 
Test that queried BigQuery for the results was intended to be used in Batch 
mode. So, I wrote a version of the matcher to work in streaming mode.</p>
+
+<h3 id="coding-period-ii">Coding Period II</h3>
+<p>Even after I had added the matcher for streaming mode, the PostCommit tests 
did not pass. A test was being run even though it was not specified. I isolated 
the failure to a <a 
href="https://nose.readthedocs.io/en/latest/doc_tests/test_multiprocess/multiprocess.html#other-differences-in-test-running">limitation</a>
 of the multiprocess plugin for <a 
href="https://nose.readthedocs.io/en/latest/">nose(a Python test framework)</a> 
due to which it found more tests than had been specified. It [...]
+I also worked on small issues related to testing.</p>
+
+<p>This period was marked by a few exciting events:</p>
+<ul>
+  <li>Ending up in the top #100 contributors to apache/beam.</li>
+  <li>My first ever PR Review on an open source project.</li>
+</ul>
+
+<p><img 
src="https://pbs.twimg.com/media/D_XNSC-UIAUmswG?format=png&amp;name=small" 
alt="Weird flex but ok" /></p>
+
+<h3 id="coding-period-iii">Coding Period III</h3>
+<p>This was the final coding period before the program ended. Since my project 
was merged earlier than expected, my mentor suggested another issue(<a 
href="https://issues.apache.org/jira/browse/BEAM-7742">BEAM-7742</a>) in the 
same area - BigQueryIO, that I found interesting. So, I worked on partitioning 
written files in BigQuery to ensure that all load jobs triggered adhere to the 
load job size limitations specified for BigQuery.
+While working on my project, I was using a pipeline that uses PubSub as a 
source and BigQuery as a sink to validate my changes. My mentor suggested we 
add them to the Beam test suite as it would be the ultimate test for 
BigQueryIO. I also worked on adding this test to Beam.</p>
+
+<p>You can find the list of PRs I worked on <a 
href="https://github.com/apache/beam/pulls?utf8=%E2%9C%93&amp;q=is%3Apr+author%3Attanay">here</a>.</p>
+
+<h2 id="conclusion">Conclusion</h2>
+<p>GSoC has been a lesson in discipline and goal-setting for me. Deciding what 
I wanted to work on and how much I wanted to get done each week was an 
important lesson.
+I had never worked remotely, so this was a new experience. Although I 
struggled with it initially, I appreciate the flexibility that it comes with.
+I also had a lot of fun learning about Apache Beam’s internals, and other 
tools in the same ecosystem.
+This was also the first time I had written code with a test-first approach.</p>
+
+<p>I thank my mentor - Pablo Estrada, Apache Beam, The Apache Software 
Foundation and Google Summer of Code for this opportunity. I am also grateful 
to my mentor for helping me with everything I needed and more, and the Apache 
Beam community for being supportive and encouraging.</p>
+
+<p>With the right effort, perseverance, conviction, and a plan, anything is 
possible. Anything.</p>
 
-<div class="ctas section">
-  <div class="ctas__title">
-    Get started
-  </div>
-  <div class="ctas__ctas ctas__ctas--top">
-  <a class="button button--primary" href="/get-started/beam-overview/">Learn 
more</a>
-  <a class="button button--primary" href="/get-started/downloads/">Download 
Beam SDK 2.15.0</a>
-  </div>
-  <div class="ctas__ctas">
-  <a class="button" href="/get-started/quickstart-java/">Java Quickstart</a>
-  <a class="button" href="/get-started/quickstart-py/">Python Quickstart</a>
-  <a class="button" href="/get-started/quickstart-go/">Go Quickstart</a>
   </div>
-</div>
+
+</article>
 
     </div>
     <!--
diff --git a/website/generated-content/blog/index.html 
b/website/generated-content/blog/index.html
index 0352aea..6d6b1ff 100644
--- a/website/generated-content/blog/index.html
+++ b/website/generated-content/blog/index.html
@@ -169,6 +169,38 @@ limitations under the License.
 <p>This is the blog for the Apache Beam project. This blog contains news and 
updates
 for the project.</p>
 
+<h3 id="google-summer-of-code-19"><a class="post-link" 
href="/blog/2019/09/04/gsoc-19.html">Google Summer of Code ’19</a></h3>
+<p><i>Sep 4, 2019 •
+ Tanay Tummalapalli [<a href="https://twitter.com/ttanay100">@ttanay100</a>]
+</i></p>
+
+<!--
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+<p>Google Summer of Code was an amazing learning experience for me.
+I contributed to open source, learned about Apache Beam’s internals and worked 
with the best engineers in the world.</p>
+
+<!-- Render a "read more" button if the post is longer than the excerpt -->
+
+<p>
+<a class="btn btn-default btn-sm" href="/blog/2019/09/04/gsoc-19.html" 
role="button">
+Read more&nbsp;<span class="glyphicon glyphicon-menu-right" 
aria-hidden="true"></span>
+</a>
+</p>
+
+<hr />
+
 <h3 id="apache-beam-2150"><a class="post-link" 
href="/blog/2019/08/22/beam-2.15.0.html">Apache Beam 2.15.0</a></h3>
 <p><i>Aug 22, 2019 •</i></p>
 
diff --git a/website/generated-content/feed.xml 
b/website/generated-content/feed.xml
index ce1f99f..9879203 100644
--- a/website/generated-content/feed.xml
+++ b/website/generated-content/feed.xml
@@ -20,6 +20,103 @@
     <generator>Jekyll v3.2.0</generator>
     
       <item>
+        <title>Google Summer of Code '19</title>
+        <description>&lt;!--
+Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an &quot;AS IS&quot; BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--&gt;
+
+&lt;p&gt;Google Summer of Code was an amazing learning experience for me.
+I contributed to open source, learned about Apache Beam’s internals and worked 
with the best engineers in the world.&lt;/p&gt;
+
+&lt;!--more--&gt;
+
+&lt;h2 id=&quot;motivation&quot;&gt;Motivation&lt;/h2&gt;
+&lt;p&gt;Two of my friends had participated in GSoC in 2018. I was intrigued 
by their experience.
+The idea of working on open-source software that could potentially be used by 
developers across the world, while being mentored by the best people in a field 
was exciting!
+So, I decided to give Google Summer of Code a shot this year.&lt;/p&gt;
+
+&lt;h2 id=&quot;what-is-google-summer-of-code&quot;&gt;What is Google Summer 
of Code?&lt;/h2&gt;
+&lt;p&gt;&lt;a href=&quot;https://summerofcode.withgoogle.com/&quot;&gt;Google 
Summer of Code&lt;/a&gt; is a global program hosted by Google focused on 
introducing students to open source software development.
+Students work on a 3 month programming project with an open source 
organization during their break from university.&lt;/p&gt;
+
+&lt;h2 id=&quot;why-apache-beam&quot;&gt;Why Apache Beam?&lt;/h2&gt;
+&lt;p&gt;While interning at &lt;a 
href=&quot;https://atlan.com/&quot;&gt;Atlan&lt;/a&gt;, I discovered the field 
of Data Engineering. I found the challenges and the discussions of the 
engineers there interesting. While researching for my internship project, I 
came across the Streaming Systems book. It introduced me to the unified model 
of Apache Beam for Batch and Streaming Systems, which I was fascinated by.
+I wanted to explore Data Engineering, so for GSoC, I wanted to work on a 
project in that field. Towards the end of my internship, I started contributing 
to Apache Airflow(very cool project) and Apache Beam, hoping one of them would 
participate in GSoC. I got lucky!&lt;/p&gt;
+
+&lt;p&gt;&lt;a href=&quot;https://youtu.be/U2eWLb-LD44&quot;&gt;Also, 
Spotify’s Discover Weekly uses Apache Beam!&lt;/a&gt;&lt;/p&gt;
+
+&lt;h2 id=&quot;preparation&quot;&gt;Preparation&lt;/h2&gt;
+&lt;p&gt;I had already read the &lt;a 
href=&quot;http://streamingsystems.net/&quot;&gt;Streaming Systems 
book&lt;/a&gt;. So, I had an idea of the concepts that Beam is built on, but 
had never actually used Beam.
+Before actually submitting a proposal, I went through a bunch of resources to 
make sure I had a concrete understanding of Beam.
+I read the &lt;a 
href=&quot;https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101&quot;&gt;Streaming
 101&lt;/a&gt; and &lt;a 
href=&quot;https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102&quot;&gt;Streaming
 102&lt;/a&gt; blogs by Tyler Akidau. They are the perfect introduction to 
Beam’s unified model for Batch and Streaming.
+In addition, I watched all Beam talks on YouTube. You can find them on the 
&lt;a 
href=&quot;https://beam.apache.org/documentation/resources/videos-and-podcasts/&quot;&gt;Beam
 Website&lt;/a&gt;.
+Beam has really good documentation. The &lt;a 
href=&quot;https://beam.apache.org/documentation/programming-guide/&quot;&gt;Programming
 Guide&lt;/a&gt; lays out all of Beam’s concepts really well. &lt;a 
href=&quot;https://beam.apache.org/documentation/execution-model/&quot;&gt;Beam’s
 execution model&lt;/a&gt; is also documented well and is a must-read to 
understand how Beam processes data.
+&lt;a 
href=&quot;https://www.waitingforcode.com/apache-beam&quot;&gt;waitingforcode.com&lt;/a&gt;
 also has good blog posts about Beam concepts.
+To get a better sense of the Beam codebase, I played around with it and worked 
on some PRs to understand Beam better and got familiar with the test suite and 
workflows.&lt;/p&gt;
+
+&lt;h2 id=&quot;gsoc-journey&quot;&gt;GSoC Journey&lt;/h2&gt;
+&lt;p&gt;GSoC has 2 phases. The first is the Community Bonding period in which 
students get familiar with the project and the community. The other is the 
actual Coding Period in which students work on their projects. Since the Coding 
Period has three evaluations spaced out by a month, I divided my project into 
three parts focusing on the implementation, tests, and documentation or 
improvements.&lt;/p&gt;
+
+&lt;h3 id=&quot;project&quot;&gt;Project&lt;/h3&gt;
+&lt;p&gt;My project(&lt;a 
href=&quot;https://issues.apache.org/jira/browse/BEAM-6611&quot;&gt;BEAM-6611&lt;/a&gt;)
 added support for File Loads method of inserting data into BigQuery for 
streaming pipelines. It builds on PR - &lt;a 
href=&quot;https://github.com/apache/beam/pull/7655&quot;&gt;#7655&lt;/a&gt; 
for &lt;a 
href=&quot;https://issues.apache.org/jira/browse/BEAM-6553&quot;&gt;BEAM-6553&lt;/a&gt;
 that added support in the Python SDK for writing to BigQuery using File Loads 
method  [...]
+You can find my proposal &lt;a 
href=&quot;https://docs.google.com/document/d/15Peyd3Z_wu5rvGWw8lMLpZuTyyreM_JOAEFFWvF97YY/edit?usp=sharing&quot;&gt;here&lt;/a&gt;.&lt;/p&gt;
+
+&lt;h3 id=&quot;community-bonding&quot;&gt;Community Bonding&lt;/h3&gt;
+&lt;p&gt;When GSoC started, my semester end exams had not yet finished. As a 
result, I couldn’t get much done. I worked on three PTransforms for the Python 
SDK - Latest, WithKeys and Reify.&lt;/p&gt;
+
+&lt;h3 id=&quot;coding-period-i&quot;&gt;Coding Period I&lt;/h3&gt;
+&lt;p&gt;In this period, I wrote some Integration Tests for the BigQuery sink 
using Streaming Inserts in streaming mode. I worked on a failing integration 
test for my project. I also finished the implementation of my project. But, one 
PostCommit test didn’t pass. I realized that the matcher for the Integration 
Test that queried BigQuery for the results was intended to be used in Batch 
mode. So, I wrote a version of the matcher to work in streaming mode.&lt;/p&gt;
+
+&lt;h3 id=&quot;coding-period-ii&quot;&gt;Coding Period II&lt;/h3&gt;
+&lt;p&gt;Even after I had added the matcher for streaming mode, the PostCommit 
tests did not pass. A test was being run even though it was not specified. I 
isolated the failure to a &lt;a 
href=&quot;https://nose.readthedocs.io/en/latest/doc_tests/test_multiprocess/multiprocess.html#other-differences-in-test-running&quot;&gt;limitation&lt;/a&gt;
 of the multiprocess plugin for &lt;a 
href=&quot;https://nose.readthedocs.io/en/latest/&quot;&gt;nose(a Python test 
framework)&lt;/a&gt; due to whi [...]
+I also worked on small issues related to testing.&lt;/p&gt;
+
+&lt;p&gt;This period was marked by a few exciting events:&lt;/p&gt;
+&lt;ul&gt;
+  &lt;li&gt;Ending up in the top #100 contributors to apache/beam.&lt;/li&gt;
+  &lt;li&gt;My first ever PR Review on an open source project.&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;p&gt;&lt;img 
src=&quot;https://pbs.twimg.com/media/D_XNSC-UIAUmswG?format=png&amp;amp;name=small&quot;
 alt=&quot;Weird flex but ok&quot; /&gt;&lt;/p&gt;
+
+&lt;h3 id=&quot;coding-period-iii&quot;&gt;Coding Period III&lt;/h3&gt;
+&lt;p&gt;This was the final coding period before the program ended. Since my 
project was merged earlier than expected, my mentor suggested another 
issue(&lt;a 
href=&quot;https://issues.apache.org/jira/browse/BEAM-7742&quot;&gt;BEAM-7742&lt;/a&gt;)
 in the same area - BigQueryIO, that I found interesting. So, I worked on 
partitioning written files in BigQuery to ensure that all load jobs triggered 
adhere to the load job size limitations specified for BigQuery.
+While working on my project, I was using a pipeline that uses PubSub as a 
source and BigQuery as a sink to validate my changes. My mentor suggested we 
add them to the Beam test suite as it would be the ultimate test for 
BigQueryIO. I also worked on adding this test to Beam.&lt;/p&gt;
+
+&lt;p&gt;You can find the list of PRs I worked on &lt;a 
href=&quot;https://github.com/apache/beam/pulls?utf8=%E2%9C%93&amp;amp;q=is%3Apr+author%3Attanay&quot;&gt;here&lt;/a&gt;.&lt;/p&gt;
+
+&lt;h2 id=&quot;conclusion&quot;&gt;Conclusion&lt;/h2&gt;
+&lt;p&gt;GSoC has been a lesson in discipline and goal-setting for me. 
Deciding what I wanted to work on and how much I wanted to get done each week 
was an important lesson.
+I had never worked remotely, so this was a new experience. Although I 
struggled with it initially, I appreciate the flexibility that it comes with.
+I also had a lot of fun learning about Apache Beam’s internals, and other 
tools in the same ecosystem.
+This was also the first time I had written code with a test-first 
approach.&lt;/p&gt;
+
+&lt;p&gt;I thank my mentor - Pablo Estrada, Apache Beam, The Apache Software 
Foundation and Google Summer of Code for this opportunity. I am also grateful 
to my mentor for helping me with everything I needed and more, and the Apache 
Beam community for being supportive and encouraging.&lt;/p&gt;
+
+&lt;p&gt;With the right effort, perseverance, conviction, and a plan, anything 
is possible. Anything.&lt;/p&gt;
+</description>
+        <pubDate>Wed, 04 Sep 2019 01:00:01 -0700</pubDate>
+        <link>https://beam.apache.org/blog/2019/09/04/gsoc-19.html</link>
+        <guid 
isPermaLink="true">https://beam.apache.org/blog/2019/09/04/gsoc-19.html</guid>
+        
+        
+        <category>blog</category>
+        
+        <category>gsoc</category>
+        
+      </item>
+    
+      <item>
         <title>Apache Beam 2.15.0</title>
         <description>&lt;!--
 Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);
@@ -1214,70 +1311,5 @@ Valentyn Tymofieiev, Xinyu Liu, Yifan Zou, Yueyang 
Qiu&lt;/p&gt;
         
       </item>
     
-      <item>
-        <title>Apache Beam is applying to Season of Docs</title>
-        <description>&lt;!--
-Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an &quot;AS IS&quot; BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
---&gt;
-
-&lt;p&gt;The Apache Beam community is thrilled to announce its application to 
the first edition of  Season of Docs 2019!&lt;/p&gt;
-
-&lt;!--more--&gt;
-
-&lt;p&gt;&lt;img src=&quot;/images/blog/SoD.png&quot; alt=&quot;Season of Docs 
2019 flyer&quot; height=&quot;455&quot; width=&quot;640&quot; /&gt;&lt;/p&gt;
-
-&lt;p&gt;&lt;a 
href=&quot;https://developers.google.com/season-of-docs/&quot;&gt;Season of 
Docs&lt;/a&gt; is a unique program that pairs technical writers with open 
source mentors to contribute to open source. This creates an opportunity to 
introduce the technical writer to an open source community and provide guidance 
while the writer works on a real world open source project. We, in the Apache 
Beam community, would love to take this chance and invite technical writers to 
collaborate wi [...]
-
-&lt;p&gt;Apache Beam does have help from excellent technical writers, but the 
documentation needs of the project often exceed their bandwidth. This is why we 
are excited about this program.&lt;/p&gt;
-
-&lt;p&gt;After discussing ideas in the community, we have been able to find 
mentors, and frame two ideas that we think would be a great fit for an incoming 
tech writer to tackle. We hope you will find this opportunity interesting - and 
if you do, please get in touch by emailing the Apache Beam mailing list at 
&lt;a 
href=&quot;mailto:d...@beam.apache.org&quot;&gt;d...@beam.apache.org&lt;/a&gt; 
(you will need to subscribe first by emailing to &lt;a 
href=&quot;mailto:dev-subscribe@beam.apache [...]
-
-&lt;p&gt;The project ideas available in Apache Beam are described below. 
Please take a look and ask any questions that you may have. We will be very 
happy to help you get onboarded with the project.&lt;/p&gt;
-
-&lt;h3 id=&quot;project-ideas&quot;&gt;Project ideas&lt;/h3&gt;
-
-&lt;p&gt;&lt;strong&gt;Deployment of Flink and Spark Clusters for use with 
Portable Beam&lt;/strong&gt;&lt;/p&gt;
-
-&lt;p&gt;The Apache Beam vision has been to provide a framework for users to 
write and execute pipelines on the programming language of your choice, and the 
runner of your choice. As the reality of Beam has evolved towards this vision, 
the way in which Beam is run on top of runners such as Apache Spark and Apache 
Flink has changed.&lt;/p&gt;
-
-&lt;p&gt;These changes are documented in the wiki and in design documents, and 
are accessible for Beam contributors; but they are not available in the 
user-facing documentation. This has been a barrier to adoption for other users 
of Beam.&lt;/p&gt;
-
-&lt;p&gt;This project involves improving the &lt;a 
href=&quot;https://beam.apache.org/documentation/runners/flink/&quot;&gt;Flink 
Runner page&lt;/a&gt; to include strategies to deploy Beam on a few different 
environments: A Kubernetes cluster, a Google Cloud Dataproc cluster, and an AWS 
EMR cluster. There are other places in the documentation that should be updated 
in this regard, such as the &lt;a 
href=&quot;https://beam.apache.org/documentation/sdks/python-streaming/&quot;&gt;Python
 st [...]
-
-&lt;p&gt;After working on the Flink Runner, similar updates should be 
made to the &lt;a 
href=&quot;https://beam.apache.org/documentation/runners/spark/&quot;&gt;Spark 
Runner page&lt;/a&gt;, and the &lt;a 
href=&quot;https://beam.apache.org/get-started/beam-overview/&quot;&gt;getting 
started documentation&lt;/a&gt;.&lt;/p&gt;
-
-&lt;p&gt;&lt;strong&gt;The runner comparison page / capability matrix 
update&lt;/strong&gt;&lt;/p&gt;
-
-&lt;p&gt;Beam maintains a &lt;a 
href=&quot;https://beam.apache.org/documentation/runners/capability-matrix/&quot;&gt;capability
 matrix&lt;/a&gt; to track which Beam features are supported by which set of 
language SDKs + Runners.
-This project involves a number of &lt;a 
href=&quot;https://issues.apache.org/jira/browse/BEAM-2888&quot;&gt;corrections 
and improvements to the capability matrix&lt;/a&gt;; followed by a larger 
set of changes, involving:&lt;/p&gt;
-
-&lt;ul&gt;
-  &lt;li&gt;Plain English summaries for each runner’s support of the Beam 
model.&lt;/li&gt;
-  &lt;li&gt;A paragraph-length description of the production-readiness for 
each runner.&lt;/li&gt;
-  &lt;li&gt;Comparisons for non-model differences between runners.&lt;/li&gt;
-  &lt;li&gt;Comparison for support of the portability framework for each 
runner.&lt;/li&gt;
-&lt;/ul&gt;
-
-&lt;p&gt;Thank you, and we are looking forward to hearing from you!&lt;/p&gt;
-</description>
-        <pubDate>Fri, 19 Apr 2019 01:00:01 -0700</pubDate>
-        
<link>https://beam.apache.org/blog/2019/04/19/season-of-docs.html</link>
-        <guid 
isPermaLink="true">https://beam.apache.org/blog/2019/04/19/season-of-docs.html</guid>
-        
-        
-        <category>blog</category>
-        
-      </item>
-    
   </channel>
 </rss>
diff --git a/website/generated-content/index.html 
b/website/generated-content/index.html
index 57b28e2..9eb300f 100644
--- a/website/generated-content/index.html
+++ b/website/generated-content/index.html
@@ -193,6 +193,11 @@ limitations under the License.
           </div>
           <div class="hero__blog__cards">
             
+            <a class="hero__blog__cards__card" 
href="/blog/2019/09/04/gsoc-19.html">
+              <div class="hero__blog__cards__card__title">Google Summer of 
Code '19</div>
+              <div class="hero__blog__cards__card__date">Sep 4, 2019</div>
+            </a>
+            
             <a class="hero__blog__cards__card" 
href="/blog/2019/08/22/beam-2.15.0.html">
               <div class="hero__blog__cards__card__title">Apache Beam 
2.15.0</div>
               <div class="hero__blog__cards__card__date">Aug 22, 2019</div>
@@ -203,11 +208,6 @@ limitations under the License.
               <div class="hero__blog__cards__card__date">Jul 31, 2019</div>
             </a>
             
-            <a class="hero__blog__cards__card" 
href="/blog/2019/06/11/looping-timers.html">
-              <div class="hero__blog__cards__card__title">Looping timers in 
Apache Beam</div>
-              <div class="hero__blog__cards__card__date">Jun 11, 2019</div>
-            </a>
-            
           </div>
         </div>
       </div>

Reply via email to