svn commit: r22161 - /dev/spark/KEYS
Author: holden Date: Sat Oct 7 21:34:44 2017 New Revision: 22161 Log: Add holden's apache code signing key to KEYS Modified: dev/spark/KEYS Modified: dev/spark/KEYS == --- dev/spark/KEYS (original) +++ dev/spark/KEYS Sat Oct 7 21:34:44 2017 @@ -278,3 +278,71 @@ eR3RR5PuL0qfpMC8F04mnZA8pXlnWrDWoCt3B+/3 bMvhuKfWYTW83VdY2jtUOEU/P+V5QEKS8IDVw25PCU2s7hw= =vPd0 -END PGP PUBLIC KEY BLOCK- + +pub 4096R/E90ADC5D 2017-09-19 +uid Holden Karau (CODE SIGNING KEY) +sub 4096R/73B5CA24 2017-09-19 + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1 + +mQINBFnBqJkBEADTT910624+3C1MgWIS+pLgkIvah7035F6curoSIYqmM1L6+m90 +8QBDwZXOAsAEwQd0qss0nwhZiUxdpL1Kj21zIL2L4jp7XYPFrBWhQ0zNqs1vA1Ds +IZ1+oDyxu4j4zKgf/v5o6ot5VmTosrVquQNyxqY5iaEwu+CavpojqCe/blGQLCnZ +up4QQa4zriW4LO530s1RM1k137QhY99/nLVgLgp2HysFuaqKIjKYcOpThV4ZORyh +yyQrcxoel0sA1fCoC6Bb9wKlbsr/ziPR8MI4S6XzT1ZCgXA4aUERlDTLxJ3XvXuS +vcV9ty17PZNawnZXdsoEph+uNxllnOaTaVp4dmvncp2AaK3dCts4rcFDkb8x8rxb +RcJCU1vg2cZzwsmgw7Mr/Z9Z+RHoBUfTVMXJ4syuSH/UxQRHJCoszHpL6j3ULu4U +aG6FHcQA/AmSNrFBjqWn9w63IP4SPZsKf4zeBlfDLbmR2MpLu+J1AR8b6mUZ/SAE +96p1x0aoGBRTfkeHJo8sK+hdhrMbqytCFBsf5PR+xzyhB9EGQZma2VphX928vNmJ +xbGEmoT275OgVtQnyW4NdWl+bJfZCz105Lr5boJxMhKeWGTZWDSqPYhlAwDxrRim +4mvL7pmRgtfJj/r1cnZnHQlqCUMjlW7La4KSqgitEDb14MQ12gPOsle36QARAQAB +tDNIb2xkZW4gS2FyYXUgKENPREUgU0lHTklORyBLRVkpIDxob2xkZW5AYXBhY2hl +Lm9yZz6JAjcEEwECACECGwMCHgECF4AFAlnB/NoFCwkIBwMFFQoJCAsFFgIDAQAA +CgkQpwobKekK3F2cbBAAsJYDujNqYW3QMZ9IShLKZg0fHQXU8Rz7sG/rYcjWCu2F +qQYbxg6YN0z2IhFe1L1rtNRTl769VXA0gT9mV3NQkcaKJe1Z3yPiCm6KoVXF1ZNT +a1AE6p7NmT6d8OaY6WZ230lZcliTyMEYe9yc5N+aJwT1bMhqbakOB/AJfFc/8Pd7 +N+sy4a40TLwbmfk9HpGk7y44Y+YXoCg18G9HJekopJzKU6KEX7kCJFpqqm6mgMFv +1lfBvFHHui6y7Ow0s/2WIEjZfNcyQQ+ijsGcdRndCZ5Inuxgj092ENDzTOinp7rd +FyAOmESZETdm6AUoCPxZcIKnhUV4dDolThyXsFTyth2OqfYD+spyZvCj0/jRq0yD +0meqWxR9987sRYsVuPJ5cB0gouMUCmt1XhhHgYa+CYdmwi20PS9mIeyJch8I3fIF +Zhml/o7QqfYUH0WA/axR3CjJh93OhsC1vdQ+ZUhhquFhPbF6yklEFKl/R1sUqt5/ +ZN3zjQysoTQArRqAU9xzYFY/7n7LlklURP2NKkTIiIw2JlPZ2czWNoFG/w1bu/RH 
+W7L2ff5O5IWQAquBUSPEt6LUD+Jc5Mrfm4BlJOKGVeHCPZ/QvPSBlxmSDcJK+qgR +h2p0Tj0rHEGZ6ox6eH9oRkSbFeSh95PEeF7qesW7WSTyyUBD2D5Do1mOjrcbb2OJ +AhwEEAECAAYFAlnB4CQACgkQyBB5rBuuc/zd5hAArnaAtEc0md1lfU4uzwIm+sfw +byg6wH7cZTXNgClVCB/x3dA0BziWDxIfgVK0ItgnVR9Od++PcAAfqiJlLps7aI+x +CnC05vKqsm23DuzjFe6eYqiNnopYgbLdZkn3T6ihUQ/IhsLctNIiKUoEFVJP1gqB +kXN6FD3XdnYCa5pkUZx40etp13fzLkFhPEPwAwl8MfHyzW0vOzKfUrHbGbQ2V1KB +b4gUK7v77g2uLT1apa3mqd53SMNX0bJ8oHQkH9gKIXaEs8fFTOVvpiNbDWlAKZ4E +t75SrAaGtlQBCYe3JKFoGsXxO/0UTjEasvp32XspfD1cJvcXRB3bgfbtcomsa0XF +67cTGqRNw9wY7GKnf4WjjuRApF0BfAYQSES8muW+GcVZUrsZlrMHroZ5s2jJhPuU +rll+at9TtppIddjYrGrkhV2XqKDfX1GA5u5Bp73IE71HBiGkbSqMVXwxI9YmK5XE +WVnFQelmzMsdxi2HyjuoQhRtX6iGHYH4lix1emZm6lMEjhmO5e8K5Qv6Vig9dXoT +gwWMNsSmtQqb1Fs4MYIbcfwaq4Wv+neA7D60sDP+fByV87JBTRHBTRJ/PYPSLl+b +IGYhS05xY+pTGMzMeTLxw8UQKFKcVKFUfhGuEe3LAV/LG8lkqLabxPVIEWI+ARuZ +MTVonSMCicaFQTODVHq5Ag0EWcGomQEQAMupQeAozhq11DVR4kwcx8GDoDyYnv5/ +JtNTJpX9IFCC3bv9auSPdvOAm55tB4jlHVGgRnlymj+GhmtSvUL/rHFRFinVRRmf +Mg1K4Nr9uGykVpjM++ma1C7kcShBiXlOc7ArBbNZsyEbHh4hFsrf3iV/MsU5jBdC +Cc2RGoaNcOQehbXvbR+sq7Hi0xpauYDozNU8oPxwsNbSa8jip0yyOJeKc9P3jVt7 +TtwrzmDVu2Q5EroIbS/7d74U4JASWbAwd7Ij06KOmQx2JZx6yXdLMJyyKQGCm37P +dxzZ2PwDcQah16kOKcKCj0lIIYucrM9+z/lMgOq9zzFD38+7eExF4DoHYjuMbTdl +idz2cgtTpCAYWxyKATtKyT1jXoefeF4d65ZM116U2P8k3QDfnAzIzsumw5bT0CQj +N4NwLquyqfHZk08PpxCgj4AoV3S85bHq6mXLPMFcUyMaJcpvvgesXXT45mw8WvV1 +drvXk2UrawlYOXs1eagOdeAjp+HTECmvxNEpEUdq8rC15UHw4j1C/Gsm3qdhrQZM +GdGvrSNmzXDKRFZRi4OChNfVIEzxxxd2iu5T4/ZuvilZsbGhlHdTGf22KvuQSE6x +nPDF1d8vMFQK2iQb9tZ0YVuouN3k9YSIP8AYNiv3Jt5jBZ9bXj7oju4d65PViS/O +zOjdsD88EwZ7ABEBAAGJAh8EGAECAAkFAlnBqJkCGwwACgkQpwobKekK3F3hnQ/+ +M6ue4Gfm7vLsNnJkhRN8JI7iZvKWFpH7fKBDQLOWJULXO7Nv9fY2VncOCN+j0ukq +EpMdktoCm61uY3LiF3Cr6t1TSzAHwlBaj9GwcXoBlh5L9go1RraLgWieZaxHNZS+ +vl8hBujl/qazue5dcmniQiuvOriSlerc4obioYtXoFodTSQii/7Xr/z3rdXoiB9M +ZJO4K3BhSJbVPiNvrY4fXVAI35hxdB0ba0DQ0Q5vhQ2I62azM41EWYlOQgKKKdbH +Y++dQmRAvYa10vB0scEmmDbiYX1KrnUdngOjcFOCMEhXHx5frDauwbOA4gu3JByP 
+DLn64WZQzk55W8NV7i9hm0Lr3tYoPLw21tRQqSYb3PW0yrjSRLltYyoUVko2P6je +zv9mPC8+psXtBElmJLLp7nJZ6qq4ku22IHOcsLF7T2IHTXj/VM/TKD7qWpSCg7hy +LTipkqDKfo993LnM6ICjpZ1ddWRzpqGCiATk1sYdHyKSjExDAcY5PpLzBlSDa6t+ +hy326k/gOFg31qljQURrAgpJDYXW/4KAPAa6exEYZmb0Kdkw+tcquug+6KCn3F4k +6JH51ypa3Y7LeU8KLaKv8Reg8vD49fi2X7muLGRG10pCGd8Z151n77sauwl3ZMT5 +MTh6A0xHJkPaf2mQH4N6G3o5Vxs6oTv5oGr2vw6CnlU= +=B+j6 +-END PGP PUBLIC KEY BLOCK- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r22160 - /dev/spark/KEYS
Author: holden Date: Sat Oct 7 21:33:21 2017 New Revision: 22160 Log: Copy KEYS file into dev from release so we can append to it. Added: dev/spark/KEYS - copied unchanged from r22159, release/spark/KEYS - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r22159 - /dev/spark/KEYS
Author: holden Date: Sat Oct 7 21:29:35 2017 New Revision: 22159 Log: Remove dev KEYS Removed: dev/spark/KEYS - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r22158 - /dev/spark/KEYS
Author: holden Date: Sat Oct 7 21:23:01 2017 New Revision: 22158 Log: Add Holden's key to KEYS Added: dev/spark/KEYS Added: dev/spark/KEYS == --- dev/spark/KEYS (added) +++ dev/spark/KEYS Sat Oct 7 21:23:01 2017 @@ -0,0 +1,348 @@ +pub 4096R/15E06093 2014-11-13 + Key fingerprint = A864 F0C3 E526 2F9A 229B 85D0 7B16 5D2A 15E0 6093 +uid Andrew Or +sub 4096R/170AE5D6 2014-11-13 + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v2.0.19 (FreeBSD) + +mQINBFRj/j0BEADmfEfAvf1zgrgCdIQeW0zjCxVfXP/amjTNAaO+dZ+opnUo2XlB +NZdFG6NSMK4mZGV/h4ObFyl4YLAmR5oIBxqV1ntpvWF5OZ4N6CXwgzbNWLDXpsOP +iL4098F2hRFC66Yuf+k1blknCtWpifC3Ld5TAsybFXbtnhZqo0a9ZDsmiGiYcEkO +gu6s5n1lxklZPUkflCQDMTQN1Ow+m3WgAmXr1WVV2zgNmNFYkb8dCu4htC8PQ54X +JxpEs6D4rEI06C4rw8SvT8VKFsxxKD/OuQk/LckYioulzQT8y0OgGNDyK7cUDpIM +H8EjRp6Qu1jXpgRoOr9k0WNVV5JEV8zzjZQKHlAEVUHNmSKUdnXuKAChKFPfWU9N +9YQbLEeWZsdlVsh9ocKA1UxTgRh27oUq8S/tGOO1snVFuXaVaGrGWcom4xXIB3Is +zGjCDsVgrErrLMRNCzsAJuitOzcbG459+pGMieI5W4rTWqqENfjSIk6y7WBEJ1Bg +MxxZtKS5aKiANSviugEdCJ26+yg9qqdyRnh7DmnAeXpMaCdeZgP6zRLVqNBCCPtY +9pEhj9KuHmmuO73HSnn9rAjURtKHizV/fXnN8swwlEpsC/ALW21sIgOfAtQ9zdjy +3+Z4yXh/ryDJSXKnL2hXihBTqTJMSAlv548Xk5S84DP12qJ73RAPkp7eJwARAQAB +tCBBbmRyZXcgT3IgPGFuZHJld29yMTRAZ21haWwuY29tPokCOAQTAQIAIgUCVGP+ +PQIbAwYLCQgHAwIGFQgCCQoLBBYCAwECHgECF4AACgkQexZdKhXgYJPxFA/+JQeC +DYTzgDzjP69eEjZDp7PCZ1Q8KVFcDyuUUNXe/4aOykjAkLz4y5rMPW2nO0GX3FuX +Q+4UTcw70cjXQNtooMO9NjBwhqOnxyW6Ef6L3BOwG8TQY2xyDa5/DHlc5PJbLk1R +rCEvGS6Qe91fmwlR3gVVzJocpLJxEUm/ASaPQowkAmYrthg4bIfzIKHzy2HrKzpH +dLSSODJ54Uo8Ba5GmFKOkrR4xWVPZ7ARHDW3GsT3+c4YHJXvrBwhsXv2mTg56N5e +mDTWZj+MFaDAoLO3y1NwyD1Iy45lMoj5+BDl67UpKCxMDaivKcdJCRKz8onMwpr3 +E/5t5mtSOOhvAQHlJEwyN4fi/1TKbqjqBXNMfcdJjyQkSvvqLqnCVDpp/Cacz/Xt +Hg6OcAs/3RNiUZZXO7BkWtsO/FY7fUoj24qNmQ+9GA9HoFAPukNTN9dLzGmFGc2I +BUeUNeYCVyrAmYETh8w8BoyG5swLeRvmU+yUQcsofwxfXPQ3FznCRKObdUtsjanl +o4CDXk//4g8Acs0pcx80KwlFGSvoIarZo551GafIq2j8G9Xnt+s4jYDPWq5CoJcj +5P7hRQLRst8MvuMAjN2Cos7lr7c78S2SKKrhk/6Xlhi8iRrGiDC0DUco2UETxFxn 
+aRup57dEvn0xxMYM/cCVoY9Z1sv2I5vq/XJQEVm5Ag0EVGP+PQEQAOvHlL3YAZE2 +ojS3JrQaXIBPGe7Y1ToJLgVFQ0x6C5JFA3Y0aqR/W4bN4clkPS/6pRVoKo7IsVIc +bURNYqRWbCkk+3HDPTgQurLbHSqobKbfGwp5++FebQruiNesK/sNoCc5GPImuHy3 +iH7Xgr7/gYZipsZL8UWgX6E5bRKjqrlCXu+26B6Zf6SGIkp9z5dBrWOKAxIJQtfa +9SLUL5l6/EVWOjnAgpqqlPufzUEsMjHCuIg/5rGo/ye13zzwbORHJdCDUKS/q+ES +MEFbN5/80LA4HbcGRULXLq1jFwrf63PRZZsoyUw1w0TF16Csjhtm0y2A7dMOzRFO +YCkewy4Dla7vdjFLDShLUaCmCDb76f8n/2b0NDU6jIebEBJ6JSQFjUrilgcXW/Vf +6oZM1PGuDWoDBWfo/+1rpHq5XWTsmiyut91YGr4kfdcjShz72tjLJ+deVsTA/VTv +GD6lgowSzh1o9fOVYpMmrT5sVGN5iYppt7aG0wAm5B0O9X3yntzj0MafIHx/Jtmi +HQ6b+kmDkHIVe+vOp/d1RJnEbCt3NQLivD5Vx71wWVdn+QwtHWyI6YyeAaiOZU3j +tXY7LnoVJYO4ireS/9Gl5s3vupRqirz1P8H52WfOkFQfbSZqZx/HADY+rgmEGQ5B +WRpvH7gGnPluGLY9eGnjizFO1ZVG0Jr1ABEBAAGJAh8EGAECAAkFAlRj/j0CGwwA +CgkQexZdKhXgYJM40RAAoUY36RgSr4ViZWWQGg0WpHhGzOrlq2tgCKdD26SuOEQ8 +hHMfR+q9IK2/vQ784cPOIlGCiZoVW8c3CA0BSaydC7mnYA800jOPrgXb4dTqT/we +5VHuhqU0k9kL/79sX1z3ZFt4pX7eCq0FGwBTbz2/cqNvNlEe0ZAHYKkHk4dxcMes +gY0ScxI3n7LQhlWJ0FosWN41manSml+rIKqQjgei/YMDnE2TsJrDfIF4qlMxcftF +FMmH0ED2nsoJdu54mhoPE4dphKTX9c4cvmH8IBvGjk19YCLG8McVLTNMQ0cs13pG +45vOGdH8hQTPz6Bcyf9k+5sT0mL6lkcY+tf5+d4zeGmeDj8TeFbqvY4aE3vWxpKL +xjChHFptAuFW54tIu2f9qyFHYo4cNPTtCTrTR7oe1rMSaRyYBQImWTre6QsElB6b +Lyi82swpm3m7jyg21sc0gksXV6vmVe6dSw4V69QwSgCEaLFW7ZPa0e/Xj7dPyqJW +vAtyLhUjCKgE2VlII+nmpNuKEVu9satirnnjLbM72T1YAqrqwWNzVqmFvTnVG2ij +9HQSu6KBRFECCQrKTvkhtR25bdpywAaPfFgAssulem2P/tj92Gq8aU31AVmiMc4v +Sn9VVgY6afSDBmzeCrJ4XKe+HsVXFvLjCjF/OsnSGuaE2NIeVW68WgDDuOxrrGY= +=zes/ +-END PGP PUBLIC KEY BLOCK- + +pub 4096R/82667DC1 2014-07-17 + Key fingerprint = 3E1C BD0F 0533 D602 E80C 6E98 6B32 9460 8266 7DC1 +uid Xiangrui Meng (CODE SIGNING KEY) +sub 4096R/1603A24E 2014-07-17 + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v2.0.19 (FreeBSD) + +mQINBFPHIAoBEACf3vXTmVqU6o3h37gAsM2PK/7wXLrH3kHoQfwjg8HX/hawrVnD +BPeBGfcxXKk6470efu4XjEBptHywobuU4YmrMy7fqlCAFAjTTKwtCntDGrPcl/wx +tg8hqF3JC33eVItZuFepOBIWvL6lWjSAugiO4IN2qrBahRTVnU2/NB8yFAMrYpYx 
+P1OcSbyRNImU8oDZWcLeXneBo9a+AQkUKSiU9vObxBd69ioUoxv+kmU3YIrJFbk5 +cuKhU1PNvCQi/XhUaw1l1OMeXkO77FtSJyKdhZzrkcvFOhvyMv2eMxFOL7j8gM0H +cAcSc1qkt5a+ulAH5te/oKjpUov6aBCL982FOFT7o9KOgbNUwW+pmWU3V+g+fiZ1 +i5gCHnWpmhQrZ82Na3i+ASA2Hx9tbVIqys7MWsGtYsiF+zfNBFkbR9z9vHLyDB8c +oaPA8O4rloW+9b8O14ZU4Df8PfxqQfRnfYMELrws5tbZB/Xy0+kWCY0j4tdwWP2a +OZWXe5h3Az7TLSeSBb6VrCHNTA+mroaN4AGKG/3KQJJg6MHVcUYxPLsZmEN5/hLp +JU99ItPu6+4XctbFODVJD8Uq4/a/VyFaaQSxAMpVUpcFoZ2bUPDYOF8+rRxQ0WTD +e8+lxdzwXHKOtKKcDA73IeQo6rv/GfuB8lsB8hPF/fkWpz47XQOMxJb6wwARAQAB +tDJYaWFuZ3J1aSBNZW5nIChDT0RFIFNJR05JTkcgS0VZKSA8bWVuZ0BhcGFjaGUu +b3JnPokCNwQTAQoAIQUCU8cgCgIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAK +CRBrMpRggmZ9wbrTEACRj2NSUdgtrKLqQnQK1UF0M1+68fduUOD+NXIMdJY8TDcZ +Pd/qQfDb0maIb1sH2m4GTwyTFMs6DJK7ESSx0yDjT/Pm5yCFiDQwhnoJxP5CbUTk +myX45zwlQhESK/I2PiRO23f7ba34LaIWLP6CFiOPiiNqDoh2dwzACnh0ykZqiLE1 +PywdNWFfohwi3LammWYik
spark git commit: [SPARK-22156][MLLIB] Fix update equation of learning rate in Word2Vec.scala
Repository: spark Updated Branches: refs/heads/master 2030f1951 -> 5eacc3bfa [SPARK-22156][MLLIB] Fix update equation of learning rate in Word2Vec.scala ## What changes were proposed in this pull request? The current update equation for the learning rate is incorrect when `numIterations` > `1`. This PR fixes it based on the [original C code](https://github.com/tmikolov/word2vec/blob/master/word2vec.c#L393). cc: mengxr ## How was this patch tested? Manual tests: I modified [this example code](https://spark.apache.org/docs/2.1.1/mllib-feature-extraction.html#example). ### `numIterations=1` Code ```scala import org.apache.spark.mllib.feature.{Word2Vec, Word2VecModel} val input = sc.textFile("data/mllib/sample_lda_data.txt").map(line => line.split(" ").toSeq) val word2vec = new Word2Vec() val model = word2vec.fit(input) val synonyms = model.findSynonyms("1", 5) for((synonym, cosineSimilarity) <- synonyms) { println(s"$synonym $cosineSimilarity") } ``` Result ``` 2 0.175856813788414 0 0.10971353203058243 4 0.09818313270807266 3 0.012947646901011467 9 -0.09881238639354706 ``` ### `numIterations=5` Code ```scala import org.apache.spark.mllib.feature.{Word2Vec, Word2VecModel} val input = sc.textFile("data/mllib/sample_lda_data.txt").map(line => line.split(" ").toSeq) val word2vec = new Word2Vec() word2vec.setNumIterations(5) val model = word2vec.fit(input) val synonyms = model.findSynonyms("1", 5) for((synonym, cosineSimilarity) <- synonyms) { println(s"$synonym $cosineSimilarity") } ``` Result ``` 0 0.9898583889007568 2 0.9808019399642944 4 0.9794934391975403 3 0.9506527781486511 9 -0.9065656661987305 ``` Author: Kento NOZAWA Closes #19372 from nzw0301/master. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5eacc3bf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5eacc3bf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5eacc3bf Branch: refs/heads/master Commit: 5eacc3bfa9b9c1435ce04222ac7f943b5f930cf4 Parents: 2030f19 Author: Kento NOZAWA Authored: Sat Oct 7 08:30:48 2017 +0100 Committer: Sean Owen Committed: Sat Oct 7 08:30:48 2017 +0100 -- .../scala/org/apache/spark/mllib/feature/Word2Vec.scala | 12 1 file changed, 8 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5eacc3bf/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 6f96813..b8c306d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -353,11 +353,14 @@ class Word2Vec extends Serializable with Logging { val syn0Global = Array.fill[Float](vocabSize * vectorSize)((initRandom.nextFloat() - 0.5f) / vectorSize) val syn1Global = new Array[Float](vocabSize * vectorSize) +val totalWordsCounts = numIterations * trainWordsCount + 1 var alpha = learningRate for (k <- 1 to numIterations) { val bcSyn0Global = sc.broadcast(syn0Global) val bcSyn1Global = sc.broadcast(syn1Global) + val numWordsProcessedInPreviousIterations = (k - 1) * trainWordsCount + val partial = newSentences.mapPartitionsWithIndex { case (idx, iter) => val random = new XORShiftRandom(seed ^ ((idx + 1) << 16) ^ ((-k - 1) << 8)) val syn0Modify = new Array[Int](vocabSize) @@ -368,11 +371,12 @@ class Word2Vec extends Serializable with Logging { var wc = wordCount if (wordCount - lastWordCount > 1) { lwc = wordCount - // TODO: discount by iteration? 
- alpha = -learningRate * (1 - numPartitions * wordCount.toDouble / (trainWordsCount + 1)) + alpha = learningRate * +(1 - (numPartitions * wordCount.toDouble + numWordsProcessedInPreviousIterations) / + totalWordsCounts) if (alpha < learningRate * 0.0001) alpha = learningRate * 0.0001 - logInfo("wordCount = " + wordCount + ", alpha = " + alpha) + logInfo(s"wordCount = ${wordCount + numWordsProcessedInPreviousIterations}, " + +s"alpha = $alpha") } wc += sentence.length var pos = 0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org