[1/6] nutch git commit: NUTCH-2284 Basic Authentication support for Nutch 2.X REST API.
Repository: nutch Updated Branches: refs/heads/2.x 7fc92a247 -> 699fda4f4 NUTCH-2284 Basic Authentication support for Nutch 2.X REST API. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/52ffc5a9 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/52ffc5a9 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/52ffc5a9 Branch: refs/heads/2.x Commit: 52ffc5a983f261570fd25f10f2f8fcf70d543c88 Parents: 72a99cf Author: Furkan KAMACI Authored: Sun Jun 19 23:27:15 2016 +0300 Committer: Furkan KAMACI Committed: Sun Jun 19 23:27:55 2016 +0300 -- conf/nutch-default.xml | 26 src/java/org/apache/nutch/api/NutchServer.java | 27 +++-- 2 files changed, 51 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/52ffc5a9/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 117737b..f5111f7 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1435,4 +1435,30 @@ + + restapi.auth + false + +Whether to enable HTTP basic authentication for communicating with RESTAPI. +Use the restapi.auth.username and restapi.auth.auth.password properties to configure +your credentials. + + + + + restapi.auth.username + login + +Username for HTTP basic authentication. restapi.auth should be true to use this property. + + + + + restapi.auth.password + secret1 + +Password for HTTP basic authentication. restapi.auth should be true to use this property. 
+ + + http://git-wip-us.apache.org/repos/asf/nutch/blob/52ffc5a9/src/java/org/apache/nutch/api/NutchServer.java -- diff --git a/src/java/org/apache/nutch/api/NutchServer.java b/src/java/org/apache/nutch/api/NutchServer.java index ea316a9..3429beb 100644 --- a/src/java/org/apache/nutch/api/NutchServer.java +++ b/src/java/org/apache/nutch/api/NutchServer.java @@ -45,10 +45,13 @@ import org.apache.nutch.api.resources.JobResource; import org.apache.nutch.api.resources.SeedResource; import org.restlet.Component; import org.restlet.Context; +import org.restlet.data.ChallengeScheme; import org.restlet.data.Protocol; import org.restlet.data.Reference; import org.restlet.ext.jaxrs.JaxRsApplication; import org.restlet.resource.ClientResource; +import org.restlet.security.ChallengeAuthenticator; +import org.restlet.security.MapVerifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -108,8 +111,28 @@ public class NutchServer extends Application { application.setStatusService(new ErrorStatusService()); childContext.getAttributes().put(NUTCH_SERVER, this); -// Attach the application. -component.getDefaultHost().attach(application); +boolean isSecure = configManager.get(ConfigResource.DEFAULT).getBoolean("restapi.auth", false); + +if (!isSecure) { + // Attach the application. + component.getDefaultHost().attach(application); + return; +} + +// Guard the restlet with BASIC authentication. +ChallengeAuthenticator guard = new ChallengeAuthenticator(null, ChallengeScheme.HTTP_BASIC, "testRealm"); +// Instantiates a Verifier of identifier/secret couples based on a simple Map. +MapVerifier mapVerifier = new MapVerifier(); + +// Load a single static login/secret pair. 
+String username = configManager.get(ConfigResource.DEFAULT).get("restapi.auth.username", "login"); +String password = configManager.get(ConfigResource.DEFAULT).get("restapi.auth.password", "secret"); + +mapVerifier.getLocalSecrets().put(username, password.toCharArray()); +guard.setVerifier(mapVerifier); +guard.setNext(application); + +component.getDefaultHost().attach(guard); } @Override
[5/6] nutch git commit: NUTCH-2284 nutch-default.xml descriptions for restapi.auth.username and restapi.auth.password are updated.
NUTCH-2284 nutch-default.xml descriptions for restapi.auth.username and restapi.auth.password are updated. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/8e2552e8 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/8e2552e8 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/8e2552e8 Branch: refs/heads/2.x Commit: 8e2552e88999d19a599844559a9c646b62fb Parents: 4d1e9e3 Author: Furkan KAMACI Authored: Mon Jul 4 14:19:38 2016 +0300 Committer: Furkan KAMACI Committed: Mon Jul 4 14:19:38 2016 +0300 -- conf/nutch-default.xml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/8e2552e8/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index ff22b63..1985dfc 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1450,14 +1450,16 @@ login Username for HTTP basic authentication. restapi.auth should be true to use this property. +"login" is used for username as default. restapi.auth.password - secret1 + secret Password for HTTP basic authentication. restapi.auth should be true to use this property. +"secret" is used for password as default.
[2/2] nutch git commit: Merge branch 'NUTCH-2288' of https://github.com/kamaci/nutch into 2.x this closes #130
Merge branch 'NUTCH-2288' of https://github.com/kamaci/nutch into 2.x this closes #130 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/5a1afbaf Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/5a1afbaf Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/5a1afbaf Branch: refs/heads/2.x Commit: 5a1afbaf7aa14c4adf960c6b0b16a8c0fb066bcc Parents: 699fda4 4c14f20 Author: Lewis John McGibbney Authored: Tue Jul 12 17:10:07 2016 -0700 Committer: Lewis John McGibbney Committed: Tue Jul 12 17:10:07 2016 -0700 -- ivy/ivy.xml | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) --
[1/2] nutch git commit: NUTCH-2288 Upgrade Restlet to 2.3.7.
Repository: nutch Updated Branches: refs/heads/2.x 699fda4f4 -> 5a1afbaf7 NUTCH-2288 Upgrade Restlet to 2.3.7. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/4c14f20b Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/4c14f20b Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/4c14f20b Branch: refs/heads/2.x Commit: 4c14f20bbec554bf790a5c095c0ebebb317e17c6 Parents: 72a99cf Author: Furkan KAMACI Authored: Sun Jun 26 20:49:31 2016 +0300 Committer: Furkan KAMACI Committed: Mon Jul 4 16:04:34 2016 +0300 -- ivy/ivy.xml | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/4c14f20b/ivy/ivy.xml -- diff --git a/ivy/ivy.xml b/ivy/ivy.xml index 5e2f5a3..c909323 100644 --- a/ivy/ivy.xml +++ b/ivy/ivy.xml @@ -76,9 +76,10 @@ - - - + + + +
nutch git commit: NUTCH-2285 Digest Authentication support for Nutch 2.X REST API.
Repository: nutch Updated Branches: refs/heads/2.x 5a1afbaf7 -> 04eb5707e NUTCH-2285 Digest Authentication support for Nutch 2.X REST API. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/04eb5707 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/04eb5707 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/04eb5707 Branch: refs/heads/2.x Commit: 04eb5707e47fc286bee4625a02d79be15c0d0a3e Parents: 5a1afba Author: Furkan KAMACI Authored: Wed Jul 13 23:34:46 2016 +0300 Committer: Furkan KAMACI Committed: Wed Jul 13 23:34:46 2016 +0300 -- conf/nutch-default.xml | 19 --- src/java/org/apache/nutch/api/NutchServer.java | 59 +--- .../api/security/AuthenticationTypeEnum.java| 26 + 3 files changed, 75 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/04eb5707/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 1985dfc..a4fede2 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1437,29 +1437,30 @@ restapi.auth - false + NONE -Whether to enable HTTP basic authentication for communicating with RESTAPI. +Configures authentication type for communicating with RESTAPI. Valid values are BASIC, DIGEST and NONE. +When no authentication type is defined NONE will be used as default which does not provide security. Use the restapi.auth.username and restapi.auth.password properties to configure -your credentials. +your credentials if security is used. restapi.auth.username - login + admin -Username for HTTP basic authentication. restapi.auth should be true to use this property. -"login" is used for username as default. +Username for REST API authentication. restapi.auth property should be set to either BASIC or DIGEST to use this property. +"nutch" is used for username as default. restapi.auth.password - secret + nutch -Password for HTTP basic authentication. restapi.auth should be true to use this property. 
-"secret" is used for password as default. +Password for REST API authentication. restapi.auth property should be set to either BASIC or DIGEST to use this property. +"nutch" is used for password as default. http://git-wip-us.apache.org/repos/asf/nutch/blob/04eb5707/src/java/org/apache/nutch/api/NutchServer.java -- diff --git a/src/java/org/apache/nutch/api/NutchServer.java b/src/java/org/apache/nutch/api/NutchServer.java index 6af991c..af948cd 100644 --- a/src/java/org/apache/nutch/api/NutchServer.java +++ b/src/java/org/apache/nutch/api/NutchServer.java @@ -43,6 +43,7 @@ import org.apache.nutch.api.resources.ConfigResource; import org.apache.nutch.api.resources.DbResource; import org.apache.nutch.api.resources.JobResource; import org.apache.nutch.api.resources.SeedResource; +import org.apache.nutch.api.security.AuthenticationTypeEnum; import org.restlet.Component; import org.restlet.Context; import org.restlet.data.ChallengeScheme; @@ -51,6 +52,7 @@ import org.restlet.data.Reference; import org.restlet.ext.jaxrs.JaxRsApplication; import org.restlet.resource.ClientResource; import org.restlet.security.ChallengeAuthenticator; +import org.restlet.ext.crypto.DigestAuthenticator; import org.restlet.security.MapVerifier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -90,6 +92,10 @@ public class NutchServer extends Application { * 'INFO' however best attempts should always be made to specify a logging * level.
* {@link org.apache.nutch.api.NutchServer} can be run as secure. restapi.auth property + * should be set to BASIC or DIGEST at nutch-site.xml
to enable HTTP basic authentication + * or digest authentication when communicating with RESTAPI. + * Use restapi.auth.username and restapi.auth.auth.password properties at nutch-site.xml
to configure + * credentials when security is enabled with restapi.auth property. * should be set to true at nutch-site.xml
to enable HTTP basic authentication * for communicating with RESTAPI. * Use the restapi.auth.username and restapi.auth.auth.password properties to configure @@ -116,28 +122,31 @@ public class NutchServer extends Application { application.setStatusService(new ErrorStatusService()); childContext.getAttributes().put(NUTCH_SERVER, this); -boolean isSecure = configManager.get(ConfigResource.DEFAULT).getBoolean("restapi.auth", false); - -if (!isSecure) { - // Attach the application. - component.getDefaultHost().attach(app
nutch git commit: NUTCH-2289 SSL support for Nutch 2.X REST API.
Repository: nutch Updated Branches: refs/heads/2.x 04eb5707e -> c210b9f2c NUTCH-2289 SSL support for Nutch 2.X REST API. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/c210b9f2 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/c210b9f2 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/c210b9f2 Branch: refs/heads/2.x Commit: c210b9f2cbcd22c5aa7458f85052262bdaa6df83 Parents: 04eb570 Author: Furkan KAMACI Authored: Thu Jul 14 01:20:34 2016 +0300 Committer: Furkan KAMACI Committed: Thu Jul 14 23:03:39 2016 +0300 -- conf/nutch-default.xml | 31 +++- src/java/org/apache/nutch/api/NutchServer.java | 50 +++- .../api/security/AuthenticationTypeEnum.java| 1 + 3 files changed, 67 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/c210b9f2/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index a4fede2..10904a2 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1439,7 +1439,7 @@ restapi.auth NONE -Configures authentication type for communicating with RESTAPI. Valid values are BASIC, DIGEST and NONE. +Configures authentication type for communicating with RESTAPI. Valid values are BASIC, DIGEST, SSL and NONE. When no authentication type is defined NONE will be used as default which does not provide security. Use the restapi.auth.username and restapi.auth.password properties to configure your credentials if security is used. @@ -1451,7 +1451,7 @@ admin Username for REST API authentication. restapi.auth property should be set to either BASIC or DIGEST to use this property. -"nutch" is used for username as default. +"admin" is used for username as default. @@ -1464,4 +1464,31 @@ + + restapi.auth.ssl.storepath + etc/nutch-ssl.keystore.jks + +Key store path for jks file. restapi.auth property should be set to SSL to use this property. +etc/nutch-ssl.keystore.jks is used for restapi.auth.ssl.storepath as default. 
+ + + + + restapi.auth.ssl.storepass + password + +Key store path for jks file. restapi.auth property should be set to SSL to use this property. +"password" is used for restapi.auth.ssl.storepass as default. + + + + + restapi.auth.ssl.keypass + password + +Key store path for jks file. restapi.auth property should be set to SSL to use this property. +"password" is used for restapi.auth.ssl.keypass as default. + + + http://git-wip-us.apache.org/repos/asf/nutch/blob/c210b9f2/src/java/org/apache/nutch/api/NutchServer.java -- diff --git a/src/java/org/apache/nutch/api/NutchServer.java b/src/java/org/apache/nutch/api/NutchServer.java index af948cd..b5ca6e8 100644 --- a/src/java/org/apache/nutch/api/NutchServer.java +++ b/src/java/org/apache/nutch/api/NutchServer.java @@ -46,6 +46,7 @@ import org.apache.nutch.api.resources.SeedResource; import org.apache.nutch.api.security.AuthenticationTypeEnum; import org.restlet.Component; import org.restlet.Context; +import org.restlet.Server; import org.restlet.data.ChallengeScheme; import org.restlet.data.Protocol; import org.restlet.data.Reference; @@ -54,6 +55,7 @@ import org.restlet.resource.ClientResource; import org.restlet.security.ChallengeAuthenticator; import org.restlet.ext.crypto.DigestAuthenticator; import org.restlet.security.MapVerifier; +import org.restlet.util.Series; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,14 +94,13 @@ public class NutchServer extends Application { * 'INFO' however best attempts should always be made to specify a logging * level.
* {@link org.apache.nutch.api.NutchServer} can be run as secure. restapi.auth property - * should be set to BASIC or DIGEST at nutch-site.xml
to enable HTTP basic authentication - * or digest authentication when communicating with RESTAPI. - * Use restapi.auth.username and restapi.auth.auth.password properties at nutch-site.xml
to configure - * credentials when security is enabled with restapi.auth property. - * should be set to true at nutch-site.xml
to enable HTTP basic authentication - * for communicating with RESTAPI. - * Use the restapi.auth.username and restapi.auth.auth.password properties to configure - * your credentials. + * should be set to BASIC, DIGEST or SSL at nutch-site.xml
to enable HTTP basic authentication, + * digest authentication or SSL when communicating with RESTAPI. + * Set restapi.auth.username and restapi.auth.password properties atnut
nutch git commit: NUTCH-2292 Mavenize the build for nutch-core and nutch-plugins
Repository: nutch Updated Branches: refs/heads/NUTCH-2292 0bf453e57 -> 6e0da2f4c NUTCH-2292 Mavenize the build for nutch-core and nutch-plugins Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/6e0da2f4 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/6e0da2f4 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/6e0da2f4 Branch: refs/heads/NUTCH-2292 Commit: 6e0da2f4c85d71c0078f935b1e3b72a91852d045 Parents: 0bf453e Author: Lewis John McGibbney Authored: Sat Jul 16 13:45:05 2016 -0700 Committer: Lewis John McGibbney Committed: Sat Jul 16 13:45:05 2016 -0700 -- .gitignore| 5 - nutch-core/pom.xml| 2 +- nutch-plugins/pom.xml | 2 +- pom.xml | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/6e0da2f4/.gitignore -- diff --git a/.gitignore b/.gitignore index 7a70f9d..da7dacb 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,7 @@ logs/ target/ nutch-core/target nutch-plugins/target -nutch-plugins/*/target \ No newline at end of file +nutch-plugins/*/target +*.project +*.settings +*.classpath http://git-wip-us.apache.org/repos/asf/nutch/blob/6e0da2f4/nutch-core/pom.xml -- diff --git a/nutch-core/pom.xml b/nutch-core/pom.xml index 62e2e58..ec97b89 100644 --- a/nutch-core/pom.xml +++ b/nutch-core/pom.xml @@ -489,7 +489,7 @@ ${project.build.directory} -${build.finalName}.jar + ${project.build.finalName}.jar ${project.basedir} http://git-wip-us.apache.org/repos/asf/nutch/blob/6e0da2f4/nutch-plugins/pom.xml -- diff --git a/nutch-plugins/pom.xml b/nutch-plugins/pom.xml index e07f487..68b21e6 100644 --- a/nutch-plugins/pom.xml +++ b/nutch-plugins/pom.xml @@ -139,7 +139,7 @@ ${project.build.directory} -${build.finalName}.jar + ${project.build.finalName}.jar ${project.basedir} http://git-wip-us.apache.org/repos/asf/nutch/blob/6e0da2f4/pom.xml -- diff --git a/pom.xml b/pom.xml index a3b9271..57e77d1 100644 --- a/pom.xml +++ b/pom.xml @@ -25,6 
+25,7 @@ org.apache.maven.plugins maven-compiler-plugin +3.5.1 1.7 1.7
nutch git commit: NUTCH-2292 Mavenize the build for nutch-core and nutch-plugins
Repository: nutch Updated Branches: refs/heads/NUTCH-2292 6e0da2f4c -> 1ba35649a NUTCH-2292 Mavenize the build for nutch-core and nutch-plugins Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/1ba35649 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/1ba35649 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/1ba35649 Branch: refs/heads/NUTCH-2292 Commit: 1ba35649aeb2f298eafccfab812483a2b31dc9b6 Parents: 6e0da2f Author: Lewis John McGibbney Authored: Sat Jul 16 14:09:39 2016 -0700 Committer: Lewis John McGibbney Committed: Sat Jul 16 14:09:39 2016 -0700 -- nutch-plugins/index-replace/pom.xml | 4 +-- .../protocol-interactiveselenium/pom.xml| 2 +- pom.xml | 27 +--- 3 files changed, 27 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/1ba35649/nutch-plugins/index-replace/pom.xml -- diff --git a/nutch-plugins/index-replace/pom.xml b/nutch-plugins/index-replace/pom.xml index d39851d..08fdba7 100644 --- a/nutch-plugins/index-replace/pom.xml +++ b/nutch-plugins/index-replace/pom.xml @@ -38,12 +38,12 @@ org.apache.nutch index-basic -${parent.version} +${project.parent.version} org.apache.nutch index-metadata -${parent.version} +${project.parent.version} http://git-wip-us.apache.org/repos/asf/nutch/blob/1ba35649/nutch-plugins/protocol-interactiveselenium/pom.xml -- diff --git a/nutch-plugins/protocol-interactiveselenium/pom.xml b/nutch-plugins/protocol-interactiveselenium/pom.xml index ced9cdc..4e2bc63 100644 --- a/nutch-plugins/protocol-interactiveselenium/pom.xml +++ b/nutch-plugins/protocol-interactiveselenium/pom.xml @@ -43,7 +43,7 @@ org.apache.nutch lib-selenium -${project.parent.version} +${project.project.parent.version} http://git-wip-us.apache.org/repos/asf/nutch/blob/1ba35649/pom.xml -- diff --git a/pom.xml b/pom.xml index 57e77d1..ec35bca 100644 --- a/pom.xml +++ b/pom.xml @@ -1,13 +1,34 @@ + http://maven.apache.org/POM/4.0.0"; 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd";> - -nutch-parent +4.0.0 + +org.apache +apache +10 + org.apache.nutch +nutch-parent 1.13-SNAPSHOT -4.0.0 pom +Apache Nutch ${project.basedir}
nutch git commit: NUTCH-2292 Mavenize the build for nutch-core and nutch-plugins
Repository: nutch Updated Branches: refs/heads/NUTCH-2292 1ba35649a -> 82b03bc15 NUTCH-2292 Mavenize the build for nutch-core and nutch-plugins Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/82b03bc1 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/82b03bc1 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/82b03bc1 Branch: refs/heads/NUTCH-2292 Commit: 82b03bc1575374cb8981ee8f5205cf73077dd3d5 Parents: 1ba3564 Author: Lewis John McGibbney Authored: Sat Jul 16 14:14:58 2016 -0700 Committer: Lewis John McGibbney Committed: Sat Jul 16 14:14:58 2016 -0700 -- nutch-plugins/lib-selenium/pom.xml | 12 +--- nutch-plugins/protocol-interactiveselenium/pom.xml | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/82b03bc1/nutch-plugins/lib-selenium/pom.xml -- diff --git a/nutch-plugins/lib-selenium/pom.xml b/nutch-plugins/lib-selenium/pom.xml index fed912d..cae99cd 100644 --- a/nutch-plugins/lib-selenium/pom.xml +++ b/nutch-plugins/lib-selenium/pom.xml @@ -36,13 +36,19 @@ -org.seleniumhq.selenium selenium-java 2.48.2 +org.seleniumhq.selenium +selenium-java +2.48.2 -com.opera operadriver 1.5 +com.opera +operadriver +1.5 -com.codeborne phantomjsdriver 1.2.1 +com.codeborne +phantomjsdriver +1.2.1 http://git-wip-us.apache.org/repos/asf/nutch/blob/82b03bc1/nutch-plugins/protocol-interactiveselenium/pom.xml -- diff --git a/nutch-plugins/protocol-interactiveselenium/pom.xml b/nutch-plugins/protocol-interactiveselenium/pom.xml index 4e2bc63..ced9cdc 100644 --- a/nutch-plugins/protocol-interactiveselenium/pom.xml +++ b/nutch-plugins/protocol-interactiveselenium/pom.xml @@ -43,7 +43,7 @@ org.apache.nutch lib-selenium -${project.project.parent.version} +${project.parent.version}
[3/3] nutch git commit: Merge branch 'master' of https://github.com/sjwoodard/nutch this closes #129
Merge branch 'master' of https://github.com/sjwoodard/nutch this closes #129 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/9dd251d0 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/9dd251d0 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/9dd251d0 Branch: refs/heads/master Commit: 9dd251d030c5a336901906bfb1fa26830c49b6fe Parents: 5943d11 6c1537b Author: Lewis John McGibbney Authored: Sat Jul 16 14:23:42 2016 -0700 Committer: Lewis John McGibbney Committed: Sat Jul 16 14:23:42 2016 -0700 -- .../java/org/apache/nutch/indexwriter/solr/SolrUtils.java| 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) --
[1/3] nutch git commit: NUTCH-2267 - Solr and Hadoop JAR mismatch
Repository: nutch Updated Branches: refs/heads/master 5943d11ad -> 9dd251d03 NUTCH-2267 - Solr and Hadoop JAR mismatch Explicitly pass in an instance of SystemDefaultHttpClient to CloudSolrClient, otherwise SolrJ will use a default implementation of CloseableHttpClient, which is not present in the HttpClient and HttpCore JARs in Hadoop < 2.8 (see https://issues.apache.org/jira/browse/SOLR-7948 and https://issues.apache.org/jira/browse/HADOOP-12767). Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/f64686bb Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/f64686bb Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/f64686bb Branch: refs/heads/master Commit: f64686bb06cec2e31c9560d7e7e7f050311d62f1 Parents: d29be63 Author: Steven Authored: Mon Jun 27 09:30:52 2016 -0400 Committer: GitHub Committed: Mon Jun 27 09:30:52 2016 -0400 -- .../src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java| 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/f64686bb/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java -- diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java index eec0080..85a9c4c 100644 --- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java +++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java @@ -22,6 +22,7 @@ import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.JobConf; +import org.apache.http.impl.client.SystemDefaultHttpClient; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient; @@ -60,7 +61,8 @@ public class SolrUtils { } public static 
CloudSolrClient getCloudSolrClient(String url) throws MalformedURLException { -CloudSolrClient sc = new CloudSolrClient(url.replace('|', ',')); +SystemDefaultHttpClient httpClient = new SystemDefaultHttpClient(); +CloudSolrClient sc = new CloudSolrClient(url.replace('|', ','), httpClient); sc.setParallelUpdates(true); sc.connect(); return sc;
[2/3] nutch git commit: Use static HttpClient for all SOLR connections
Use static HttpClient for all SOLR connections Changed HttpClient to static based on http://hc.apache.org/httpclient-3.x/performance.html and added connection all SolrJ connections. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/6c1537b1 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/6c1537b1 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/6c1537b1 Branch: refs/heads/master Commit: 6c1537b16ba5f3e0fc28fff5a60595e01437900d Parents: f64686b Author: Steven Authored: Thu Jun 30 07:48:31 2016 -0400 Committer: GitHub Committed: Thu Jun 30 07:48:31 2016 -0400 -- .../java/org/apache/nutch/indexwriter/solr/SolrUtils.java| 8 1 file changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/6c1537b1/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java -- diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java index 85a9c4c..d70bc62 100644 --- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java +++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java @@ -32,6 +32,7 @@ import java.net.MalformedURLException; public class SolrUtils { public static Logger LOG = LoggerFactory.getLogger(SolrUtils.class); + private static HttpClient HTTP_CLIENT = new SystemDefaultHttpClient(); /** * @@ -52,7 +53,7 @@ public class SolrUtils { } } else { for (int i = 0; i < urls.length; i++) { -SolrClient sc = new HttpSolrClient(urls[i]); +SolrClient sc = new HttpSolrClient(urls[i], HTTP_CLIENT); solrClients.add(sc); } } @@ -61,15 +62,14 @@ public class SolrUtils { } public static CloudSolrClient getCloudSolrClient(String url) throws MalformedURLException { -SystemDefaultHttpClient httpClient = new SystemDefaultHttpClient(); -CloudSolrClient sc = new 
CloudSolrClient(url.replace('|', ','), httpClient); +CloudSolrClient sc = new CloudSolrClient(url.replace('|', ','), HTTP_CLIENT); sc.setParallelUpdates(true); sc.connect(); return sc; } public static SolrClient getHttpSolrClient(String url) throws MalformedURLException { -SolrClient sc =new HttpSolrClient(url); +SolrClient sc =new HttpSolrClient(url, HTTP_CLIENT); return sc; }
nutch git commit: NUTCH-2287 Indexer-elastic plugin should use Elasticsearch BulkProcessor and BackoffPolicy
Repository: nutch Updated Branches: refs/heads/master 9dd251d03 -> 80afa3134 NUTCH-2287 Indexer-elastic plugin should use Elasticsearch BulkProcessor and BackoffPolicy Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/80afa313 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/80afa313 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/80afa313 Branch: refs/heads/master Commit: 80afa3134d7e8de07fcfbb03dba1c51fb7c7dce2 Parents: 9dd251d Author: Joseph Naegele Authored: Thu Jun 30 13:42:34 2016 -0400 Committer: Lewis John McGibbney Committed: Sat Jul 16 14:36:35 2016 -0700 -- build.xml | 1 + conf/nutch-default.xml | 25 +- src/plugin/build.xml| 1 + src/plugin/indexer-elastic/build.xml| 13 + src/plugin/indexer-elastic/plugin.xml | 5 +- .../indexwriter/elastic/ElasticConstants.java | 5 +- .../indexwriter/elastic/ElasticIndexWriter.java | 236 +-- .../src/test/conf/nutch-site-test.xml | 57 + .../elastic/TestElasticIndexWriter.java | 221 + 9 files changed, 431 insertions(+), 133 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/80afa313/build.xml -- diff --git a/build.xml b/build.xml index a1c41ed..0ee60a1 100644 --- a/build.xml +++ b/build.xml @@ -1032,6 +1032,7 @@ + http://git-wip-us.apache.org/repos/asf/nutch/blob/80afa313/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 324de5c..67326ee 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1812,8 +1812,8 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter--> elastic.host - The hostname to send documents to using TransportClient. Either host - and port must be defined or cluster. + Comma-separated list of hostnames to send documents to using + TransportClient. Either host and port must be defined or cluster. @@ -1847,6 +1847,27 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter--> Maximum size of the bulk in bytes. 
+ + elastic.exponential.backoff.millis + 100 + Initial delay for the BulkProcessor's exponential backoff policy. + + + + + elastic.exponential.backoff.retries + 10 + Number of times the BulkProcessor's exponential backoff policy + should retry bulk operations. + + + + elastic.bulk.close.timeout + 600 + Number of seconds allowed for the BulkProcessor to complete its + last operation. + + http://git-wip-us.apache.org/repos/asf/nutch/blob/80afa313/src/plugin/build.xml -- diff --git a/src/plugin/build.xml b/src/plugin/build.xml index 75ae2e7..20ef870 100755 --- a/src/plugin/build.xml +++ b/src/plugin/build.xml @@ -106,6 +106,7 @@ + http://git-wip-us.apache.org/repos/asf/nutch/blob/80afa313/src/plugin/indexer-elastic/build.xml -- diff --git a/src/plugin/indexer-elastic/build.xml b/src/plugin/indexer-elastic/build.xml index 38955ff..6955f61 100644 --- a/src/plugin/indexer-elastic/build.xml +++ b/src/plugin/indexer-elastic/build.xml @@ -19,4 +19,17 @@ + + + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/nutch/blob/80afa313/src/plugin/indexer-elastic/plugin.xml -- diff --git a/src/plugin/indexer-elastic/plugin.xml b/src/plugin/indexer-elastic/plugin.xml index d99a665..401e342 100644 --- a/src/plugin/indexer-elastic/plugin.xml +++ b/src/plugin/indexer-elastic/plugin.xml @@ -6,9 +6,9 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -28,7 +28,6 @@ - http://git-wip-us.apache.org/repos/asf/nutch/blob/80afa313/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticConstants.java -- diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/
[2/3] nutch git commit: Merge branch 'NUTCH-2287' of https://github.com/naegelejd/nutch this closes #131
Merge branch 'NUTCH-2287' of https://github.com/naegelejd/nutch this closes #131 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/9ce097b8 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/9ce097b8 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/9ce097b8 Branch: refs/heads/master Commit: 9ce097b8cd63e476908ce64a7af6020cf12ed216 Parents: 80afa31 0fff24a Author: Lewis John McGibbney Authored: Sat Jul 16 14:40:38 2016 -0700 Committer: Lewis John McGibbney Committed: Sat Jul 16 14:40:38 2016 -0700 -- --
[3/3] nutch git commit: Revert botched commit of NUTCH-2267
Revert botched commit of NUTCH-2267 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/fda3e148 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/fda3e148 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/fda3e148 Branch: refs/heads/master Commit: fda3e148bcf01d8b0020d4550127fa30aea8aab6 Parents: 9ce097b Author: Lewis John McGibbney Authored: Sat Jul 16 14:44:27 2016 -0700 Committer: Lewis John McGibbney Committed: Sat Jul 16 14:44:27 2016 -0700 -- .../java/org/apache/nutch/indexwriter/solr/SolrUtils.java| 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/fda3e148/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java -- diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java index d70bc62..eec0080 100644 --- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java +++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java @@ -22,7 +22,6 @@ import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.mapred.JobConf; -import org.apache.http.impl.client.SystemDefaultHttpClient; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient; @@ -32,7 +31,6 @@ import java.net.MalformedURLException; public class SolrUtils { public static Logger LOG = LoggerFactory.getLogger(SolrUtils.class); - private static HttpClient HTTP_CLIENT = new SystemDefaultHttpClient(); /** * @@ -53,7 +51,7 @@ public class SolrUtils { } } else { for (int i = 0; i < urls.length; i++) { -SolrClient sc = new HttpSolrClient(urls[i], HTTP_CLIENT); +SolrClient sc = new HttpSolrClient(urls[i]); solrClients.add(sc); } } @@ 
-62,14 +60,14 @@ public class SolrUtils { } public static CloudSolrClient getCloudSolrClient(String url) throws MalformedURLException { -CloudSolrClient sc = new CloudSolrClient(url.replace('|', ','), HTTP_CLIENT); +CloudSolrClient sc = new CloudSolrClient(url.replace('|', ',')); sc.setParallelUpdates(true); sc.connect(); return sc; } public static SolrClient getHttpSolrClient(String url) throws MalformedURLException { -SolrClient sc =new HttpSolrClient(url, HTTP_CLIENT); +SolrClient sc =new HttpSolrClient(url); return sc; }
[1/3] nutch git commit: NUTCH-2287 Indexer-elastic plugin should use Elasticsearch BulkProcessor and BackoffPolicy
Repository: nutch Updated Branches: refs/heads/master 80afa3134 -> fda3e148b NUTCH-2287 Indexer-elastic plugin should use Elasticsearch BulkProcessor and BackoffPolicy Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/0fff24ac Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/0fff24ac Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/0fff24ac Branch: refs/heads/master Commit: 0fff24acff99df1ce9d3f9c9b1a2c674f9c8e325 Parents: 5943d11 Author: Joseph Naegele Authored: Thu Jun 30 13:42:34 2016 -0400 Committer: Joseph Naegele Committed: Tue Jul 5 18:37:15 2016 -0400 -- build.xml | 1 + conf/nutch-default.xml | 25 +- src/plugin/build.xml| 1 + src/plugin/indexer-elastic/build.xml| 13 + src/plugin/indexer-elastic/plugin.xml | 5 +- .../indexwriter/elastic/ElasticConstants.java | 5 +- .../indexwriter/elastic/ElasticIndexWriter.java | 236 +-- .../src/test/conf/nutch-site-test.xml | 57 + .../elastic/TestElasticIndexWriter.java | 221 + 9 files changed, 431 insertions(+), 133 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/0fff24ac/build.xml -- diff --git a/build.xml b/build.xml index a1c41ed..0ee60a1 100644 --- a/build.xml +++ b/build.xml @@ -1032,6 +1032,7 @@ + http://git-wip-us.apache.org/repos/asf/nutch/blob/0fff24ac/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 324de5c..67326ee 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1812,8 +1812,8 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter--> elastic.host - The hostname to send documents to using TransportClient. Either host - and port must be defined or cluster. + Comma-separated list of hostnames to send documents to using + TransportClient. Either host and port must be defined or cluster. @@ -1847,6 +1847,27 @@ visit https://wiki.apache.org/nutch/SimilarityScoringFilter--> Maximum size of the bulk in bytes. 
+ + elastic.exponential.backoff.millis + 100 + Initial delay for the BulkProcessor's exponential backoff policy. + + + + + elastic.exponential.backoff.retries + 10 + Number of times the BulkProcessor's exponential backoff policy + should retry bulk operations. + + + + elastic.bulk.close.timeout + 600 + Number of seconds allowed for the BulkProcessor to complete its + last operation. + + http://git-wip-us.apache.org/repos/asf/nutch/blob/0fff24ac/src/plugin/build.xml -- diff --git a/src/plugin/build.xml b/src/plugin/build.xml index 75ae2e7..20ef870 100755 --- a/src/plugin/build.xml +++ b/src/plugin/build.xml @@ -106,6 +106,7 @@ + http://git-wip-us.apache.org/repos/asf/nutch/blob/0fff24ac/src/plugin/indexer-elastic/build.xml -- diff --git a/src/plugin/indexer-elastic/build.xml b/src/plugin/indexer-elastic/build.xml index 38955ff..6955f61 100644 --- a/src/plugin/indexer-elastic/build.xml +++ b/src/plugin/indexer-elastic/build.xml @@ -19,4 +19,17 @@ + + + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/nutch/blob/0fff24ac/src/plugin/indexer-elastic/plugin.xml -- diff --git a/src/plugin/indexer-elastic/plugin.xml b/src/plugin/indexer-elastic/plugin.xml index d99a665..401e342 100644 --- a/src/plugin/indexer-elastic/plugin.xml +++ b/src/plugin/indexer-elastic/plugin.xml @@ -6,9 +6,9 @@ The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -28,7 +28,6 @@ - http://git-wip-us.apache.org/repos/asf/nutch/blob/0fff24ac/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticConstants.java -- diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwr
[3/3] nutch git commit: Merge branch 'NUTCH-2280' of https://github.com/stevegy/nutch this closes #134
Merge branch 'NUTCH-2280' of https://github.com/stevegy/nutch this closes #134 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/9f32fe84 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/9f32fe84 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/9f32fe84 Branch: refs/heads/master Commit: 9f32fe84a0a2ec1fc3761cb6b0c277584b0ed484 Parents: fda3e14 753cad0 Author: Lewis John McGibbney Authored: Sat Jul 23 23:13:33 2016 -0700 Committer: Lewis John McGibbney Committed: Sat Jul 23 23:13:33 2016 -0700 -- conf/httpclient-auth.xml.template | 6 ++ .../apache/nutch/protocol/httpclient/Http.java | 79 .../httpclient/HttpFormAuthConfigurer.java | 21 +- .../httpclient/HttpFormAuthentication.java | 62 +++ 4 files changed, 121 insertions(+), 47 deletions(-) --
[1/3] nutch git commit: fix the cookie policy issue when the form authentication receives session cookie in a non-standard format - NUTCH-2280
Repository: nutch Updated Branches: refs/heads/master fda3e148b -> 9f32fe84a fix the cookie policy issue when the form authentication receives session cookie in a non-standard format - NUTCH-2280 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/993e997e Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/993e997e Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/993e997e Branch: refs/heads/master Commit: 993e997e2d5795c0623cdf5614d02c7a8ce405d5 Parents: 5943d11 Author: Steve Yao Authored: Tue Jul 12 19:41:10 2016 +0800 Committer: Steve Yao Committed: Tue Jul 12 19:41:10 2016 +0800 -- .../apache/nutch/protocol/httpclient/Http.java | 79 .../httpclient/HttpFormAuthConfigurer.java | 21 +- .../httpclient/HttpFormAuthentication.java | 28 +++ 3 files changed, 95 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/993e997e/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java -- diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java index 75506ce..9b91180 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java @@ -69,16 +69,16 @@ import org.apache.nutch.util.NutchConfiguration; * session. * * - * Documentation can be found on the Nutch https://wiki.apache.org/nutch/HttpAuthenticationSchemes"; - * >HttpAuthenticationSchemes wiki page. + * Documentation can be found on the Nutch + * https://wiki.apache.org/nutch/HttpAuthenticationSchemes"; > + * HttpAuthenticationSchemes wiki page. * * - * The original description of the motivation to support https://wiki.apache.org/nutch/HttpPostAuthentication"; - * >HttpPostAuthentication is also included on the Nutch wiki. 
Additionally - * HttpPostAuthentication development is documented at the https://issues.apache.org/jira/browse/NUTCH-827";>NUTCH-827 Jira + * The original description of the motivation to support + * https://wiki.apache.org/nutch/HttpPostAuthentication"; > + * HttpPostAuthentication is also included on the Nutch wiki. Additionally + * HttpPostAuthentication development is documented at the + * https://issues.apache.org/jira/browse/NUTCH-827";>NUTCH-827 Jira * issue. * * @author Susam Pal @@ -146,6 +146,7 @@ public class Http extends HttpBase { setCredentials(); } catch (Exception ex) { if (LOG.isErrorEnabled()) { +LOG.error("Http ", ex); LOG.error("Could not read " + authFile + " : " + ex.getMessage()); } } @@ -202,15 +203,15 @@ public class Http extends HttpBase { // NUTCH-1836: Modification to increase the number of available connections // for multi-threaded crawls. // -params.setMaxTotalConnections(conf.getInt( -"mapred.tasktracker.map.tasks.maximum", 5) -* conf.getInt("fetcher.threads.fetch", maxThreadsTotal)); +params.setMaxTotalConnections( +conf.getInt("mapred.tasktracker.map.tasks.maximum", 5) +* conf.getInt("fetcher.threads.fetch", maxThreadsTotal)); // Also set max connections per host to maxThreadsTotal since all threads // might be used to fetch from the same host - otherwise timeout errors can // occur -params.setDefaultMaxConnectionsPerHost(conf.getInt( -"fetcher.threads.fetch", maxThreadsTotal)); +params.setDefaultMaxConnectionsPerHost( +conf.getInt("fetcher.threads.fetch", maxThreadsTotal)); // executeMethod(HttpMethod) seems to ignore the connection timeout on the // connection manager. 
@@ -226,10 +227,8 @@ public class Http extends HttpBase { // prefer UTF-8 headers.add(new Header("Accept-Charset", "utf-8,ISO-8859-1;q=0.7,*;q=0.7")); // prefer understandable formats -headers -.add(new Header( -"Accept", - "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5")); +headers.add(new Header("Accept", + "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5")); // accept gzipped content headers.add(new Header("Accept-Encoding", "x-gzip, gzip, deflate")); hostConf.getParams().setParameter("http.default-headers", headers); @@ -266,7 +265,6 @@ public class Http extends HttpBase { */ private static synchronized void setCredentials()
[2/3] nutch git commit: Format the HttpFormAuthentication.java with eclipse format and add javadoc. Add the httpclient-auth.xml.template for cookie policy config example.
Format the HttpFormAuthentication.java with eclipse format and add javadoc. Add the httpclient-auth.xml.template for cookie policy config example. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/753cad0b Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/753cad0b Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/753cad0b Branch: refs/heads/master Commit: 753cad0bd66ab525eb618d7f0e947eec034e207d Parents: 993e997 Author: Steve Yao Authored: Wed Jul 13 12:21:26 2016 +0800 Committer: Steve Yao Committed: Wed Jul 13 12:21:26 2016 +0800 -- conf/httpclient-auth.xml.template | 6 ++ .../httpclient/HttpFormAuthentication.java | 62 +++- 2 files changed, 40 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/753cad0b/conf/httpclient-auth.xml.template -- diff --git a/conf/httpclient-auth.xml.template b/conf/httpclient-auth.xml.template index ce5ed7e..9d23093 100644 --- a/conf/httpclient-auth.xml.template +++ b/conf/httpclient-auth.xml.template @@ -82,6 +82,9 @@ + + BROWSER_COMPATIBILITY + it is critical that the following fields are substituted: @@ -98,6 +101,9 @@ the field and password respectively * - form element attributes for which we wish to skip fields + * value from is a constant value symbol from +org.apache.commons.httpclient.cookie.CookiePolicy, like BROWSER_COMPATIBILITY, +DEFAULT, RFC_2109, etc. 
More information on HTTP POST can be located at https://wiki.apache.org/nutch/HttpPostAuthentication http://git-wip-us.apache.org/repos/asf/nutch/blob/753cad0b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java -- diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java index a6d4aa4..2f36538 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java @@ -51,8 +51,7 @@ public class HttpFormAuthentication { static { defaultLoginHeaders.put("User-Agent", "Mozilla/5.0"); -defaultLoginHeaders -.put("Accept", +defaultLoginHeaders.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); defaultLoginHeaders.put("Accept-Language", "en-US,en;q=0.5"); defaultLoginHeaders.put("Connection", "keep-alive"); @@ -79,15 +78,12 @@ public class HttpFormAuthentication { Set removedFormFields) { this.authConfigurer.setLoginUrl(loginUrl); this.authConfigurer.setLoginFormId(loginForm); -this.authConfigurer -.setLoginPostData(loginPostData == null ? new HashMap() -: loginPostData); -this.authConfigurer -.setAdditionalPostHeaders(additionalPostHeaders == null ? new HashMap() -: additionalPostHeaders); -this.authConfigurer -.setRemovedFormFields(removedFormFields == null ? new HashSet() -: removedFormFields); +this.authConfigurer.setLoginPostData( +loginPostData == null ? new HashMap() : loginPostData); +this.authConfigurer.setAdditionalPostHeaders(additionalPostHeaders == null +? new HashMap() : additionalPostHeaders); +this.authConfigurer.setRemovedFormFields( +removedFormFields == null ? 
new HashSet() : removedFormFields); this.client = new HttpClient(); } @@ -118,11 +114,11 @@ public class HttpFormAuthentication { // Entity enclosing requests cannot be redirected without user // intervention setLoginHeader(post); - + // NUTCH-2280 LOGGER.debug("FormAuth: set cookie policy"); this.setCookieParams(authConfigurer, post.getParams()); - + post.addParameters(params.toArray(new NameValuePair[0])); int rspCode = client.executeMethod(post); if (LOGGER.isDebugEnabled()) { @@ -143,25 +139,34 @@ public class HttpFormAuthentication { } } } - + /** + * NUTCH-2280 Set the cookie policy value from httpclient-auth.xml for the + * Post httpClient action. + * + * @param fromConfigurer + * - the httpclient-auth.xml values + * + * @param params + * - the HttpMethodParams from the current httpclient instance + * * @throws NoSuchFieldException * @throws SecurityException * @throws IllegalArgumentException * @throws IllegalAccessExcept
nutch git commit: NUTCH-2294 Authorization Support for REST API
Repository: nutch Updated Branches: refs/heads/2.x c210b9f2c -> b7f3fce42 NUTCH-2294 Authorization Support for REST API Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/b7f3fce4 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/b7f3fce4 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/b7f3fce4 Branch: refs/heads/2.x Commit: b7f3fce42acdba00ca53e3be98acb78fc37bd168 Parents: c210b9f Author: Furkan KAMACI Authored: Fri Aug 19 23:48:58 2016 +0300 Committer: Furkan KAMACI Committed: Sat Aug 20 03:58:44 2016 +0300 -- conf/nutch-default.xml | 18 +--- ivy/ivy.xml | 8 +- src/java/org/apache/nutch/api/NutchServer.java | 28 ++--- .../nutch/api/resources/AdminResource.java | 16 ++- .../nutch/api/resources/ConfigResource.java | 9 ++ .../apache/nutch/api/resources/DbResource.java | 7 ++ .../apache/nutch/api/resources/JobResource.java | 7 ++ .../nutch/api/resources/SeedResource.java | 12 ++- .../api/security/AuthorizationRoleEnum.java | 38 +++ .../apache/nutch/api/security/SecurityUtil.java | 107 +++ 10 files changed, 214 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/b7f3fce4/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 10904a2..f1a16fc 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1447,20 +1447,12 @@ - restapi.auth.username - admin + restapi.auth.users + admin|admin|admin,user|user|user -Username for REST API authentication. restapi.auth property should be set to either BASIC or DIGEST to use this property. -"admin" is used for username as default. - - - - - restapi.auth.password - nutch - -Password for REST API authentication. restapi.auth property should be set to either BASIC or DIGEST to use this property. -"nutch" is used for password as default. +Username, password and role combination for REST API authentication/authorization. 
restapi.auth property should be set to either BASIC or DIGEST to use this property. +Username, password and role should be delimited by pipe character (|) Every user should be separated with comma character (,). i.e. admin|admin|admin,user|user|user. +Default is admin|admin|admin,user|user|user http://git-wip-us.apache.org/repos/asf/nutch/blob/b7f3fce4/ivy/ivy.xml -- diff --git a/ivy/ivy.xml b/ivy/ivy.xml index c909323..db42162 100644 --- a/ivy/ivy.xml +++ b/ivy/ivy.xml @@ -76,10 +76,10 @@ - - - - + + + + http://git-wip-us.apache.org/repos/asf/nutch/blob/b7f3fce4/src/java/org/apache/nutch/api/NutchServer.java -- diff --git a/src/java/org/apache/nutch/api/NutchServer.java b/src/java/org/apache/nutch/api/NutchServer.java index b5ca6e8..3bdfc6c 100644 --- a/src/java/org/apache/nutch/api/NutchServer.java +++ b/src/java/org/apache/nutch/api/NutchServer.java @@ -44,6 +44,7 @@ import org.apache.nutch.api.resources.DbResource; import org.apache.nutch.api.resources.JobResource; import org.apache.nutch.api.resources.SeedResource; import org.apache.nutch.api.security.AuthenticationTypeEnum; +import org.apache.nutch.api.security.SecurityUtil; import org.restlet.Component; import org.restlet.Context; import org.restlet.Server; @@ -52,9 +53,10 @@ import org.restlet.data.Protocol; import org.restlet.data.Reference; import org.restlet.ext.jaxrs.JaxRsApplication; import org.restlet.resource.ClientResource; -import org.restlet.security.ChallengeAuthenticator; import org.restlet.ext.crypto.DigestAuthenticator; -import org.restlet.security.MapVerifier; +import org.restlet.security.ChallengeAuthenticator; +import org.restlet.security.LocalVerifier; +import org.restlet.security.MemoryRealm; import org.restlet.util.Series; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -147,6 +149,7 @@ public class NutchServer extends Application { application.add(this); application.setStatusService(new ErrorStatusService()); childContext.getAttributes().put(NUTCH_SERVER, this); 
+application.setRoles(SecurityUtil.getRoles(application)); switch (authenticationType) { case NONE: @@ -155,19 +158,27 @@ public class NutchServer extends Application { break; case BASIC: ChallengeAuthenticator challengeGuard = new ChallengeAuthenticator(null, ChallengeScheme.HTTP_BASIC, "Nutch REST API Realm"); -challengeGuard.setVerifier(retrieveServerCredentials()); +/
[1/2] nutch git commit: NUTCH-2302 RAMConfManager Could Be Constructed With Custom Configuration
Repository: nutch Updated Branches: refs/heads/2.x 5c3a38128 -> 22683a1df NUTCH-2302 RAMConfManager Could Be Constructed With Custom Configuration Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/fd722c89 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/fd722c89 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/fd722c89 Branch: refs/heads/2.x Commit: fd722c896468fe047758891d75a58259c88289d8 Parents: b7f3fce Author: Furkan KAMACI Authored: Sat Aug 20 16:08:47 2016 +0300 Committer: Furkan KAMACI Committed: Sun Aug 21 00:40:45 2016 +0300 -- .../apache/nutch/api/impl/RAMConfManager.java | 65 1 file changed, 65 insertions(+) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/fd722c89/src/java/org/apache/nutch/api/impl/RAMConfManager.java -- diff --git a/src/java/org/apache/nutch/api/impl/RAMConfManager.java b/src/java/org/apache/nutch/api/impl/RAMConfManager.java index 2afd658..13c05fd 100644 --- a/src/java/org/apache/nutch/api/impl/RAMConfManager.java +++ b/src/java/org/apache/nutch/api/impl/RAMConfManager.java @@ -33,19 +33,44 @@ import org.apache.nutch.util.NutchConfiguration; import com.google.common.collect.Maps; +/** + * Configuration manager which holds a map of {@link Configuration} type configurations and ids. + */ public class RAMConfManager implements ConfManager { private Map configurations = Maps.newConcurrentMap(); private AtomicInteger newConfigId = new AtomicInteger(); + /** + * Public constructor which creates a default configuration with id of {@link ConfigResource#DEFAULT}. + */ public RAMConfManager() { configurations.put(ConfigResource.DEFAULT, NutchConfiguration.create()); } + /** + * Public constructor which accepts a configuration id and {@link Configuration} type configuration. + */ + public RAMConfManager(String confId, Configuration configuration) { +configurations.put(confId, configuration); + } + + /** + * Lists configuration keys. 
+ * + * @return Set of configuration keys + */ public Set list() { return configurations.keySet(); } + /** + * Returns configuration map for give configuration id. + * + * @param confId Configuration id. + * @return Configuration for given configuration id. + * {@link ConfigResource#DEFAULT} is used if given configuration id is null. + */ public Configuration get(String confId) { if (confId == null) { return configurations.get(ConfigResource.DEFAULT); @@ -53,6 +78,13 @@ public class RAMConfManager implements ConfManager { return configurations.get(confId); } + /** + * Returns configuration map for give configuration id. + * An empty map is returned if a configuration could not be retrieved for given configuration id. + * + * @param confId Configuration id + * @return map of configurations + */ public Map getAsMap(String confId) { Configuration configuration = configurations.get(confId); if (configuration == null) { @@ -68,6 +100,13 @@ public class RAMConfManager implements ConfManager { return configMap; } + /** + * Sets a property for the configuration which has given configuration id. + * + * @param confId Configuration id + * @param propName property name to set + * @param propValue property value to set + */ public void setProperty(String confId, String propName, String propValue) { if (!configurations.containsKey(confId)) { throw new IllegalArgumentException("Unknown configId '" + confId + "'"); @@ -76,10 +115,23 @@ public class RAMConfManager implements ConfManager { conf.set(propName, propValue); } + /** + * Deletes configuration for given configuration id. + * + * @param confId Configuration id + */ public void delete(String confId) { configurations.remove(confId); } + /** + * Creates hadoop configuration for given Nutch configuration. + * Checks whether it can create a Nutch configuration or not before it creates. + * Throws {@link IllegalArgumentException} if can not pass {{@link #canCreate(NutchConfig)}}. 
+ * + * @param nutchConfig Nutch configuration + * @return created configuration id + */ @Override public String create(NutchConfig nutchConfig) { if (StringUtils.isBlank(nutchConfig.getConfigId())) { @@ -94,6 +146,14 @@ public class RAMConfManager implements ConfManager { return nutchConfig.getConfigId(); } + /** + * Checks can create a Nutch configuration or not. + * + * @param nutchConfig Nutch configuration + * @return True if forcing is enabled at Nutch configuration. + * Otherwise makes a check based on whether there is an existing configuration at configuration set
[2/2] nutch git commit: Merge branch '2.x' of https://git-wip-us.apache.org/repos/asf/nutch into 2.x
Merge branch '2.x' of https://git-wip-us.apache.org/repos/asf/nutch into 2.x Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/22683a1d Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/22683a1d Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/22683a1d Branch: refs/heads/2.x Commit: 22683a1df83fb8100acdda388382e181c1b5b43d Parents: fd722c8 5c3a381 Author: Lewis John McGibbney Authored: Mon Aug 22 20:23:55 2016 -0700 Committer: Lewis John McGibbney Committed: Mon Aug 22 20:23:55 2016 -0700 -- conf/nutch-default.xml | 7 +++ src/java/org/apache/nutch/crawl/GeneratorMapper.java | 7 +++ 2 files changed, 14 insertions(+) --
nutch git commit: NUTCH-2303 NutchServer Could Be Able To Select a Configuration to Use
Repository: nutch Updated Branches: refs/heads/2.x 22683a1df -> 6227f3b17 NUTCH-2303 NutchServer Could Be Able To Select a Configuration to Use Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/6227f3b1 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/6227f3b1 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/6227f3b1 Branch: refs/heads/2.x Commit: 6227f3b171b67e790a089d6fee4d3c65de0e0ee1 Parents: 22683a1 Author: Furkan KAMACI Authored: Tue Aug 23 12:16:00 2016 +0300 Committer: Furkan KAMACI Committed: Tue Aug 23 12:16:00 2016 +0300 -- src/java/org/apache/nutch/api/NutchServer.java | 89 ++-- .../apache/nutch/api/security/SecurityUtil.java | 5 +- 2 files changed, 85 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/6227f3b1/src/java/org/apache/nutch/api/NutchServer.java -- diff --git a/src/java/org/apache/nutch/api/NutchServer.java b/src/java/org/apache/nutch/api/NutchServer.java index 3bdfc6c..e65742d 100644 --- a/src/java/org/apache/nutch/api/NutchServer.java +++ b/src/java/org/apache/nutch/api/NutchServer.java @@ -85,6 +85,8 @@ public class NutchServer extends Application { private Component component; private ConfManager configManager; private JobManager jobMgr; + private String activeConfId; + private long started; private boolean running; @@ -105,7 +107,47 @@ public class NutchServer extends Application { * @see org.apache.nutch.api.security.AuthenticationTypeEnum */ public NutchServer() { -configManager = new RAMConfManager(); +this(new RAMConfManager()); + } + + /** + * Public constructor which accepts {@link RAMConfManager} RAM configuration manager and the port we wish to run the server on as + * well as the logging granularity. If the latter option is not provided via + * {@link org.apache.nutch.api.NutchServer#main(String[])} then it defaults to + * 'INFO' however best attempts should always be made to specify a logging + * level.
+ * {@link org.apache.nutch.api.NutchServer} can be run as secure. restapi.auth property + * should be set to BASIC, DIGEST or SSL at nutch-site.xml
to enable HTTP basic authentication, + * digest authentication or SSL when communicating with RESTAPI. + * Set restapi.auth.username and restapi.auth.password properties at nutch-site.xml
to configure + * credentials when BASIC or DIGEST authentication is used. + * Set restapi.auth.ssl.storepath, restapi.auth.ssl.storepass and restapi.auth.ssl.keypass when SSL is used. + * + * @see org.apache.nutch.api.security.AuthenticationTypeEnum + */ + public NutchServer(RAMConfManager ramConfManager) { +this(ramConfManager, ConfigResource.DEFAULT); + } + + /** + * Public constructor which accepts {@link RAMConfManager} RAM configuration manager, a configuration id to use from + * RAM configuration manager and the port we wish to run the server on as + * well as the logging granularity. If the latter option is not provided via + * {@link org.apache.nutch.api.NutchServer#main(String[])} then it defaults to + * 'INFO' however best attempts should always be made to specify a logging + * level.
+ * {@link org.apache.nutch.api.NutchServer} can be run as secure. restapi.auth property + * should be set to BASIC, DIGEST or SSL at nutch-site.xml
to enable HTTP basic authentication, + * digest authentication or SSL when communicating with RESTAPI. + * Set restapi.auth.username and restapi.auth.password properties at nutch-site.xml
to configure + * credentials when BASIC or DIGEST authentication is used. + * Set restapi.auth.ssl.storepath, restapi.auth.ssl.storepass and restapi.auth.ssl.keypass when SSL is used. + * + * @see org.apache.nutch.api.security.AuthenticationTypeEnum + */ + public NutchServer(RAMConfManager ramConfManager, String confId) { +configManager = ramConfManager; +activeConfId = confId; BlockingQueue runnables = Queues .newArrayBlockingQueue(JOB_CAPACITY); NutchServerPoolExecutor executor = new NutchServerPoolExecutor(10, @@ -116,7 +158,7 @@ public class NutchServer extends Application { component = new Component(); component.getLogger().setLevel(Level.parse(logLevel)); -AuthenticationTypeEnum authenticationType = configManager.get(ConfigResource.DEFAULT).getEnum("restapi.auth", AuthenticationTypeEnum.NONE); +AuthenticationTypeEnum authenticationType = configManager.get(activeConfId).getEnum("restapi.auth", AuthenticationTypeEnum.NONE); if (authenticationType == AuthenticationTypeEnum.SSL) { // Add a new HTTPS server listening on defined port.
[2/2] nutch git commit: Merge branch 'NUTCH-2306' of https://github.com/kamaci/nutch into 2.x
Merge branch 'NUTCH-2306' of https://github.com/kamaci/nutch into 2.x Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/5012c742 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/5012c742 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/5012c742 Branch: refs/heads/2.x Commit: 5012c742c0621897c28a50dbf1ba795f0af4d968 Parents: 6227f3b ed96b10 Author: Lewis John McGibbney Authored: Tue Aug 23 08:57:46 2016 -0700 Committer: Lewis John McGibbney Committed: Tue Aug 23 08:57:46 2016 -0700 -- .../nutch/api/model/response/NutchStatus.java | 65 .../nutch/api/resources/AbstractResource.java | 14 + .../nutch/api/resources/AdminResource.java | 1 + 3 files changed, 80 insertions(+) --
[1/2] nutch git commit: NUTCH-2306 Id of Active Configuration Could Be Stored at NutchStatus and Exposed via REST API
Repository: nutch Updated Branches: refs/heads/2.x 6227f3b17 -> 5012c742c NUTCH-2306 Id of Active Configuration Could Be Stored at NutchStatus and Exposed via REST API Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/ed96b104 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/ed96b104 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/ed96b104 Branch: refs/heads/2.x Commit: ed96b104ddf82bcb20557a29b251c3fd73eb146a Parents: 22683a1 Author: Furkan KAMACI Authored: Tue Aug 23 12:55:54 2016 +0300 Committer: Furkan KAMACI Committed: Tue Aug 23 12:55:54 2016 +0300 -- .../nutch/api/model/response/NutchStatus.java | 65 .../nutch/api/resources/AbstractResource.java | 14 + .../nutch/api/resources/AdminResource.java | 1 + 3 files changed, 80 insertions(+) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/ed96b104/src/java/org/apache/nutch/api/model/response/NutchStatus.java -- diff --git a/src/java/org/apache/nutch/api/model/response/NutchStatus.java b/src/java/org/apache/nutch/api/model/response/NutchStatus.java index 3674447..95aac20 100644 --- a/src/java/org/apache/nutch/api/model/response/NutchStatus.java +++ b/src/java/org/apache/nutch/api/model/response/NutchStatus.java @@ -24,44 +24,109 @@ import java.util.Set; import org.apache.commons.collections.CollectionUtils; import org.apache.nutch.api.model.response.JobInfo.State; +/** + * Information object for status of {@link org.apache.nutch.api.NutchServer}. + * Gives information about when server is started, its configurations, jobs, running jobs + * and active configuration id. 
+ * + * @see org.apache.nutch.api.NutchServer + */ public class NutchStatus { private Date startDate; private Set configuration; private Collection jobs; private Collection runningJobs; + private String activeConfId; + /** + * Gets start date of the {@link org.apache.nutch.api.NutchServer} + * + * @return start date of the server + */ public Date getStartDate() { return startDate; } + /** + * Sets start date of the {@link org.apache.nutch.api.NutchServer} + * + * @param startDate start date + */ public void setStartDate(Date startDate) { this.startDate = startDate; } + /** + * Gets configuration ids + * + * @return configuration ids + */ public Set getConfiguration() { return configuration; } + /** + * Sets configuration ids + * + * @param configuration configuration ids + */ public void setConfiguration(Set configuration) { this.configuration = configuration; } + /** + * Gets jobs + * + * @return jobs + */ public Collection getJobs() { return jobs; } + /** + * Sets jobs + * @param jobs jobs + */ public void setJobs(Collection jobs) { this.jobs = jobs; } + /** + * Gets running jobs + * + * @return running jobs + */ public Collection getRunningJobs() { return purgeFinishedFailedJobs(runningJobs); } + /** + * Sets running jobs + * + * @param runningJobs running jobs + */ public void setRunningJobs(Collection runningJobs) { this.runningJobs = runningJobs; } + /** + * Gets active configuration id + * + * @return active configuration id + */ + public String getActiveConfId() { +return activeConfId; + } + + /** + * Sets active configuration id + * + * @param activeConfId active configuration id + */ + public void setActiveConfId(String activeConfId) { +this.activeConfId = activeConfId; + } + private Collection purgeFinishedFailedJobs( Collection runningJobColl) { if (CollectionUtils.isNotEmpty(runningJobColl)) { http://git-wip-us.apache.org/repos/asf/nutch/blob/ed96b104/src/java/org/apache/nutch/api/resources/AbstractResource.java -- diff --git 
a/src/java/org/apache/nutch/api/resources/AbstractResource.java b/src/java/org/apache/nutch/api/resources/AbstractResource.java index 9e6811e..dfea2dd 100644 --- a/src/java/org/apache/nutch/api/resources/AbstractResource.java +++ b/src/java/org/apache/nutch/api/resources/AbstractResource.java @@ -27,20 +27,34 @@ import org.apache.nutch.api.JobManager; import org.apache.nutch.api.NutchServer; import org.restlet.Context; +/** + * Abstract base class for {@link NutchServer} REST APIs. + */ @Produces({ MediaType.APPLICATION_JSON }) public abstract class AbstractResource { protected ConfManager configManager; protected JobManager jobManager; + protected String activeConfId; protected NutchServer server; + /** + * Constructor method for {@link AbstractResource} + * Retrieves {@link org.apache.nutch
[2/2] nutch git commit: Merge branch 'NUTCH-2301' of https://github.com/kamaci/nutch into 2.x
Merge branch 'NUTCH-2301' of https://github.com/kamaci/nutch into 2.x Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/9ecdc9be Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/9ecdc9be Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/9ecdc9be Branch: refs/heads/2.x Commit: 9ecdc9be391778c10e54c84269c74fcffdb599fd Parents: 5012c74 3bc3d81 Author: Lewis John McGibbney Authored: Tue Aug 23 14:18:58 2016 -0700 Committer: Lewis John McGibbney Committed: Tue Aug 23 14:18:58 2016 -0700 -- src/test/nutch-site.xml | 48 src/test/nutch-ssl.keystore.jks | Bin 0 -> 2300 bytes .../nutch/api/AbstractNutchAPITestBase.java | 186 +++ src/test/org/apache/nutch/api/TestAPI.java | 225 --- src/test/org/apache/nutch/api/TestNutchAPI.java | 100 + 5 files changed, 334 insertions(+), 225 deletions(-) --
[1/2] nutch git commit: NUTCH-2301 Tests for Security Layer of NutchServer Are Created
Repository: nutch Updated Branches: refs/heads/2.x 5012c742c -> 9ecdc9be3 NUTCH-2301 Tests for Security Layer of NutchServer Are Created Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/3bc3d81e Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/3bc3d81e Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/3bc3d81e Branch: refs/heads/2.x Commit: 3bc3d81e964aac59f61951740e848bd429a15b3c Parents: 22683a1 Author: Furkan KAMACI Authored: Tue Aug 23 23:41:43 2016 +0300 Committer: Furkan KAMACI Committed: Tue Aug 23 23:41:43 2016 +0300 -- src/test/nutch-site.xml | 48 src/test/nutch-ssl.keystore.jks | Bin 0 -> 2300 bytes .../nutch/api/AbstractNutchAPITestBase.java | 186 +++ src/test/org/apache/nutch/api/TestAPI.java | 225 --- src/test/org/apache/nutch/api/TestNutchAPI.java | 100 + 5 files changed, 334 insertions(+), 225 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/3bc3d81e/src/test/nutch-site.xml -- diff --git a/src/test/nutch-site.xml b/src/test/nutch-site.xml index e599547..4dbce0c 100644 --- a/src/test/nutch-site.xml +++ b/src/test/nutch-site.xml @@ -29,4 +29,52 @@ + + restapi.auth + NONE + +Configures authentication type for communicating with RESTAPI. Valid values are BASIC, DIGEST, SSL and NONE. +When no authentication type is defined NONE will be used as default which does not provide security. +Use the restapi.auth.username and restapi.auth.password properties to configure +your credentials if security is used. + + + + + restapi.auth.users + admin|admin|admin,user|user|user + +Username, password and role combination for REST API authentication/authorization. restapi.auth property should be set to either BASIC or DIGEST to use this property. +Username, password and role should be delimited by pipe character (|) Every user should be separated with comma character (,). i.e. admin|admin|admin,user|user|user. 
+Default is admin|admin|admin,user|user|user + + + + + restapi.auth.ssl.storepath + nutch-ssl.keystore.jks + +Key store path for jks file. restapi.auth property should be set to SSL to use this property. +etc/nutch-ssl.keystore.jks is used for restapi.auth.ssl.storepath as default. + + + + + restapi.auth.ssl.storepass + password + +Key store path for jks file. restapi.auth property should be set to SSL to use this property. +"password" is used for restapi.auth.ssl.storepass as default. + + + + + restapi.auth.ssl.keypass + password + +Key store path for jks file. restapi.auth property should be set to SSL to use this property. +"password" is used for restapi.auth.ssl.keypass as default. + + + http://git-wip-us.apache.org/repos/asf/nutch/blob/3bc3d81e/src/test/nutch-ssl.keystore.jks -- diff --git a/src/test/nutch-ssl.keystore.jks b/src/test/nutch-ssl.keystore.jks new file mode 100644 index 000..9d0bd01 Binary files /dev/null and b/src/test/nutch-ssl.keystore.jks differ http://git-wip-us.apache.org/repos/asf/nutch/blob/3bc3d81e/src/test/org/apache/nutch/api/AbstractNutchAPITestBase.java -- diff --git a/src/test/org/apache/nutch/api/AbstractNutchAPITestBase.java b/src/test/org/apache/nutch/api/AbstractNutchAPITestBase.java new file mode 100644 index 000..3cdff23 --- /dev/null +++ b/src/test/org/apache/nutch/api/AbstractNutchAPITestBase.java @@ -0,0 +1,186 @@ +/*** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.nutch.api; + +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.api.impl.RAMConfManager; +import org.apache.n
nutch git commit: NUTCH-2308 SSL connection test at TestNutchAPI is implemented.
Repository: nutch Updated Branches: refs/heads/2.x 9ecdc9be3 -> 75d846cf3 NUTCH-2308 SSL connection test at TestNutchAPI is implemented. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/75d846cf Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/75d846cf Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/75d846cf Branch: refs/heads/2.x Commit: 75d846cf3998faeffa6edf5a7d7fec2d22c8d4d9 Parents: 9ecdc9b Author: Furkan KAMACI Authored: Thu Aug 25 23:54:52 2016 +0300 Committer: Furkan KAMACI Committed: Tue Aug 30 19:02:29 2016 +0300 -- conf/nutch-default.xml | 4 +- src/java/org/apache/nutch/api/NutchServer.java | 10 +- .../nutch/api/resources/AdminResource.java | 8 +- .../nutch/api/resources/ConfigResource.java | 8 +- .../apache/nutch/api/resources/DbResource.java | 4 +- .../apache/nutch/api/resources/JobResource.java | 4 +- .../nutch/api/resources/SeedResource.java | 4 +- .../apache/nutch/api/security/SecurityUtil.java | 108 --- .../nutch/api/security/SecurityUtils.java | 107 ++ src/test/nutch-site.xml | 2 +- src/test/nutch-ssl.keystore.jks | Bin 2300 -> 2272 bytes src/test/nutch.cer | Bin 0 -> 921 bytes .../nutch/api/AbstractNutchAPITestBase.java | 89 +-- src/test/org/apache/nutch/api/TestNutchAPI.java | 33 +++--- src/test/testTrustKeyStore | Bin 0 -> 983 bytes 15 files changed, 231 insertions(+), 150 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/75d846cf/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index d2181c5..575ce5d 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1465,10 +1465,10 @@ restapi.auth.ssl.storepath - etc/nutch-ssl.keystore.jks + nutch-ssl.keystore.jks Key store path for jks file. restapi.auth property should be set to SSL to use this property. -etc/nutch-ssl.keystore.jks is used for restapi.auth.ssl.storepath as default. 
+nutch-ssl.keystore.jks is used for restapi.auth.ssl.storepath as default. http://git-wip-us.apache.org/repos/asf/nutch/blob/75d846cf/src/java/org/apache/nutch/api/NutchServer.java -- diff --git a/src/java/org/apache/nutch/api/NutchServer.java b/src/java/org/apache/nutch/api/NutchServer.java index e65742d..802bbef 100644 --- a/src/java/org/apache/nutch/api/NutchServer.java +++ b/src/java/org/apache/nutch/api/NutchServer.java @@ -44,7 +44,7 @@ import org.apache.nutch.api.resources.DbResource; import org.apache.nutch.api.resources.JobResource; import org.apache.nutch.api.resources.SeedResource; import org.apache.nutch.api.security.AuthenticationTypeEnum; -import org.apache.nutch.api.security.SecurityUtil; +import org.apache.nutch.api.security.SecurityUtils; import org.restlet.Component; import org.restlet.Context; import org.restlet.Server; @@ -168,7 +168,7 @@ public class NutchServer extends Application { parameters.add("sslContextFactory", "org.restlet.engine.ssl.DefaultSslContextFactory"); String keyStorePath = configManager.get(activeConfId) - .get("restapi.auth.ssl.storepath", "etc/nutch-ssl.keystore.jks"); + .get("restapi.auth.ssl.storepath", "nutch-ssl.keystore.jks"); parameters.add("keyStorePath", keyStorePath); String keyStorePassword = configManager.get(activeConfId) @@ -191,7 +191,7 @@ public class NutchServer extends Application { application.add(this); application.setStatusService(new ErrorStatusService()); childContext.getAttributes().put(NUTCH_SERVER, this); -application.setRoles(SecurityUtil.getRoles(application)); +application.setRoles(SecurityUtils.getRoles(application)); switch (authenticationType) { case NONE: @@ -201,7 +201,7 @@ public class NutchServer extends Application { case BASIC: ChallengeAuthenticator challengeGuard = new ChallengeAuthenticator(null, ChallengeScheme.HTTP_BASIC, "Nutch REST API Realm"); //Create in-memory users with roles -MemoryRealm basicAuthRealm = SecurityUtil.constructRealm(application, configManager, confId); 
+MemoryRealm basicAuthRealm = SecurityUtils.constructRealm(application, configManager, confId); //Attach verifier to check authentication and enroler to determine roles challengeGuard.setVerifier(basicAuthRealm.getVerifier()); challengeGuard.setEnroler(basicAuthRealm.getEnroler()); @@ -212,7 +212,7 @@ public class NutchServer extends Application { case D
[2/3] nutch git commit: NUTCH-2264 Forbidden APIs are Checked at Build
NUTCH-2264 Forbidden APIs are Checked at Build Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/a671540a Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/a671540a Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/a671540a Branch: refs/heads/2.x Commit: a671540a94d8afafd72a09396c97d9ede43a7ea2 Parents: 9ecdc9b Author: Furkan KAMACI Authored: Mon Aug 29 18:24:50 2016 +0300 Committer: Furkan KAMACI Committed: Mon Aug 29 18:44:25 2016 +0300 -- build.xml | 29 ivy/ivy.xml | 2 ++ .../org/apache/nutch/api/impl/JobWorker.java| 9 +++--- .../nutch/api/resources/AdminResource.java | 3 +- .../nutch/api/resources/SeedResource.java | 9 +++--- .../org/apache/nutch/crawl/DbUpdaterJob.java| 3 +- .../org/apache/nutch/crawl/GeneratorJob.java| 3 +- .../org/apache/nutch/crawl/InjectorJob.java | 5 ++-- .../org/apache/nutch/fetcher/FetcherJob.java| 3 +- .../apache/nutch/fetcher/FetcherReducer.java| 8 +++--- .../apache/nutch/host/HostDbUpdateReducer.java | 5 ++-- .../org/apache/nutch/host/HostInjectorJob.java | 3 +- .../org/apache/nutch/net/URLFilterChecker.java | 5 ++-- .../apache/nutch/net/URLNormalizerChecker.java | 5 ++-- src/java/org/apache/nutch/parse/ParseUtil.java | 8 -- src/java/org/apache/nutch/parse/ParserJob.java | 3 +- src/java/org/apache/nutch/protocol/Content.java | 3 +- .../apache/nutch/protocol/RobotRulesParser.java | 6 ++-- src/java/org/apache/nutch/tools/Benchmark.java | 3 +- src/java/org/apache/nutch/tools/DmozParser.java | 5 ++-- .../org/apache/nutch/tools/ResolveUrls.java | 8 +++--- .../apache/nutch/tools/arc/ArcRecordReader.java | 3 +- .../apache/nutch/tools/proxy/FakeHandler.java | 11 src/java/org/apache/nutch/util/Bytes.java | 3 +- .../org/apache/nutch/util/EncodingDetector.java | 7 +++-- src/java/org/apache/nutch/util/TimingUtil.java | 3 +- src/java/org/apache/nutch/util/URLUtil.java | 5 ++-- .../nutch/util/domain/DomainStatistics.java | 3 +- 
.../nutch/webui/client/impl/RemoteCommand.java | 6 ++-- .../creativecommons/nutch/CCParseFilter.java| 7 +++-- .../indexer/anchor/AnchorIndexingFilter.java| 3 +- .../nutch/indexer/html/HtmlIndexingFilter.java | 3 +- .../indexer/more/TestMoreIndexingFilter.java| 3 +- .../nutch/analysis/lang/HTMLLanguageParser.java | 9 +++--- .../analysis/lang/TestHTMLLanguageParser.java | 5 ++-- .../nutch/protocol/http/api/HttpBase.java | 3 +- .../protocol/http/api/HttpRobotRulesParser.java | 5 ++-- .../protocol/http/api/TestRobotRulesParser.java | 10 --- .../nutch/urlfilter/api/RegexURLFilterBase.java | 6 ++-- .../urlfilter/api/RegexURLFilterBaseTest.java | 12 +--- .../nutch/microformats/reltag/RelTagParser.java | 3 +- .../microformats/reltag/TestRelTagParser.java | 3 +- .../nutch/parse/html/DOMContentUtils.java | 3 +- .../nutch/parse/html/HTMLMetaProcessor.java | 13 + .../nutch/parse/html/TestDOMContentUtils.java | 3 +- .../parse/html/TestRobotsMetaProcessor.java | 3 +- .../apache/nutch/parse/js/JSParseFilter.java| 5 ++-- .../nutch/parse/metatags/MetaTagsParser.java| 3 +- .../parse/metatags/TestMetaTagsParser.java | 7 +++-- .../nutch/parse/tika/DOMContentUtils.java | 4 ++- .../nutch/parse/tika/HTMLMetaProcessor.java | 13 + .../nutch/parse/tika/DOMContentUtilsTest.java | 3 +- .../nutch/parse/tika/TestImageMetadata.java | 5 ++-- .../org/apache/nutch/protocol/file/File.java| 3 +- .../nutch/protocol/file/FileResponse.java | 3 +- .../org/apache/nutch/protocol/ftp/Client.java | 3 +- .../java/org/apache/nutch/protocol/ftp/Ftp.java | 3 +- .../apache/nutch/protocol/ftp/FtpResponse.java | 5 ++-- .../nutch/protocol/ftp/FtpRobotRulesParser.java | 5 ++-- .../nutch/protocol/http/HttpResponse.java | 5 ++-- .../httpclient/HttpBasicAuthentication.java | 5 ++-- .../org/apache/nutch/protocol/sftp/Sftp.java| 3 +- .../scoring/opic/TestOPICScoringFilter.java | 8 +- .../nutch/collection/TestSubcollection.java | 3 +- .../nutch/urlfilter/domain/DomainURLFilter.java | 8 -- 
.../nutch/urlfilter/prefix/PrefixURLFilter.java | 3 +- .../nutch/urlfilter/suffix/SuffixURLFilter.java | 15 ++ .../urlnormalizer/basic/BasicURLNormalizer.java | 12 +++- .../urlnormalizer/regex/RegexURLNormalizer.java | 7 +++-- .../regex/TestRegexURLNormalizer.java | 5 ++-- .../apache/nutch/parse/TestSitemapParser.java | 20 ++ .../apache/nutch/plugin/TestPluginSystem.java | 12 .../apache/nutch/util/TestEncodingDetector.java | 12 .../org/apache/nutch/util/TestGZIPUtils.java| 25 +--
[3/3] nutch git commit: Merge branch 'NUTCH-2264_2' of https://github.com/kamaci/nutch into 2.x
Merge branch 'NUTCH-2264_2' of https://github.com/kamaci/nutch into 2.x Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/32d1486d Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/32d1486d Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/32d1486d Branch: refs/heads/2.x Commit: 32d1486dfd9d1326dbd912126c52a5b58f2a827c Parents: 75d846c a671540 Author: Lewis John McGibbney Authored: Tue Aug 30 09:59:53 2016 -0700 Committer: Lewis John McGibbney Committed: Tue Aug 30 09:59:53 2016 -0700 -- build.xml | 29 ivy/ivy.xml | 2 ++ .../org/apache/nutch/api/impl/JobWorker.java| 9 +++--- .../nutch/api/resources/AdminResource.java | 3 +- .../nutch/api/resources/SeedResource.java | 9 +++--- .../org/apache/nutch/crawl/DbUpdaterJob.java| 3 +- .../org/apache/nutch/crawl/GeneratorJob.java| 3 +- .../org/apache/nutch/crawl/InjectorJob.java | 5 ++-- .../org/apache/nutch/fetcher/FetcherJob.java| 3 +- .../apache/nutch/fetcher/FetcherReducer.java| 8 +++--- .../apache/nutch/host/HostDbUpdateReducer.java | 5 ++-- .../org/apache/nutch/host/HostInjectorJob.java | 3 +- .../org/apache/nutch/net/URLFilterChecker.java | 5 ++-- .../apache/nutch/net/URLNormalizerChecker.java | 5 ++-- src/java/org/apache/nutch/parse/ParseUtil.java | 8 -- src/java/org/apache/nutch/parse/ParserJob.java | 3 +- src/java/org/apache/nutch/protocol/Content.java | 3 +- .../apache/nutch/protocol/RobotRulesParser.java | 6 ++-- src/java/org/apache/nutch/tools/Benchmark.java | 3 +- src/java/org/apache/nutch/tools/DmozParser.java | 5 ++-- .../org/apache/nutch/tools/ResolveUrls.java | 8 +++--- .../apache/nutch/tools/arc/ArcRecordReader.java | 3 +- .../apache/nutch/tools/proxy/FakeHandler.java | 11 src/java/org/apache/nutch/util/Bytes.java | 3 +- .../org/apache/nutch/util/EncodingDetector.java | 7 +++-- src/java/org/apache/nutch/util/TimingUtil.java | 3 +- src/java/org/apache/nutch/util/URLUtil.java | 5 ++-- .../nutch/util/domain/DomainStatistics.java 
| 3 +- .../nutch/webui/client/impl/RemoteCommand.java | 6 ++-- .../creativecommons/nutch/CCParseFilter.java| 7 +++-- .../indexer/anchor/AnchorIndexingFilter.java| 3 +- .../nutch/indexer/html/HtmlIndexingFilter.java | 3 +- .../indexer/more/TestMoreIndexingFilter.java| 3 +- .../nutch/analysis/lang/HTMLLanguageParser.java | 9 +++--- .../analysis/lang/TestHTMLLanguageParser.java | 5 ++-- .../nutch/protocol/http/api/HttpBase.java | 3 +- .../protocol/http/api/HttpRobotRulesParser.java | 5 ++-- .../protocol/http/api/TestRobotRulesParser.java | 10 --- .../nutch/urlfilter/api/RegexURLFilterBase.java | 6 ++-- .../urlfilter/api/RegexURLFilterBaseTest.java | 12 +--- .../nutch/microformats/reltag/RelTagParser.java | 3 +- .../microformats/reltag/TestRelTagParser.java | 3 +- .../nutch/parse/html/DOMContentUtils.java | 3 +- .../nutch/parse/html/HTMLMetaProcessor.java | 13 + .../nutch/parse/html/TestDOMContentUtils.java | 3 +- .../parse/html/TestRobotsMetaProcessor.java | 3 +- .../apache/nutch/parse/js/JSParseFilter.java| 5 ++-- .../nutch/parse/metatags/MetaTagsParser.java| 3 +- .../parse/metatags/TestMetaTagsParser.java | 7 +++-- .../nutch/parse/tika/DOMContentUtils.java | 4 ++- .../nutch/parse/tika/HTMLMetaProcessor.java | 13 + .../nutch/parse/tika/DOMContentUtilsTest.java | 3 +- .../nutch/parse/tika/TestImageMetadata.java | 5 ++-- .../org/apache/nutch/protocol/file/File.java| 3 +- .../nutch/protocol/file/FileResponse.java | 3 +- .../org/apache/nutch/protocol/ftp/Client.java | 3 +- .../java/org/apache/nutch/protocol/ftp/Ftp.java | 3 +- .../apache/nutch/protocol/ftp/FtpResponse.java | 5 ++-- .../nutch/protocol/ftp/FtpRobotRulesParser.java | 5 ++-- .../nutch/protocol/http/HttpResponse.java | 5 ++-- .../httpclient/HttpBasicAuthentication.java | 5 ++-- .../org/apache/nutch/protocol/sftp/Sftp.java| 3 +- .../scoring/opic/TestOPICScoringFilter.java | 8 +- .../nutch/collection/TestSubcollection.java | 3 +- .../nutch/urlfilter/domain/DomainURLFilter.java | 8 -- 
.../nutch/urlfilter/prefix/PrefixURLFilter.java | 3 +- .../nutch/urlfilter/suffix/SuffixURLFilter.java | 15 ++ .../urlnormalizer/basic/BasicURLNormalizer.java | 12 +++- .../urlnormalizer/regex/RegexURLNormalizer.java | 7 +++-- .../regex/TestRegexURLNormalizer.java | 5 ++-- .../apache/nutch/parse/TestSitemapParser.java | 20 ++ .../apache/nutch/plugin/TestPluginSystem.java | 12 .../apache/nutch/util/TestEncodingDetector.java | 12 .../org/apache/nutch/
[1/3] nutch git commit: NUTCH-2264 Forbidden APIs are Checked at Build
Repository: nutch Updated Branches: refs/heads/2.x 75d846cf3 -> 32d1486df http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java -- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java index 0db617c..d7acfdc 100644 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java +++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java @@ -35,6 +35,7 @@ import java.io.IOException; import java.net.InetAddress; import java.net.URL; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.LinkedList; import java.util.List; @@ -114,7 +115,7 @@ public class FtpResponse { if (addr != null && conf.getBoolean("store.ip.address", false) == true) { String ipString = addr.getHostAddress(); // get the ip address page.getMetadata().put(new Utf8("_ip_"), -ByteBuffer.wrap(ipString.getBytes())); +ByteBuffer.wrap(ipString.getBytes(StandardCharsets.UTF_8))); } // idled too long, remote server or ourselves may have timed out, @@ -521,7 +522,7 @@ public class FtpResponse { x.append("\n"); -return new String(x).getBytes(); +return new String(x).getBytes(StandardCharsets.UTF_8); } } http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java -- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java index cc039bd..4ce9a83 100644 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java +++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java @@ -29,6 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
import java.net.URL; +import java.util.Locale; /** * This class is used for parsing robots for urls belonging to FTP protocol. It @@ -63,9 +64,9 @@ public class FtpRobotRulesParser extends RobotRulesParser { */ public BaseRobotRules getRobotRulesSet(Protocol ftp, URL url) { -String protocol = url.getProtocol().toLowerCase(); // normalize to lower +String protocol = url.getProtocol().toLowerCase(Locale.ROOT); // normalize to lower // case -String host = url.getHost().toLowerCase(); // normalize to lower case +String host = url.getHost().toLowerCase(Locale.ROOT); // normalize to lower case BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(protocol + ":" + host); http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java -- diff --git a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java index 773958a..965eaac 100644 --- a/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java +++ b/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java @@ -36,6 +36,7 @@ import javax.net.ssl.SSLSocket; import javax.net.ssl.SSLSocketFactory; import java.net.URL; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -138,7 +139,7 @@ public class HttpResponse implements Response { String ipString = sockAddr.getAddress().getHostAddress(); // get the ip // address page.getMetadata().put(new Utf8("_ip_"), -ByteBuffer.wrap(ipString.getBytes())); +ByteBuffer.wrap(ipString.getBytes(StandardCharsets.UTF_8))); } // make request @@ -182,7 +183,7 @@ public class HttpResponse implements Response { // } reqStr.append("\r\n"); - byte[] reqBytes = reqStr.toString().getBytes(); + byte[] reqBytes = reqStr.toString().getBytes(StandardCharsets.UTF_8); 
req.write(reqBytes); req.flush(); http://git-wip-us.apache.org/repos/asf/nutch/blob/a671540a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java -- diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java b/src/plugin/
nutch git commit: NUTCH-2122 Missing package-info.java classes for webui packages are implemented.
Repository: nutch Updated Branches: refs/heads/2.x 32d1486df -> 8ad3e44a3 NUTCH-2122 Missing package-info.java classes for webui packages are implemented. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/8ad3e44a Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/8ad3e44a Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/8ad3e44a Branch: refs/heads/2.x Commit: 8ad3e44a37fb1146d135cbfaeabff118f573afce Parents: 32d1486 Author: Furkan KAMACI Authored: Tue Aug 30 22:53:26 2016 +0300 Committer: Furkan KAMACI Committed: Tue Aug 30 22:53:26 2016 +0300 -- .../nutch/webui/client/impl/package-info.java | 22 .../nutch/webui/client/model/package-info.java | 22 .../apache/nutch/webui/client/package-info.java | 22 .../apache/nutch/webui/config/package-info.java | 22 .../apache/nutch/webui/model/package-info.java | 22 .../org/apache/nutch/webui/package-info.java| 22 .../nutch/webui/pages/assets/package-info.java | 22 .../webui/pages/components/package-info.java| 22 .../nutch/webui/pages/crawls/package-info.java | 22 .../webui/pages/instances/package-info.java | 22 .../nutch/webui/pages/menu/package-info.java| 22 .../apache/nutch/webui/pages/package-info.java | 22 .../nutch/webui/pages/seed/package-info.java| 22 .../webui/pages/settings/package-info.java | 22 .../nutch/webui/service/impl/package-info.java | 22 .../nutch/webui/service/package-info.java | 22 16 files changed, 352 insertions(+) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/8ad3e44a/src/java/org/apache/nutch/webui/client/impl/package-info.java -- diff --git a/src/java/org/apache/nutch/webui/client/impl/package-info.java b/src/java/org/apache/nutch/webui/client/impl/package-info.java new file mode 100644 index 000..0947062 --- /dev/null +++ b/src/java/org/apache/nutch/webui/client/impl/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Contains implementation of client classes and interfaces for Web UI + */ +package org.apache.nutch.webui.client.impl; + http://git-wip-us.apache.org/repos/asf/nutch/blob/8ad3e44a/src/java/org/apache/nutch/webui/client/model/package-info.java -- diff --git a/src/java/org/apache/nutch/webui/client/model/package-info.java b/src/java/org/apache/nutch/webui/client/model/package-info.java new file mode 100644 index 000..38a585e --- /dev/null +++ b/src/java/org/apache/nutch/webui/client/model/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Contains model classes of client for Web UI + */ +package org.apache.nutch.webui.client.model; + http://git-wip-us.apache.org/repos/asf/nutch/blob/8ad3e44a/src/java/org/apache/nutch/webui/client/package-info.java -
nutch git commit: NUTCH-2314 indexer-elastic2 plugin is used for javadoc and eclipse targets
Repository: nutch Updated Branches: refs/heads/2.x 8ad3e44a3 -> 7dcc5fa69 NUTCH-2314 indexer-elastic2 plugin is used for javadoc and eclipse targets Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/7dcc5fa6 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/7dcc5fa6 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/7dcc5fa6 Branch: refs/heads/2.x Commit: 7dcc5fa69f3edd431b47d127048fd9f97b442fa6 Parents: 8ad3e44 Author: Furkan KAMACI Authored: Fri Sep 2 18:00:46 2016 +0300 Committer: Furkan KAMACI Committed: Sat Sep 3 02:14:32 2016 +0300 -- build.xml | 6 -- default.properties | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/7dcc5fa6/build.xml -- diff --git a/build.xml b/build.xml index f051838..c85db6d 100644 --- a/build.xml +++ b/build.xml @@ -657,7 +657,8 @@ - + + @@ -1020,7 +1021,8 @@ --> - + + http://git-wip-us.apache.org/repos/asf/nutch/blob/7dcc5fa6/default.properties -- diff --git a/default.properties b/default.properties index 8e11ebf..342465a 100644 --- a/default.properties +++ b/default.properties @@ -157,7 +157,7 @@ plugins.index=\ # Indexing Backend Plugins # plugins.indexer=\ - org.apache.nutch.indexwriter.elastic*:\ + org.apache.nutch.indexwriter.elastic2:\ org.apache.nutch.indexwriter.solr* #
[2/2] nutch git commit: NUTCH-2089 Nutch 2.x is moved to compile on JDK 8
NUTCH-2089 Nutch 2.x is moved to compile on JDK 8 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/0ea78907 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/0ea78907 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/0ea78907 Branch: refs/heads/2.x Commit: 0ea78907dee6b07058b66a99e395aea8cf623e92 Parents: 7dcc5fa Author: Furkan KAMACI Authored: Sun Sep 4 00:53:31 2016 +0300 Committer: Furkan KAMACI Committed: Sun Sep 4 01:38:13 2016 +0300 -- src/java/org/apache/nutch/api/NutchServer.java | 23 +++--- .../apache/nutch/api/impl/RAMConfManager.java | 3 + .../nutch/crawl/AbstractFetchSchedule.java | 23 +++--- .../nutch/crawl/AdaptiveFetchSchedule.java | 6 +- .../org/apache/nutch/crawl/FetchSchedule.java | 25 ++ .../org/apache/nutch/crawl/GeneratorJob.java| 20 - .../apache/nutch/crawl/SignatureFactory.java| 7 +- .../nutch/crawl/TextProfileSignature.java | 2 +- .../org/apache/nutch/fetcher/FetcherJob.java| 12 ++- .../org/apache/nutch/indexer/IndexUtil.java | 4 +- .../org/apache/nutch/net/URLNormalizers.java| 16 ++-- .../apache/nutch/parse/NutchSitemapParse.java | 4 +- .../apache/nutch/parse/ParsePluginsReader.java | 6 +- src/java/org/apache/nutch/parse/Parser.java | 2 +- .../org/apache/nutch/parse/ParserChecker.java | 6 +- .../apache/nutch/plugin/PluginRepository.java | 6 +- .../org/apache/nutch/scoring/ScoringFilter.java | 19 ++--- .../org/apache/nutch/storage/StorageUtils.java | 2 +- .../apache/nutch/tools/arc/ArcRecordReader.java | 20 ++--- src/java/org/apache/nutch/util/Bytes.java | 12 +-- .../org/apache/nutch/util/EncodingDetector.java | 10 +-- src/java/org/apache/nutch/util/MimeUtil.java| 4 +- src/java/org/apache/nutch/util/NodeWalker.java | 10 +-- src/java/org/apache/nutch/util/NutchJob.java| 4 +- src/java/org/apache/nutch/util/NutchTool.java | 17 +++- .../apache/nutch/util/PrefixStringMatcher.java | 8 +- .../apache/nutch/util/SuffixStringMatcher.java | 8 +- 
src/java/org/apache/nutch/util/TableUtil.java | 4 +- src/java/org/apache/nutch/util/TimingUtil.java | 2 +- .../apache/nutch/util/TrieStringMatcher.java| 8 +- src/java/org/apache/nutch/util/URLUtil.java | 83 ++-- .../apache/nutch/util/domain/DomainSuffix.java | 5 +- .../nutch/util/domain/TopLevelDomain.java | 4 +- .../org/apache/nutch/parse/feed/FeedParser.java | 2 +- .../indexer/anchor/AnchorIndexingFilter.java| 2 +- .../nutch/indexer/metadata/MetadataIndexer.java | 2 +- .../nutch/indexer/more/MoreIndexingFilter.java | 4 +- .../nutch/analysis/lang/HTMLLanguageParser.java | 15 +++- .../nutch/urlfilter/api/RegexURLFilterBase.java | 12 +-- .../org/apache/nutch/parse/html/DOMBuilder.java | 50 ++-- .../nutch/parse/html/DOMContentUtils.java | 2 +- .../parse/html/XMLCharacterRecognizer.java | 2 +- .../apache/nutch/parse/js/JSParseFilter.java| 6 +- .../org/apache/nutch/parse/swf/SWFParser.java | 2 +- .../org/apache/nutch/parse/tika/DOMBuilder.java | 50 ++-- .../parse/tika/XMLCharacterRecognizer.java | 2 +- .../apache/nutch/parse/tika/TestRSSParser.java | 2 +- .../org/apache/nutch/protocol/file/File.java| 4 +- .../nutch/protocol/file/TestProtocolFile.java | 2 +- .../org/apache/nutch/protocol/ftp/Client.java | 2 +- .../java/org/apache/nutch/protocol/ftp/Ftp.java | 4 +- .../DummySSLProtocolSocketFactory.java | 2 +- .../apache/nutch/protocol/httpclient/Http.java | 4 +- .../httpclient/HttpBasicAuthentication.java | 4 +- .../apache/nutch/scoring/link/package-info.java | 3 +- .../nutch/scoring/opic/OPICScoringFilter.java | 2 +- .../nutch/collection/CollectionManager.java | 2 +- .../nutch/urlfilter/domain/DomainURLFilter.java | 11 +-- .../nutch/urlfilter/domain/package-info.java| 2 - .../nutch/urlfilter/prefix/PrefixURLFilter.java | 2 +- .../nutch/urlfilter/suffix/SuffixURLFilter.java | 14 ++-- .../nutch/urlfilter/validator/UrlValidator.java | 10 +-- .../urlnormalizer/regex/RegexURLNormalizer.java | 4 +- .../nutch/api/AbstractNutchAPITestBase.java | 2 +- 
.../org/apache/nutch/crawl/TestGenerator.java | 2 +- .../org/apache/nutch/fetcher/TestFetcher.java | 6 +- .../org/apache/nutch/util/CrawlTestUtil.java| 7 +- 67 files changed, 318 insertions(+), 309 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/java/org/apache/nutch/api/NutchServer.java -- diff --git a/src/java/org/apache/nutch/api/NutchServer.java b/src/java/org/apache/nutch/api/NutchServer.java index 802bbef..5118497 100644 --- a/src/java/org/apache/nutch/api/NutchServer.java +++ b/sr
[1/2] nutch git commit: NUTCH-2089 Nutch 2.x is moved to compile on JDK 8
Repository: nutch Updated Branches: refs/heads/2.x 7dcc5fa69 -> 0ea78907d http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRSSParser.java -- diff --git a/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRSSParser.java b/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRSSParser.java index 67d3dcc..19035c0 100644 --- a/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRSSParser.java +++ b/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRSSParser.java @@ -59,7 +59,7 @@ public class TestRSSParser { /** * * The test method: tests out the following 2 asserts: - * + * * * * There are 3 outlinks read from the sample rss file http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java -- diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java index 0695439..20ba474 100644 --- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java +++ b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java @@ -99,8 +99,8 @@ public class File implements Protocol { * * @param url * Text containing the url - * @param datum - * The CrawlDatum object corresponding to the url + * @param page + * {@link WebPage} object relative to the URL * * @return {@link ProtocolOutput} object for the content of the file indicated * by url http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java -- diff --git a/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java b/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java index 856a649..952648f 100644 --- 
a/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java +++ b/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java @@ -39,7 +39,7 @@ import static org.junit.Assert.*; * * * Unit tests for the {@link File}Protocol. - * + * * . */ public class TestProtocolFile { http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java -- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java index ffa2091..8b272ec 100644 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java +++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java @@ -54,7 +54,7 @@ import org.apache.commons.net.ftp.FTPConnectionClosedException; * servers out there, when partial downloading is enforeced by closing data * channel socket on our client side, the server side immediately closes control * channel (socket). Our codes deal with such a bad behavior. (4) LIST is used - * to obtain remote file attributes if possible. MDTM & SIZE would be nice, but + * to obtain remote file attributes if possible. MDTM & SIZE would be nice, but * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single * thread? Do not use it at all. 
* http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java -- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java index 3f3a7e8..9f3f9c5 100644 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java +++ b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java @@ -114,8 +114,8 @@ public class Ftp implements Protocol { * * @param url * Text containing the ftp url - * @param datum - * The CrawlDatum object corresponding to the url + * @param page + * {@link WebPage} object relative to the URL * * @return {@link ProtocolOutput} object for the url */ http://git-wip-us.apache.org/repos/asf/nutch/blob/0ea78907/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java -- diff --git a/src/plugin/protocol-httpclient/sr
nutch git commit: NUTCH-2329 Update Slf4j logging for Java 8 and upgrade miredot plugin version
Repository: nutch Updated Branches: refs/heads/master d4c924e56 -> 9092e233f NUTCH-2329 Update Slf4j logging for Java 8 and upgrade miredot plugin version Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/9092e233 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/9092e233 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/9092e233 Branch: refs/heads/master Commit: 9092e233f49bd6c1edbe768efd590af4bb2a4b55 Parents: d4c924e Author: Lewis John McGibbney Authored: Tue Oct 18 12:39:23 2016 -0700 Committer: Lewis John McGibbney Committed: Tue Oct 18 12:39:23 2016 -0700 -- ivy/ivy.xml| 4 ++-- ivy/mvn.template | 6 +++--- src/java/org/apache/nutch/fetcher/Fetcher.java | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/9092e233/ivy/ivy.xml -- diff --git a/ivy/ivy.xml b/ivy/ivy.xml index adc7e91..853a639 100644 --- a/ivy/ivy.xml +++ b/ivy/ivy.xml @@ -128,8 +128,8 @@ - - + + http://git-wip-us.apache.org/repos/asf/nutch/blob/9092e233/ivy/mvn.template -- diff --git a/ivy/mvn.template b/ivy/mvn.template index 68813fc..8e3c794 100644 --- a/ivy/mvn.template +++ b/ivy/mvn.template @@ -1,4 +1,4 @@ - +
[3/3] nutch git commit: Merge branch 'NUTCH-2350' of https://github.com/kamaci/nutch into 2.x
Merge branch 'NUTCH-2350' of https://github.com/kamaci/nutch into 2.x Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/7d3e45f2 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/7d3e45f2 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/7d3e45f2 Branch: refs/heads/2.x Commit: 7d3e45f2314504350f69417f1f14df62b1b8bef5 Parents: def0677 6e074fc Author: Lewis John McGibbney Authored: Wed Jan 18 09:07:33 2017 -0800 Committer: Lewis John McGibbney Committed: Wed Jan 18 09:07:33 2017 -0800 -- .../org/apache/nutch/webui/client/model/NutchStatus.java| 9 + 1 file changed, 9 insertions(+) --
[2/3] nutch git commit: NUTCH-2350 Added missing activeConfId field to NutchStatus.
NUTCH-2350 Added missing activeConfId field to NutchStatus. Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/6e074fc0 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/6e074fc0 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/6e074fc0 Branch: refs/heads/2.x Commit: 6e074fc0b61f421cb7bc516e92dea33c3ce23fd5 Parents: 6e3c34d Author: kamaci Authored: Sun Jan 15 23:09:56 2017 +0200 Committer: kamaci Committed: Sun Jan 15 23:09:56 2017 +0200 -- .../org/apache/nutch/webui/client/model/NutchStatus.java| 9 + 1 file changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/6e074fc0/src/java/org/apache/nutch/webui/client/model/NutchStatus.java -- diff --git a/src/java/org/apache/nutch/webui/client/model/NutchStatus.java b/src/java/org/apache/nutch/webui/client/model/NutchStatus.java index 0c5c425..047399d 100644 --- a/src/java/org/apache/nutch/webui/client/model/NutchStatus.java +++ b/src/java/org/apache/nutch/webui/client/model/NutchStatus.java @@ -27,6 +27,7 @@ public class NutchStatus implements Serializable { private Set configuration; private Collection jobs; private Collection runningJobs; + private String activeConfId; public Date getStartDate() { return startDate; @@ -59,4 +60,12 @@ public class NutchStatus implements Serializable { public void setRunningJobs(Collection runningJobs) { this.runningJobs = runningJobs; } + + public String getActiveConfId() { +return activeConfId; + } + + public void setActiveConfId(String activeConfId) { +this.activeConfId = activeConfId; + } }
[1/3] nutch git commit: NUTCH-2344 Authentication support for Web GUI
Repository: nutch Updated Branches: refs/heads/2.x 6e3c34db1 -> 7d3e45f23 NUTCH-2344 Authentication support for Web GUI Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/def06773 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/def06773 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/def06773 Branch: refs/heads/2.x Commit: def067735c5a6dc46d867c4c89cb176a275b1967 Parents: 6e3c34d Author: kamaci Authored: Mon Jan 9 17:35:49 2017 +0200 Committer: kamaci Committed: Mon Jan 9 17:35:49 2017 +0200 -- conf/nutch-default.xml | 11 +++ ivy/ivy.xml | 1 + .../apache/nutch/webui/NutchUiApplication.java | 57 .../nutch/webui/NutchUiApplication.properties | 1 + .../nutch/webui/pages/AbstractBasePage.java | 3 +- .../apache/nutch/webui/pages/LogOutPage.java| 14 +++- .../nutch/webui/pages/assets/nutch-style.css| 48 ++ .../webui/pages/auth/AuthenticatedWebPage.java | 24 +++ .../webui/pages/auth/AuthorizationStrategy.java | 52 +++ .../nutch/webui/pages/auth/SignInPage.html | 29 .../nutch/webui/pages/auth/SignInPage.java | 70 .../nutch/webui/pages/auth/SignInSession.java | 51 ++ .../org/apache/nutch/webui/pages/auth/User.java | 54 +++ .../nutch/webui/pages/auth/package-info.java| 22 ++ 14 files changed, 435 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/def06773/conf/nutch-default.xml -- diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 575ce5d..52cb920 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -1490,4 +1490,15 @@ + +webgui.auth.users +admin|admin,user|user + + Username, password combination for Web GUI authentication. + Username and password should be delimited by pipe character (|) + Every user should be separated with comma character (,). i.e. admin|admin,user|user. 
+ Default is admin|admin,user|user + + + http://git-wip-us.apache.org/repos/asf/nutch/blob/def06773/ivy/ivy.xml -- diff --git a/ivy/ivy.xml b/ivy/ivy.xml index e173e71..9f43252 100644 --- a/ivy/ivy.xml +++ b/ivy/ivy.xml @@ -155,6 +155,7 @@ + http://git-wip-us.apache.org/repos/asf/nutch/blob/def06773/src/java/org/apache/nutch/webui/NutchUiApplication.java -- diff --git a/src/java/org/apache/nutch/webui/NutchUiApplication.java b/src/java/org/apache/nutch/webui/NutchUiApplication.java index 6fd2396..49bee56 100644 --- a/src/java/org/apache/nutch/webui/NutchUiApplication.java +++ b/src/java/org/apache/nutch/webui/NutchUiApplication.java @@ -16,11 +16,22 @@ */ package org.apache.nutch.webui; +import org.apache.nutch.api.ConfManager; +import org.apache.nutch.api.impl.RAMConfManager; +import org.apache.nutch.api.resources.ConfigResource; import org.apache.nutch.webui.pages.DashboardPage; import org.apache.nutch.webui.pages.assets.NutchUiCssReference; +import org.apache.nutch.webui.pages.auth.AuthorizationStrategy; +import org.apache.nutch.webui.pages.auth.SignInSession; +import org.apache.nutch.webui.pages.auth.User; +import org.apache.wicket.Session; import org.apache.wicket.markup.html.WebPage; import org.apache.wicket.protocol.http.WebApplication; +import org.apache.wicket.request.Request; +import org.apache.wicket.request.Response; import org.apache.wicket.spring.injection.annot.SpringComponentInjector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.BeansException; import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContextAware; @@ -33,12 +44,20 @@ import de.agilecoders.wicket.core.settings.SingleThemeProvider; import de.agilecoders.wicket.core.settings.Theme; import de.agilecoders.wicket.extensions.markup.html.bootstrap.icon.FontAwesomeCssReference; +import java.util.HashMap; +import java.util.Map; + @Component public class NutchUiApplication extends WebApplication 
implements ApplicationContextAware { private static final String THEME_NAME = "bootstrap"; private ApplicationContext context; + private Map userMap = new HashMap<>(); + private ConfManager configManager = new RAMConfManager(); + + private static final Logger LOG = LoggerFactory.getLogger(NutchUiApplication.class); + /** * @see org.apache.wicket.Application#getHomePage() */ @@ -53,6 +72,9 @@ public class NutchUiApplication extends WebApplication implement
[1/2] nutch git commit: NUTCH-2346 Types are checked at object equality
Repository: nutch Updated Branches: refs/heads/2.x 54300a89e -> 4d63474d7 NUTCH-2346 Types are checked at object equality Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/170f8c13 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/170f8c13 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/170f8c13 Branch: refs/heads/2.x Commit: 170f8c1375c8826c6397de0eb80e2fa29d2bfe5f Parents: 6e3c34d Author: kamaci Authored: Mon Jan 9 17:40:04 2017 +0200 Committer: kamaci Committed: Mon Jan 9 17:40:04 2017 +0200 -- src/java/org/apache/nutch/crawl/GeneratorJob.java | 8 src/java/org/apache/nutch/metadata/Metadata.java | 7 +++ 2 files changed, 11 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/170f8c13/src/java/org/apache/nutch/crawl/GeneratorJob.java -- diff --git a/src/java/org/apache/nutch/crawl/GeneratorJob.java b/src/java/org/apache/nutch/crawl/GeneratorJob.java index 1627590..3bfa249 100644 --- a/src/java/org/apache/nutch/crawl/GeneratorJob.java +++ b/src/java/org/apache/nutch/crawl/GeneratorJob.java @@ -119,6 +119,14 @@ public class GeneratorJob extends NutchTool implements Tool { @Override public boolean equals(Object obj) { + if (obj == null) { +return false; + } + + if (this.getClass() != obj.getClass()) { +return false; + } + SelectorEntry other = (SelectorEntry) obj; if (!url.equals(other.url)) return false; http://git-wip-us.apache.org/repos/asf/nutch/blob/170f8c13/src/java/org/apache/nutch/metadata/Metadata.java -- diff --git a/src/java/org/apache/nutch/metadata/Metadata.java b/src/java/org/apache/nutch/metadata/Metadata.java index 03c5db8..a56ec2b 100644 --- a/src/java/org/apache/nutch/metadata/Metadata.java +++ b/src/java/org/apache/nutch/metadata/Metadata.java @@ -190,13 +190,12 @@ public class Metadata implements Writable, CreativeCommons, DublinCore, return false; } -Metadata other = null; -try { - other = (Metadata) o; -} catch (ClassCastException 
cce) { +if (this.getClass() != o.getClass()) { return false; } +Metadata other = (Metadata) o; + if (other.size() != size()) { return false; }
[2/2] nutch git commit: Merge branch 'NUTCH-2346' of https://github.com/kamaci/nutch into 2.x
Merge branch 'NUTCH-2346' of https://github.com/kamaci/nutch into 2.x Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/4d63474d Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/4d63474d Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/4d63474d Branch: refs/heads/2.x Commit: 4d63474d705de1c49e066034aa4b771837a24b57 Parents: 54300a8 170f8c1 Author: Lewis John McGibbney Authored: Sat Jan 21 10:40:19 2017 -0800 Committer: Lewis John McGibbney Committed: Sat Jan 21 10:40:19 2017 -0800 -- src/java/org/apache/nutch/crawl/GeneratorJob.java | 8 src/java/org/apache/nutch/metadata/Metadata.java | 7 +++ 2 files changed, 11 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/4d63474d/src/java/org/apache/nutch/crawl/GeneratorJob.java --
nutch git commit: NUTCH-2346v2 Check Types at Object Equality v2
Repository: nutch Updated Branches: refs/heads/2.x 4d63474d7 -> 022ed5c03 NUTCH-2346v2 Check Types at Object Equality v2 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/022ed5c0 Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/022ed5c0 Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/022ed5c0 Branch: refs/heads/2.x Commit: 022ed5c03206fab821770f85c2711f7c01edb17e Parents: 4d63474 Author: Lewis John McGibbney Authored: Thu Jan 26 14:14:42 2017 -0800 Committer: Lewis John McGibbney Committed: Thu Jan 26 14:14:42 2017 -0800 -- src/java/org/apache/nutch/metadata/Metadata.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/nutch/blob/022ed5c0/src/java/org/apache/nutch/metadata/Metadata.java -- diff --git a/src/java/org/apache/nutch/metadata/Metadata.java b/src/java/org/apache/nutch/metadata/Metadata.java index a56ec2b..99e3c76 100644 --- a/src/java/org/apache/nutch/metadata/Metadata.java +++ b/src/java/org/apache/nutch/metadata/Metadata.java @@ -190,7 +190,7 @@ public class Metadata implements Writable, CreativeCommons, DublinCore, return false; } -if (this.getClass() != o.getClass()) { +if (!Metadata.class.isAssignableFrom(o.getClass())) { return false; }
[nutch] 03/03: NUTCH-2292 fix numerous Maven compiler warnings and replace all calls to System.exit
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch NUTCH-2292 in repository https://gitbox.apache.org/repos/asf/nutch.git commit 10b23cd0b8d53fd250253160162c8d3deb0cb9ea Author: Lewis John McGibbney AuthorDate: Tue Mar 7 22:52:25 2017 -0800 NUTCH-2292 fix numerous Maven compiler warnings and replace all calls to System.exit --- .gitignore | 7 ++- nutch-core/pom.xml | 4 ++-- .../src/main/java/org/apache/nutch/crawl/CrawlDb.java | 3 +-- .../java/org/apache/nutch/crawl/CrawlDbMerger.java | 4 +--- .../java/org/apache/nutch/crawl/CrawlDbReader.java | 4 +--- .../java/org/apache/nutch/crawl/DeduplicationJob.java | 4 +--- .../main/java/org/apache/nutch/crawl/Generator.java| 4 +--- .../src/main/java/org/apache/nutch/crawl/Injector.java | 3 +-- .../src/main/java/org/apache/nutch/crawl/LinkDb.java | 3 +-- .../main/java/org/apache/nutch/crawl/LinkDbMerger.java | 4 +--- .../main/java/org/apache/nutch/crawl/LinkDbReader.java | 4 +--- .../main/java/org/apache/nutch/fetcher/Fetcher.java| 3 +-- .../main/java/org/apache/nutch/hostdb/ReadHostDb.java | 3 +-- .../java/org/apache/nutch/hostdb/UpdateHostDb.java | 3 +-- .../java/org/apache/nutch/indexer/CleaningJob.java | 4 +--- .../apache/nutch/indexer/IndexingFiltersChecker.java | 18 -- .../java/org/apache/nutch/indexer/IndexingJob.java | 4 +--- .../java/org/apache/nutch/net/URLFilterChecker.java| 7 ++- .../org/apache/nutch/net/URLNormalizerChecker.java | 5 + .../org/apache/nutch/parse/ParsePluginsReader.java | 3 +-- .../main/java/org/apache/nutch/parse/ParseSegment.java | 7 ++- .../java/org/apache/nutch/parse/ParserChecker.java | 6 ++ .../org/apache/nutch/protocol/RobotRulesParser.java| 3 +-- .../org/apache/nutch/scoring/webgraph/LinkDumper.java | 4 +--- .../org/apache/nutch/scoring/webgraph/LinkRank.java| 3 +-- .../org/apache/nutch/scoring/webgraph/NodeDumper.java | 4 +--- .../apache/nutch/scoring/webgraph/ScoreUpdater.java| 4 +--- .../org/apache/nutch/scoring/webgraph/WebGraph.java| 
3 +-- .../java/org/apache/nutch/segment/SegmentMerger.java | 4 +--- .../java/org/apache/nutch/segment/SegmentReader.java | 4 +--- .../java/org/apache/nutch/service/NutchServer.java | 2 +- .../main/java/org/apache/nutch/tools/Benchmark.java| 3 +-- .../org/apache/nutch/tools/CommonCrawlDataDumper.java | 15 +++ .../main/java/org/apache/nutch/tools/DmozParser.java | 4 ++-- .../java/org/apache/nutch/tools/FreeGenerator.java | 4 +--- .../org/apache/nutch/tools/arc/ArcSegmentCreator.java | 4 +--- .../java/org/apache/nutch/tools/warc/WARCExporter.java | 4 +--- .../main/java/org/apache/nutch/util/CommandRunner.java | 7 ++- .../java/org/apache/nutch/util/EncodingDetector.java | 2 +- .../org/apache/nutch/tools/proxy/ProxyTestbed.java | 4 ++-- .../java/org/apache/nutch/parse/feed/FeedParser.java | 3 +-- nutch-plugins/index-replace/pom.xml| 4 ++-- .../org/apache/nutch/protocol/http/api/HttpBase.java | 5 ++--- .../java/org/apache/nutch/parse/zip/ZipParser.java | 2 +- nutch-plugins/pom.xml | 3 +-- .../main/java/org/apache/nutch/protocol/file/File.java | 7 ++- .../main/java/org/apache/nutch/protocol/ftp/Ftp.java | 4 ++-- nutch-plugins/protocol-httpclient/pom.xml | 5 - .../net/urlnormalizer/basic/BasicURLNormalizer.java| 1 - .../net/urlnormalizer/regex/RegexURLNormalizer.java| 1 - .../urlnormalizer/regex/TestRegexURLNormalizer.java| 6 +++--- pom.xml| 1 + 52 files changed, 80 insertions(+), 147 deletions(-) diff --git a/.gitignore b/.gitignore index e0cfd33..68acee7 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,9 @@ nutch-plugins/*/target # IntelliJ Idea .idea -**.iml \ No newline at end of file +**.iml + +# Eclipse IDE +*.settings +*.project +*.classpath diff --git a/nutch-core/pom.xml b/nutch-core/pom.xml index 62e2e58..54d927d 100644 --- a/nutch-core/pom.xml +++ b/nutch-core/pom.xml @@ -28,7 +28,7 @@ nutch-core jar -Apache Nutch +nutch-core Nutch is an open source web-search software. 
It builds on Hadoop, Tika and Solr, adding web-specifics, such as a crawler, a link-graph database etc. @@ -489,7 +489,7 @@ ${project.build.directory} -${build.finalName}.jar + ${project.build.finalName}.jar ${project.basedir} diff --git a/nutch-core/src/main/java/org/apache
[nutch] 02/03: Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch NUTCH-2292 in repository https://gitbox.apache.org/repos/asf/nutch.git commit 9a0ce9e3e8a7d1fc6093b31e40ff26e503dc7beb Merge: ecc60d7 62491d5 Author: Lewis John McGibbney AuthorDate: Tue Mar 7 14:20:23 2017 -0800 Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292 .gitignore | 6 +++- conf/nutch-default.xml | 8 + default.properties | 4 +-- ivy/mvn.template | 4 +-- .../java/org/apache/nutch/crawl/CrawlDatum.java| 8 ++--- .../main/java/org/apache/nutch/crawl/CrawlDb.java | 21 +-- .../java/org/apache/nutch/crawl/CrawlDbMerger.java | 2 +- .../java/org/apache/nutch/crawl/CrawlDbReader.java | 14 .../org/apache/nutch/crawl/CrawlDbReducer.java | 2 +- .../org/apache/nutch/crawl/DeduplicationJob.java | 2 +- .../java/org/apache/nutch/crawl/Generator.java | 6 ++-- .../main/java/org/apache/nutch/crawl/Injector.java | 2 +- .../main/java/org/apache/nutch/crawl/Inlinks.java | 8 ++--- .../main/java/org/apache/nutch/crawl/LinkDb.java | 21 +-- .../java/org/apache/nutch/crawl/LinkDbMerger.java | 2 +- .../java/org/apache/nutch/crawl/LinkDbReader.java | 2 +- .../nutch/crawl/MimeAdaptiveFetchSchedule.java | 2 +- .../apache/nutch/crawl/TextProfileSignature.java | 6 ++-- .../org/apache/nutch/fetcher/FetchItemQueues.java | 2 +- .../java/org/apache/nutch/fetcher/FetchNodeDb.java | 2 +- .../java/org/apache/nutch/fetcher/Fetcher.java | 17 - .../org/apache/nutch/fetcher/FetcherThread.java| 6 ++-- .../apache/nutch/fetcher/FetcherThreadEvent.java | 2 +- .../nutch/fetcher/FetcherThreadPublisher.java | 0 .../apache/nutch/hostdb/UpdateHostDbReducer.java | 14 .../org/apache/nutch/indexer/IndexWriters.java | 2 +- .../nutch/indexer/IndexingFiltersChecker.java | 2 +- .../java/org/apache/nutch/indexer/IndexingJob.java | 21 +-- .../org/apache/nutch/indexer/NutchDocument.java| 2 +- .../java/org/apache/nutch/indexer/NutchField.java | 4 +-- 
.../java/org/apache/nutch/metadata/Metadata.java | 2 +- .../nutch/metadata/SpellCheckedMetadata.java | 2 +- .../java/org/apache/nutch/net/URLNormalizers.java | 8 ++--- .../org/apache/nutch/parse/OutlinkExtractor.java | 2 +- .../java/org/apache/nutch/parse/ParseData.java | 5 +-- .../org/apache/nutch/parse/ParseOutputFormat.java | 4 +-- .../org/apache/nutch/parse/ParsePluginList.java| 4 +-- .../org/apache/nutch/parse/ParsePluginsReader.java | 4 +-- .../java/org/apache/nutch/parse/ParseResult.java | 2 +- .../java/org/apache/nutch/parse/ParseSegment.java | 15 .../java/org/apache/nutch/parse/ParseText.java | 5 +-- .../java/org/apache/nutch/parse/ParserChecker.java | 2 +- .../java/org/apache/nutch/parse/ParserFactory.java | 4 +-- .../java/org/apache/nutch/plugin/Extension.java| 2 +- .../org/apache/nutch/plugin/ExtensionPoint.java| 2 +- .../org/apache/nutch/plugin/PluginDescriptor.java | 16 - .../apache/nutch/plugin/PluginManifestParser.java | 2 +- .../org/apache/nutch/plugin/PluginRepository.java | 30 .../java/org/apache/nutch/protocol/Content.java| 5 +-- .../org/apache/nutch/protocol/ProtocolStatus.java | 2 +- .../apache/nutch/protocol/RobotRulesParser.java| 6 ++-- .../org/apache/nutch/publisher/NutchPublisher.java | 0 .../apache/nutch/publisher/NutchPublishers.java| 0 .../apache/nutch/scoring/webgraph/LinkDumper.java | 6 ++-- .../apache/nutch/scoring/webgraph/LinkRank.java| 6 ++-- .../apache/nutch/scoring/webgraph/NodeReader.java | 2 +- .../apache/nutch/scoring/webgraph/WebGraph.java| 8 ++--- .../nutch/segment/ContentAsTextInputFormat.java| 2 +- .../org/apache/nutch/segment/SegmentMerger.java| 10 +++--- .../org/apache/nutch/segment/SegmentReader.java| 17 - .../java/org/apache/nutch/service/NutchServer.java | 4 +-- .../java/org/apache/nutch/service/SeedManager.java | 0 .../org/apache/nutch/service/impl/LinkReader.java | 8 ++--- .../org/apache/nutch/service/impl/NodeReader.java | 8 ++--- .../apache/nutch/service/impl/SeedManagerImpl.java | 0 
.../apache/nutch/service/impl/SequenceReader.java | 12 +++ .../nutch/service/model/request/DbQuery.java | 2 +- .../service/model/response/FetchNodeDbInfo.java| 2 +- .../apache/nutch/service/resources/DbResource.java | 2 +- .../java/org/apache/nutch/tools/Benchmark.java | 8 ++--- .../apache/nutch/tools/CommonCrawlDataDumper.java | 4 +-- .../nutch/tools/CommonCrawlFormatJettinson.java| 4 +-- .../java/org/apache/nutch/tools/DmozParser.java| 22 .../java/org/apache/nutch/tools/FileDumper.ja
[nutch] 01/03: Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch NUTCH-2292 in repository https://gitbox.apache.org/repos/asf/nutch.git commit ecc60d7890e20ae822c848661ee2a6224f9fbe1d Merge: 2b93a66 2175c76 Author: Lewis John McGibbney AuthorDate: Fri Feb 24 12:13:47 2017 -0800 Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292 .gitignore | 6 + {src/bin => bin}/crawl | 0 {src/bin => bin}/nutch | 0 nutch-core/pom.xml | 522 + .../apache/nutch/crawl/AbstractFetchSchedule.java | 0 .../apache/nutch/crawl/AdaptiveFetchSchedule.java | 0 .../java/org/apache/nutch/crawl/CrawlDatum.java| 0 .../main}/java/org/apache/nutch/crawl/CrawlDb.java | 0 .../java/org/apache/nutch/crawl/CrawlDbFilter.java | 0 .../java/org/apache/nutch/crawl/CrawlDbMerger.java | 0 .../java/org/apache/nutch/crawl/CrawlDbReader.java | 0 .../org/apache/nutch/crawl/CrawlDbReducer.java | 0 .../org/apache/nutch/crawl/DeduplicationJob.java | 0 .../apache/nutch/crawl/DefaultFetchSchedule.java | 0 .../java/org/apache/nutch/crawl/FetchSchedule.java | 0 .../apache/nutch/crawl/FetchScheduleFactory.java | 0 .../java/org/apache/nutch/crawl/Generator.java | 0 .../java/org/apache/nutch/crawl/Injector.java | 0 .../main}/java/org/apache/nutch/crawl/Inlink.java | 0 .../main}/java/org/apache/nutch/crawl/Inlinks.java | 0 .../main}/java/org/apache/nutch/crawl/LinkDb.java | 0 .../java/org/apache/nutch/crawl/LinkDbFilter.java | 0 .../java/org/apache/nutch/crawl/LinkDbMerger.java | 0 .../java/org/apache/nutch/crawl/LinkDbReader.java | 0 .../java/org/apache/nutch/crawl/MD5Signature.java | 0 .../nutch/crawl/MimeAdaptiveFetchSchedule.java | 0 .../java/org/apache/nutch/crawl/NutchWritable.java | 0 .../java/org/apache/nutch/crawl/Signature.java | 0 .../apache/nutch/crawl/SignatureComparator.java| 0 .../org/apache/nutch/crawl/SignatureFactory.java | 0 .../org/apache/nutch/crawl/TextMD5Signature.java | 0 .../apache/nutch/crawl/TextProfileSignature.java | 0 
.../org/apache/nutch/crawl/URLPartitioner.java | 0 .../main}/java/org/apache/nutch/crawl/package.html | 0 .../java/org/apache/nutch/fetcher/FetchItem.java | 0 .../org/apache/nutch/fetcher/FetchItemQueue.java | 0 .../org/apache/nutch/fetcher/FetchItemQueues.java | 0 .../java/org/apache/nutch/fetcher/FetchNode.java | 0 .../java/org/apache/nutch/fetcher/FetchNodeDb.java | 0 .../java/org/apache/nutch/fetcher/Fetcher.java | 0 .../apache/nutch/fetcher/FetcherOutputFormat.java | 0 .../org/apache/nutch/fetcher/FetcherThread.java| 0 .../java/org/apache/nutch/fetcher/QueueFeeder.java | 0 .../java/org/apache/nutch/fetcher/package.html | 0 .../java/org/apache/nutch/hostdb/HostDatum.java| 0 .../java/org/apache/nutch/hostdb/ReadHostDb.java | 0 .../org/apache/nutch/hostdb/ResolverThread.java| 0 .../java/org/apache/nutch/hostdb/UpdateHostDb.java | 0 .../apache/nutch/hostdb/UpdateHostDbMapper.java| 0 .../apache/nutch/hostdb/UpdateHostDbReducer.java | 0 .../java/org/apache/nutch/indexer/CleaningJob.java | 0 .../java/org/apache/nutch/indexer/IndexWriter.java | 0 .../org/apache/nutch/indexer/IndexWriters.java | 0 .../org/apache/nutch/indexer/IndexerMapReduce.java | 0 .../apache/nutch/indexer/IndexerOutputFormat.java | 0 .../apache/nutch/indexer/IndexingException.java| 0 .../org/apache/nutch/indexer/IndexingFilter.java | 0 .../org/apache/nutch/indexer/IndexingFilters.java | 0 .../nutch/indexer/IndexingFiltersChecker.java | 0 .../java/org/apache/nutch/indexer/IndexingJob.java | 0 .../org/apache/nutch/indexer/NutchDocument.java| 0 .../java/org/apache/nutch/indexer/NutchField.java | 0 .../org/apache/nutch/indexer/NutchIndexAction.java | 0 .../java/org/apache/nutch/indexer/package.html | 0 .../org/apache/nutch/metadata/CreativeCommons.java | 0 .../java/org/apache/nutch/metadata/DublinCore.java | 0 .../main}/java/org/apache/nutch/metadata/Feed.java | 0 .../org/apache/nutch/metadata/HttpHeaders.java | 0 .../org/apache/nutch/metadata/MetaWrapper.java | 0 
.../java/org/apache/nutch/metadata/Metadata.java | 0 .../java/org/apache/nutch/metadata/Nutch.java | 0 .../nutch/metadata/SpellCheckedMetadata.java | 0 .../java/org/apache/nutch/metadata/package.html| 0 .../org/apache/nutch/net/URLExemptionFilter.java | 0 .../org/apache/nutch/net/URLExemptionFilters.java | 0 .../main}/java/org/apache/nutch/net/URLFilter.java | 0 .../org/apache/nutch/net/URLFilterChecker.java | 0 .../org/apache/nutch/net/URLFilterException.java | 0 .../java/org/apache/nutch/net
[nutch] branch NUTCH-2292 updated (62491d5 -> 10b23cd)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch NUTCH-2292 in repository https://gitbox.apache.org/repos/asf/nutch.git. from 62491d5 Merge with latest changes from master new ecc60d7 Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292 new 9a0ce9e Merge branch 'NUTCH-2292' of https://github.com/apache/nutch into NUTCH-2292 new 10b23cd NUTCH-2292 fix numerous Maven compiler warnings and replace all calls to System.exit The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: .gitignore | 7 ++- nutch-core/pom.xml | 4 ++-- .../src/main/java/org/apache/nutch/crawl/CrawlDb.java | 3 +-- .../java/org/apache/nutch/crawl/CrawlDbMerger.java | 4 +--- .../java/org/apache/nutch/crawl/CrawlDbReader.java | 4 +--- .../java/org/apache/nutch/crawl/DeduplicationJob.java | 4 +--- .../main/java/org/apache/nutch/crawl/Generator.java| 4 +--- .../src/main/java/org/apache/nutch/crawl/Injector.java | 3 +-- .../src/main/java/org/apache/nutch/crawl/LinkDb.java | 3 +-- .../main/java/org/apache/nutch/crawl/LinkDbMerger.java | 4 +--- .../main/java/org/apache/nutch/crawl/LinkDbReader.java | 4 +--- .../main/java/org/apache/nutch/fetcher/Fetcher.java| 3 +-- .../main/java/org/apache/nutch/hostdb/ReadHostDb.java | 3 +-- .../java/org/apache/nutch/hostdb/UpdateHostDb.java | 3 +-- .../java/org/apache/nutch/indexer/CleaningJob.java | 4 +--- .../apache/nutch/indexer/IndexingFiltersChecker.java | 18 -- .../java/org/apache/nutch/indexer/IndexingJob.java | 4 +--- .../java/org/apache/nutch/net/URLFilterChecker.java| 7 ++- .../org/apache/nutch/net/URLNormalizerChecker.java | 5 + .../org/apache/nutch/parse/ParsePluginsReader.java | 3 +-- .../main/java/org/apache/nutch/parse/ParseSegment.java | 7 ++- 
.../java/org/apache/nutch/parse/ParserChecker.java | 6 ++ .../org/apache/nutch/protocol/RobotRulesParser.java| 3 +-- .../org/apache/nutch/scoring/webgraph/LinkDumper.java | 4 +--- .../org/apache/nutch/scoring/webgraph/LinkRank.java| 3 +-- .../org/apache/nutch/scoring/webgraph/NodeDumper.java | 4 +--- .../apache/nutch/scoring/webgraph/ScoreUpdater.java| 4 +--- .../org/apache/nutch/scoring/webgraph/WebGraph.java| 3 +-- .../java/org/apache/nutch/segment/SegmentMerger.java | 4 +--- .../java/org/apache/nutch/segment/SegmentReader.java | 4 +--- .../java/org/apache/nutch/service/NutchServer.java | 2 +- .../main/java/org/apache/nutch/tools/Benchmark.java| 3 +-- .../org/apache/nutch/tools/CommonCrawlDataDumper.java | 15 +++ .../main/java/org/apache/nutch/tools/DmozParser.java | 4 ++-- .../java/org/apache/nutch/tools/FreeGenerator.java | 4 +--- .../org/apache/nutch/tools/arc/ArcSegmentCreator.java | 4 +--- .../java/org/apache/nutch/tools/warc/WARCExporter.java | 4 +--- .../main/java/org/apache/nutch/util/CommandRunner.java | 7 ++- .../java/org/apache/nutch/util/EncodingDetector.java | 2 +- .../org/apache/nutch/tools/proxy/ProxyTestbed.java | 4 ++-- .../java/org/apache/nutch/parse/feed/FeedParser.java | 3 +-- nutch-plugins/index-replace/pom.xml| 4 ++-- .../org/apache/nutch/protocol/http/api/HttpBase.java | 5 ++--- .../java/org/apache/nutch/parse/zip/ZipParser.java | 2 +- nutch-plugins/pom.xml | 3 +-- .../main/java/org/apache/nutch/protocol/file/File.java | 7 ++- .../main/java/org/apache/nutch/protocol/ftp/Ftp.java | 4 ++-- nutch-plugins/protocol-httpclient/pom.xml | 5 - .../net/urlnormalizer/basic/BasicURLNormalizer.java| 1 - .../net/urlnormalizer/regex/RegexURLNormalizer.java| 1 - .../urlnormalizer/regex/TestRegexURLNormalizer.java| 6 +++--- pom.xml| 1 + 52 files changed, 80 insertions(+), 147 deletions(-) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] branch master updated: Prepare for Nutch 1.13 release
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git The following commit(s) were added to refs/heads/master by this push: new b972b68 Prepare for Nutch 1.13 release b972b68 is described below commit b972b685ba55186849d20fc9398db47d9626096f Author: Lewis John McGibbney AuthorDate: Tue Mar 28 19:29:57 2017 -0700 Prepare for Nutch 1.13 release --- CHANGES.txt| 50 +- NOTICE.txt | 2 +- conf/nutch-default.xml | 2 +- default.properties | 4 ++-- 4 files changed, 53 insertions(+), 5 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 9056a08..81784a7 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,52 @@ -Nutch Change Log +# Nutch Change Log + +Nutch 1.13 Release 28/03/2017 (dd/mm/) +Release Report: https://s.apache.org/wq3x + +Sub-task + +[NUTCH-2246] - Refactor /seed endpoint for backward compatibility + +Bug + +[NUTCH-1553] - Property 'indexer.delete.robots.noindex' not working when using parser-html. 
+[NUTCH-2242] - lastModified not always set +[NUTCH-2291] - Fix mrunit dependencies +[NUTCH-2337] - urlnormalizer-basic to strip empty port +[NUTCH-2345] - FetchItemQueue logs are logged with wrong class name +[NUTCH-2349] - urlnormalizer-basic NPE for ill-formed URL "http:/" +[NUTCH-2357] - Index metadata throw Exception because writable object cannot be cast to Text +[NUTCH-2359] - Parsefilter-regex raises IndexOutOfBoundsException when rules are ill-formed +[NUTCH-2364] - http.agent.rotate: IllegalArgumentException / last element of agent names ignored +[NUTCH-2366] - Deprecated Job constructor in hostdb/ReadHostDb.java + +Improvement + +[NUTCH-1308] - Add main() to ZipParser +[NUTCH-2164] - Inconsistent 'Modified Time' in crawl db +[NUTCH-2234] - Upgrade to elasticsearch 2.3.3 +[NUTCH-2236] - Upgrade to Hadoop 2.7.2 +[NUTCH-2262] - Utilize parameterized logging notation across Fetcher +[NUTCH-2272] - Index checker server to optionally keep client connection open +[NUTCH-2286] - CrawlDbReader -stats to show fetch time and interval +[NUTCH-2287] - Indexer-elastic plugin should use Elasticsearch BulkProcessor and BackoffPolicy +[NUTCH-2299] - Remove obsolete properties protocol.plugin.check.* +[NUTCH-2300] - Fetcher to optionally save robots.txt +[NUTCH-2327] - Seeds injected in REST workflow must be ingested into HDFS +[NUTCH-2329] - Update Slf4j logging for Java 8 and upgrade miredot plugin version +[NUTCH-2336] - SegmentReader to implement Tool +[NUTCH-2352] - Log with Generic Class Name at Nutch 1.x +[NUTCH-2355] - Protocol plugins to set cookie if Cookie metadata field is present +[NUTCH-2367] - Get single record from HostDB + +New Feature + +[NUTCH-2132] - Publisher/Subscriber model for Nutch to emit events + +Task + +[NUTCH-2171] - Upgrade Nutch Trunk to Java 1.8 + Nutch 1.12 Release 28/05/2016 (dd/mm/) Release Report: https://s.apache.org/nutch1.12 diff --git a/NOTICE.txt b/NOTICE.txt index e468ca5..870c475 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 
+1,5 @@ Apache Nutch -Copyright 2015 The Apache Software Foundation +Copyright 2017 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 08fb8a0..679a58e 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -164,7 +164,7 @@ http.agent.version - Nutch-1.13-SNAPSHOT + Nutch-1.13 A version string to advertise in the User-Agent header. diff --git a/default.properties b/default.properties index 081affc..5ca3577 100644 --- a/default.properties +++ b/default.properties @@ -14,9 +14,9 @@ # limitations under the License. name=apache-nutch -version=1.13-SNAPSHOT +version=1.13 final.name=${name}-${version} -year=2016 +year=2017 basedir = ./ src.dir = ./src/java -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] branch master updated: Update deploy plugin version
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git The following commit(s) were added to refs/heads/master by this push: new 045761b Update deploy plugin version 045761b is described below commit 045761bfd79287172a73dc5a4e84cfc4f44b6332 Author: Lewis John McGibbney AuthorDate: Tue Mar 28 19:50:17 2017 -0700 Update deploy plugin version --- build.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.xml b/build.xml index 8c9e778..77eec60 100644 --- a/build.xml +++ b/build.xml @@ -284,7 +284,7 @@ - + @@ -294,7 +294,7 @@ - + @@ -305,7 +305,7 @@ - + -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] branch master updated: Correct repository location and downgrade gpg plugin version
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git The following commit(s) were added to refs/heads/master by this push: new bd0da35 Correct repository location and downgrade gpg plugin version bd0da35 is described below commit bd0da3569aa14105799ed39204d4f0a31c77b42c Author: Lewis John McGibbney AuthorDate: Tue Mar 28 20:30:27 2017 -0700 Correct repository location and downgrade gpg plugin version --- build.xml| 6 +++--- ivy/mvn.template | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.xml b/build.xml index 77eec60..8c9e778 100644 --- a/build.xml +++ b/build.xml @@ -284,7 +284,7 @@ - + @@ -294,7 +294,7 @@ - + @@ -305,7 +305,7 @@ - + diff --git a/ivy/mvn.template b/ivy/mvn.template index 4d74fa7..e9bfb21 100644 --- a/ivy/mvn.template +++ b/ivy/mvn.template @@ -35,9 +35,9 @@ - scm:git:https://git-wip-us.apache.org/repos/asf/nutch.git - scm:git:http://git-wip-us.apache.org/repos/asf/nutch.git -https://git-wip-us.apache.org/repos/asf/nutch.git + scm:git:https://github.com/apache/nutch.git +scm:git:http://github.com/apache/nutch.git +https://github.com/apache/nutch.git -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] annotated tag release-1.13 updated (bd0da35 -> 16330b0)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to annotated tag release-1.13 in repository https://gitbox.apache.org/repos/asf/nutch.git. *** WARNING: tag release-1.13 was modified! *** from bd0da35 (commit) to 16330b0 (tag) tagging bd0da3569aa14105799ed39204d4f0a31c77b42c (commit) replaces release-1.12 tagged by Lewis John McGibbney on Tue Mar 28 21:04:10 2017 -0700 - Log - Apache Nutch 1.13 RC#1 Tag --- No new revisions were added by this update. Summary of changes: -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
svn commit: r18948 [1/3] - /dev/nutch/1.13/
Author: lewismc Date: Wed Mar 29 05:11:10 2017 New Revision: 18948 Log: Stage Apache Nutch 1.13 RC#1 release artifacts Added: dev/nutch/1.13/ dev/nutch/1.13/CHANGES.txt dev/nutch/1.13/KEYS dev/nutch/1.13/apache-nutch-1.13-bin.tar.gz (with props) dev/nutch/1.13/apache-nutch-1.13-bin.tar.gz.asc dev/nutch/1.13/apache-nutch-1.13-bin.tar.gz.md5 dev/nutch/1.13/apache-nutch-1.13-bin.tar.gz.sha1 dev/nutch/1.13/apache-nutch-1.13-bin.zip (with props) dev/nutch/1.13/apache-nutch-1.13-bin.zip.asc dev/nutch/1.13/apache-nutch-1.13-bin.zip.md5 dev/nutch/1.13/apache-nutch-1.13-bin.zip.sha1 dev/nutch/1.13/apache-nutch-1.13-src.tar.gz (with props) dev/nutch/1.13/apache-nutch-1.13-src.tar.gz.asc dev/nutch/1.13/apache-nutch-1.13-src.tar.gz.md5 dev/nutch/1.13/apache-nutch-1.13-src.tar.gz.sha1 dev/nutch/1.13/apache-nutch-1.13-src.zip (with props) dev/nutch/1.13/apache-nutch-1.13-src.zip.asc dev/nutch/1.13/apache-nutch-1.13-src.zip.md5 dev/nutch/1.13/apache-nutch-1.13-src.zip.sha1
svn commit: r18948 [2/3] - /dev/nutch/1.13/
5] - IndexingFilterChecker to optionally follow N redirects +[NUTCH-2196] - IndexingFilterChecker to optionally normalize +[NUTCH-2197] - Add solr5 solrcloud indexer support +[NUTCH-2204] - Remove junit lib from runtime +[NUTCH-2218] - Switch CrawlCompletion arg parsing to Commons CLI +[NUTCH-2221] - Introduce db.ignore.internal.links to FetcherThread +[NUTCH-2229] - Allow Jexl expressions on CrawlDatum's fixed attributes +[NUTCH-2231] - Jexl support in generator job +[NUTCH-2252] - Allow phantomjs as a browser for selenium options +[NUTCH-2263] - Support for mingram and maxgram at Unigram Cosine Similarity Model + +New Feature + +[NUTCH-961] - Expose Tika's boilerpipe support +[NUTCH-1325] - HostDB for Nutch +[NUTCH-2144] - Plugin to override db.ignore.external to exempt interesting external domain URLs +[NUTCH-2190] - Protocol normalizer +[NUTCH-2191] - Add protocol-htmlunit +[NUTCH-2194] - Run IndexingFilterChecker as simple Telnet server +[NUTCH-2219] - Criteria order to be configurable in DeduplicationJob +[NUTCH-2227] - RegexParseFilter +[NUTCH-2245] - Developed the NGram Model on the existing Unigram Cosine Similarity Model + +Task + +[NUTCH-2201] - Remove loops program from webgraph package +[NUTCH-2211] - Filter and normalizer checkers missing in bin/nutch +[NUTCH-2220] - Rename db.* options used only by the linkdb to linkdb.* + +Nutch 1.11 Release 03/12/2015 (dd/mm/) +Release Report: http://s.apache.org/nutch11 + +* NUTCH-2176 Clean up of log4j.properties (markus) + +* NUTCH-2107 plugin.xml to validate against plugin.dtd (snagel) + +* NUTCH-2177 Generator produces only one partition even in distributed mode (jnioche, snagel) + +* NUTCH-2158 Upgrade to Tika 1.11 (jnioche, snagel) + +* NUTCH-2175 Typos in property descriptions in nutch-default.xml (Roannel Fernández Hernández via snagel) + +* NUTCH-2069 Ignore external links based on domain (jnioche) + +* NUTCH-2173 String.join in FileDumper breaks the build (joyce) + +* NUTCH-2166 Add reverse URL format to 
dump tool (joyce) + +* NUTCH-2157 Addressing Miredot REST API Warnings (Sujen Shah) + +* NUTCH-2165 FileDumper Util hard codes part-# folder name (joyce) + +* NUTCH-2167 Backport TableUtil from 2.x for URL reversing (joyce) + +* NUTCH-2160 Upgrade Selenium Java to 2.48.2 (lewismc, kwhitehall) + +* NUTCH-2120 Remove MapWritable from trunk codebase (lewismc) + +* NUTCH-1911 Improve DomainStatistics tool command line parsing (joyce) + +* NUTCH-2064 URLNormalizer basic to encode reserved chars and decode non-reserved chars (markus, snagel) + +* NUTCH-2159 Ensure that all WebApp files are copied into generated artifacts for 1.X Webapp (lewismc) + +* NUTCH-2154 Nutch REST API (DB) suffering NullPointerException (Aron Ahmadia, Sujen Shah via mattmann) + +* NUTCH-2150 Add protocolstats utility (Michael Joyce via mattmann) + +* NUTCH-2146 hashCode on the Outlink class (jorgelbg via mattmann) + +* NUTCH-2155 Create a "crawl completeness" utility (Michael Joyce via mattmann) + +* NUTCH-1988 Make nested output directory dump optional... 
again (Michael Joyce via lewismc) + +* NUTCH-1800 Documentation for Nutch 1.X and 2.X REST APIs (lewismc) + +* NUTCH-2149 REST endpoint to read Nutch sequence files (Sujen Shah) + +* NUTCH-2139 Basic plugin to index inlinks and outlinks (jorgelbg) + +* NUTCH-2128 Review and update mapred --> mapreduce config params in crawl script (lewismc) + +* NUTCH-2141 Change the InteractiveSelenium plugin handler Interface to return page content + (Balaji Gurumurthy via mattmann) + +* NUTCH-2129 Add protocol status tracking to crawl datum (Michael Joyce via mattmann) + +* NUTCH-2142 Nutch File Dump - FileNotFoundException (Invalid Argument) Error (Karanjeet Singh via mattmann) + +* NUTCH-2136 Implement a different version of Naive Bayes Parse Filter (Asitang Mishra) + +* NUTCH-2109 Create a brute force click-all-ajax-links utility function for selenium interactive plugin (Asitang Mishra) + +* NUTCH-2108 Add a function to the selenium interactive plugin interface to do multiple manipulation of driver and then return the data (Asitang Mishra) + +* NUTCH-2124 Fetcher following same redirect again and again (Yogendra Kumar Soni via snagel) + +* NUTCH-2123 Seed List REST API returns Text but headers indicate/require JSON + (Aron Ahmadia, Sujen Shah via mattmann) + +* NUTCH-2086 Nutch 1.X Webui (Sujen Shah, mattmann via lewismc) + +* NUTCH-2121 Update javadoc link for Hadoop 2.4.0 in default.properties (Sujen Shah) + +* NUTCH-2119 Eclipse shows build path errors on building Nutch (Sujen Shah) + +* NUTCH-2117 NutchServer CLI Option for CMD_PORT is incorrect and should be CMD_HOST (zhangmianhongni via lewismc) + +* NUTCH-2115 - Add total counts to mimetype stats (Jimmy Joyce via lewismc) + +* NUTCH-2111 Delete temporary files location for selenium tmp files after driver quits (Kim Whitehall via lewismc) + +* N
svn commit: r18948 [3/3] - /dev/nutch/1.13/
Added: dev/nutch/1.13/KEYS == --- dev/nutch/1.13/KEYS (added) +++ dev/nutch/1.13/KEYS Wed Mar 29 05:11:10 2017 @@ -0,0 +1,364 @@ +This file contains the PGP keys of various developers. +Please don't use them for email unless you have to. Their main +purpose is code signing. + +Examples of importing this file in your keystore: + gpg --import KEYS.txt + (need pgp and other examples here) + +Examples of adding your key to this file: + pgp -kxa and append it to this file. + (pgpk -ll && pgpk -xa ) >> this file. + (gpg --list-sigs + && gpg --armor --export ) >> this file. + +pub 1024D/A7239D59 2005-10-12 + Key fingerprint = 4B96 409A 098D BD51 1DF2 BC18 DBAF 69BE A723 9D59 +uid Doug Cutting (Lucene guy) +sig 3A7239D59 2005-10-12 Doug Cutting (Lucene guy) +sub 2048g/ADDE5978 2005-10-12 +sig A7239D59 2005-10-12 Doug Cutting (Lucene guy) + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.4.1 (FreeBSD) + +mQGiBENNR5oRBAC2ZzxD2fXYht8qkfT/6tjWJxLG4KH2dLEWSYEzku8ZtJ7eA6X7 +/hcvZdhjGH0aA6MAEVSxh6LO1hmRARE2e2Br68j4TjwbQ0J5BOgkMMAArmQe7w6B +RjKUI3H74Qbfjuk4Ebf1fNkRkpwuw+JxZu5pqpACqwv6nPhcSDDjbuA/1wCgj+++ +uxVSQMF4Xrd0hApOSYGHL8kD/jCU+vM3ILuFVTCgfC5RehmqwQo/f6KEv99jJSxX +ClcksiLquOH8vMc3MV1YWOe4u93DI7iAYzCylS1s2Wn0bLEBrbdGKLMH4hSSMDRC +pjnyvzvnEMhMU+Jn3LK6lQw4nHH+aDGFcYZ2pQen7JAcYz7l6QeTsvMnRV+v13K1 +/zRjA/9QUxrgg2N5WQnEhMegIWBKVhxQV6a2mSfeNd0ApxzdqdoHZNkUD+pKMB0F +oQ9aP55KbtvFosurFgEmvwLIoMnQohxjIhdk0Hx3xMT17CtYl04F0C+QNxeXpWr7 +/B0kq8nALn17hXz5A1bFaiMHX86QmvNyMTDUC2VrVbkV251dlLQuRG91ZyBDdXR0 +aW5nIChMdWNlbmUgZ3V5KSA8Y3V0dGluZ0BhcGFjaGUub3JnPoheBBMRAgAeBQJD +TUeaAhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJENuvab6nI51ZjRAAoIZ96gYE +f8QCDpXkBQqtNgRiF4t5AJ9JKMrN/Ow+Kyl75FU9U2KWyPoMk7kCDQRDTUejEAgA +m3UdcglfOdgqI7Z9XUX38yqiFzNozSvTdOt3j6evIVvjJ3e0P87tUQlrdsbMcaXd ++PAc7EA5LE0eJlE9jR1/18tsIlYi/n1hxz1lWtaZ+9he3yTB12QmAf4MMTXaRBkI +ZqwdwZxmL5V+2TmhFT2bIzPLgrMHNsA4dtQuBak41GC+VXovqitS9Xzse2Ki+U9u +SiRPsD7x5DcgJm9sg/zqCNrvDN8vOC8iHa/CIqsZr3xaPgfQLZp6Xk3doHLc6IJ9 
+6knDAZvzJFgfj8MGCQoOExE/1XoNGTWcgoiy0D30ADG+rtIbaRT8tdQ6m19/ytqd +Zm7ibB7b78/pyfvvcB5tKwADBgf9GwdUdHUPjezlFpcCI/K3XHKdPLi00HJ2L1O8 +5pErBjDyZ5ey7vAMuYB5O31dB7pncSVsTdt9RRQHS+iLrv9aJjvYhV4yQU0ADkgC +9qEvxm7wpn76AT+Z1LIay/vNoQPxnfWq+uZD/Lnku1VcnMZ5teSG6uJzApBGYsgN +xpPPsobKKvclZdhO5NhhZLFZ0taWh4pna2jpDTLmyRa4kO7p7rIixsKxFfLUUc33 +2RqBomnm9eRlSvC4BBCq6M7YPLG0Rv5WmzuuWpc865EaMoBEtwPQBb4+qcMN69Lp +3x6EaymTWmHx1o8aUjAxhORE/miy53eGPzIXY+csjMyAmSxDG4hJBBgRAgAJBQJD +TUejAhsMAAoJENuvab6nI51ZlTIAn0oHlUPw+v1gVUJ8D2Nu26knOqJKAJ4spe/k +Sc2xRlsNP3tZiO+jYMAFSg== +=goQx +-END PGP PUBLIC KEY BLOCK- +pub 1024D/7C491924 2006-03-30 Piotr Kosiorowski +sig 3 7C491924 2006-03-30 Piotr Kosiorowski +sub 2048g/4A70BB35 2006-03-30 +sig 7C491924 2006-03-30 Piotr Kosiorowski + +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.2.7 (GNU/Linux) + +mQGiBEQrfF8RBACblz5gaIolsKdJgtdy913C+k/QXvaeg3R+8dXXkgVgC5vvRbUk +Ei6UBRMU5H0cNE76d0XlMYP1MccqdowsfPfWxl04VViW6p+KHmBa2ICIWvq1PQXL +XhocuRZn6dzfnxcFjsJlsKXtX+okzL9rc1AHiPsb+14XFQtd0/uxs/qeswCgyM68 +hxpwMZU4U0Q7yYkB8usVjbcD/iC65v+8DPhVgxp4o66JJqTYkBZ73mS4f/DDlQsL +9qCj8h9rLYHmV85hSx3pBBDuz/HjIzu5ruj+l78H++WISXE82hj++OS0bpKnb+nV +x/iN+b/Y0W0CzMEms+42LcNz1azvLL6ZBgLwnUePT1mBnOy6UgFW1XZGow/XO4Lr +2py7BACP2WrV+rAzc8RcelmvE3eaAj0DJhAl2Brkdl7B4KDNpBTYZM2TaJ5G5pDK +EhzH3O6IZP4dRh4iEipl+qcJ0eC5OlKHxqyXXbQYH5jzqkl+4cAQRkCliWuFrGcO +o3XaOFE54dpY8FZbineEJLrg4Ynh592gO731IcP4gm401ORGv7QrUGlvdHIgS29z +aW9yb3dza2kgPHBrb3Npb3Jvd3NraUBhcGFjaGUub3JnPoheBBMRAgAeBQJEK3xf +AhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJEEsO4ix8SRkkWLwAoLrn6dtn38yI +8dja2k2lJJ7PVpOoAJ9qZO+QfOfJRf1H+1L6qOuviiDkR7kCDQREK3xpEAgAklbu +2ctaceFu6nolNd3cnKNqDNppvSRSwDzZZytXjzV10E5VW7fYlN1+huOSV9nRLAIL +stNloFiOdQGElT0t8Xi9N9X1BuzSkxWMKqDHaTOSnKNupCuDzz9F3oYXVMbLwZBG +GJAMezd6WuCl+KyhsJgt0GD/H2Ucyck2CqTQRZFPOPOPB2urZbmw8F5bTI3u9J1Q +ElwApNTrHS04HyNEq5o9j/iTMvvunnkliQFI0Z/flvfHaV6go3/ZhMeVkLU7m/mq +bPh467HN0MTN5O+znak164nBumxcqD8yUF5TiWD42dykNffbN2ajZzgVvTxWerVV 
+mqVMTetbhl3Hoaff0wADBQf/d+XRxh7etS3IO5Jvv85de9QvQPFm5JZpnTNfdnil +b9G3WRjZIsdmAG2khtJNmlUMUegK0ej6jsCFmsWTqg8cbCG7TBcYySWKSTGklELu +N69g9VaG60GUX6EOoEmfRMrINlq/5egRSs8gukb1qYC0+0ZpaiWu5+PDx7ocADOq +FwZUcsp9k0c4y2QUxvzvuRNCjJftTNQT5k+r1RxBnk5RYxiL2ga/UfUXZ3gXZvPV +sTeSDXiMAAHvCqnAKGyGK7boST+zAD3qkBLH7nL44rS+9H+piERCmavSLcxI0CoZ +oJb9uniYB8HMLDhOc1HwN3L5QzEJ68JdGmdEapvxE0r6s4hJBBgRAgAJBQJEK3xp +AhsMAAoJEEsO4ix8SRkk2WUAn1/AYISfGPmMKuppjMZmAcs8Svm0AKCWsjGPGKSi +gKskqkY17BkNKkbBtQ== +=j0Ns +-END PGP PUBLIC KEY BLOCK- +pub 1024D/0B7E6CFA 2006-07-06 +uid Sami Siren +sig 30B7E6CFA 2006-07-06 Sami Siren +sig E222DE4F 2007-05-02 Mathias Herberts +sig 911203E4 2007-05-02 Mathias Herberts +sig 302DA568 2007-05-03 Rodent of Unusual Size (DSA) +sig 2C312D2F 2007-05-03 Rodent of Unusual Size (DSS) +sig F12F6072 2007
svn commit: r19008 - /dev/nutch/1.13/ /release/nutch/1.13/
Author: lewismc Date: Sun Apr 2 17:50:34 2017 New Revision: 19008 Log: Release Apache Nutch 1.13 Added: release/nutch/1.13/ - copied from r19007, dev/nutch/1.13/ Removed: dev/nutch/1.13/
svn commit: r19009 - /release/nutch/1.12/
Author: lewismc Date: Sun Apr 2 17:51:13 2017 New Revision: 19009 Log: Remove old Nutch 1.12 Removed: release/nutch/1.12/
svn commit: r1789904 - in /nutch/cms_site/trunk/content: ./ apidocs/apidocs-1.13/ apidocs/apidocs-1.13/org/ apidocs/apidocs-1.13/org/apache/ apidocs/apidocs-1.13/org/apache/nutch/ apidocs/apidocs-1.13
Author: lewismc Date: Sun Apr 2 18:11:27 2017 New Revision: 1789904 URL: http://svn.apache.org/viewvc?rev=1789904&view=rev Log: Update all Nutch documentation to reflect 1.13 release [This commit notification would consist of 243 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r1009625 - /websites/production/nutch/content/
Author: lewismc Date: Sun Apr 2 18:14:50 2017 New Revision: 1009625 Log: Publishing svnmucc operation to nutch site by lewismc Added: websites/production/nutch/content/ - copied from r1009624, websites/staging/nutch/trunk/content/
svn commit: r1009626 - /websites/production/nutch/content/
Author: lewismc Date: Sun Apr 2 18:15:21 2017 New Revision: 1009626 Log: Publishing svnmucc operation to nutch site by lewismc Added: websites/production/nutch/content/ - copied from r1009625, websites/staging/nutch/trunk/content/
svn commit: r1009627 - /websites/production/nutch/content/
Author: lewismc Date: Sun Apr 2 18:16:16 2017 New Revision: 1009627 Log: Publishing svnmucc operation to nutch site by lewismc Added: websites/production/nutch/content/ - copied from r1009626, websites/staging/nutch/trunk/content/
svn commit: r1009629 - /websites/production/nutch/content/
Author: lewismc Date: Sun Apr 2 18:20:55 2017 New Revision: 1009629 Log: Publishing svnmucc operation to nutch site by lewismc Added: websites/production/nutch/content/ - copied from r1009627, websites/staging/nutch/trunk/content/
svn commit: r1009630 - /websites/production/nutch/content/
Author: lewismc Date: Sun Apr 2 18:25:01 2017 New Revision: 1009630 Log: Publishing svnmucc operation to nutch site by lewismc Added: websites/production/nutch/content/ - copied from r1009629, websites/staging/nutch/trunk/content/
[nutch] branch master updated: Prepare for new development
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git The following commit(s) were added to refs/heads/master by this push: new 724a240 Prepare for new development 724a240 is described below commit 724a2407b39855fe697065fcc3ab8e411a232273 Author: Lewis John McGibbney AuthorDate: Sun Apr 2 11:28:12 2017 -0700 Prepare for new development --- conf/nutch-default.xml | 2 +- default.properties | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 679a58e..c112bd1 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -164,7 +164,7 @@ http.agent.version - Nutch-1.13 + Nutch-1.14-SNAPSHOT A version string to advertise in the User-Agent header. diff --git a/default.properties b/default.properties index 5ca3577..36e4027 100644 --- a/default.properties +++ b/default.properties @@ -14,7 +14,7 @@ # limitations under the License. name=apache-nutch -version=1.13 +version=1.14-SNAPSHOT final.name=${name}-${version} year=2017 -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #139 from bmzhao/NUTCH-2296-elasticsearch-rest-indexing
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit 25d0cf73436146d44122f992eca6fa04e444de39 Merge: 724a240 767764e Author: Lewis John McGibbney AuthorDate: Wed Apr 5 19:26:39 2017 -0700 Merge pull request #139 from bmzhao/NUTCH-2296-elasticsearch-rest-indexing Fix for NUTCH-2296: Elasticsearch Indexing Over Rest thank you @bmzhao build.xml | 3 + conf/log4j.properties | 4 + conf/nutch-default.xml | 72 + ivy/ivy.xml| 7 + src/plugin/build.xml | 2 + src/plugin/indexer-elastic-rest/build-ivy.xml | 54 src/plugin/indexer-elastic-rest/build.xml | 22 ++ .../indexer-elastic-rest/howto_upgrade_es.txt | 23 ++ src/plugin/indexer-elastic-rest/ivy.xml| 43 +++ src/plugin/indexer-elastic-rest/plugin.xml | 52 .../elasticrest/ElasticRestConstants.java | 33 +++ .../elasticrest/ElasticRestIndexWriter.java| 329 + .../indexwriter/elasticrest/package-info.java | 22 ++ 13 files changed, 666 insertions(+) diff --cc ivy/ivy.xml index 853a639,8e048fa..cc7f8d3 --- a/ivy/ivy.xml +++ b/ivy/ivy.xml @@@ -127,10 -127,14 +127,17 @@@ + + + + + + + + + + -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (724a240 -> 25d0cf7)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from 724a240 Prepare for new development adds 6f9a8f3 working elasticsearch rest adds 767764e log parameterization, code formatting new 25d0cf7 Merge pull request #139 from bmzhao/NUTCH-2296-elasticsearch-rest-indexing The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: build.xml | 3 + conf/log4j.properties | 4 + conf/nutch-default.xml | 72 + ivy/ivy.xml| 7 + src/plugin/build.xml | 2 + .../build-ivy.xml | 30 +- .../build.xml | 2 +- .../indexer-elastic-rest/howto_upgrade_es.txt} | 10 +- .../{parse-html => indexer-elastic-rest}/ivy.xml | 41 +-- src/plugin/indexer-elastic-rest/plugin.xml | 52 .../elasticrest/ElasticRestConstants.java} | 18 +- .../elasticrest/ElasticRestIndexWriter.java| 329 + .../indexwriter/elasticrest}/package-info.java | 4 +- 13 files changed, 523 insertions(+), 51 deletions(-) copy src/plugin/{index-geoip => indexer-elastic-rest}/build-ivy.xml (64%) copy src/plugin/{index-links => indexer-elastic-rest}/build.xml (93%) copy src/{java/org/apache/nutch/protocol/package-info.java => plugin/indexer-elastic-rest/howto_upgrade_es.txt} (74%) copy src/plugin/{parse-html => indexer-elastic-rest}/ivy.xml (55%) create mode 100644 src/plugin/indexer-elastic-rest/plugin.xml copy src/plugin/{indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticConstants.java => indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestConstants.java} (65%) create mode 100644 src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java copy src/{java/org/apache/nutch/tools => 
plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest}/package-info.java (84%) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #182 from Omkar20895/NUTCH-2372
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit e9b823db993004511c909548e0a5da342d687ce9 Merge: ee8b1ef 61985f1 Author: Lewis John McGibbney AuthorDate: Sat Apr 15 04:16:30 2017 -0700 Merge pull request #182 from Omkar20895/NUTCH-2372 NUTCH-2372 Fixing the errors in documentation src/java/org/apache/nutch/crawl/FetchSchedule.java | 4 +-- src/java/org/apache/nutch/crawl/Generator.java | 2 +- src/java/org/apache/nutch/crawl/Injector.java | 13 +++ src/java/org/apache/nutch/hostdb/ReadHostDb.java | 2 +- .../apache/nutch/hostdb/UpdateHostDbMapper.java| 14 .../apache/nutch/hostdb/UpdateHostDbReducer.java | 6 ++-- src/java/org/apache/nutch/net/URLNormalizers.java | 4 +-- src/java/org/apache/nutch/parse/ParseResult.java | 2 +- src/java/org/apache/nutch/parse/ParserChecker.java | 2 +- .../org/apache/nutch/plugin/PluginRepository.java | 2 +- .../org/apache/nutch/segment/SegmentMerger.java| 4 --- src/java/org/apache/nutch/segment/SegmentPart.java | 2 +- .../apache/nutch/service/impl/ConfManagerImpl.java | 2 +- .../nutch/tools/AbstractCommonCrawlFormat.java | 2 +- .../apache/nutch/tools/CommonCrawlDataDumper.java | 6 ++-- .../org/apache/nutch/tools/CommonCrawlFormat.java | 4 +-- .../nutch/tools/CommonCrawlFormatFactory.java | 6 ++-- .../nutch/tools/CommonCrawlFormatSimple.java | 3 +- src/java/org/apache/nutch/tools/FileDumper.java| 6 +--- .../apache/nutch/tools/arc/ArcRecordReader.java| 11 +++--- .../org/apache/nutch/util/EncodingDetector.java| 5 ++- src/java/org/apache/nutch/util/LockUtil.java | 4 +-- src/java/org/apache/nutch/util/MimeUtil.java | 4 +-- .../org/apache/nutch/util/PrefixStringMatcher.java | 8 ++--- .../org/apache/nutch/util/SuffixStringMatcher.java | 8 ++--- src/java/org/apache/nutch/util/TableUtil.java | 4 +-- src/java/org/apache/nutch/util/TimingUtil.java | 2 +- .../org/apache/nutch/util/TrieStringMatcher.java | 8 ++--- 
src/java/org/apache/nutch/util/URLUtil.java| 41 ++ .../nutch/indexer/feed/FeedIndexingFilter.java | 2 +- .../nutch/indexer/anchor/AnchorIndexingFilter.java | 2 +- .../nutch/indexer/geoip/GeoIPIndexingFilter.java | 3 -- .../nutch/indexer/links/LinksIndexingFilter.java | 24 ++--- .../nutch/indexer/metadata/MetadataIndexer.java| 2 +- .../nutch/indexer/replace/ReplaceIndexer.java | 18 +- .../nutch/indexwriter/dummy/DummyIndexWriter.java | 2 +- .../apache/nutch/indexwriter/solr/SolrUtils.java | 2 +- .../nutch/analysis/lang/HTMLLanguageParser.java| 3 +- .../nutch/protocol/htmlunit/HtmlUnitWebDriver.java | 2 +- .../nutch/urlfilter/api/RegexURLFilterBase.java| 4 +-- .../nutch/protocol/selenium/HttpWebClient.java | 2 +- .../indexer/filter/MimeTypeIndexingFilter.java | 3 +- .../apache/nutch/parse/zip/ZipTextExtractor.java | 2 +- .../java/org/apache/nutch/protocol/ftp/Client.java | 2 +- .../httpclient/HttpBasicAuthentication.java| 4 +-- .../nutch/scoring/opic/OPICScoringFilter.java | 5 ++- .../scoring/similarity/util/LuceneTokenizer.java | 2 +- .../apache/nutch/collection/CollectionManager.java | 2 +- .../subcollection/SubcollectionIndexingFilter.java | 3 +- .../nutch/urlfilter/domain/DomainURLFilter.java| 10 +++--- .../domainblacklist/DomainBlacklistURLFilter.java | 10 +++--- .../nutch/urlfilter/suffix/SuffixURLFilter.java| 7 ++-- .../indexer/urlmeta/URLMetaIndexingFilter.java | 6 ++-- .../scoring/urlmeta/URLMetaScoringFilter.java | 2 +- .../querystring/QuerystringURLNormalizer.java | 2 +- 55 files changed, 130 insertions(+), 177 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (ee8b1ef -> e9b823d)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from ee8b1ef Added Furkan KAMACI as developer. adds 61985f1 NUTCH-2372 Fixing the errors in documentation new e9b823d Merge pull request #182 from Omkar20895/NUTCH-2372 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: src/java/org/apache/nutch/crawl/FetchSchedule.java | 4 +-- src/java/org/apache/nutch/crawl/Generator.java | 2 +- src/java/org/apache/nutch/crawl/Injector.java | 13 +++ src/java/org/apache/nutch/hostdb/ReadHostDb.java | 2 +- .../apache/nutch/hostdb/UpdateHostDbMapper.java| 14 .../apache/nutch/hostdb/UpdateHostDbReducer.java | 6 ++-- src/java/org/apache/nutch/net/URLNormalizers.java | 4 +-- src/java/org/apache/nutch/parse/ParseResult.java | 2 +- src/java/org/apache/nutch/parse/ParserChecker.java | 2 +- .../org/apache/nutch/plugin/PluginRepository.java | 2 +- .../org/apache/nutch/segment/SegmentMerger.java| 4 --- src/java/org/apache/nutch/segment/SegmentPart.java | 2 +- .../apache/nutch/service/impl/ConfManagerImpl.java | 2 +- .../nutch/tools/AbstractCommonCrawlFormat.java | 2 +- .../apache/nutch/tools/CommonCrawlDataDumper.java | 6 ++-- .../org/apache/nutch/tools/CommonCrawlFormat.java | 4 +-- .../nutch/tools/CommonCrawlFormatFactory.java | 6 ++-- .../nutch/tools/CommonCrawlFormatSimple.java | 3 +- src/java/org/apache/nutch/tools/FileDumper.java| 6 +--- .../apache/nutch/tools/arc/ArcRecordReader.java| 11 +++--- .../org/apache/nutch/util/EncodingDetector.java| 5 ++- src/java/org/apache/nutch/util/LockUtil.java | 4 +-- src/java/org/apache/nutch/util/MimeUtil.java | 4 +-- .../org/apache/nutch/util/PrefixStringMatcher.java | 8 ++--- .../org/apache/nutch/util/SuffixStringMatcher.java | 8 ++--- 
src/java/org/apache/nutch/util/TableUtil.java | 4 +-- src/java/org/apache/nutch/util/TimingUtil.java | 2 +- .../org/apache/nutch/util/TrieStringMatcher.java | 8 ++--- src/java/org/apache/nutch/util/URLUtil.java| 41 ++ .../nutch/indexer/feed/FeedIndexingFilter.java | 2 +- .../nutch/indexer/anchor/AnchorIndexingFilter.java | 2 +- .../nutch/indexer/geoip/GeoIPIndexingFilter.java | 3 -- .../nutch/indexer/links/LinksIndexingFilter.java | 24 ++--- .../nutch/indexer/metadata/MetadataIndexer.java| 2 +- .../nutch/indexer/replace/ReplaceIndexer.java | 18 +- .../nutch/indexwriter/dummy/DummyIndexWriter.java | 2 +- .../apache/nutch/indexwriter/solr/SolrUtils.java | 2 +- .../nutch/analysis/lang/HTMLLanguageParser.java| 3 +- .../nutch/protocol/htmlunit/HtmlUnitWebDriver.java | 2 +- .../nutch/urlfilter/api/RegexURLFilterBase.java| 4 +-- .../nutch/protocol/selenium/HttpWebClient.java | 2 +- .../indexer/filter/MimeTypeIndexingFilter.java | 3 +- .../apache/nutch/parse/zip/ZipTextExtractor.java | 2 +- .../java/org/apache/nutch/protocol/ftp/Client.java | 2 +- .../httpclient/HttpBasicAuthentication.java| 4 +-- .../nutch/scoring/opic/OPICScoringFilter.java | 5 ++- .../scoring/similarity/util/LuceneTokenizer.java | 2 +- .../apache/nutch/collection/CollectionManager.java | 2 +- .../subcollection/SubcollectionIndexingFilter.java | 3 +- .../nutch/urlfilter/domain/DomainURLFilter.java| 10 +++--- .../domainblacklist/DomainBlacklistURLFilter.java | 10 +++--- .../nutch/urlfilter/suffix/SuffixURLFilter.java| 7 ++-- .../indexer/urlmeta/URLMetaIndexingFilter.java | 6 ++-- .../scoring/urlmeta/URLMetaScoringFilter.java | 2 +- .../querystring/QuerystringURLNormalizer.java | 2 +- 55 files changed, 130 insertions(+), 177 deletions(-) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #168 from r0ann3l/NUTCH-2333
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit 2a6f9ac4763b257f902478ef1947ad63bcc4f2d0 Merge: e9b823d 628b04b Author: Lewis John McGibbney AuthorDate: Sat Apr 15 04:18:07 2017 -0700 Merge pull request #168 from r0ann3l/NUTCH-2333 fix for NUTCH-2333 contributed by r0ann3l build.xml | 3 + conf/nutch-default.xml | 89 ++ src/plugin/build.xml | 2 + src/plugin/indexer-rabbit/build-ivy.xml| 54 ++ src/plugin/indexer-rabbit/build.xml| 22 +++ src/plugin/indexer-rabbit/ivy.xml | 43 + src/plugin/indexer-rabbit/plugin.xml | 39 + .../nutch/indexwriter/rabbit/RabbitDocument.java | 54 ++ .../indexwriter/rabbit/RabbitIndexWriter.java | 194 + .../indexwriter/rabbit/RabbitMQConstants.java | 44 + .../nutch/indexwriter/rabbit/RabbitMessage.java| 71 11 files changed, 615 insertions(+) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (e9b823d -> 2a6f9ac)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from e9b823d Merge pull request #182 from Omkar20895/NUTCH-2372 adds 5873a24 Fixes for NUTCH-2333: Added the lines for ant runtime task adds 62496ae Fixes for NUTCH-2333: Added the logic for indexing process adds 594564b Fixes for NUTCH-2333: Added the properties for RabbitMQ indexer. adds 17886f7 Fixes for NUTCH-2333: Added new properties to indexer adds c0af89a Fixes for NUTCH-2333: Corrected some comments in the configuration file and indexer description message. adds 628b04b Merge branch 'master' into NUTCH-2333 new 2a6f9ac Merge pull request #168 from r0ann3l/NUTCH-2333 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: build.xml | 3 + conf/nutch-default.xml | 89 ++ src/plugin/build.xml | 2 + .../{index-geoip => indexer-rabbit}/build-ivy.xml | 2 +- .../{indexer-dummy => indexer-rabbit}/build.xml| 2 +- .../{publish-rabbitmq => indexer-rabbit}/ivy.xml | 3 +- .../{indexer-dummy => indexer-rabbit}/plugin.xml | 15 +- .../nutch/indexwriter/rabbit/RabbitDocument.java} | 37 +++- .../indexwriter/rabbit/RabbitIndexWriter.java | 194 + .../indexwriter/rabbit/RabbitMQConstants.java} | 47 ++--- .../nutch/indexwriter/rabbit/RabbitMessage.java| 71 11 files changed, 417 insertions(+), 48 deletions(-) copy src/plugin/{index-geoip => indexer-rabbit}/build-ivy.xml (96%) copy src/plugin/{indexer-dummy => indexer-rabbit}/build.xml (94%) copy src/plugin/{publish-rabbitmq => indexer-rabbit}/ivy.xml (95%) copy src/plugin/{indexer-dummy => indexer-rabbit}/plugin.xml (71%) copy src/{java/org/apache/nutch/webui/service/SeedListService.java => 
plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitDocument.java} (50%) create mode 100644 src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java copy src/{test/org/apache/nutch/plugin/SimpleTestPlugin.java => plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitMQConstants.java} (50%) create mode 100644 src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitMessage.java -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #157 from r0ann3l/NUTCH-2132
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit 3628e2fd6974b9bdd98cc80274a35c97e22197dc Merge: 2a6f9ac 7ce1b4b Author: Lewis John McGibbney AuthorDate: Sat Apr 15 04:18:35 2017 -0700 Merge pull request #157 from r0ann3l/NUTCH-2132 fix for NUTCH-2132 contributed by r0ann3l conf/nutch-default.xml | 58 +- src/plugin/publish-rabbitmq/plugin.xml | 2 +- .../publisher/rabbitmq/RabbitMQPublisherImpl.java | 52 +++ 3 files changed, 100 insertions(+), 12 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (2a6f9ac -> 3628e2f)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from 2a6f9ac Merge pull request #168 from r0ann3l/NUTCH-2333 adds 9d65ac6 Fixes for NUTCH-2132: Added the library amqp adds bc9a2c8 Fixes for NUTCH-2132: Added new properties adds 5eb77a9 Fixes for NUTCH-2132: Deleted empty comments adds ee65175 Fixes for NUTCH-2132: Fixed the default port adds 7ce1b4b Merge branch 'master' into NUTCH-2132 new 3628e2f Merge pull request #157 from r0ann3l/NUTCH-2132 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: conf/nutch-default.xml | 58 +- src/plugin/publish-rabbitmq/plugin.xml | 2 +- .../publisher/rabbitmq/RabbitMQPublisherImpl.java | 52 +++ 3 files changed, 100 insertions(+), 12 deletions(-) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] branch master updated: Syntax issue
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git The following commit(s) were added to refs/heads/master by this push: new 9249bb0 Syntax issue 9249bb0 is described below commit 9249bb0fed2e186e6e985b1fe20041f03f2a2b66 Author: Lewis John McGibbney AuthorDate: Sat Apr 15 04:45:00 2017 -0700 Syntax issue --- .../java/org/apache/nutch/publisher/rabbitmq/RabbitMQPublisherImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugin/publish-rabbitmq/src/java/org/apache/nutch/publisher/rabbitmq/RabbitMQPublisherImpl.java b/src/plugin/publish-rabbitmq/src/java/org/apache/nutch/publisher/rabbitmq/RabbitMQPublisherImpl.java index c8ae2fd..dc2ca32 100644 --- a/src/plugin/publish-rabbitmq/src/java/org/apache/nutch/publisher/rabbitmq/RabbitMQPublisherImpl.java +++ b/src/plugin/publish-rabbitmq/src/java/org/apache/nutch/publisher/rabbitmq/RabbitMQPublisherImpl.java @@ -36,7 +36,7 @@ public class RabbitMQPublisherImpl implements NutchPublisher{ private static String EXCHANGE_SERVER; private static String EXCHANGE_TYPE; - private static String HOST + private static String HOST; private static int PORT; private static String VIRTUAL_HOST; -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #175 from jorgelbg/NUTCH-2353
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit b5fd9109c81acf1c643ae3618ea5303ec147a259 Merge: 9823911 7deb576 Author: Lewis John McGibbney AuthorDate: Thu May 18 23:20:48 2017 -0700 Merge pull request #175 from jorgelbg/NUTCH-2353 Fix for NUTCH-2353 contributed by jorgelbg src/java/org/apache/nutch/service/model/request/SeedUrl.java | 8 src/java/org/apache/nutch/service/resources/SeedResource.java | 11 +++ src/java/org/apache/nutch/webui/model/SeedUrl.java| 9 + 3 files changed, 28 insertions(+) diff --cc src/java/org/apache/nutch/service/resources/SeedResource.java index e8a5be3,f25de6f..180c033 --- a/src/java/org/apache/nutch/service/resources/SeedResource.java +++ b/src/java/org/apache/nutch/service/resources/SeedResource.java @@@ -17,8 -17,8 +17,9 @@@ package org.apache.nutch.service.resources; import java.io.OutputStream; +import java.lang.invoke.MethodHandles; import java.util.Collection; + import java.util.Iterator; import java.util.Map; import javax.ws.rs.Consumes; -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (9823911 -> b5fd910)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from 9823911 Update CHANGES.txt adds 7deb576 Fix for NUTCH-2353 contributed by jorgelbg new b5fd910 Merge pull request #175 from jorgelbg/NUTCH-2353 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: src/java/org/apache/nutch/service/model/request/SeedUrl.java | 8 src/java/org/apache/nutch/service/resources/SeedResource.java | 11 +++ src/java/org/apache/nutch/webui/model/SeedUrl.java| 9 + 3 files changed, 28 insertions(+) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] branch 2.x updated (755799f -> 1216411)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git. from 755799f Merge branch 'sebastian-nagel-NUTCH-2376-http-accept-header-2.x' into 2.x adds bda0076 NUTCH-2373 HBaseIndexWriter - indexer for hbase implemented adds 9541ad8 NUTCH-2373 Multiple SLF4J bindings issue solved, unnecessary dependencies removed, hbase index mapping file added adds 42257b3 NUTCH-2373 Extra newline removed adds 0f023e8 NUTCH-2373 Code formatted using Nutch eclipse-codeformat.xml adds 7d6f3c3 NUTCH-2373 getting mapped qualifier name from key issue solved adds 3db3699 NUTCH-2373 Boilerplate default column family removed, considering first column family as default adds 112202f commit() on close() to write prevent missing the last batch adds 0103f4d NUTCH-2373 typo corrected in hbaseindex-mapping.xml adds 108dab0 Merge remote-tracking branch 'origin/2.x' into 2.x adds 906c021 Merge remote-tracking branch 'upstream/2.x' into 2.x adds 0ca33f5 Parameterized logging with Slf4j applied. adds efde99e log level info->debug inside write to avoid flood logging adds f72e9e4 Original ivy.xml restored fixing the two spaces tab issue adds 9fb6bdd indexer-hbase added into Javadoc artifact's packageset in build.xml adds f8ffdea Logger inside write() removed to avoid flood logging, log added into commit(), made HBaseIndexWriter.LOG identifier public. adds 9dad864 NUTCH-2373 An issue on document counting fixed, default batch-size increased to 500 new 1216411 Merge pull request #184 from kaidul/2.x The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. 
Summary of changes: build.xml | 3 + conf/hbaseindex-mapping.xml| 60 +++ conf/nutch-default.xml | 38 + src/plugin/build.xml | 2 + .../{index-anchor => indexer-hbase}/build.xml | 2 +- src/plugin/{index-more => indexer-hbase}/ivy.xml | 6 +- .../{index-anchor => indexer-hbase}/plugin.xml | 20 +-- .../nutch/indexwriter/hbase/HBaseConstants.java| 48 ++ .../nutch/indexwriter/hbase/HBaseIndexWriter.java | 166 +++ .../indexwriter/hbase/HBaseMappingReader.java | 175 + .../nutch/indexwriter/hbase}/package-info.java | 5 +- 11 files changed, 509 insertions(+), 16 deletions(-) create mode 100644 conf/hbaseindex-mapping.xml copy src/plugin/{index-anchor => indexer-hbase}/build.xml (94%) copy src/plugin/{index-more => indexer-hbase}/ivy.xml (85%) copy src/plugin/{index-anchor => indexer-hbase}/plugin.xml (66%) create mode 100644 src/plugin/indexer-hbase/src/java/org/apache/nutch/indexwriter/hbase/HBaseConstants.java create mode 100644 src/plugin/indexer-hbase/src/java/org/apache/nutch/indexwriter/hbase/HBaseIndexWriter.java create mode 100644 src/plugin/indexer-hbase/src/java/org/apache/nutch/indexwriter/hbase/HBaseMappingReader.java copy src/{java/org/apache/nutch/parse => plugin/indexer-hbase/src/java/org/apache/nutch/indexwriter/hbase}/package-info.java (86%) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #184 from kaidul/2.x
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git commit 1216411b2fe08fb85638525161fdfd2945add0dc Merge: 755799f 9dad864 Author: Lewis John McGibbney AuthorDate: Mon May 22 14:00:37 2017 -0700 Merge pull request #184 from kaidul/2.x NUTCH-2373 Index writer plugin for hbase implemented build.xml | 3 + conf/hbaseindex-mapping.xml| 60 +++ conf/nutch-default.xml | 38 + src/plugin/build.xml | 2 + src/plugin/indexer-hbase/build.xml | 22 +++ src/plugin/indexer-hbase/ivy.xml | 41 + src/plugin/indexer-hbase/plugin.xml| 40 + .../nutch/indexwriter/hbase/HBaseConstants.java| 48 ++ .../nutch/indexwriter/hbase/HBaseIndexWriter.java | 166 +++ .../indexwriter/hbase/HBaseMappingReader.java | 175 + .../nutch/indexwriter/hbase/package-info.java | 21 +++ 11 files changed, 616 insertions(+) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] 01/01: Merge pull request #190 from vipulbehl/master
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit b7011f58aaed1564cdce2002954347f13af1d9f3 Merge: db77f19 1fe077e Author: Lewis John McGibbney AuthorDate: Mon May 22 14:06:52 2017 -0700 Merge pull request #190 from vipulbehl/master Read Paragraph Line Breaks .../src/java/org/apache/nutch/parse/tika/DOMContentUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (db77f19 -> b7011f5)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from db77f19 Merge branch 'sebastian-nagel-NUTCH-2376-http-accept-header' adds 1fe077e Read Paragraph Line Breaks new b7011f5 Merge pull request #190 from vipulbehl/master The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: .../src/java/org/apache/nutch/parse/tika/DOMContentUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] branch 2.x updated (1216411 -> 881197b)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git. from 1216411 Merge pull request #184 from kaidul/2.x adds 32a57b5 NUTCH-2388 bin/crawl indexing only webpages containing batchID instead of all new 881197b Merge pull request #191 from kaidul/NUTCH-2388 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Summary of changes: src/bin/crawl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #191 from kaidul/NUTCH-2388
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git commit 881197b1948006f47e9858afbd46117ab415a98f Merge: 1216411 32a57b5 Author: Lewis John McGibbney AuthorDate: Tue May 23 09:48:24 2017 -0700 Merge pull request #191 from kaidul/NUTCH-2388 NUTCH-2388 bin/crawl indexing only webpages of current batch instead of all src/bin/crawl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch 2.x updated (881197b -> aa682e7)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git. from 881197b Merge pull request #191 from kaidul/NUTCH-2388 add b92aa37 NUTCH-2374 Upgrade Nutch 2.X to Gora 0.7 add 25460eb Update for new data stores (JCache) add ab3a4a8 Added license header to the generated sources new aa682e7 Merge pull request #183 from cloudysunny14/NUTCH-2374 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: conf/gora.properties |8 + ...base-site.xml.template => hazelcast-client.xml} |9 +- ivy/ivy.xml| 19 +- src/gora/webpage.avsc |6 +- .../org/apache/nutch/crawl/WebTableReader.java |4 +- .../org/apache/nutch/host/HostDbUpdateJob.java |2 +- src/java/org/apache/nutch/storage/Host.java| 540 ++-- src/java/org/apache/nutch/storage/ParseStatus.java | 566 ++-- .../org/apache/nutch/storage/ProtocolStatus.java | 595 ++-- .../org/apache/nutch/storage/StorageUtils.java |2 +- src/java/org/apache/nutch/storage/WebPage.java | 3227 .../apache/nutch/util/domain/DomainStatistics.java |2 +- 12 files changed, 1963 insertions(+), 3017 deletions(-) copy conf/{hbase-site.xml.template => hazelcast-client.xml} (70%) -- To stop receiving notification emails like this one, please contact ['"commits@nutch.apache.org" '].
[nutch] 01/01: Merge pull request #183 from cloudysunny14/NUTCH-2374
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git commit aa682e724afc61a197cc06b530a3755311a17e0a Merge: 881197b ab3a4a8 Author: Lewis John McGibbney AuthorDate: Thu Jun 15 20:31:05 2017 -0700 Merge pull request #183 from cloudysunny14/NUTCH-2374 NUTCH-2374 Upgrade Nutch 2.X to Gora 0.7 conf/gora.properties |8 + conf/hazelcast-client.xml | 26 + ivy/ivy.xml| 19 +- src/gora/webpage.avsc |6 +- .../org/apache/nutch/crawl/WebTableReader.java |4 +- .../org/apache/nutch/host/HostDbUpdateJob.java |2 +- src/java/org/apache/nutch/storage/Host.java| 540 ++-- src/java/org/apache/nutch/storage/ParseStatus.java | 566 ++-- .../org/apache/nutch/storage/ProtocolStatus.java | 595 ++-- .../org/apache/nutch/storage/StorageUtils.java |2 +- src/java/org/apache/nutch/storage/WebPage.java | 3227 .../apache/nutch/util/domain/DomainStatistics.java |2 +- 12 files changed, 1984 insertions(+), 3013 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (403ec19 -> 327f3d4)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from 403ec19 Update version number in bin/nutch add 230b987 (doc) make consistency changes regarding casing with comments and spacing of imports new 327f3d4 Merge pull request #207 from kpm1985/development The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: src/java/org/apache/nutch/parse/ParseOutputFormat.java | 5 +++-- src/java/org/apache/nutch/parse/ParserChecker.java | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] 01/01: Merge pull request #207 from kpm1985/development
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit 327f3d415e43323c1cb7fb8628fd03079cca1d95 Merge: 403ec19 230b987 Author: Lewis John McGibbney AuthorDate: Mon Jul 24 13:44:30 2017 -0700 Merge pull request #207 from kpm1985/development (doc) make consistency changes regarding casing with comments and spa… src/java/org/apache/nutch/parse/ParseOutputFormat.java | 5 +++-- src/java/org/apache/nutch/parse/ParserChecker.java | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] 01/01: Merge pull request #206 from smartive/fix/NUTCH-2403-selenium-phantomjs-doc
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit 2de30d2e25921e9d134de64d6955d387b0d33fc0 Merge: 327f3d4 df5f962 Author: Lewis John McGibbney AuthorDate: Mon Jul 24 13:52:54 2017 -0700 Merge pull request #206 from smartive/fix/NUTCH-2403-selenium-phantomjs-doc NUTCH-2403: Fix spelling of phantomJS configuration src/plugin/protocol-selenium/README.md | 48 +- 1 file changed, 24 insertions(+), 24 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (327f3d4 -> 2de30d2)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from 327f3d4 Merge pull request #207 from kpm1985/development add df5f962 NUTCH-2403: Fix spelling of phantomJS configuration new 2de30d2 Merge pull request #206 from smartive/fix/NUTCH-2403-selenium-phantomjs-doc The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: src/plugin/protocol-selenium/README.md | 48 +- 1 file changed, 24 insertions(+), 24 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] 01/01: Merge pull request #192 from kaidul/NUTCH-2389
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git commit 5f6c383cc91f59ad28d9110efa4ad0109eda9117 Merge: 365077c b2130b4 Author: Lewis John McGibbney AuthorDate: Sun Jul 30 09:36:27 2017 -0700 Merge pull request #192 from kaidul/NUTCH-2389 NUTCH-2389 Precise data extractor implemented for 2.x build.xml | 5 + conf/jsoup-extractor-example.xml | 88 ++ conf/jsoup-extractor.xml | 53 ++ conf/nutch-default.xml | 9 ++ src/plugin/build.xml | 3 + src/plugin/jsoup-extractor/build.xml | 28 src/plugin/jsoup-extractor/ivy.xml | 40 + src/plugin/jsoup-extractor/plugin.xml | 56 +++ .../nutch/core/jsoup/extractor/JsoupDocument.java | 127 +++ .../core/jsoup/extractor/JsoupDocumentReader.java | 179 + .../jsoup/extractor/JsoupExtractorConstants.java | 36 + .../jsoup/extractor/normalizer/Normalizable.java | 22 +++ .../normalizer/SimpleStringNormalizer.java | 31 .../jsoup/extractor/normalizer/package-info.java | 22 +++ .../nutch/core/jsoup/extractor/package-info.java | 22 +++ .../jsoup/extractor/JsoupIndexingFilter.java | 85 ++ .../indexer/jsoup/extractor/package-info.java | 22 +++ .../parse/jsoup/extractor/JsoupHtmlParser.java | 118 ++ .../nutch/parse/jsoup/extractor/package-info.java | 22 +++ .../parse/jsoup/extractor/TestJsoupHtmlParser.java | 102 .../parse/jsoup/extractor/ViewCountNormalizer.java | 30 21 files changed, 1100 insertions(+) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch 2.x updated (365077c -> 5f6c383)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git. from 365077c Merge branch 'kaidul-NUTCH-2393' into 2.x add f41735c NUTCH-2389 jsoup-extractor with parse filter, indexing filter and unit testing implemented add fe6997f NUTCH-2389 jsoup-extractor/ivy.xml commited add 17bd8f6 NUTCH-2389 Unit test implemented but not passed add 52e785d NUTCH-2389 package name changed add 82ff292 NUTCH-2389 JsoupDocumentReader parsing bug fixed add 39ef777 NUTCH-2389 Unit test passed, xml parsing issue fixed add 66cbd7f NUTCH-2389 IndexingFilter Utf8 conversion issue solved add 60abbc4 NUTCH-2389 Class name corrected in jsoup-extractor.xml add 6e11dee NUTCH-2389 Unnecessary header import removed add 1ede445 NUTCH-2389 Missing license information added add b2130b4 NUTCH-2389 Diamond sign declaration removed and make Java 1.6 compatible new 5f6c383 Merge pull request #192 from kaidul/NUTCH-2389 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. 
Summary of changes: build.xml | 5 + conf/jsoup-extractor-example.xml | 88 ++ conf/jsoup-extractor.xml | 53 ++ conf/nutch-default.xml | 9 ++ src/plugin/build.xml | 3 + .../plugin/jsoup-extractor/build.xml | 20 +-- .../{index-metadata => jsoup-extractor}/ivy.xml| 3 +- src/plugin/jsoup-extractor/plugin.xml | 56 +++ .../nutch/core/jsoup/extractor/JsoupDocument.java | 127 +++ .../core/jsoup/extractor/JsoupDocumentReader.java | 179 + .../jsoup/extractor/JsoupExtractorConstants.java | 36 + .../jsoup/extractor/normalizer/Normalizable.java} | 8 +- .../normalizer/SimpleStringNormalizer.java}| 24 ++- .../jsoup/extractor/normalizer}/package-info.java | 4 +- .../nutch/core/jsoup/extractor}/package-info.java | 4 +- .../jsoup/extractor/JsoupIndexingFilter.java} | 68 .../indexer/jsoup/extractor}/package-info.java | 4 +- .../parse/jsoup/extractor/JsoupHtmlParser.java | 118 ++ .../nutch/parse/jsoup/extractor}/package-info.java | 4 +- .../parse/jsoup/extractor/TestJsoupHtmlParser.java | 102 .../jsoup/extractor/ViewCountNormalizer.java} | 21 +-- 21 files changed, 856 insertions(+), 80 deletions(-) create mode 100644 conf/jsoup-extractor-example.xml create mode 100644 conf/jsoup-extractor.xml copy ivy/ivy-configurations.xml => src/plugin/jsoup-extractor/build.xml (65%) copy src/plugin/{index-metadata => jsoup-extractor}/ivy.xml (95%) create mode 100644 src/plugin/jsoup-extractor/plugin.xml create mode 100644 src/plugin/jsoup-extractor/src/java/org/apache/nutch/core/jsoup/extractor/JsoupDocument.java create mode 100644 src/plugin/jsoup-extractor/src/java/org/apache/nutch/core/jsoup/extractor/JsoupDocumentReader.java create mode 100644 src/plugin/jsoup-extractor/src/java/org/apache/nutch/core/jsoup/extractor/JsoupExtractorConstants.java copy src/{java/org/apache/nutch/host/package-info.java => plugin/jsoup-extractor/src/java/org/apache/nutch/core/jsoup/extractor/normalizer/Normalizable.java} (85%) copy src/{java/org/apache/nutch/crawl/InjectType.java => 
plugin/jsoup-extractor/src/java/org/apache/nutch/core/jsoup/extractor/normalizer/SimpleStringNormalizer.java} (70%) copy src/{java/org/apache/nutch/host => plugin/jsoup-extractor/src/java/org/apache/nutch/core/jsoup/extractor/normalizer}/package-info.java (89%) copy src/{java/org/apache/nutch/api => plugin/jsoup-extractor/src/java/org/apache/nutch/core/jsoup/extractor}/package-info.java (85%) copy src/plugin/{tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java => jsoup-extractor/src/java/org/apache/nutch/indexer/jsoup/extractor/JsoupIndexingFilter.java} (55%) copy src/{java/org/apache/nutch/host => plugin/jsoup-extractor/src/java/org/apache/nutch/indexer/jsoup/extractor}/package-info.java (89%) create mode 100644 src/plugin/jsoup-extractor/src/java/org/apache/nutch/parse/jsoup/extractor/JsoupHtmlParser.java copy src/{java/org/apache/nutch/host => plugin/jsoup-extractor/src/java/org/apache/nutch/parse/jsoup/extractor}/package-info.java (87%) create mode 100644 src/plugin/jsoup-extractor/src/test/org/apache/nutch/parse/jsoup/extractor/TestJsoupHtmlParser.java copy src/{java/org/apache/nutch/crawl/InjectType.java => plugin/jsoup-extractor/src/test/org/apache/nutch/parse/jsoup/extractor/ViewCountNormalizer.java} (70%) -- To stop receivi
[nutch] branch 2.x updated (5f6c383 -> 952851c)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git. from 5f6c383 Merge pull request #192 from kaidul/NUTCH-2389 add a6870de NUTCH-2404 Fix for Failed Jenkin build #1588 after merging pull request #192 (NUTCH-2389). new 952851c Merge pull request #208 from kaidul/2.x The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: src/plugin/jsoup-extractor/build.xml | 5 - .../nutch/parse/jsoup/extractor/TestJsoupHtmlParser.java | 12 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] 01/01: Merge pull request #208 from kaidul/2.x
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git commit 952851c57871e4c3cc89f27894c30e836770a676 Merge: 5f6c383 a6870de Author: Lewis John McGibbney AuthorDate: Mon Jul 31 09:51:23 2017 -0700 Merge pull request #208 from kaidul/2.x NUTCH-2404 Fix for Failed Jenkin build #1588 after merging pull request #192 (NUTCH-2389). src/plugin/jsoup-extractor/build.xml | 5 - .../nutch/parse/jsoup/extractor/TestJsoupHtmlParser.java | 12 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] 01/01: Merge pull request #210 from kpm1985/playground
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit 926ebdba98e29d979129888029d71b1c14356729 Merge: 44f7ad9 02f789c Author: Lewis John McGibbney AuthorDate: Wed Aug 9 10:17:54 2017 -0700 Merge pull request #210 from kpm1985/playground NUTCH-2406 Minor improvements .../org/apache/nutch/crawl/AbstractFetchSchedule.java | 9 + src/java/org/apache/nutch/crawl/CrawlDatum.java| 18 +- src/java/org/apache/nutch/net/URLExemptionFilter.java | 2 +- src/java/org/apache/nutch/net/URLNormalizers.java | 1 + 4 files changed, 16 insertions(+), 14 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (44f7ad9 -> 926ebdb)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from 44f7ad9 NUTCH-2368 Variable generate.max.count and fetcher.server.delay add f79fbc1 Minor formatting and case corrections add 02f789c Do some math and format some stuff. new 926ebdb Merge pull request #210 from kpm1985/playground The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../org/apache/nutch/crawl/AbstractFetchSchedule.java | 9 + src/java/org/apache/nutch/crawl/CrawlDatum.java| 18 +- src/java/org/apache/nutch/net/URLExemptionFilter.java | 2 +- src/java/org/apache/nutch/net/URLNormalizers.java | 1 + 4 files changed, 16 insertions(+), 14 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] branch 2.x updated (952851c -> 71ed6f2)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git. from 952851c Merge pull request #208 from kaidul/2.x add 49ff77e NUTCH-2405 1. Missed root tag added in jsoup-extractor.xml like jsoup-extractor-example.xml 2. jsoup API text() used instead of ownText() to get full contents under CSS selector 3. => typo fixed add 50c8d82 Merge remote-tracking branch 'upstream/2.x' into 2.x new 71ed6f2 Merge pull request #209 from kaidul/2.x The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: conf/jsoup-extractor-example.xml | 2 +- conf/jsoup-extractor.xml | 65 +++--- .../parse/jsoup/extractor/JsoupHtmlParser.java | 4 +- 3 files changed, 36 insertions(+), 35 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] 01/01: Merge pull request #209 from kaidul/2.x
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git commit 71ed6f2ee372b4480f0dfce635bb242da77bc53d Merge: 952851c 50c8d82 Author: Lewis John McGibbney AuthorDate: Wed Aug 9 10:25:25 2017 -0700 Merge pull request #209 from kaidul/2.x Issues mentioned in NUTCH-2405 fixed conf/jsoup-extractor-example.xml | 2 +- conf/jsoup-extractor.xml | 65 +++--- .../parse/jsoup/extractor/JsoupHtmlParser.java | 4 +- 3 files changed, 36 insertions(+), 35 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] 01/01: Merge pull request #201 from lewismc/NUTCH-2400
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit d4f611a1b452c6c6c08e6167c8e95a95d69cc3c4 Merge: 78fbb11 4115dca Author: Lewis John McGibbney AuthorDate: Tue Aug 15 08:51:06 2017 -0700 Merge pull request #201 from lewismc/NUTCH-2400 NUTCH-2400 Solr 6.6.0 compatibility conf/schema.xml | 37 +++-- 1 file changed, 15 insertions(+), 22 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (78fbb11 -> d4f611a)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from 78fbb11 Merge pull request #212 from sebastian-nagel/NUTCH-2408-updatedb-unparsed-segment add 1857e62 NUTCH-2400 Solr 6.6.0 compatibility add 4115dca NUTCH-2400 Solr 6.6.0 compatibility new d4f611a Merge pull request #201 from lewismc/NUTCH-2400 The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: conf/schema.xml | 37 +++-- 1 file changed, 15 insertions(+), 22 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] 01/01: Merge pull request #203 from smartive/fix/elastic-rest-multivalues
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit a2e4800f33afb65f53cdfb52bacac280410a105c Merge: bad20b1 e319a37 Author: Lewis John McGibbney AuthorDate: Mon Aug 21 09:49:38 2017 -0700 Merge pull request #203 from smartive/fix/elastic-rest-multivalues NUTCH-2399: Fix indexer-elastic-rest for multivalue fields .../elasticrest/ElasticRestIndexWriter.java| 24 ++ 1 file changed, 16 insertions(+), 8 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (bad20b1 -> a2e4800)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from bad20b1 Merge pull request #213 from sebastian-nagel/NUTCH-2378-child-first-class-loader add e319a37 Fix indexer-elastic-rest for multivalue fields new a2e4800 Merge pull request #203 from smartive/fix/elastic-rest-multivalues The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../elasticrest/ElasticRestIndexWriter.java| 24 ++ 1 file changed, 16 insertions(+), 8 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] 01/01: Merge pull request #227 from kpm1985/NUTCH-2436
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git commit da64358def60ec6a5595f916b493bc6fe3636adc Merge: d06ccde 4d67a77 Author: Lewis John McGibbney AuthorDate: Thu Sep 28 11:53:21 2017 -0700 Merge pull request #227 from kpm1985/NUTCH-2436 NUTCH-2436 Fix empty comment and unnecessary semicolon ivy/ivy-2.2.0.jar | Bin 947592 -> 0 bytes src/java/org/apache/nutch/util/CommandRunner.java | 3 +-- 2 files changed, 1 insertion(+), 2 deletions(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .
[nutch] branch master updated (d06ccde -> da64358)
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git. from d06ccde Merge pull request #223 from sebastian-nagel/NUTCH-2430 add d2bf52f clean my stuff... add 1a80d30 Merge branch 'master' of https://www.github.com/kpm1985/nutch add c5816cf Merge branch 'master' of https://www.github.com/kpm1985/nutch add 4d67a77 NUTCH-2436 Fix new da64358 Merge pull request #227 from kpm1985/NUTCH-2436 The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: ivy/ivy-2.2.0.jar | Bin 947592 -> 0 bytes src/java/org/apache/nutch/util/CommandRunner.java | 3 +-- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 ivy/ivy-2.2.0.jar -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org".
[nutch] 01/01: Merge pull request #228 from tulay/NUTCH-2437
This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch 2.x in repository https://gitbox.apache.org/repos/asf/nutch.git commit 16696af017563d5132985d91b22315554a8dd59e Merge: d95396e 93b8796 Author: Lewis John McGibbney AuthorDate: Wed Oct 4 10:42:45 2017 -0700 Merge pull request #228 from tulay/NUTCH-2437 fix for NUTCH-2437 contributed by tmzzngl conf/gora-mongodb-mapping.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" .