Author: danielr
Date: 2008-03-15 22:57:55 +0100 (Sat, 15 Mar 2008)
New Revision: 4567
Modified:
trunk/source/de/anomic/net/ftpc.java
trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java
Log:
FTP:
- report connection status (so callers can abort when no connection is possible)
- fixed isFolder()
- additional error output
- fixed paths with encoded symbols (e.g. a%20file.txt)
- refactoring
Modified: trunk/source/de/anomic/net/ftpc.java
===================================================================
--- trunk/source/de/anomic/net/ftpc.java 2008-03-15 10:56:47 UTC (rev
4566)
+++ trunk/source/de/anomic/net/ftpc.java 2008-03-15 21:57:55 UTC (rev
4567)
@@ -249,11 +249,11 @@
.booleanValue());
} catch (final InvocationTargetException e) {
if (e.getMessage() == null) {
- } else if (ControlSocket == null) {
+ } else if (notConnected()) {
// the error was probably caused because there is no
// connection
errPrintln("not connected. no effect.");
- e.printStackTrace();
+ e.printStackTrace(err);
return ret;
} else {
errPrintln("ftp internal exception: target exception " +
e);
@@ -266,7 +266,7 @@
// consider first that the user attempted to execute a java
// command from
// the current path; either local or remote
- if (ControlSocket == null) {
+ if (notConnected()) {
// try a local exec
try {
javaexec(cmd);
@@ -470,7 +470,7 @@
errPrintln("Syntax: CD <path>");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LCD();
}
try {
@@ -531,7 +531,7 @@
errPrintln("Syntax: DEL <file>");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LDEL();
}
try {
@@ -551,7 +551,7 @@
errPrintln("Syntax: DIR [<path>|<file>]");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LDIR();
}
try {
@@ -575,18 +575,21 @@
} catch (final IOException e) {
errPrintln("Connection to server lost.");
}
- ControlSocket = null;
- DataSocketActive = null;
- DataSocketPassive = null;
- clientInput = null;
- clientOutput = null;
+ try {
+ closeConnection();
+ } catch (final IOException e) {
+ ControlSocket = null;
+ DataSocketActive = null;
+ DataSocketPassive = null;
+ clientInput = null;
+ clientOutput = null;
+ }
prompt = "ftp [local]>";
return true;
}
private String quit() throws IOException {
- // send delete command
send("QUIT");
// read status reply
@@ -595,25 +598,8 @@
throw new IOException(reply);
}
- // cleanup
- if (ControlSocket != null) {
- clientOutput.close();
- clientInput.close();
- ControlSocket.close();
- ControlSocket = null;
- }
+ closeConnection();
- if (DataSocketActive != null) {
- DataSocketActive.close();
- DataSocketActive = null;
- }
- if (DataSocketPassive != null) {
- DataSocketPassive.close();
- DataSocketPassive = null; // "Once a socket has been closed, it is
- // not available for further networking
- // use"
- }
-
return reply;
}
@@ -633,8 +619,8 @@
final File local = absoluteLocalFile(localFilename);
if (local.exists()) {
- errPrintln("Error: local file " + local.toString() + " already
exists.");
- errPrintln(logPrefix + " File " + remote + " not
retrieved. Local file unchanged.");
+ errPrintln("Error: local file " + local.toString() + " already
exists.\n" + " File " + remote
+ + " not retrieved. Local file unchanged.");
} else {
if (withoutLocalFile) {
retrieveFilesRecursively(remote, false);
@@ -732,7 +718,8 @@
}
// check if we actually changed into the folder
final String changedPath = pwd();
- if (!(changedPath.equals(path) || changedPath.equals(currentFolder
+ "/" + path))) {
+ if (!(changedPath.equals(path) || changedPath.equals(currentFolder
+ + (currentFolder.endsWith("/") ? "" : "/") + path))) {
throw new IOException("folder is '" + changedPath + "' should
be '" + path + "'");
}
// return to last folder
@@ -1172,7 +1159,7 @@
final String dateString = tokens.group(3) + " " + tokens.group(4)
+ " " + year + " " + time;
try {
date = lsDateFormat.parse(dateString);
- } catch (ParseException e) {
+ } catch (final ParseException e) {
errPrintln(logPrefix + "---- Error: not ls date-format '" +
dateString + "': " + e.getMessage());
date = new Date();
}
@@ -1391,7 +1378,7 @@
errPrintln("Syntax: LS [<path>|<file>]");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LLS();
}
try {
@@ -1415,7 +1402,6 @@
outPrintln("----
v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v");
for (final String element : list) {
outPrintln(element);
- outPrintln("--> " + parseListData(element));
}
outPrintln("----
^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^");
}
@@ -1477,7 +1463,7 @@
errPrintln("Syntax: MKDIR <folder-name>");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LMKDIR();
}
try {
@@ -1545,7 +1531,7 @@
errPrintln("Syntax: MV <from> <to>");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LMV();
}
try {
@@ -1604,7 +1590,7 @@
outPrintln("---- Connection to " + cmd[1] + " established.");
prompt = "ftp [" + cmd[1] + "]>";
} catch (final IOException e) {
- errPrintln("Error: connecting " + cmd[1] + " on port " + port + "
failed.");
+ errPrintln("Error: connecting " + cmd[1] + " on port " + port + "
failed: " + e.getMessage());
}
return true;
}
@@ -1614,20 +1600,59 @@
exec("close", false); // close any existing connections first
}
- ControlSocket = new Socket(host, port);
- ControlSocket.setSoTimeout(getTimeout());
- clientInput = new BufferedReader(new
InputStreamReader(ControlSocket.getInputStream()));
- clientOutput = new DataOutputStream(new
BufferedOutputStream(ControlSocket.getOutputStream()));
+ try {
+ ControlSocket = new Socket(host, port);
+ ControlSocket.setSoTimeout(getTimeout());
+ clientInput = new BufferedReader(new
InputStreamReader(ControlSocket.getInputStream()));
+ clientOutput = new DataOutputStream(new
BufferedOutputStream(ControlSocket.getOutputStream()));
- // read and return server message
- this.host = host;
- this.port = port;
- remotemessage = receive();
- if ((remotemessage != null) && (remotemessage.length() > 3)) {
- remotemessage = remotemessage.substring(4);
+ // read and return server message
+ this.host = host;
+ this.port = port;
+ remotemessage = receive();
+ if ((remotemessage != null) && (remotemessage.length() > 3)) {
+ remotemessage = remotemessage.substring(4);
+ }
+ } catch (final IOException e) {
+ // if a connection was opened, it should not be used
+ closeConnection();
+ throw new IOException(e);
}
}
+ /**
+ * @return true if no control connection exists, i.e. ControlSocket is null
+ */
+ public boolean notConnected() {
+ return ControlSocket == null;
+ }
+
+ /**
+ * close the control socket (together with its reader/writer) and both data sockets; each closed socket field is reset to null
+ *
+ * @throws IOException propagated from the first close() call that fails; the remaining cleanup steps are then skipped
+ */
+ private void closeConnection() throws IOException {
+ // cleanup; NOTE(review): clientOutput/clientInput are used without a null check - confirm they are always set whenever ControlSocket is
+ if (ControlSocket != null) {
+ clientOutput.close();
+ clientInput.close();
+ ControlSocket.close();
+ ControlSocket = null;
+ }
+
+ if (DataSocketActive != null) {
+ DataSocketActive.close();
+ DataSocketActive = null;
+ }
+ if (DataSocketPassive != null) {
+ DataSocketPassive.close();
+ DataSocketPassive = null; // "Once a socket has been closed, it is
+ // not available for further networking
+ // use"
+ }
+ }
+
public boolean PROMPT() {
errPrintln("prompt is always off");
return true;
@@ -1658,7 +1683,7 @@
errPrintln("Syntax: PWD (no parameter)");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LPWD();
}
try {
@@ -1701,7 +1726,7 @@
errPrintln("Syntax: RMDIR <folder-name>");
return true;
}
- if (ControlSocket == null) {
+ if (notConnected()) {
return LRMDIR();
}
try {
@@ -1713,7 +1738,7 @@
}
public boolean QUIT() {
- if (ControlSocket != null) {
+ if (!notConnected()) {
exec("close", false);
}
return false;
@@ -1780,7 +1805,7 @@
login(cmd[1], cmd[2]);
outPrintln("---- Granted access for user " + cmd[1] + ".");
} catch (final IOException e) {
- errPrintln("Error: authorization of user " + cmd[1] + " failed.");
+ errPrintln("Error: authorization of user " + cmd[1] + " failed: "
+ e.getMessage());
}
return true;
}
@@ -2354,6 +2379,7 @@
* @throws IOException
*/
private void login(final String account, final String password) throws
IOException {
+ unsetLoginData();
// send user name
send("USER " + account);
@@ -2381,6 +2407,15 @@
}
/**
+ * report whether login data is currently stored (presumably only set after a successful login - TODO confirm)
+ *
+ * @return true if account, password and remotegreeting are all non-null
+ */
+ public boolean isLoggedIn() {
+ return (account != null && password != null && remotegreeting != null);
+ }
+
+ /**
* remember username and password which were used to login
*
* @param account
@@ -2394,6 +2429,12 @@
remotegreeting = reply;
}
+ private void unsetLoginData() { // forget stored login data: account, password and remotegreeting become null
+ account = null;
+ password = null;
+ remotegreeting = null;
+ }
+
public void sys() throws IOException {
// send system command
send("SYST");
@@ -2451,7 +2492,7 @@
* @param timeout
* in seconds, 0 = infinite
*/
- public void setDataSocketTimeout(int timeout) {
+ public void setDataSocketTimeout(final int timeout) {
DataSocketTimeout = timeout;
try {
Modified: trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java
===================================================================
--- trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java 2008-03-15
10:56:47 UTC (rev 4566)
+++ trunk/source/de/anomic/plasma/crawler/plasmaFTPLoader.java 2008-03-15
21:57:55 UTC (rev 4567)
@@ -90,8 +90,12 @@
*/
public plasmaHTCache.Entry load(final plasmaCrawlEntry entry) {
final yacyURL entryUrl = entry.url();
- final String fullPath = entryUrl.getPath();
+ final String fullPath = getPath(entryUrl);
+ final File cacheFile = createCachefile(entryUrl);
+ // the return value
+ plasmaHTCache.Entry htCache = null;
+
// determine filename and path
String file, path;
if (fullPath.endsWith("/")) {
@@ -113,72 +117,78 @@
final ByteArrayOutputStream berr = new ByteArrayOutputStream();
final ftpc ftpClient = createFTPClient(berr);
- plasmaHTCache.Entry htCache = null;
- try {
- openConnection(ftpClient, entryUrl);
+ if (openConnection(ftpClient, entryUrl)) {
+ // ftp stuff
+ try {
+ // testing if the specified file is a directory
+ if (file.length() > 0) {
+ ftpClient.exec("cd \"" + path + "\"", false);
- // testing if the specified file is a directory
- if (file.length() > 0) {
- ftpClient.exec("cd \"" + path + "\"", false);
+ final boolean isFolder = ftpClient.isFolder(file);
+ if (isFolder) {
+ path = fullPath + "/";
+ file = "";
+ }
+ }
- // testing if the current name is a directoy
- final boolean isFolder = ftpClient.isFolder(file);
- if (isFolder) {
- path = fullPath + "/";
- file = "";
+ if (file.length() == 0) {
+ // directory -> get list of files
+ // create a htcache entry
+ htCache = createCacheEntry(entry, "text/html", new Date());
+ if (!generateDirlist(ftpClient, entry, path, cacheFile)) {
+ htCache = null;
+ }
+ } else {
+ // file -> download
+ try {
+ htCache = getFile(ftpClient, entry, cacheFile);
+ } catch (final Exception e) {
+ // add message to errorLog
+ (new PrintStream(berr)).print(e.getMessage());
+ }
}
+ } finally {
+ closeConnection(ftpClient);
}
+ }
- // creating a cache file object
- final File cacheFile = plasmaHTCache.getCachePath(entryUrl);
+ // pass the downloaded resource to the cache manager
+ if (berr.size() > 0 || htCache == null) {
+ // some error logging
+ final String detail = (berr.size() > 0) ? "\n Errorlog: " +
berr.toString() : "";
+ log.logWarning("Unable to download URL " + entry.url().toString()
+ detail);
+ sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1,
plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR);
- // TODO: invalid file path check
-
- // testing if the file already exists
- if (cacheFile.isFile()) {
- // delete the file if it already exists
- plasmaHTCache.deleteURLfromCache(entryUrl);
- } else {
- // create parent directories
- cacheFile.getParentFile().mkdirs();
+ // an error has occured. cleanup
+ if (cacheFile.exists()) {
+ cacheFile.delete();
}
+ } else {
+ // announce the file
+ plasmaHTCache.writeFileAnnouncement(cacheFile);
+ }
- if (file.length() == 0) {
- // directory -> get list of files
- // create a htcache entry
- htCache = createCacheEntry(entry, "text/html", new Date());
- if (!generateDirlist(ftpClient, entry, path, cacheFile)) {
- htCache = null;
- }
- } else {
- // file -> download
- try {
- htCache = getFile(ftpClient, entry, cacheFile);
- } catch (final Exception e) {
- }
- }
+ return htCache;
+ }
- // pass the downloaded resource to the cache manager
- if (berr.size() > 0 || htCache == null) {
- // some error logging
- final String detail = (berr.size() > 0) ? "\n Errorlog: " +
berr.toString() : "";
- log.logWarning("Unable to download URL " +
entry.url().toString() + detail);
- sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1,
- plasmaCrawlEURL.DENIED_SERVER_DOWNLOAD_ERROR);
+ /**
+ * look up the cache file for a URL; an existing file is removed from the cache, otherwise the parent directories are created
+ *
+ * @param entryUrl URL passed to plasmaHTCache.getCachePath() (and to deleteURLfromCache() if the file already exists)
+ * @return the cache file object obtained from plasmaHTCache.getCachePath()
+ */
+ private File createCachefile(final yacyURL entryUrl) {
+ final File cacheFile = plasmaHTCache.getCachePath(entryUrl);
- // an error has occured. cleanup
- if (cacheFile.exists()) {
- cacheFile.delete();
- }
- } else {
- // announce the file
- plasmaHTCache.writeFileAnnouncement(cacheFile);
- }
-
- return htCache;
- } finally {
- closeConnection(ftpClient);
+ // testing if the file already exists
+ if (cacheFile.isFile()) {
+ // delete the file if it already exists
+ plasmaHTCache.deleteURLfromCache(entryUrl);
+ } else {
+ // create parent directories
+ cacheFile.getParentFile().mkdirs();
}
+ return cacheFile;
}
/**
@@ -196,8 +206,9 @@
* @param ftpClient
* @param host
* @param port
+ * @return success
*/
- private void openConnection(final ftpc ftpClient, final yacyURL entryUrl) {
+ private boolean openConnection(final ftpc ftpClient, final yacyURL
entryUrl) {
// get username and password
final String userInfo = entryUrl.getUserInfo();
String userName = "anonymous", userPwd = "anonymous";
@@ -218,12 +229,20 @@
} else {
ftpClient.exec("open " + host + " " + port, false);
}
+ if (ftpClient.notConnected()) {
+ return false;
+ }
// login to the server
ftpClient.exec("user " + userName + " " + userPwd, false);
- // change transfer mode to binary
- ftpClient.exec("binary", false);
+ if (ftpClient.isLoggedIn()) {
+ // change transfer mode to binary
+ ftpClient.exec("binary", false);
+ } else {
+ return false;
+ }
+ return true;
}
/**
@@ -240,7 +259,7 @@
final yacyURL entryUrl = entry.url();
final String extension = plasmaParser.getFileExt(entryUrl);
final String mimeType = plasmaParser.getMimeTypeByFileExt(extension);
- final String path = entryUrl.getPath();
+ final String path = getPath(entryUrl);
// if the mimetype and file extension is supported we start to download
// the file
@@ -264,7 +283,7 @@
log.logInfo("REJECTED TOO BIG FILE with size " + size + "
Bytes for URL " + entry.url().toString());
sb.crawlQueues.errorURL.newEntry(entry, null, new Date(), 1,
plasmaCrawlEURL.DENIED_FILESIZE_LIMIT_EXCEEDED);
- throw new Exception("filesize too big: " + size + " bytes");
+ throw new Exception("file size exceeds limit");
}
} else {
// if the response has not the right file type then reject file
@@ -276,6 +295,16 @@
}
/**
+ * gets path suitable for FTP (url-decoded, double-quotes escaped)
+ *
+ * @param entryUrl URL whose getPath() result is converted
+ * @return the path after yacyURL.unescape(), with every double quote replaced by two double quotes
+ */
+ private String getPath(final yacyURL entryUrl) {
+ return yacyURL.unescape(entryUrl.getPath()).replace("\"", "\"\"");
+ }
+
+ /**
* @param ftpClient
* @param entry
* @param cacheFile
_______________________________________________
YaCy-svn mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/yacy-svn