Re: [Libguestfs] [PATCH v2] python: add simple wrappers for PyObject<->string functions

2017-05-09 Thread noxdafox

On 09/05/17 16:48, Pino Toscano wrote:

The current need for #ifdef's based on the presence of
PyString_FromString makes both the OCaml code of the generator, and the
generated C code a mess to read.

Hence, add three simple wrappers to make both the OCaml, and C code more
readable, and easier to tweak in the future.
---
  generator/python.ml | 72 -
  python/handle.c | 65 ++-
  2 files changed, 53 insertions(+), 84 deletions(-)

diff --git a/generator/python.ml b/generator/python.ml
index 0162733..cf08294 100644
--- a/generator/python.ml
+++ b/generator/python.ml
@@ -91,6 +91,9 @@ extern PyObject *guestfs_int_py_event_to_string (PyObject 
*self, PyObject *args)
  extern char **guestfs_int_py_get_string_list (PyObject *obj);
  extern PyObject *guestfs_int_py_put_string_list (char * const * const argv);
  extern PyObject *guestfs_int_py_put_table (char * const * const argv);
+extern PyObject *guestfs_int_py_fromstring (const char *str);
+extern PyObject *guestfs_int_py_fromstringsize (const char *str, size_t size);
+extern char *guestfs_int_py_asstring (PyObject *obj);
  
  ";
  
@@ -178,31 +181,16 @@ and generate_python_structs () =

  function
  | name, FString ->
  pr "  PyDict_SetItemString (dict, \"%s\",\n" name;
-pr "#ifdef HAVE_PYSTRING_ASSTRING\n";
-pr "PyString_FromString (%s->%s));\n"
-  typ name;
-pr "#else\n";
-pr "PyUnicode_FromString (%s->%s));\n"
-  typ name;
-pr "#endif\n"
+pr "guestfs_int_py_fromstring (%s->%s));\n"
+  typ name
  | name, FBuffer ->
  pr "  PyDict_SetItemString (dict, \"%s\",\n" name;
-pr "#ifdef HAVE_PYSTRING_ASSTRING\n";
-pr "PyString_FromStringAndSize (%s->%s, 
%s->%s_len));\n"
-  typ name typ name;
-pr "#else\n";
-pr "PyBytes_FromStringAndSize (%s->%s, 
%s->%s_len));\n"
-  typ name typ name;
-pr "#endif\n"
+pr "guestfs_int_py_fromstringsize (%s->%s, 
%s->%s_len));\n"
+  typ name typ name
  | name, FUUID ->
  pr "  PyDict_SetItemString (dict, \"%s\",\n" name;
-pr "#ifdef HAVE_PYSTRING_ASSTRING\n";
-pr "PyString_FromStringAndSize (%s->%s, 
32));\n"
-  typ name;
-pr "#else\n";
-pr "PyBytes_FromStringAndSize (%s->%s, 
32));\n"
-  typ name;
-pr "#endif\n"
+pr "guestfs_int_py_fromstringsize (%s->%s, 
32));\n"
+  typ name
  | name, (FBytes|FUInt64) ->
  pr "  PyDict_SetItemString (dict, \"%s\",\n" name;
  pr "PyLong_FromUnsignedLongLong 
(%s->%s));\n"
@@ -229,15 +217,9 @@ and generate_python_structs () =
  pr "PyDict_SetItemString (dict, \"%s\", Py_None);\n" name;
  pr "  }\n"
  | name, FChar ->
-pr "#ifdef HAVE_PYSTRING_ASSTRING\n";
  pr "  PyDict_SetItemString (dict, \"%s\",\n" name;
-pr "PyString_FromStringAndSize (&%s->%s, 
1));\n"
-  typ name;
-pr "#else\n";
-pr "  PyDict_SetItemString (dict, \"%s\",\n" name;
-pr "PyUnicode_FromStringAndSize (&%s->%s, 
1));\n"
-  typ name;
-pr "#endif\n"
+pr "guestfs_int_py_fromstringsize (&%s->%s, 
1));\n"
+  typ name
) cols;
pr "  return dict;\n";
pr "};\n";
@@ -419,13 +401,7 @@ and generate_python_actions actions () =
pr "optargs_s.%s = PyLong_AsLongLong (py_%s);\n" n n;
pr "if (PyErr_Occurred ()) goto out;\n"
  | OString _ ->
-  pr "#ifdef HAVE_PYSTRING_ASSTRING\n";
-  pr "optargs_s.%s = PyString_AsString (py_%s);\n" n n;
-  pr "#else\n";
-  pr "PyObject *bytes;\n";
-  pr "bytes = PyUnicode_AsUTF8String (py_%s);\n" n;
-  pr "optargs_s.%s = PyBytes_AS_STRING (bytes);\n" n;
-  pr "#endif\n";
+  pr "optargs_s.%s = guestfs_int_py_asstring (py_%s);\n" n n
  | OStringList _ ->
pr "optargs_s.%s = guestfs_int_py_get_string_list 
(py_%s);\n" n n;
pr "if (!optargs_s.%s) goto out;\n" n;
@@ -480,30 +456,18 @@ and generate_python_actions actions () =
 | RBool _ -> pr "  py_r = PyLong_FromLong ((long) r);\n"
 | RInt64 _ -> pr "  py_r = PyLong_FromLongLong (r);\n"
 | RConstString _ ->
-   pr 

Re: [Libguestfs] [PATCH v7 1/7] daemon: expose file upload logic

2017-04-24 Thread NoxDaFox
2017-04-24 11:58 GMT+03:00 Richard W.M. Jones :

> On Sun, Apr 23, 2017 at 07:49:56PM +0300, Matteo Cafasso wrote:
> > +  if (r == -1) { /* write error */
> > +err = errno;
> > +r = cancel_receive ();
>
> You need to use ignore_value here, and it needs to be in a separate
> commit, as discussed previously.
>
> Rich.
>

My bad I misunderstood, will fix it.


>
> --
> Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~
> rjones
> Read my programming and virtualization blog: http://rwmj.wordpress.com
> virt-top is 'top' for virtual machines.  Tiny program with many
> powerful monitoring features, net stats, disk stats, logging, etc.
> http://people.redhat.com/~rjones/virt-top
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v3 0/7] Feature: Yara file scanning

2017-02-21 Thread NoxDaFox
2017-02-21 16:32 GMT+02:00 Pino Toscano <ptosc...@redhat.com>:

> On Monday, 20 February 2017 13:46:29 CET NoxDaFox wrote:
> > 2017-02-20 12:26 GMT+02:00 Daniel P. Berrange <berra...@redhat.com>:
> >
> > > On Sun, Feb 19, 2017 at 07:09:51PM +0200, Matteo Cafasso wrote:
> > > > Rebase patches on top of 1.35.25.
> > > >
> > > > No changes since last series.
> > >
> > > Can you explain the motivation behind adding the APis to libguestfs ?
> > >
> > > Since the libguestfs VM is separate from the real VM, it can't
> > > be relying on any live process state to scan for malicious code,
> > > so must be exclusively considering the file contents.
> > >
> > This is the use case. For the former one, there are tools such as Rekall
> > and Volatility which already do a great job.
> >
> > http://www.rekall-forensic.com/
> > http://www.volatilityfoundation.org/
> >
> > > Could yara not simply use the existing libguestfs APIs to do its
> > > work. At the simplest case this might be having the FS fuse mounted
> > > at a location. Alternatively having it directly use the C API to
> > > access content it needs would be safer against malicious symlinks.
> >
> > There are both security and performance implication in using the FS fuse
> > locally mounted.
>
> IMHO the ideal way would be having yara access files inside disks using
> libguestfs. libyara already has APIs for scanning files from different
> input types, i.e. yr_rules_scan_* -- what is missing is a generic I/O
> implementation, which could be easy to wire up using the existing
> YR_STREAM stuff.
>
> This would mean that:
> a) disks are accessed only within libguestfs
> b) the yara API accesses files normally
> c) nothing is mounted on the host
>

Streaming APIs might be very useful indeed but the use case I have in mind
is a bit different.

The advantage of having tools such as Yara, Grep, Hivex and Libmagic
integrated within libguestfs is the added security layer.

Exploits are data driven, the way such data is accessed does not matter.
What matter is where that data is accessed.
Streaming the data out from libguestfs would not bring great benefits
neither performance nor security wise.
It might actually increase the attack surface as the vulnerability could
hide both on the tool and on libguestfs streaming interface.

IMHO a non intrusive integration altogether with plugin-type packages would
bring the best benefit to the Users.
 - libguestfs
 - libguestfs-forensics
 - libguestfs-recovery
 - ...

If I'm not mistaken, libguestfs already supports such architecture.


--
> Pino Toscano
> ___
> Libguestfs mailing list
> Libguestfs@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v3 0/7] Feature: Yara file scanning

2017-02-20 Thread NoxDaFox
2017-02-20 12:26 GMT+02:00 Daniel P. Berrange :

> On Sun, Feb 19, 2017 at 07:09:51PM +0200, Matteo Cafasso wrote:
> > Rebase patches on top of 1.35.25.
> >
> > No changes since last series.
>
> Can you explain the motivation behind adding the APis to libguestfs ?
>
> Since the libguestfs VM is separate from the real VM, it can't
> be relying on any live process state to scan for malicious code,
> so must be exclusively considering the file contents.
>

This is the use case. For the former one, there are tools such as Rekall
and Volatility which already do a great job.

http://www.rekall-forensic.com/
http://www.volatilityfoundation.org/


> Could yara not simply use the existing libguestfs APIs to do its
> work. At the simplest case this might be having the FS fuse mounted
> at a location. Alternatively having it directly use the C API to
> access content it needs would be safer against malicious symlinks.
>

There are both security and performance implication in using the FS fuse
locally mounted.

A vulnerability within the Yara scanner might lead to the host being
exposed. This is one of the reasons libguestfs is a good tool for helping
disk forensics as it adds a layer of protection against exploits.
More information here:
http://libguestfs.org/guestfs-security.1.html
https://pythonhosted.org/vminspect/#design-principles

Moreover, the fuse FS brings performance impacts which might be
considerable if we want to scan an entire FS or set of folders.
http://libguestfs.org/guestfs.3.html#mount-local


>
> Perhaps there's performance benefits todoing it by adding new APIs ?
> If so do you have any info on the scale of the benefit ?
>

> Regards,
> Daniel
> --
> |: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/
> :|
> |: http://libvirt.org  -o- http://virt-manager.org
> :|
> |: http://entangle-photo.org   -o-http://search.cpan.org/~danberr/
> :|
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v2 4/6] New API: internal_yara_scan

2016-11-24 Thread noxdafox

On 24/11/16 17:42, Pino Toscano wrote:

On Tuesday, 22 November 2016 19:41:10 CET noxdafox wrote:

yara_load supports loading rules already compiled, which could have a
namespace set -- I guess it should be reported here as well.

The namespace is accessible via the YR_RULE struct:
https://github.com/VirusTotal/yara/blob/master/libyara/include/yara/types.h#L242

Yet is nowere to be found in the C API documentation.
http://yara.readthedocs.io/en/v3.5.0/capi.html#c.YR_RULE

That's why I kept it out of the scope. I can obviously add it but we're
not sure whether they will expose it differently in future versions of Yara.

Drat... Maybe it would be worth asking them if it's just a documentation
issue, or it is really private. In any case, it is not a big issue at
the moment.

https://github.com/VirusTotal/yara/issues/570

Let's keep it out for this patch series. I'll make sure we'll have a 
clear answer before the next stable release of libguestfs.


I'll slowly proceed applying the suggested changes. Thanks.



That triggers another question: should the yara support allow to load
more rules one after each other (with namespaces as well), instead of
just one?

We surely can do. I'll see what can be done. Maybe an optional parameter
"namespace" in the yara_load API.

Right, that is what I was thinking about.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v2 2/6] New API: yara_load

2016-11-22 Thread noxdafox

On 21/11/16 18:27, Pino Toscano wrote:

On Wednesday, 9 November 2016 22:38:53 CET Matteo Cafasso wrote:

The yara_load API allows to load a set of Yara rules contained within a
file on the host.

Rules can be in binary format, as when compiled with yarac command, or
in source code format. In the latter case, the rules will be first
compiled and then loaded.

Subsequent calls of the yara_load API will result in the discard of the
previously loaded rules.

Signed-off-by: Matteo Cafasso 
---
[...]
diff --git a/daemon/cleanups.c b/daemon/cleanups.c
index 092e493..a02e521 100644
--- a/daemon/cleanups.c
+++ b/daemon/cleanups.c
@@ -78,3 +93,16 @@ cleanup_free_stringsbuf (void *ptr)
  {
free_stringsbuf ((struct stringsbuf *) ptr);
  }
+
+#ifdef HAVE_YARA
+
+void
+cleanup_destroy_yara_compiler (void *ptr)
+{
+  YR_COMPILER *compiler = * (YR_COMPILER **) ptr;
+
+  if (compiler != NULL)
+yr_compiler_destroy (compiler);
+}
+

This should rather be directly in daemon/yara.c, since libyara would be
used there only.


+static int
+upload_rules_file (char *rules_path)
+{
+  int ret = 0;
+  CLEANUP_CLOSE int fd = 0;
+  struct write_callback_data data = { .written = 0 };
+
+  data.fd = mkstemp (rules_path);
+  if (data.fd == -1) {
+reply_with_perror ("mkstemp");
+return -1;
+  }
+
+  ret = receive_file (write_callback, );
+  if (ret == -1) {
+/* Write error. */
+cancel_receive ();
+reply_with_error ("write error: %s", rules_path);
+return -1;
+  }
+  if (ret == -2) {
+/* Cancellation from library */
+reply_with_error ("file upload cancelled");
+return -1;
+  }
+
+  return 0;
+}

This function does not always unlink the temporary file on error, and
it is never close either -- my suggestion would be to reuse and expose
parts of the "upload" function in daemon/upload.c:

   int upload_to_fd (int fd)

With the above, upload_rules_file could not be needed anymore, and the
logic to open a temporary fd could be moved directly at the beginning
of do_yara_load.
Just one question: shall the changes in upload.c be in a separate commit 
"expose XYZ"? What is the preferred way?

This question applies also for the notes in PATCH 5.



+/* Compile source code rules and load them.
+ * Return ERROR_SUCCESS on success, Yara error code type on error.
+ */
+static int
+compile_rules_file (const char *rules_path)
+{
+  int ret = 0;
+  CLEANUP_FCLOSE FILE *rule_file = NULL;
+  CLEANUP_DESTROY_YARA_COMPILER YR_COMPILER *compiler = NULL;
+
+  ret = yr_compiler_create ();
+  if (ret != ERROR_SUCCESS) {
+reply_with_error ("yr_compiler_create");
+return ret;
+  }
+
+  yr_compiler_set_callback (compiler, compile_error_callback, NULL);
+
+  rule_file = fopen (rules_path, "r");
+  if (rule_file == NULL) {
+reply_with_error ("unable to open rules file");
+return ret;
+  }
+
+  ret = yr_compiler_add_file (compiler, rule_file, NULL, rules_path);

Considering it's only a temporary file, and thus we don't show its path
in error message, we could avoid passing rules_path here -- it looks
like the libyara API allows NULL for this parameter.


+  if (ret > 0)
+return ret;
+
+  ret = yr_compiler_get_rules (compiler, );
+  if (ret != ERROR_SUCCESS)
+reply_with_error ("yr_compiler_get_rules");

The API doc say the return value can be either ERROR_SUCCESS or
ERROR_INSUFICENT_MEMORY, so I'd map the latter to ENOMEM and use
reply_with_perror.


+/* Yara compilation error callback.
+ * Reports back the compilation error message.
+ * Prints compilation warnings if verbose.
+ */
+static void
+compile_error_callback(int level, const char *name, int line,
+   const char *message, void *data)
+{
+  if (level == YARA_ERROR_LEVEL_ERROR)
+reply_with_error ("(%d): Yara error: %s", line, message);

"Yara error (line %d): %s"


+  else
+fprintf (stderr, "(%d): Yara warning: %s\n", line, message);

Ditto.
In addition, IMHO this message should be shown only when verbose is
true... like what is written in the API doc comment at the beginning
of the function :)


+}
+
+/* Clean up yara handle on daemon exit. */
+void yara_finalize (void) __attribute__((destructor));
+void
+yara_finalize (void)
+{
+  int ret = 0;

   if (!initialized)
 return;


+
+  if (rules != NULL) {
+yr_rules_destroy (rules);
+rules = NULL;
+  }
+
+  ret = yr_finalize ();
+  if (ret != ERROR_SUCCESS)
+perror ("yr_finalize");
+
+  initialized = false;
+}

Thanks,


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v2 4/6] New API: internal_yara_scan

2016-11-22 Thread noxdafox

Ok on most of the comments, only few notes on the last one.

On 22/11/16 11:04, Pino Toscano wrote:

On Wednesday, 9 November 2016 22:38:55 CET Matteo Cafasso wrote:

The internal_yara_scan runs the Yara engine with the previously loaded
rules against the given file.

For each rule matching against the scanned file, a struct containing
the file name and the rule identifier is returned.

The gathered list of yara_detection structs is serialised into XDR format
and written to a file.

Signed-off-by: Matteo Cafasso 
---
  daemon/yara.c| 87 
  generator/actions.ml | 10 
  generator/structs.ml |  9 
  gobject/Makefile.inc |  2 +
  java/Makefile.inc|  1 +
  java/com/redhat/et/libguestfs/.gitignore |  1 +
  src/MAX_PROC_NR  |  2 +-
  7 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/daemon/yara.c b/daemon/yara.c
index fe1f69a..8e7d328 100644
--- a/daemon/yara.c
+++ b/daemon/yara.c
@@ -52,6 +52,8 @@ static int upload_rules_file (char *);
  static int compile_rules_file (const char *);
  static int write_callback (void *, const void *, size_t);
  static void compile_error_callback (int, const char *, int, const char *, 
void *);
+static int yara_rules_callback (int , void *, void *);
+static int send_detection_info (const char *, YR_RULE *);
  
  /* Has one FileIn parameter. */

  int
@@ -107,6 +109,39 @@ do_yara_destroy (void)
return 0;
  }
  
+/* Has one FileOut parameter. */

+int
+do_internal_yara_scan (const char *path)
+{
+  int ret = 0;
+  CLEANUP_CLOSE int fd = 0;

This must be initialized as -1, otherwise the CLEANUP_CLOSE handler
will close the fd 0, which is stdin of the daemon.


+
+  if (rules == NULL) {
+reply_with_error ("no yara rules loaded");
+return -1;
+  }
+
+  CHROOT_IN;
+  fd = open (path, O_RDONLY|O_CLOEXEC);
+  CHROOT_OUT;
+
+  if (fd < 0) {
+reply_with_perror ("%s", path);
+yr_finalize ();

I don't think that's the right place for yr_finalize.


+return -1;
+  }
+
+  reply (NULL, NULL);  /* Reply message. */
+
+  ret = yr_rules_scan_fd (rules, fd, 0, yara_rules_callback, (void *) path, 0);
+  if (ret == ERROR_SUCCESS)
+ret = send_file_end (0);  /* File transfer end. */
+  else
+send_file_end (1);  /* Cancel file transfer. */
+
+  return 0;
+}
+
  /* Upload rules file on a temporary file.
   * Return 0 on success, -1 on error.
   */
@@ -209,6 +244,58 @@ compile_error_callback(int level, const char *name, int 
line,
  fprintf (stderr, "(%d): Yara warning: %s\n", line, message);
  }
  
+/* Yara scan callback, called by yr_rules_scan_file.

+ * Return 0 on success, -1 on error.
+ */
+static int
+yara_rules_callback (int code, void *message, void *data)
+{
+  int ret = 0;
+
+  if (code == CALLBACK_MSG_RULE_MATCHING)
+ret = send_detection_info ((const char *)data, (YR_RULE *) message);
+
+  return (ret == 0) ? CALLBACK_CONTINUE : CALLBACK_ERROR;
+}
+
+/* Serialize file path and rule name and send it out.
+ * Return 0 on success, -1 on error.
+ */
+static int
+send_detection_info (const char *name, YR_RULE *rule)
+{
+  XDR xdr;
+  int ret = 0;
+  size_t len = 0;
+  struct guestfs_int_yara_detection detection;
+  CLEANUP_FREE char *buf = NULL, *fname = NULL;

fname is not used here -- I suggest passing --enable-werror to
autogen.sh or configure, so any compiler warning is turned to error.


+
+  detection.name = (char *) name;
+  detection.rule = (char *) rule->identifier;
+
+  /* Serialize detection struct. */
+  buf = malloc (GUESTFS_MAX_CHUNK_SIZE);
+  if (buf == NULL) {
+perror ("malloc");
+return -1;
+  }
+
+  xdrmem_create (, buf, GUESTFS_MAX_CHUNK_SIZE, XDR_ENCODE);
+
+  ret = xdr_guestfs_int_yara_detection (, );
+  if (ret == 0) {
+perror ("xdr_guestfs_int_yara_detection");
+return -1;
+  }
+
+  len = xdr_getpos ();
+
+  xdr_destroy ();
+
+  /* Send serialised tsk_detection out. */

Typo in comment.


+  return send_file_write (buf, len);
+}
+
  /* Clean up yara handle on daemon exit. */
  void yara_finalize (void) __attribute__((destructor));
  void
diff --git a/generator/actions.ml b/generator/actions.ml
index 152c651..d9006f2 100644
--- a/generator/actions.ml
+++ b/generator/actions.ml
@@ -13280,6 +13280,16 @@ Previously loaded rules will be destroyed." };
  shortdesc = "destroy previously loaded yara rules";
  longdesc = "\
  Destroy previously loaded Yara rules in order to free libguestfs resources." 
};
+
+  { defaults with
+name = "internal_yara_scan"; added = (1, 35, 15);
+style = RErr, [Pathname "path"; FileOut "filename";], [];
+proc_nr = Some 473;
+visibility = VInternal;
+optional = Some "libyara";
+shortdesc = "scan a file with the loaded yara rules";
+longdesc = "Internal function for yara_scan." };
+
  ]
  
  (* Non-API meta-commands available only in guestfish.

diff --git 

Re: [Libguestfs] [PATCH v4 0/3] New API - find_block

2016-10-11 Thread NoxDaFox
2016-10-11 11:56 GMT+03:00 Pino Toscano :

> On Saturday, 8 October 2016 18:27:21 CEST Matteo Cafasso wrote:
> > Patch ready for merging.
> >
> > v4:
> >
> >  - check return code of tsk_fs_attr_walk
> >  - pass TSK_FS_FILE_WALK_FLAG_NOSPARSE as additional flag to
> >  tsk_fs_attr_walk
> >
> > After discussing with TSK authors the behaviour is clear. [1]
>
> Thanks, this improves the situation a bit.
>
> > In case of COMPRESSED blocks, the callback will be called for all the
> > attributes no matter whether they are on disk or not (sparse). In
> > such cases, the block address will be 0. [2]
>
> Note that the API docs say:
>   For compressed and sparse attributes, the address *may* be zero.
> (emphasis is mine)
>

> My concern is that, if the address in such cases is "unspecified", then
> the comparisons in "attrwalk_callback" are done against a
> random/unitialized value (which would be bad).
>

I understand your concerns. The data will not be wrong. Is the API
documentation being misleading.
The data *will* be 0 for SPARSE blocks and *might* be 0 or not for
compressed blocks based on certain criteria. See below.


> Also, if the block address would be zero, what's the point of having it
> among the blocks tsk_fs_attr_walk() iterates over?
>

This is due to the way NTFS organizes information and deals with its
compression and the way the API loops over them.

For each file or directory, there is a MFT (Master File Table) record which
consists in a linear repository of attributes (1Kb of size each).
Attributes can be resident within the MFT or non-resident according to
their size. The $DATA attribute storing the actual file content is an
example of typically non-resident ones.

Non-resident attributes are stored on disk in what is referred as data-runs
(contiguous blocks) which are then mapped within the attribute itself. A
typical file greater than 800 Bytes has the $DATA attribute containing a
map of data runs with their location on the disk. If the map itself is too
big for the $DATA attribute (this can happen if the actual content is too
fragmented), then extra records are created and their mapping is placed in
a special attribute called $ATTRIBUTE_LIST. [1]

When the given file is compressed (native NTFS compression, not application
level one), the algorithm goes on each data run within the attribute and:
[2]
 1 if the data run is zero filled, will set the corresponding blocks as
sparse and set their address to 0.
 2 if compressing the data run does not save any disk block, it will leave
it as is.
 3 if compressing the data run does save one or more blocks, the spared one
will be again marked as sparse and their address will be 0.

Note that the entire attribute will be marked as compressed no matter what
happened to the clusters on disk.

The logic loops through all non-resident attributes (which is what we want:
we want all the disk blocks allocated for that file). For each attribute,
it loops over all the blocks which that attributes maps and calls the
callback.

Our issue is the information at the origin of the sparse flag: the
information might come from the block (BAD/ALLOC/UNALLOC), or from the file
metadata (RAW,SPARSE,COMPRESSED,CONT, META). [3]

The tsk_fs_attr_walk() walks over the given attribute's blocks. In case we
are inspecting attributes of compressed files, the flag will report the
*file* status (COMPRESSED) yet will not able to tell us what the
compression algorithm did (1,2,3) to that block. It will still correctly
give us the address: 0 if sparse (case 1 or 3) or the correct number
otherwise (case 2 or 3).

[1]
https://en.wikipedia.org/wiki/NTFS#Attribute_lists.2C_attributes.2C_and_streams
[2] http://www.digital-evidence.org/fsfa/  -  Chapter 11
[3]
http://www.sleuthkit.org/sleuthkit/docs/api-docs/4.2/tsk__fs_8h.html#a1e6bf157f5d258191bf5d8ae31ee7148
-  "Note that some of these are set only by file_walk because they are
file-level details, such as compression and sparse."


> Thanks,
> --
> Pino Toscano
> ___
> Libguestfs mailing list
> Libguestfs@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v3 1/3] New API: internal_find_block

2016-09-23 Thread NoxDaFox
2016-09-23 11:52 GMT+03:00 Pino Toscano :

> On Tuesday, 20 September 2016 16:19:30 CEST Matteo Cafasso wrote:
> > +  for (index = 0; index < count; index++) {
> > +fsattr = tsk_fs_file_attr_get_idx (fsfile, index);
> > +
> > +if (fsattr != NULL && fsattr->flags & TSK_FS_ATTR_NONRES)
> > +  tsk_fs_attr_walk (fsattr, flags, attrwalk_callback, (void *)
> );
>
> The return code of tsk_fs_attr_walk must be checked.
>

I'll fix.


>
> > +/* Attribute walk, searches the given block within the FS node
> attributes.
> > + *
> > + * Return TSK_WALK_CONT on success, TSK_WALK_ERROR on error.
> > + */
> > +static TSK_WALK_RET_ENUM
> > +attrwalk_callback (TSK_FS_FILE *fsfile, TSK_OFF_T offset,
> > +   TSK_DADDR_T blkaddr, char *buf, size_t size,
> > +   TSK_FS_BLOCK_FLAG_ENUM flags, void *data)
> > +{
> > +  findblk_data *blkdata = (findblk_data *) data;
> > +
> > +  if (!(flags & TSK_FS_BLOCK_FLAG_SPARSE) && blkaddr == blkdata->block)
> {
> > +blkdata->found = true;
>
> If we want to ignore sparse blocks, wouldn't it make sense to pass
> TSK_FS_FILE_WALK_FLAG_NOSPARSE as additional flag to tsk_fs_attr_walk
> above?
>
> Also, my concerns about this that I replied in v2 still stand: is the
> documentation obsolete, or does it document what is the expected
> behaviour? In the former case, then it could be ok to partially
> disregard what it said; in the latter, the code should follow what
> it describes.
>
> In any case, you should get in touch with the sleuthkit developers,
> and get their feedback about that.
>

Right, I'll write something on their devs list.


>
> --
> Pino Toscano
> ___
> Libguestfs mailing list
> Libguestfs@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v2 1/3] New API: internal_find_block

2016-09-20 Thread NoxDaFox
2016-09-20 11:38 GMT+03:00 Pino Toscano :

> On Monday, 19 September 2016 23:26:57 CEST Matteo Cafasso wrote:
> > The internal_find_block command searches all entries referring to the
> > given filesystem data block and returns a tsk_dirent structure
> > for each of them.
> >
> > For filesystems such as NTFS which do not delete the block mapping
> > when removing files, it is possible to get multiple non-allocated
> > entries for the same block.
> >
> > The gathered list of tsk_dirent structs is serialised into XDR format
> > and written to a file by the appliance.
> >
> > Signed-off-by: Matteo Cafasso 
> > ---
>
> The idea is fine, there are few notes below.
>
> > +int
> > +do_internal_find_block (const mountable_t *mountable, int64_t block)
> > +{
> > +  int ret = -1;
> > +  TSK_FS_INFO *fs = NULL;
> > +  TSK_IMG_INFO *img = NULL;  /* Used internally by tsk_fs_dir_walk */
> > +  const int flags =
> > +TSK_FS_DIR_WALK_FLAG_ALLOC | TSK_FS_DIR_WALK_FLAG_UNALLOC |
> > +TSK_FS_DIR_WALK_FLAG_RECURSE | TSK_FS_DIR_WALK_FLAG_NOORPHAN;
> > +
> > +  ret = open_filesystem (mountable->device, , );
> > +  if (ret < 0)
> > +return ret;
> > +
> > +  reply (NULL, NULL);  /* Reply message. */
> > +
> > +  ret = tsk_fs_dir_walk (fs, fs->root_inum, flags,
> > + findblk_callback, (void *) );
> > +  if (ret == 0)
> > +ret = send_file_end (0);  /* File transfer end. */
> > +  else
> > +send_file_end (1);  /* Cancel file transfer. */
> > +
> > +  fs->close (fs);
> > +  img->close (img);
> > +
> > +  return ret;
> > +}
> > +
> > +
>
> (One extra empty line.)
>
> > +static TSK_WALK_RET_ENUM
> > +findblk_callback (TSK_FS_FILE *fsfile, const char *path, void *data)
> > +{
> > +  findblk_data blkdata;
> > +  const TSK_FS_ATTR *fsattr = NULL;
> > +  int ret = 0, count = 0, index = 0;
> > +  uint64_t *block = (uint64_t *) data;
>
> Just dereference the pointer directly, so it will not be needed later
> on:
>
>   const uint64_t block = * (uint64_t *) data;
>
> Or, even better, this can be done only when needed, ...
>
> > +  const int flags = TSK_FS_FILE_WALK_FLAG_AONLY |
> TSK_FS_FILE_WALK_FLAG_SLACK;
> > +
> > +  if (entry_is_dot (fsfile))
> > +return TSK_WALK_CONT;
> > +
> > +  blkdata.found = false;
> > +  blkdata.block = *block;
>
> ... here:
>
>   blkdata.block = * (uint64_t *) data;
>
> > +  /* Retrieve block list */
> > +  count = tsk_fs_file_attr_getsize (fsfile);
> > +
> > +  for (index = 0; index < count; index++) {
> > +fsattr = tsk_fs_file_attr_get_idx (fsfile, index);
> > +
> > +if (fsattr != NULL && fsattr->flags & TSK_FS_ATTR_NONRES)
> > +  tsk_fs_attr_walk (fsattr, flags, attrwalk_callback, (void*)
> );
>
> (Minor style nitpick: space missing between "void" and "*".)
>
> Should the return value of tsk_fs_attr_walk be checked for failures,
> and return TSK_WALK_ERROR in case of error?
>
> > +/* Attribute walk, searches the given block within the FS node
> attributes. */
>
> Even if within 80 columns, I'd split it at 70 for readability.
>
> > +static TSK_WALK_RET_ENUM
> > +attrwalk_callback (TSK_FS_FILE *fsfile, TSK_OFF_T offset,
> > +   TSK_DADDR_T blkaddr, char *buf, size_t size,
> > +   TSK_FS_BLOCK_FLAG_ENUM flags, void *data)
> > +{
> > +  findblk_data *blkdata = (findblk_data *) data;
> > +
> > +  if (blkaddr == blkdata->block) {
> > +blkdata->found = true;
>
> If I read the sleuthkit API docs, blkaddr will be meaningful only if
> flags contains TSK_FS_BLOCK_FLAG_RAW.  Should attrwalk_callback check
> for it?
>
I was thinking the same but I then took a look at its usage within the
sleuthkit tool and it seems not being checked.
https://github.com/sleuthkit/sleuthkit/blob/develop/tsk/fs/ifind_lib.c#L479

Not sure whether to trust the API docs or the code. My main concern is
ignoring some relevant blocks.

I will test it with both solutions and see.

I will also fix the rest.

Thanks for the comments.


>
> Thanks,
> --
> Pino Toscano
> ___
> Libguestfs mailing list
> Libguestfs@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

[Libguestfs] Libguestfs based vulnerability scanner

2016-08-31 Thread noxdafox

Greetings,

I built a small proof-of-concept and I've been suggested to share it 
with the community.


The tool consists of a vulnerability scanner based on Libguestfs.

The tool lists all the installed applications within a disk image and 
queries a CVE database via REST interface. The data gets aggregated in 
order to provide a report of the vulnerable applications within the disk 
image.


Here's a concrete example:
http://pastebin.com/w6DZkwCg

A possible use case could be the vulnerability assessment and management 
of Cloud instances.


The tool is part of a library I've been building to help automating 
security assessment and forensics analysis of disk images.

https://github.com/noxdafox/vminspect

I did not test it much yet. Therefore, it might raise several false 
positives or miss important vulnerabilities but considering it's ~ 100 
lines of Python code, I'd say is a good starting point.


The tool is relying on cve-search REST APIs to retrieve the 
vulnerability list.

https://github.com/adulau/cve-search

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH v2 1/6] filesystem_walk: fixed root inode listing

2016-08-30 Thread noxdafox

On 26/08/16 15:58, Pino Toscano wrote:

On Friday, 26 August 2016 15:15:17 CEST noxdafox wrote:

On 26/08/16 14:15, Pino Toscano wrote:

On Thursday, 25 August 2016 23:53:51 CEST Matteo Cafasso wrote:

With the current implementation, the root inode of the given partition
is ignored.

The root inode is now reported. Its name will be a single dot '.'
reproducing the TSK API.

Signed-off-by: Matteo Cafasso <noxda...@gmail.com>
---
   daemon/tsk.c | 17 ++---
   1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/daemon/tsk.c b/daemon/tsk.c
index dd368d7..6e6df6d 100644
--- a/daemon/tsk.c
+++ b/daemon/tsk.c
@@ -48,6 +48,7 @@ static char file_type (TSK_FS_FILE *);
   static int file_flags (TSK_FS_FILE *fsfile);
   static void file_metadata (TSK_FS_META *, guestfs_int_tsk_dirent *);
   static int send_dirent_info (guestfs_int_tsk_dirent *);
+static int entry_is_dot(TSK_FS_FILE *);
   static void reply_with_tsk_error (const char *);

Since in patch #2 this forward declaration is moved, put it at the
right place already in this patch.


   int
@@ -113,9 +114,7 @@ fswalk_callback (TSK_FS_FILE *fsfile, const char *path, 
void *data)
 CLEANUP_FREE char *fname = NULL;
 struct guestfs_int_tsk_dirent dirent;
   
-  /* Ignore ./ and ../ */

-  ret = TSK_FS_ISDOT (fsfile->name->name);
-  if (ret != 0)
+  if (entry_is_dot(fsfile))
   return TSK_WALK_CONT;

Nitpick: add a space between the function name and the opening round
bracket.


 /* Build the full relative path of the entry */
@@ -271,6 +270,18 @@ reply_with_tsk_error (const char *funcname)
   reply_with_error ("%s: unknown error", funcname);
   }
   
+/* Check whether the entry is dot and is not Root. */

+static int
+entry_is_dot(TSK_FS_FILE *fsfile)

Ditto.


+{
+  if (TSK_FS_ISDOT (fsfile->name->name))
+if (fsfile->fs_info->root_inum != fsfile->name->meta_addr ||
+strcmp (fsfile->name->name, "."))

Simply merge the two if's into a single if (A && B) condition?
Also, the strcmp is already done by TSK_FS_ISDOT, isn't it?
So this should be like:

if (TSK_FS_ISDOT (fsfile->name->name)
&& (fsfile->fs_info->root_inum != fsfile->name->meta_addr))

It's a bit more complicated, that's why I preferred to keep the two if
statements separated.

Most probably I expressed myself in an unclear way:

* TSK_FS_ISDOT (foo) returns truen when foo is "." or ".."

* strcmp (foo, ".") -- btw, please use STREQ/STRNEQ -- returns true
   when foo is not "."

Didn't know about STREQ, it indeed makes the code more readable.

The logic was:

if IS_DOT
. if (IS_NOT_ROOT or IS_NOT_SINGLE_DOT)
.. SKIP_THE_ENTRY
else
. PARSE_THE_ENTRY

I changed it in something more readable:

if IS_DOT
. if (IS_ROOT and IS_SINGLE_DOT)
.. PARSE_THE_ENTRY
. else
.. SKIP_THE_ENTRY
else
. PARSE_THE_ENTRY



TSK_FS_ISDOT returns whether the string is either '.' or '..'.
Yet we want to return Root so we check if that's the case.

OK.


Problem is that we'd return multiple entries for Root because:
.
..
etc/..
bin/..

fsfile->name->name is a filename only, isn't it?
Yes, the callback returns the fsfile struct and the path in two separate 
parameters.



Are all matching the statement:

fsfile->fs_info->root_inum != fsfile->name->meta_addr

I don't understand this: if the statement will match all the dot and
dot-dot entries, why is it checked at all?
Or it will be valid for all the dot and dot-dot inodes different than
the ones in the root, right?
As I explained in the last patch series, the callback will be called for 
every entry.


. <-- the Root entry
etc/.
etc/.. <-- again the Root entry
etc/systemd/.
etc/systemd/..
bin/.
bin/.. <-- again the Root entry

Therefore we need to make sure we return the Root entry only once.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v2 1/6] filesystem_walk: fixed root inode listing

2016-08-26 Thread noxdafox

On 26/08/16 14:15, Pino Toscano wrote:

On Thursday, 25 August 2016 23:53:51 CEST Matteo Cafasso wrote:

With the current implementation, the root inode of the given partition
is ignored.

The root inode is now reported. Its name will be a single dot '.'
reproducing the TSK API.

Signed-off-by: Matteo Cafasso 
---
  daemon/tsk.c | 17 ++---
  1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/daemon/tsk.c b/daemon/tsk.c
index dd368d7..6e6df6d 100644
--- a/daemon/tsk.c
+++ b/daemon/tsk.c
@@ -48,6 +48,7 @@ static char file_type (TSK_FS_FILE *);
  static int file_flags (TSK_FS_FILE *fsfile);
  static void file_metadata (TSK_FS_META *, guestfs_int_tsk_dirent *);
  static int send_dirent_info (guestfs_int_tsk_dirent *);
+static int entry_is_dot(TSK_FS_FILE *);
  static void reply_with_tsk_error (const char *);

Since in patch #2 this forward declaration is moved, put it at the
right place already in this patch.


  int
@@ -113,9 +114,7 @@ fswalk_callback (TSK_FS_FILE *fsfile, const char *path, 
void *data)
CLEANUP_FREE char *fname = NULL;
struct guestfs_int_tsk_dirent dirent;
  
-  /* Ignore ./ and ../ */

-  ret = TSK_FS_ISDOT (fsfile->name->name);
-  if (ret != 0)
+  if (entry_is_dot(fsfile))
  return TSK_WALK_CONT;

Nitpick: add a space between the function name and the opening round
bracket.


/* Build the full relative path of the entry */
@@ -271,6 +270,18 @@ reply_with_tsk_error (const char *funcname)
  reply_with_error ("%s: unknown error", funcname);
  }
  
+/* Check whether the entry is dot and is not Root. */

+static int
+entry_is_dot(TSK_FS_FILE *fsfile)

Ditto.


+{
+  if (TSK_FS_ISDOT (fsfile->name->name))
+if (fsfile->fs_info->root_inum != fsfile->name->meta_addr ||
+strcmp (fsfile->name->name, "."))

Simply merge the two if's into a single if (A && B) condition?
Also, the strcmp is already done by TSK_FS_ISDOT, isn't it?
So this should be like:

   if (TSK_FS_ISDOT (fsfile->name->name)
   && (fsfile->fs_info->root_inum != fsfile->name->meta_addr))
It's a bit more complicated, that's why I preferred to keep the two if 
statements separated.


TSK_FS_ISDOT returns whether the string is either '.' or '..'.
Yet we want to return Root so we check if that's the case.

Problem is that we'd return multiple entries for Root because:
.
..
etc/..
bin/..

Are all matching the statement:

fsfile->fs_info->root_inum != fsfile->name->meta_addr


Either we check that we return it only once (which complicates the logic 
quite a bit) or we strictly ensure the entry 'meta_addr' is root and the 
name is '.'.
I'll add a comment to make the logic more clear. Yet if you have better 
ideas they're more than welcome.






___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH 0/3] New API: find_inode

2016-08-25 Thread NoxDaFox
2016-08-25 16:12 GMT+03:00 Pino Toscano <ptosc...@redhat.com>:

> On Thursday, 25 August 2016 16:05:47 CEST NoxDaFox wrote:
> > 2016-08-25 14:09 GMT+03:00 Pino Toscano <ptosc...@redhat.com>:
> >
> > > On Wednesday, 24 August 2016 23:59:53 CEST Matteo Cafasso wrote:
> > > > The find_inode API allows the User to search all the entries
> referring
> > > > to a given inode and returns a tsk_dirent structure for each of them.
> > > >
> > > > As I didn't want to change unrelated code, there is a little bit
> > > > of code duplication at the moment. Plan is to refactor the logic
> > > > in a dedicated set of patches.
> > >
> > > The general idea looks ok, but I'd rather see the duplication dealt
> > > with sooner than later.
> > >
> > In the previous submissions, non related changes were rejected therefore
> I
> > thought that was the custom.
>
> I don't see how the two cases are the same: what was "rejected" was a
> single patch contaning both additions documented in the commit message,
> and unrelated changes such as formatting fixes. I remember to have said
> that it's a no-go as *single* patch, but they can be sent (and
> integrated) as different commits.
>
Then I totally misunderstood.

>
> In this case, refactoring and de-duplication of code should be done in
> different commits as well.
>

> > Moreover I'll add another API find_block (block_number --> tsk_dirents
> > referring to it) and I think is easier to refactor the code once all the
> > use cases are in place as the picture gets more clear.
>
> More reasons to refactor *before*: since you already planned more APIs,
> it's easier to just refactor one in advance with all the common code
> needed, and use it later. Also, it eases a lot the review of the
> patches, since it will be less code added.
>
Seems reasonable, I'll proceed and re-submit then.

>
> --
> Pino Toscano
> ___
> Libguestfs mailing list
> Libguestfs@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH 1/3] New API: internal_find_inode

2016-08-25 Thread NoxDaFox
2016-08-25 14:19 GMT+03:00 Pino Toscano :

> On Wednesday, 24 August 2016 23:59:54 CEST Matteo Cafasso wrote:
> > The internal_find_inode command searches all entries referring to the
> > given inode and returns a tsk_dirent structure for each of them.
> >
> > The command is able to retrieve information regarding deleted
> > or unaccessible files where other commands such as stat or find
> > would fail.
> >
> > The gathered list of tsk_dirent structs is serialised into XDR format
> > and written to a file by the appliance.
> >
> > Signed-off-by: Matteo Cafasso 
> > ---
> >  daemon/tsk.c | 75 ++
> ++
> >  generator/actions.ml |  9 +++
> >  src/MAX_PROC_NR  |  2 +-
> >  3 files changed, 85 insertions(+), 1 deletion(-)
> >
> > diff --git a/daemon/tsk.c b/daemon/tsk.c
> > index dd368d7..beb92a3 100644
> > --- a/daemon/tsk.c
> > +++ b/daemon/tsk.c
> > @@ -44,6 +44,7 @@ enum tsk_dirent_flags {
> >
> >  static int open_filesystem (const char *, TSK_IMG_INFO **, TSK_FS_INFO
> **);
> >  static TSK_WALK_RET_ENUM fswalk_callback (TSK_FS_FILE *, const char *,
> void *);
> > +static TSK_WALK_RET_ENUM ifind_callback (TSK_FS_FILE *, const char *,
> void *);
> >  static char file_type (TSK_FS_FILE *);
> >  static int file_flags (TSK_FS_FILE *fsfile);
> >  static void file_metadata (TSK_FS_META *, guestfs_int_tsk_dirent *);
> > @@ -78,6 +79,35 @@ do_internal_filesystem_walk (const mountable_t
> *mountable)
> >return ret;
> >  }
> >
> > +int
> > +do_internal_find_inode (const mountable_t *mountable, int64_t inode)
> > +{
> > +  int ret = -1;
> > +  TSK_FS_INFO *fs = NULL;
> > +  TSK_IMG_INFO *img = NULL;  /* Used internally by tsk_fs_dir_walk */
> > +  const int flags =
> > +TSK_FS_DIR_WALK_FLAG_ALLOC | TSK_FS_DIR_WALK_FLAG_UNALLOC |
> > +TSK_FS_DIR_WALK_FLAG_RECURSE | TSK_FS_DIR_WALK_FLAG_NOORPHAN;
> > +
> > +  ret = open_filesystem (mountable->device, , );
> > +  if (ret < 0)
> > +return ret;
> > +
> > +  reply (NULL, NULL);  /* Reply message. */
> > +
> > +  ret = tsk_fs_dir_walk (fs, fs->root_inum, flags, ifind_callback,
> > + (void *) );
>
> Here 'inode' is int64_t ...
>
> > +  if (ret == 0)
> > +ret = send_file_end (0);  /* File transfer end. */
> > +  else
> > +send_file_end (1);  /* Cancel file transfer. */
> > +
> > +  fs->close (fs);
> > +  img->close (img);
> > +
> > +  return ret;
> > +}
> > +
> >  /* Inspect the device and initialises the img and fs structures.
> >   * Return 0 on success, -1 on error.
> >   */
> > @@ -141,6 +171,51 @@ fswalk_callback (TSK_FS_FILE *fsfile, const char
> *path, void *data)
> >return ret;
> >  }
> >
> > +/* Find inode callback, it gets called on every FS node.
> > + * Parse the node, encode it into an XDR structure and send it to the
> appliance.
> > + * Return TSK_WALK_CONT on success, TSK_WALK_ERROR on error.
> > + */
> > +static TSK_WALK_RET_ENUM
> > +ifind_callback (TSK_FS_FILE *fsfile, const char *path, void *data)
> > +{
> > +  int ret = 0;
> > +  CLEANUP_FREE char *fname = NULL;
> > +  uint64_t *inode = (uint64_t *) data;
>
> ... but then here you cast it as uint64_t.  I don't see an immediate
> issue with it, but please keep in mind that this kind of things in C
> (i.e. cast a typed pointer to void*, and cast it back as some other
> type) is dangerous, and should be avoided as much as possible.
>
I do agree with your point. Reason behind the cast is that the comparison
few lines below is done against a uint64_t.

Moreover, other related APIs parameters are int64_t (download_inode and
download_blocks). This would make the APIs inconsistent between each other.

If I picture myself as a user I would wonder why download_inode() wants a
int64_t and find_inode() wants a uint64_t.


>
> I guess the proper solution would be adding a new UInt64 type (and UInt
> as well) to the generator...
>
I am not familiar with the generator architecture. What would adding a type
to it imply from the practical point of view?

>
> Thanks,
> --
> Pino Toscano
> ___
> Libguestfs mailing list
> Libguestfs@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH 0/3] New API: find_inode

2016-08-25 Thread NoxDaFox
2016-08-25 14:09 GMT+03:00 Pino Toscano :

> On Wednesday, 24 August 2016 23:59:53 CEST Matteo Cafasso wrote:
> > The find_inode API allows the User to search all the entries referring
> > to a given inode and returns a tsk_dirent structure for each of them.
> >
> > As I didn't want to change unrelated code, there is a little bit
> > of code duplication at the moment. Plan is to refactor the logic
> > in a dedicated set of patches.
>
> The general idea looks ok, but I'd rather see the duplication dealt
> with sooner than later.
>
In the previous submissions, non related changes were rejected therefore I
thought that was the custom.

Moreover I'll add another API find_block (block_number --> tsk_dirents
referring to it) and I think is easier to refactor the code once all the
use cases are in place as the picture gets more clear.


> Thanks,
> --
> Pino Toscano
> ___
> Libguestfs mailing list
> Libguestfs@redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
>
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v2 0/2] Added download_blocks API

2016-07-20 Thread noxdafox

On 20/07/16 17:04, Pino Toscano wrote:

Hi,

On Sunday, 17 July 2016 20:40:18 CEST Matteo Cafasso wrote:

v2:

  - Rebase on top of master

Matteo Cafasso (2):
   New API: download_blocks
   Added download_blocks API test

  daemon/sleuthkit.c| 41 ++-
  generator/actions.ml  | 24 
  gobject/Makefile.inc  |  2 ++
  src/MAX_PROC_NR   |  2 +-
  tests/tsk/Makefile.am |  1 +
  tests/tsk/test-download-blocks.sh | 58 +++
  6 files changed, 126 insertions(+), 2 deletions(-)
  create mode 100755 tests/tsk/test-download-blocks.sh

The series LGTM, I pushed it after removing the extra change in
do_download_inode and fixing the version.

More a curiosity question than a complain or something else: how are
these APIs are supposed to be used?  What is the forensics-related
workflow using them?
Current focus is deleted/unaccessible files retrieval as I believe this 
is the most interesting feature for libguestfs users.


A forensic workflow example would be:
 * start libguestfs and identify the disk partition where your data is
 * run filesystem_walk to get list of files which are visible within 
that disk partition
 * if the deleted file you want to recover is in that list, you'll get 
its inode

 * use download_inode to try recovering the deleted file

For Ext3+ filesystems the thing is a bit more complicated. These 
filesystems remove the block links when the file gets deleted making its 
recovery more difficult. Only choice is carving out the data and 
download_blocks is the function which allows you to do so. What the User 
needs is an API capable of mapping disk blocks to files and then he/she 
will be able to recover them using download_blocks.


Most of the APIs I am introducing are inspired from TSK ones. Here's a 
more detailed example on how to retrieve deleted data from disks.

http://wiki.sleuthkit.org/index.php?title=FS_Analysis

Afterwards, we could focus on more interesting topics such as evidence 
gathering and forensics analysis. Automating it is a challenging topic 
as most of the "evidence reconstruction" requires careful thinking as 
the data might have been tampered or obfuscated.


Yet there are quite interesting features we can add which could support 
forensic analysis as well as cloud security solutions. Think about 
libguestfs scanning Open Stack instance disks to detect anomalies within 
cloud deployments. libguestfs is the perfect tool as it easily allows to 
abstract both the disk virtualisation technology (qcow2, vmdk etc..) and 
the guest Operating System.


You can find an example on libguestfs-based VM scanning solution in here:
https://github.com/noxdafox/vminspect

If you check the "timeline" command implementation, you'll find few of 
the new APIs in use.



Considering they are quite specific, I was
thinking about adding a documentation paragraph and/or some example
to describe/show them better, what do you think?


This is a good question, I was thinking about a blog post to start with 
but a paragraph in the documentation sounds good as well. Let me know if 
you need help for that, I can provide some real-life example.



Thanks,


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH 1/2] filesystem_walk: more information into tsk_dirent

2016-07-04 Thread noxdafox

On 04/07/16 16:25, Pino Toscano wrote:

On Monday 04 July 2016 00:00:59 Matteo Cafasso wrote:

Access, modification, last status change and creation time in
Unix format as for statns.

Number of links pointing to a given entry.

If the entry is a symbolic link, report its target path.

A new flag (DIRENT_COMPRESSED 0x04) indicating whether the file is
compressed using native filesystem compression support.

Signed-off-by: Matteo Cafasso 
---
  daemon/tsk.c | 39 +++
  generator/actions.ml | 34 ++
  generator/structs.ml | 20 ++--
  3 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/daemon/tsk.c b/daemon/tsk.c
index 446213e..8657bf3 100644
--- a/daemon/tsk.c
+++ b/daemon/tsk.c
@@ -38,7 +38,8 @@
  enum tsk_dirent_flags {
DIRENT_UNALLOC = 0x00,
DIRENT_ALLOC = 0x01,
-  DIRENT_REALLOC = 0x02
+  DIRENT_REALLOC = 0x02,
+  DIRENT_COMPRESSED = 0x04
  };

  static int open_filesystem (const char *, TSK_IMG_INFO **, TSK_FS_INFO **);
@@ -108,6 +109,7 @@ fswalk_callback (TSK_FS_FILE *fsfile, const char *path, 
void *data)
  {
int ret = 0;
CLEANUP_FREE char *fname = NULL;
+  CLEANUP_FREE char *flink = NULL;
struct guestfs_int_tsk_dirent dirent;

/* Ignore ./ and ../ */
@@ -122,20 +124,38 @@ fswalk_callback (TSK_FS_FILE *fsfile, const char *path, 
void *data)
  return TSK_WALK_ERROR;
}

+  /* Set dirent fields */
+  memset (, 0, sizeof dirent);
+
dirent.tsk_inode = fsfile->name->meta_addr;
dirent.tsk_type = file_type (fsfile);
dirent.tsk_size = (fsfile->meta != NULL) ? fsfile->meta->size : -1;
dirent.tsk_name = fname;
dirent.tsk_flags = file_flags (fsfile);
-  dirent.tsk_spare1 = dirent.tsk_spare2 = dirent.tsk_spare3 =
-dirent.tsk_spare4 = dirent.tsk_spare5 = dirent.tsk_spare6 =
-dirent.tsk_spare7 = dirent.tsk_spare8 = dirent.tsk_spare9 =
-dirent.tsk_spare10 = dirent.tsk_spare11 = 0;
+
+  if (fsfile->meta != NULL) {
+dirent.tsk_nlink = fsfile->meta->nlink;
+dirent.tsk_atime_sec = fsfile->meta->atime;
+dirent.tsk_atime_nsec = fsfile->meta->atime_nano;
+dirent.tsk_mtime_sec = fsfile->meta->mtime;
+dirent.tsk_mtime_nsec = fsfile->meta->mtime_nano;
+dirent.tsk_ctime_sec = fsfile->meta->ctime;
+dirent.tsk_ctime_nsec = fsfile->meta->ctime_nano;
+dirent.tsk_crtime_sec = fsfile->meta->crtime;
+dirent.tsk_crtime_nsec = fsfile->meta->crtime_nano;
+
+ret = asprintf (, "%s", fsfile->meta->link);
+if (ret < 0) {
+  perror ("asprintf");
+  return TSK_WALK_ERROR;
+}

The asprintf is simply duplicating the string, so strdup can be a
better (and faster) alternative.
OTOH, why do you need to duplicate fsfile->meta->link at all, given
that the copy will be free'd at the end of the function?
Was not sure what to return in case of NULL value and asprintf was 
"nicely" replacing it with a (null) string. If empty string is the way 
I'll fix it then.


Will fix the rest as well.



+
+dirent.tsk_link = flink;
+  }

Also note that NULL values for FString in structs cannot be returned
in daemon calls, you need to have empty strings for the unset values.


ret = send_dirent_info ();
-  ret = (ret == 0) ? TSK_WALK_CONT : TSK_WALK_ERROR;

-  return ret;
+  return (ret == 0) ? TSK_WALK_CONT : TSK_WALK_ERROR;

Unrelated change.


  }

  /* Inspect fsfile to identify its type. */
@@ -175,7 +195,7 @@ file_type (TSK_FS_FILE *fsfile)
return 'u';
  }

-/* Inspect fsfile to retrieve the file allocation state. */
+/* Inspect fsfile to retrieve file allocation and compression status. */
  static int
  file_flags (TSK_FS_FILE *fsfile)
  {
@@ -188,6 +208,9 @@ file_flags (TSK_FS_FILE *fsfile)
else
  flags |= DIRENT_ALLOC;

+  if (fsfile->meta && fsfile->meta->flags & TSK_FS_META_FLAG_COMP)
+flags |= DIRENT_COMPRESSED;
+
return flags;
  }

diff --git a/generator/actions.ml b/generator/actions.ml
index e0931b8..1591863 100644
--- a/generator/actions.ml
+++ b/generator/actions.ml
@@ -3615,8 +3615,42 @@ This generally implies the metadata has been reallocated 
to a new file.
  Therefore, information such as file type and file size
  might not correspond with the ones of the original deleted entry.

+=item 0x0004
+
+The bit is set to C<1> when the file is compressed using filesystem
+native compression support (NTFS). The API is not able to detect
+application level compression.
+
  =back

+=item 'tsk_atime_sec'
+
+=item 'tsk_atime_nsec'
+
+=item 'tsk_mtime_sec'
+
+=item 'tsk_mtime_nsec'
+
+=item 'tsk_ctime_sec'
+
+=item 'tsk_ctime_nsec'
+
+=item 'tsk_crtime_sec'
+
+=item 'tsk_crtime_nsec'
+
+Respectively, access, modification, last status change and creation
+time in Unix format in seconds and nanoseconds.
+
+=item 'tsk_nlink'
+
+Number of file names pointing to this entry.
+
+=item 'tsk_link'
+
+If the entry is a symbolic link, this field will contain the path
+to the target file.
+
  =back

  

Re: [Libguestfs] [PATCH] Reserve entries to tsk_dirent struct

2016-06-28 Thread noxdafox

On 28/06/16 23:01, Richard W.M. Jones wrote:

On Tue, Jun 28, 2016 at 10:49:16PM +0300, Matteo Cafasso wrote:

Already implemented entries.

tsk_inode
tsk_type
tsk_size
tsk_name
tsk_flags

Easy ones to add.

tsk_atime_sec
tsk_atime_nsec
tsk_mtime_sec
tsk_mtime_nsec
tsk_ctime_sec
tsk_ctime_nsec
tsk_blksize
tsk_blocks

Further ideas.

tsk_nlink
tsk_link_name

Signed-off-by: Matteo Cafasso 
---
  daemon/tsk.c  |  4 +++-
  generator/structs.ml  |  6 ++
  tests/tsk/test-filesystem-walk.sh | 16 ++--
  3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/daemon/tsk.c b/daemon/tsk.c
index 65159ad..446213e 100644
--- a/daemon/tsk.c
+++ b/daemon/tsk.c
@@ -128,7 +128,9 @@ fswalk_callback (TSK_FS_FILE *fsfile, const char *path, 
void *data)
dirent.tsk_name = fname;
dirent.tsk_flags = file_flags (fsfile);
dirent.tsk_spare1 = dirent.tsk_spare2 = dirent.tsk_spare3 =
-dirent.tsk_spare4 = dirent.tsk_spare5 = 0;
+dirent.tsk_spare4 = dirent.tsk_spare5 = dirent.tsk_spare6 =
+dirent.tsk_spare7 = dirent.tsk_spare8 = dirent.tsk_spare9 =
+dirent.tsk_spare10 = dirent.tsk_spare11 = 0;

ret = send_dirent_info ();
ret = (ret == 0) ? TSK_WALK_CONT : TSK_WALK_ERROR;
diff --git a/generator/structs.ml b/generator/structs.ml
index acc0661..eb8931f 100644
--- a/generator/structs.ml
+++ b/generator/structs.ml
@@ -459,6 +459,12 @@ let structs = [
  "tsk_spare3", FInt64;
  "tsk_spare4", FInt64;
  "tsk_spare5", FInt64;
+"tsk_spare6", FInt64;
+"tsk_spare7", FInt64;
+"tsk_spare8", FInt64;
+"tsk_spare9", FInt64;
+"tsk_spare10", FInt64;
+"tsk_spare11", FInt64;
  ];
  s_camel_name = "TSKDirent" };

diff --git a/tests/tsk/test-filesystem-walk.sh 
b/tests/tsk/test-filesystem-walk.sh
index c816267..6ee3f71 100755
--- a/tests/tsk/test-filesystem-walk.sh
+++ b/tests/tsk/test-filesystem-walk.sh
@@ -55,7 +55,13 @@ tsk_spare1: 0
  tsk_spare2: 0
  tsk_spare3: 0
  tsk_spare4: 0
-tsk_spare5: 0 }'
+tsk_spare5: 0
+tsk_spare6: 0
+tsk_spare7: 0
+tsk_spare8: 0
+tsk_spare9: 0
+tsk_spare10: 0
+tsk_spare11: 0 }'
  if [ $? != 0 ]; then
  echo "$0: \$MFT not found in files list."
  echo "File list:"
@@ -73,7 +79,13 @@ tsk_spare1: 0
  tsk_spare2: 0
  tsk_spare3: 0
  tsk_spare4: 0
-tsk_spare5: 0 }'
+tsk_spare5: 0
+tsk_spare6: 0
+tsk_spare7: 0
+tsk_spare8: 0
+tsk_spare9: 0
+tsk_spare10: 0
+tsk_spare11: 0 }'
  if [ $? != 0 ]; then
  echo "$0: /test.txt not found in files list."
  echo "File list:"


First a note: This patch is only valid because we have not yet
released this API in a stable release of libguestfs.  After an API
becomes stable we never change it.

This is why I am reserving them now.


I'm a bit confused - what is the aim of this patch?  Is it to reserve
fields we might implement in future, and if so, why don't we just
implement them now?

Just to make sure I am now slowing down any stable release.
The patch to add new fields is still a WIP.
If the next stable release due date is still far then you can NACK this 
patch.


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH v8 1/3] New API: internal_filesystem_walk

2016-06-15 Thread noxdafox



On 15/06/16 16:56, Richard W.M. Jones wrote:

On Mon, Jun 13, 2016 at 07:50:52PM +0300, Matteo Cafasso wrote:

diff --git a/generator/structs.ml b/generator/structs.ml
index 6017ba6..3c2cc61 100644
--- a/generator/structs.ml
+++ b/generator/structs.ml
@@ -444,6 +444,19 @@ let structs = [
  ];
  s_camel_name = "InternalMountable";
};
+
+  (* The Sleuth Kit directory entry information. *)
+  { defaults with
+s_name = "tsk_dirent";
+s_cols = [
+"tsk_inode", FUInt64;
+"tsk_type", FChar;
+"tsk_size", FInt64;
+"tsk_name", FString;
+"tsk_flags", FUInt32;

Note if you ever need to add more columns in future, you won't be able
to, unless you reserve some space in the struct now by adding:

"tsk_spare1", FInt64;
"tsk_spare2", FInt64;
"tsk_spare3", FInt64;
"tsk_spare4", FInt64;
"tsk_spare5", FInt64;
"tsk_spare6", FInt64;

I can't say if you'll need more columns here, or if the set you have
now is the final set that you'll ever need.
I was wondering as actually there would be more information to report to 
the users.

I'll reserve a bit of space just in case.

I will also apply the other suggested changes.


This patch looks OK to me.

Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH v3 5/5] appliance: Added filesystem_walk command tests

2016-04-05 Thread noxdafox

On 05/04/16 20:33, Pino Toscano wrote:

On Tuesday 05 April 2016 18:47:32 Matteo Cafasso wrote:

The tests check that the filesystem_walk command is able to retrieve
information regarding both existing and deleted files.

A NTFS image is used as Ext3+ filesystems deletion is more aggressive
in terms of metadata removal.

Signed-off-by: Matteo Cafasso 
---
  tests/tsk/Makefile.am |  3 +-
  tests/tsk/test-filesystem-walk.sh | 62 +++
  2 files changed, 64 insertions(+), 1 deletion(-)
  create mode 100755 tests/tsk/test-filesystem-walk.sh

diff --git a/tests/tsk/Makefile.am b/tests/tsk/Makefile.am
index 0cd7c03..f9b2fef 100644
--- a/tests/tsk/Makefile.am
+++ b/tests/tsk/Makefile.am
@@ -18,7 +18,8 @@
  include $(top_srcdir)/subdir-rules.mk

  TESTS = \
-   test-download-inode.sh
+   test-download-inode.sh \
+   test-filesystem-walk.sh

  TESTS_ENVIRONMENT = $(top_builddir)/run --test

diff --git a/tests/tsk/test-filesystem-walk.sh 
b/tests/tsk/test-filesystem-walk.sh
new file mode 100755
index 000..ab7c1a9
--- /dev/null
+++ b/tests/tsk/test-filesystem-walk.sh
@@ -0,0 +1,62 @@
+#!/bin/bash -
+# libguestfs
+# Copyright (C) 2016 Red Hat Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# Test the filesystem-walk command.
+
+if [ -n "$SKIP_TEST_FILESYSTEM_WALK_SH" ]; then
+echo "$0: test skipped because environment variable is set."
+exit 77
+fi
+
+# Skip if TSK is not supported by the appliance.
+if ! guestfish add /dev/null : run : available "libtsk"; then
+echo "$0: skipped because TSK is not available in the appliance"
+exit 77
+fi
+
+if [ ! -s ../../test-data/phony-guests/windows.img ]; then
+echo "$0: skipped because windows.img is zero-sized"
+exit 77
+fi
+
+# create and delete a file then list the filesystem content
+output=$(guestfish --ro -a ../../test-data/phony-guests/windows.img \
+   run :\
+   mount /dev/sda2 / :  \
+   write /test.txt "foobar" :   \
+   rm /test.txt :   \
+   umount / :   \
+   filesystem-walk /dev/sda2)

This is a bit unreadable, a better approach is to read commands from
stdin; see for example fish/test-copy.sh.


+
+# test $MFT is in the list
+echo $output | grep -q "{ tsk_inode: 0 tsk_type: r tsk_size: .* tsk_name: \$MFT 
tsk_allocated: 1 }"

Hmm are you sure this works when tracing is disabled?  The default
output in guestfish for structs is each field in a single line.


I tried it without tracing and it works as well. Just to be sure, I'll 
add the -z flag to grep to ignore newlines.




Unless you compare the whole output like other tests do, a better
solution could be write this test using a scripting language like Perl:
I think most, if not all, of the non-bash tests are Perl-based, and it
would allow to do better checks for the return values.
I'm not familiar with Perl therefore it would require a bit more of time 
for little value.
I could do it in Python but I believe bash for such simple cases is way 
more readable and effective.


Thanks,


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v3 1/5] generator: Added tsk_dirent struct

2016-04-05 Thread noxdafox

On 05/04/16 19:33, Pino Toscano wrote:

On Tuesday 05 April 2016 18:47:28 Matteo Cafasso wrote:

The tsk_dirent struct contains the information gathered via TSK APIs.

The struct contains the following fields:
  * tsk_inode: inode of a file
  * tsk_type: type of file such as for dirwalk command
  * tsk_size: file size in bytes
  * tsk_name: path relative to its disk partition
  * tsk_allocated: whether the file has been deleted

Signed-off-by: Matteo Cafasso 
---
  generator/structs.ml | 16 ++--
  1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/generator/structs.ml b/generator/structs.ml
index 6017ba6..d986fd9 100644
--- a/generator/structs.ml
+++ b/generator/structs.ml
@@ -442,8 +442,20 @@ let structs = [
  "im_device", FString;
  "im_volume", FString;
  ];
-s_camel_name = "InternalMountable";
-  };
+s_camel_name = "InternalMountable" };

Unneeded change.


+  (* The Sleuth Kit directory entry information. *)
+  { defaults with
+s_name = "tsk_dirent";
+s_cols = [
+"tsk_inode", FUInt64;
+"tsk_type", FChar;
+"tsk_size", FInt64;
+"tsk_name", FString;
+"tsk_allocated", FUInt32;

Once added to the public API, a struct cannot be extended anymore
(it would break the ABI).  IMHO it would make more sense to have
a tsk_flags instead of tsk_allocated, documenting the values of the
flags/bits set: this way, adding a new simple boolean flag won't
require a new tsk_dirent2 (see e.g. the application2 struct).


The application2 structs shows pretty well the issue but does not have 
any bitwise flag.
Is there an example I could use as a reference? Especially I'd like to 
see how flags are exposed and documented.




Thanks,


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH v2 3/5] daemon: Added internal_filesystem_walk command

2016-04-04 Thread noxdafox

On 04/04/16 15:15, Pino Toscano wrote:

On Monday 04 April 2016 14:58:35 NoxDaFox wrote:

+
+static int open_filesystem (const char *device,
+TSK_IMG_INFO **img, TSK_FS_INFO **fs);
+static TSK_WALK_RET_ENUM fswalk_callback (TSK_FS_FILE *fsfile,
+  const char *path, void *data);

Single line for forward declarations.


Even if they are longer than 80 chars?

Yep.


+static char file_type (TSK_FS_FILE *fsfile);
+static int send_dirent_info (guestfs_int_tsk_dirent *dirent);
+static void reply_with_tsk_error (const char *funcname);
+
+int
+do_internal_filesystem_walk (const mountable_t *mountable)
+{
+  int ret = -1;
+  TSK_FS_INFO *fs = NULL;
+  TSK_IMG_INFO *img = NULL;  /* Used internally by tsk_fs_dir_walk */
+  int flags = TSK_FS_DIR_WALK_FLAG_ALLOC | TSK_FS_DIR_WALK_FLAG_UNALLOC

|

+TSK_FS_DIR_WALK_FLAG_RECURSE | TSK_FS_DIR_WALK_FLAG_NOORPHAN;
+
+  ret = open_filesystem (mountable->device, , );
+  if (ret < 0)
+return ret;
+
+  reply (NULL, NULL);  /* Reply message. */
+
+  ret = tsk_fs_dir_walk (fs, fs->root_inum, flags, fswalk_callback,

NULL);

+  if (ret == 0)
+ret = send_file_end (0);  /* File transfer end. */
+  else
+send_file_end (1);  /* Cancel file transfer. */
+
+  fs->close (fs);
+  img->close (img);
+
+  return ret;
+}
+
+/* Inspect the device and initialises the img and fs structures.
+ * Return 0 on success, -1 on error.
+ */
+static int
+open_filesystem (const char *device, TSK_IMG_INFO **img, TSK_FS_INFO

**fs)

+{
+  const char *images[] = { device };
+
+  *img = tsk_img_open (1, images, TSK_IMG_TYPE_DETECT , 0);
+  if (*img == NULL) {
+reply_with_tsk_error ("tsk_image_open");
+return -1;
+  }
+
+  *fs = tsk_fs_open_img (*img, 0, TSK_FS_TYPE_DETECT);
+  if (*fs == NULL) {
+reply_with_tsk_error ("tsk_fs_open_img");
+(*img)->close (*img);
+return -1;
+  }
+
+  return 0;
+}
+
+/* Filesystem walk callback, it gets called on every FS node.
+ * Parse the node, encode it into an XDR structure and send it to the

appliance.

+ * Return TSK_WALK_CONT on success, TSK_WALK_ERROR on error.
+ */
+static TSK_WALK_RET_ENUM
+fswalk_callback (TSK_FS_FILE *fsfile, const char *path, void *data)
+{
+  int ret = 0;
+  CLEANUP_FREE char *fname = NULL;
+  struct guestfs_int_tsk_dirent dirent;
+
+  /* Ignore ./ and ../ */
+  ret = TSK_FS_ISDOT (fsfile->name->name);
+  if (ret != 0)
+return TSK_WALK_CONT;
+
+  /* Build the full relative path of the entry */
+  ret = asprintf_nowarn (, "%Q%Q", path, fsfile->name->name);

Why the quoting?  We don't quote results in similar APIs (e.g. readdir).


I didn't understand this one. I checked daemon/readdir.c and I found no
asprintf examples there.

$ ./run guestfish -N fs -m /dev/sda1 touch "/file with spaces" : readdir /
[0] = {
   ino: 12
   ftyp: r
   name: file with spaces
}
[1] = {
   ino: 2
   ftyp: d
   name: .
}
[2] = {
   ino: 11
   ftyp: d
   name: lost+found
}
[3] = {
   ino: 2
   ftyp: d
   name: ..
}

You can see the file names are not quoted.


As long as I'm not missing something, the double quote there is not 
adding quotes to the names but passing the string formatting parameter 
to 'asprintf'.

int asprintf(char **strp, const char *fmt, ...);

If I remove it, I get this error.

tsk.c: In function 'fswalk_callback':
tsk.c:112:34: error: expected expression before '%' token
   ret = asprintf_nowarn (, %Q%Q, path, fsfile->name->name);

If I keep it, I get this output.

./run guestfish --ro -a ubuntu.qcow2 run : filesystem_walk /dev/sda1 | less
[0] = {
  tsk_inode: 11
  tsk_type: d
  tsk_size: 16384
  tsk_name: lost\+found
  tsk_allocated: 1
}
[1] = {
  tsk_inode: 12
  tsk_type: l
  tsk_size: 33
  tsk_name: initrd.img
  tsk_allocated: 1
}

As you can see names are not quoted.




+  if (ret < 0) {
+fprintf (stderr, "asprintf: %m");
+return TSK_WALK_ERROR;
+  }
+
+  dirent.tsk_inode = fsfile->name->meta_addr;
+  dirent.tsk_type = file_type (fsfile);
+  dirent.tsk_size = (fsfile->meta != NULL) ? fsfile->meta->size : 0;

If 'meta' is null, then I guess the size should be -1 to indicate it
was not available; otherwise, there is no difference between an empty
file, and a file whose metadata could not be read.


The issue is that even if 'meta' is non-null, yet the value could be 0. In
cases where the file has been deleted, TSK does its best to retrieve as
much as it can and set to 0 the rest (same applies with inode for example,
the inode is set to 0 instead of -1).

The command documentation reports this "issue" (or feature?).

In this case, the problem is on the library, which reports what can be
a valid file size.  OTOH, if we know for sure that tsk could not
determine the metadata of the file, then let's report that to the
users so they can act depending on that.


I looked at the problem a bit deeper and it seems 

Re: [Libguestfs] [PATCH v2 4/5] appliance: Added filesystem_walk command

2016-04-04 Thread NoxDaFox
2016-04-04 13:09 GMT+03:00 Pino Toscano :

> Hi,
>
> some of the comments for patch #3 apply also for this one, namely:
> - wrapping of commit message
> - indentation of forward declarations
> - usage of XDR deserialization from guestfs_protocol.h
> -
>
> On Sunday 03 April 2016 16:30:49 Matteo Cafasso wrote:
> > The filesystem_walk command is the appliance's
> > counterpart of the daemon's
> > internal_filesystem_walk command.
> >
> > It writes the daemon's command output
> > on a temporary file and parses it, deserialising
> > the XDR formatted tsk_dirent structs.
> >
> > It returns to the caller the list
> > of tsk_dirent structs generated by the
> > internal_filesystem_walk command.
> >
> > Signed-off-by: Matteo Cafasso 
> > ---
> >  generator/actions.ml |  69 ++
> >  src/Makefile.am  |   1 +
> >  src/tsk.c| 162
> +++
> >  3 files changed, 232 insertions(+)
> >  create mode 100644 src/tsk.c
> >
> > diff --git a/generator/actions.ml b/generator/actions.ml
> > index 449ffa0..9457c3f 100644
> > --- a/generator/actions.ml
> > +++ b/generator/actions.ml
> > @@ -3546,6 +3546,75 @@ The environment variable C
> controls the default
> >  value: If C is set, then that is the default.
> >  Else F is the default." };
> >
> > +  { defaults with
> > +name = "filesystem_walk"; added = (1, 33, 17);
> > +style = RStructList ("dirents", "tsk_dirent"), [Mountable
> "device";], [];
> > +optional = Some "libtsk";
> > +progress = true; cancellable = true;
> > +shortdesc = "walk through the filesystem content";
> > +longdesc = "\
> > +Walk through the internal structures of a disk partition
> > +(eg. F) in order to return a list of all the files
> > +and directories stored within.
> > +
> > +It is not necessary to mount the disk partition to run this command.
> > +
> > +All entries in the filesystem are returned, excluding C<.> and
> > +C<..>. This function can list deleted or unaccessible files.
> > +The entries are I sorted.
> > +
> > +If the entry is not allocated (ex: it has been deleted),
> > +its inode, type or size might not be recovered correctly.
> > +In such case, the inode and the size will be 0 while the type
> > +will be unidentified 'u'.
>
> As said for patch #3, unknown sizes should be -1 and not 0.
>
> > +This call returns as well basic file type information about each
> > +file.  The C field will contain one of the following
> characters:
> > +
> > +=over 4
> > +
> > +=item 'b'
> > +
> > +Block special
> > +
> > +=item 'c'
> > +
> > +Char special
> > +
> > +=item 'd'
> > +
> > +Directory
> > +
> > +=item 'f'
> > +
> > +FIFO (named pipe)
> > +
> > +=item 'l'
> > +
> > +Symbolic link
> > +
> > +=item 'r'
> > +
> > +Regular file
> > +
> > +=item 's'
> > +
> > +Socket
> > +
> > +=item 'h'
> > +
> > +Shadow inode (Solaris)
> > +
> > +=item 'w'
> > +
> > +Whiteout inode (BSD)
> > +
> > +=item 'u'
> > +
> > +Unknown file type
> > +
> > +=back" };
> > +
> >  ]
> >
> >  (* daemon_functions are any functions which cause some action
> > diff --git a/src/Makefile.am b/src/Makefile.am
> > index 3b4cd10..9f8af4c 100644
> > --- a/src/Makefile.am
> > +++ b/src/Makefile.am
> > @@ -130,6 +130,7 @@ libguestfs_la_SOURCES = \
> >   structs-copy.c \
> >   structs-free.c \
> >   tmpdirs.c \
> > + tsk.c \
> >   whole-file.c \
> >   libguestfs.syms
> >
> > diff --git a/src/tsk.c b/src/tsk.c
> > new file mode 100644
> > index 000..d77bc0a
> > --- /dev/null
> > +++ b/src/tsk.c
> > @@ -0,0 +1,162 @@
> > +/* libguestfs
> > + * Copyright (C) 2016 Red Hat Inc.
> > + *
> > + * This library is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2 of the License, or (at your option) any later version.
> > + *
> > + * This library is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with this library; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#include 
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "full-read.h"
> > +
> > +#include "guestfs.h"
> > +#include "guestfs_protocol.h"
> > +#include "guestfs-internal.h"
> > +#include "guestfs-internal-all.h"
> > +#include "guestfs-internal-actions.h"
> > +
> > +static struct guestfs_tsk_dirent_list *parse_filesystem_walk
> > +(guestfs_h *g, char *buf, size_t bufsize);
> > +int 

Re: [Libguestfs] [PATCH v2 3/5] daemon: Added internal_filesystem_walk command

2016-04-04 Thread NoxDaFox
2016-04-04 12:48 GMT+03:00 Pino Toscano :

> On Sunday 03 April 2016 16:30:48 Matteo Cafasso wrote:
> > The internal_filesystem_walk command walks
> > through the FS structure of a disk partition
> > and returns all the files or directories
> > which could be found.
> >
> > The command is able to retrieve information
> > regarding deleted or unaccessible files as well
> > where other commands such as stat or find
> > would fail.
> >
> > The gathered list of tsk_dirent structs
> > is serialised into XDR format and written
> > to a file by the appliance.
>
> Not that it is a big issue, but you can wrap commit messages at the
> 72 columns.
>
> > Signed-off-by: Matteo Cafasso 
> > ---
> >  daemon/Makefile.am   |   4 +-
> >  daemon/tsk.c | 225
> +++
> >  generator/actions.ml |  25 ++
> >  src/MAX_PROC_NR  |   2 +-
> >  4 files changed, 254 insertions(+), 2 deletions(-)
> >  create mode 100644 daemon/tsk.c
> >
> > diff --git a/daemon/Makefile.am b/daemon/Makefile.am
> > index beb7962..03bf71f 100644
> > --- a/daemon/Makefile.am
> > +++ b/daemon/Makefile.am
> > @@ -179,6 +179,7 @@ guestfsd_SOURCES = \
> >   sync.c \
> >   syslinux.c \
> >   tar.c \
> > + tsk.c \
> >   truncate.c \
> >   umask.c \
> >   upload.c \
> > @@ -209,7 +210,8 @@ guestfsd_LDADD = \
> >   $(LIB_CLOCK_GETTIME) \
> >   $(LIBINTL) \
> >   $(SERVENT_LIB) \
> > - $(PCRE_LIBS)
> > + $(PCRE_LIBS) \
> > + $(TSK_LIBS)
> >
> >  guestfsd_CPPFLAGS = \
> >   -I$(top_srcdir)/gnulib/lib \
> > diff --git a/daemon/tsk.c b/daemon/tsk.c
> > new file mode 100644
> > index 000..ac44106
> > --- /dev/null
> > +++ b/daemon/tsk.c
> > @@ -0,0 +1,225 @@
> > +/* libguestfs - the guestfsd daemon
> > + * Copyright (C) 2016 Red Hat Inc.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA.
> > + */
> > +
> > +#include 
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "guestfs_protocol.h"
> > +#include "daemon.h"
> > +#include "actions.h"
> > +#include "optgroups.h"
> > +
> > +#ifdef HAVE_LIBTSK
> > +
> > +#include 
> > +
> > +/* File types map similar to dirent. */
> > +#define TSK_FILE_TYPE_NUM 10
> > +char TSK_FILE_TYPE[TSK_FILE_TYPE_NUM] = {
> > +  'u', 'f', 'c', 'd', 'b', 'r', 'l', 's', 'h', 'w'
> > +};
>
> I see the libtsk already uses TSK_* and tsk_* prefixes for its own
> stuff, so I'd avoid using the same prefixes for local variables.
>
> Also, make the string as static const, since it does not need to be
> modified at all (and thus can then be placed into .rodata).
>

Good spot! I'll fix it thanks!


>
> > +
> > +static int open_filesystem (const char *device,
> > +TSK_IMG_INFO **img, TSK_FS_INFO **fs);
> > +static TSK_WALK_RET_ENUM fswalk_callback (TSK_FS_FILE *fsfile,
> > +  const char *path, void *data);
>
> Single line for forward declarations.
>

Even if they are longer than 80 chars?


>
> > +static char file_type (TSK_FS_FILE *fsfile);
> > +static int send_dirent_info (guestfs_int_tsk_dirent *dirent);
> > +static void reply_with_tsk_error (const char *funcname);
> > +
> > +int
> > +do_internal_filesystem_walk (const mountable_t *mountable)
> > +{
> > +  int ret = -1;
> > +  TSK_FS_INFO *fs = NULL;
> > +  TSK_IMG_INFO *img = NULL;  /* Used internally by tsk_fs_dir_walk */
> > +  int flags = TSK_FS_DIR_WALK_FLAG_ALLOC | TSK_FS_DIR_WALK_FLAG_UNALLOC
> |
> > +TSK_FS_DIR_WALK_FLAG_RECURSE | TSK_FS_DIR_WALK_FLAG_NOORPHAN;
> > +
> > +  ret = open_filesystem (mountable->device, , );
> > +  if (ret < 0)
> > +return ret;
> > +
> > +  reply (NULL, NULL);  /* Reply message. */
> > +
> > +  ret = tsk_fs_dir_walk (fs, fs->root_inum, flags, fswalk_callback,
> NULL);
> > +  if (ret == 0)
> > +ret = send_file_end (0);  /* File transfer end. */
> > +  else
> > +send_file_end (1);  /* Cancel file transfer. */
> > +
> > +  fs->close (fs);
> > +  img->close (img);
> > +
> > +  return ret;
> > +}
> > +
> > +/* Inspect the device and initialises the img and fs structures.
> > + * Return 0 on success, -1 on error.
> > + */
> > 

Re: [Libguestfs] [PATCH 2/3] added icat API to retrieve deleted or inaccessible files

2016-03-07 Thread noxdafox

On 07/03/16 21:45, Richard W.M. Jones wrote:

Thanks, I have pushed this patch series.

Could you consider changing:


+optional = Some "icat";

I think it would be nice to have a single feature, and to call the
feature "sleuthkit" or "forensics" or something like that.  We don't
need to have one feature per API since installation of a single
package (sleuthkit) is sufficient to make all the APIs available.

This was something I was a bit confused about.
For what I've got, libguestfs checks the availability of something by 
inspecting its path.


TSK is a collection of tools (icat, fls, mmls, blkls...) therefore I was 
using `icat` to test its presence within the appliance.

How could I do it better?

Shall I call it `optional = Some "sleuthkit"` and then in the code check 
for "icat" instead of using the given parameter? Is this considered a 
"clean" solution?


If so I'll provide patch.


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH 0/2] added icat and fls0 APIs for deleted files recovery

2016-03-07 Thread noxdafox



On 07/03/16 21:31, Richard W.M. Jones wrote:

On Mon, Mar 07, 2016 at 08:14:41PM +0200, noxdafox wrote:

As the API documentation says, this is the low level API which I
have provided as an example.

I took inspiration from the guestfs_ls0 API which does a similar job
storing the content of a directory onto a host file.

If I understood correctly (the dynamic code generation is still
confusing me a bit), the way Libguestfs implements commands which
could have a large output is via first dumping it onto a local file
and then iterating over it.
This command would list the entire content of a disk including the
deleted files therefore we need to expect a large output.

Your understanding is correct.  But the fls0 API still isn't safe
because (I assume) it cannot handle filenames containing '\n'.
I haven't considered this issue. This is why guestfs_ls0 separates the 
results using a '\0' character right?

I'll try to reproduce this and see how TSK gives me the output.


There's another API which handles arbitrary length RStructList
returns, which is: guestfs_lxattrlist / guestfs_internal_lxattrlist
(see src/file.c:guestfs_impl_lxattrlist and daemon/xattr.c).

I will take a look at these ones thanks!



What is missing is the higher level implementation which would
pretty much look like the libguestfs_ls API. I need to better
understand how to implement it and suggestions are more than
appreciated. I tried to trace back how the guestfs_find is
implemented for example, but I'm still a bit disoriented by the
automagic code generation.

See guestfs_impl_ls in src/file.c.  All non_daemon_functions are
implemented by some guestfs_impl_* function in the library side.
I guess I'll come back with a complete solution with both low level and 
high level implementation.



Does TSK have a machine-readable mode?  If it does, it'll definitely
make things easier if (eg) JSON or XML output is available.  If not,
push upstream to add that to TSK -- it's a simple change for them,
which will make their tools much more usable, a win for everyone.

I personally disagree on this. The TSK `fls` command is a clone of
the bash `ls` one. Maybe it's more similar to `ls -al` as it returns
additional information. IMHO asking to upstream to add JSON or XML
output format would sound pretty much as asking the same to bash for
the `ls` utility.

The end result is to still return a list of structs or a list of
strings. But parsing the `fls` output shouldn't be that hard. It's
documentation is here:
http://wiki.sleuthkit.org/index.php?title=Fls

Well I still think it would be better to make this parsable.  If I
want to get information about a file in a shell script, I use the
'stat(1)' program since that has machine-readable output (stat -c).
Indeed but in such case you know what to expect as the set of 
information is a closed and well defined one.
In this case the options are unfortunately many. I can of course propose 
the idea to upstream but I guess they won't like it much.


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH 1/2] added icat and fls0 APIs

2016-03-07 Thread noxdafox

On 07/03/16 13:32, Richard W.M. Jones wrote:

On Sun, Mar 06, 2016 at 05:42:25PM +0200, Matteo Cafasso wrote:

+static int
+file_out (const char *cmd)
+{
+  int r;
+  FILE *fp;
+  char buffer[GUESTFS_MAX_CHUNK_SIZE];

Soon libguestfs will prevent you from using large stack allocations.
This is easy to fix.  See:
https://www.redhat.com/archives/libguestfs/2016-March/msg00052.html

Will fix it.



diff --git a/generator/actions.ml b/generator/actions.ml
index 287d7f5..ff6aa3f 100644
--- a/generator/actions.ml
+++ b/generator/actions.ml
@@ -12919,6 +12919,39 @@ within the C<$Extend> folder.
  The filesystem from which to extract the file must be unmounted,
  otherwise the call will fail." };

+  { defaults with
+name = "icat"; added = (1, 33, 14);
+style = RErr, [Mountable "device"; Int64 "inode"; FileOut "filename"], [];
+proc_nr = Some 464;
+optional = Some "icat";
+progress = true; cancellable = true;
+shortdesc = "download a file to the local machine given its inode";
+longdesc = "\
+Download a file given its inode from the disk partition (eg. F)
+and save it as F on the local machine.
+
+This allows to download deleted or inaccessible files." };

This one looks OK.


+  { defaults with
+name = "fls0"; added = (1, 33, 14);
+style = RErr, [Mountable "device"; FileOut "filename"], [];
+proc_nr = Some 465;
+optional = Some "icat";
+progress = true; cancellable = true;
+shortdesc = "list the content of the disk";
+longdesc = "\
+This specialized command is used to get a listing of
+the content of the disk partition (eg. F).
+The list of filenames is written to the local file F (on the host).
+
+The command can display hidden or deleted files.
+
+The output information contains the file type, the metadata address
+and the full path of the file.
+
+If the file was deleted, an asterisk will be shown between the file type
+and the metadata address." };

As discussed, this shouldn't be a FileOut API at all.  It should
return a list of structs (RStructList).

If the output could be very large then we may need to split it into an
internal API, but let's not worry about that right now.

This is exactly the internal API that's why I appended the 0 to the name xD.

As I explained in the previous e-mail. What is missing is the higher 
level API returning the RStructList.


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH 0/2] added icat and fls0 APIs for deleted files recovery

2016-03-07 Thread noxdafox

On 07/03/16 13:29, Richard W.M. Jones wrote:

On Sun, Mar 06, 2016 at 05:42:24PM +0200, Matteo Cafasso wrote:

As discussed in the topic: 
https://www.redhat.com/archives/libguestfs/2016-March/msg00018.html

I'd like to add to libguestfs the disk forensics capabilities offered by The 
Sleuth Kit.
http://www.sleuthkit.org/

The two APIs I'm adding with the patch are a simple example of which type of 
features TSK can enable.

A few comments in general terms:

The current splitting of the commits doesn't make much sense to me.
I think it would be better as:

  - commit to add TSK to the appliance

  - commit to add the icat API

  - tests for icat

  - commit to add the fls0 API

  - tests for fls0

although it would be fine to combine the tests with the new API, or
even have all the tests as a single separate commit (as now).

This benefits you because it will allow patches to go upstream
earlier.  For example, a commit to add TSK to the appliance is a
simple and obvious change that I see no problem with.  Also the icat
API is closer to being ready than the fls0 API (see below for
explanation).

Indeed I've done quite a poor job in this. I will split it as suggested.



 fls0 /dev/sda2 /home/noxdafox/disk-content.txt

r/r 15711-128-1:
$Recycle.Bin/S-1-5-21-2379395878-2832339042-1309242031-1000/desktop.ini
-/r * 60015-128-1:  
$Recycle.Bin/S-1-5-21-2379395878-2832339042-1309242031-1000/$R07QQZ2.txt
-/r * 60015-128-3:  
$Recycle.Bin/S-1-5-21-2379395878-2832339042-1309242031-1000/$R07QQZ2.txt:Zone.Identifier

What is `/home/noxdafox/disk-content.txt'?

It's the local (host side) file where to store the command output.


The problem with this API is it pushes all the parsing up in the
stack, to libguestfs consumers.

In general we'd like to avoid that and have just one place where all
parsing needs to be done (ie. libguestfs itself), so it'd be nicer to
have an API that returns a list of structs (RStructList) with all the
important fields parsed out.
As the API documentation says, this is the low level API which I have 
provided as an example.


I took inspiration from the guestfs_ls0 API which does a similar job 
storing the content of a directory onto a host file.


If I understood correctly (the dynamic code generation is still 
confusing me a bit), the way Libguestfs implements commands which could 
have a large output is via first dumping it onto a local file and then 
iterating over it.
This command would list the entire content of a disk including the 
deleted files therefore we need to expect a large output.


What is missing is the higher level implementation which would pretty 
much look like the libguestfs_ls API. I need to better understand how to 
implement it and suggestions are more than appreciated. I tried to trace 
back how the guestfs_find is implemented for example, but I'm still a 
bit disoriented by the automagic code generation.


Does TSK have a machine-readable mode?  If it does, it'll definitely
make things easier if (eg) JSON or XML output is available.  If not,
push upstream to add that to TSK -- it's a simple change for them,
which will make their tools much more usable, a win for everyone.
I personally disagree on this. The TSK `fls` command is a clone of the 
bash `ls` one. Maybe it's more similar to `ls -al` as it returns 
additional information. IMHO asking to upstream to add JSON or XML 
output format would sound pretty much as asking the same to bash for the 
`ls` utility.


The end result is to still return a list of structs or a list of 
strings. But parsing the `fls` output shouldn't be that hard. It's 
documentation is here:

http://wiki.sleuthkit.org/index.php?title=Fls



Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] Libguestfs as filesystem forensic tool

2016-03-02 Thread noxdafox



On 02/03/16 18:24, Richard W.M. Jones wrote:

On Wed, Mar 02, 2016 at 05:59:32PM +0200, noxdafox wrote:

One of the patches I'm talking about would add TSK (The Sleuth Kit)
as a dependency within the appliance.

This would bring new APIs such as:
  'fls' more powerful 'ls' command allowing to get list of deleted
files or timelines at a given path.
  'icat' similar to ntfscat-i but it supports multiple FS.

Yet I'm not sure whether it's desirable as it is for a narrow use
case and on my Debian box TSK is a 12Mb binary.

Yes that's a rather large dependency.

However it's possible to use optgroups ["optional" field in
generator/actions.ml] and subpackaging to mean that end users don't
need to install this dependency unless they want it.
If I understood correctly, I just need to set the optional field in the 
API and then issue the command: "./configure --with-extra-packages=... " 
right?


Would need to see the patches before really deciding.

Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] Libguestfs as filesystem forensic tool

2016-03-02 Thread noxdafox

On 02/03/16 17:53, Richard W.M. Jones wrote:

On Wed, Mar 02, 2016 at 05:47:40PM +0200, noxdafox wrote:

Greetings,

I am playing around with the idea of using libguestfs as a forensic
tool to investigate VM disk images.

Some use cases as example:
  * Sandbox for malware analysis.
  * Incident response in cloud environments.

Libguestfs is a precious resource in this case as it allows to
abstract the disk image internals and expose them as mountable
devices.

Combined with some state of the art tool such as The Sleuth Kit it
would turn it into a pretty powerful forensic tool.
http://www.sleuthkit.org/

I played around with some proof-of-concept and the idea seems to work.

The question I'd like to ask is if this feature would interest the
libguestfs community or if I shall fork the project
(libguestforensic?) and, if so, what is the preferable way to do it.

Actually I believe parts of libguestfs (and especially hivex) are
already used in this way.

Anyhow you're free to fork libguestfs provided you obey the license.
It may be easier/less work if you submit patches upstream where they
make sense for the upstream project, such as generally useful APIs
(like the ntfscat-i API).
One of the patches I'm talking about would add TSK (The Sleuth Kit) as a 
dependency within the appliance.


This would bring new APIs such as:
 'fls' more powerful 'ls' command allowing to get list of deleted files 
or timelines at a given path.

 'icat' similar to ntfscat-i but it supports multiple FS.

Yet I'm not sure whether it's desirable as it is for a narrow use case 
and on my Debian box TSK is a 12Mb binary.


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


[Libguestfs] [libguestfs] Libguestfs as filesystem forensic tool

2016-03-02 Thread noxdafox

Greetings,

I am playing around with the idea of using libguestfs as a forensic tool 
to investigate VM disk images.


Some use cases as example:
 * Sandbox for malware analysis.
 * Incident response in cloud environments.

Libguestfs is a precious resource in this case as it allows to abstract 
the disk image internals and expose them as mountable devices.


Combined with some state of the art tool such as The Sleuth Kit it would 
turn it into a pretty powerful forensic tool.

http://www.sleuthkit.org/

I played around with some proof-of-concept and the idea seems to work.

The question I'd like to ask is if this feature would interest the 
libguestfs community or if I shall fork the project (libguestforensic?) 
and, if so, what is the preferable way to do it.


Thank you.

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-28 Thread noxdafox
It seems the requirement is to still have Jessie mirrors enabled as it 
fails even when installing the packages previously mentioned. I'll 
switch to use APT-Pinning with a hybrid stable/testing distro.


Debian testing seems not to be the friendliest environment for building 
multi-dependencies binaries. Now I eventually succeeded to run the tests 
even though I have way more packages installed than I was expecting X-D.


I'll proceed writing the tests for the new API. Thanks for the support.

On 28/02/16 13:52, Richard W.M. Jones wrote:

There's one possible source of the problem:

I discovered that if you don't have all the appliance packages
installed on the host before your first libguestfs build, then any
missing packages don't get added to appliance/supermin.d/packages.
Compare your appliance/packagelist with appliance/supermin.d/packages
and look for packages present in the first file and missing in the
second file -- mdadm may be missing.

If you later install those missing packages, the dependencies in
appliance/Makefile.am never force appliance/supermin.d/packages to be
rebuilt, so those packages will be forever missing from the appliance.
Even 'make -C appliance clean all' will not fix this.

You can work around this with:

   rm appliance/stamp-supermin
   make

which forces appliance/supermin.d/packages to be rebuilt from scratch.

Of course 'git clean -xdf' should have also fixed this, so I don't
know if it applies to the problem you're having.

Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-27 Thread noxdafox

On 27/02/16 22:09, Richard W.M. Jones wrote:

On Sat, Feb 27, 2016 at 09:55:32PM +0200, noxdafox wrote:

On 27/02/16 11:23, Richard W.M. Jones wrote:

[...]

md_create: feature 'mdadm' is not available in this
build of libguestfs.  Read 'AVAILABILITY' in the guestfs(3) man page for
how to check for the availability of features. at 
/home/noxdafox/development/libguestfs/test-data/phony-guests/make-fedora-img.pl
line 103.

All the other packages were already installed. Most of these
packages seem to be available to other Debian branches but not on
Stretch.

The Debian package 'mdadm' is installed on the machine or not?

$ dpkg -l | grep mdadm
ii  mdadm 3.4-1   amd64tool to
administer Linux MD arrays (software RAID)

Assuming it is installed, what is the output of:

   rm -rf tmp/.guestfs-*
   ./run guestfish run : supported

?

That precise command fails with the following message:

libguestfs: error: you must call guestfs_add_drive before guestfs_launch

Ah right, that's because the default backend is 'direct', but on my
machine I have it set to 'libvirt', which supports hotplugging and
thus doesn't require you to specify any drives.


mdadm no

OK this is strange, and wrong.

Can you try:

   ./run virt-rescue --scratch

In the virt-rescue shell:

   ls -l {/bin,/sbin,/usr/bin,/usr/sbin}/mdadm

> ls -l {/bin,/sbin,/usr/bin,/usr/sbin}/mdadm
ls: cannot access '/bin/mdadm': No such file or directory
ls: cannot access '/sbin/mdadm': No such file or directory
ls: cannot access '/usr/bin/mdadm': No such file or directory
ls: cannot access '/usr/sbin/mdadm': No such file or directory


That should show the mdadm binary copied into the appliance, but
likely it does not, because the test here is failing:

https://github.com/libguestfs/libguestfs/blob/master/daemon/md.c#L45
https://github.com/libguestfs/libguestfs/blob/master/daemon/guestfsd.c#L1076

It could be that mdadm is something odd, like a dead symlink?

$ ls -l /sbin/mdadm
-rwxr-xr-x 1 root root 542376 Feb 19 19:03 /sbin/mdadm


Anyway, I'll fire up a Debian box in my cloud and see if I can
reproduce this also.

Thanks for the time you're spending in this.


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-27 Thread noxdafox

On 27/02/16 11:23, Richard W.M. Jones wrote:

[...]

md_create: feature 'mdadm' is not available in this
build of libguestfs.  Read 'AVAILABILITY' in the guestfs(3) man page for
how to check for the availability of features. at 
/home/noxdafox/development/libguestfs/test-data/phony-guests/make-fedora-img.pl
line 103.

All the other packages were already installed. Most of these
packages seem to be available to other Debian branches but not on
Stretch.

The Debian package 'mdadm' is installed on the machine or not?

$ dpkg -l | grep mdadm
ii  mdadm 3.4-1   amd64tool to 
administer Linux MD arrays (software RAID)


Assuming it is installed, what is the output of:

   rm -rf tmp/.guestfs-*
   ./run guestfish run : supported

?

That precise command fails with the following message:

libguestfs: error: you must call guestfs_add_drive before guestfs_launch

If I feed a disk image I get it to work and returns:

 acl yes
  augeas yes
  blkdiscard yes
blkdiscardzeroes yes
   btrfs no
extlinux no
  fstrim yes
   gdisk yes
grub no
   hivex yes
 inotify yes
 journal yes
 ldm no
   linuxcaps yes
 linuxfsuuid yes
linuxmodules yes
 linuxxattrs yes
luks no
lvm2 yes
   mdadm no
   mknod yes
  ntfs3g yes
   ntfsprogs yes
realpath yes
   rsync yes
   scrub no
 selinux yes
syslinux no
  wipefs yes
 xfs no
  xz yes
zerofree no


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-27 Thread noxdafox

On 27/02/16 01:05, Richard W.M. Jones wrote:

On Sat, Feb 27, 2016 at 12:53:51AM +0200, noxdafox wrote:

On 26/02/16 10:12, Richard W.M. Jones wrote:

On Fri, Feb 26, 2016 at 12:16:22AM +0200, noxdafox wrote:

According to autogen.sh output Perl bindings and virt tools seem to
be missing, could it be related to this? Are the tests relying to
such dependencies?

Yes, the tests rely on Perl bindings working, so I guess you need to
install whatever missing dependencies are needed to make Perl work.

Rich.


I eventually installed the list of build dependencies from the
source package. Which theoretically means I have all the required
dependencies for building the library.

Now it complains about missing builtin mdadm feature.

SRCDIR=. LAYOUT=partitions-md ../../run --test ./make-fedora-img.pl
md_create: feature 'mdadm' is not available in this
build of libguestfs.  Read 'AVAILABILITY' in the guestfs(3) man page for
how to check for the availability of features. at 
/home/noxdafox/development/libguestfs/test-data/phony-guests/make-fedora-img.pl
line 103.

This is a bug of sorts.  It should skip building these images if mdadm
is not available.  The reason we don't skip it is that mdadm is very
commonly available on every Linux distro, so there's not much point.

Do you really have all the dependencies installed?  You seem to have
the ones from appliance/packagelist{.in} missing:

   http://libguestfs.org/guestfs-building.1.html#full-list-of-requirements

Try:

   sudo apt-get install `cat appliance/packagelist`

This is the list of unavailable package in Stretch.

E: Package 'gfs-tools' has no installation candidate
E: Package 'gfs2-tools' has no installation candidate
E: Package 'gfs2-utils' has no installation candidate
E: Unable to locate package libsystemd-id128-0
E: Unable to locate package libsystemd-journal0
E: Package 'linux-image' has no installation candidate
E: Unable to locate package ufsutils
E: Package 'module-init-tools' has no installation candidate
E: Unable to locate package procps-ng
E: Unable to locate package util-linux-ng

All the other packages were already installed. Most of these packages 
seem to be available to other Debian branches but not on Stretch.


(you can ignore any unavailable packages).

Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-26 Thread noxdafox

On 26/02/16 10:12, Richard W.M. Jones wrote:

On Fri, Feb 26, 2016 at 12:16:22AM +0200, noxdafox wrote:

According to autogen.sh output Perl bindings and virt tools seem to
be missing, could it be related to this? Are the tests relying to
such dependencies?

Yes, the tests rely on Perl bindings working, so I guess you need to
install whatever missing dependencies are needed to make Perl work.

Rich.

I eventually installed the list of build dependencies from the source 
package. Which theoretically means I have all the required dependencies 
for building the library.


Now it complains about missing builtin mdadm feature.

SRCDIR=. LAYOUT=partitions-md ../../run --test ./make-fedora-img.pl
md_create: feature 'mdadm' is not available in this
build of libguestfs.  Read 'AVAILABILITY' in the guestfs(3) man page for
how to check for the availability of features. at 
/home/noxdafox/development/libguestfs/test-data/phony-guests/make-fedora-img.pl 
line 103.


Is it possible to skip the build of certain images and jump directly to 
the interested one (Windows in my case)?


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-25 Thread noxdafox

On 25/02/16 23:18, Richard W.M. Jones wrote:

On Thu, Feb 25, 2016 at 10:46:10PM +0200, noxdafox wrote:

On 25/02/16 10:54, Richard W.M. Jones wrote:

Apply the attached patch, followed by doing:

   make -C perl clean
   ./configure
   make

Rich.


I applied the patch, unfortunately the issue is still present.

This is what I get if I run interactive Perl console.

   DB<1> use Sys::Guestfs
Can't locate loadable object for module Sys::Guestfs in @INC (@INC
contains: /home/noxdafox/development/libguestfs/perl/lib /etc/perl
/usr/local/lib/x86_64-linux-gnu/perl/5.22.1
/usr/local/share/perl/5.22.1 /usr/lib/x86_64-linux-gnu/perl5/5.22
/usr/share/perl5 /usr/lib/x86_64-linux-gnu/perl/5.22
/usr/share/perl/5.22 /usr/local/lib/site_perl
/usr/lib/x86_64-linux-gnu/perl-base .) at (eval
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
Compilation failed in require at (eval
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
  at (eval 8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
 main::BEGIN() called at (eval
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2
 eval {...} called at (eval
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2
 eval 'no strict; ($@, $!, $^E, $,, $/, $\\, $^W) =
@DB::saved;package main; $^D = $^D | $DB::db_stop;
use Sys::Guestfs;
' called at /usr/share/perl/5.22/perl5db.pl line 737
 DB::eval called at /usr/share/perl/5.22/perl5db.pl line 3110
 DB::DB called at -e line 1
BEGIN failed--compilation aborted at (eval
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
  at (eval 8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
 eval 'no strict; ($@, $!, $^E, $,, $/, $\\, $^W) =
@DB::saved;package main; $^D = $^D | $DB::db_stop;
use Sys::Guestfs;
' called at /usr/share/perl/5.22/perl5db.pl line 737
 DB::eval called at /usr/share/perl/5.22/perl5db.pl line 3110
 DB::DB called at -e line 1

I have no experience with Perl. Is it failing evaluating the module?

I don't understand why this is happening, but the error means that it
cannot load the binary part of the module (ie. normally
libguestfs/perl/blib/arch/auto/Sys/Guestfs/Guestfs.so).
According to autogen.sh output Perl bindings and virt tools seem to be 
missing, could it be related to this? Are the tests relying to such 
dependencies?


This is how we have configured the optional components for you today:

Daemon .. yes
Appliance ... yes
QEMU  /usr/bin/qemu-system-x86_64
guestfish and C-based virt tools  yes
FUSE filesystem . no
GNU gettext for i18n  yes
virt-p2v  no
OCaml bindings .. yes
OCaml-based virt tools .. yes
Perl bindings ... no
Perl-based virt tools ... no
Python bindings . yes
Ruby bindings ... no
Java bindings ... yes
Haskell bindings  no
PHP bindings  no
Erlang bindings . no
Lua bindings  no
Go bindings . no
gobject bindings  no
gobject introspection ... no
bash completion . yes



I checked the module and it's contained in the path
/home/noxdafox/development/libguestfs/perl/lib which is present in
@INC.

It ought to be loading the module from perl/blib/... which is the
compiled copy of the module.

Anyway, suggest doing something like this [NB: it will DELETE any
non-checked-in files in your local directory]:

   git clean -xdf

and rebuilding again.

Unfortunately it didn't help either.


Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-25 Thread noxdafox

On 25/02/16 10:54, Richard W.M. Jones wrote:

Apply the attached patch, followed by doing:

   make -C perl clean
   ./configure
   make

Rich.


I applied the patch, unfortunately the issue is still present.

This is what I get if I run interactive Perl console.

  DB<1> use Sys::Guestfs
Can't locate loadable object for module Sys::Guestfs in @INC (@INC 
contains: /home/noxdafox/development/libguestfs/perl/lib /etc/perl 
/usr/local/lib/x86_64-linux-gnu/perl/5.22.1 /usr/local/share/perl/5.22.1 
/usr/lib/x86_64-linux-gnu/perl5/5.22 /usr/share/perl5 
/usr/lib/x86_64-linux-gnu/perl/5.22 /usr/share/perl/5.22 
/usr/local/lib/site_perl /usr/lib/x86_64-linux-gnu/perl-base .) at (eval 
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
Compilation failed in require at (eval 
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.

 at (eval 8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
main::BEGIN() called at (eval 
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2
eval {...} called at (eval 
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2
eval 'no strict; ($@, $!, $^E, $,, $/, $\\, $^W) = 
@DB::saved;package main; $^D = $^D | $DB::db_stop;

use Sys::Guestfs;
' called at /usr/share/perl/5.22/perl5db.pl line 737
DB::eval called at /usr/share/perl/5.22/perl5db.pl line 3110
DB::DB called at -e line 1
BEGIN failed--compilation aborted at (eval 
8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.

 at (eval 8)[/usr/share/perl/5.22/perl5db.pl:737] line 2.
eval 'no strict; ($@, $!, $^E, $,, $/, $\\, $^W) = 
@DB::saved;package main; $^D = $^D | $DB::db_stop;

use Sys::Guestfs;
' called at /usr/share/perl/5.22/perl5db.pl line 737
DB::eval called at /usr/share/perl/5.22/perl5db.pl line 3110
DB::DB called at -e line 1

I have no experience with Perl. Is it failing evaluating the module?
I checked the module and it's contained in the path 
/home/noxdafox/development/libguestfs/perl/lib which is present in @INC.


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-24 Thread noxdafox

On 23/02/16 10:09, Richard W.M. Jones wrote:

On Mon, Feb 22, 2016 at 11:58:28PM +0200, noxdafox wrote:

Once fixed that and few other things I got stuck with this:

SRCDIR=. LAYOUT=partitions ../../run --test ./make-fedora-img.pl
Can't locate loadable object for module Sys::Guestfs in @INC (@INC
contains: /home/noxdafox/development/libguestfs/perl/blib/lib
/home/noxdafox/development/libguestfs/perl/blib/arch
/home/noxdafox/development/libguestfs/perl/lib /etc/perl
/usr/local/lib/x86_64-linux-gnu/perl/5.22.1
/usr/local/share/perl/5.22.1 /usr/lib/x86_64-linux-gnu/perl5/5.22
/usr/share/perl5 /usr/lib/x86_64-linux-gnu/perl/5.22
/usr/share/perl/5.22 /usr/local/lib/site_perl
/usr/lib/x86_64-linux-gnu/perl-base .) at ./make-fedora-img.pl line
27.
Compilation failed in require at ./make-fedora-img.pl line 27.

The module is looking for should be in
/home/noxdafox/development/libguestfs/perl/lib and it's included in
@INC.

I'm on Debian Jessie.

It must be a missing dependency.  Did you run `apt-get build-dep libguestfs'?

Rich.

I just noticed the mistake, I'm not using Debian Jessie but Stretch 
(testing).


The missing dependency seems to be the package gfs2-utils which is 
present in Debian Jessie and Debian Sid but not in Stretch.


# apt-get build-dep libguestfs

The following packages have unmet dependencies:
 builddeps:libguestfs : Depends: gfs2-utils but it is not installable

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-22 Thread noxdafox



On 22/02/16 19:43, Richard W.M. Jones wrote:

On Mon, Feb 22, 2016 at 07:23:45PM +0200, noxdafox wrote:

On 22/02/16 17:26, Richard W.M. Jones wrote:

On Sun, Feb 21, 2016 at 11:22:23PM +0200, Matteo Cafasso wrote:

Adding ntfscat_i command for downloading files based on their inode number.

This allows the dowload of files unaccessible otherwise from a NTFS guest disk 
image.

The patch seems fine, but it really needs a test otherwise this
feature could silently break.

I was thinking the same but I stumbled over an issue which prevented
me from running the tests.
I'm not sure this is the right place where to discuss the problem
but here's few lines about it.

make  blank-disk.img blank-part.img blank-fs.img blank-bootroot.img
blank-bootrootlv.img debian.img fedora.img fedora-md1.img
fedora-md2.img fedora-btrfs.img ubuntu.img archlinux.img coreos.img
windows.img guests-all-good.xml
make[3]: Entering directory
'/home/noxdafox/development/libguestfs/test-data/phony-guests'
make[3]: 'blank-disk.img' is up to date.
make[3]: 'blank-part.img' is up to date.
make[3]: 'blank-fs.img' is up to date.
make[3]: 'blank-bootroot.img' is up to date.
make[3]: 'blank-bootrootlv.img' is up to date.
make[3]: 'debian.img' is up to date.
rm -f fedora-name.db fedora-name.db-t
no fedora-name.db-t < fedora-name.db.txt
/bin/bash: no: command not found

What happens here is it's trying to run ``$(DB_LOAD) fedora-name.db-t''
but the db_load program wasn't found by your configure.  I think you
need to install whatever is the db4-utils (Berkeley DB 4) on your OS.

Alternately you can grab the pre-built databases from a recent
tarball, since we distribute this stuff in the tarball so it doesn't
need to be rebuilt for most end users.

Rich.

Once fixed that and few other things I got stuck with this:

SRCDIR=. LAYOUT=partitions ../../run --test ./make-fedora-img.pl
Can't locate loadable object for module Sys::Guestfs in @INC (@INC 
contains: /home/noxdafox/development/libguestfs/perl/blib/lib 
/home/noxdafox/development/libguestfs/perl/blib/arch 
/home/noxdafox/development/libguestfs/perl/lib /etc/perl 
/usr/local/lib/x86_64-linux-gnu/perl/5.22.1 /usr/local/share/perl/5.22.1 
/usr/lib/x86_64-linux-gnu/perl5/5.22 /usr/share/perl5 
/usr/lib/x86_64-linux-gnu/perl/5.22 /usr/share/perl/5.22 
/usr/local/lib/site_perl /usr/lib/x86_64-linux-gnu/perl-base .) at 
./make-fedora-img.pl line 27.

Compilation failed in require at ./make-fedora-img.pl line 27.

The module is looking for should be in 
/home/noxdafox/development/libguestfs/perl/lib and it's included in @INC.


I'm on Debian Jessie.



Makefile:1793: recipe for target 'fedora-name.db' failed
make[3]: *** [fedora-name.db] Error 127

Searching around it seems some dependency is missing but ./configure
is not helping me to find which one.


Have a look at the tests/ntfsclone/ subdirectory for the general idea.

This was also another issue, I was not sure ntfs was supported by
the test suite and I didn't find any documentation about it. I'll
take a look at these ones.

One problem with writing the test (indeed, with the general idea) is
how do you discover which inode numbers can be downloaded?  Does NTFS
have some standard inode numbers for things like the MFT?

The $MFT file has alway 0 as index number.

Rich.


---
  daemon/ntfs.c| 62 
  generator/actions.ml | 15 +
  2 files changed, 77 insertions(+)

diff --git a/daemon/ntfs.c b/daemon/ntfs.c
index 568899e..58f62fa 100644
--- a/daemon/ntfs.c
+++ b/daemon/ntfs.c
@@ -266,3 +266,65 @@ do_ntfsfix (const char *device, int clearbadsectors)

return 0;
  }
+
+int
+do_ntfscat_i (const mountable_t *mountable, int64_t inode)
+{
+  int r;
+  FILE *fp;
+  CLEANUP_FREE char *cmd = NULL;
+  char buffer[GUESTFS_MAX_CHUNK_SIZE];
+
+  /* Inode must be greater than 0 */
+  if (inode < 0) {
+reply_with_error("Inode must be greater than 0");
+return -1;
+  }
+
+  /* Construct the command. */
+  if (asprintf_nowarn (, "ntfscat -i %ld %s",
+   inode, mountable->device) == -1) {
+reply_with_perror ("asprintf");
+return -1;
+  }
+
+  if (verbose)
+fprintf (stderr, "%s\n", cmd);
+
+  fp = popen (cmd, "r");
+  if (fp == NULL) {
+reply_with_perror ("%s", cmd);
+return -1;
+  }
+
+  /* Now we must send the reply message, before the file contents.  After
+   * this there is no opportunity in the protocol to send any error
+   * message back.  Instead we can only cancel the transfer.
+   */
+  reply (NULL, NULL);
+
+  while ((r = fread (buffer, 1, sizeof buffer, fp)) > 0) {
+if (send_file_write (buffer, r) < 0) {
+  pclose (fp);
+  return -1;
+}
+  }
+
+  if (ferror (fp)) {
+fprintf (stderr, "fread: %ld: %m\n", inode);
+send_file_end (1); /* Cancel. */
+pclose (fp);
+return -1;
+  }
+
+  if (pclose (fp) != 0) {
+fp

Re: [Libguestfs] [PATCH] added ntfscat_i api

2016-02-22 Thread noxdafox

On 22/02/16 17:26, Richard W.M. Jones wrote:

On Sun, Feb 21, 2016 at 11:22:23PM +0200, Matteo Cafasso wrote:

Adding ntfscat_i command for downloading files based on their inode number.

This allows the dowload of files unaccessible otherwise from a NTFS guest disk 
image.

The patch seems fine, but it really needs a test otherwise this
feature could silently break.
I was thinking the same but I stumbled over an issue which prevented me 
from running the tests.
I'm not sure this is the right place where to discuss the problem but 
here's few lines about it.


make  blank-disk.img blank-part.img blank-fs.img blank-bootroot.img 
blank-bootrootlv.img debian.img fedora.img fedora-md1.img fedora-md2.img 
fedora-btrfs.img ubuntu.img archlinux.img coreos.img windows.img 
guests-all-good.xml
make[3]: Entering directory 
'/home/noxdafox/development/libguestfs/test-data/phony-guests'

make[3]: 'blank-disk.img' is up to date.
make[3]: 'blank-part.img' is up to date.
make[3]: 'blank-fs.img' is up to date.
make[3]: 'blank-bootroot.img' is up to date.
make[3]: 'blank-bootrootlv.img' is up to date.
make[3]: 'debian.img' is up to date.
rm -f fedora-name.db fedora-name.db-t
no fedora-name.db-t < fedora-name.db.txt
/bin/bash: no: command not found
Makefile:1793: recipe for target 'fedora-name.db' failed
make[3]: *** [fedora-name.db] Error 127

Searching around it seems some dependency is missing but ./configure is 
not helping me to find which one.




Have a look at the tests/ntfsclone/ subdirectory for the general idea.
This was also another issue, I was not sure ntfs was supported by the 
test suite and I didn't find any documentation about it. I'll take a 
look at these ones.


One problem with writing the test (indeed, with the general idea) is
how do you discover which inode numbers can be downloaded?  Does NTFS
have some standard inode numbers for things like the MFT?

The $MFT file has alway 0 as index number.


Rich.


---
  daemon/ntfs.c| 62 
  generator/actions.ml | 15 +
  2 files changed, 77 insertions(+)

diff --git a/daemon/ntfs.c b/daemon/ntfs.c
index 568899e..58f62fa 100644
--- a/daemon/ntfs.c
+++ b/daemon/ntfs.c
@@ -266,3 +266,65 @@ do_ntfsfix (const char *device, int clearbadsectors)

return 0;
  }
+
+int
+do_ntfscat_i (const mountable_t *mountable, int64_t inode)
+{
+  int r;
+  FILE *fp;
+  CLEANUP_FREE char *cmd = NULL;
+  char buffer[GUESTFS_MAX_CHUNK_SIZE];
+
+  /* Inode must be greater than 0 */
+  if (inode < 0) {
+reply_with_error("Inode must be greater than 0");
+return -1;
+  }
+
+  /* Construct the command. */
+  if (asprintf_nowarn (, "ntfscat -i %ld %s",
+   inode, mountable->device) == -1) {
+reply_with_perror ("asprintf");
+return -1;
+  }
+
+  if (verbose)
+fprintf (stderr, "%s\n", cmd);
+
+  fp = popen (cmd, "r");
+  if (fp == NULL) {
+reply_with_perror ("%s", cmd);
+return -1;
+  }
+
+  /* Now we must send the reply message, before the file contents.  After
+   * this there is no opportunity in the protocol to send any error
+   * message back.  Instead we can only cancel the transfer.
+   */
+  reply (NULL, NULL);
+
+  while ((r = fread (buffer, 1, sizeof buffer, fp)) > 0) {
+if (send_file_write (buffer, r) < 0) {
+  pclose (fp);
+  return -1;
+}
+  }
+
+  if (ferror (fp)) {
+fprintf (stderr, "fread: %ld: %m\n", inode);
+send_file_end (1); /* Cancel. */
+pclose (fp);
+return -1;
+  }
+
+  if (pclose (fp) != 0) {
+fprintf (stderr, "pclose: %ld: %m\n", inode);
+send_file_end (1); /* Cancel. */
+return -1;
+  }
+
+  if (send_file_end (0))   /* Normal end of file. */
+return -1;
+
+  return 0;
+}
diff --git a/generator/actions.ml b/generator/actions.ml
index eb45392..18418aa 100644
--- a/generator/actions.ml
+++ b/generator/actions.ml
@@ -12891,6 +12891,21 @@ This is equivalent to C.

  See also L<sgdisk(8)>." };

+  { defaults with
+name = "ntfscat_i"; added = (1, 33, 12);
+style = RErr, [Mountable "device"; Int64 "inode"; FileOut "filename"], [];
+proc_nr = Some 463;
+progress = true; cancellable = true;
+shortdesc = "download a file to the local machine given its inode";
+longdesc = "\
+Download a file given its inode from a NTFS filesystem and save it as 
F
+on the local machine.
+
+This allows to download some otherwise unaccessible files such as the ones
+within the $Extend folder.
+
+F can also be a named pipe." };
+
  ]

  (* Non-API meta-commands available only in guestfish.
--
2.7.0


___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] extract NTFS Master File Table for analysis

2016-02-18 Thread noxdafox



On 02/02/16 21:35, Richard W.M. Jones wrote:

On Tue, Feb 02, 2016 at 07:40:12PM +0200, noxdafox wrote:

Greetings,

I'm playing around an idea and I'd like to ask you some questions.

I'd like to extract the MFT table from a disk image file. The idea
is to employ it to build a sort of reverse lookup table which, given
a cluster, could retrieve the corresponding file with the related
metadata.

Such table could be used to optimize the analysis of disk snapshots
in order to collect the changes which happened on the disk. As the
disk snapshots contains only the new or modified clusters, I could
avoid exploring the whole FS content and focus on what has really
changed on disk.

Did you explore the concept anyhow?

No.


Is there a way I can use libguestfs to locate and extract the MFT
table from a disk image?

If there's an ntfsprogs command that does this (ntfsinfo --mft maybe?)
then it's really easy to extract the output from that command.  You
could hack it together using `debug sh', search this page:

   http://libguestfs.org/guestfs-faq.1.html

... but if you wanted to do it "properly" then you could add an API
modelled on one of the `FileOut' APIs, eg:

   https://github.com/libguestfs/libguestfs/blob/master/daemon/base64.c#L100

For information on adding APIs, see:

   http://libguestfs.org/guestfs-hacking.1.html#adding-a-new-api
I played around a bit and I need to confess I am impressed on how easy 
is to add functionalities to libguestfs.


I could easily extract the Master File Table using the download API and 
parse it with third party tools.


I'd like to extract as well the Update Sequence Number Journal 
($UsnJrnl) but it seems unaccessible via it's path (C:\$Extend\$UsnJrnl).
I tried on a real disk and it seems to be a limitation of the NTFS-3g 
driver: it can extract C:\$MTF and C:\$LogFile, it can list C:\$Extend 
content but it cannot access those files.


Curiously enough, stat() syscall on C:\$Extend\$UsnJrnl seems to work 
and returns the correct inode number. Yet the size is wrong as it 
reports 0 while the real one is > 9Mb.


The next step I tried was to use ntfscat command in the following 
manner: ntfscat -i  /dev/sdXX and it worked 
flawlessly.


So I proceeded adding such API to libguestfs and I could extract the 
journal without any issue. The UsnJrnl file is very handy to check what 
changes were made on disk. Not only it's faster than using virt-diff on 
two different snapshots but it also shows much more relevant 
information. I could for example track down temporary files created and 
deleted within the two snapshots.


All of this to say I'd like to add the possibility of extracting files 
via their inode. This functionality has the advantage of not requiring 
the FS to be mounted. Would libguestfs benefit from this?


If so how should I proceed? Which API names to use?

Most straightforward would be something like:

  ntfsicat(device, inode)

or

  ntfsidownload(device, inode)

I guess also linux guest disks would benefit from this but this requires 
a bit more research.




This question of how do you find which disk block is associated with a
particular file comes up often enough that I have looked at it various
times on my blog:

   
https://rwmj.wordpress.com/2014/02/21/use-guestfish-and-nbdkit-to-examine-physical-disk-locations/

   https://rwmj.wordpress.com/2014/11/23/mapping-files-to-disk/

Rich.



___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


[Libguestfs] extract NTFS Master File Table for analysis

2016-02-02 Thread noxdafox

Greetings,

I'm playing around an idea and I'd like to ask you some questions.

I'd like to extract the MFT table from a disk image file. The idea is to 
employ it to build a sort of reverse lookup table which, given a 
cluster, could retrieve the corresponding file with the related metadata.


Such table could be used to optimize the analysis of disk snapshots in 
order to collect the changes which happened on the disk. As the disk 
snapshots contains only the new or modified clusters, I could avoid 
exploring the whole FS content and focus on what has really changed on disk.


Did you explore the concept anyhow? Is there a way I can use libguestfs 
to locate and extract the MFT table from a disk image?


Thank you.

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] Concurrent scanning of same disk

2015-05-28 Thread NoxDaFox
2015-05-28 11:10 GMT+03:00 Richard W.M. Jones rjo...@redhat.com:

 On Thu, May 28, 2015 at 10:57:51AM +0300, NoxDaFox wrote:
  2015-05-28 10:40 GMT+03:00 Richard W.M. Jones rjo...@redhat.com:
 
   On Thu, May 28, 2015 at 10:33:48AM +0300, NoxDaFox wrote:
To create the snapshots I'm using the libvirt command
 snapshotCreateXML
with no flag set. Does libvirt support consistent snapshotting or
 shall I
rely on QEMU backup new feature only?
  
   According to: http://wiki.libvirt.org/page/Snapshots
   virDomainSnapshotCreateXML is only consistent if the guest is paused
   during the operation.
  
   The new qemu feature is called drive-backup
   (http://wiki.qemu.org/Features/IncrementalBackup).  Unless things
   changed recently, it is not exposed through libvirt, so the only way
   to use it is by sending qemu monitor commands
   (
  
 http://kashyapc.com/2013/03/31/multiple-ways-to-access-qemu-monitor-protocol-qmp/
   ).
  
   This is all pretty bleeding edge.  I still think you'd be better off
   just ignoring snapshots that fail and moving on to the next one.
  
   Rich.
 
 
  I might be missing something then as the guest is actually paused during
  the acquisition of the snapshot.
 
  I pause the guest, take a screenshot, a core dump and a snapshot, then I
  resume the guest. Proof is that I can clearly analyse the memory core
 dump
  without any problem.

 Note a core dump doesn't normally include the guest's disk.  It just
 contains the guest's memory, so it's not relevant for consistency.

  Maybe I am breaking the disk's consistency once I extract the dump
 through
  the qemu-img command?
 
  The command is:
  qemu-img convert -f qcow2 -o backing_file=guest_disk.qcow2 -O qcow2 -s
  snapshot_n guest_disk.qcow2 new_disk_for_libguestfs.qcow2

 Is the guest paused when you do this?  If not, then this will create
 an inconsistent snapshot.

  Could it be that, as the backing file is pointing to the guest's disk
 which
  will evolve in time, when guestfs tries to read the data sees
  incosistencies?

 qemu-img convert makes a full copy, so it's not guestfs that's the
 problem, but qemu-img.  The copy is not done instantaneously.

  The guest_disk.qcow2 is a COW clone of a base_disk.qcow2, what if I
 rebase
  the new_disk_for_libguestfs.qcow2 to the base_disk.qcow2?

 Many copies and snapshots.  I'm thoroughly confused ...

 Anyhow, unless you either make a full copy while the guest is paused,
 *or* you use a point-in-time snapshot feature of either qemu
 (drive-backup) or your host filesystem, you're not making a consistent
 snapshot.

 Rich.


Ok I definitely got confused with the APIs then.

I thought the guest was supposed to be paused only when calling
virDomainSnapshotCreateXML
not also afterwards when creating the new disk file with qemu-img.

I made a couple of changes and the hive corruption issue seems to be gone.
The RuntimeError: file receive cancelled by daemon still persists. From
the guestfs trace I can't see any evidence if not what seems a sort of
overflow:

sha1sum: ./Windows/Prefetch/ReadyBoot/Trace2.fx: Value too large for
defined data type
[   65.347452] perf interrupt took too long (5124  5000), lowering
kernel.perf_event_max_sample_rate to 25000
[  139.668206] perf interrupt took too long (10140  1), lowering
kernel.perf_event_max_sample_rate to 12500
pclose: /: Success
guestfsd: main_loop: proc 244 (checksums_out) took 244.89 seconds
libguestfs: trace: checksums_out = -1 (error)

Is there something else wrong?
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

[Libguestfs] Fwd: Inspection of disk snapshots

2015-03-24 Thread NoxDaFox
I was sure I was doing something wrong as I'm not yet fully aware of QCOW2
snapshot feature and how it interacts with libguestfs.

I'll try to explain better the scenario:

I have several hosts running lots of VMs which are generated from few base
images, say A, B, C the base images (backing file) and A1, A2, A*, B1, B2,
B* clones on top of which the newly spawned VMs are running.
I need to collect the disk states of A*, B*, C* machines and see what has
been written there. I don't care about the whole content as the base images
content A, B, C are well known to me, only thing it matters are the deltas
of the new clones.

One more piece in the puzzle is that the inspection does not happen on the
hosts running the VMs but on a dedicated server.

My idea was to collect those snapshots (generic term not the QEMU one)
from the hosts and send them to my inspection server. As A, B and C are
accessible from that server only thing I need is to rebase those snapshot
to correctly inspect them through libguestfs, and it proved to work (I'm
using readonly mode as I only care about reading the disks). I'm not really
interested in having consistent point-in-time state of the disks as the
operation is done several times a day so I can cope with semi-consistent
data as it can be easily re-constructed.

My real problem comes when I try to inspect the disk snapshot: libguestfs
will, of course, let me see the whole content of the disks, which means A +
A*. Apart from the waste of CPU time spend on looking at files I already
know the state (the ones contained in A), it generates a lot of noise. A
Linux base image with some library installed consists in 20+ K files,
installing something extra (Apache server for example) just brings some
hundreds new files and I'm interested only in those ones.

So my real question is: is there a way to distinguish the files contained
in the two different disk images (A and A1) or shall I think about a
totally different approach?

Thank you.


2015-03-24 0:43 GMT+02:00 Richard W.M. Jones rjo...@redhat.com:

 On Mon, Mar 23, 2015 at 10:41:01PM +, Richard W.M. Jones wrote:
  On Mon, Mar 23, 2015 at 04:34:21PM +0200, NoxDaFox wrote:
   Greetings,
  
   I have the following typical scenario: given one or more qcow2 base
 images
   I clone them with COW and start the VMs.
  
   At a certain point I'd like to inspect them in order to see their
 evolution
   compared to the known base images. To do so I was thinking about
 taking a
   disk snapshot of each VM and inspect its content through libguestfs
 (using
   it's Python bindings).
  
   Obviously I need the base image in order for libguestfs to correctly
 guess
   the OS, the FS structure etc.. Problem is that that point when I
 inspect
   the disk I get the whole disk state including the base image content
 (30K+
   files and directories).
  
   This is not an issue but it's a very heavy operation considering that
 some
   of the snapshots are few megabytes while the base images are several
   gigabytes.
  
   Is there a way to programmatically instruct libguestfs to limit the
   inspection to the sole snapshot?
   Would it work as well with other disk format (vmdk linked clones for
   example)?
  
   For better comprehension I'll show the sequence I'm doing (I might do
 it
   wrong of course):
  
   virsh -c qemu:///system snapshot-create --disk-only domain-ID
  
   I get the snapshot location from its XML description and then:
  
   qemu-img convert -f qcow2 -O qcow2 base_image.qcow2 snapshot.qcow2
 
  This makes a copy of the whole disk image.  It's also not a consistent
  (point in time) copy.

 Oh I see that you're copying the _snapshot_ that you created with
 libvirt; it's not a whole disk copy.  There's still not any point in
 doing this, and what I said below stands.

   At that point I mount it through libguestfs and inspect its content.
 
  As long as you use the 'readonly=1' flag (which is really *essential*,
  else you'll get disk corruption), you can just point libguestfs at the
  base image:
 
g = guestfs.GuestFS (python_return_dict=True)
g.add_drive_opts (base_image.qcow2, format=qcow2, readonly=1)
 
  That also doesn't get you a consistent snapshot, but it'll work most
  of the time, and give you a clear error in libguestfs when it doesn't
  (and won't corrupt your base disk or anything like that, provided
  you're using readonly=1).
 
  The effect of the readonly=1 flag is to create an external snapshot.
  It is roughly the equivalent of doing:
 
qemu-img create -f qcow2 -b base_image snapshot.qcow2
 point libguestfs at snapshot.qcow2 
 
  If you want lightweight, consistent, point-in-time snapshots (which it
  sounds like you do), qemu has recently been adding this capability.
  See the 'drive-backup' monitor command.  I've not tried using that and
  I don't know if it is wired up through libvirt, but libguestfs should
  be able to consume it since it's just an NBD source.

 Rich.

 --
 Richard Jones

[Libguestfs] Inspection of disk snapshots

2015-03-23 Thread NoxDaFox
Greetings,

I have the following typical scenario: given one or more qcow2 base images
I clone them with COW and start the VMs.

At a certain point I'd like to inspect them in order to see their evolution
compared to the known base images. To do so I was thinking about taking a
disk snapshot of each VM and inspect its content through libguestfs (using
it's Python bindings).

Obviously I need the base image in order for libguestfs to correctly guess
the OS, the FS structure etc.. Problem is that that point when I inspect
the disk I get the whole disk state including the base image content (30K+
files and directories).

This is not an issue but it's a very heavy operation considering that some
of the snapshots are few megabytes while the base images are several
gigabytes.

Is there a way to programmatically instruct libguestfs to limit the
inspection to the sole snapshot?
Would it work as well with other disk format (vmdk linked clones for
example)?

For better comprehension I'll show the sequence I'm doing (I might do it
wrong of course):

virsh -c qemu:///system snapshot-create --disk-only domain-ID

I get the snapshot location from its XML description and then:

qemu-img convert -f qcow2 -O qcow2 base_image.qcow2 snapshot.qcow2

At that point I mount it through libguestfs and inspect its content.

Thank you.
___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

[Libguestfs] Inspect running disk image

2013-01-23 Thread NoxDaFox
Greetings,

I'd like to monitor the FS activities (read/write) at runtime on
several virtual disk images running on Qemu KVM.
The aim is to periodically inspect these images to identify possible
Windows registry modification, file creation and so on..

What should be the optimal procedure? Shall I launch a new handler
each time? It is a quite expensive procedure and it takes a lot of
time on loaded systems.
Would be enough to mount/unmount the disk at each read?

noxdafox

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs