On Wed, 2020-07-01 at 17:39 -0500, Benjamin Marzinski wrote:
> If kpartx is used on top of shared storage, and a device has its
> partition table changed on one machine, and then kpartx is run on
> another, it may not see the new data, because the cache still
> contains
> the old data, and there is nothing to tell the machine running kpartx
> to
> invalidate it. To solve this, kpartx should read the devices using
> direct io.
> 
> One issue with how this code has been updated is that the original
> code
> for getblock() always read 1024 bytes. The new code reads a logical
> sector size chunk of the device, and returns a pointer to the 512
> byte
> sector that the caller asked for, within that (possibly larger)
> chunk.
> This means that if the logical sector size is 512, then the code is
> now
> only reading 512 bytes.  Looking through the code for the various
> partition types, I can't see a case where more than 512 bytes is
> needed
> and getblock() is used.  If anyone has a reason why this code should
> be
> reading 1024 bytes at minmum, I can certainly change this.  But when
> I
> looked, I couldn't find a case where reading 512 bytes would cause a
> problem.
> 
> Signed-off-by: Benjamin Marzinski <bmarz...@redhat.com>

The value 1024 comes from first partx in util-linux, where Christophe 
pulled the first kpartx from: 
http://ftp.be.debian.org/pub/linux/utils/util-linux/v2.10/util-linux-2.10m.tar.gz
 
I, too, see no reason for reading more than a single block.

Two nits below. Here's one more, I'd have preferred two separate
patches for these two different issues. But never mind.

Regards,
Martin

> ---
>  kpartx/dasd.c   |  7 ++++---
>  kpartx/gpt.c    | 22 ++++++++++------------
>  kpartx/kpartx.c | 46 +++++++++++++++++++++++++++++++++++-----------
>  kpartx/kpartx.h |  2 ++
>  4 files changed, 51 insertions(+), 26 deletions(-)
> 
> diff --git a/kpartx/dasd.c b/kpartx/dasd.c
> index 14b9d3aa..f0398645 100644
> --- a/kpartx/dasd.c
> +++ b/kpartx/dasd.c
> @@ -22,6 +22,7 @@
>   * along with this program.  If not, see <
> http://www.gnu.org/licenses/>;.
>   */
>  
> +#define _GNU_SOURCE
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <unistd.h>
> @@ -117,13 +118,13 @@ read_dasd_pt(int fd, __attribute__((unused))
> struct slice all,
>  
>               sprintf(pathname, "/dev/.kpartx-node-%u-%u",
>                       (unsigned int)major(dev), (unsigned
> int)minor(dev));
> -             if ((fd_dasd = open(pathname, O_RDONLY)) == -1) {
> +             if ((fd_dasd = open(pathname, O_RDONLY | O_DIRECT)) ==
> -1) {
>                       /* Devicenode does not exist. Try to create one
> */
>                       if (mknod(pathname, 0600 | S_IFBLK, dev) == -1)
> {
>                               /* Couldn't create a device node */
>                               return -1;
>                       }
> -                     fd_dasd = open(pathname, O_RDONLY);
> +                     fd_dasd = open(pathname, O_RDONLY | O_DIRECT);
>                       /*
>                        * The file will vanish when the last process
> (we)
>                        * has ceased to access it.
> @@ -175,7 +176,7 @@ read_dasd_pt(int fd, __attribute__((unused))
> struct slice all,
>        * Get volume label, extract name and type.
>        */
>  
> -     if (!(data = (unsigned char *)malloc(blocksize)))
> +     if (aligned_malloc((void **)&data, blocksize, NULL))
>               goto out;
>  
>  
> diff --git a/kpartx/gpt.c b/kpartx/gpt.c
> index 785b34ea..f7fefb70 100644
> --- a/kpartx/gpt.c
> +++ b/kpartx/gpt.c
> @@ -243,8 +243,7 @@ alloc_read_gpt_entries(int fd, gpt_header * gpt)
>  
>       if (!count) return NULL;
>  
> -     pte = (gpt_entry *)malloc(count);
> -     if (!pte)
> +     if (aligned_malloc((void **)&pte, get_sector_size(fd), &count))
>               return NULL;
>       memset(pte, 0, count);
>  
> @@ -269,12 +268,11 @@ static gpt_header *
>  alloc_read_gpt_header(int fd, uint64_t lba)
>  {
>       gpt_header *gpt;
> -     gpt = (gpt_header *)
> -         malloc(sizeof (gpt_header));
> -     if (!gpt)
> +     size_t size = sizeof (gpt_header);
> +     if (aligned_malloc((void **)&gpt, get_sector_size(fd), &size))
>               return NULL;
> -     memset(gpt, 0, sizeof (*gpt));
> -     if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) {
> +     memset(gpt, 0, size);
> +     if (!read_lba(fd, lba, gpt, size)) {
>               free(gpt);
>               return NULL;
>       }
> @@ -498,6 +496,7 @@ find_valid_gpt(int fd, gpt_header ** gpt,
> gpt_entry ** ptes)
>       gpt_header *pgpt = NULL, *agpt = NULL;
>       gpt_entry *pptes = NULL, *aptes = NULL;
>       legacy_mbr *legacymbr = NULL;
> +     size_t size = sizeof(legacy_mbr);
>       uint64_t lastlba;
>       if (!gpt || !ptes)
>               return 0;
> @@ -526,11 +525,10 @@ find_valid_gpt(int fd, gpt_header ** gpt,
> gpt_entry ** ptes)
>       }
>  
>       /* This will be added to the EFI Spec. per Intel after v1.02.
> */
> -     legacymbr = malloc(sizeof (*legacymbr));
> -     if (legacymbr) {
> -             memset(legacymbr, 0, sizeof (*legacymbr));
> -             read_lba(fd, 0, (uint8_t *) legacymbr,
> -                      sizeof (*legacymbr));
> +     if (aligned_malloc((void **)&legacymbr, get_sector_size(fd),
> +                        &size) == 0) {
> +             memset(legacymbr, 0, size);
> +             read_lba(fd, 0, (uint8_t *) legacymbr, size);
>               good_pmbr = is_pmbr_valid(legacymbr);
>               free(legacymbr);
>               legacymbr=NULL;
> diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c
> index d3620c5c..4716dd4d 100644
> --- a/kpartx/kpartx.c
> +++ b/kpartx/kpartx.c
> @@ -19,6 +19,7 @@
>   * cva, 2002-10-26
>   */
>  
> +#define _GNU_SOURCE
>  #include <stdio.h>
>  #include <fcntl.h>
>  #include <errno.h>
> @@ -41,7 +42,6 @@
>  
>  #define SIZE(a) (sizeof(a)/sizeof((a)[0]))
>  
> -#define READ_SIZE    1024
>  #define MAXTYPES     64
>  #define MAXSLICES    256
>  #define DM_TARGET    "linear"
> @@ -388,7 +388,7 @@ main(int argc, char **argv){
>               set_delimiter(mapname, delim);
>       }
>  
> -     fd = open(device, O_RDONLY);
> +     fd = open(device, O_RDONLY | O_DIRECT);
>  
>       if (fd == -1) {
>               perror(device);
> @@ -690,9 +690,9 @@ xmalloc (size_t size) {
>   */
>  
>  static int
> -sseek(int fd, unsigned int secnr) {
> +sseek(int fd, unsigned int secnr, int secsz) {
>       off64_t in, out;
> -     in = ((off64_t) secnr << 9);
> +     in = ((off64_t) secnr * secsz);
>       out = 1;
>  
>       if ((out = lseek64(fd, in, SEEK_SET)) != in)
> @@ -703,6 +703,21 @@ sseek(int fd, unsigned int secnr) {
>       return 0;
>  }
>  
> +int
> +aligned_malloc(void **mem_p, size_t align, size_t *size_p)
> +{
> +     size_t pgsize = getpagesize();

Nit: I'd use a static variable here and call getpagesize() only once.

> +     size_t size = pgsize;
> +     if (!mem_p || !align || (size_p && !*size_p))
> +             return EINVAL;
> +
> +     if (size_p)
> +             *size_p = size = ((*size_p + align - 1) / align) *
> align;

It would be cleaner to set *size_p only after successful return from
posix_memalign().

> +
> +     return posix_memalign(mem_p, pgsize, size);
> +}




> +
> +/* always in sector size blocks */
>  static
>  struct block {
>       unsigned int secnr;
> @@ -710,30 +725,39 @@ struct block {
>       struct block *next;
>  } *blockhead;
>  
> +/* blknr is always in 512 byte blocks */
>  char *
> -getblock (int fd, unsigned int secnr) {
> +getblock (int fd, unsigned int blknr) {
> +     unsigned int secsz = get_sector_size(fd);
> +     unsigned int blks_per_sec = secsz / 512;
> +     unsigned int secnr = blknr / blks_per_sec;
> +     unsigned int blk_off = (blknr % blks_per_sec) * 512;
>       struct block *bp;
>  
>       for (bp = blockhead; bp; bp = bp->next)
>  
>               if (bp->secnr == secnr)
> -                     return bp->block;
> +                     return bp->block + blk_off;
>  
> -     if (sseek(fd, secnr))
> +     if (sseek(fd, secnr, secsz))
>               return NULL;
>  
>       bp = xmalloc(sizeof(struct block));
>       bp->secnr = secnr;
>       bp->next = blockhead;
>       blockhead = bp;
> -     bp->block = (char *) xmalloc(READ_SIZE);
> +     if (aligned_malloc((void **)&bp->block, secsz, NULL)) {
> +             fprintf(stderr, "aligned_malloc failed\n");
> +             exit(1);
> +     }
>  
> -     if (read(fd, bp->block, READ_SIZE) != READ_SIZE) {
> +     if (read(fd, bp->block, secsz) != secsz) {
>               fprintf(stderr, "read error, sector %d\n", secnr);
> -             bp->block = NULL;
> +             blockhead = bp->next;
> +             return NULL;
>       }
>  
> -     return bp->block;
> +     return bp->block + blk_off;
>  }
>  
>  int
> diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h
> index 67edeb82..727632c1 100644
> --- a/kpartx/kpartx.h
> +++ b/kpartx/kpartx.h
> @@ -1,6 +1,7 @@
>  #ifndef _KPARTX_H
>  #define _KPARTX_H
>  
> +#include <stddef.h>
>  #include <stdint.h>
>  #include <sys/ioctl.h>
>  
> @@ -61,6 +62,7 @@ extern ptreader read_mac_pt;
>  extern ptreader read_sun_pt;
>  extern ptreader read_ps3_pt;
>  
> +int aligned_malloc(void **mem_p, size_t align, size_t *size_p);
>  char *getblock(int fd, unsigned int secnr);
>  
>  static inline unsigned int

-- 
Dr. Martin Wilck <mwi...@suse.com>, Tel. +49 (0)911 74053 2107
SUSE  Software Solutions Germany GmbH
HRB 36809, AG Nürnberg GF: Felix
Imendörffer



--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Reply via email to