[PATCH 9/10] cxgb3 - register definitions

2006-12-21 Thread Divy Le Ray
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch adds the register definitions for the
Chelsio T3 network adapter's driver.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---

 drivers/net/cxgb3/regs.h | 2195 ++
 1 files changed, 2195 insertions(+), 0 deletions(-)

diff --git a/drivers/net/cxgb3/regs.h b/drivers/net/cxgb3/regs.h
new file mode 100755
index 0000000..b56c5f5
--- /dev/null
+++ b/drivers/net/cxgb3/regs.h
@@ -0,0 +1,2195 @@
+/*
+ * Naming used by the register definitions in this file: A_<reg> is a
+ * register offset, S_<f> the bit position of field <f>, M_<f> the field's
+ * mask before shifting, V_<f>(x) places x at the field's position, F_<f> is
+ * the single-bit flag V_<f>(1U) and G_<f>(x) extracts the field from x.
+ */
+#define A_SG_CONTROL 0x0
+
+#define S_DROPPKT    20
+#define V_DROPPKT(x) ((x) << S_DROPPKT)
+#define F_DROPPKT    V_DROPPKT(1U)
+
+#define S_EGRGENCTRL    19
+#define V_EGRGENCTRL(x) ((x) << S_EGRGENCTRL)
+#define F_EGRGENCTRL    V_EGRGENCTRL(1U)
+
+#define S_USERSPACESIZE    14
+#define M_USERSPACESIZE    0x1f
+#define V_USERSPACESIZE(x) ((x) << S_USERSPACESIZE)
+
+#define S_HOSTPAGESIZE    11
+#define M_HOSTPAGESIZE    0x7
+#define V_HOSTPAGESIZE(x) ((x) << S_HOSTPAGESIZE)
+
+#define S_FLMODE    9
+#define V_FLMODE(x) ((x) << S_FLMODE)
+#define F_FLMODE    V_FLMODE(1U)
+
+#define S_PKTSHIFT    6
+#define M_PKTSHIFT    0x7
+#define V_PKTSHIFT(x) ((x) << S_PKTSHIFT)
+
+#define S_ONEINTMULTQ    5
+#define V_ONEINTMULTQ(x) ((x) << S_ONEINTMULTQ)
+#define F_ONEINTMULTQ    V_ONEINTMULTQ(1U)
+
+#define S_BIGENDIANINGRESS    2
+#define V_BIGENDIANINGRESS(x) ((x) << S_BIGENDIANINGRESS)
+#define F_BIGENDIANINGRESS    V_BIGENDIANINGRESS(1U)
+
+#define S_ISCSICOALESCING    1
+#define V_ISCSICOALESCING(x) ((x) << S_ISCSICOALESCING)
+#define F_ISCSICOALESCING    V_ISCSICOALESCING(1U)
+
+#define S_GLOBALENABLE    0
+#define V_GLOBALENABLE(x) ((x) << S_GLOBALENABLE)
+#define F_GLOBALENABLE    V_GLOBALENABLE(1U)
+
+#define S_AVOIDCQOVFL    24
+#define V_AVOIDCQOVFL(x) ((x) << S_AVOIDCQOVFL)
+#define F_AVOIDCQOVFL    V_AVOIDCQOVFL(1U)
+
+#define S_OPTONEINTMULTQ    23
+#define V_OPTONEINTMULTQ(x) ((x) << S_OPTONEINTMULTQ)
+#define F_OPTONEINTMULTQ    V_OPTONEINTMULTQ(1U)
+
+#define S_CQCRDTCTRL    22
+#define V_CQCRDTCTRL(x) ((x) << S_CQCRDTCTRL)
+#define F_CQCRDTCTRL    V_CQCRDTCTRL(1U)
+
+/*
+ * A_SG_KDOORBELL: a single-bit select flag at bit 31 and the EGRCNTX value
+ * in the low bits.  The mask is 16 bits wide; a 32-bit mask would overlap
+ * SELEGRCNTX.
+ */
+#define A_SG_KDOORBELL 0x4
+
+#define S_SELEGRCNTX    31
+#define V_SELEGRCNTX(x) ((x) << S_SELEGRCNTX)
+#define F_SELEGRCNTX    V_SELEGRCNTX(1U)
+
+#define S_EGRCNTX    0
+#define M_EGRCNTX    0xffff
+#define V_EGRCNTX(x) ((x) << S_EGRCNTX)
+
+/*
+ * A_SG_GTS: RSPQ occupies bits 31:29, NEWTIMER bits 28:16 and NEWINDEX the
+ * 16 bits below NEWTIMER.
+ */
+#define A_SG_GTS 0x8
+
+#define S_RSPQ    29
+#define M_RSPQ    0x7
+#define V_RSPQ(x) ((x) << S_RSPQ)
+#define G_RSPQ(x) (((x) >> S_RSPQ) & M_RSPQ)
+
+#define S_NEWTIMER    16
+#define M_NEWTIMER    0x1fff
+#define V_NEWTIMER(x) ((x) << S_NEWTIMER)
+
+#define S_NEWINDEX    0
+#define M_NEWINDEX    0xffff
+#define V_NEWINDEX(x) ((x) << S_NEWINDEX)
+
+/*
+ * A_SG_CONTEXT_CMD: a 4-bit opcode at bit 28, a busy flag at bit 27, a
+ * 7-bit CQ credit count at bit 20, one flag per context type (CQ,
+ * RESPONSEQ, EGRESS, FREELIST) at bits 19..16 and the context id in the
+ * low 16 bits.
+ */
+#define A_SG_CONTEXT_CMD 0xc
+
+#define S_CONTEXT_CMD_OPCODE    28
+#define M_CONTEXT_CMD_OPCODE    0xf
+#define V_CONTEXT_CMD_OPCODE(x) ((x) << S_CONTEXT_CMD_OPCODE)
+
+#define S_CONTEXT_CMD_BUSY    27
+#define V_CONTEXT_CMD_BUSY(x) ((x) << S_CONTEXT_CMD_BUSY)
+#define F_CONTEXT_CMD_BUSY    V_CONTEXT_CMD_BUSY(1U)
+
+#define S_CQ_CREDIT    20
+
+#define M_CQ_CREDIT    0x7f
+
+#define V_CQ_CREDIT(x) ((x) << S_CQ_CREDIT)
+
+#define G_CQ_CREDIT(x) (((x) >> S_CQ_CREDIT) & M_CQ_CREDIT)
+
+#define S_CQ    19
+
+#define V_CQ(x) ((x) << S_CQ)
+#define F_CQ    V_CQ(1U)
+
+#define S_RESPONSEQ    18
+#define V_RESPONSEQ(x) ((x) << S_RESPONSEQ)
+#define F_RESPONSEQ    V_RESPONSEQ(1U)
+
+#define S_EGRESS    17
+#define V_EGRESS(x) ((x) << S_EGRESS)
+#define F_EGRESS    V_EGRESS(1U)
+
+#define S_FREELIST    16
+#define V_FREELIST(x) ((x) << S_FREELIST)
+#define F_FREELIST    V_FREELIST(1U)
+
+#define S_CONTEXT    0
+#define M_CONTEXT    0xffff
+#define V_CONTEXT(x) ((x) << S_CONTEXT)
+
+#define G_CONTEXT(x) (((x) >> S_CONTEXT) & M_CONTEXT)
+
+/* Four context data words followed by the four matching mask words. */
+#define A_SG_CONTEXT_DATA0 0x10
+
+#define A_SG_CONTEXT_DATA1 0x14
+
+#define A_SG_CONTEXT_DATA2 0x18
+
+#define A_SG_CONTEXT_DATA3 0x1c
+
+#define A_SG_CONTEXT_MASK0 0x20
+
+#define A_SG_CONTEXT_MASK1 0x24
+
+#define A_SG_CONTEXT_MASK2 0x28
+
+#define A_SG_CONTEXT_MASK3 0x2c
+
+#define A_SG_RSPQ_CREDIT_RETURN 0x30
+
+/* NOTE(review): CREDITS is taken to be a 16-bit field -- confirm the width. */
+#define S_CREDITS    0
+#define M_CREDITS    0xffff
+#define V_CREDITS(x) ((x) << S_CREDITS)
+
+/* Interrupt, doorbell-threshold and status registers at offsets 0x34..0x5c. */
+#define A_SG_DATA_INTR 0x34
+
+#define S_ERRINTR    31
+#define V_ERRINTR(x) ((x) << S_ERRINTR)
+#define F_ERRINTR    V_ERRINTR(1U)
+
+#define A_SG_HI_DRB_HI_THRSH 0x38
+
+#define A_SG_HI_DRB_LO_THRSH 0x3c
+
+#define A_SG_LO_DRB_HI_THRSH 0x40
+
+#define A_SG_LO_DRB_LO_THRSH 0x44
+
+#define A_SG_RSPQ_FL_STATUS 0x4c
+
+#define S_RSPQ0DISABLED    8
+
+#define A_SG_EGR_RCQ_DRB_THRSH 0x54
+
+#define S_HIRCQDRBTHRSH    16
+#define M_HIRCQDRBTHRSH    0x7ff
+#define V_HIRCQDRBTHRSH(x) ((x) << S_HIRCQDRBTHRSH)
+
+#define S_LORCQDRBTHRSH    0
+#define M_LORCQDRBTHRSH    0x7ff
+#define V_LORCQDRBTHRSH(x) ((x) << S_LORCQDRBTHRSH)
+
+#define A_SG_EGR_CNTX_BADDR 0x58
+
+#define A_SG_INT_CAUSE 0x5c
+
+#define S_RSPQDISABLED    3
+#define V_RSPQDISABLED(x) ((x) << S_RSPQDISABLED)
+#define F_RSPQDISABLED    V_RSPQDISABLED(1U)
+
+#define 

[PATCH 8/10] cxgb3 - offload capabilities

2006-12-21 Thread Divy Le Ray
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch implements the offload capabilities of the
Chelsio network adapter's driver.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---

 drivers/net/cxgb3/cxgb3_offload.c | 1222 +
 drivers/net/cxgb3/l2t.c   |  450 ++
 2 files changed, 1672 insertions(+), 0 deletions(-)

diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
new file mode 100755
index 0000000..3abd4d2
--- /dev/null
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -0,0 +1,1222 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "common.h"
+#include "regs.h"
+#include "cxgb3_ioctl.h"
+#include "cxgb3_ctl_defs.h"
+#include "cxgb3_defs.h"
+#include "l2t.h"
+#include "firmware_exports.h"
+#include "cxgb3_offload.h"
+
+static LIST_HEAD(client_list);   /* clients added by cxgb3_register_client() */
+static LIST_HEAD(ofld_dev_list);   /* struct t3cdev entries, linked through their ofld_dev_list member */
+static DEFINE_MUTEX(cxgb3_db_lock);   /* NOTE(review): appears to be the lock held around the client/device list walks below -- confirm */
+
+static DEFINE_RWLOCK(adapter_list_lock);   /* NOTE(review): presumably guards adapter_list; its users are outside this excerpt */
+static LIST_HEAD(adapter_list);
+
+static const unsigned int MAX_ATIDS = 64 * 1024;   /* NOTE(review): presumably an upper bound on ATIDs -- confirm */
+static const unsigned int ATID_BASE = 0x10;   /* NOTE(review): presumably the first ATID value -- confirm */
+
+/*
+ * Return non-zero if OFFLOAD_DEVMAP_BIT is set in the open_device_map of
+ * the adapter that tdev2adap() associates with @tdev.
+ */
+static inline int offload_activated(struct t3cdev *tdev)
+{
+	const struct adapter *adapter = tdev2adap(tdev);
+
+	return test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map);
+}
+
+/**
+ *	cxgb3_register_client - register an offload client
+ *	@client: the client
+ *
+ *	Append the client to the client list and, if it supplies an add
+ *	callback, call that callback for each activated offload device.
+ *	Both steps run with cxgb3_db_lock held, so the callback must not try
+ *	to take that mutex again.
+ */
+void cxgb3_register_client(struct cxgb3_client *client)
+{
+	struct t3cdev *tdev;
+
+	mutex_lock(&cxgb3_db_lock);
+	list_add_tail(&client->client_list, &client_list);
+
+	if (client->add) {
+		list_for_each_entry(tdev, &ofld_dev_list, ofld_dev_list) {
+			if (offload_activated(tdev))
+				client->add(tdev);
+		}
+	}
+	mutex_unlock(&cxgb3_db_lock);
+}
+
+EXPORT_SYMBOL(cxgb3_register_client);
+
+/**
+ *	cxgb3_unregister_client - unregister an offload client
+ *	@client: the client
+ *
+ *	Remove the client from the client list and, if it supplies a remove
+ *	callback, call that callback for each activated offload device.
+ *	Both steps run with cxgb3_db_lock held, so the callback must not try
+ *	to take that mutex again.
+ */
+void cxgb3_unregister_client(struct cxgb3_client *client)
+{
+	struct t3cdev *tdev;
+
+	mutex_lock(&cxgb3_db_lock);
+	list_del(&client->client_list);
+
+	if (client->remove) {
+		list_for_each_entry(tdev, &ofld_dev_list, ofld_dev_list) {
+			if (offload_activated(tdev))
+				client->remove(tdev);
+		}
+	}
+	mutex_unlock(&cxgb3_db_lock);
+}
+
+EXPORT_SYMBOL(cxgb3_unregister_client);
+
+/**
+ *	cxgb3_add_clients - activate registered clients for an offload device
+ *	@tdev: the offload device
+ *
+ *	Call the add callback of every registered client that provides one,
+ *	passing @tdev.  The client list is walked with cxgb3_db_lock held.
+ */
+void cxgb3_add_clients(struct t3cdev *tdev)
+{
+	struct cxgb3_client *client;
+
+	mutex_lock(&cxgb3_db_lock);
+	list_for_each_entry(client, &client_list, client_list) {
+		if (client->add)
+			client->add(tdev);
+	}
+	mutex_unlock(&cxgb3_db_lock);
+}
+
+/**
+ * cxgb3_remove_clients - deactivates registered clients
+ *for an offload device
+ * @tdev: the offload device
+ *
+ * Call backs all registered clients once a offload device is deactivated
+ */
+void 

[PATCH 7/10] cxgb3 - offload header files

2006-12-21 Thread Divy Le Ray
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch implements the offload operations header files
for the Chelsio T3 network adapter's driver.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---

 drivers/net/cxgb3/cxgb3_ctl_defs.h |  142 
 drivers/net/cxgb3/cxgb3_defs.h |   99 ++
 drivers/net/cxgb3/cxgb3_offload.h  |  193 +
 drivers/net/cxgb3/l2t.h|  143 
 drivers/net/cxgb3/t3_cpl.h | 1426 
 drivers/net/cxgb3/t3cdev.h |   72 ++
 6 files changed, 2075 insertions(+), 0 deletions(-)

diff --git a/drivers/net/cxgb3/cxgb3_ctl_defs.h b/drivers/net/cxgb3/cxgb3_ctl_defs.h
new file mode 100755
index 0000000..0fdc365
--- /dev/null
+++ b/drivers/net/cxgb3/cxgb3_ctl_defs.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2003-2006 Chelsio Communications.  All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
+ * release for licensing terms and conditions.
+ */
+
+#ifndef _CXGB3_OFFLOAD_CTL_DEFS_H
+#define _CXGB3_OFFLOAD_CTL_DEFS_H
+
+enum {
+   GET_MAX_OUTSTANDING_WR,
+   GET_TX_MAX_CHUNK,
+   GET_TID_RANGE,
+   GET_STID_RANGE,
+   GET_RTBL_RANGE,
+   GET_L2T_CAPACITY,
+   GET_MTUS,
+   GET_WR_LEN,
+   GET_IFF_FROM_MAC,
+   GET_DDP_PARAMS,
+   GET_PORTS,
+
+   ULP_ISCSI_GET_PARAMS,
+   ULP_ISCSI_SET_PARAMS,
+
+   RDMA_GET_PARAMS,
+   RDMA_CQ_OP,
+   RDMA_CQ_SETUP,
+   RDMA_CQ_DISABLE,
+   RDMA_CTRL_QP_SETUP,
+   RDMA_GET_MEM,
+};
+
+/*
+ * Structure used to describe a TID range.  Valid TIDs are [base, base+num).
+ */
+struct tid_range {
+   unsigned int base;  /* first TID */
+   unsigned int num;   /* number of TIDs in range */
+};
+
+/*
+ * Structure used to request the size and contents of the MTU table.
+ */
+struct mtutab {
+   unsigned int size;  /* # of entries in the MTU table */
+   const unsigned short *mtus; /* the MTU table values */
+};
+
+struct net_device;
+
+/*
+ * Structure used to request the adapter net_device owning a given MAC address.
+ */
+struct iff_mac {
+   struct net_device *dev; /* the net_device */
+   const unsigned char *mac_addr;  /* MAC address to lookup */
+   u16 vlan_tag;
+};
+
+struct pci_dev;
+
+/*
+ * Structure used to request the TCP DDP parameters.
+ */
+struct ddp_params {
+   unsigned int llimit;/* TDDP region start address */
+   unsigned int ulimit;/* TDDP region end address */
+   unsigned int tag_mask;  /* TDDP tag mask */
+   struct pci_dev *pdev;
+};
+
+struct adap_ports {
+   unsigned int nports;/* number of ports on this adapter */
+   struct net_device *lldevs[2];
+};
+
+/*
+ * Structure used to return information to the iscsi layer.
+ */
+struct ulp_iscsi_info {
+   unsigned int offset;
+   unsigned int llimit;
+   unsigned int ulimit;
+   unsigned int tagmask;
+   unsigned int pgsz3;
+   unsigned int pgsz2;
+   unsigned int pgsz1;
+   unsigned int pgsz0;
+   unsigned int max_rxsz;
+   unsigned int max_txsz;
+   struct pci_dev *pdev;
+};
+
+/*
+ * Structure used to return information to the RDMA layer.
+ */
+struct rdma_info {
+   unsigned int tpt_base;  /* TPT base address */
+   unsigned int tpt_top;   /* TPT last entry address */
+   unsigned int pbl_base;  /* PBL base address */
+   unsigned int pbl_top;   /* PBL last entry address */
+   unsigned int rqt_base;  /* RQT base address */
+   unsigned int rqt_top;   /* RQT last entry address */
+   unsigned int udbell_len;/* user doorbell region length */
+   unsigned long udbell_physbase;  /* user doorbell physical start addr */
+   void __iomem *kdb_addr; /* kernel doorbell register address */
+   struct pci_dev *pdev;   /* associated PCI device */
+};
+
+/*
+ * Structure used to request an operation on an RDMA completion queue.
+ */
+struct rdma_cq_op {
+   unsigned int id;
+   unsigned int op;
+   unsigned int credits;
+};
+
+/*
+ * Structure used to setup RDMA completion queues.
+ */
+struct rdma_cq_setup {
+   unsigned int id;
+   unsigned long long base_addr;
+   unsigned int size;
+   unsigned int credits;
+   unsigned int credit_thres;
+   unsigned int ovfl_mode;
+};
+
+/*
+ * Structure used to setup the RDMA control egress context.
+ */
+struct rdma_ctrlqp_setup {
+   unsigned long long base_addr;
+   unsigned int size;
+};
+#endif /* _CXGB3_OFFLOAD_CTL_DEFS_H */
diff --git a/drivers/net/cxgb3/cxgb3_defs.h b/drivers/net/cxgb3/cxgb3_defs.h
new file mode 100755
index 0000000..82344c2
--- /dev/null
+++ b/drivers/net/cxgb3/cxgb3_defs.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 

[PATCH 10/10] cxgb3 - build files and versioning

2006-12-21 Thread Divy Le Ray
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch implements build files and versioning for the 
Chelsio T3 network adapter's driver.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---

 drivers/net/Kconfig |   18 ++
 drivers/net/Makefile|1 +
 drivers/net/cxgb3/Makefile  |8 
 drivers/net/cxgb3/version.h |   24 
 4 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 8aa8dd0..f8742f1 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2392,6 +2392,24 @@ config CHELSIO_T1_NAPI
  NAPI is a driver API designed to reduce CPU and interrupt load
  when the driver is receiving lots of packets from the card.
 
+config CHELSIO_T3
+tristate "Chelsio Communications T3 10Gb Ethernet support"
+depends on PCI
+help
+  This driver supports Chelsio T3-based gigabit and 10Gb Ethernet
+  adapters.
+
+  For general information about Chelsio and our products, visit
+  our website at <http://www.chelsio.com>.
+
+  For customer support, please visit our customer support page at
+  <http://www.chelsio.com/support.htm>.
+
+  Please send feedback to <[EMAIL PROTECTED]>.
+
+  To compile this driver as a module, choose M here: the module
+  will be called cxgb3.
+
 config EHEA
tristate "eHEA Ethernet support"
depends on IBMEBUS
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 4c0d4e5..5c66643 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_E1000) += e1000/
 obj-$(CONFIG_IBM_EMAC) += ibm_emac/
 obj-$(CONFIG_IXGB) += ixgb/
 obj-$(CONFIG_CHELSIO_T1) += chelsio/
+obj-$(CONFIG_CHELSIO_T3) += cxgb3/
 obj-$(CONFIG_EHEA) += ehea/
 obj-$(CONFIG_BONDING) += bonding/
 obj-$(CONFIG_GIANFAR) += gianfar_driver.o
diff --git a/drivers/net/cxgb3/Makefile b/drivers/net/cxgb3/Makefile
new file mode 100755
index 0000000..3434679
--- /dev/null
+++ b/drivers/net/cxgb3/Makefile
@@ -0,0 +1,8 @@
+#
+# Chelsio T3 driver
+#
+
+obj-$(CONFIG_CHELSIO_T3) += cxgb3.o
+
+cxgb3-objs := cxgb3_main.o ael1002.o vsc8211.o t3_hw.o mc5.o \
+ xgmac.o sge.o l2t.o cxgb3_offload.o
diff --git a/drivers/net/cxgb3/version.h b/drivers/net/cxgb3/version.h
new file mode 100755
index 0000000..1413ea3
--- /dev/null
+++ b/drivers/net/cxgb3/version.h
@@ -0,0 +1,24 @@
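+/*****************************************************************************
+ *                                                                           *
+ * File:                                                                     *
+ *  version.h                                                                *
+ *                                                                           *
+ * Description:                                                              *
+ *  Chelsio driver version defines.                                          *
+ *                                                                           *
+ * Copyright (c) 2003 - 2006 Chelsio Communications, Inc.                    *
+ * All rights reserved.                                                      *
+ *                                                                           *
+ * Maintainers: [EMAIL PROTECTED]                                            *
+ *                                                                           *
+ * http://www.chelsio.com                                                    *
+ *                                                                           *
+ ****************************************************************************/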
+/* $Date: 2006/10/31 18:57:51 $ $RCSfile: version.h,v $ $Revision: 1.3 $ */
+#ifndef __CHELSIO_VERSION_H
+#define __CHELSIO_VERSION_H
+#define DRV_DESC "Chelsio T3 Network Driver"
+#define DRV_NAME "cxgb3"
+/* Driver version */
+#define DRV_VERSION "1.0"
+#endif /* __CHELSIO_VERSION_H */
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/10] cxgb3 - on board memory, MAC and PHY

2006-12-21 Thread Divy Le Ray
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch implements on board memory, MAC and PHY management
for the Chelsio T3 network adapter's driver.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---

 drivers/net/cxgb3/ael1002.c |  231 ++
 drivers/net/cxgb3/mc5.c |  453 +++
 drivers/net/cxgb3/vsc8211.c |  208 
 drivers/net/cxgb3/xgmac.c   |  389 +
 4 files changed, 1281 insertions(+), 0 deletions(-)

diff --git a/drivers/net/cxgb3/ael1002.c b/drivers/net/cxgb3/ael1002.c
new file mode 100755
index 0000000..93a90d8
--- /dev/null
+++ b/drivers/net/cxgb3/ael1002.c
@@ -0,0 +1,231 @@
+/*
+ * This file is part of the Chelsio T3 Ethernet driver.
+ *
+ * Copyright (C) 2005-2006 Chelsio Communications.  All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
+ * release for licensing terms and conditions.
+ */
+
+#include "common.h"
+#include "regs.h"
+
+enum {   /* PHY register addresses; all are accessed with MDIO_DEV_PMA_PMD below */
+   AEL100X_TX_DISABLE = 9,   /* written with !!enable by ael1002_power_down() */
+   AEL100X_TX_CONFIG1 = 0xc002,   /* written with 1 by ael1002_reset() */
+   AEL1002_PWR_DOWN_HI = 0xc011,   /* written with 0 by ael1002_reset() */
+   AEL1002_PWR_DOWN_LO = 0xc012,   /* written with 0 by ael1002_reset() */
+   AEL1002_XFI_EQL = 0xc015,   /* written with 0x18 by ael1002_reset() */
+   AEL1002_LB_EN = 0xc017,   /* ael1002_reset() passes 0 and 1 << 5 to t3_mdio_change_bits() */
+
+   LASI_CTRL = 0x9002,   /* written with 1/0 by ael1006_intr_enable()/ael1006_intr_disable() */
+   LASI_STAT = 0x9005   /* read by ael1006_intr_clear() and ael1006_intr_handler() */
+};
+
+/*
+ * Set a GPIO output bit in A_T3DBG_GPIO_EN for this PHY: F_GPIO7_OUT_VAL
+ * when the PHY address is 0, F_GPIO2_OUT_VAL otherwise.  The write is
+ * preceded by a 100 ms sleep and followed by a 30 ms sleep.
+ * NOTE(review): judging by the name this switches the transmitter on.
+ */
+static void ael100x_txon(struct cphy *phy)
+{
+	int gpio_bit;
+
+	if (phy->addr == 0)
+		gpio_bit = F_GPIO7_OUT_VAL;
+	else
+		gpio_bit = F_GPIO2_OUT_VAL;
+
+	msleep(100);
+	t3_set_reg_field(phy->adapter, A_T3DBG_GPIO_EN, 0, gpio_bit);
+	msleep(30);
+}
+
+/*
+ * Enter (@enable non-zero) or leave (@enable zero) power-down: write
+ * !!@enable to AEL100X_TX_DISABLE, then set or clear BMCR_PDOWN in MII_BMCR.
+ * Returns the first non-zero MDIO error, or 0 on success; the BMCR update
+ * is skipped when the first write fails.
+ */
+static int ael1002_power_down(struct cphy *phy, int enable)
+{
+	int rc = mdio_write(phy, MDIO_DEV_PMA_PMD, AEL100X_TX_DISABLE,
+			    !!enable);
+
+	if (rc)
+		return rc;
+
+	return t3_mdio_change_bits(phy, MDIO_DEV_PMA_PMD, MII_BMCR,
+				   BMCR_PDOWN, enable ? BMCR_PDOWN : 0);
+}
+
+/*
+ * Bring the PHY out of power-down and program its configuration registers
+ * in a fixed order, stopping at the first step that fails.  Returns that
+ * step's error code, or 0 if every step succeeds.  @wait is not used.
+ */
+static int ael1002_reset(struct cphy *phy, int wait)
+{
+	int rc;
+
+	rc = ael1002_power_down(phy, 0);
+	if (rc)
+		return rc;
+
+	rc = mdio_write(phy, MDIO_DEV_PMA_PMD, AEL100X_TX_CONFIG1, 1);
+	if (rc)
+		return rc;
+
+	rc = mdio_write(phy, MDIO_DEV_PMA_PMD, AEL1002_PWR_DOWN_HI, 0);
+	if (rc)
+		return rc;
+
+	rc = mdio_write(phy, MDIO_DEV_PMA_PMD, AEL1002_PWR_DOWN_LO, 0);
+	if (rc)
+		return rc;
+
+	rc = mdio_write(phy, MDIO_DEV_PMA_PMD, AEL1002_XFI_EQL, 0x18);
+	if (rc)
+		return rc;
+
+	return t3_mdio_change_bits(phy, MDIO_DEV_PMA_PMD, AEL1002_LB_EN,
+				   0, 1 << 5);
+}
+
+static int ael1002_intr_noop(struct cphy *phy)   /* interrupt hook that does nothing and reports success */
+{
+   return 0;
+}
+
+/*
+ * Report the link state of the PHY.  Each output pointer may be NULL, in
+ * which case that item is skipped.
+ *
+ * @link_ok receives 1 if BMSR_LSTATUS is set in MII_BMSR and 0 otherwise;
+ * @speed and @duplex always receive SPEED_10000 and DUPLEX_FULL.  @fc is
+ * never written.  Returns 0 on success or the mdio_read() error, in which
+ * case none of the outputs has been written.
+ */
+static int ael100x_get_link_status(struct cphy *phy, int *link_ok,
+				   int *speed, int *duplex, int *fc)
+{
+	if (link_ok) {
+		unsigned int status;
+		int err = mdio_read(phy, MDIO_DEV_PMA_PMD, MII_BMSR, &status);
+
+		/*
+		 * BMSR_LSTATUS is latch-low, so if it is 0 we need to read it
+		 * once more to get the current link state.
+		 */
+		if (!err && !(status & BMSR_LSTATUS))
+			err = mdio_read(phy, MDIO_DEV_PMA_PMD, MII_BMSR,
+					&status);
+		if (err)
+			return err;
+		*link_ok = !!(status & BMSR_LSTATUS);
+	}
+	if (speed)
+		*speed = SPEED_10000;
+	if (duplex)
+		*duplex = DUPLEX_FULL;
+	return 0;
+}
+
+static struct cphy_ops ael1002_ops = {   /* AEL1002 operations; all four interrupt hooks are no-ops */
+   .reset = ael1002_reset,
+   .intr_enable = ael1002_intr_noop,
+   .intr_disable = ael1002_intr_noop,
+   .intr_clear = ael1002_intr_noop,
+   .intr_handler = ael1002_intr_noop,
+   .get_link_status = ael100x_get_link_status,
+   .power_down = ael1002_power_down,
+};
+
+/*
+ * Initialise @phy as an AEL1002 by handing ael1002_ops and the supplied
+ * MDIO operations to cphy_init(), then run ael100x_txon(), which sleeps.
+ */
+void t3_ael1002_phy_prep(struct cphy *phy, struct adapter *adapter,
+			 int phy_addr, const struct mdio_ops *mdio_ops)
+{
+	cphy_init(phy, adapter, phy_addr, &ael1002_ops, mdio_ops);
+	ael100x_txon(phy);
+}
+
+static int ael1006_reset(struct cphy *phy, int wait)   /* delegates to t3_phy_reset() for MDIO_DEV_PMA_PMD, forwarding @wait */
+{
+   return t3_phy_reset(phy, MDIO_DEV_PMA_PMD, wait);
+}
+
+static int ael1006_intr_enable(struct cphy *phy)   /* writes 1 to LASI_CTRL; returns the mdio_write() result */
+{
+   return mdio_write(phy, MDIO_DEV_PMA_PMD, LASI_CTRL, 1);
+}
+
+static int ael1006_intr_disable(struct cphy *phy)   /* writes 0 to LASI_CTRL; returns the mdio_write() result */
+{
+   return mdio_write(phy, MDIO_DEV_PMA_PMD, LASI_CTRL, 0);
+}
+
+/*
+ * Read LASI_STAT and discard the value; only the mdio_read() result is
+ * returned.  NOTE(review): presumably the register is clear-on-read.
+ */
+static int ael1006_intr_clear(struct cphy *phy)
+{
+	u32 val;
+
+	return mdio_read(phy, MDIO_DEV_PMA_PMD, LASI_STAT, &val);
+}
+
+/*
+ * Read LASI_STAT and translate it into a cause code: returns
+ * cphy_cause_link_change if bit 0 is set, 0 if it is clear, or the
+ * mdio_read() error if the read fails.
+ */
+static int ael1006_intr_handler(struct cphy *phy)
+{
+	unsigned int status;
+	int err = mdio_read(phy, MDIO_DEV_PMA_PMD, LASI_STAT, &status);
+
+	if (err)
+		return err;
+	return (status & 1) ? cphy_cause_link_change : 0;
+}
+
+static int 

[PATCH 4/10] cxgb3 - HW access routines - part 2

2006-12-21 Thread divy
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch implements the HW access routines for the
Chelsio T3 network adapter's driver.
This patch is split. This is the second part.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---
+/**
+ *	t3_sge_write_context - write an SGE context
+ *	@adapter: the adapter
+ *	@id: the context id
+ *	@type: the context type
+ *
+ *	Program an SGE context with the values already loaded in the
+ *	CONTEXT_DATA? registers.  All four mask registers are set to all-ones
+ *	before the command is issued with opcode 1.  Returns the result of
+ *	t3_wait_op_done() on the F_CONTEXT_CMD_BUSY bit.
+ */
+static int t3_sge_write_context(struct adapter *adapter, unsigned int id,
+				unsigned int type)
+{
+	t3_write_reg(adapter, A_SG_CONTEXT_MASK0, 0xffffffff);
+	t3_write_reg(adapter, A_SG_CONTEXT_MASK1, 0xffffffff);
+	t3_write_reg(adapter, A_SG_CONTEXT_MASK2, 0xffffffff);
+	t3_write_reg(adapter, A_SG_CONTEXT_MASK3, 0xffffffff);
+	t3_write_reg(adapter, A_SG_CONTEXT_CMD,
+		     V_CONTEXT_CMD_OPCODE(1) | type | V_CONTEXT(id));
+	return t3_wait_op_done(adapter, A_SG_CONTEXT_CMD, F_CONTEXT_CMD_BUSY,
+			       0, 5, 1);
+}
+
+/**
+ *	t3_sge_init_ecntxt - initialize an SGE egress context
+ *	@adapter: the adapter to configure
+ *	@id: the context id
+ *	@gts_enable: whether to enable GTS for the context
+ *	@type: the egress context type
+ *	@respq: associated response queue
+ *	@base_addr: base address of queue
+ *	@size: number of queue entries
+ *	@token: uP token
+ *	@gen: initial generation value for the context
+ *	@cidx: consumer pointer
+ *
+ *	Initialize an SGE egress context and make it ready for use.  If the
+ *	platform allows concurrent context operations, the caller is
+ *	responsible for appropriate locking.
+ *
+ *	Returns -EINVAL if @base_addr is not 4K aligned, -EBUSY if a context
+ *	command is still in progress, and otherwise the result of
+ *	t3_sge_write_context().
+ */
+int t3_sge_init_ecntxt(struct adapter *adapter, unsigned int id,
+		       int gts_enable, enum sge_context_type type, int respq,
+		       u64 base_addr, unsigned int size, unsigned int token,
+		       int gen, unsigned int cidx)
+{
+	unsigned int credits = type == SGE_CNTXT_OFLD ? 0 : FW_WR_NUM;
+
+	if (base_addr & 0xfff)	/* must be 4K aligned */
+		return -EINVAL;
+	if (t3_read_reg(adapter, A_SG_CONTEXT_CMD) & F_CONTEXT_CMD_BUSY)
+		return -EBUSY;
+
+	/*
+	 * The address is stored in 4K units, split across three data words:
+	 * the low 16 bits in DATA1, the next 32 in DATA2, the next 4 in DATA3.
+	 */
+	base_addr >>= 12;
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA0, V_EC_INDEX(cidx) |
+		     V_EC_CREDITS(credits) | V_EC_GTS(gts_enable));
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA1, V_EC_SIZE(size) |
+		     V_EC_BASE_LO(base_addr & 0xffff));
+	base_addr >>= 16;
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA2, base_addr);
+	base_addr >>= 32;
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA3,
+		     V_EC_BASE_HI(base_addr & 0xf) | V_EC_RESPQ(respq) |
+		     V_EC_TYPE(type) | V_EC_GEN(gen) | V_EC_UP_TOKEN(token) |
+		     F_EC_VALID);
+	return t3_sge_write_context(adapter, id, F_EGRESS);
+}
+
+/**
+ *	t3_sge_init_flcntxt - initialize an SGE free-buffer list context
+ *	@adapter: the adapter to configure
+ *	@id: the context id
+ *	@gts_enable: whether to enable GTS for the context
+ *	@base_addr: base address of queue
+ *	@size: number of queue entries
+ *	@bsize: size of each buffer for this queue
+ *	@cong_thres: threshold to signal congestion to upstream producers
+ *	@gen: initial generation value for the context
+ *	@cidx: consumer pointer
+ *
+ *	Load the four context data words for a free list and commit them
+ *	with t3_sge_write_context().  Only one context operation may be in
+ *	flight at a time; serializing them is the caller's job.
+ *
+ *	Returns -EINVAL if @base_addr is not 4K aligned, -EBUSY if a context
+ *	command is still in progress, and otherwise the result of
+ *	t3_sge_write_context().
+ */
+int t3_sge_init_flcntxt(struct adapter *adapter, unsigned int id,
+			int gts_enable, u64 base_addr, unsigned int size,
+			unsigned int bsize, unsigned int cong_thres, int gen,
+			unsigned int cidx)
+{
+	u64 addr_4k;
+
+	if ((base_addr & 0xfff) != 0)	/* must be 4K aligned */
+		return -EINVAL;
+	if ((t3_read_reg(adapter, A_SG_CONTEXT_CMD) & F_CONTEXT_CMD_BUSY) != 0)
+		return -EBUSY;
+
+	/* the address is stored in 4K units: low word first, rest in DATA1 */
+	addr_4k = base_addr >> 12;
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA0, addr_4k);
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA1,
+		     V_FL_BASE_HI((u32) (addr_4k >> 32)) |
+		     V_FL_INDEX_LO(cidx & M_FL_INDEX_LO));
+
+	/* the consumer index and the buffer size each straddle two words */
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA2,
+		     V_FL_SIZE(size) | V_FL_GEN(gen) |
+		     V_FL_INDEX_HI(cidx >> 12) |
+		     V_FL_ENTRY_SIZE_LO(bsize & M_FL_ENTRY_SIZE_LO));
+	t3_write_reg(adapter, A_SG_CONTEXT_DATA3,
+		     V_FL_ENTRY_SIZE_HI(bsize >> (32 - S_FL_ENTRY_SIZE_LO)) |
+		     V_FL_CONG_THRES(cong_thres) | V_FL_GTS(gts_enable));
+
+	return t3_sge_write_context(adapter, id, F_FREELIST);
+}
+
+/**
+ * t3_sge_init_rspcntxt - initialize an SGE response queue context
+ * 

[PATCH 3/10] cxgb3 - HW access routines - part 1

2006-12-21 Thread divy
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch implements the HW access routines for the
Chelsio T3 network adapter's driver.
This patch is split. This is the first part.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---
 drivers/net/cxgb3/t3_hw.c | 3354 +
 1 files changed, 3354 insertions(+), 0 deletions(-)

diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c
new file mode 100755
index 0000000..a4e2e57
--- /dev/null
+++ b/drivers/net/cxgb3/t3_hw.c
@@ -0,0 +1,3354 @@
+/*
+ * This file is part of the Chelsio T3 Ethernet driver.
+ *
+ * Copyright (C) 2003-2006 Chelsio Communications.  All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
+ * release for licensing terms and conditions.
+ */
+
+#include "common.h"
+#include "regs.h"
+#include "sge_defs.h"
+#include "firmware_exports.h"
+
+/**
+ *	t3_wait_op_done_val - wait until an operation is completed
+ *	@adapter: the adapter performing the operation
+ *	@reg: the register to check for completion
+ *	@mask: a single-bit field within @reg that indicates completion
+ *	@polarity: the value of the field when the operation is completed
+ *	@attempts: number of check iterations
+ *	@delay: delay in usecs between iterations
+ *	@valp: where to store the value of the register at completion time
+ *
+ *	Poll @reg until the bit selected by @mask equals @polarity, checking
+ *	at most @attempts times.  If @valp is not NULL it receives the
+ *	register value that signalled completion.  Returns 0 if the operation
+ *	completes and -EAGAIN otherwise.
+ *
+ *	The counter is decremented before it is compared with zero, so
+ *	callers are expected to pass @attempts >= 1.
+ */
+
+int t3_wait_op_done_val(struct adapter *adapter, int reg, u32 mask,
+			int polarity, int attempts, int delay, u32 *valp)
+{
+	for (;;) {
+		u32 cur = t3_read_reg(adapter, reg);
+		int bit_set = (cur & mask) != 0;
+
+		if (bit_set == polarity) {
+			if (valp)
+				*valp = cur;
+			return 0;
+		}
+
+		attempts--;
+		if (attempts == 0)
+			return -EAGAIN;
+		if (delay != 0)
+			udelay(delay);
+	}
+}
+
+/**
+ *	t3_write_regs - write a bunch of registers
+ *	@adapter: the adapter to program
+ *	@p: an array of register address/register value pairs
+ *	@n: the number of address/value pairs
+ *	@offset: register address offset
+ *
+ *	Walk the array in order and write each value to its register, with
+ *	@offset added to every register address.
+ */
+void t3_write_regs(struct adapter *adapter, const struct addr_val_pair *p,
+		   int n, unsigned int offset)
+{
+	for (; n-- != 0; p++)
+		t3_write_reg(adapter, p->reg_addr + offset, p->val);
+}
+
+/**
+ *	t3_set_reg_field - set a register field to a value
+ *	@adapter: the adapter to program
+ *	@addr: the register address
+ *	@mask: specifies the portion of the register to modify
+ *	@val: the new value for the register field
+ *
+ *	Read-modify-write: clear the bits in @mask, OR in @val and write the
+ *	result back.  @val is used as given; it is neither shifted nor
+ *	masked here.
+ */
+void t3_set_reg_field(struct adapter *adapter, unsigned int addr, u32 mask,
+		      u32 val)
+{
+	u32 kept = t3_read_reg(adapter, addr) & ~mask;
+	u32 updated = kept | val;
+
+	t3_write_reg(adapter, addr, updated);
+	(void) t3_read_reg(adapter, addr);	/* flush */
+}
+
+/**
+ *	t3_read_indirect - read indirectly addressed registers
+ *	@adap: the adapter
+ *	@addr_reg: register holding the indirect address
+ *	@data_reg: register holding the value of the indirect register
+ *	@vals: where the read register values are stored
+ *	@nregs: how many indirect registers to read
+ *	@start_idx: index of first indirect register to read
+ *
+ *	For each of @nregs consecutive indices starting at @start_idx, write
+ *	the index to @addr_reg and store the value then read from @data_reg
+ *	in the next slot of @vals.
+ */
+void t3_read_indirect(struct adapter *adap, unsigned int addr_reg,
+		      unsigned int data_reg, u32 *vals, unsigned int nregs,
+		      unsigned int start_idx)
+{
+	unsigned int i;
+
+	for (i = 0; i < nregs; i++) {
+		t3_write_reg(adap, addr_reg, start_idx + i);
+		vals[i] = t3_read_reg(adap, data_reg);
+	}
+}
+
+/**
+ * t3_mc7_bd_read - read from MC7 through backdoor accesses
+ * @mc7: identifies MC7 to read from
+ * @start: index of first 64-bit word to read
+ * @n: number of 64-bit words to read
+ * @buf: where to store the read result
+ *
+ * Read n 64-bit words from MC7 starting at word start, using backdoor
+ * accesses.
+ */
+int t3_mc7_bd_read(struct mc7 

[PATCH 1/10] cxgb3 - main header files

2006-12-21 Thread Divy Le Ray
From: Divy Le Ray <[EMAIL PROTECTED]>

This patch implements the main header files of
the Chelsio T3 network driver.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---

 drivers/net/cxgb3/adapter.h  |  255 
 drivers/net/cxgb3/common.h   |  709 ++
 drivers/net/cxgb3/cxgb3_ioctl.h  |  165 
 drivers/net/cxgb3/firmware_exports.h |  144 +++
 4 files changed, 1273 insertions(+), 0 deletions(-)

diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
new file mode 100755
index 0000000..16643f6
--- /dev/null
+++ b/drivers/net/cxgb3/adapter.h
@@ -0,0 +1,255 @@
+/*
+ * This file is part of the Chelsio T3 Ethernet driver for Linux.
+ *
+ * Copyright (C) 2003-2006 Chelsio Communications.  All rights reserved.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the LICENSE file included in this
+ * release for licensing terms and conditions.
+ */
+
+/* This file should not be included directly.  Include common.h instead. */
+
+#ifndef __T3_ADAPTER_H__
+#define __T3_ADAPTER_H__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "t3cdev.h"
+#include 
+#include 
+#include 
+
+typedef irqreturn_t(*intr_handler_t) (int, void *);
+
+struct vlan_group;
+
+struct port_info { /* per-port driver state (field notes are inferred from names -- TODO confirm) */
+   struct vlan_group *vlan_grp;
+   const struct port_type_info *port_type;
+   u8 port_id;
+   u8 rx_csum_offload; /* presumably a flag enabling Rx checksum offload -- confirm */
+   u8 nqsets; /* presumably # of queue sets assigned to this port -- confirm */
+   u8 first_qset; /* presumably index of the port's first queue set -- confirm */
+   struct cphy phy;
+   struct cmac mac;
+   struct link_config link_config;
+   struct net_device_stats netstats;
+   int activity;
+};
+
+enum { /* adapter flags */
+   FULL_INIT_DONE = (1 << 0),
+   USING_MSI = (1 << 1),
+   USING_MSIX = (1 << 2),
+};
+
+struct rx_desc;
+struct rx_sw_desc;
+
+struct sge_fl {/* SGE per free-buffer list state */
+   unsigned int buf_size;  /* size of each Rx buffer */
+   unsigned int credits;   /* # of available Rx buffers */
+   unsigned int size;  /* capacity of free list */
+   unsigned int cidx;  /* consumer index */
+   unsigned int pidx;  /* producer index */
+   unsigned int gen;   /* free list generation */
+   struct rx_desc *desc;   /* address of HW Rx descriptor ring */
+   struct rx_sw_desc *sdesc;   /* address of SW Rx descriptor ring */
+   dma_addr_t phys_addr;   /* physical address of HW ring start */
+   unsigned int cntxt_id;  /* SGE context id for the free list */
+   unsigned long empty;/* # of times queue ran out of buffers */
+};
+
+/*
+ * Bundle size for grouping offload RX packets for delivery to the stack.
+ * Don't make this too big as we do prefetch on each packet in a bundle.
+ */
+# define RX_BUNDLE_SIZE 8
+
+struct rsp_desc;
+
+struct sge_rspq {  /* state for an SGE response queue */
+   unsigned int credits;   /* # of pending response credits */
+   unsigned int size;  /* capacity of response queue */
+   unsigned int cidx;  /* consumer index */
+   unsigned int gen;   /* current generation bit */
+   unsigned int polling;   /* is the queue serviced through NAPI? */
+   unsigned int holdoff_tmr;   /* interrupt holdoff timer in 100ns */
+   unsigned int next_holdoff;  /* holdoff time for next interrupt */
+   struct rsp_desc *desc;  /* address of HW response ring */
+   dma_addr_t phys_addr;   /* physical address of the ring */
+   unsigned int cntxt_id;  /* SGE context id for the response q */
+   spinlock_t lock;/* guards response processing */
+   struct sk_buff *rx_head;/* offload packet receive queue head */
+   struct sk_buff *rx_tail;/* offload packet receive queue tail */
+
+   unsigned long offload_pkts; /* presumably # of offload packets received -- confirm */
+   unsigned long offload_bundles; /* presumably # of offload packet bundles delivered (cf. RX_BUNDLE_SIZE) -- confirm */
+   unsigned long eth_pkts; /* # of ethernet packets */
+   unsigned long pure_rsps;/* # of pure (non-data) responses */
+   unsigned long imm_data; /* responses with immediate data */
+   unsigned long rx_drops; /* # of packets dropped due to no mem */
+   unsigned long async_notif; /* # of asynchronous notification events */
+   unsigned long empty;/* # of times queue ran out of credits */
+   unsigned long nomem;/* # of responses deferred due to no mem */
+   unsigned long unhandled_irqs;   /* # of spurious intrs */
+};
+
+struct tx_desc;
+struct tx_sw_desc;
+
+struct sge_txq {   /* state for an SGE Tx queue */
+   unsigned long flags;/* HW DMA fetch status */
+   unsigned int in_use;/* # of in-use Tx descriptors */
+   unsigned int size;  /* # of descriptors */
+   unsigned int processed; /* total # of descs HW has processed */
+   unsigned int cleaned;   /* total # of 

[PATCH 0/10] cxgb3: Chelsio T3 1G/10G ethernet device driver

2006-12-21 Thread Divy Le Ray

Jeff,

I resubmit the patch supporting the latest Chelsio T3 adapter.
It incorporates Arjan's feedback:
- removes unnecessary ifdefs
- updates the PCI resource management
- adds a flush after register writes.

It is built against Linus' tree.

A corresponding monolithic patch is available at this URL:
http://service.chelsio.com/kernel.org/cxgb3.patch.bz2

This driver is required by the Chelsio T3 RDMA driver
which was updated on 12/20/2006.

Cheers,
Divy
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [dm-devel] Re: [RFC PATCH 2/8] rqbased-dm: add block layer hook

2006-12-21 Thread Mike Christie
Kiyoshi Ueda wrote:
> Hi Jens,
> 
> On Thu, 21 Dec 2006 08:49:47 +0100, Jens Axboe <[EMAIL PROTECTED]> wrote:
>>> The new hook is needed for error handling in dm.
>>> For example, when an error occurred on a request, dm-multipath
>>> wants to try another path before returning EIO to application.
>>> Without the new hook, at the point of end_that_request_last(),
>>> the bios are already finished with error and can't be retried.
>> Ok, I see what you are getting at. The current ->end_io() is called when
>> the request has fully completed, you want notification for each chunk
>> potentially completed.
>>
>> I think a better design here would be to use ->end_io() as the full
>> completion handler, similar to how bio->bi_end_io() works. A request
>> originating from __make_request() would set something ala:
>>
>> int fs_end_io(struct request *rq, int error, unsigned int nr_bytes)
>> {
>> if (!__end_that_request_first(rq, err, nr_bytes)) {
>> end_that_request_last(rq, error);
>> return 0;
>> }
>>
>> return 1;
>> }
>>
>> and normal io completion from a driver would use a helper:
>>
>> int blk_complete_io(struct request *rq, int error, unsigned int nr_bytes)
>> {
>> return rq->end_io(rq, error, nr_bytes);
>> }
>>
>> instead of calling the functions manually. That would allow you to get
>> notification right at the beginning and do what you need, without adding
>> a special hook for this.
> 
> I'm not confident about what you mean.
> Something like this?
>   - __make_request() sets fs_end_io() to req->end_io()
>   - The driver calls blk_complete_io()
>* if it succeeds, the request is done
>* if it fails, the request is not completed
>  and the driver needs retry or something
>   - Current users of req->end_io() have to update/rewrite thier end_io.
>   - Features like mine will set its own end_io.
> It checks error and decides whether calling fs_end_io() or not.
> 
> Depending on drivers, there are some functions called between
> __end_that_request_first() and end_that_request_last().
> For example:
>   - add_disk_randomness()
>   - blk_queue_end_tag()
>   - floppy_off()
> So they might prevent such generalization.
> 
> 
> In addition to the suggested approach, what do you think about
> adding a new flag to req->cmd_flags which lets the end_io() handler
> not to return bio to upper layer?
> It will be useful for multipathing and can be done even within
> the current __end_that_request_first().
> For example,
> 
> static int __end_that_request_first()
> {
>   .
>   error = 0;
>   if (end_io_error(uptodate))
>   error = !uptodate ? -EIO : uptodate;
>   .
>   if (error && (req->cmd_flags & "NEW_FLAG"))
>   return 0; /* Tell the driver to call end_that_request_last() */
> 
>   total_types = bio_nbytes = 0;
>   while ((bio = req->bio) != NULL) {
>   . /* process of finishing bios */
>   }
>   .
> }
> 

Who would call end_that_request_first with the new flag set? The scsi
layer or multipath layer?

The end_io_first callout was a hack around the lack of stacking and
because I was not yet sure how to handle medium errors.

We hooked into end_that_request_first, because for SCSI we can get a
medium error and the scsi layer will complete the first X bytes of a
request, then retry the leftover part itself. For this error we want to
update the request and bio fields so that when the request is resent by
the scsi layer, the scatterlist will get made with the updated values.

Maybe if FAILFAST is made to cover all errors then we would not need
this type of hack. Having multipath handle medium errors seems a little
silly though since the scsi layer knows better what to do there.

Another alternative is to do something similar to what bio based dm does
today. The bio/bvec update code and bio mapping and stacking has a
similar problem. In dm we have that bio record/details code which copies
some of the bio fields and dm-mpath also does not do partial retries.
For example, on a medium error where part of a bio is successful but the
end part fails because of a transport error and needs to be retried this
will result in the entire bio being redriven.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/10] cxgb3 - main source file

2006-12-21 Thread Divy Le Ray

Arjan van de Ven wrote:

They are used to parameter the HW:
register access,


ethtool supports that, so shouldn't be an ioctl for sure

 configuration of queue sets, on board memory 
configuration,


I'm sure ethtool can do that too


firmware load, etc ...


and for this we have request_firmware() interface. 


adding device specific ioctl that duplicate functionality that exists or
should exist in a generic way isn't really acceptable for 2.6 kernels
anymore


Arjan,

The driver implements all the ethtool operations that apply to it.
The GETREG ioctl is left for debug purposes:
get_regs doesn't return clear-on-read registers while GETREG does.

Using request_firmware assumes that the driver knows the FW file name
and the driver initiates the load. That's not our model where we work
with different FWs, don't know what the names are, and the user 
initiates the load.


Cheers,
Divy
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 2.6.20-rc1 5/6] SA1100 GPIO wrappers

2006-12-21 Thread pHilipp Zabel

On 12/21/06, Andrew Morton <[EMAIL PROTECTED]> wrote:

On Wed, 20 Dec 2006 13:13:21 -0800
David Brownell <[EMAIL PROTECTED]> wrote:

> +#define gpio_get_value(gpio) \
> + (GPLR & GPIO_GPIO(gpio))
> +
> +#define gpio_set_value(gpio,value) \
> + ((value) ? (GPSR = GPIO_GPIO(gpio)) : (GPCR(gpio) = GPIO_GPIO(gpio)))

likewise.


I have done the same to the SA1100 wrappers as to the PXA wrappers now.
Maybe the non-inline functions in generic.c are overkill for those much simpler
macros on SA...

regards
Philipp

Index: linux-2.6/include/asm-arm/arch-sa1100/gpio.h
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux-2.6/include/asm-arm/arch-sa1100/gpio.h2006-12-22
08:07:08.0 +0100
@@ -0,0 +1,95 @@
+/*
+ * linux/include/asm-arm/arch-sa1100/gpio.h
+ *
+ * SA1100 GPIO wrappers for arch-neutral GPIO calls
+ *
+ * Written by Philipp Zabel <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __ASM_ARCH_SA1100_GPIO_H
+#define __ASM_ARCH_SA1100_GPIO_H
+
+#include 
+#include 
+
+#include 
+
+static inline int gpio_request(unsigned gpio, const char *label)
+{
+   return 0; /* no-op: both arguments are ignored and 0 is always returned */
+}
+
+static inline void gpio_free(unsigned gpio)
+{
+   /* Nothing to do: the gpio argument is ignored. */
+}
+
+/*
+ * Configure one GPIO pin as an input.
+ *
+ * Returns -EINVAL when gpio is above GPIO_MAX, 0 otherwise.
+ *
+ * Only the direction bit belonging to this pin is rewritten; assigning the
+ * whole of GPDR would also change the direction of every other pin.  The
+ * explicit "return 0" is required because the function is non-void.
+ *
+ * NOTE(review): the read-modify-write of GPDR is not serialized here;
+ * confirm whether callers need interrupts disabled around it.
+ */
+static inline int gpio_direction_input(unsigned gpio)
+{
+   if (gpio > GPIO_MAX)
+       return -EINVAL;
+   GPDR = (GPDR & ~GPIO_GPIO(gpio)) | (GPDR_In << gpio);
+   return 0;
+}
+
+/*
+ * Configure one GPIO pin as an output.
+ *
+ * Returns -EINVAL when gpio is above GPIO_MAX, 0 otherwise.
+ *
+ * Only the direction bit belonging to this pin is rewritten; assigning the
+ * whole of GPDR would also change the direction of every other pin.  The
+ * explicit "return 0" is required because the function is non-void.
+ *
+ * NOTE(review): the read-modify-write of GPDR is not serialized here;
+ * confirm whether callers need interrupts disabled around it.
+ */
+static inline int gpio_direction_output(unsigned gpio)
+{
+   if (gpio > GPIO_MAX)
+       return -EINVAL;
+   GPDR = (GPDR & ~GPIO_GPIO(gpio)) | (GPDR_Out << gpio);
+   return 0;
+}
+
+/* Return GPLR masked with GPIO_GPIO(gpio): zero or nonzero. */
+static inline int __gpio_get_value(unsigned gpio)
+{
+   const int level = GPLR & GPIO_GPIO(gpio);
+
+   return level;
+}
+
+/*
+ * Read a GPIO level: use the inline helper when gpio is a compile-time
+ * constant, otherwise call the out-of-line sa1100_gpio_get_value().
+ */
+#define gpio_get_value(gpio)   \
+   (__builtin_constant_p(gpio) ?   \
+    __gpio_get_value(gpio) :   \
+    sa1100_gpio_get_value(gpio))
+
+/*
+ * Write the pin's mask to GPSR when value is nonzero,
+ * or to GPCR when value is zero.
+ */
+static inline void __gpio_set_value(unsigned gpio, int value)
+{
+   if (!value) {
+       GPCR = GPIO_GPIO(gpio);
+       return;
+   }
+   GPSR = GPIO_GPIO(gpio);
+}
+
+#define gpio_set_value(gpio,value) /* inline path for constant gpio, else sa1100_gpio_set_value() */ \
+   (__builtin_constant_p(gpio) ?   \
+__gpio_set_value(gpio, value) :\
+sa1100_gpio_set_value(gpio, value))
+
+/*
+ * Map a GPIO number to an IRQ number: GPIOs below 11 are offset from
+ * IRQ_GPIO0, all others are offset from IRQ_GPIO11.
+ */
+static inline unsigned gpio_to_irq(unsigned gpio)
+{
+   return (gpio < 11) ? (IRQ_GPIO0 + gpio) : (IRQ_GPIO11 - 11 + gpio);
+}
+
+static inline unsigned irq_to_gpio(unsigned irq)
+{
+   if (irq < IRQ_GPIO11_27) /* NOTE(review): gpio_to_irq() splits on the literal 11, this splits on IRQ_GPIO11_27 -- confirm the two agree */
+   return irq - IRQ_GPIO0;
+   else
+   return irq - IRQ_GPIO11 + 11;
+}
+
+#endif
Index: linux-2.6/arch/arm/mach-sa1100/generic.c
===
--- linux-2.6.orig/arch/arm/mach-sa1100/generic.c   2006-12-22
07:57:46.0 +0100
+++ linux-2.6/arch/arm/mach-sa1100/generic.c2006-12-22 08:12:51.0 
+0100
@@ -28,6 +28,8 @@
#include 
#include 

+#include 
+
#include "generic.h"

#define NR_FREQS16
@@ -139,6 +141,26 @@
}

/*
+ * Return GPIO level
+ */
+int sa1100_gpio_get_value(unsigned gpio)
+{
+   return __gpio_get_value(gpio); /* out-of-line wrapper around the inline helper */
+}
+
+EXPORT_SYMBOL(sa1100_gpio_get_value);
+
+/*
+ * Set output GPIO level
+ */
+void sa1100_gpio_set_value(unsigned gpio, int value)
+{
+   __gpio_set_value(gpio, value); /* out-of-line wrapper around the inline helper */
+}
+
+EXPORT_SYMBOL(sa1100_gpio_set_value);
+
+/*
 * Default power-off for SA1100
 */
static void sa1100_power_off(void)
Index: linux-2.6/include/asm-arm/arch-sa1100/hardware.h
===
--- linux-2.6.orig/include/asm-arm/arch-sa1100/hardware.h   2006-12-22
07:58:13.0 +0100
+++ linux-2.6/include/asm-arm/arch-sa1100/hardware.h2006-12-22
08:02:23.0 +0100
@@ -48,6 +48,16 @@

#endif

+/*
+ * Return GPIO level, nonzero means high, zero is low
+ */
+extern int sa1100_gpio_get_value(unsigned gpio);
+
+/*
+ * Set output GPIO level
+ */
+void sa1100_gpio_set_value(unsigned gpio, int value);
+
#include "SA-1100.h"

#ifdef CONFIG_SA1101
Index: linux-2.6/include/asm-arm/arch-sa1100/gpio.h
===
--- /dev/null	1970-01-01 00:00:00.0 +
+++ linux-2.6/include/asm-arm/arch-sa1100/gpio.h	2006-12-22 

Re: Patch "i386: Relocatable kernel support" causes instant reboot

2006-12-21 Thread Vivek Goyal
On Thu, Dec 21, 2006 at 06:45:57PM +0100, Alexander van Heukelum wrote:
> Hi,
> 
> Hmm. taking a peek at the bzImage there...
> 
> 1d80  41 00 56 45 53 41 00 56  69 64 65 6f 20 61 64 61
> |A.VESA.Video ada|
> 1d90  70 74 65 72 3a 20 00 00  00 b8 00 00 55 aa 5a 5a  |pter:
> ..U.ZZ|
> 1da0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00
> ||
> *
> 1e00  4e 35 13 00 1f 8b 08 00  23 a4 89 45 02 03 b4 fd
> |N5..#..E|
>   -- -- -- -- ^^ ^^ ^^
> 
> This is the end of the realmode kernel, and it should be followed by the
> 32-bit code that is to be executed at (normally) 0x10, right? This
> is however not the case here. Where did arch/i386/boot/compressed/head.S
> go then? What is the significance of this value 0x0013354e? It is in
> fact
> exactly the size of the compressed kernel image.
> 
> I have no idea what went wrong, but it went wrong in the build process
> of the bzImage.
> 

Hi Alexander,

Excellent observation. I did an "od -Ax -tx1" on bzImage built by me and
I can see the right startup_32() code bytes at the end of real mode code.

001d20 74 65 72 3a 20 00 00 00 b8 00 00 55 aa 5a 5a 00
001d30 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
*
001e00 fc fa b8 18 00 00 00 8e d8 8e c0 8e e0 8e e8 8e
   ^^^
Following is the disassembly of startup_32() in
arch/i386/boot/compressed/head.S

 :
   0:   fc  cld
   1:   fa  cli
   2:   b8 18 00 00 00  mov$0x18,%eax

So I can see 0x18b8fafc being rightly placed immediately after real
mode code (setup.S). But that does not seem to be the case with Jean's
bzImage.

The only place where size of compressed kernel (vmlinux.bin.gz) is placed
is piggy.o. Look at arch/i386/boot/compressed/vmlinux.scr. Here we put
the size of vmlinux.bin.gz in .data.compressed section before we put
actual vmlinux.bin.gz in this section.

Does that mean that somehow .data.compressed section was placed before
.text.head section? But that would be contrary to what
arch/i386/boot/compressed/vmlinux.lds instructs the linker to do.

At the same time I tried to find the pattern 0x18b8fafc in Jean's bzImage
but I can't find that. Does that mean that arch/i386/boot/compressed/head.S
was never compiled  and linked? 

Jean, can you please upload some more files. Should give some more idea
about what happened in your environment.

arch/i386/boot/vmlinux.bin
arch/i386/boot/compressed/piggy.o
arch/i386/boot/compressed/head.o

Thanks
Vivek
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [-mm patch] ptrace: make {put,get}reg work again for gs and fs

2006-12-21 Thread Jeremy Fitzhardinge
Andrew Morton wrote:
> OK, but you're using -mm, yes?  And -mm has (the rather irritating)
> convert-i386-pda-code-to-use-%fs.patch in it.
>
> So perhaps your fix is a -mm-only thing?
>   

Yes, I think that's true.


J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


2.6.19: File system corruption "stuck" until device is replugged

2006-12-21 Thread Andrey Borzenkov
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

I had USB stick (fat32) that reported file system corruption on mount and 
hence was mounted read-only. No amount of umount/dosfsck/mount could make it 
rw again. dosfsck reported device as clean but it still would mount ro and I 
continued to see directory that had been deleted by the very first dosfsck 
run! I unplugged it, looked under Win2k - it was OK - and only then did I 
notice that directory claimed as corrupted did not even exist. Replugging 
it - mounted OK.

I am not sure if this is a bug or "work as designed". May be this is specific 
fat32 problem; still it does not look right?

TIA

- -andrey
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.5 (GNU/Linux)

iD8DBQFFi2YfR6LMutpd94wRAquCAKC3n8DjRGRqDYdfP6tNGvlg5sG0MQCfQRNJ
89HQuNaAWuLzJkkKayVrLks=
=m0rH
-END PGP SIGNATURE-
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [-mm patch] ptrace: make {put,get}reg work again for gs and fs

2006-12-21 Thread Jeremy Fitzhardinge
Frederik Deweerdt wrote:
> On Thu, Dec 21, 2006 at 11:22:05AM -0800, Jeremy Fitzhardinge wrote:
>   
>> Frederik Deweerdt wrote:
>> 
>>> Following the i386 pda patches, it's not possible to set gs or fs value
>>> from gdb anymore. The following patch restores the old behaviour of
>>> getting and setting thread.gs of thread.fs respectively.
>>> Here's a gdb session *before* the patch:
>>> (gdb) info reg
>>> [...]
>>> fs 0x33 51
>>> gs 0x33 51
>>> (gdb) set $fs=0x
>>> (gdb) info reg
>>> [...]
>>> fs 0x33 51
>>> gs 0x33 51
>>> (gdb) set $gs=0x
>>> (gdb) info reg
>>> [...]
>>> fs 0x   65535
>>> gs 0x33 51
>>>
>>> Another one *after* the patch:
>>> (gdb) info reg
>>> [...]
>>> fs 0xd8 216
>>>   
>>>   
>> This doesn't look right.  This is the kernel's %fs, not usermode's
>> (which should be 0).
>>
>> 
>>> gs 0x33 51
>>> (gdb) set $fs=0x
>>> (gdb) info reg
>>> [...]
>>> fs 0x   65535
>>> gs 0x33 51
>>> (gdb) set $gs=0x
>>> (gdb) info reg
>>> [...]
>>> fs 0x   65535
>>> gs 0x   65535
>>>   
>>>   
>> Hm.  This shouldn't be possible since this is a bad selector, but I
>> guess ptrace/gdb doesn't really know that.  If you run the target (even
>> single step it), these should revert to 0.
>>
>> 
> Here's a third session that looks better:
>
> (gdb) info reg
> [...]
> fs 0x0  0
> gs 0x33 51
> (gdb) set $fs=0x
> (gdb) info reg
> [...]
> fs 0x   65535
> gs 0x33 51
> (gdb) set $gs=0x
> (gdb) info reg
> [...]
> fs 0x   65535
> gs 0x   65535
> (gdb) n
> Single stepping until exit from function main,
> which has no line number information.
> Cannot find user-level thread for LWP 10751: generic error
> (gdb) set $gs=0x33
> (gdb) set $fs=0
> (gdb) n
> Single stepping until exit from function main,
> which has no line number information.
> 0x08048c05 in __i686.get_pc_thunk.bx ()
> (gdb) info reg
> [...]
> fs 0x0  0
> gs 0x33 51
>
> This is a -mm1 kernel + your efl_offset fix + the attached patch.
> So the problem came from putreg still saving %gs to the stack where
> there's no slot for it, whereas getreg got things right.
>
> Regards,
> Frederik
>
> Signed-off-by: Frederik Deweerdt <[EMAIL PROTECTED]>
>
>
> diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
> index a803a49..d8f44db 100644
> --- a/arch/i386/kernel/ptrace.c
> +++ b/arch/i386/kernel/ptrace.c
> @@ -89,14 +89,14 @@ static int putreg(struct task_struct *child,
>   unsigned long regno, unsigned long value)
>  {
>   switch (regno >> 2) {
> - case FS:
> + case GS:
>   if (value && (value & 3) != 3)
>   return -EIO;
> - child->thread.fs = value;
> + child->thread.gs = value;
>   return 0;
>   case DS:
>   case ES:
> - case GS:
> + case FS:
>   if (value && (value & 3) != 3)
>   return -EIO;
>   value &= 0x;
>   

This patch is good.  convert-i386-pda-code-to-use-%fs-fixes.patch
touched this same code, but it didn't actually fix the problem.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 2.6.20-rc1 4/6] PXA GPIO wrappers

2006-12-21 Thread pHilipp Zabel

On 12/21/06, Nicolas Pitre <[EMAIL PROTECTED]> wrote:

On Thu, 21 Dec 2006, pHilipp Zabel wrote:

> > > --- linux-2.6.orig/arch/arm/mach-pxa/generic.c2006-12-16
> > > +++ linux-2.6/arch/arm/mach-pxa/generic.c 2006-12-16
> > > 16:47:45.0
> > > @@ -129,6 +129,29 @@
> > > EXPORT_SYMBOL(pxa_gpio_mode);
> > >
> > > /*
> > > + * Return GPIO level, nonzero means high, zero is low
> > > + */
> > > +int pxa_gpio_get_value(unsigned gpio)
> > > +{
> > > + return GPLR(gpio) & GPIO_bit(gpio);
> > > +}
> > > +
> > > +EXPORT_SYMBOL(pxa_gpio_get_value);
> > > +
> > > +/*
> > > + * Set output GPIO level
> > > + */
> > > +void pxa_gpio_set_value(unsigned gpio, int value)
> > > +{
> > > + if (value)
> > > + GPSR(gpio) = GPIO_bit(gpio);
> > > + else
> > > + GPCR(gpio) = GPIO_bit(gpio);
> > > +}
> > > +
> > > +EXPORT_SYMBOL(pxa_gpio_set_value);
> >
> > Instead of duplicating code here, you probably should just reuse
> > __gpio_set_value() and __gpio_get_value() inside those functions.
>
> Probably? What I am wondering is this: can the compiler
> optimize away the range check that is duplicated in GPSR/GPCR
> and  GPIO_bit for __gpio_set/get_value? Or could we optimize
> this case by expanding the macros in place (which would mean
> duplicating code from pxa-regs.h)...

Sorry I don't quite follow you here.  Why would you expand the macro in
place?


And that is no surprise because I seem to have problems to follow
myself here now after a good night's rest. Basically I was thinking
that after expanding the macros in place the code could be optimized.
Of course, this doesn't have any advantage for the inlined functions
(gpio is constant, so most of the code will be optimized away anyway),
and shaving one or two words off pxa_gpio_setval is hardly worthwhile.


My suggestion is only about not duplicating the source code.  The
generated assembly will be the same.

And your patch looks fine to me now, except for this:

+int pxa_gpio_get_value(unsigned gpio)
+{
+   __gpio_get_value(gpio);
+}

You certainly meant to add a "return" in there, right?


Oh yes. Sorry.

cheers
Philipp

Index: linux-2.6/include/asm-arm/arch-pxa/gpio.h
===
--- /dev/null   1970-01-01 00:00:00.0 +
+++ linux-2.6/include/asm-arm/arch-pxa/gpio.h   2006-12-21
20:07:48.0 +0100
@@ -0,0 +1,82 @@
+/*
+ * linux/include/asm-arm/arch-pxa/gpio.h
+ *
+ * PXA GPIO wrappers for arch-neutral GPIO calls
+ *
+ * Written by Philipp Zabel <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __ASM_ARCH_PXA_GPIO_H
+#define __ASM_ARCH_PXA_GPIO_H
+
+#include 
+#include 
+#include 
+
+#include 
+
+static inline int gpio_request(unsigned gpio, const char *label)
+{
+   return 0; /* no-op: both arguments are ignored and 0 is always returned */
+}
+
+static inline void gpio_free(unsigned gpio)
+{
+   /* Nothing to do: the gpio argument is ignored. */
+}
+
+static inline int gpio_direction_input(unsigned gpio)
+{
+   return pxa_gpio_mode(gpio | GPIO_IN); /* passes the pin number OR'ed with GPIO_IN and returns pxa_gpio_mode()'s result */
+}
+
+static inline int gpio_direction_output(unsigned gpio)
+{
+   return pxa_gpio_mode(gpio | GPIO_OUT); /* passes the pin number OR'ed with GPIO_OUT and returns pxa_gpio_mode()'s result */
+}
+
+/* Return GPLR(gpio) masked with GPIO_bit(gpio): zero or nonzero. */
+static inline int __gpio_get_value(unsigned gpio)
+{
+   const int level = GPLR(gpio) & GPIO_bit(gpio);
+
+   return level;
+}
+
+/*
+ * Read a GPIO level: use the inline helper when gpio is a compile-time
+ * constant, otherwise call the out-of-line pxa_gpio_get_value().
+ */
+#define gpio_get_value(gpio)   \
+   (__builtin_constant_p(gpio) ?   \
+    __gpio_get_value(gpio) :   \
+    pxa_gpio_get_value(gpio))
+
+/*
+ * Write the pin's bit to GPSR(gpio) when value is nonzero,
+ * or to GPCR(gpio) when value is zero.
+ */
+static inline void __gpio_set_value(unsigned gpio, int value)
+{
+   if (!value) {
+       GPCR(gpio) = GPIO_bit(gpio);
+       return;
+   }
+   GPSR(gpio) = GPIO_bit(gpio);
+}
+
+#define gpio_set_value(gpio,value) /* inline path for constant gpio, else pxa_gpio_set_value() */ \
+   (__builtin_constant_p(gpio) ?   \
+__gpio_set_value(gpio, value) :\
+pxa_gpio_set_value(gpio, value))
+
+#include /* cansleep wrappers */
+
+#define gpio_to_irq(gpio)  IRQ_GPIO(gpio)
+#define irq_to_gpio(irq)   IRQ_TO_GPIO(irq)
+
+
+#endif
Index: linux-2.6/arch/arm/mach-pxa/generic.c
===
--- linux-2.6.orig/arch/arm/mach-pxa/generic.c  2006-12-21
13:30:01.0 +0100
+++ linux-2.6/arch/arm/mach-pxa/generic.c   2006-12-21 21:25:24.0 
+0100
@@ -36,6 +36,7 @@
#include 

#include 
+#include 
#include 
#include 

Re: [-mm patch] ptrace: make {put,get}reg work again for gs and fs

2006-12-21 Thread Jeremy Fitzhardinge
Andrew Morton wrote:
> The below is what I have queued for urgent mainlining to address these
> problems.
>
> Is it sufficient?
>   

It is sufficient to fix the serious eflags-clobbering bug, but it
doesn't fix the read-and-modify correctness problem Frederik found.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [-mm patch] ptrace: make {put,get}reg work again for gs and fs

2006-12-21 Thread Frederik Deweerdt
On Thu, Dec 21, 2006 at 06:11:08PM -0800, Andrew Morton wrote:
> On Thu, 21 Dec 2006 18:00:49 -0800
> Jeremy Fitzhardinge <[EMAIL PROTECTED]> wrote:
> 
> > Frederik Deweerdt wrote:
> > > This is a -mm1 kernel + your efl_offset fix + the attached patch.
> > > So the problem came from putreg still saving %gs to the stack where
> > > there's no slot for it, whereas getreg got things right.
> > >   
> > 
> > That patch looks good, but I think it is already effectively in Andrew's
> > queue, because I noticed some problems in there when I reviewed  the
> > convert-to-%fs patch.
> > 
> 
> The below is what I have queued for urgent mainlining to address these
> problems.
> 
> Is it sufficient?
> 
No, it's not. The patch below fixes the place where we get eflags, this
triggered the "BUG while gdb'ing" reports.
The one I sent was to fix a problem that only I reported, AFAIK: when
you use gdb/ptrace to modify %fs, the value gets written in the wrong
place (see gdb sessions). So, unless you have another patch fixing the
way putreg() writes %fs, the patch[1] I sent should also be queued for
mainline.

Regards,
Frederik

[1] http://lkml.org/lkml/2006/12/21/267
> 
> 
> 
> From: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
> 
> The PDA patches introduced a bug in ptrace: it reads eflags from the wrong
> place on the target's stack, but writes it back to the correct place.  The
> result is a corrupted eflags, which is most visible when it turns interrupts
> off unexpectedly.
> 
> This patch fixes this by making the ptrace code a little less fragile.  It
> changes [gs]et_stack_long to take a straightforward byte offset into struct
> pt_regs, rather than requiring all callers to do a sizeof(struct pt_regs)
> offset adjustment.  This means that the eflag's offset (EFL_OFFSET) on the
> target stack can be simply computed with offsetof().
> 
> Signed-off-by: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
> Cc: Frederik Deweerdt <[EMAIL PROTECTED]>
> Cc: Andi Kleen <[EMAIL PROTECTED]>
> Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
> ---
> 
>  arch/i386/kernel/ptrace.c |   21 ++---
>  1 file changed, 10 insertions(+), 11 deletions(-)
> 
> diff -puN 
> arch/i386/kernel/ptrace.c~ptrace-fix-efl_offset-value-according-to-i386-pda-changes
>  arch/i386/kernel/ptrace.c
> --- 
> a/arch/i386/kernel/ptrace.c~ptrace-fix-efl_offset-value-according-to-i386-pda-changes
> +++ a/arch/i386/kernel/ptrace.c
> @@ -45,7 +45,7 @@
>  /*
>   * Offset of eflags on child stack..
>   */
> -#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
> +#define EFL_OFFSET offsetof(struct pt_regs, eflags)
>  
>  static inline struct pt_regs *get_child_regs(struct task_struct *task)
>  {
> @@ -54,24 +54,24 @@ static inline struct pt_regs *get_child_
>  }
>  
>  /*
> - * this routine will get a word off of the processes privileged stack. 
> - * the offset is how far from the base addr as stored in the TSS.  
> - * this routine assumes that all the privileged stacks are in our
> + * This routine will get a word off of the processes privileged stack.
> + * the offset is bytes into the pt_regs structure on the stack.
> + * This routine assumes that all the privileged stacks are in our
>   * data space.
>   */   
>  static inline int get_stack_long(struct task_struct *task, int offset)
>  {
>   unsigned char *stack;
>  
> - stack = (unsigned char *)task->thread.esp0;
> + stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs);
>   stack += offset;
>   return (*((int *)stack));
>  }
>  
>  /*
> - * this routine will put a word on the processes privileged stack. 
> - * the offset is how far from the base addr as stored in the TSS.  
> - * this routine assumes that all the privileged stacks are in our
> + * This routine will put a word on the processes privileged stack.
> + * the offset is bytes into the pt_regs structure on the stack.
> + * This routine assumes that all the privileged stacks are in our
>   * data space.
>   */
>  static inline int put_stack_long(struct task_struct *task, int offset,
> @@ -79,7 +79,7 @@ static inline int put_stack_long(struct 
>  {
>   unsigned char * stack;
>  
> - stack = (unsigned char *) task->thread.esp0;
> + stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs);
>   stack += offset;
>   *(unsigned long *) stack = data;
>   return 0;
> @@ -114,7 +114,7 @@ static int putreg(struct task_struct *ch
>   }
>   if (regno > ES*4)
>   regno -= 1*4;
> - put_stack_long(child, regno - sizeof(struct pt_regs), value);
> + put_stack_long(child, regno, value);
>   return 0;
>  }
>  
> @@ -137,7 +137,6 @@ static unsigned long getreg(struct task_
>   default:
>   if (regno > ES*4)
>   regno -= 1*4;
> - regno = regno - sizeof(struct pt_regs);
>   retval &= get_stack_long(child, regno);
>   }
>   return retval;
> _
> 
> 
-
To unsubscribe 

Re: Linux disk performance.

2006-12-21 Thread Manish Regmi

On 12/22/06, Bhanu Kalyan Chetlapalli <[EMAIL PROTECTED]> wrote:

>
> Thanks  for the suggestion but the performance was terrible when write
> cache was disabled.

Performance degradation is expected. But the point is - did the
anomaly, that you have pointed out, go away? Because if it did, then
it is the disk cache which is causing the issue, and you will have to
live with it. Else you will have to look elsewhere.


oops, sorry for incomplete answer.
Actually I did not test thoroughly, but my initial tests showed some
bumps and serious performance degradation. But anyway there were still
some bumps... :(

(sequence)(channel)(write time in microseconds)
0  06366
0  19949
0  210125
0  310165
0  411043
0  510129
0  610089
0  710165
0  871572
0  99882
0  10   8105
0  11   10085


--
---
regards
Manish Regmi

---
UNIX without a C Compiler is like eating Spaghetti with your mouth
sewn shut. It just doesn't make sense.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


2.6 kernel NFS root mount problem

2006-12-21 Thread Magdalena Raltcheva
Hi,

I'm trying to run a 2.6.18 kernel on an ARM AT91RM9200DK board with an
NFS-mounted root filesystem.

The printout from the boot is :


Loading:
#

#

#
 #
done
Bytes transferred = 1000118 (f42b6 hex)
## Booting image at 2100 ...
   Image Name:
   Image Type:   ARM Linux Kernel Image (gzip compressed)
   Data Size:154 Bytes = 976.6 kB
   Load Address: 20008000
   Entry Point:  20008000
   Verifying Checksum ... OK
   Uncompressing Kernel Image ... OK

Starting kernel ...

Linux version 2.6.18 ([EMAIL PROTECTED]) () #1 Fri Dec 22 14:45:47 CST 2006
CPU: ARM920T [41129200] revision 0 (ARMv4T), cr=c0003177
Machine: Atmel AT91RM9200
Memory policy: ECC disabled, Data cache writeback
Clocks: CPU 179 MHz, master 59 MHz, main 18.432 MHz
CPU0: D VIVT write-back cache
CPU0: I cache: 16384 bytes, associativity 64, 32 byte lines, 8 sets
CPU0: D cache: 16384 bytes, associativity 64, 32 byte lines, 8 sets
Built 1 zonelists.  Total pages: 8192
Kernel command line: root=/dev/nfs rw
nfsroot=192.168.0.10:/home_own/rootfs ip=192.168.0.12
console=ttyS0,115200 mem=32M
AT91: 128 gpio irqs in 4 banks
PID hash table entries: 256 (order: 8, 1024 bytes)
Console: colour dummy device 80x30
Dentry cache hash table entries: 4096 (order: 2, 16384 bytes)
Inode-cache hash table entries: 2048 (order: 1, 8192 bytes)
Memory: 32MB = 32MB total
Memory: 30292KB available (1824K code, 209K data, 88K init)
Mount-cache hash table entries: 512
CPU: Testing write buffer coherency: ok
NET: Registered protocol family 16
usbcore: registered new driver usbfs
usbcore: registered new driver hub
NET: Registered protocol family 2
IP route cache hash table entries: 256 (order: -2, 1024 bytes)
TCP established hash table entries: 1024 (order: 0, 4096 bytes)
TCP bind hash table entries: 512 (order: -1, 2048 bytes)
TCP: Hash tables configured (established 1024 bind 512)
TCP reno registered
NetWinder Floating Point Emulator V0.97 (double precision)
io scheduler noop registered
io scheduler anticipatory registered (default)
AT91 Watchdog Timer enabled (5 seconds, nowayout)
at91_usart.0: ttyS0 at MMIO 0xfefff200 (irq = 1) is a AT91_SERIAL
at91_usart.1: ttyS1 at MMIO 0xfffc4000 (irq = 7) is a AT91_SERIAL
nbd: registered device at major 43
eth0: Link now 100-FullDuplex
eth0: AT91 ethernet at 0xfefbc000 int=24 100-FullDuplex
(12:34:56:78:99:aa)
eth0: Davicom 9161 PHY (Copper)
physmap platform flash device: 0020 at 1000
Found: Atmel AT49BV16X
physmap-flash.0: Found 1 x16 devices at 0x0 in 16-bit bank
number of JEDEC chips: 1
cfi_cmdset_0002: Disabling erase-suspend-program due to code brokenness.

at91_cf: irqs det #64, io #0
usbmon: debugfs is not available
at91_ohci at91_ohci: AT91 OHCI
at91_ohci at91_ohci: new USB bus registered, assigned bus number 1
at91_ohci at91_ohci: irq 23, io mem 0x0030
usb usb1: Product: AT91 OHCI
usb usb1: Manufacturer: Linux 2.6.18 ohci_hcd
usb usb1: SerialNumber: at91
usb usb1: configuration #1 chosen from 1 choice
hub 1-0:1.0: USB hub found
hub 1-0:1.0: 2 ports detected
udc: at91_udc version 3 May 2006
mice: PS/2 mouse device common for all mice
i2c /dev entries driver
MMC: 4 wire bus mode not supported by this driver - using 1 wire
TCP bic registered
NET: Registered protocol family 1
NET: Registered protocol family 17
eth0: Link now 100-FullDuplex
IP-Config: Guessing netmask 255.255.255.0
IP-Config: Complete:
  device=eth0, addr=192.168.0.12, mask=255.255.255.0,
gw=255.255.255.255,
 host=192.168.0.12, domain=, nis-domain=(none),
 bootserver=255.255.255.255, rootserver=192.168.0.10, rootpath=
Root-NFS: Mounting /home_own/rootfs on server 192.168.0.10 as root
Root-NFS: rsize = 4096, wsize = 4096, timeo = 0, retrans = 0
Root-NFS: acreg (min,max) = (3,60), acdir (min,max) = (30,60)
Root-NFS: nfsd port = -1, mountd port = 0, flags = 0200
Looking up port of RPC 13/2 on 192.168.0.10
Root-NFS: Portmapper on server returned 2049 as nfsd port
Looking up port of RPC 15/1 on 192.168.0.10
Root-NFS: mountd port is 792
NFS:  nfs_mount(c0a8000a:/home_own/rootfs)
VFS: Unable to mount root fs via NFS, trying floppy.
VFS: Cannot open root device "nfs" or unknown-block(2,0)
Please append a correct "root=" boot option
Kernel panic - not syncing: VFS: Unable to mount root fs on
unknown-block(2,0)


As you can see, the network and the NFS mount come up successfully. It fails
when mounting the root. I tried to track down the problem, and it seems
do_mount fails because the path doesn't exist. The root path is fixed as
"/root"; if I change that to "/" it goes further, but then it fails on the
console. I think the console problem is still related to the root mount being
wrong, even though it got past the mount with the change I made.

Can anyone help or point out where to look for clues?
-
To unsubscribe from this list: send the line "unsubscribe 

Re: Linux disk performance.

2006-12-21 Thread Bhanu Kalyan Chetlapalli

On 12/22/06, Manish Regmi <[EMAIL PROTECTED]> wrote:

On 12/22/06, Bhanu Kalyan Chetlapalli <[EMAIL PROTECTED]> wrote:
>
> I am assuming that your program is not seeking in between writes.
>
> Try disabling the disk cache; nowadays some disks can have as much
> as 8MB of write cache, so the disk might be buffering as much as it can,
> and trying to write only when it can no longer buffer. Since you have
> an app which continuously writes copious amounts of data, in order,
> disabling the write cache might make some sense.
>

Thanks  for the suggestion but the performance was terrible when write
cache was disabled.


Performance degradation is expected. But the point is - did the
anomaly, that you have pointed out, go away? Because if it did, then
it is the disk cache which is causing the issue, and you will have to
live with it. Else you will have to look elsewhere.


--
---
regards
Manish Regmi

---
UNIX without a C Compiler is like eating Spaghetti with your mouth
sewn shut. It just doesn't make sense.




--
There is only one success - to be able to spend your life in your own way.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux disk performance.

2006-12-21 Thread Manish Regmi

On 12/21/06, Erik Mouw <[EMAIL PROTECTED]> wrote:

Bursty video traffic is really an application that could take advantage
from the kernel buffering. Unless you want to reinvent the wheel and do
the buffering yourself (it is possible though, I've done it on IRIX).


But in my test O_DIRECT gave slightly better performance. Also the CPU
usage decreased.



BTW, why are you so keen on smooth-at-the-microlevel writeout? With
real time video applications it's only important not to drop frames.
How fast those frames will go to the disk isn't really an issue, as
long as you don't overflow the intermediate buffer.


Actually I don't require smooth-at-the-microlevel writeout, but the
timing bumps are overflowing the intermediate buffers. I was just
wondering if I could decrease the 20 ms bumps to 3 ms, as in the other
writes.



Erik

--
They're all fools. Don't worry. Darwin may be slow, but he'll
eventually get them. -- Matthew Lammers in alt.sysadmin.recovery




--
---
regards
Manish Regmi

---
UNIX without a C Compiler is like eating Spaghetti with your mouth
sewn shut. It just doesn't make sense.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux disk performance.

2006-12-21 Thread Manish Regmi

On 12/22/06, Bhanu Kalyan Chetlapalli <[EMAIL PROTECTED]> wrote:


I am assuming that your program is not seeking in between writes.

Try disabling the disk cache; nowadays some disks can have as much
as 8MB of write cache, so the disk might be buffering as much as it can,
and trying to write only when it can no longer buffer. Since you have
an app which continuously writes copious amounts of data, in order,
disabling the write cache might make some sense.



Thanks  for the suggestion but the performance was terrible when write
cache was disabled.

--
---
regards
Manish Regmi

---
UNIX without a C Compiler is like eating Spaghetti with your mouth
sewn shut. It just doesn't make sense.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUG][PATCH] fix oom killer kills current every time if there is memory-less-node

2006-12-21 Thread KAMEZAWA Hiroyuki
On Thu, 21 Dec 2006 21:18:12 -0800
Paul Jackson <[EMAIL PROTECTED]> wrote:

> KAMEZAWA-san wrote:
> > But there is memory-less-node. contstrained_alloc() should get
> > memory_less_node into count.
> 
> This patch looks ok to me.
> 
> One line in the patch comment seems backward:
> 
>   If zone_list includes all nodes, it thinks oom is from mempolicy.
> 
> Shouldn't that be:
> 
>   If zone_list doesn't include all nodes, it thinks oom is from mempolicy.
> 
Ah, yes. (>_<, Thank you.

-Kame

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUG][PATCH] fix oom killer kills current every time if there is memory-less-node

2006-12-21 Thread Paul Jackson
KAMEZAWA-san wrote:
> But there is memory-less-node. contstrained_alloc() should get
> memory_less_node into count.

This patch looks ok to me.

One line in the patch comment seems backward:

  If zone_list includes all nodes, it thinks oom is from mempolicy.

Shouldn't that be:

  If zone_list doesn't include all nodes, it thinks oom is from mempolicy.

-- 
  I won't rest till it's the best ...
  Programmer, Linux Scalability
  Paul Jackson <[EMAIL PROTECTED]> 1.925.600.0401
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC][PATCH 2.6.19 take2 1/5] marking __init and remove drop initialization

2006-12-21 Thread Matt Mackall
On Thu, Dec 21, 2006 at 07:03:23PM +0900, Keiichi KII wrote:
>  - remove "drop" initialization in the netpoll structure.

Why?

-- 
Mathematics is the supreme nostalgia of our time.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Finding hardlinks

2006-12-21 Thread Jan Harkes
On Fri, Dec 22, 2006 at 12:49:42AM +0100, Mikulas Patocka wrote:
> On Thu, 21 Dec 2006, Jan Harkes wrote:
> >On Wed, Dec 20, 2006 at 12:44:42PM +0100, Miklos Szeredi wrote:
> >>The stat64.st_ino field is 64bit, so AFAICS you'd only need to extend
> >>the kstat.ino field to 64bit and fix those filesystems to fill in
> >>kstat correctly.
> >
> >Coda actually uses 128-bit file identifiers internally, so 64-bits
> >really doesn't cut it. Since the 128-bit space is used pretty sparsely
> 
> The problem is that if inode number collision happens occasionally, you 
> get data corruption with cp -a command --- it will just copy one file and 
> hardlink the other.

Our 128-bit space is fairly sparse and there is some regularity so we
optimized the hash to minimize the chance of collisions. This is also
useful for iget5_locked, each 32-bit ino_t is effectively a hash bucket
in our case and avoiding collisions makes the lookup in the inode cache
more efficient.

Another part is that only few applications actually care about hardlinks
(cp -a, rsync, tar/afio). All of these already could miss some files or
create false hardlinks when files in the tree are renamed during the
copy. We also have a special atomic volume snapshot function that is
used to create a backup, which backs up additional attributes that are
not visible through the standard POSIX/vfs api (directory acls,
creator/owner information, version-vector information for conflict
detection and resolution)

I've also found that most applications that care about hardlinks already
have a check whether the link count is greater than one and the object
is not a directory. This is probably done more for efficiency, it would
be a waste of memory to track every object as a possible hardlink.

And because Coda already restricts hardlinks in many cases they end up
not being used very much, or get converted by a cross-directory rename
to COW objects which of course have nlink == 1.

> If user (or script) doesn't specify that flag, it doesn't help. I think 
> the best solution for these filesystems would be either to add new syscall
>   int is_hardlink(char *filename1, char *filename2)
> (but I know adding syscall bloat may be objectionable)
> or add new field in statvfs ST_HAS_BROKEN_INO_T, that applications can 
> test and disable hardlink processing.

BROKEN_INO_T sounds a bit harsh, and something like that would really
have to be incorporated in the SuS standard for it to be useful. It also
would require all applications to be updated to check for this flag. On
the other hand if we don't worry about this flag we just have to fix the
few applications that do not yet check that nlink>1 && !IS_DIR. Those
applications would probably appreciate the resulting reduced memory
requirements and performance increase because they end up with
considerably fewer candidates in their internal list of potential
hardlinked objects.

Of course this doesn't solve the problem for some filesystem with
larger than 64-bit object identifiers that wants to support normal
hardlinked files. But adding a BROKEN_INO_T flag doesn't solve it
either, since the backup/copy would not perform hardlink processing in
which case such a file system can just as well always pretend that
i_nlink for files is always one.

Jan

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mm: fix page_mkclean_one (was: 2.6.19 file content corruption on ext3)

2006-12-21 Thread Linus Torvalds


On Thu, 21 Dec 2006, Gordon Farquharson wrote:
> 
> I tested 2.6.19 with a version of Linus's patch that applies cleanly
> to 2.6.19 (patch appended to the end of this email) on ARM and apt-get
> failed. It did not segfault this time, but instead got stuck for about
> 20 to 30 minutes and was accessing the hard drive frequently.

Ok, there's definitely something screwy going on.

Andrew located at least one bug: we run cancel_dirty_page() too late in 
"truncate_complete_page()", which means that do_invalidatepage() ends up 
not clearing the page cache.

His patch is appended.

But it sounds like I probably misunderstood something, because I thought 
that Martin had acknowledged that this patch actually worked for him. 
Which sounded very similar to your setup (he has a 32M ARM box too, no?)

And your failure sounds a lot like one that David Miller is reporting. At 
the same time, my own shared file mmap tests on my own machines obviously 
work fine (I lower the dirty-writeback thresholds to force writeback more 
easily, and then mmap a file and write and rewrite to it in memory, and 
truncate it).

Maybe it's a mount option issue? I've got data=ordered on my machine, are 
you perhaps running with something else?

Linus

---
commit 3e67c0987d7567ad41164a153dca9a43b11d
Author: Andrew Morton <[EMAIL PROTECTED]>
Date:   Thu Dec 21 11:00:33 2006 -0800

[PATCH] truncate: clear page dirtiness before running try_to_free_buffers()

truncate presently invalidates the dirty page's buffer_heads then shoots 
down
the page.  But try_to_free_buffers() will now bale out because the page is
dirty.

Net effect: the LRU gets filled with dirty pages which have invalidated
buffer_heads attached.  They have no ->mapping and hence cannot be cleaned.
The machine leaks memory at an enormous rate.

Fix this by cleaning the page before running try_to_free_buffers(), so
try_to_free_buffers() can do its work.

Also, remember to do dirty-page-accounting in cancel_dirty_page() so the
machine won't wedge up trying to write non-existent dirty pages.

Probably still wrong, but now less so.

Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>

diff --git a/mm/truncate.c b/mm/truncate.c
index bf9e296..89a5c35 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -60,11 +60,12 @@ void cancel_dirty_page(struct page *page, unsigned int 
account_size)
WARN_ON(++warncount < 5);
}

-   if (TestClearPageDirty(page) && account_size)
+   if (TestClearPageDirty(page) && account_size) {
+   dec_zone_page_state(page, NR_FILE_DIRTY);
task_io_account_cancelled_write(account_size);
+   }
 }
 
-
 /*
  * If truncate cannot remove the fs-private metadata from the page, the page
  * becomes anonymous.  It will be left on the LRU and may even be mapped into
@@ -81,11 +82,11 @@ truncate_complete_page(struct address_space *mapping, 
struct page *page)
if (page->mapping != mapping)
return;
 
+   cancel_dirty_page(page, PAGE_CACHE_SIZE);
+
if (PagePrivate(page))
do_invalidatepage(page, 0);
 
-   cancel_dirty_page(page, PAGE_CACHE_SIZE);
-
ClearPageUptodate(page);
ClearPageMappedToDisk(page);
remove_from_page_cache(page);
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [Bug 7505] Linux-2.6.18 fails to boot on AMD64 machine

2006-12-21 Thread Zhang, Yanmin
>>-Original Message-
>>From: Ard -kwaak- van Breemen [mailto:[EMAIL PROTECTED]
>>Sent: 2006年12月22日 5:06
>>To: Zhang, Yanmin
>>Cc: Andrew Morton; Chuck Ebbert; Yinghai Lu; [EMAIL PROTECTED]; [EMAIL 
>>PROTECTED]; linux-kernel@vger.kernel.org;
>>[EMAIL PROTECTED]; Eric W. Biederman
>>Subject: Re: [Bug 7505] Linux-2.6.18 fails to boot on AMD64 machine
>>
>>On Thu, Dec 21, 2006 at 04:04:04PM +0800, Zhang, Yanmin wrote:
>>> I couldn't reproduce it on my EM64T machine. I instrumented function 
>>> start_kernel and
>>> didn't find irq was enabled before calling init_IRQ. It'll be better if the 
>>> reporter could
>>> instrument function start_kernel to capture which function enables irq.
>>
>>Editing init/main.c:
>>preempt_disable();
>>if (!irqs_disabled())
>>printk("start_kernel(): bug: interrupts were enabled 
>> early\n");
>>printk("BLAAT17");
>>build_all_zonelists();
>>if (!irqs_disabled())
>>printk("start_kernel(): bug: interrupts were enabled 
>> early\n");
>>printk("BLAAT18");
>>page_alloc_init();
>>if (!irqs_disabled())
>>printk("start_kernel(): bug: interrupts were enabled 
>> early\n");
>>printk("BLAAT19");
>>printk(KERN_NOTICE "Kernel command line: %s\n", saved_command_line);
>>parse_early_param();
>>if (!irqs_disabled())
>>printk("start_kernel(): bug: interrupts were enabled 
>> early\n");
>>printk("BLAAT20");
>>parse_args("Booting kernel", command_line, __start___param,
>>   __stop___param - __start___param,
>>   _bootoption);
>>printk("BLAAT21");
>>if (!irqs_disabled())
>>printk("start_kernel(): bug: interrupts were enabled 
>> early\n");
>>sort_main_extable();
>>if (!irqs_disabled())
>>printk("start_kernel(): bug: interrupts were enabled 
>> early\n");
>>printk("BLAAT22");
>>trap_init();
>>if (!irqs_disabled())
>>printk("start_kernel(): bug: interrupts were enabled 
>> early\n");
>>printk("BLAAT23");
>>
>>Results in:
>>^MAllocating PCI resources starting at 8800 (gap: 8000:6000)
>>^MBLAAT12BLAAT13<6>PERCPU: Allocating 32960 bytes of per cpu data
>>^MBLAAT14BLAAT15BLAAT16BLAAT17Built 2 zonelists.  Total pages: 1032635
>>^MBLAAT18BLAAT19<5>Kernel command line: console=tty0 console=ttyS0,115200 
>>hdb=noprobe hdc=noprobe hdd=noprobe root=/dev/md0 ro panic=30
>>earlyprintk=serial,ttyS0,115200
>>^MBLAAT20<6>ide_setup: hdb=noprobe
>>^Mide_setup: hdc=noprobe
>>^Mide_setup: hdd=noprobe
>>^MBLAAT21start_kernel(): bug: interrupts were enabled early
>>^Mstart_kernel(): bug: interrupts were enabled early
>>^MBLAAT22Initializing CPU#0
>>
>>Hmmm, that actually doesn't make sense to me (unless parse_args is able to 
>>enable irq's).
I think parse_args enables irq when it calls callbacks.
Could you try below?
1) Test Andrew's patch of sema down_write;
2) Apply below patch and see what the output is when booting. If the output has
"[BUG]..address.", Pls. map the address to function name by System.map.

--- linux-2.6.19/kernel/params.c2006-12-08 15:32:49.0 +0800
+++ linux-2.6.19_work/kernel/params.c   2006-12-22 12:28:38.0 +0800
@@ -53,13 +53,22 @@ static int parse_one(char *param,
 int (*handle_unknown)(char *param, char *val))
 {
unsigned int i;
+   int result;
+   int irq_is_disabled;
 
/* Find parameter */
for (i = 0; i < num_params; i++) {
if (parameq(param, params[i].name)) {
DEBUGP("They are equal!  Calling %p\n",
   params[i].set);
-   return params[i].set(val, [i]);
+   irq_is_disabled = irqs_disabled();
+   result = params[i].set(val, [i]);
+   if (irq_is_disabled && !irqs_disabled())
+   {
+   printk("[BUG] parse_one: function %p enabled 
irq!\n",
+   params[i].set);
+   }
+   return result;
}
}
 
--- linux-2.6.19/init/main.c2006-12-08 15:32:49.0 +0800
+++ linux-2.6.19_work/init/main.c   2006-12-22 12:28:50.0 +0800
@@ -181,6 +181,7 @@ static int __init obsolete_checksetup(ch
 {
struct obs_kernel_param *p;
int had_early_param = 0;
+   int result, irq_is_disabled;
 
p = __setup_start;
do {
@@ -197,8 +198,17 @@ static int __init obsolete_checksetup(ch
printk(KERN_WARNING "Parameter %s is obsolete,"
   " ignored\n", p->str);
return 1;
-   } else if (p->setup_func(line + 

Re: [PATCH] mm: fix page_mkclean_one (was: 2.6.19 file content corruption on ext3)

2006-12-21 Thread Gordon Farquharson

On 12/21/06, Andrew Morton <[EMAIL PROTECTED]> wrote:


> Can the call to task_io_account_cancelled_write() simply be removed
> from cancel_dirty_page() for testing the patch with 2.6.19 (since
> 2.6.19 doesn't seem to have the task I/O accounting) ?

Yes.


I tested 2.6.19 with a version of Linus's patch that applies cleanly
to 2.6.19 (patch appended to the end of this email) on ARM and apt-get
failed. It did not segfault this time, but instead got stuck for about
20 to 30 minutes and was accessing the hard drive frequently.

Here is some background about the problem we see with apt which may
help somebody with knowledge of the apt source code analyse the
problem in the context of the patch. When apt-get is first run, it
generates pkgcache.bin and srcpkgcache.bin in /var/cache/apt. We have
found that these are the files that get corrupted when we apply the
patch "mm: tracking shared dirty pages" [1] to 2.6.18. The corruption
of these files is what causes apt-get to segfault. I have observed
that the normal operation of apt-get is that while apt-get is
generating these files, pkgcache.bin grows to 12582912 bytes, and when
apt-get finishes, pkgcache.bin is 6425533 bytes and srcpkgcache.bin is
64254483 bytes. This time, when apt-get exited, it had only created
pkgcache.bin which was still 12582912 bytes. Also, the patch caused
apt to slow down a lot. I ran apt-get -f install after apt had exited,
and it took so long that I killed it before it had finished.

I did not try 2.6.20-git, but I presume that this version is what
Martin tried earlier. Maybe Linus's patch doesn't work with 2.6.19,
because 2.6.19 is missing some other patch.

Gordon

[1] 
http://www2.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=d08b3851da41d0ee60851f2c75b118e1f7a5fc89

diff -Naupr linux-2.6.19.orig/fs/buffer.c linux-2.6.19/fs/buffer.c
--- linux-2.6.19.orig/fs/buffer.c   2006-11-29 14:57:37.0 -0700
+++ linux-2.6.19/fs/buffer.c2006-12-21 01:16:31.0 -0700
@@ -2832,7 +2832,7 @@ int try_to_free_buffers(struct page *pag
   int ret = 0;

   BUG_ON(!PageLocked(page));
-   if (PageWriteback(page))
+   if (PageDirty(page) || PageWriteback(page))
   return 0;

   if (mapping == NULL) {  /* can this still happen? */
@@ -2843,17 +2843,6 @@ int try_to_free_buffers(struct page *pag
   spin_lock(>private_lock);
   ret = drop_buffers(page, _to_free);
   spin_unlock(>private_lock);
-   if (ret) {
-   /*
-* If the filesystem writes its buffers by hand (eg ext3)
-* then we can have clean buffers against a dirty page.  We
-* clean the page here; otherwise later reattachment of buffers
-* could encounter a non-uptodate page, which is unresolvable.
-* This only applies in the rare case where try_to_free_buffers
-* succeeds but the page is not freed.
-*/
-   clear_page_dirty(page);
-   }
out:
   if (buffers_to_free) {
   struct buffer_head *bh = buffers_to_free;
diff -Naupr linux-2.6.19.orig/fs/hugetlbfs/inode.c
linux-2.6.19/fs/hugetlbfs/inode.c
--- linux-2.6.19.orig/fs/hugetlbfs/inode.c  2006-11-29
14:57:37.0 -0700
+++ linux-2.6.19/fs/hugetlbfs/inode.c   2006-12-21 01:15:21.0 -0700
@@ -176,7 +176,7 @@ static int hugetlbfs_commit_write(struct

 static void truncate_huge_page(struct page *page)
{
-   clear_page_dirty(page);
+   cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
   ClearPageUptodate(page);
   remove_from_page_cache(page);
   put_page(page);
diff -Naupr linux-2.6.19.orig/include/linux/page-flags.h
linux-2.6.19/include/linux/page-flags.h
--- linux-2.6.19.orig/include/linux/page-flags.h2006-11-29
14:57:37.0 -0700
+++ linux-2.6.19/include/linux/page-flags.h 2006-12-21
01:15:21.0 -0700
@@ -253,15 +253,11 @@ static inline void SetPageUptodate(struc

 struct page;   /* forward declaration */

-int test_clear_page_dirty(struct page *page);
+extern void cancel_dirty_page(struct page *page, unsigned int account_size);
+
 int test_clear_page_writeback(struct page *page);
 int test_set_page_writeback(struct page *page);

-static inline void clear_page_dirty(struct page *page)
-{
-   test_clear_page_dirty(page);
-}
-
 static inline void set_page_writeback(struct page *page)
{
   test_set_page_writeback(page);
diff -Naupr linux-2.6.19.orig/mm/memory.c linux-2.6.19/mm/memory.c
--- linux-2.6.19.orig/mm/memory.c   2006-11-29 14:57:37.0 -0700
+++ linux-2.6.19/mm/memory.c2006-12-21 01:15:21.0 -0700
@@ -1832,6 +1832,33 @@ void unmap_mapping_range(struct address_
}
EXPORT_SYMBOL(unmap_mapping_range);

+static void check_last_page(struct address_space *mapping, loff_t size)
+{
+   pgoff_t index;
+   unsigned int offset;
+   struct page *page;
+
+   if (!mapping)
+ 

[BUG][PATCH] fix oom killer kills current every time if there is memory-less-node

2006-12-21 Thread KAMEZAWA Hiroyuki
constrained_alloc(), which is called to detect where oom is from,
checks passed zone_list().
If zone_list includes all nodes, it thinks oom is from mempolicy.

But there may be a memory-less node. constrained_alloc() should take
memory-less nodes into account. Otherwise, the current process will die
at any time. This patch fixes it.

Signed-Off-By: KAMEZAWA Hiroyuki <[EMAIL PROTECTED]>



 mm/oom_kill.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletion(-)

Index: devel-2.6.20-rc1-mm1/mm/oom_kill.c
===
--- devel-2.6.20-rc1-mm1.orig/mm/oom_kill.c 2006-12-16 13:47:59.0 
+0900
+++ devel-2.6.20-rc1-mm1/mm/oom_kill.c  2006-12-22 12:11:55.0 +0900
@@ -174,7 +174,12 @@
 {
 #ifdef CONFIG_NUMA
struct zone **z;
-   nodemask_t nodes = node_online_map;
+   nodemask_t nodes;
+   int node;
+   /* node has memory ? */
+   for_each_online_node(node)
+   if (NODE_DATA(node)->node_present_pages)
+   node_set(node, nodes);
 
for (z = zonelist->zones; *z; z++)
if (cpuset_zone_allowed_softwall(*z, gfp_mask))

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


PATA_SIS and SIS 5513

2006-12-21 Thread Ioan Ionita

pata_sis will not work with my CD-ROM

dmesg output when trying to mount a cd-rom:

ata2.00: qc timeout (cmd 0xa0)
ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x2 frozen
ata2.00: (BMDMA stat 0x24)
ata2.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 cdb 0x28 data 4096 in
res 51/51:03:00:00:00/00:00:00:00:00/a0 Emask 0x5 (timeout)
ata2: port is slow to respond, please be patient (Status 0xd0)
ata2: port failed to respond (30 secs, Status 0xd0)
ata2: soft resetting port
ata2.00: configured for UDMA/33
ata2: EH complete
ata2.00: qc timeout (cmd 0xa0)
ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x2 frozen
ata2.00: (BMDMA stat 0x24)
ata2.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 cdb 0x28 data 4096 in
res 51/51:03:00:00:00/00:00:00:00:00/a0 Emask 0x5 (timeout)
ata2: port is slow to respond, please be patient (Status 0xd0)
ata2: port failed to respond (30 secs, Status 0xd0)
ata2: soft resetting port
ata2.00: configured for UDMA/33
ata2: EH complete
ata2.00: qc timeout (cmd 0xa0)
ata2.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x2 frozen
ata2.00: (BMDMA stat 0x24)
ata2.00: cmd a0/01:00:00:00:00/00:00:00:00:00/a0 tag 0 cdb 0x28 data 4096 in
res 51/51:03:00:00:00/00:00:00:00:00/a0 Emask 0x5 (timeout)
[EMAIL PROTECTED] 21:52:39 ~/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] rcu: rcutorture suspend fix

2006-12-21 Thread Paul E. McKenney
On Fri, Dec 22, 2006 at 01:08:13AM +0100, Ingo Molnar wrote:
> Subject: [patch] rcu: rcutorture suspend fix
> From: Ingo Molnar <[EMAIL PROTECTED]>
> 
> fix suspend hang: rcutorture threads need to be nofreeze.

Looks straightforward enough -- I take it that rcutorture continues
upon resume?  So I have to ask...  Would it make sense to simply unload
the rcutorture module upon suspend?

But either way this is an improvement, so...

Acked-by: Paul E. McKenney <[EMAIL PROTECTED]>
> Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
> ---
>  kernel/rcutorture.c |3 +++
>  1 file changed, 3 insertions(+)
> 
> Index: linux/kernel/rcutorture.c
> ===
> --- linux.orig/kernel/rcutorture.c
> +++ linux/kernel/rcutorture.c
> @@ -522,6 +522,7 @@ rcu_torture_writer(void *arg)
> 
>   VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
>   set_user_nice(current, 19);
> + current->flags |= PF_NOFREEZE;
> 
>   do {
>   schedule_timeout_uninterruptible(1);
> @@ -561,6 +562,7 @@ rcu_torture_fakewriter(void *arg)
> 
>   VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
>   set_user_nice(current, 19);
> + current->flags |= PF_NOFREEZE;
> 
>   do {
>   schedule_timeout_uninterruptible(1 + rcu_random()%10);
> @@ -591,6 +593,7 @@ rcu_torture_reader(void *arg)
> 
>   VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
>   set_user_nice(current, 19);
> + current->flags |= PF_NOFREEZE;
> 
>   do {
>   idx = cur_ops->readlock();
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [powerpc] Fix bogus BUG_ON() in in hugetlb_get_unmapped_area()

2006-12-21 Thread David Gibson
On Fri, Dec 22, 2006 at 01:31:26AM +0100, Segher Boessenkool wrote:
> > +   if (len > TASK_SIZE)
> > +   return -ENOMEM;
> 
> Shouldn't that be addr+len instead?  The check looks incomplete
> otherwise.  And you meant ">=" I guess?

No.  Have a look at the other hugetlb_get_unmapped_area()
implementations.  Because this is in the get_unmapped_area() path,
'addr' is just a hint, so checking addr+len would give bogus
failures.  This test is, I believe, essentially an optimization - if
it fails, we're never going to find a suitable addr, so we might as
well give up now.

> > -   /* Paranoia, caller should have dealt with this */
> > -   BUG_ON((addr + len) > 0x1UL);
> > -
> 
> Any real reason to remove the paranoia check?  If it's trivially
> always satisfied, the compiler will get rid of it for you :-)

Yes - this is the very bug on which was causing crashes - the "caller
should have dealt with this" comment is wrong.  The test has been
moved into htlb_check_hinted_area() and now simply fails (and so falls
back to searching for a suitable address), rather than BUG()ing.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [-mm patch] ptrace: make {put,get}reg work again for gs and fs

2006-12-21 Thread Jeremy Fitzhardinge
Frederik Deweerdt wrote:
> This is a -mm1 kernel + your efl_offset fix + the attached patch.
> So the problem came from putreg still saving %gs to the stack where
> there's no slot for it, whereas getreg got things right.
>   

That patch looks good, but I think it is already effectively in Andrew's
queue, because I noticed some problems in there when I reviewed  the
convert-to-%fs patch.

J
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Network drivers that don't suspend on interface down

2006-12-21 Thread Matt Domsch
On Thu, Dec 21, 2006 at 01:27:55PM -0500, [EMAIL PROTECTED] wrote:
> On Wed, 20 Dec 2006 22:06:51 EST, Dan Williams said:
> > It's also complicated because some switches are supposed to rfkill both
> > an 802.11 module _and_ a bluetooth module at the same time, or I guess
> > some laptops may even have one rfkill switch for each wireless device.
> 
> On my Dell D820, it's bios-selectable if the switch is enabled, or if
> it controls just the 802.11 card, or 802.11 and bluetooth, or just bluetooth,
> or 802.11 and mobile broadband, or ...
> 
> This way lies madness. :)
> 
> (Oddest part - said bios config screen offers the choices for bluetooth
> and mobile broadband even though the hardware config doesn't include it. ;)

In this case changing the UI based on presence (and thus the printed
docs etc.) winds up being difficult.  Think of it as an embedded
advertisement - you too could have bluetooth and mobile broadband... :-)

-Matt

-- 
Matt Domsch
Software Architect
Dell Linux Solutions linux.dell.com & www.dell.com/linux
Linux on Dell mailing lists @ http://lists.us.dell.com
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch 2/12] IPMI: remove interface number limits

2006-12-21 Thread Paul E. McKenney
On Fri, Dec 01, 2006 at 10:24:22PM -0600, Corey Minyard wrote:
> 
> This patch removes the arbitrary limit of number of IPMI interfaces.
> This has been tested with 8 interfaces.

I got a bit lost in this patch, so applied it to 2.6.19-rc6 and looked
over the resulting file.  Some of the issues predate this patch, which
I guess goes to show that I have not been paying enough attention.
I do not claim to be an IPMI expert, so may well be missing something.

In general, though, good application of RCU -- SMIs and locks don't
get along so well.  ;-)

All that said, here are my thoughts.  The first two issues (marked with
"!")  seem to need the most urgent attention.

Thanx, Paul

drivers/char/ipmi/ipmi_msghandler.c:

!   clean_up_interface_data(), line 394: What prevents an RCU reader
from finding the raw list_head "list" and interpreting stack
garbage as a semi-valid ipmi_smi_t?  (The list_add_rcu() adds
an unadorned struct list_head to the list.)

Unless the RCU readers do something clever to reject this
entry, should instead link an ipmi_smi_t into the list to
avoid confusing the readers.  See below, at least some
RCU readers are not being clever.

!   clean_up_interface_data(), line 395: If RCU readers expect to
terminate their list traversal by finding the header, they
could well be severely disappointed when the list_del_rcu()
removes it...  The header being removed is the cmd_rcvrs
field of the ipmi_smi_t.

o   ipmi_destroy_user() line 880 does such a scan, but
under the cmd_rcvrs_mutex, so OK.  Doesn't really
need the _rcu suffix here, but doesn't hurt to have it.

o   find_cmd_rcvr() line 992 does such a scan.  It is
invoked as follows:

o   Under cmd_rcvrs_mutex near line 1064 in
ipmi_unregister_for_cmd().

!   Under rcu_read_lock() near line 2686 in
handle_ipmb_get_msg_cmd(), called from
handle_new_recv_msg(), in turn called from:

o   ipmi_smi_msg_received() line 3287.
This may be called externally, so
cannot hold the cmd_rcvrs_mutex.

o   ipmi_timeout_handler() line 3428,
called from ipmi_timeout() near line
3493.  This is called from the
timeout system (setup_timer() and
mod_timer()), so cannot hold the
lock.

This loop also references a number of fields
outside of the list_head, so is a problem
for the addition of the raw struct list_head.

o   is_cmd_rcvr_exclusive() at line 1007.  This is called
from ipmi_register_for_cmd(), but under cmd_rcvrs_mutex,
so OK as is.

o   ipmi_register_smi() line 2494 initializes a newly
allocated structure, so not yet accessible to readers.

One way to fix this would be to leave the next pointer referencing
the old list header, so that readers would find their way home.
This would require waiting for a grace period after making this
change, for example (untested, probably broken, but hopefully gets
the idea across):

if (list_empty(>cmd_rcvrs))
INIT_LIST_HEAD();
else {
list->next = intf->cmd_rcvrs->next;
list->prev = intf->cmd_rcvrs->prev;
intf->cmd_rcvrs->next = >cmd_rcvrs;
intf->cmd_rcvrs->prev = >cmd_rcvrs;

/* List body still points to intf->cmd_rcvrs. */

synchronize_rcu();

/* All readers have exited list body. */

list->next->prev = 
list->prev->next = 
}

/*
 * Note that we -don't- need the synchronize_rcu()
 * currently following the mutex_unlock().
 */

If this sort of thing happens often, we should make a
list_privatize_rcu(global_list, local_receiving_list) or some such.

!   ipmi_register_smi() near line 2504: how does "i" get assigned
to intf->intf_num before the struct is visible to RCU
readers?  Or why doesn't it have to be?  It is initialized
to -1, so maybe that helps...

OK, I see the assignment at line 2561 near the end of
ipmi_register_smi() -- it "turns on" the new ipmi_smi_t structure.
But what keeps 

Re: my handy-dandy, "coding style" script

2006-12-21 Thread David Rientjes
On Fri, 22 Dec 2006, Jan Engelhardt wrote:

> >These casts can eliminate "return value unused" warnings.
> 
> But only when functions are tagged __must_check, and sprintf is not. 
> cmpxchg is one where (void) is 'needed', __as I wrote__ in a cxgb3 
> comment.
> 

gcc requires functions to be declared with the attribute 
warn_unused_result if a warning should be emitted in these cases.  So 
casting sprintf or any function without warn_unused_result to (void) is 
only visual noise within the source code.  Thus, the patch is correct.

David
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] add i386 idle notifier (take 3)

2006-12-21 Thread Adrian Bunk
On Thu, Dec 21, 2006 at 01:12:42AM -0800, Stephane Eranian wrote:
> Andrew,
> 
> On Wed, Dec 20, 2006 at 09:05:14PM -0800, Andrew Morton wrote:
> > On Wed, 20 Dec 2006 06:05:00 -0800
> > Stephane Eranian <[EMAIL PROTECTED]> wrote:
> > 
> > > Hello,
> > > 
> > > Here is the latest version of the idle notifier for i386.
> > > This patch is against 2.6.20-rc1 (GIT). In this kernel, the idle
> > > loop code was modified such that the lowest level idle
> > > routines do not have loops anymore (e.g., poll_idle). As such,
> > > we do not need to call enter_idle() in all the interrupt handlers.
> > > 
> > > This patch also duplicates the x86-64 bug fix for a race condition
> > > as posted by Venkatesh Pallipadi from Intel.
> > > 
> > > changelog:
> > >   - add idle notification mechanism to i386
> > > 
> > 
> > None of the above text is actually usable as a changelog entry.  We are
> > left wondering:
> > 
> > - why is this patch needed?
> > 
> > - what does it do?
> > 
> > - how does it do it?
> > 
> > The three questions which all changelogs should answer ;)
> 
> Sorry about that. Here is a new changelog:
> 
> changelog:
>   - add a notifier mechanism to the low level idle loop. You can
> register a callback function which gets invoked on entry and exit
> from the low level idle loop. The low level idle loop is defined as
> the polling loop, low-power call, or the mwait instruction. Interrupts
> processed by the idle thread are not considered part of the low level
> loop. The notifier can be used to measure precisely how much is spent
> in useless execution (or low power mode). The perfmon subsystem uses 
> it
> to turn on/off monitoring.


Why is this patch not submitted as part of the perfmon patch that also 
adds a user of this code?

And why does it bloat the kernel with EXPORT_SYMBOL's although even your 
perfmon-new-base-061204 doesn't seem to add any modular user?


> -Stephane

cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Network drivers that don't suspend on interface down

2006-12-21 Thread Herbert Xu
Matthew Garrett <[EMAIL PROTECTED]> wrote:
> 
> In terms of what I've seen on vaguely modern hardware, I'd guess at 
> e1000 and sky2 as the top ones. b44 is still common in cheaper hardware, 
> with via-rhine appearing at the very low end. I'll try to grep through 
> our hardware database results to get a stronger idea about percentages.

The Sony laptop that I bought a year ago still has an e100 chipset so
that's probably worth fixing too.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] mm: export cancel_dirty_page()

2006-12-21 Thread Linus Torvalds


On Fri, 22 Dec 2006, Ingo Molnar wrote:
> 
> ah, indeed - but i dont see a fundamental reason why hugetlbfs is not 
> modular. Nevertheless exporting this makes sense. My quick hack below to 
> guess to convert reiserfs (just to make the rpm build) also needs it.

Yes, it should be exported regardless.

However, I'm not sure your reiserfs change is valid: why was that old 
code testing "PAGE_SIZE == bh->b_size"?

(Not that I see why the _old_ code would be valid either, and why you'd 
ever care about b_size being PAGE_SIZE, but I'm just wondering..)

Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


"Please report the result to linux-kernel to fix this permanently"

2006-12-21 Thread Diego Calleja
There's a bug in the bugzilla (http://bugzilla.kernel.org/show_bug.cgi?id=7531) 
that
is asking to be reported here. The full dmesg (with and without 
'pci=assign-busses')
can be found in the link.


[17179574.14] Boot video device is :01:05.0
[17179574.14] PCI: Transparent bridge - :00:14.4
[17179574.14] PCI: Bus #06 (-#09) is hidden behind transparent bridge #05 
(-#05) (try 'pci=assign-busses')
[17179574.14] Please report the result to linux-kernel to fix this 
permanently
[17179574.14] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
[17179574.144000] ACPI: PCI Interrupt Link [LNKA] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: PCI Interrupt Link [LNKB] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: PCI Interrupt Link [LNKC] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: PCI Interrupt Link [LNKD] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: PCI Interrupt Link [LNKE] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: PCI Interrupt Link [LNKF] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: PCI Interrupt Link [LNKG] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: PCI Interrupt Link [LNKH] (IRQs 10 11) *0, disabled.
[17179574.144000] ACPI: Embedded Controller [EC0] (gpe 24) interrupt mode.
[17179574.148000] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.P2P_._PRT]
[17179574.148000] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0.AGP_._PRT]

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: fuse, get_user_pages, flush_anon_page, aliasing caches and all that again

2006-12-21 Thread Randolph Chung

I understand now.  I'm not sure how the PARISC implementation can be
correct in this light.


According to cachetlb.txt:

  void flush_anon_page(struct page *page, unsigned long vmaddr)
When the kernel needs to access the contents of an anonymous
page, it calls this function (currently only
get_user_pages()).  Note: flush_dcache_page() deliberately
doesn't work for an anonymous page.  The default
implementation is a nop (and should remain so for all coherent
architectures).  For incoherent architectures, it should flush
the cache of the page at vmaddr in the current user process.
   

Is the documentation wrong?

randolph
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Binary Drivers

2006-12-21 Thread Valdis . Kletnieks
On Thu, 21 Dec 2006 16:12:57 CST, Scott Preece said:
> On 12/21/06, David Schwartz <[EMAIL PROTECTED]> wrote:

> > How would you feel if you bought a car and then discovered that the
> > manufacturer had welded the hood shut? How many people still do their own
> > oil changes anyway?
> ---
> 
> But there is no legal or moral obligation for the carmake to sell you
> the service manual for the vehicle or provide you with their periodic
> service bulletins...

As a matter of fact, at least in the US, the carmakers *do* have to supply
relevant information for emissions-control systems to all repair shops:

42 U.S.C. § 7521(m)(5)

http://www.law.cornell.edu/uscode/html/uscode42/usc_sec_42_7521000-.html

Efforts to vastly expand that have been surfacing every Congressional session
for the last few years.  The most recent incarnation:

http://www.govtrack.us/congress/bill.xpd?bill=h109-2048



pgpfvdEkEqYV5.pgp
Description: PGP signature


Re: [powerpc] Fix bogus BUG_ON() in in hugetlb_get_unmapped_area()

2006-12-21 Thread Segher Boessenkool

+   if (len > TASK_SIZE)
+   return -ENOMEM;


Shouldn't that be addr+len instead?  The check looks incomplete
otherwise.  And you meant ">=" I guess?


-   /* Paranoia, caller should have dealt with this */
-   BUG_ON((addr + len) > 0x1UL);
-


Any real reason to remove the paranoia check?  If it's trivially
always satisfied, the compiler will get rid of it for you :-)

Cheers,


Segher

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


v2.6.20-rc1-rt3, yum/rpm

2006-12-21 Thread Ingo Molnar
i have released the 2.6.20-rc1-rt3 tree, which can be downloaded from 
the usual place:

  http://redhat.com/~mingo/realtime-preempt/

more info about the -rt patchset can be found on the RT wiki:

  http://rt.wiki.kernel.org

this is a rebase of -rt to v2.6.20, plus lots of fixes all around the 
place. Given that v2.6.20 is a stabilization release, i rebased the -rt 
tree earlier than usual, and it has worked out so far and is converging 
pretty fast. Changes since 2.6.19-rt6:

 - scheduling latency fixes on SMP systems

 - various high-res timers and dynticks fixes

 - all hardirq and softirq threads now default to SCHED_FIFO:50, use 
   rtirq, set_kthread_prio or raw chrt to tune them. Note: the naming of 
   IRQ threads has changed to "IRQ-123", from the "IRQ 123" naming, to 
   make it easier to script them.

 - NUMA/slab-rt fixes

 - tracer fixes

 - merge the latest ARM patches

 - lockless pagecache patchset from Nick Piggin, ported to -rt by Peter
   Zijlstra

 - files_lock scalability patchset from Peter Zijlstra

 - more /proc/lockdep dependency info from Jason Baron

 - latest lockdep fixes

 - latest e1000 fixes

 - /proc/timer_list for timer info and enhanced /proc/timer_stats 
   support via CONFIG_TIMER_STATS.

 - fixed bzImage boot hang when FUNCTION_TRACING/mcount enabled.

 - lots of other fixes i forgot about :)

 - merge to v2.6.20-rc1

 - merge to latest -git after v2.6.20-rc1

to build a 2.6.20-rc1-rt3 tree, the following patches should be applied:

  http://kernel.org/pub/linux/kernel/v2.6/linux-2.6.19.tar.bz2
  http://kernel.org/pub/linux/kernel/v2.6/testing/patch-2.6.20-rc1.bz2
  http://redhat.com/~mingo/realtime-preempt/patch-2.6.20-rc1-rt3

the -rt YUM repository for Fedora Core 6 and 5, for architectures x86_64 
and i686 can be activated via:

   cd /etc/yum.repos.d
   wget http://people.redhat.com/~mingo/realtime-preempt/rt.repo

   yum install kernel-rt.x86_64   # on x86_64
   yum install kernel-rt  # on i686

   yum update kernel-rt   # refresh - or enable yum-updatesd

(note: it will take 15-30 minutes from now on for the yum repository to 
be updated to -rt6)

as usual, bugreports, fixes and suggestions are welcome,

Ingo
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Linux disk performance.

2006-12-21 Thread Bhanu Kalyan Chetlapalli

On 12/20/06, Manish Regmi <[EMAIL PROTECTED]> wrote:

On 12/19/06, Nick Piggin <[EMAIL PROTECTED]> wrote:
> When you submit a request to an empty block device queue, it can
> get "plugged" for a number of timer ticks before any IO is actually
> started. This is done for efficiency reasons and is independent of
> the IO scheduler used.
>

Thanks for the information..

> Use the noop IO scheduler, as well as the attached patch, and let's
> see what your numbers look like.
>

Unfortunately i got the same results even after applying your patch. I
also tried putting
q->unplug_delay = 1;
But it did not work. The result was similar.


I am assuming that your program is not seeking inbetween writes.

Try disabling the Disk Cache, now-a-days some disks can have as much
as 8MB write cache. so the disk might be buffering as much as it can,
and trying to write only when it can no longer buffer. Since you have
an app which continuously writes copious amounts of data, in order,
disabling write cache might make some sense.


--
---
regards
Manish Regmi


Bhanu

---
UNIX without a C Compiler is like eating Spaghetti with your mouth
sewn shut. It just doesn't make sense.

--
Kernelnewbies: Help each other learn about the Linux kernel.
Archive:   http://mail.nl.linux.org/kernelnewbies/
FAQ:   http://kernelnewbies.org/faq/





--
There is only one success - to be able to spend your life in your own way.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] rcu: rcutorture suspend fix

2006-12-21 Thread Ingo Molnar
Subject: [patch] rcu: rcutorture suspend fix
From: Ingo Molnar <[EMAIL PROTECTED]>

fix suspend hang: rcutorture threads need to be nofreeze.

Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
---
 kernel/rcutorture.c |3 +++
 1 file changed, 3 insertions(+)

Index: linux/kernel/rcutorture.c
===
--- linux.orig/kernel/rcutorture.c
+++ linux/kernel/rcutorture.c
@@ -522,6 +522,7 @@ rcu_torture_writer(void *arg)
 
VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
set_user_nice(current, 19);
+   current->flags |= PF_NOFREEZE;
 
do {
schedule_timeout_uninterruptible(1);
@@ -561,6 +562,7 @@ rcu_torture_fakewriter(void *arg)
 
VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
set_user_nice(current, 19);
+   current->flags |= PF_NOFREEZE;
 
do {
schedule_timeout_uninterruptible(1 + rcu_random()%10);
@@ -591,6 +593,7 @@ rcu_torture_reader(void *arg)
 
VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
set_user_nice(current, 19);
+   current->flags |= PF_NOFREEZE;
 
do {
idx = cur_ops->readlock();
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] change WARN_ON back to "BUG: at ..."

2006-12-21 Thread Ingo Molnar

* Jeremy Fitzhardinge <[EMAIL PROTECTED]> wrote:

> What's the intent of WARN_ON?  Presumably its different from BUG_ON, 
> otherwise you could just use BUG_ON.  Or if not, why not just have 
> BUG_ON?  I think in practice many WARN_ONs are clearly not intended to 
> be as serious as BUG_ON: [...]

you are quite wrong. For example i cannot remember the last time i added 
a BUG_ON() to the core kernel, because a BUG_ON() in core code only 
makes it harder to get the log output back to the developer! Often the 
system is still alive enough to get the information out but crashing it 
via BUG_ON() hides the messages . So for example i exclusively use 
WARN_ON() in core kernel code.

> [...] they warn about unimplemented things, transient hiccups, 
> clarifications of errno returns, etc. [...]

Your claims defy reality: i just checked the 200+ WARN_ON()s that are in 
linux/*/*.c, and /none/ is a 'transient' failure or hiccup or 
'clarification'. Each one i checked signals a real kernel bug that i'd 
not want a production system to have. Non-fatal messages should and do 
get a normal KERN_ printk.

and no, i dont want to do a large-scale rename in either direction. Let 
it be up to the developer whether he wants to crash the kernel upon 
seeing a bug or not. But one thing is sure: a WARN_ON() is a kernel bug 
just as much as a BUG_ON(), in 99% of the cases.

here's the history of these primitives: BUG() used to be the only 
primitive back in the days, then came BUG_ON() and iirc i was the one 
who added WARN_ON() years ago, as a mechanism to signal kernel bugs in a 
less destructive way. And that's how it's used in the kernel. If you 
disagree and still understand it to be different, that's really your 
problem i think ...

Ingo
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: my handy-dandy, "coding style" script

2006-12-21 Thread Jan Engelhardt

On Dec 21 2006 15:40, Joe Perches wrote:
>On Fri, 2006-12-22 at 00:29 +0100, Jan Engelhardt wrote:
>> On Dec 21 2006 14:53, Joe Perches wrote:
>> >On Thu, 2006-12-21 at 21:52 +0100, Jan Engelhardt wrote:
>> >> http://lkml.org/lkml/2006/9/30/208
>> >@@ -1302,7 +1302,7 @@ static int acpi_battery_add(struct acpi_
>> >battery->init_state = 1;
>> >}
>> >-   (void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
>> >+   sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
>> >These casts can eliminate "return value unused" warnings.
>> 
>> But only when functions are tagged __must_check, and sprintf is not.
>
>or where -Wall is used.

00:50 takeshi:/dev/shm > cat bla.c
#include <stdio.h>
int main(void) {
char foo[42];
sprintf(foo, "bar");
return 0;
}
00:52 takeshi:/dev/shm > cc bla.c -Wall
(no warnings)

In case you were talking about kernel code, the same (i.e. no warnings) 
happens:

00:54 takeshi:/dev/shm > make -C /lib/modules/2.6.18.5-jen40b-default/build 
M=$PWD
make: Entering directory `/usr/src/linux-2.6.18.5-jen40b-obj/i386/default'
make -C ../../../linux-2.6.18.5-jen40b 
O=../linux-2.6.18.5-jen40b-obj/i386/default
  CC [M]  /dev/shm/bla2.o
  Building modules, stage 2.
  MODPOST
  LD [M]  /dev/shm/bla2.ko
make: Leaving directory `/usr/src/linux-2.6.18.5-jen40b-obj/i386/default'
00:54 takeshi:/dev/shm > cat Makefile
EXTRA_CFLAGS += -Wall
obj-m += bla2.o




-`J'
-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: newbie questions about while (1) in kernel mode and spinlocks

2006-12-21 Thread Steven Rostedt

On Thu, 2006-12-21 at 10:41 +0100, Sorin Manolache wrote:
> Dear list,
> 
> I am in the process of learning how to write linux device drivers.
> 
> I have a 2.6.16.5 kernel running on a monoprocessor machine.

We usually call that a Uniprocessor or just UP.

> #CONFIG_SMP is not set
> CONFIG_DEBUG_SPINLOCK=y.
> CONFIG_PREEMPT=y
> CONFIG_PREEMPT_BKL=y
> 
> First question:
> 
> I wrote
> 
> while (1)
> ;
> 
> in the read function of a test device driver. I expect the calling
> process to freeze, and then a timer interrupt to preempt the kernel
> and to schedule another process. This does not happen, the whole
> system freezes. I see no effect from pressing keys or moving the
> mouse. Why? The hardware interrupts are not disabled, are they? Why do
> the interrupt handlers not get executed?

Matters what you have in that code. Are you sure interrupts are not
handled? They can be, and you just don't notice, because the programs
that get affected by the interrupts are not able to run.

I don't know what your read function looks like, or how you got there,
but a while(1) would only slow the system down quite a bit. It shouldn't
lock it up. (/me goes to try it out on a dummy driver after writing
this).

> 
> Second question:
> 
> I wrote
> 
> spin_lock();
> down(); /* I know that one shouldn't sleep when holding a lock */
> /* but I want to understand why */

Well a spin_lock is just that. It spins.  What happens if you schedule,
and the next process that goes to run tries to grab that same spin_lock.
It spins, thinking the lock holder is on another CPU and it will be
released shortly. But then, the other CPU (assuming a 2x system) has a
process that tries to grab this same spin_lock, now the system is truly
dead locked. All CPUS are spinning waiting for the non-running process
to let go of the spin lock.

> spin_unlock();
> 
> in the read function and
> 
> up()
> 
> in the write function. The semaphore is initially locked, so the first
> process invoking down will sleep.
> 
> I invoke
> 
> cat /dev/test
> 
> and the process sleeps on the semaphore. Then I invoke
> 
> echo 1 > /dev/test
> 
> and I wake up the "cat" process.
> 
> Then I intend to invoke _two_ cat processes. I expect the first one to
> sleep on the semaphore and the second one to spin at the spin_lock.
> Then I expect to wake up the first process by invoking an echo, the
> first process to release the lock and the second process to sleep on
> the semaphore. What I get is that the system freezes as soon as I
> invoke the second "cat" process. Again, no effect from key presses or
> mouse movements. Why? Shouldn't the timer interrupt preempt the second
> "cat" process that spins on the spinlock and give control to something
> else, for example to the console where I could wake up the first "cat"
> process? Why do I not see any effect from mouse movements? Hardware
> interrupts are not disabled, are they?

A spin_lock will not preempt. So if you are doing this on a UP system, a
spin lock will only be a preempt disable (with CONFIG_PREEMPT=y).
There's no need for spin_locks for UP.

I'd have to take a look at the actual code to explain exactly what mess
you are making for yourself ;)

> 
> Third question:
> 
> The Linux Device Drivers book says that a spin_lock should not be
> shared between a process and an interrupt handler. The explanation is
> that the process may hold the lock, an interrupt occurs, the interrupt
> handler spins on the lock held by the process and the system freezes.
> Why should it freeze? Isn't it possible for the interrupt handler to
> re-enable interrupts as its first thing, then to spin at the lock, the
> timer interrupt to preempt the interrupt handler and to relinquish
> control to the process which in turn will finish its critical section
> and release the lock, making way for the interrupt handler to
> continue.

I believe that Paolo explained this, but I can go into more details if
you need.

> 
> Thank you very much for clarifying these issues.

No prob.

-- Steve

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Finding hardlinks

2006-12-21 Thread Mikulas Patocka

On Thu, 21 Dec 2006, Jan Harkes wrote:


On Wed, Dec 20, 2006 at 12:44:42PM +0100, Miklos Szeredi wrote:

The stat64.st_ino field is 64bit, so AFAICS you'd only need to extend
the kstat.ino field to 64bit and fix those filesystems to fill in
kstat correctly.


Coda actually uses 128-bit file identifiers internally, so 64-bits
really doesn't cut it. Since the 128-bit space is used pretty sparsely
there is a hash which avoids most collisions in 32-bit i_ino space, but
not completely. I can also imagine that at some point someone wants to
implement a git-based filesystem where it would be more natural to use
160-bit SHA1 hashes as unique object identifiers.

But Coda only allows hardlinks within a single directory and if someone
renames a hardlinked file and one of the names ends up in a different
directory we implicitly create a copy of the object. This actually
leverages off of the way we handle volume snapshots and the fact that we
use whole file caching and writes, so we only copy the metadata while
the data is 'copy-on-write'.


The problem is that if inode number collision happens occasionally, you 
get data corruption with cp -a command --- it will just copy one file and 
hardlink the other.



Any application that tries to be smart enough to keep track of which
files are hardlinked should (in my opinion) also have a way to disable
this behaviour.


If user (or script) doesn't specify that flag, it doesn't help. I think 
the best solution for these filesystems would be either to add new syscall

int is_hardlink(char *filename1, char *filename2)
(but I know adding syscall bloat may be objectionable)
or add new field in statvfs ST_HAS_BROKEN_INO_T, that applications can 
test and disable hardlink processing.


Mikulas


Jan


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] [DISCUSS] Make the variable NULL after freeing it.

2006-12-21 Thread Amit Choudhary
Hi,

Was just wondering if the _var_ in kfree(_var_) could be set to NULL after it's 
freed. It may solve
the problem of accessing some freed memory as the kernel will crash since _var_ 
was set to NULL.

Does this make sense? If yes, then how about renaming kfree to something else 
and providing a
kfree macro that would do the following:

#define kfree(x) do { \
  new_kfree(x); \
  x = NULL; \
} while(0)

There might be other better ways too.

Regards,
Amit


__
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Binary Drivers

2006-12-21 Thread Martin Mares
Hello!

> I disagree. The manufacturer has a right to choose to sell its devices
> under any legal business model. Part of that model is deciding what
> level of support to provide and what systems to support in selling it.

At the very least, if they decide that they wish to provide a binary-only
driver for i386, then their claims that they support Linux (without telling
that they in fact support a single specific variant of Linux) are
(a) blatant lie, and (b) false advertising.

Have a nice fortnight
-- 
Martin `MJ' Mares  <[EMAIL PROTECTED]>   
http://mj.ucw.cz/   
Faculty of Math and Physics, Charles University, Prague, Czech Rep., Earth
COBOL -- Compiles Only Because Of Luck
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] mm: export cancel_dirty_page()

2006-12-21 Thread Ingo Molnar

* Adrian Bunk <[EMAIL PROTECTED]> wrote:

> On Fri, Dec 22, 2006 at 12:13:28AM +0100, Ingo Molnar wrote:
> > From: Ingo Molnar <[EMAIL PROTECTED]>
> > Subject: [patch] export cancel_dirty_page()
> > 
> > export cancel_dirty_page() - it's used by hugetlbfs which can be 
> > modular. (This makes my -git based kernel yum repository build again.)
> >...
> 
> No, it can't be:
> 
> config HUGETLBFS
> bool "HugeTLB file system support"
> 

ah, indeed - but i dont see a fundamental reason why hugetlbfs is not 
modular. Nevertheless exporting this makes sense. My quick hack below to 
guess to convert reiserfs (just to make the rpm build) also needs it.

Ingo

Index: linux/fs/reiserfs/stree.c
===
--- linux.orig/fs/reiserfs/stree.c
+++ linux/fs/reiserfs/stree.c
@@ -1439,6 +1439,8 @@ static void unmap_buffers(struct page *p
 
if (page) {
if (page_has_buffers(page)) {
+   cancel_dirty_page(page, PAGE_CACHE_SIZE);
+
tail_index = pos & (PAGE_CACHE_SIZE - 1);
cur_index = 0;
head = page_buffers(page);
@@ -1458,9 +1460,6 @@ static void unmap_buffers(struct page *p
}
bh = next;
} while (bh != head);
-   if (PAGE_SIZE == bh->b_size) {
-   clear_page_dirty(page);
-   }
}
}
 }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


bcm43xx from 2.6.20-rc1-mm1 on HPC nx6325 (x86_64)

2006-12-21 Thread Rafael J. Wysocki
Hi,

I'm trying to make the bcm43xx driver out of the 2.6.20-rc1-mm1 kernel work on
an HPC nx6325, with no luck, so far, although I'm using a firmware that has
been reported to work with these boxes
(http://gentoo-wiki.com/HARDWARE_Gentoo_on_HP_Compaq_nx6325#Onboard_Wireless_.28802.11.29).

The driver loads and seems to operate the hardware to some extent, but there
seems to be a problem with interrupts.  Namely, the chip doesn't seem to
generate any.

Right after a fresh 'modprobe bcm43xx' I get the following messages in dmesg:

bcm43xx driver
ACPI: PCI Interrupt :30:00.0[A] -> GSI 18 (level, low) -> IRQ 18
PCI: Setting latency timer of device :30:00.0 to 64
bcm43xx: Chip ID 0x4311, rev 0x1
bcm43xx: Number of cores: 4
bcm43xx: Core 0: ID 0x800, rev 0x11, vendor 0x4243
bcm43xx: Core 1: ID 0x812, rev 0xa, vendor 0x4243
bcm43xx: Core 2: ID 0x817, rev 0x3, vendor 0x4243
bcm43xx: Core 3: ID 0x820, rev 0x1, vendor 0x4243
bcm43xx: PHY connected
bcm43xx: Detected PHY: Version: 4, Type 2, Revision 8
bcm43xx: Detected Radio: ID: 2205017f (Manuf: 17f Ver: 2050 Rev: 2)
bcm43xx: Radio turned off
bcm43xx: Radio turned off
PM: Adding info for No Bus:eth1
printk: 3 messages suppressed.
SoftMAC: ASSERTION FAILED (0) at: 
net/ieee80211/softmac/ieee80211softmac_wx.c:306:ieee80211softmac_wx_get_rate()

but, strangely enough, eth1 does not appear, but eth2 appears instead:

# ifconfig eth1 up
eth1: unknown interface: No such device
# ifconfig eth2 up
#

Now there are lots of

SoftMAC: ASSERTION FAILED (0) at: 
net/ieee80211/softmac/ieee80211softmac_wx.c:306:ieee80211softmac_wx_get_rate()

messages in dmesg followed by

bcm43xx: PHY connected
PM: Adding info for No Bus::30:00.0
PM: Removing info for No Bus::30:00.0
PM: Adding info for No Bus::30:00.0
PM: Removing info for No Bus::30:00.0
PM: Adding info for No Bus::30:00.0
PM: Removing info for No Bus::30:00.0
PM: Adding info for No Bus::30:00.0
PM: Removing info for No Bus::30:00.0
bcm43xx: Microcode rev 0x122, pl 0x98 (2004-11-16  07:21:20)
bcm43xx: Radio turned on
bcm43xx: Chip initialized
bcm43xx: 32-bit DMA initialized
bcm43xx: Keys cleared
bcm43xx: Selected 802.11 core (phytype 2)
PM: Adding info for No Bus:hw_random
ADDRCONF(NETDEV_UP): eth2: link is not ready

Now, if I run iwconfig on it, I get

eth2  IEEE 802.11b/g  ESSID:off/any  Nickname:"Broadcom 4311"
  Mode:Managed  Frequency=2.437 GHz  Access Point: Invalid
  Bit Rate=1 Mb/s   Tx-Power=18 dBm
  RTS thr:off   Fragment thr:off
  Encryption key:off
  Link Quality=0/100  Signal level=-256 dBm  Noise level=-256 dBm
  Rx invalid nwid:0  Rx invalid crypt:0  Rx invalid frag:0
  Tx excessive retries:0  Invalid misc:0   Missed beacon:0

and 'iwlist eth2 scan' says 'eth2  No scan results', although a working
access point is standing next to the box and the following line appears in
dmesg:

SoftMAC: Scanning finished: scanned 14 channels starting with channel 1

_But_ if I do 'cat /proc/interrupts' now, I get:

   CPU0   CPU1
  0:1247596  0-edge  timer
  1:   3939   1170   IO-APIC-edge  i8042
  8:  0  0   IO-APIC-edge  rtc
 12:150170   IO-APIC-edge  i8042
 14:  38129   6047   IO-APIC-edge  ide0
 16:  99585  18389   IO-APIC-fasteoi   libata, HDA Intel
 18:  0  0   IO-APIC-fasteoi   bcm43xx
 19:  48003   9582   IO-APIC-fasteoi   ohci_hcd:usb1, ohci_hcd:usb2, 
ehci_hcd:usb3
 20:  0  3   IO-APIC-fasteoi   yenta, tifm_7xx1, ohci1394, 
sdhci:slot0
 21:  11522   2467   IO-APIC-fasteoi   acpi
 23:  68971  11663   IO-APIC-fasteoi   eth0
NMI:  0  0
LOC:12476621247039
ERR: 10

so apparently there's something wrong.

Greetings,
Rafael


-- 
If you don't have the time to read,
you don't have the time or the tools to write.
- Stephen King
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: my handy-dandy, "coding style" script

2006-12-21 Thread Jan Engelhardt

On Dec 21 2006 14:53, Joe Perches wrote:
>On Thu, 2006-12-21 at 21:52 +0100, Jan Engelhardt wrote:
>> http://lkml.org/lkml/2006/9/30/208
>
>@@ -1302,7 +1302,7 @@ static int acpi_battery_add(struct acpi_
>   battery->init_state = 1;
>   }
> 
>-  (void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
>+  sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
>
>These casts can eliminate "return value unused" warnings.

But only when functions are tagged __must_check, and sprintf is not. 
cmpxchg is one where (void) is 'needed', __as I wrote__ in a cxgb3 
comment.

After applying this patch, there are no additional warnings:

00:19 ichi:/erk/kernel/linux-2.6.20-rc1 > make drivers/acpi/sbs.o
  CHK include/linux/version.h
  CHK include/linux/utsrelease.h
  CC [M]  drivers/acpi/sbs.o
00:21 ichi:/erk/kernel/linux-2.6.20-rc1 > grep MUST .config
CONFIG_ENABLE_MUST_CHECK=y

akpm, please include.

---

Remove some unnecessary (void) casts.

Signed-off-by: Jan Engelhardt <[EMAIL PROTECTED]>

Index: linux-2.6.20-rc1/drivers/acpi/sbs.c
===
--- linux-2.6.20-rc1.orig/drivers/acpi/sbs.c
+++ linux-2.6.20-rc1/drivers/acpi/sbs.c
@@ -1160,14 +1160,14 @@ acpi_battery_write_alarm(struct file *fi
if (result) {
ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
  "acpi_battery_set_alarm() failed\n"));
-   (void)acpi_battery_set_alarm(battery, old_alarm);
+   acpi_battery_set_alarm(battery, old_alarm);
goto end;
}
result = acpi_battery_get_alarm(battery);
if (result) {
ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
  "acpi_battery_get_alarm() failed\n"));
-   (void)acpi_battery_set_alarm(battery, old_alarm);
+   acpi_battery_set_alarm(battery, old_alarm);
goto end;
}
 
@@ -1302,7 +1302,7 @@ static int acpi_battery_add(struct acpi_
battery->init_state = 1;
}
 
-   (void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
+   sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
 
result = acpi_sbs_generic_add_fs(>battery_entry,
 acpi_battery_dir,
@@ -1485,7 +1485,7 @@ static int acpi_sbs_update_run(struct ac
}
 
if (old_battery_present != new_battery_present) {
-   (void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
+   sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
result = acpi_sbs_generate_event(sbs->device,
 
ACPI_SBS_BATTERY_NOTIFY_STATUS,
 new_battery_present,
@@ -1498,7 +1498,7 @@ static int acpi_sbs_update_run(struct ac
}
}
if (old_remaining_capacity != 
battery->state.remaining_capacity) {
-   (void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
+   sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
result = acpi_sbs_generate_event(sbs->device,
 
ACPI_SBS_BATTERY_NOTIFY_STATUS,
 new_battery_present,


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch] mm: export cancel_dirty_page()

2006-12-21 Thread Adrian Bunk
On Fri, Dec 22, 2006 at 12:13:28AM +0100, Ingo Molnar wrote:
> From: Ingo Molnar <[EMAIL PROTECTED]>
> Subject: [patch] export cancel_dirty_page()
> 
> export cancel_dirty_page() - it's used by hugetlbfs which can be 
> modular. (This makes my -git based kernel yum repository build again.)
>...

No, it can't be:

config HUGETLBFS
bool "HugeTLB file system support"


cu
Adrian

-- 

   "Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   "Only a promise," Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] mm: export cancel_dirty_page()

2006-12-21 Thread Ingo Molnar
From: Ingo Molnar <[EMAIL PROTECTED]>
Subject: [patch] export cancel_dirty_page()

export cancel_dirty_page() - it's used by hugetlbfs which can be 
modular. (This makes my -git based kernel yum repository build again.)

Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>

Index: linux/mm/truncate.c
===
--- linux.orig/mm/truncate.c
+++ linux/mm/truncate.c
@@ -65,6 +65,7 @@ void cancel_dirty_page(struct page *page
task_io_account_cancelled_write(account_size);
}
 }
+EXPORT_SYMBOL(cancel_dirty_page);
 
 /*
  * If truncate cannot remove the fs-private metadata from the page, the page
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Sparc64 kernel oops (caused by bad scsi disk?)

2006-12-21 Thread Andrew Walrond
Installing a bad disk in a Sun D1000 (JBOD 12 disk scsi 2 array)
attached to a Sun E4500 (8proc, 8Gb ram) running a 64bit sparc64
2.6.18.3 SMP kernel causes this or similar oops when accessing the bad
disk:

sda: Current: sense key: Recovered Error
Additional sense: Address mark not found for data field
Info fld=0xa8d
sda: Current: sense key: Recovered Error
Additional sense: Write error - recovered with auto reallocation
Info fld=0xa98
sda: Current: sense key: Recovered Error
Additional sense: Address mark not found for data field
Info fld=0xad6
sda: Current: sense key: Recovered Error
Additional sense: Recovered data with retries
Info fld=0xaff
sda: Current: sense key: Recovered Error
Additional sense: Recovered data with retries
Info fld=0xb0f
eth5: Auto-Negotiation unsuccessful, trying force link mode
sda: Current: sense key: Recovered Error
Additional sense: Recovered data with negative head offset
Info fld=0xb2b
sda: Current: sense key: Recovered Error
Additional sense: Recovered data with retries
Info fld=0xb52
sda: Current: sense key: Recovered Error
Additional sense: Recovered data with retries
Info fld=0xb68
sda: Current: sense key: Recovered Error
Additional sense: Recovered data with retries
Info fld=0xbba
eth5: Link down, cable problem?
Unable to handle kernel NULL pointer dereference
tsk->{mm,active_mm}->context = 05a4
tsk->{mm,active_mm}->pgd = f80113976000
  \|/  \|/
  "@'/ .. \`@"
  /_| \__/ |_\
 \__U_/
swapper(0): Oops [#1]
TSTATE: 80f09607 TPC: 0052b51c TNPC: 0052b520 Y: 
Not tainted
TPC: 
g0: f80003d03660 g1: 11010080 g2:  g3: 
f801feaf
g4: 0072d280 g5: f8000350c000 g6: 00729280 g7: 
0050
o0: 00c0 o1: f801feaf1d60 o2:  o3: 
07fe0150e360
o4: 00c0 o5: f30bcf28 sp: 0072c3d1 ret_pc: 
005d7e24
RPC: 
l0: f801feaf1d40 l1: 0076 l2:  l3: 
0076
l4:  l5:  l6: f80004a74140 l7: 
0001
i0:  i1: f80004faa4c0 i2: 0072cf90 i3: 

i4:  i5: f80003d6b660 i6: 0072c491 i7: 
0046d20c
I7: 
Caller[0046d20c]: handle_IRQ_event+0x38/0x78
Caller[0046d308]: __do_IRQ+0xbc/0x13c
Caller[0041bec8]: handler_irq+0x7c/0x94
Caller[004108b4]: tl0_irq5+0x1c/0x20
Caller[004180e4]: cpu_idle+0x2c/0xa4
Caller[007ce6c0]: start_kernel+0x28c/0x294
Caller[004045d8]: setup_trap_table+0x0/0x100
Caller[]: 0x8
Instruction DUMP: da5a6000  c25a6008  8ea1e010  92026008  c272400b  
186a  92026008  808aa008
Kernel panic - not syncing: Aiee, killing interrupt handler!
 <0>Press Stop-A (L1-A) to return to the boot prom


Let me know if I can do anything to help chase this down.

Andrew Walrond

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/4] New firewire stack - updated patches

2006-12-21 Thread Stefan Richter
Kristian Høgsberg wrote:
> Here's a new set of patches for the new firewire stack.
...
> It is still work in progress, but at least now it should work across
> all architectures and endiannesses.

Committed to linux1394-2.6.git.

BTW, I prepended "ieee1394:" to the titles of most of the commits to
this tree. From now on I will always do this on commits affecting
mainline's FireWire stack, and prepend "firewire:" to the titles of
commits affecting the JUJU stack. It's redundant but IMO helpful when
reading changelogs.
-- 
Stefan Richter
-=-=-==- ==-- =-=-=
http://arcgraph.de/sr/
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 2/8] rqbased-dm: add block layer hook

2006-12-21 Thread business1



Kiyoshi Ueda wrote:
> 
> Hi Jens,
> 
> Sorry for the less explanation.
> 
> On Wed, 20 Dec 2006 14:49:24 +0100, Jens Axboe <[EMAIL PROTECTED]>
> wrote:
>> On Tue, Dec 19 2006, Kiyoshi Ueda wrote:
>> > This patch adds new "end_io_first" hook in __end_that_request_first()
>> > for request-based device-mapper.
>> 
>> What's this for, lack of stacking?
> 
> I don't understand the meaning of "lack of stacking" well but
> I guess that it means "Is the existing hook in end_that_request_last()
> not enough?"  If so, the answer is no.
> (If the guess is wrong, please let me know.)
> 
> The new hook is needed for error handling in dm.
> For example, when an error occurred on a request, dm-multipath
> wants to try another path before returning EIO to application.
> Without the new hook, at the point of end_that_request_last(),
> the bios are already finished with error and can't be retried.
> 
> Thanks,
> Kiyoshi Ueda
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
>   Look at me im boolin  
> http://www.thebusinesssuccessgroup.com/Real-Estate-Investment-training.html
-- 
View this message in context: 
http://www.nabble.com/-RFC-PATCH-2-8--rqbased-dm%3A-add-block-layer-hook-tf2848786.html#a8016586
Sent from the linux-kernel mailing list archive at Nabble.com.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 2/8] rqbased-dm: add block layer hook

2006-12-21 Thread business1



Jens Axboe-5 wrote:
> 
> On Tue, Dec 19 2006, Kiyoshi Ueda wrote:
>> This patch adds new "end_io_first" hook in __end_that_request_first()
>> for request-based device-mapper.
> 
> What's this for, lack of stacking?
> 
> -- 
> Jens Axboe look at this it will halp
> http://www.thebusinesssuccessgroup.com/Real-Estate-Investment-training.html
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> 

-- 
View this message in context: 
http://www.nabble.com/-RFC-PATCH-2-8--rqbased-dm%3A-add-block-layer-hook-tf2848786.html#a8016555
Sent from the linux-kernel mailing list archive at Nabble.com.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 2/8] rqbased-dm: add block layer hook

2006-12-21 Thread business1



Kiyoshi Ueda wrote:
> 
> Hi Jens,
> 
> On Thu, 21 Dec 2006 08:49:47 +0100, Jens Axboe <[EMAIL PROTECTED]>
> wrote:
>> > The new hook is needed for error handling in dm.
>> > For example, when an error occurred on a request, dm-multipath
>> > wants to try another path before returning EIO to application.
>> > Without the new hook, at the point of end_that_request_last(),
>> > the bios are already finished with error and can't be retried.
>> 
>> Ok, I see what you are getting at. The current ->end_io() is called when
>> the request has fully completed, you want notification for each chunk
>> potentially completed.
>> 
>> I think a better design here would be to use ->end_io() as the full
>> completion handler, similar to how bio->bi_end_io() works. A request
>> originating from __make_request() would set something ala:
>> 
>> int fs_end_io(struct request *rq, int error, unsigned int nr_bytes)
>> {
>> if (!__end_that_request_first(rq, err, nr_bytes)) {
>> end_that_request_last(rq, error);
>> return 0;
>> }
>> 
>> return 1;
>> }
>> 
>> and normal io completion from a driver would use a helper:
>> 
>> int blk_complete_io(struct request *rq, int error, unsigned int nr_bytes)
>> {
>> return rq->end_io(rq, error, nr_bytes);
>> }
>> 
>> instead of calling the functions manually. That would allow you to get
>> notification right at the beginning and do what you need, without adding
>> a special hook for this.
> 
> I'm not confident about what you mean.
> Something like this?
>   - __make_request() sets fs_end_io() to req->end_io()
>   - The driver calls blk_complete_io()
>* if it succeeds, the request is done
>* if it fails, the request is not completed
>  and the driver needs retry or something
>   - Current users of req->end_io() have to update/rewrite their end_io.
>   - Features like mine will set its own end_io.
> It checks error and decides whether calling fs_end_io() or not.
> 
> Depending on drivers, there are some functions called between
> __end_that_request_first() and end_that_request_last().
> For example:
>   - add_disk_randomness()
>   - blk_queue_end_tag()
>   - floppy_off()
> So they might prevent such generalization.
> 
> 
> In addition to the suggested approach, what do you think about
> adding a new flag to req->cmd_flags which lets the end_io() handler
> not to return bio to upper layer?
> It will be useful for multipathing and can be done even within
> the current __end_that_request_first().
> For example,
> 
> static int __end_that_request_first()
> {
>   .
>   error = 0;
>   if (end_io_error(uptodate))
>   error = !uptodate ? -EIO : uptodate;
>   .
>   if (error && (req->cmd_flags & "NEW_FLAG"))
>   return 0; /* Tell the driver to call end_that_request_last() */
> 
>   total_types = bio_nbytes = 0;
>   while ((bio = req->bio) != NULL) {
>   . /* process of finishing bios */
>   }
>   .
> }
> 
> Thanks,
> Kiyoshi Ueda
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
>   
> http://www.thebusinesssuccessgroup.com/Real-Estate-Investment-training.html

-- 
View this message in context: 
http://www.nabble.com/-RFC-PATCH-2-8--rqbased-dm%3A-add-block-layer-hook-tf2848786.html#a8016546
Sent from the linux-kernel mailing list archive at Nabble.com.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 2/8] rqbased-dm: add block layer hook

2006-12-21 Thread business1



Kiyoshi Ueda wrote:
> 
> This patch adds new "end_io_first" hook in __end_that_request_first()
> for request-based device-mapper.
> 
> 
> Signed-off-by: Kiyoshi Ueda <[EMAIL PROTECTED]>
> Signed-off-by: Jun'ichi Nomura <[EMAIL PROTECTED]>
> 
> diff -rupN 1-blk-get-request-irqrestore/block/ll_rw_blk.c
> 2-add-generic-hook/block/ll_rw_blk.c
> --- 1-blk-get-request-irqrestore/block/ll_rw_blk.c2006-12-15
> 10:21:29.0 -0500
> +++ 2-add-generic-hook/block/ll_rw_blk.c  2006-12-15 10:23:30.0
> -0500
> @@ -260,6 +260,7 @@ static void rq_init(request_queue_t *q, 
>   rq->data = NULL;
>   rq->nr_phys_segments = 0;
>   rq->sense = NULL;
> + rq->end_io_first = NULL;
>   rq->end_io = NULL;
>   rq->end_io_data = NULL;
>   rq->completion_data = NULL;
> @@ -3216,6 +3217,22 @@ static int __end_that_request_first(stru
>  
>   blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
>  
> + if (!uptodate) {
> + if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
> + printk("end_request: I/O error, dev %s, sector %llu\n",
> + req->rq_disk ? req->rq_disk->disk_name : "?",
> + (unsigned long long)req->sector);
> + }
> +
> + if (blk_fs_request(req) && req->rq_disk) {
> + const int rw = rq_data_dir(req);
> +
> + disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
> + }
> +
> + if (req->end_io_first)
> + return req->end_io_first(req, uptodate, nr_bytes);
> +
>   /*
>* extend uptodate bool to allow < 0 value to be direct io error
>*/
> @@ -3230,19 +3247,6 @@ static int __end_that_request_first(stru
>   if (!blk_pc_request(req))
>   req->errors = 0;
>  
> - if (!uptodate) {
> - if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
> - printk("end_request: I/O error, dev %s, sector %llu\n",
> - req->rq_disk ? req->rq_disk->disk_name : "?",
> - (unsigned long long)req->sector);
> - }
> -
> - if (blk_fs_request(req) && req->rq_disk) {
> - const int rw = rq_data_dir(req);
> -
> - disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
> - }
> -
>   total_bytes = bio_nbytes = 0;
>   while ((bio = req->bio) != NULL) {
>   int nbytes;
> diff -rupN 1-blk-get-request-irqrestore/include/linux/blkdev.h
> 2-add-generic-hook/include/linux/blkdev.h
> --- 1-blk-get-request-irqrestore/include/linux/blkdev.h   2006-12-11
> 14:32:53.0 -0500
> +++ 2-add-generic-hook/include/linux/blkdev.h 2006-12-15
> 10:23:30.0 -0500
> @@ -126,6 +126,7 @@ void copy_io_context(struct io_context *
>  void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
>  
>  struct request;
> +typedef int (rq_end_first_fn)(struct request *, int, int);
>  typedef void (rq_end_io_fn)(struct request *, int);
>  
>  struct request_list {
> @@ -312,6 +313,7 @@ struct request {
>   /*
>* completion callback.
>*/
> + rq_end_first_fn *end_io_first;
>   rq_end_io_fn *end_io;
>   void *end_io_data;
>  };
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> 

-- 
View this message in context: 
http://www.nabble.com/-RFC-PATCH-2-8--rqbased-dm%3A-add-block-layer-hook-tf2848786.html#a8016520
Sent from the linux-kernel mailing list archive at Nabble.com.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[powerpc] Fix bogus BUG_ON() in hugetlb_get_unmapped_area()

2006-12-21 Thread David Gibson
Andrew, Paulus, please apply

The powerpc specific version of hugetlb_get_unmapped_area() makes some
unwarranted assumptions about what checks have been made to its
parameters by its callers.  This will lead to a BUG_ON() if a 32-bit
process attempts to make a hugepage mapping which extends above
TASK_SIZE (4GB).

I'm not sure if these assumptions came about because they were valid
with earlier versions of the get_unmapped_area() path, or if it was
always broken.  Nonetheless this patch fixes the logic, and removes
the crash.

Signed-off-by: David Gibson <[EMAIL PROTECTED]>

Index: working-2.6/arch/powerpc/mm/hugetlbpage.c
===
--- working-2.6.orig/arch/powerpc/mm/hugetlbpage.c  2006-12-21 
14:54:15.0 +1100
+++ working-2.6/arch/powerpc/mm/hugetlbpage.c   2006-12-21 14:57:35.0 
+1100
@@ -744,7 +744,8 @@ static int htlb_check_hinted_area(unsign
struct vm_area_struct *vma;
 
vma = find_vma(current->mm, addr);
-   if (!vma || ((addr + len) <= vma->vm_start))
+   if (TASK_SIZE - len >= addr &&
+   (!vma || ((addr + len) <= vma->vm_start)))
return 0;
 
return -ENOMEM;
@@ -815,6 +816,8 @@ unsigned long hugetlb_get_unmapped_area(
return -EINVAL;
if (len & ~HPAGE_MASK)
return -EINVAL;
+   if (len > TASK_SIZE)
+   return -ENOMEM;
 
if (!cpu_has_feature(CPU_FTR_16M_PAGE))
return -EINVAL;
@@ -823,9 +826,6 @@ unsigned long hugetlb_get_unmapped_area(
BUG_ON((addr + len)  < addr);
 
if (test_thread_flag(TIF_32BIT)) {
-   /* Paranoia, caller should have dealt with this */
-   BUG_ON((addr + len) > 0x100000000UL);
-
curareas = current->mm->context.low_htlb_areas;
 
/* First see if we can use the hint address */

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Binary Drivers

2006-12-21 Thread Dave Neuer

On 12/21/06, Tomas Carnecky <[EMAIL PROTECTED]> wrote:

James Porter wrote:
> I'm pretty sure Linus has decided, basically he said the patches to
> prevent non-gpl binary drivers are not going into his tree unless every
> other tree adopts it. Of course the few supporting won't get off their
> high horse and try it on a different tree.

.. unfortunately, that doesn't make the legal status any clearer.


Well, FWIW, neither does some "decision" from the kernel authors; it
hinges on what is and what is not a derived work of the kernel, and
the only parties whose opinion matters here (the courts in the various
jurisdictions) haven't ruled on that yet, and won't until such time as
a copyright holder in the kernel sues someone for copyright
infringement.

Dave
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH 2/8] rqbased-dm: add block layer hook

2006-12-21 Thread Kiyoshi Ueda
Hi Jens,

On Thu, 21 Dec 2006 08:49:47 +0100, Jens Axboe <[EMAIL PROTECTED]> wrote:
> > The new hook is needed for error handling in dm.
> > For example, when an error occurred on a request, dm-multipath
> > wants to try another path before returning EIO to application.
> > Without the new hook, at the point of end_that_request_last(),
> > the bios are already finished with error and can't be retried.
> 
> Ok, I see what you are getting at. The current ->end_io() is called when
> the request has fully completed, you want notification for each chunk
> potentially completed.
> 
> I think a better design here would be to use ->end_io() as the full
> completion handler, similar to how bio->bi_end_io() works. A request
> originating from __make_request() would set something ala:
> 
> int fs_end_io(struct request *rq, int error, unsigned int nr_bytes)
> {
> if (!__end_that_request_first(rq, err, nr_bytes)) {
> end_that_request_last(rq, error);
> return 0;
> }
> 
> return 1;
> }
> 
> and normal io completion from a driver would use a helper:
> 
> int blk_complete_io(struct request *rq, int error, unsigned int nr_bytes)
> {
> return rq->end_io(rq, error, nr_bytes);
> }
> 
> instead of calling the functions manually. That would allow you to get
> notification right at the beginning and do what you need, without adding
> a special hook for this.

I'm not confident about what you mean.
Something like this?
  - __make_request() sets fs_end_io() to req->end_io()
  - The driver calls blk_complete_io()
   * if it succeeds, the request is done
   * if it fails, the request is not completed
 and the driver needs retry or something
  - Current users of req->end_io() have to update/rewrite their end_io.
  - Features like mine will set its own end_io.
It checks error and decides whether calling fs_end_io() or not.

Depending on drivers, there are some functions called between
__end_that_request_first() and end_that_request_last().
For example:
  - add_disk_randomness()
  - blk_queue_end_tag()
  - floppy_off()
So they might prevent such generalization.


In addition to the suggested approach, what do you think about
adding a new flag to req->cmd_flags which tells the end_io() handler
not to return the bio to the upper layer?
It will be useful for multipathing and can be done even within
the current __end_that_request_first().
For example,

static int __end_that_request_first()
{
.
error = 0;
if (end_io_error(uptodate))
error = !uptodate ? -EIO : uptodate;
.
if (error && (req->cmd_flags & "NEW_FLAG"))
return 0; /* Tell the driver to call end_that_request_last() */

total_bytes = bio_nbytes = 0;
while ((bio = req->bio) != NULL) {
. /* process of finishing bios */
}
.
}

Thanks,
Kiyoshi Ueda

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [dm-devel] Re: [RFC PATCH 1/8] rqbased-dm: allow blk_get_request() to be called from interrupt context

2006-12-21 Thread Mike Christie
Jens Axboe wrote:
> On Thu, Dec 21 2006, Mike Christie wrote:
>> Jens Axboe wrote:
>>> On Thu, Dec 21 2006, Mike Christie wrote:
>>>> Mike Christie wrote:
>>>>> Jens Axboe wrote:
>>>>>> On Thu, Dec 21 2006, Mike Christie wrote:
>>>>>>> Or the block layer code could set up the clone too. elv_next_request
>>>>>>> could prep a clone based on the original request for the driver then dm
>>>>>>> would not have to worry about that part.
>>>>>> It really can't, since it doesn't know how to allocate the clone
>>>>>> request. I'd rather export this functionality as helpers.
>>>>>>
>>>>> What do you think about dm's plan to break up make_request into a
>>>>> mapping function and into the part that builds the bio into a request.
>>>>> This would fit well with them being helpers and being able to allocate
>>>>> the request from the correct context.
>>>>>
>>>>> I see patches for that did not get posted, but I thought Joe and
>>>>> Alasdair used to talk about that a lot and in the dm code I think there
>>>>> are still comments about doing it. Maybe the dm comments mentioned the
>>>>> merge_fn, but I guess the merge_fn did not fit what they wanted to do or
>>>>> something. I think Alasdair talked about this at one of his talks at OLS
>>>>> or it was in a proposal for the kernel summit. I can dig up the mail if
>>>>> you want.
>>>>>
>>>> Ignore that. The problem would be that we may not want to decide which
>>>> path to use at map time.
>>> Latter part, or both paragraphs? Dipping into ->make_request_fn() for
>>> some parts do seem to make sense to me. It'll be cheaper than at
>>> potential soft irq time (from elv_next_request()).
>>>
>> I think we got crisscrossed.
>>
>> The original idea but using your helper suggestion would have been this:
>>
>> dm->make_request_fn(bio)
>> {
>>  rq = __make_request(bio)
>>  if (this is a new request) {
>>  allocate clone from either a real device/path specific 
>> mempool() or a
>> dm q mempool
>> }
>>
>>
>> dm->prep_fn(rq)
>> {
>>  setup clone rq fields based on orig request fields.
>> }
>>
>> dm->request_fn(rq)
>> {
>>  figure out which path to use;
>>  set rq->q;
>>  send cloned rq to real device;
>> }
> 
> This'll work nicely, much better.
> 
>> The second idea based on Joe and Alasdair to break up make_request would
>> just have been a more formal break up of the dm->make_request_fn above,
>> because I thought your comment about not knowing how to allocate the
>> clone request meant that we did not know which q's mempool to take the
>> request from if we were going to take the cloned request from the real
>> device/path's mempool. I guess this does not really matter since we can
>> have just a dm q mempool of requests to use for cloned requests.
> 
> Either approach is fine with me. Note that you need to be careful with
> foreign requests on a queue, see the elevator drain logic for barriers
> and scheduler switching.
> 

What I proposed may not work so nicely as is. I remember when we tried
this before, that because __make_request lets go of the q lock, the q
can then be unplugged or it can be unplugged from __make_request if you
hit the unplug threshold so we would not be able to easily allocate a
cloned request from the dm make request callout and set it to the
request that is allocated in make_request. You have to do some surgery
to the make_request function to make this work.

Maybe your preallocated requests that are used from the request_fn are a
better idea.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Binary Drivers

2006-12-21 Thread Scott Preece

On 12/21/06, David Schwartz <[EMAIL PROTECTED]> wrote:


> You say "It's rude to not play by our rules". They say "It's rude of
> you to expect us to change our business model to support your niche
> market differently from the way we support everyone else." Neither is
> wrong...

Honestly, I think it *is* wrong to sell someone a physical product and then
not tell them how to make it work. If you're not actually selling them the
physical product but selling them a way to get a particular thing done, then
don't represent that you're selling them physical product because that would
presumably include the right to use it any way they wanted provided it was
lawful.

How would you feel if you bought a car and then discovered that the
manufacturer had welded the hood shut? How many people still do their own
oil changes anyway?

---

But there is no legal or moral obligation for the carmaker to sell you
the service manual for the vehicle or provide you with their periodic
service bulletins...

---


If you sell a physical product, you should also include the information
necessary to make that physical product *work*. If you don't, you aren't
actually selling the physical product, that is, the person is buying a right
to use that physical product some particular way and not the product itself.

---

The information needed to make it work does not necessarily include
any information about how it works. A closed driver is a perfectly
valid part of the product.

Try this thought experiment: suppose the "driver" were actually
implemented by firmware loaded into the device in the factory and not
field replaceable. Do you consider that to be immoral? Why should the
technological accident of the driver being plugged into the OS change
the appropriateness?

---


The law may come around on this issue. It has definitely done so on
companies that claim to be selling you cellphones but then later claim that
you need to pay them additional money if you want the access code to unlock
it and make it work with another carrier. If you own a physical phone, it
should come with the right to use it with any carrier it can be made to work
with, and a company with no ownership interest in the phone has no right to
withhold the information needed to make it do that so as to force you to use
their service.

---

No such change has occurred. There was a very limited legal change to
say that it did not violate copyright to attempt to circumvent the
protection of the lock. There is no legal requirement that the carrier
unlock the device (at least in the US). [I personally believe that
they should be required to, but I'm only responding to your assertion
that there has been a major change on this point.]

---

The same applies when you buy a graphics card and don't want to use it with
the manufacturer's drivers. If it's *your* graphics card, the manufacturer
has no legitimate interest in forcing you to use their drivers by
withholding information about what *you* bought.

---

I disagree. The manufacturer has a right to choose to sell its devices
under any legal business model. Part of that model is deciding what
level of support to provide and what systems to support in selling it.
It's not a question of whether they "have a legitimate interest in "
doing anything - they have the complete right to choose where to spend
their development dollars. Choosing not to write technical manuals for
the public is a completely valid choice. It's your option whether to
buy or not, knowing the manufacturer's choice.

scott
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Binary Drivers

2006-12-21 Thread Scott Preece

On 12/21/06, Eric W. Biederman <[EMAIL PROTECTED]> wrote:

"Scott Preece" <[EMAIL PROTECTED]> writes:

But as it happens that driver does not work for a large
percentage of linux users who potentially could place the card in
their system.  Did that driver support all 23 architectures?

---

Do they claim it does? There is NO moral obligation that they support
every piece of hardware in the world. They are offering a product
under certain terms. You can choose to buy it or not. If you have
standing, and believe that their driver infringes the Linux
copyrights, then you could also sue, but the most you could hope to
win is making the driver unavailable, which makes the hardware
unavailable. That still feels like a Pyrrhic victory to me.

---

The difference is that we don't expect the hardware manufacturers to do
anything; we only hope they will support linux.  Once they support
linux we do expect they will play well with others and if they don't
then it is rude.

---
Not everyone agrees that it is better to not have the device available
for Linux at all than to have it with a closed driver. Again, note
that the manufacturer services all other OS platforms with closed
drivers, so you're asking them to do something different, that
probably costs them something in startup cost, and potentially costs
them something in downstream support.

---


Please none of this amoral Neither is wrong crap.

---

It's not a moral question. The hardware vendor says - "This is what we
make. You can buy it if you like and we will support it to the extent
defined in our support policy. If those terms don't work for you, or
it doesn't work with your hardware, then we're sorry; we can't help
you at this time."

scott
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [-mm patch] ptrace: make {put,get}reg work again for gs and fs

2006-12-21 Thread Frederik Deweerdt
On Thu, Dec 21, 2006 at 11:22:05AM -0800, Jeremy Fitzhardinge wrote:
> Frederik Deweerdt wrote:
> > Following the i386 pda patches, it's not possible to set gs or fs value
> > from gdb anymore. The following patch restores the old behaviour of
> > getting and setting thread.gs or thread.fs respectively.
> > Here's a gdb session *before* the patch:
> > (gdb) info reg
> > [...]
> > fs 0x33 51
> > gs 0x33 51
> > (gdb) set $fs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0x33 51
> > gs 0x33 51
> > (gdb) set $gs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0xffff   65535
> > gs 0x33 51
> >
> > Another one *after* the patch:
> > (gdb) info reg
> > [...]
> > fs 0xd8 216
> >   
> 
> This doesn't look right.  This is the kernel's %fs, not usermode's
> (which should be 0).
> 
> > gs 0x33 51
> > (gdb) set $fs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0xffff   65535
> > gs 0x33 51
> > (gdb) set $gs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0xffff   65535
> > gs 0xffff   65535
> >   
> Hm.  This shouldn't be possible since this is a bad selector, but I
> guess ptrace/gdb doesn't really know that.  If you run the target (even
> single step it), these should revert to 0.
> 
Here's a third session that looks better:

(gdb) info reg
[...]
fs 0x0  0
gs 0x33 51
(gdb) set $fs=0xffff
(gdb) info reg
[...]
fs 0xffff   65535
gs 0x33 51
(gdb) set $gs=0xffff
(gdb) info reg
[...]
fs 0xffff   65535
gs 0xffff   65535
(gdb) n
Single stepping until exit from function main,
which has no line number information.
Cannot find user-level thread for LWP 10751: generic error
(gdb) set $gs=0x33
(gdb) set $fs=0
(gdb) n
Single stepping until exit from function main,
which has no line number information.
0x08048c05 in __i686.get_pc_thunk.bx ()
(gdb) info reg
[...]
fs 0x0  0
gs 0x33 51

This is a -mm1 kernel + your efl_offset fix + the attached patch.
So the problem came from putreg still saving %gs to the stack where
there's no slot for it, whereas getreg got things right.

Regards,
Frederik

Signed-off-by: Frederik Deweerdt <[EMAIL PROTECTED]>


diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index a803a49..d8f44db 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -89,14 +89,14 @@ static int putreg(struct task_struct *child,
unsigned long regno, unsigned long value)
 {
switch (regno >> 2) {
-   case FS:
+   case GS:
if (value && (value & 3) != 3)
return -EIO;
-   child->thread.fs = value;
+   child->thread.gs = value;
return 0;
case DS:
case ES:
-   case GS:
+   case FS:
if (value && (value & 3) != 3)
return -EIO;
value &= 0xffff;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: Binary Drivers

2006-12-21 Thread Niklas Steinkamp
David wrote: 
> Honestly, I think it *is* wrong to sell someone a physical product and then
> not tell them how to make it work. If you're not actually selling them the
> physical product but selling them a way to get a particular thing done, then
> don't represent that you're selling them physical product because that would
> presumably include the right to use it any way they wanted provided it was
> lawful.
> 
> How would you feel if you bought a car and then discovered that the
> manufacturer had welded the hood shut? How many people still do their own
> oil changes anyway?

I think he is right. The linux kernel is free software and there should be no 
closed source in it.
When Windows uses closed-source drivers, it doesn't matter, but in the
linux kernel there should be only open-source code.




__
"Ein Herz für Kinder" - Ihre Spende hilft! Aktion: www.deutschlandsegelt.de
Unser Dankeschön: Ihr Name auf dem Segel der 1. deutschen America's Cup-Yacht!

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: IO-APIC + timer doesn't work

2006-12-21 Thread Eric W. Biederman
"Lu, Yinghai" <[EMAIL PROTECTED]> writes:

> -Original Message-
> From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] 
> Sent: Thursday, December 21, 2006 12:47 PM
> To: Lu, Yinghai
>>> +static int add_irq_entry(int type, int irqflag, int bus, int irq,
> int apic, int
>>> pin)
>
>>This is fairly sane but probably belongs in mptable.c as a helper.
>
> mparse.c?

yep.

>>I am still trying to understand this enable_8259A_irq(0) case.
>>As far as I can tell this is a very backwards way of enabling
>>an ExtINT, as such it shouldn't be used until later.
>
>>YH do you have any insight why on some Nvidia chipsets we apic 0 pin 2
> doesn't
>>work for the timer interrupt.  I thought that was what we were using in
> LinuxBIOS
>>for the mptable.
>
> CK804's has problem. But later one seems fixed that problem.

Do you have any details?

Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.19-rt14 e1000 shutdown problem

2006-12-21 Thread Tim Chen

On Thu, 2006-12-21 at 21:13 +0100, Ingo Molnar wrote:
> * Tim Chen <[EMAIL PROTECTED]> wrote:
> 
> > Ingo,
> > 
> > While trying out the 2.6.19.1-rt14 kernel with a x86_64 system with 
> > Clovertown processor, it hung when it was shutting down e1000 ethernet 
> > interface running the command:
> > 
> > /sbin/ip link set dev eth0 down
> 
> does the patch below solve it for you?
> 
>   Ingo
> 

Yes, the patch took care of the problem.  Thanks.

Tim
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: fuse, get_user_pages, flush_anon_page, aliasing caches and all that again

2006-12-21 Thread Miklos Szeredi
> >
> >The root of the problem is that copy_to_user() may cause page faults
> >on the userspace buffer, and the page fault might (in case of a
> >maliciously crafted filesystem) recurse into the filesystem itself.
> 
> Would it be worthwhile to mlock the page? I know that needs root
> privs or some capability, but a static buffer could be put aside when
> fusermount is run.

And how would the kernel ensure, that the buffer supplied by userspace
is mlocked and stays mlocked during the memory copy?  I don't think
that would simplify the kernel side much, and would complicate the
userspace side considerably.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[patch] high-res timers: fix APIC event-broadcasting code

2006-12-21 Thread Ingo Molnar
Subject: [patch] high-res timers: fix APIC event-broadcasting code
From: Ingo Molnar <[EMAIL PROTECTED]>

this patch fixes a bug in the ACPI-C3-turns-off-lapic related
event-broadcasting code: it accidentally reactivated the global tick, 
instead of the global event emulation layer.

The effect of this bug was a rare bootup hang on one of my test-laptops 
- but it could also result in other types of timer related problems (but 
not hangs in an already running system), such as imprecise high-res 
timeouts.

Debugged via SysRq-Q.

Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
Signed-off-by: Thomas Gleixner <[EMAIL PROTECTED]>
---
 arch/i386/kernel/apic.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-hres-timers.q/arch/i386/kernel/apic.c
===
--- linux-hres-timers.q.orig/arch/i386/kernel/apic.c
+++ linux-hres-timers.q/arch/i386/kernel/apic.c
@@ -506,7 +506,7 @@ void switch_APIC_timer_to_ipi(void *cpum
int cpu = smp_processor_id();
 
if (cpu_isset(cpu, mask) && levt->event_handler)
-   clockevents_set_global_broadcast(levt, 1);
+   clockevents_set_broadcast(levt, 1);
 }
 EXPORT_SYMBOL_GPL(switch_APIC_timer_to_ipi);
 
@@ -517,7 +517,7 @@ void switch_ipi_to_APIC_timer(void *cpum
int cpu = smp_processor_id();
 
if (cpu_isset(cpu, mask) && levt->event_handler)
-   clockevents_set_global_broadcast(levt, 0);
+   clockevents_set_broadcast(levt, 0);
 }
 EXPORT_SYMBOL_GPL(switch_ipi_to_APIC_timer);
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: IO-APIC + timer doesn't work

2006-12-21 Thread Lu, Yinghai
-Original Message-
From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] 
Sent: Thursday, December 21, 2006 12:47 PM
To: Lu, Yinghai
>> +static int add_irq_entry(int type, int irqflag, int bus, int irq,
int apic, int
>> pin)

>This is fairly sane but probably belongs in mptable.c as a helper.

mparse.c?


>I am still trying to understand this enable_8259A_irq(0) case.
>As far as I can tell this is a very backwards way of enabling
>an ExtINT, as such it shouldn't be used until later.

>YH do you have any insight why on some Nvidia chipsets we apic 0 pin 2
doesn't
>work for the timer interrupt.  I thought that was what we were using in
LinuxBIOS
>for the mptable.

CK804 has this problem, but later chipsets seem to have fixed it.

YH



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bugme-new] [Bug 7724] New: asm/types.h should define __u64 if isoc99

2006-12-21 Thread Ismail Donmez
21 Ara 2006 Per 22:58 tarihinde, David Miller şunları yazmıştı: 
> From: Andrew Morton <[EMAIL PROTECTED]>
> Date: Thu, 21 Dec 2006 12:49:54 -0800
>
> > >Summary: asm/types.h should define __u64 if isoc99
>
> Platform specific bug, and has nothing to do with networking.
>
> This problem will occur with any user visible interface definition
> that uses __u64, and there are several both in and outside the
> networking.

This bug hit KDE modules (kdebase/kdemultimedia/kdetv/...) many times; I
worked around it by #undef-ing __STRICT_ANSI__ before including kernel headers,
which is well ugly but works.

> x86 and perhaps others protect the __u64 definition with:
>
>   defined(__GNUC__) && !defined(__STRICT_ANSI__)
>
> for whatever reason, probably to avoid "long long" or something like
> that.  But even that theory makes no sense.

Indeed this restriction just breaks userspace apps.

Regards,
ismail

-- 
Bir gün yolda yürüyordum... Bir şarkı duydum... Kalbim acıdı... Bu kadar...
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Enable Elektor ISA card on SMP

2006-12-21 Thread Norbert Tretkowski
The Elektor ISA card works fine on SMP, the patch below removes the
BROKEN_ON_SMP dependency.

Norbert


--- a/drivers/i2c/busses/Kconfig2006-12-21 21:31:27.0 +0100
+++ b/drivers/i2c/busses/Kconfig2006-12-21 21:32:27.0 +0100
@@ -86,7 +86,7 @@
 
 config I2C_ELEKTOR
tristate "Elektor ISA card"
-   depends on I2C && ISA && BROKEN_ON_SMP
+   depends on I2C && ISA
select I2C_ALGOPCF
help
  This supports the PCF8584 ISA bus I2C adapter.  Say Y if you own
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] ehca: fix kthread_create() error check

2006-12-21 Thread Heiko Carstens
> Index: 2.6-mm/drivers/infiniband/hw/ehca/ehca_irq.c
> ===
> --- 2.6-mm.orig/drivers/infiniband/hw/ehca/ehca_irq.c
> +++ 2.6-mm/drivers/infiniband/hw/ehca/ehca_irq.c
> @@ -670,11 +670,13 @@ static int comp_pool_callback(struct not
>  {
>   unsigned int cpu = (unsigned long)hcpu;
>   struct ehca_cpu_comp_task *cct;
> + struct task_struct *task;
> 
>   switch (action) {
>   case CPU_UP_PREPARE:
>   ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
> - if(!create_comp_task(pool, cpu)) {
> + task = create_comp_task(pool, cpu);
> + if (IS_ERR(task)) {
>   ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
>   return NOTIFY_BAD;
>   }

If this fails then the code will crash on CPU_UP_CANCELED. Because of
kthread_bind(cct->task,...). cct->task would be just the encoded error
number.

> @@ -730,7 +732,7 @@ int ehca_create_comp_pool(void)
> 
>   for_each_online_cpu(cpu) {
>   task = create_comp_task(pool, cpu);
> - if (task) {
> + if (!IS_ERR(task)) {
>   kthread_bind(task, cpu);
>   wake_up_process(task);
>   }

So you silently ignore errors and the module loads anyway?
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Binary Drivers

2006-12-21 Thread Valdis . Kletnieks
On Thu, 21 Dec 2006 12:50:00 PST, David Schwartz said:
> How would you feel if you bought a car and then discovered that the
> manufacturer had welded the hood shut? How many people still do their own
> oil changes anyway?

I know of at least one use case where a car *has* to have the doors welded
shut - stock car racing.  And there's requirements regarding how the hood
is fastened as well...


pgpoYKEfIMlSd.pgp
Description: PGP signature


Re: Updated Kernel Hacker's guide to git

2006-12-21 Thread Nigel Cunningham
Hi.

On Thu, 2006-12-21 at 06:44 -0500, Jeff Garzik wrote:
> Nigel Cunningham wrote:
> > Hi.
> > 
> > On Wed, 2006-12-20 at 22:04 -0500, Jeff Garzik wrote:
> >> I refreshed my git intro/cookbook for kernel hackers, at 
> >> http://linux.yyz.us/git-howto.html
> >>
> >> This describes most of the commands I use in day-to-day kernel hacking. 
> >>   Let me know if there are glaring errors or missing key commands.
> > 
> > Thanks for the work! I'd suggest also saying how to repack and cleanup.
> 
> Yes, I should mention repacking.  When you say cleanup, what 
> specifically do you mean?

Oh, I was just thinking of the related commands - prune-packed,
count-objects, fsck-objects and so on. (I know repack does prune-packed
when you use -d, but it might be handy to mention it anyway... or
not :>)

> > Could also be a good idea to go through the steps for uploading to
> > master.kernel.org or elsewhere?
> 
> Yes, push should be mentioned at the very least.

Nigel

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Mutex debug lock failure [was Re: Bad gcc-4.1.0 leads to Power4 crashes... and power5 too, actually

2006-12-21 Thread Linas Vepstas
On Thu, Dec 21, 2006 at 03:41:39PM +0100, Ingo Molnar wrote:
> On Wed, 2006-12-20 at 19:03 -0600, Linas Vepstas wrote:
> > Same kernel runs fine on power5. Although it does have patches
> > applied, those very same patches boot fine when applied to a slightly
> > older kernel (2.6.19-rc4).  I haven't been messing with builds or
> > pci config space (at least not intentionally).
> > 
> > I'll try again with an unpatched, unmodified kernel.
> 
> there have been a number of fixes to lockdep recently - could you try
> the kernel/lockdep.c file from latest -mm, does that fail too?
> 
> one possibility would be a chain-hash collision.

I see the same problem on linux-2.6.20-rc1-mm1 

The patch below fixes this, although I don't understand why 
this has become an issue just now:

Index: linux-2.6.20-rc1-mm1/kernel/mutex.c
===
--- linux-2.6.20-rc1-mm1.orig/kernel/mutex.c2006-12-19
16:19:34.0 -0600
+++ linux-2.6.20-rc1-mm1/kernel/mutex.c 2006-12-21 14:31:33.0
-0600
@@ -249,7 +249,7 @@ __mutex_unlock_common_slowpath(atomic_t
wake_up_process(waiter->task);
}

-   debug_mutex_clear_owner(lock);
+   // debug_mutex_clear_owner(lock);

spin_unlock_mutex(&lock->wait_lock, flags);
 }


It is obvious that this is the proximal cause of the failure of 
the double_unlock_mutex() mutex self-test.  However, both
the double-unlock test, and this clear_owner() call, are 
in linux-2.6.19-git7, which doesn't fail this test. So I conclude
that __mutex_unlock_common_slowpath() is never taken in 2.6.19
but is always taken on 2.6.20-rc1 (in particular, is taken
during the double-unlock test).

I don't know why that would be. 

It might be wise to add a test to make sure the slowpath
is taken only when it should be taken? It's sort of scary 
to think that it might be always taken, and that no one 
notices the problem...

I'm gonna be out until after Christmas. -- and so, 

Merry Christmas! 
 
--linas


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: fuse, get_user_pages, flush_anon_page, aliasing caches and all that again

2006-12-21 Thread Jan Engelhardt

On Dec 21 2006 18:51, Miklos Szeredi wrote:
>
>The root of the problem is that copy_to_user() may cause page faults
>on the userspace buffer, and the page fault might (in case of a
>maliciously crafted filesystem) recurse into the filesystem itself.

Would it be worthwhile to mlock the page? I know that needs root
privs or some capability, but a static buffer could be put aside when
fusermount is run.


-`J'
-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bug 7505] Linux-2.6.18 fails to boot on AMD64 machine

2006-12-21 Thread Ard -kwaak- van Breemen
On Thu, Dec 21, 2006 at 04:04:04PM +0800, Zhang, Yanmin wrote:
> I couldn't reproduce it on my EM64T machine. I instrumented function 
> start_kernel and
> didn't find irq was enabled before calling init_IRQ. It'll be better if the 
> reporter could
> instrument function start_kernel to capture which function enables irq.

Editing init/main.c:
preempt_disable();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
printk("BLAAT17");
build_all_zonelists();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
printk("BLAAT18");
page_alloc_init();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
printk("BLAAT19");
printk(KERN_NOTICE "Kernel command line: %s\n", saved_command_line);
parse_early_param();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
printk("BLAAT20");
parse_args("Booting kernel", command_line, __start___param,
   __stop___param - __start___param,
   &unknown_bootoption);
printk("BLAAT21");
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
sort_main_extable();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
printk("BLAAT22");
trap_init();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
printk("BLAAT23");

Results in:
^MAllocating PCI resources starting at 8800 (gap: 8000:6000)
^MBLAAT12BLAAT13<6>PERCPU: Allocating 32960 bytes of per cpu data
^MBLAAT14BLAAT15BLAAT16BLAAT17Built 2 zonelists.  Total pages: 1032635
^MBLAAT18BLAAT19<5>Kernel command line: console=tty0 console=ttyS0,115200 
hdb=noprobe hdc=noprobe hdd=noprobe root=/dev/md0 ro panic=30 
earlyprintk=serial,ttyS0,115200 
^MBLAAT20<6>ide_setup: hdb=noprobe
^Mide_setup: hdc=noprobe
^Mide_setup: hdd=noprobe
^MBLAAT21start_kernel(): bug: interrupts were enabled early
^Mstart_kernel(): bug: interrupts were enabled early
^MBLAAT22Initializing CPU#0

Hmmm, that actually doesn't make sense to me (unless parse_args is able to 
enable irq's).
-- 
program signature;
begin  { telegraaf.com
} writeln("<[EMAIL PROTECTED]> TEM2");
end
.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: Binary Drivers

2006-12-21 Thread David Lang

On Thu, 21 Dec 2006, David Schwartz wrote:


You say "It's rude to not play by our rules". They say "It's rude of
you to expect us to change our business model to support your niche
market differently from the way we support everyone else." Neither is
wrong...


Honestly, I think it *is* wrong to sell someone a physical product and then
not tell them how to make it work. If you're not actually selling them the
physical product but selling them a way to get a particular thing done, then
don't represent that you're selling them physical product because that would
presumably include the right to use it any way they wanted provided it was
lawful.

How would you feel if you bought a car and then discovered that the
manufacturer had welded the hood shut? How many people still do their own
oil changes anyway?


there are cars out there where the owner cannot change or add transmission fluid 
(I had a rental car spring a leak and found this out the hard way)


some people like this, some don't. vote with your money

David Lang
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bugme-new] [Bug 7724] New: asm/types.h should define __u64 if isoc99

2006-12-21 Thread David Miller
From: Andrew Morton <[EMAIL PROTECTED]>
Date: Thu, 21 Dec 2006 12:49:54 -0800

> >Summary: asm/types.h should define __u64 if isoc99

Platform specific bug, and has nothing to do with networking.

This problem will occur with any user visible interface definition
that uses __u64, and there are several both in and outside the
networking.

x86 and perhaps others protect the __u64 definition with:

defined(__GNUC__) && !defined(__STRICT_ANSI__)

for whatever reason, probably to avoid "long long" or something like
that.  But even that theory makes no sense.

I do not make this protection on any of the sparc ports, even 32-bit
sparc, for example, so I find it really strange that x86 does this.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: my handy-dandy, "coding style" script

2006-12-21 Thread Jan Engelhardt

On Dec 20 2006 17:42, Bill Davidsen wrote:
>
> Bearing in mind that some casts may have been put in when struct
> members had other values, may be needed on some hardware but not
> others, etc. Cleanups are good, but may not be as obvious as they
> appear.
>
> Not that there's a lack of places to remove visual cruft, but
> perhaps someone could look at casts and ask if each hides a real
> type mismatch.

http://lkml.org/lkml/2006/9/30/208

As much as I would like to go through the whole kernel tree, it's a
task quite big.


-`J'
-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [-mm patch] ptrace: make {put,get}reg work again for gs and fs

2006-12-21 Thread Frederik Deweerdt
On Thu, Dec 21, 2006 at 11:22:05AM -0800, Jeremy Fitzhardinge wrote:
> Frederik Deweerdt wrote:
> > Following the i386 pda patches, it's not possible to set gs or fs value
> > from gdb anymore. The following patch restores the old behaviour of
> > getting and setting thread.gs or thread.fs respectively.
> > Here's a gdb session *before* the patch:
> > (gdb) info reg
> > [...]
> > fs 0x33 51
> > gs 0x33 51
> > (gdb) set $fs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0x33 51
> > gs 0x33 51
> > (gdb) set $gs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0xffff   65535
> > gs 0x33 51
> >
> > Another one *after* the patch:
> > (gdb) info reg
> > [...]
> > fs 0xd8 216
> >   
> 
> This doesn't look right.  This is the kernel's %fs, not usermode's
> (which should be 0).
> 
Right, I missed that.
> > gs 0x33 51
> > (gdb) set $fs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0xffff   65535
> > gs 0x33 51
> > (gdb) set $gs=0xffff
> > (gdb) info reg
> > [...]
> > fs 0xffff   65535
> > gs 0xffff   65535
> >   
> Hm.  This shouldn't be possible since this is a bad selector, but I
> guess ptrace/gdb doesn't really know that.  If you run the target (even
> single step it), these should revert to 0.
It does; my point there is just that in that case gdb would stick the
0xffff value in the right place, which it doesn't without the patch.
> 
> > Andrew, this goes on top of 
> > ptrace-fix-efl_offset-value-according-to-i386-pda-changes.patch
> > sent by Jeremy yesterday.
> >   
> 
> Don't think this is quite right yet.  Assuming the %gs->%fs patch has
> been applied, then the target %fs should be on its stack, and target %gs
> will be in thread.gs.  I'm not sure that thread.fs has any use, but I'd
> want to double check vm86 to be sure.
I'm not sure what you mean by the '%gs->%fs patch'. Do you refer to 
convert-i386-pda-code-to-use-%fs-fixes.patch
which is in -mm1?
Or is there another one I might have missed? For the record, I'm running
-mm1 + the efl_offset patch.

Regards,
Frederik
> 
> J
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 2.6.20-rc1 4/6] PXA GPIO wrappers

2006-12-21 Thread Bill Gatliff

Guys:


Probably? What I am wondering is this: can the compiler
optimize away the range check that is duplicated in GPSR/GPCR
and  GPIO_bit for __gpio_set/get_value? Or could we optimize
this case by expanding the macros in place (which would mean
duplicating code from pxa-regs.h)...
   



Who cares?  :)

I don't think there's much point in optimizing here, since these 
functions won't be hot paths anyway.  Yes, they'll be called in 
interrupt handlers and so we don't want them to be _too_ heavy, but 
compared to the overhead of an interrupt handler, a few extra 
instructions in the GPIO access will get lost in the noise.


Inlines generally seem to be more maintainable, give you a symbol that 
you can disassemble and breakpoint, etc.  I'll take them over the macro 
implementations any day, in this case even if there's a cost of a few 
instructions.


All IMHO, of course.


b.g.

--
Bill Gatliff
[EMAIL PROTECTED]

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: Binary Drivers

2006-12-21 Thread David Schwartz

> You say "It's rude to not play by our rules". They say "It's rude of
> you to expect us to change our business model to support your niche
> market differently from the way we support everyone else." Neither is
> wrong...

Honestly, I think it *is* wrong to sell someone a physical product and then
not tell them how to make it work. If you're not actually selling them the
physical product but selling them a way to get a particular thing done, then
don't represent that you're selling them physical product because that would
presumably include the right to use it any way they wanted provided it was
lawful.

How would you feel if you bought a car and then discovered that the
manufacturer had welded the hood shut? How many people still do their own
oil changes anyway?

If you sell a physical product, you should also include the information
necessary to make that physical product *work*. If you don't, you aren't
actually selling the physical product, that is, the person is buying a right
to use that physical product some particular way and not the product itself.

The law may come around on this issue. It has definitely done so on
companies that claim to be selling you cellphones but then later claim that
you need to pay them additional money if you want the access code to unlock
it and make it work with another carrier. If you own a physical phone, it
should come with the right to use it with any carrier it can be made to work
with, and a company with no ownership interest in the phone has no right to
withhold the information needed to make it do that so as to force you to use
their service.

The same applies when you buy a graphics card and don't want to use it with
the manufacturer's drivers. If it's *your* graphics card, the manufacturer
has no legitimate interest in forcing you to use their drivers by
withholding information about what *you* bought.

DS


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: IO-APIC + timer doesn't work

2006-12-21 Thread Eric W. Biederman
"Yinghai Lu" <[EMAIL PROTECTED]> writes:

> On 12/19/06, Eric W. Biederman <[EMAIL PROTECTED]> wrote:
>> So the pin2 case should be tested right after the pin1 case as we do
>> currently.  On most new boards that will be a complete noop.
>>
>> But it is better than our current blind guess at using ExtINT mode.
>>
>> I figure after we try what the BIOS has told us about and that
>> has failed we should first try the common irq 0 apic mappings,
>> and then try the common ExtINT mappings.
>
> Please check if this one is ok.
>
> [PATCH] x86_64: check_timer with io apic setup before try_apic_pin
>
> add io apic setup before try_apic_pin
>
> cc: Andi Kleen <[EMAIL PROTECTED]>
> cc: Eric W. Biederman <[EMAIL PROTECTED]>
> Signed-off-by: Yinghai Lu <[EMAIL PROTECTED]>
>
> diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
> index 2a1dcd5..6d09fc0 100644
> --- a/arch/x86_64/kernel/io_apic.c
> +++ b/arch/x86_64/kernel/io_apic.c
> @@ -273,10 +273,17 @@ static void add_pin_to_irq(unsigned int irq, int apic, 
> int
> pin)
>   struct irq_pin_list *entry = irq_2_pin + irq;
>  
>   BUG_ON(irq >= NR_IRQS);
> - while (entry->next)
> + while (entry->next) {
> + if (entry->apic == apic && entry->pin == pin) 
> + return;
> + if (entry->pin == -1) 
> + break;
>   entry = irq_2_pin + entry->next;
> + }
>  
>   if (entry->pin != -1) {
> + if (entry->apic == apic && entry->pin == pin) 
> + return;
>   entry->next = first_free_entry;
>   entry = irq_2_pin + entry->next;
>   if (++first_free_entry >= PIN_MAP_SIZE)

This change to add_pin_to_irq looks dubious.

We especially shouldn't hit a pin == -1 while next is still valid.
The problem is that the code that reads this at irq time does not
skip entries with entry->pin == -1.

Fixing the infrastructure should probably be a separate patch
so we don't get too many concepts confused in here.

> @@ -286,6 +293,24 @@ static void add_pin_to_irq(unsigned int irq, int apic, 
> int
> pin)
>   entry->pin = pin;
>  }
>  
> +static void remove_pin_to_irq(unsigned int irq, int apic, int pin)
> +{
> + struct irq_pin_list *entry = irq_2_pin + irq;
> +
> + BUG_ON(irq >= NR_IRQS);
> +
> + while (entry) {
> + if (entry->apic == apic && entry->pin == pin) {
> + entry->apic = -1;
> + entry->pin = -1;
> + break;
> + }
> + if (entry->next) 
> + entry = irq_2_pin + entry->next;
> + }
> +
> +}
> +
This change to remove_pin_to_irq is simply wrong.

> +static int add_irq_entry(int type, int irqflag, int bus, int irq, int apic, 
> int
> pin)
> +{
> +struct mpc_config_intsrc intsrc;
> + int idx;
> +
> +intsrc.mpc_type = MP_INTSRC;
> +intsrc.mpc_irqflag = irqflag; /* conforming */
> +intsrc.mpc_srcbus = bus;
> + intsrc.mpc_dstapic = (apic != -1) ? mp_ioapics[apic].mpc_apicid: 
> MP_APIC_ALL;
> +
> +intsrc.mpc_irqtype = type;
> +
> +intsrc.mpc_srcbusirq = irq;
> +intsrc.mpc_dstirq = pin;
> +
> +mp_irqs [mp_irq_entries] = intsrc;
> +Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
> +" IRQ %02x, APIC ID %x, APIC INT %02x\n",
> +intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
> +(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
> + intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
> +idx = mp_irq_entries;
> + if (++mp_irq_entries >= MAX_IRQ_SOURCES)
> +panic("Max # of irq sources exceeded!!\n");
> + return idx;

This is fairly sane but probably belongs in mptable.c as a helper.

>  /*
>   * Find the pin to which IRQ[irq] (ISA) is connected
>   */
> @@ -1570,6 +1658,22 @@ static inline void unlock_ExtINT_logic(void)
>   * fanatically on his truly buggy board.
>   */
>  
> +static void set_try_apic_pin(int apic, int pin, int type)
> +{
> + int idx;
> + int irq = 0;
> + int bus = 0; /* MP_ISA_BUS */
> + int irqflag = 5; /* MP_IRQ_TRIGGER_EDGE|MP_IRQ_POLARITY_HIGH */
> +
> + idx = find_irq_entry(apic,pin,type);
> +
> + if (idx == -1) 
> + idx = add_irq_entry(type, irqflag, bus, irq, apic, pin);
> +
> + add_pin_to_irq(irq, apic, pin);
> + setup_IO_APIC_irq(apic, pin, idx, irq);
> +}
> +
>  static int try_apic_pin(int apic, int pin, char *msg)
>  {
>   apic_printk(APIC_VERBOSE, KERN_INFO
> @@ -1588,7 +1692,7 @@ static int try_apic_pin(int apic, int pin, char *msg)
>   }
>   return 1;
>   }
> - clear_IO_APIC_pin(apic, pin);
> +
>   apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
>   return 0;
>  }
> @@ -1599,12 +1703,13 @@ static void check_timer(void)
>   int apic1, pin1, apic2, pin2;
>   int vector;
>   cpumask_t mask;
> 

Re: Updated Kernel Hacker's guide to git

2006-12-21 Thread Jeff Garzik

Guennadi Liakhovetski wrote:

On Wed, 20 Dec 2006, Jeff Garzik wrote:


I refreshed my git intro/cookbook for kernel hackers, at
http://linux.yyz.us/git-howto.html


Very nice, thanks! A couple of remarks from an absolute git newbie:

1. I heard "git am" is supposed to supersede apply-mbox


Hey, that's pretty neat.  Glad you told me, this should improve my 
workflow a bit.



2. What I often have problems with is - what to do if git spits at me a 
bunch of conflict messages after a seemingly safe pull or similar. Don't 
know if you want to cover those points but "git troubleshooting" would 
definitely be a valuable document.


Agreed.

Jeff


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 2.6.19 file content corruption on ext3

2006-12-21 Thread Andrew Morton
On Thu, 21 Dec 2006 14:03:20 +0100
Peter Zijlstra <[EMAIL PROTECTED]> wrote:

> On Tue, 2006-12-19 at 09:43 -0800, Linus Torvalds wrote:
> > 
> > Btw,
> >  here's a totally new tangent on this: it's possible that user code is 
> > simply BUGGY. 
> 
> depmod: BADNESS: written outside isize 22183

akpm:/usr/src/module-init-tools-3.3-pre1> grep -r mmap .
./zlibsupport.c:map = mmap(0, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE, 
fd, 0);

So presumably it's in a library.

akpm:/usr/src/25> ldd /sbin/depmod
linux-gate.so.1 =>  (0xe000)
libc.so.6 => /lib/tls/i686/cmov/libc.so.6 (0x46afa000)
/lib/ld-linux.so.2 (0x4631d000)

worrisome.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Updated Kernel Hacker's guide to git

2006-12-21 Thread Guennadi Liakhovetski
On Wed, 20 Dec 2006, Jeff Garzik wrote:

> I refreshed my git intro/cookbook for kernel hackers, at
> http://linux.yyz.us/git-howto.html

Very nice, thanks! A couple of remarks from an absolute git newbie:

1. I heard "git am" is supposed to supersede apply-mbox

2. What I often have problems with is - what to do if git spits at me a 
bunch of conflict messages after a seemingly safe pull or similar. Don't 
know if you want to cover those points but "git troubleshooting" would 
definitely be a valuable document.

Thanks
Guennadi
---
Guennadi Liakhovetski
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Binary Drivers

2006-12-21 Thread Eric W. Biederman
Tomas Carnecky <[EMAIL PROTECTED]> writes:

> The problem is, nobody wants to decide what to do with closed source software 
> in
> Linux. I don't care how you decide, for or against binary drivers (well,
> actually I do but my opinion doesn't matter), just decide already!

The decision from Linus was simple.  Linus will not merge a patch that
attempts to prevent this at a technical level.  No one has made
any exceptions to the GPL to say that GPL incompatible drivers are
allowed.  Therefore on a legal level kernel drivers with GPL
incompatible drivers are as illegal as the derivative works clause in
copyright law will allow us to make them.  If you want something
firmer you can go talk to your appropriate government about taking the
fuzz out of what is a derivative work. 

As a practical matter people not releasing source aren't playing well with us 
so we are not likely to play well with them.

Eric
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Alsa-devel] sound/isa/cmi8330.c: dead ENABLE_SB_MIXER code

2006-12-21 Thread Ondrej Zary
On Tuesday 19 December 2006 10:59, you wrote:
> At Mon, 4 Dec 2006 17:04:34 +0100,
>
> Adrian Bunk wrote:
> > In sound/isa/cmi8330.c, the ENABLE_SB_MIXER code is currently never
> > used.
> >
> > What's the story behind this?
> > Should ENABLE_SB_MIXER be enabled?
> > Or the code be removed?
>
> CMI8330 has a dual interface for SB and Adlib modes.  The mixer can
> also behave differently according to the mode.  The current code has
> mixer elements corresponding to both modes.

CMI8330 (and also CMI8329) appears like SB16 and WSS

> However, these mixer elements _seem_ to interact with each other,
> and cannot be controlled individually.  That's why ENABLE_SB_MIXER is
> disabled.  I cannot check this issue any longer since the test board
> got broken long time ago...

The mixer is a bit weird - and probably different between CMI8329 and CMI8330. 
At least on my CMI8329A, the master volume does not work. And there are also 
some problems with PCM volume - I can decrease it in alsamixer but not 
increase - but it works both ways in XMMS...
I have also some boards with integrated CMI8330 - so I might test it 
sometimes.

> I don't think we would get much gain by changing this old code.
> (and the relevant part isn't so big.)
> Let's keep as it is.
>
>
> Takashi
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Ondrej Zary
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bug 7505] Linux-2.6.18 fails to boot on AMD64 machine

2006-12-21 Thread Ard -kwaak- van Breemen
Hello,

On Thu, Dec 21, 2006 at 04:04:04PM +0800, Zhang, Yanmin wrote:
> I couldn't reproduce it on my EM64T machine. I instrumented function 
> start_kernel and
> didn't find irq was enabled before calling init_IRQ. It'll be better if the 
> reporter could
> instrument function start_kernel to capture which function enables irq.
Just diving into the sources.
Is that something like:
if(!raw_irqs_disabled_flags) printk "irqs are enabled";

(At that moment it might have crashed already.. :-)).

I don't see the complete context yet, but I hope the irq is
triggered after the irq is somehow enabled.

BTW: the panic occurs on half of my boards on tyan S2891 with 2
opterons, of which the only difference seems to be the purchase
date (and hence probably the motherboard revisions). (Haven't got
time yet to pull them out of the rack and compare the
motherboards).


-- 
program signature;
begin  { telegraaf.com
} writeln("<[EMAIL PROTECTED]> TEM2");
end
.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   4   5   6   7   >