Hi Morten and list,
I am currently investigating HMP-related scheduling behavior on the TC2 platform, and
have found a strange issue.
Here I have two modules; both create fixed-load threads, but one
module binds those threads to specific CPUs, while the other does not
perform the binding.
[I test with linaro 2013.05 release to get below result]
With setting A7/A15 to the same frequency, 1G,
The result is:
Bounded one:
Five thread finished with 137s/137s/137s/138s/138s
unbounded one(With HMP related configuration enabled, which is the default one):
Five thread finished with 138s/275s/275s/275s/275s
unbounded one(With HMP related configuration disabled):
Five thread finished with 228s/229s/229s/229s/231s
So it seems to me that the current configuration does not let TC2 run at
its full performance.
This worries me, as it may somehow degrade benchmark results.
I haven't looked into the details yet; I'm just posting the results here
to get your feedback.
Once I have a more detailed analysis, I will post it here as well.
Thanks,
Lei
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#define LOOP_NUM 0x1000000
#define REPEAT_NUM 0x2000
/*
 * busy_loop_test - kthread body: spin in a fixed-length busy loop
 * REPEAT_NUM times, calling schedule() between iterations, and print
 * the total elapsed wall-clock time.
 *
 * @data: thread index, smuggled through the void * argument.
 * Returns 0 (kthread exit code).
 */
static int busy_loop_test(void *data)
{
	/* Cast via long: a direct (int) cast of a pointer truncates and
	 * warns on 64-bit builds. */
	int i, n = (int)(long)data, j;
	struct timeval tv0, tv1;
	long us;

	j = 0;
	do_gettimeofday(&tv0);
repeat:
	for (i = 0; i < LOOP_NUM; i++)
		cpu_relax();
	/* Give the scheduler a chance to migrate/preempt us each round. */
	schedule();
	if (j++ < REPEAT_NUM)
		goto repeat;
	do_gettimeofday(&tv1);
	us = tv1.tv_usec - tv0.tv_usec;
	us += (tv1.tv_sec - tv0.tv_sec) * 1000000;
	/* %ld matches long/time_t; the original %d was a format-specifier
	 * mismatch (undefined behavior per printk/printf rules). */
	printk("thread%d(%d) finish with time %ld us (%ld:%ld)\n",
	       n, smp_processor_id(), us,
	       (long)(tv1.tv_sec - tv0.tv_sec),
	       (long)(tv1.tv_usec - tv0.tv_usec));
	return 0;
}
#define THREAD_NUM 5
/*
 * sched_test_init - create THREAD_NUM busy-loop kthreads, bind each one
 * to CPU i, then wake them all so they start as close to simultaneously
 * as possible.
 *
 * NOTE(review): binding thread i to CPU i assumes CPUs 0..THREAD_NUM-1
 * are present and online — TODO confirm on the target (TC2 has 5 cores,
 * matching THREAD_NUM).
 * Returns 0 on success (creation failures are logged and skipped).
 */
static int __init sched_test_init(void)
{
	struct task_struct *p[THREAD_NUM];
	int i;

	for (i = 0; i < THREAD_NUM; i++) {
		/* Pass the index through the void * arg; cast via long to
		 * avoid an int-to-pointer size mismatch on 64-bit. */
		p[i] = kthread_create_on_node(busy_loop_test,
					      (void *)(long)i,
					      cpu_to_node(i),
					      "busy_loop/%d", i);
		if (IS_ERR(p[i])) {
			/* Original dereferenced the error pointer; skip
			 * failed threads instead of crashing. */
			pr_err("busy_loop/%d: creation failed (%ld)\n",
			       i, PTR_ERR(p[i]));
			p[i] = NULL;
			continue;
		}
		/* Hold a reference so the task_struct outlives the thread. */
		get_task_struct(p[i]);
		kthread_bind(p[i], i);
	}
	/* Wake everyone only after all threads exist. */
	for (i = 0; i < THREAD_NUM; i++)
		if (p[i])
			wake_up_process(p[i]);
	return 0;
}
/*
 * Module unload hook — intentionally empty.
 * NOTE(review): sched_test_init() takes a reference on each worker via
 * get_task_struct() and does not store the pointers anywhere reachable
 * from here, so the threads are never stopped and their task_struct
 * references are never dropped. Unloading while workers run is unsafe;
 * presumably acceptable for a throwaway test module — confirm.
 */
static void __exit sched_test_exit(void)
{
}
module_init(sched_test_init);
module_exit(sched_test_exit);
MODULE_LICENSE("GPL");
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#define LOOP_NUM 0x1000000
#define REPEAT_NUM 0x2000
/*
 * busy_loop_test - kthread body: spin in a fixed-length busy loop
 * REPEAT_NUM times, calling schedule() between iterations, and print
 * the total elapsed wall-clock time.
 *
 * @data: thread index, smuggled through the void * argument.
 * Returns 0 (kthread exit code).
 */
static int busy_loop_test(void *data)
{
	/* Cast via long: a direct (int) cast of a pointer truncates and
	 * warns on 64-bit builds. */
	int i, n = (int)(long)data, j;
	struct timeval tv0, tv1;
	long us;

	j = 0;
	do_gettimeofday(&tv0);
repeat:
	for (i = 0; i < LOOP_NUM; i++)
		cpu_relax();
	/* Give the scheduler a chance to migrate/preempt us each round. */
	schedule();
	if (j++ < REPEAT_NUM)
		goto repeat;
	do_gettimeofday(&tv1);
	us = tv1.tv_usec - tv0.tv_usec;
	us += (tv1.tv_sec - tv0.tv_sec) * 1000000;
	/* %ld matches long/time_t; the original %d was a format-specifier
	 * mismatch (undefined behavior per printk/printf rules). */
	printk("thread%d(%d) finish with time %ld us (%ld:%ld)\n",
	       n, smp_processor_id(), us,
	       (long)(tv1.tv_sec - tv0.tv_sec),
	       (long)(tv1.tv_usec - tv0.tv_usec));
	return 0;
}
#define THREAD_NUM 5
/*
 * sched_test_init - unbound variant: create THREAD_NUM busy-loop
 * kthreads on any NUMA node (-1) with no CPU affinity, then wake them
 * all so the scheduler (HMP or otherwise) decides placement.
 *
 * Returns 0 on success (creation failures are logged and skipped).
 */
static int __init sched_test_init(void)
{
	struct task_struct *p[THREAD_NUM];
	int i;

	for (i = 0; i < THREAD_NUM; i++) {
		/* Pass the index through the void * arg; cast via long to
		 * avoid an int-to-pointer size mismatch on 64-bit. */
		p[i] = kthread_create_on_node(busy_loop_test,
					      (void *)(long)i,
					      -1,
					      "busy_loop/%d", i);
		if (IS_ERR(p[i])) {
			/* Original dereferenced the error pointer; skip
			 * failed threads instead of crashing. */
			pr_err("busy_loop/%d: creation failed (%ld)\n",
			       i, PTR_ERR(p[i]));
			p[i] = NULL;
			continue;
		}
		/* Hold a reference so the task_struct outlives the thread. */
		get_task_struct(p[i]);
	}
	/* Wake everyone only after all threads exist. */
	for (i = 0; i < THREAD_NUM; i++)
		if (p[i])
			wake_up_process(p[i]);
	return 0;
}
/*
 * Module unload hook — intentionally empty.
 * NOTE(review): sched_test_init() takes a reference on each worker via
 * get_task_struct() and does not store the pointers anywhere reachable
 * from here, so the threads are never stopped and their task_struct
 * references are never dropped. Unloading while workers run is unsafe;
 * presumably acceptable for a throwaway test module — confirm.
 */
static void __exit sched_test_exit(void)
{
}
module_init(sched_test_init);
module_exit(sched_test_exit);
MODULE_LICENSE("GPL");
_______________________________________________
linaro-dev mailing list
[email protected]
http://lists.linaro.org/mailman/listinfo/linaro-dev