Hi Morten and list,
I am currently investigating HMP-related scheduling behavior on the TC2 platform, and
have found a strange issue.
Here I have two modules; both create fixed-load threads, but one
module binds those threads to specific CPUs, while the other does not
perform the binding.
[I test with linaro 2013.05 release to get below result]
With setting A7/A15 to the same frequency, 1G,
The result is:
Bounded one:
Five thread finished with 137s/137s/137s/138s/138s
unbounded one(With HMP related configuration enabled, which is the default one):
Five thread finished with 138s/275s/275s/275s/275s
unbounded one(With HMP related configuration disabled):
Five thread finished with 228s/229s/229s/229s/231s
So it seems to me that the current configuration does not let TC2 run at
its full performance.
This worries me, as it may somehow degrade benchmark results.
I haven't looked into the details yet; I'm just posting the results here
to get your feedback.
Once I have a more detailed analysis, I will post it here as well.
Thanks,
Lei
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#define LOOP_NUM 0x1000000
#define REPEAT_NUM 0x2000
/*
 * busy_loop_test - kthread body: spin in a fixed-length busy loop
 * REPEAT_NUM times, calling schedule() between iterations, and print
 * the total elapsed wall-clock time.
 *
 * @data: thread index, smuggled through the void * argument.
 * Returns 0 (kthread exit code).
 */
static int busy_loop_test(void *data)
{
	/* Cast via long: a direct (int) cast of a pointer truncates and
	 * warns on 64-bit builds. */
	int i, n = (int)(long)data, j;
	struct timeval tv0, tv1;
	long us;

	j = 0;
	do_gettimeofday(&tv0);
repeat:
	for (i = 0; i < LOOP_NUM; i++)
		cpu_relax();
	/* Give the scheduler a chance to migrate/preempt us each round. */
	schedule();
	if (j++ < REPEAT_NUM)
		goto repeat;
	do_gettimeofday(&tv1);
	us = tv1.tv_usec - tv0.tv_usec;
	us += (tv1.tv_sec - tv0.tv_sec) * 1000000;
	/* %ld matches long/time_t; the original %d was a format-specifier
	 * mismatch (undefined behavior per printk/printf rules). */
	printk("thread%d(%d) finish with time %ld us (%ld:%ld)\n",
	       n, smp_processor_id(), us,
	       (long)(tv1.tv_sec - tv0.tv_sec),
	       (long)(tv1.tv_usec - tv0.tv_usec));
	return 0;
}
#define THREAD_NUM 5
/*
 * sched_test_init - create THREAD_NUM busy-loop kthreads, bind each one
 * to CPU i, then wake them all so they start as close to simultaneously
 * as possible.
 *
 * NOTE(review): binding thread i to CPU i assumes CPUs 0..THREAD_NUM-1
 * are present and online — TODO confirm on the target (TC2 has 5 cores,
 * matching THREAD_NUM).
 * Returns 0 on success (creation failures are logged and skipped).
 */
static int __init sched_test_init(void)
{
	struct task_struct *p[THREAD_NUM];
	int i;

	for (i = 0; i < THREAD_NUM; i++) {
		/* Pass the index through the void * arg; cast via long to
		 * avoid an int-to-pointer size mismatch on 64-bit. */
		p[i] = kthread_create_on_node(busy_loop_test,
					      (void *)(long)i,
					      cpu_to_node(i),
					      "busy_loop/%d", i);
		if (IS_ERR(p[i])) {
			/* Original dereferenced the error pointer; skip
			 * failed threads instead of crashing. */
			pr_err("busy_loop/%d: creation failed (%ld)\n",
			       i, PTR_ERR(p[i]));
			p[i] = NULL;
			continue;
		}
		/* Hold a reference so the task_struct outlives the thread. */
		get_task_struct(p[i]);
		kthread_bind(p[i], i);
	}
	/* Wake everyone only after all threads exist. */
	for (i = 0; i < THREAD_NUM; i++)
		if (p[i])
			wake_up_process(p[i]);
	return 0;
}
/*
 * Module unload hook — intentionally empty.
 * NOTE(review): sched_test_init() takes a reference on each worker via
 * get_task_struct() and does not store the pointers anywhere reachable
 * from here, so the threads are never stopped and their task_struct
 * references are never dropped. Unloading while workers run is unsafe;
 * presumably acceptable for a throwaway test module — confirm.
 */
static void __exit sched_test_exit(void)
{
}
module_init(sched_test_init);
module_exit(sched_test_exit);
MODULE_LICENSE("GPL");
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#define LOOP_NUM 0x1000000
#define REPEAT_NUM 0x2000
/*
 * busy_loop_test - kthread body: spin in a fixed-length busy loop
 * REPEAT_NUM times, calling schedule() between iterations, and print
 * the total elapsed wall-clock time.
 *
 * @data: thread index, smuggled through the void * argument.
 * Returns 0 (kthread exit code).
 */
static int busy_loop_test(void *data)
{
	/* Cast via long: a direct (int) cast of a pointer truncates and
	 * warns on 64-bit builds. */
	int i, n = (int)(long)data, j;
	struct timeval tv0, tv1;
	long us;

	j = 0;
	do_gettimeofday(&tv0);
repeat:
	for (i = 0; i < LOOP_NUM; i++)
		cpu_relax();
	/* Give the scheduler a chance to migrate/preempt us each round. */
	schedule();
	if (j++ < REPEAT_NUM)
		goto repeat;
	do_gettimeofday(&tv1);
	us = tv1.tv_usec - tv0.tv_usec;
	us += (tv1.tv_sec - tv0.tv_sec) * 1000000;
	/* %ld matches long/time_t; the original %d was a format-specifier
	 * mismatch (undefined behavior per printk/printf rules). */
	printk("thread%d(%d) finish with time %ld us (%ld:%ld)\n",
	       n, smp_processor_id(), us,
	       (long)(tv1.tv_sec - tv0.tv_sec),
	       (long)(tv1.tv_usec - tv0.tv_usec));
	return 0;
}
#define THREAD_NUM 5
/*
 * sched_test_init - unbound variant: create THREAD_NUM busy-loop
 * kthreads on any NUMA node (-1) with no CPU affinity, then wake them
 * all so the scheduler (HMP or otherwise) decides placement.
 *
 * Returns 0 on success (creation failures are logged and skipped).
 */
static int __init sched_test_init(void)
{
	struct task_struct *p[THREAD_NUM];
	int i;

	for (i = 0; i < THREAD_NUM; i++) {
		/* Pass the index through the void * arg; cast via long to
		 * avoid an int-to-pointer size mismatch on 64-bit. */
		p[i] = kthread_create_on_node(busy_loop_test,
					      (void *)(long)i,
					      -1,
					      "busy_loop/%d", i);
		if (IS_ERR(p[i])) {
			/* Original dereferenced the error pointer; skip
			 * failed threads instead of crashing. */
			pr_err("busy_loop/%d: creation failed (%ld)\n",
			       i, PTR_ERR(p[i]));
			p[i] = NULL;
			continue;
		}
		/* Hold a reference so the task_struct outlives the thread. */
		get_task_struct(p[i]);
	}
	/* Wake everyone only after all threads exist. */
	for (i = 0; i < THREAD_NUM; i++)
		if (p[i])
			wake_up_process(p[i]);
	return 0;
}
/*
 * Module unload hook — intentionally empty.
 * NOTE(review): sched_test_init() takes a reference on each worker via
 * get_task_struct() and does not store the pointers anywhere reachable
 * from here, so the threads are never stopped and their task_struct
 * references are never dropped. Unloading while workers run is unsafe;
 * presumably acceptable for a throwaway test module — confirm.
 */
static void __exit sched_test_exit(void)
{
}
module_init(sched_test_init);
module_exit(sched_test_exit);
MODULE_LICENSE("GPL");
_______________________________________________
linaro-dev mailing list
[email protected]
http://lists.linaro.org/mailman/listinfo/linaro-dev