port some benchmarks
This commit is contained in:
parent
30a534e650
commit
009f8b5fb0
30 changed files with 5913 additions and 0 deletions
3
coremark/Makefile
Normal file
3
coremark/Makefile
Normal file
|
@ -0,0 +1,3 @@
|
|||
NAME = coremark
|
||||
SRCS = $(shell find -L ./src/ -name "*.c")
|
||||
include $(AM_HOME)/Makefile
|
188
coremark/include/core_portme.h
Executable file
188
coremark/include/core_portme.h
Executable file
|
@ -0,0 +1,188 @@
|
|||
/* Topic : Description
|
||||
This file contains configuration constants required to execute on different platforms
|
||||
*/
|
||||
|
||||
|
||||
#ifndef CORE_PORTME_H
|
||||
#define CORE_PORTME_H
|
||||
|
||||
#include <am.h>
|
||||
#include <klib.h>
|
||||
#include <klib-macros.h>
|
||||
|
||||
#define ITERATIONS 1000
|
||||
#define MEM_METHOD MEM_STATIC
|
||||
|
||||
/************************/
|
||||
/* Data types and settings */
|
||||
/************************/
|
||||
/* Configuration : HAS_FLOAT
|
||||
Define to 1 if the platform supports floating point.
|
||||
*/
|
||||
#ifndef HAS_FLOAT
|
||||
#define HAS_FLOAT 0
|
||||
#endif
|
||||
/* Configuration : HAS_TIME_H
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef HAS_TIME_H
|
||||
#define HAS_TIME_H 0
|
||||
#endif
|
||||
/* Configuration : USE_CLOCK
|
||||
Define to 1 if platform has the time.h header file,
|
||||
and implementation of functions thereof.
|
||||
*/
|
||||
#ifndef USE_CLOCK
|
||||
#define USE_CLOCK 0
|
||||
#endif
|
||||
/* Configuration : HAS_STDIO
|
||||
Define to 1 if the platform has stdio.h.
|
||||
*/
|
||||
#ifndef HAS_STDIO
|
||||
#define HAS_STDIO 0
|
||||
#endif
|
||||
/* Configuration : HAS_PRINTF
|
||||
Define to 1 if the platform has stdio.h and implements the printf function.
|
||||
*/
|
||||
#ifndef HAS_PRINTF
|
||||
#define HAS_PRINTF 1
|
||||
#endif
|
||||
|
||||
/* Configuration : CORE_TICKS
|
||||
Define type of return from the timing functions.
|
||||
*/
|
||||
typedef uint32_t CORE_TICKS;
|
||||
|
||||
/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
|
||||
Initialize these strings per platform
|
||||
*/
|
||||
#ifndef COMPILER_VERSION
|
||||
#ifdef __GNUC__
|
||||
#define COMPILER_VERSION "GCC"__VERSION__
|
||||
#else
|
||||
#define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)"
|
||||
#endif
|
||||
#endif
|
||||
#ifndef COMPILER_FLAGS
|
||||
#define COMPILER_FLAGS
|
||||
#endif
|
||||
#ifndef MEM_LOCATION
|
||||
#define MEM_LOCATION "STACK"
|
||||
#endif
|
||||
|
||||
/* Data Types :
|
||||
To avoid compiler issues, define the data types that need to be used for 8b, 16b and 32b in <core_portme.h>.
|
||||
|
||||
*Important* :
|
||||
ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!!
|
||||
*/
|
||||
typedef signed short ee_s16;
|
||||
typedef unsigned short ee_u16;
|
||||
typedef signed int ee_s32;
|
||||
typedef double ee_f32;
|
||||
typedef unsigned char ee_u8;
|
||||
typedef unsigned int ee_u32;
|
||||
typedef unsigned long ee_ptr_int;
|
||||
typedef size_t ee_size_t;
|
||||
/* align_mem :
|
||||
This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks.
|
||||
*/
|
||||
/* Rounds x up to the next multiple of 4 (values already aligned are unchanged).
   The whole expansion is parenthesized so the cast cannot bind to a neighbouring operator. */
#define align_mem(x) ((void *)(4 + (((unsigned long)(x) - 1) & ~3UL)))
|
||||
|
||||
/* Configuration : SEED_METHOD
|
||||
Defines method to get seed values that cannot be computed at compile time.
|
||||
|
||||
Valid values :
|
||||
SEED_ARG - from command line.
|
||||
SEED_FUNC - from a system function.
|
||||
SEED_VOLATILE - from volatile variables.
|
||||
*/
|
||||
#ifndef SEED_METHOD
|
||||
#define SEED_METHOD SEED_VOLATILE
|
||||
#endif
|
||||
|
||||
/* Configuration : MEM_METHOD
|
||||
Defines method to get a block of memory.
|
||||
|
||||
Valid values :
|
||||
MEM_MALLOC - for platforms that implement malloc and have malloc.h.
|
||||
MEM_STATIC - to use a static memory array.
|
||||
MEM_STACK - to allocate the data block on the stack (NYI).
|
||||
*/
|
||||
#ifndef MEM_METHOD
|
||||
#define MEM_METHOD MEM_STACK
|
||||
#endif
|
||||
|
||||
/* Configuration : MULTITHREAD
|
||||
Define for parallel execution
|
||||
|
||||
Valid values :
|
||||
1 - only one context (default).
|
||||
N>1 - will execute N copies in parallel.
|
||||
|
||||
Note :
|
||||
If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined.
|
||||
|
||||
Two sample implementations are provided. Use <USE_PTHREAD> or <USE_FORK> to enable them.
|
||||
|
||||
It is valid to have a different implementation of <core_start_parallel> and <core_end_parallel> in <core_portme.c>,
|
||||
to fit a particular architecture.
|
||||
*/
|
||||
#ifndef MULTITHREAD
|
||||
#define MULTITHREAD 1
|
||||
#define USE_PTHREAD 0
|
||||
#define USE_FORK 0
|
||||
#define USE_SOCKET 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NOARGC
|
||||
Needed if platform does not support getting arguments to main.
|
||||
|
||||
Valid values :
|
||||
0 - argc/argv to main is supported
|
||||
1 - argc/argv to main is not supported
|
||||
|
||||
Note :
|
||||
This flag only matters if MULTITHREAD has been defined to a value greater than 1.
|
||||
*/
|
||||
#ifndef MAIN_HAS_NOARGC
|
||||
#define MAIN_HAS_NOARGC 0
|
||||
#endif
|
||||
|
||||
/* Configuration : MAIN_HAS_NORETURN
|
||||
Needed if platform does not support returning a value from main.
|
||||
|
||||
Valid values :
|
||||
0 - main returns an int, and return value will be 0.
|
||||
1 - platform does not support returning a value from main
|
||||
*/
|
||||
#ifndef MAIN_HAS_NORETURN
|
||||
#define MAIN_HAS_NORETURN 0
|
||||
#endif
|
||||
|
||||
/* Variable : default_num_contexts
|
||||
Not used for this simple port, must contain the value 1.
|
||||
*/
|
||||
extern ee_u32 default_num_contexts;
|
||||
|
||||
typedef struct CORE_PORTABLE_S {
|
||||
ee_u8 portable_id;
|
||||
} core_portable;
|
||||
|
||||
/* target specific init/fini */
|
||||
void portable_init(core_portable *p, int *argc, char *argv[]);
|
||||
void portable_fini(core_portable *p);
|
||||
|
||||
#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN)
|
||||
#if (TOTAL_DATA_SIZE==1200)
|
||||
#define PROFILE_RUN 1
|
||||
#elif (TOTAL_DATA_SIZE==2000)
|
||||
#define PERFORMANCE_RUN 1
|
||||
#else
|
||||
#define VALIDATION_RUN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* CORE_PORTME_H */
|
174
coremark/include/coremark.h
Executable file
174
coremark/include/coremark.h
Executable file
|
@ -0,0 +1,174 @@
|
|||
/*
|
||||
Author : Shay Gal-On, EEMBC
|
||||
|
||||
This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
|
||||
All rights reserved.
|
||||
|
||||
EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
|
||||
CoreMark License that is distributed with the official EEMBC COREMARK Software release.
|
||||
If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
|
||||
you must discontinue use and download the official release from www.coremark.org.
|
||||
|
||||
Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
|
||||
make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
|
||||
|
||||
EEMBC
|
||||
4354 Town Center Blvd. Suite 114-200
|
||||
El Dorado Hills, CA, 95762
|
||||
*/
|
||||
/* Topic: Description
|
||||
This file contains declarations of the various benchmark functions.
|
||||
*/
|
||||
|
||||
/* Configuration: TOTAL_DATA_SIZE
|
||||
Define total size for data algorithms will operate on
|
||||
*/
|
||||
#ifndef TOTAL_DATA_SIZE
|
||||
/* Parenthesized so the product stays intact when the macro is used inside a larger expression. */
#define TOTAL_DATA_SIZE (2*1000)
|
||||
#endif
|
||||
|
||||
#define SEED_ARG 0
|
||||
#define SEED_FUNC 1
|
||||
#define SEED_VOLATILE 2
|
||||
|
||||
#define MEM_STATIC 0
|
||||
#define MEM_MALLOC 1
|
||||
#define MEM_STACK 2
|
||||
|
||||
#include "core_portme.h"
|
||||
|
||||
#if HAS_STDIO
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#if HAS_PRINTF
|
||||
#define ee_printf printf
|
||||
#endif
|
||||
|
||||
/* Actual benchmark execution in iterate */
|
||||
void *iterate(void *pres);
|
||||
|
||||
/* Typedef: secs_ret
|
||||
For machines that have floating point support, get number of seconds as a double.
|
||||
Otherwise an unsigned int.
|
||||
*/
|
||||
#if HAS_FLOAT
|
||||
typedef double secs_ret;
|
||||
#else
|
||||
typedef ee_u32 secs_ret;
|
||||
#endif
|
||||
|
||||
#if MAIN_HAS_NORETURN
|
||||
#define MAIN_RETURN_VAL
|
||||
#define MAIN_RETURN_TYPE void
|
||||
#else
|
||||
#define MAIN_RETURN_VAL 0
|
||||
#define MAIN_RETURN_TYPE int
|
||||
#endif
|
||||
|
||||
void start_time(void);
|
||||
void stop_time(void);
|
||||
CORE_TICKS get_time(void);
|
||||
secs_ret time_in_secs(CORE_TICKS ticks);
|
||||
|
||||
/* Misc useful functions */
|
||||
ee_u16 crcu8(ee_u8 data, ee_u16 crc);
|
||||
ee_u16 crc16(ee_s16 newval, ee_u16 crc);
|
||||
ee_u16 crcu16(ee_u16 newval, ee_u16 crc);
|
||||
ee_u16 crcu32(ee_u32 newval, ee_u16 crc);
|
||||
ee_u8 check_data_types();
|
||||
void *portable_malloc(ee_size_t size);
|
||||
void portable_free(void *p);
|
||||
ee_s32 parseval(char *valstring);
|
||||
|
||||
/* Algorithm IDS */
|
||||
#define ID_LIST (1<<0)
|
||||
#define ID_MATRIX (1<<1)
|
||||
#define ID_STATE (1<<2)
|
||||
#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE)
|
||||
#define NUM_ALGORITHMS 3
|
||||
|
||||
/* list data structures */
|
||||
typedef struct list_data_s {
|
||||
ee_s16 data16;
|
||||
ee_s16 idx;
|
||||
} list_data;
|
||||
|
||||
typedef struct list_head_s {
|
||||
struct list_head_s *next;
|
||||
struct list_data_s *info;
|
||||
} list_head;
|
||||
|
||||
|
||||
/*matrix benchmark related stuff */
|
||||
#define MATDAT_INT 1
|
||||
#if MATDAT_INT
|
||||
typedef ee_s16 MATDAT;
|
||||
typedef ee_s32 MATRES;
|
||||
#else
|
||||
typedef ee_f16 MATDAT;
|
||||
typedef ee_f32 MATRES;
|
||||
#endif
|
||||
|
||||
typedef struct MAT_PARAMS_S {
|
||||
int N;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
MATRES *C;
|
||||
} mat_params;
|
||||
|
||||
/* state machine related stuff */
|
||||
/* List of all the possible states for the FSM */
|
||||
typedef enum CORE_STATE {
|
||||
CORE_START=0,
|
||||
CORE_INVALID,
|
||||
CORE_S1,
|
||||
CORE_S2,
|
||||
CORE_INT,
|
||||
CORE_FLOAT,
|
||||
CORE_EXPONENT,
|
||||
CORE_SCIENTIFIC,
|
||||
NUM_CORE_STATES
|
||||
} core_state_e ;
|
||||
|
||||
|
||||
/* Helper structure to hold results */
|
||||
typedef struct RESULTS_S {
|
||||
/* inputs */
|
||||
ee_s16 seed1; /* Initializing seed */
|
||||
ee_s16 seed2; /* Initializing seed */
|
||||
ee_s16 seed3; /* Initializing seed */
|
||||
void *memblock[4]; /* Pointer to safe memory location */
|
||||
ee_u32 size; /* Size of the data */
|
||||
ee_u32 iterations; /* Number of iterations to execute */
|
||||
ee_u32 execs; /* Bitmask of operations to execute */
|
||||
struct list_head_s *list;
|
||||
mat_params mat;
|
||||
/* outputs */
|
||||
ee_u16 crc;
|
||||
ee_u16 crclist;
|
||||
ee_u16 crcmatrix;
|
||||
ee_u16 crcstate;
|
||||
ee_s16 err;
|
||||
/* multithread specific */
|
||||
core_portable port;
|
||||
} core_results;
|
||||
|
||||
/* Multicore execution handling */
|
||||
#if (MULTITHREAD>1)
|
||||
ee_u8 core_start_parallel(core_results *res);
|
||||
ee_u8 core_stop_parallel(core_results *res);
|
||||
#endif
|
||||
|
||||
/* list benchmark functions */
|
||||
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed);
|
||||
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx);
|
||||
|
||||
/* state benchmark functions */
|
||||
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p);
|
||||
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock,
|
||||
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc);
|
||||
|
||||
/* matrix benchmark functions */
|
||||
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p);
|
||||
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc);
|
||||
|
496
coremark/src/core_list_join.c
Executable file
496
coremark/src/core_list_join.c
Executable file
|
@ -0,0 +1,496 @@
|
|||
/*
|
||||
Author : Shay Gal-On, EEMBC
|
||||
|
||||
This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
|
||||
All rights reserved.
|
||||
|
||||
EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
|
||||
CoreMark License that is distributed with the official EEMBC COREMARK Software release.
|
||||
If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
|
||||
you must discontinue use and download the official release from www.coremark.org.
|
||||
|
||||
Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
|
||||
make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
|
||||
|
||||
EEMBC
|
||||
4354 Town Center Blvd. Suite 114-200
|
||||
El Dorado Hills, CA, 95762
|
||||
*/
|
||||
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Benchmark using a linked list.
|
||||
|
||||
Linked list is a common data structure used in many applications.
|
||||
|
||||
For our purposes, this will exercise the memory units of the processor.
|
||||
In particular, usage of the list pointers to find and alter data.
|
||||
|
||||
We are not using Malloc since some platforms do not support this library.
|
||||
|
||||
Instead, the memory block being passed in is used to create a list,
|
||||
and the benchmark takes care not to add more items than can be
|
||||
accommodated by the memory block. The porting layer will make sure
|
||||
that we have a valid memory block.
|
||||
|
||||
All operations are done in place, without using any extra memory.
|
||||
|
||||
The list itself contains list pointers and pointers to data items.
|
||||
Data items contain the following:
|
||||
|
||||
idx - An index that captures the initial order of the list.
|
||||
data - Variable data initialized based on the input parameters. The 16b are divided as follows:
|
||||
o Upper 8b are backup of original data.
|
||||
o Bit 7 indicates if the lower 7 bits are to be used as is or calculated.
|
||||
o Bits 0-2 indicate type of operation to perform to get a 7b value.
|
||||
o Bits 3-6 provide input for the operation.
|
||||
|
||||
*/
|
||||
|
||||
/* local functions */
|
||||
|
||||
list_head *core_list_find(list_head *list,list_data *info);
|
||||
list_head *core_list_reverse(list_head *list);
|
||||
list_head *core_list_remove(list_head *item);
|
||||
list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified);
|
||||
list_head *core_list_insert_new(list_head *insert_point
|
||||
, list_data *info, list_head **memblock, list_data **datablock
|
||||
, list_head *memblock_end, list_data *datablock_end);
|
||||
typedef ee_s32(*list_cmp)(list_data *a, list_data *b, core_results *res);
|
||||
list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res);
|
||||
|
||||
/* Function: calc_func
    Produce the 7b value of a list data item, computing and caching it on first use.

    Operation:
    If bit 7 of *pdata is set, the low 7 bits are returned as the cached value.
    Otherwise bits 0-2 pick the operation (0 - state benchmark, 1 - matrix benchmark,
    anything else - the raw data) and bits 3-6, replicated into both nibbles of a byte,
    are its input. The result is folded into res->crc, reduced to 7 bits and stored
    back into *pdata with bit 7 set; the upper byte of *pdata is preserved.

    Parameters:
    pdata - data item to evaluate (updated in place when the value is computed).
    res - benchmark state; crc, crcstate and crcmatrix may be updated.

    Returns:
    The 7b value of the item.
*/
ee_s16 calc_func(ee_s16 *pdata, core_results *res) {
    const ee_s16 data = *pdata;
    ee_s16 retval;
    ee_s16 flag;
    ee_s16 dtype;

    /* bit 7 set: an earlier call already cached the result in the low 7 bits */
    if ((data >> 7) & 1)
        return (data & 0x007f);

    flag = data & 0x7;           /* bits 0-2 is type of function to perform */
    dtype = ((data >> 3) & 0xf); /* bits 3-6 is specific data for the operation */
    dtype |= dtype << 4;         /* replicate the lower 4 bits to get an 8b value */

    if (flag == 0) {
        if (dtype < 0x22) /* set min period for bit corruption */
            dtype = 0x22;
        retval = core_bench_state(res->size, res->memblock[3], res->seed1, res->seed2, dtype, res->crc);
        /* only the first state result is recorded */
        if (res->crcstate == 0)
            res->crcstate = retval;
    } else if (flag == 1) {
        retval = core_bench_matrix(&(res->mat), dtype, res->crc);
        /* only the first matrix result is recorded */
        if (res->crcmatrix == 0)
            res->crcmatrix = retval;
    } else {
        retval = data;
    }

    res->crc = crcu16(retval, res->crc);
    retval &= 0x007f;
    *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
    return retval;
}
|
||||
/* Function: cmp_complex
|
||||
Compare the data item in a list cell.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32 cmp_complex(list_data *a, list_data *b, core_results *res) {
|
||||
ee_s16 val1=calc_func(&(a->data16),res);
|
||||
ee_s16 val2=calc_func(&(b->data16),res);
|
||||
return val1 - val2;
|
||||
}
|
||||
|
||||
/* Function: cmp_idx
|
||||
Compare the idx item in a list cell, and regen the data.
|
||||
|
||||
Can be used by mergesort.
|
||||
*/
|
||||
ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) {
|
||||
if (res==NULL) {
|
||||
a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8));
|
||||
b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8));
|
||||
}
|
||||
return a->idx - b->idx;
|
||||
}
|
||||
|
||||
/* Function: copy_info
    Copy both fields (idx and data16) of one list data item into another.
*/
void copy_info(list_data *to, list_data *from) {
    to->idx = from->idx;
    to->data16 = from->data16;
}
|
||||
|
||||
/* Benchmark for linked list:
|
||||
- Try to find multiple data items.
|
||||
- List sort
|
||||
- Operate on data from list (crc)
|
||||
- Single remove/reinsert
|
||||
* At the end of this function, the list is back to original state
|
||||
*/
|
||||
ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) {
|
||||
ee_u16 retval=0;
|
||||
ee_u16 found=0,missed=0;
|
||||
list_head *list=res->list;
|
||||
ee_s16 find_num=res->seed3;
|
||||
list_head *this_find;
|
||||
list_head *finder, *remover;
|
||||
list_data info = {};
|
||||
ee_s16 i;
|
||||
|
||||
info.idx=finder_idx;
|
||||
/* find <find_num> values in the list, and change the list each time (reverse and cache if value found) */
|
||||
for (i=0; i<find_num; i++) {
|
||||
info.data16= (i & 0xff) ;
|
||||
this_find=core_list_find(list,&info);
|
||||
list=core_list_reverse(list);
|
||||
if (this_find==NULL) {
|
||||
missed++;
|
||||
retval+=(list->next->info->data16 >> 8) & 1;
|
||||
}
|
||||
else {
|
||||
found++;
|
||||
if (this_find->info->data16 & 0x1) /* use found value */
|
||||
retval+=(this_find->info->data16 >> 9) & 1;
|
||||
/* and cache next item at the head of the list (if any) */
|
||||
if (this_find->next != NULL) {
|
||||
finder = this_find->next;
|
||||
this_find->next = finder->next;
|
||||
finder->next=list->next;
|
||||
list->next=finder;
|
||||
}
|
||||
}
|
||||
if (info.idx>=0)
|
||||
info.idx++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found);
|
||||
#endif
|
||||
}
|
||||
retval+=found*4-missed;
|
||||
/* sort the list by data content and remove one item*/
|
||||
if (finder_idx>0)
|
||||
list=core_list_mergesort(list,cmp_complex,res);
|
||||
remover=core_list_remove(list->next);
|
||||
/* CRC data content of list from location of index N forward, and then undo remove */
|
||||
finder=core_list_find(list,&info);
|
||||
if (!finder)
|
||||
finder=list->next;
|
||||
while (finder) {
|
||||
retval=crc16(list->info->data16,retval);
|
||||
finder=finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 1: %04x\n",retval);
|
||||
#endif
|
||||
remover=core_list_undo_remove(remover,list->next);
|
||||
/* sort the list by index, in effect returning the list to original state */
|
||||
list=core_list_mergesort(list,cmp_idx,NULL);
|
||||
/* CRC data content of list */
|
||||
finder=list->next;
|
||||
while (finder) {
|
||||
retval=crc16(list->info->data16,retval);
|
||||
finder=finder->next;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("List sort 2: %04x\n",retval);
|
||||
#endif
|
||||
return retval;
|
||||
}
|
||||
/* Function: core_list_init
|
||||
Initialize list with data.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblock - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
The seed parameter MUST be supplied from a source that cannot be determined at compile time
|
||||
|
||||
Returns:
|
||||
Pointer to the head of the list.
|
||||
|
||||
*/
|
||||
list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) {
|
||||
/* calculated pointers for the list */
|
||||
ee_u32 per_item=16+sizeof(struct list_data_s);
|
||||
ee_u32 size=(blksize/per_item)-2; /* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */
|
||||
list_head *memblock_end=memblock+size;
|
||||
list_data *datablock=(list_data *)(memblock_end);
|
||||
list_data *datablock_end=datablock+size;
|
||||
/* some useful variables */
|
||||
ee_u32 i;
|
||||
list_head *finder,*list=memblock;
|
||||
list_data info;
|
||||
|
||||
/* create a fake items for the list head and tail */
|
||||
list->next=NULL;
|
||||
list->info=datablock;
|
||||
list->info->idx=0x0000;
|
||||
list->info->data16=(ee_s16)0x8080;
|
||||
memblock++;
|
||||
datablock++;
|
||||
info.idx=0x7fff;
|
||||
info.data16=(ee_s16)0xffff;
|
||||
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);
|
||||
|
||||
/* then insert size items */
|
||||
for (i=0; i<size; i++) {
|
||||
ee_u16 datpat=((ee_u16)(seed^i) & 0xf);
|
||||
ee_u16 dat=(datpat<<3) | (i&0x7); /* alternate between algorithms */
|
||||
info.data16=(dat<<8) | dat; /* fill the data with actual data and upper bits with rebuild value */
|
||||
core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end);
|
||||
}
|
||||
/* and now index the list so we know initial seed order of the list */
|
||||
finder=list->next;
|
||||
i=1;
|
||||
while (finder->next!=NULL) {
|
||||
if (i<size/5) /* first 20% of the list in order */
|
||||
finder->info->idx=i++;
|
||||
else {
|
||||
ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */
|
||||
finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */
|
||||
}
|
||||
finder=finder->next;
|
||||
}
|
||||
list = core_list_mergesort(list,cmp_idx,NULL);
|
||||
#if CORE_DEBUG
|
||||
ee_printf("Initialized list:\n");
|
||||
finder=list;
|
||||
while (finder) {
|
||||
ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16);
|
||||
finder=finder->next;
|
||||
}
|
||||
ee_printf("\n");
|
||||
#endif
|
||||
return list;
|
||||
}
|
||||
|
||||
/* Function: core_list_insert
|
||||
Insert an item to the list
|
||||
|
||||
Parameters:
|
||||
insert_point - where to insert the item.
|
||||
info - data for the cell.
|
||||
memblock - pointer for the list header
|
||||
datablock - pointer for the list data
|
||||
memblock_end - end of region for list headers
|
||||
datablock_end - end of region for list data
|
||||
|
||||
Returns:
|
||||
Pointer to new item.
|
||||
*/
|
||||
list_head *core_list_insert_new(list_head *insert_point, list_data *info, list_head **memblock, list_data **datablock
|
||||
, list_head *memblock_end, list_data *datablock_end) {
|
||||
list_head *newitem;
|
||||
|
||||
if ((*memblock+1) >= memblock_end)
|
||||
return NULL;
|
||||
if ((*datablock+1) >= datablock_end)
|
||||
return NULL;
|
||||
|
||||
newitem=*memblock;
|
||||
(*memblock)++;
|
||||
newitem->next=insert_point->next;
|
||||
insert_point->next=newitem;
|
||||
|
||||
newitem->info=*datablock;
|
||||
(*datablock)++;
|
||||
copy_info(newitem->info,info);
|
||||
|
||||
return newitem;
|
||||
}
|
||||
|
||||
/* Function: core_list_remove
|
||||
Remove an item from the list.
|
||||
|
||||
Operation:
|
||||
For a singly linked list, remove by copying the data from the next item
|
||||
over to the current cell, and unlinking the next item.
|
||||
|
||||
Note:
|
||||
since there is always a fake item at the end of the list, no need to check for NULL.
|
||||
|
||||
Returns:
|
||||
Removed item.
|
||||
*/
|
||||
list_head *core_list_remove(list_head *item) {
|
||||
list_data *tmp;
|
||||
list_head *ret=item->next;
|
||||
/* swap data pointers */
|
||||
tmp=item->info;
|
||||
item->info=ret->info;
|
||||
ret->info=tmp;
|
||||
/* and eliminate item */
|
||||
item->next=item->next->next;
|
||||
ret->next=NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function: core_list_undo_remove
|
||||
Undo a remove operation.
|
||||
|
||||
Operation:
|
||||
Since we want each iteration of the benchmark to be exactly the same,
|
||||
we need to be able to undo a remove.
|
||||
Link the removed item back into the list, and switch the info items.
|
||||
|
||||
Parameters:
|
||||
item_removed - Return value from the <core_list_remove>
|
||||
item_modified - List item that was modified during <core_list_remove>
|
||||
|
||||
Returns:
|
||||
The item that was linked back to the list.
|
||||
|
||||
*/
|
||||
list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified) {
|
||||
list_data *tmp;
|
||||
/* swap data pointers */
|
||||
tmp=item_removed->info;
|
||||
item_removed->info=item_modified->info;
|
||||
item_modified->info=tmp;
|
||||
/* and insert item */
|
||||
item_removed->next=item_modified->next;
|
||||
item_modified->next=item_removed;
|
||||
return item_removed;
|
||||
}
|
||||
|
||||
/* Function: core_list_find
|
||||
Find an item in the list
|
||||
|
||||
Operation:
|
||||
Find an item by idx (if not 0) or specific data value
|
||||
|
||||
Parameters:
|
||||
list - list head
|
||||
info - idx or data to find
|
||||
|
||||
Returns:
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
list_head *core_list_find(list_head *list,list_data *info) {
|
||||
if (info->idx>=0) {
|
||||
while (list && (list->info->idx != info->idx))
|
||||
list=list->next;
|
||||
return list;
|
||||
} else {
|
||||
while (list && ((list->info->data16 & 0xff) != info->data16))
|
||||
list=list->next;
|
||||
return list;
|
||||
}
|
||||
}
|
||||
/* Function: core_list_reverse
|
||||
Reverse a list
|
||||
|
||||
Operation:
|
||||
Rearrange the pointers so the list is reversed.
|
||||
|
||||
Parameters:
|
||||
list - list head
|
||||
info - idx or data to find
|
||||
|
||||
Returns:
|
||||
Found item, or NULL if not found.
|
||||
*/
|
||||
|
||||
list_head *core_list_reverse(list_head *list) {
|
||||
list_head *next=NULL, *tmp;
|
||||
while (list) {
|
||||
tmp=list->next;
|
||||
list->next=next;
|
||||
next=list;
|
||||
list=tmp;
|
||||
}
|
||||
return next;
|
||||
}
|
||||
/* Function: core_list_mergesort
|
||||
Sort the list in place without recursion.
|
||||
|
||||
Description:
|
||||
Use mergesort, as for linked list this is a realistic solution.
|
||||
Also, since this is aimed at embedded, care was taken to use iterative rather then recursive algorithm.
|
||||
The sort can either return the list to original order (by idx) ,
|
||||
or use the data item to invoke other other algorithms and change the order of the list.
|
||||
|
||||
Parameters:
|
||||
list - list to be sorted.
|
||||
cmp - cmp function to use
|
||||
|
||||
Returns:
|
||||
New head of the list.
|
||||
|
||||
Note:
|
||||
We have a special header for the list that will always be first,
|
||||
but the algorithm could theoretically modify where the list starts.
|
||||
|
||||
*/
|
||||
list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res) {
|
||||
list_head *p, *q, *e, *tail;
|
||||
ee_s32 insize, nmerges, psize, qsize, i;
|
||||
|
||||
insize = 1;
|
||||
|
||||
while (1) {
|
||||
p = list;
|
||||
list = NULL;
|
||||
tail = NULL;
|
||||
|
||||
nmerges = 0; /* count number of merges we do in this pass */
|
||||
|
||||
while (p) {
|
||||
nmerges++; /* there exists a merge to be done */
|
||||
/* step `insize' places along from p */
|
||||
q = p;
|
||||
psize = 0;
|
||||
for (i = 0; i < insize; i++) {
|
||||
psize++;
|
||||
q = q->next;
|
||||
if (!q) break;
|
||||
}
|
||||
|
||||
/* if q hasn't fallen off end, we have two lists to merge */
|
||||
qsize = insize;
|
||||
|
||||
/* now we have two lists; merge them */
|
||||
while (psize > 0 || (qsize > 0 && q)) {
|
||||
|
||||
/* decide whether next element of merge comes from p or q */
|
||||
if (psize == 0) {
|
||||
/* p is empty; e must come from q. */
|
||||
e = q; q = q->next; qsize--;
|
||||
} else if (qsize == 0 || !q) {
|
||||
/* q is empty; e must come from p. */
|
||||
e = p; p = p->next; psize--;
|
||||
} else if (cmp(p->info,q->info,res) <= 0) {
|
||||
/* First element of p is lower (or same); e must come from p. */
|
||||
e = p; p = p->next; psize--;
|
||||
} else {
|
||||
/* First element of q is lower; e must come from q. */
|
||||
e = q; q = q->next; qsize--;
|
||||
}
|
||||
|
||||
/* add the next element to the merged list */
|
||||
if (tail) {
|
||||
tail->next = e;
|
||||
} else {
|
||||
list = e;
|
||||
}
|
||||
tail = e;
|
||||
}
|
||||
|
||||
/* now p has stepped `insize' places along, and q has too */
|
||||
p = q;
|
||||
}
|
||||
|
||||
tail->next = NULL;
|
||||
|
||||
/* If we have done only one merge, we're finished. */
|
||||
if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
|
||||
return list;
|
||||
|
||||
/* Otherwise repeat, merging lists twice the size */
|
||||
insize *= 2;
|
||||
}
|
||||
#if COMPILER_REQUIRES_SORT_RETURN
|
||||
return list;
|
||||
#endif
|
||||
}
|
339
coremark/src/core_main.c
Executable file
339
coremark/src/core_main.c
Executable file
|
@ -0,0 +1,339 @@
|
|||
/*
|
||||
Author : Shay Gal-On, EEMBC
|
||||
|
||||
This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
|
||||
All rights reserved.
|
||||
|
||||
EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
|
||||
CoreMark License that is distributed with the official EEMBC COREMARK Software release.
|
||||
If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
|
||||
you must discontinue use and download the official release from www.coremark.org.
|
||||
|
||||
Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
|
||||
make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
|
||||
|
||||
EEMBC
|
||||
4354 Town Center Blvd. Suite 114-200
|
||||
El Dorado Hills, CA, 95762
|
||||
*/
|
||||
/* File: core_main.c
|
||||
This file contains the framework to acquire a block of memory, seed initial parameters, run the benchmark and report the results.
|
||||
*/
|
||||
#include "coremark.h"
|
||||
|
||||
/* Function: iterate
|
||||
Run the benchmark for a specified number of iterations.
|
||||
|
||||
Operation:
|
||||
For each type of benchmarked algorithm:
|
||||
a - Initialize the data block for the algorithm.
|
||||
b - Execute the algorithm N times.
|
||||
|
||||
Returns:
|
||||
NULL.
|
||||
*/
|
||||
static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1};
|
||||
static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747};
|
||||
static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84};
|
||||
void *iterate(void *pres) {
|
||||
ee_u32 i;
|
||||
ee_u16 crc;
|
||||
core_results *res=(core_results *)pres;
|
||||
ee_u32 iterations=res->iterations;
|
||||
res->crc=0;
|
||||
res->crclist=0;
|
||||
res->crcmatrix=0;
|
||||
res->crcstate=0;
|
||||
|
||||
for (i=0; i<iterations; i++) {
|
||||
crc=core_bench_list(res,1);
|
||||
res->crc=crcu16(crc,res->crc);
|
||||
crc=core_bench_list(res,-1);
|
||||
res->crc=crcu16(crc,res->crc);
|
||||
if (i==0) res->crclist=res->crc;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if (SEED_METHOD==SEED_ARG)
|
||||
ee_s32 get_seed_args(int i, int argc, char *argv[]);
|
||||
#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv)
|
||||
#define get_seed_32(x) get_seed_args(x,argc,argv)
|
||||
#else /* via function or volatile */
|
||||
ee_s32 get_seed_32(int i);
|
||||
#define get_seed(x) (ee_s16)get_seed_32(x)
|
||||
#endif
|
||||
|
||||
#if (MEM_METHOD==MEM_STATIC)
|
||||
ee_u8 static_memblk[TOTAL_DATA_SIZE];
|
||||
#endif
|
||||
char *mem_name[3] = {"Static","Heap","Stack"};
|
||||
/* Function: main
|
||||
Main entry routine for the benchmark.
|
||||
This function is responsible for the following steps:
|
||||
|
||||
1 - Initialize input seeds from a source that cannot be determined at compile time.
|
||||
2 - Initialize memory block for use.
|
||||
3 - Run and time the benchmark.
|
||||
4 - Report results, testing the validity of the output if the seeds are known.
|
||||
|
||||
Arguments:
|
||||
1 - first seed : Any value
|
||||
2 - second seed : Must be identical to first for iterations to be identical
|
||||
3 - third seed : Any value, should be at least an order of magnitude less then the input size, but bigger then 32.
|
||||
4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs
|
||||
|
||||
*/
|
||||
|
||||
#if MAIN_HAS_NOARGC
|
||||
MAIN_RETURN_TYPE main(void) {
|
||||
int argc=0;
|
||||
char *argv[1];
|
||||
#else
|
||||
MAIN_RETURN_TYPE main(int argc, char *argv[]) {
|
||||
#endif
|
||||
ee_u16 i,j=0,num_algorithms=0;
|
||||
ee_s16 known_id=-1,total_errors=0;
|
||||
ee_u16 seedcrc=0;
|
||||
CORE_TICKS total_time;
|
||||
core_results results[MULTITHREAD];
|
||||
#if (MEM_METHOD==MEM_STACK)
|
||||
ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD];
|
||||
#endif
|
||||
|
||||
ioe_init();
|
||||
|
||||
ee_printf("Running CoreMark for %d iterations\n", ITERATIONS);
|
||||
|
||||
/* first call any initializations needed */
|
||||
portable_init(&(results[0].port), &argc, argv);
|
||||
/* First some checks to make sure benchmark will run ok */
|
||||
if (sizeof(struct list_head_s)>128) {
|
||||
ee_printf("list_head structure too big for comparable data!\n");
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
results[0].seed1=get_seed(1);
|
||||
results[0].seed2=get_seed(2);
|
||||
results[0].seed3=get_seed(3);
|
||||
results[0].iterations=get_seed_32(4);
|
||||
#if CORE_DEBUG
|
||||
results[0].iterations=1;
|
||||
#endif
|
||||
results[0].execs=get_seed_32(5);
|
||||
if (results[0].execs==0) { /* if not supplied, execute all algorithms */
|
||||
results[0].execs=ALL_ALGORITHMS_MASK;
|
||||
}
|
||||
/* put in some default values based on one seed only for easy testing */
|
||||
if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */
|
||||
results[0].seed1=0;
|
||||
results[0].seed2=0;
|
||||
results[0].seed3=0x66;
|
||||
}
|
||||
if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */
|
||||
results[0].seed1=0x3415;
|
||||
results[0].seed2=0x3415;
|
||||
results[0].seed3=0x66;
|
||||
}
|
||||
#if (MEM_METHOD==MEM_STATIC)
|
||||
results[0].memblock[0]=(void *)static_memblk;
|
||||
results[0].size=TOTAL_DATA_SIZE;
|
||||
results[0].err=0;
|
||||
#if (MULTITHREAD>1)
|
||||
#error "Cannot use a static data area with multiple contexts!"
|
||||
#endif
|
||||
#elif (MEM_METHOD==MEM_MALLOC)
|
||||
for (i=0 ; i<MULTITHREAD; i++) {
|
||||
ee_s32 malloc_override=get_seed(7);
|
||||
if (malloc_override != 0)
|
||||
results[i].size=malloc_override;
|
||||
else
|
||||
results[i].size=TOTAL_DATA_SIZE;
|
||||
results[i].memblock[0]=portable_malloc(results[i].size);
|
||||
results[i].seed1=results[0].seed1;
|
||||
results[i].seed2=results[0].seed2;
|
||||
results[i].seed3=results[0].seed3;
|
||||
results[i].err=0;
|
||||
results[i].execs=results[0].execs;
|
||||
}
|
||||
#elif (MEM_METHOD==MEM_STACK)
|
||||
for (i=0 ; i<MULTITHREAD; i++) {
|
||||
results[i].memblock[0]=stack_memblock+i*TOTAL_DATA_SIZE;
|
||||
results[i].size=TOTAL_DATA_SIZE;
|
||||
results[i].seed1=results[0].seed1;
|
||||
results[i].seed2=results[0].seed2;
|
||||
results[i].seed3=results[0].seed3;
|
||||
results[i].err=0;
|
||||
results[i].execs=results[0].execs;
|
||||
}
|
||||
#else
|
||||
#error "Please define a way to initialize a memory block."
|
||||
#endif
|
||||
/* Data init */
|
||||
/* Find out how space much we have based on number of algorithms */
|
||||
for (i=0; i<NUM_ALGORITHMS; i++) {
|
||||
if ((1<<(ee_u32)i) & results[0].execs)
|
||||
num_algorithms++;
|
||||
}
|
||||
for (i=0 ; i<MULTITHREAD; i++)
|
||||
results[i].size=results[i].size/num_algorithms;
|
||||
/* Assign pointers */
|
||||
for (i=0; i<NUM_ALGORITHMS; i++) {
|
||||
ee_u32 ctx;
|
||||
if ((1<<(ee_u32)i) & results[0].execs) {
|
||||
for (ctx=0 ; ctx<MULTITHREAD; ctx++)
|
||||
results[ctx].memblock[i+1]=(char *)(results[ctx].memblock[0])+results[0].size*j;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
/* call inits */
|
||||
for (i=0 ; i<MULTITHREAD; i++) {
|
||||
if (results[i].execs & ID_LIST) {
|
||||
results[i].list=core_list_init(results[0].size,results[i].memblock[1],results[i].seed1);
|
||||
}
|
||||
if (results[i].execs & ID_MATRIX) {
|
||||
core_init_matrix(results[0].size, results[i].memblock[2], (ee_s32)results[i].seed1 | (((ee_s32)results[i].seed2) << 16), &(results[i].mat) );
|
||||
}
|
||||
if (results[i].execs & ID_STATE) {
|
||||
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/* automatically determine number of iterations if not set */
|
||||
if (results[0].iterations==0) {
|
||||
secs_ret secs_passed=0;
|
||||
ee_u32 divisor;
|
||||
results[0].iterations=1;
|
||||
while (secs_passed < (secs_ret)1) {
|
||||
results[0].iterations*=10;
|
||||
start_time();
|
||||
iterate(&results[0]);
|
||||
stop_time();
|
||||
secs_passed=time_in_secs(get_time());
|
||||
}
|
||||
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
|
||||
divisor=(ee_u32)secs_passed;
|
||||
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
|
||||
divisor=1;
|
||||
results[0].iterations*=1+10/divisor;
|
||||
}
|
||||
/* perform actual benchmark */
|
||||
start_time();
|
||||
#if (MULTITHREAD>1)
|
||||
if (default_num_contexts>MULTITHREAD) {
|
||||
default_num_contexts=MULTITHREAD;
|
||||
}
|
||||
for (i=0 ; i<default_num_contexts; i++) {
|
||||
results[i].iterations=results[0].iterations;
|
||||
results[i].execs=results[0].execs;
|
||||
core_start_parallel(&results[i]);
|
||||
}
|
||||
for (i=0 ; i<default_num_contexts; i++) {
|
||||
core_stop_parallel(&results[i]);
|
||||
}
|
||||
#else
|
||||
iterate(&results[0]);
|
||||
#endif
|
||||
stop_time();
|
||||
total_time=get_time();
|
||||
/* get a function of the input to report */
|
||||
seedcrc=crc16(results[0].seed1,seedcrc);
|
||||
seedcrc=crc16(results[0].seed2,seedcrc);
|
||||
seedcrc=crc16(results[0].seed3,seedcrc);
|
||||
seedcrc=crc16(results[0].size,seedcrc);
|
||||
|
||||
switch (seedcrc) { /* test known output for common seeds */
|
||||
case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
|
||||
known_id=0;
|
||||
ee_printf("6k performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x7b05: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per algorithm */
|
||||
known_id=1;
|
||||
ee_printf("6k validation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm */
|
||||
known_id=2;
|
||||
ee_printf("Profile generation run parameters for coremark.\n");
|
||||
break;
|
||||
case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
|
||||
known_id=3;
|
||||
ee_printf("2K performance run parameters for coremark.\n");
|
||||
break;
|
||||
case 0x18f2: /* seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per algorithm */
|
||||
known_id=4;
|
||||
ee_printf("2K validation run parameters for coremark.\n");
|
||||
break;
|
||||
default:
|
||||
total_errors=-1;
|
||||
break;
|
||||
}
|
||||
if (known_id>=0) {
|
||||
for (i=0 ; i<default_num_contexts; i++) {
|
||||
results[i].err=0;
|
||||
if ((results[i].execs & ID_LIST) &&
|
||||
(results[i].crclist!=list_known_crc[known_id])) {
|
||||
ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",i,results[i].crclist,list_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_MATRIX) &&
|
||||
(results[i].crcmatrix!=matrix_known_crc[known_id])) {
|
||||
ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",i,results[i].crcmatrix,matrix_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
if ((results[i].execs & ID_STATE) &&
|
||||
(results[i].crcstate!=state_known_crc[known_id])) {
|
||||
ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",i,results[i].crcstate,state_known_crc[known_id]);
|
||||
results[i].err++;
|
||||
}
|
||||
total_errors+=results[i].err;
|
||||
}
|
||||
}
|
||||
total_errors+=check_data_types();
|
||||
/* and report results */
|
||||
ee_printf("CoreMark Size : %d\n",(int)results[0].size);
|
||||
#if HAS_FLOAT
|
||||
ee_printf("Total time (ms) : %f\n",time_in_secs(total_time));
|
||||
if (time_in_secs(total_time) > 0)
|
||||
ee_printf("Iterations/mSec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time));
|
||||
#else
|
||||
ee_printf("Total time (ms) : %d\n",time_in_secs(total_time));
|
||||
#endif
|
||||
ee_printf("Iterations : %d\n",(int)default_num_contexts*results[0].iterations);
|
||||
ee_printf("Compiler version : %s\n",COMPILER_VERSION);
|
||||
#if (MULTITHREAD>1)
|
||||
ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts);
|
||||
#endif
|
||||
/* output for verification */
|
||||
ee_printf("seedcrc : 0x%04x\n",seedcrc);
|
||||
if (results[0].execs & ID_LIST)
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crclist : 0x%04x\n",i,results[i].crclist);
|
||||
if (results[0].execs & ID_MATRIX)
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcmatrix : 0x%04x\n",i,results[i].crcmatrix);
|
||||
if (results[0].execs & ID_STATE)
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate);
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc);
|
||||
ee_printf("Finised in %d ms.\n", (int)total_time);
|
||||
if (total_errors==0) {
|
||||
ee_printf("==================================================\n");
|
||||
ee_printf("CoreMark PASS %d Marks\n", 2921400 / time_in_secs(total_time) * ITERATIONS / 1000);
|
||||
ee_printf(" vs. 100000 Marks (i7-7700K @ 4.20GHz)\n");
|
||||
}
|
||||
if (total_errors>0)
|
||||
ee_printf("Errors detected\n");
|
||||
if (total_errors<0)
|
||||
ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n");
|
||||
|
||||
#if (MEM_METHOD==MEM_MALLOC)
|
||||
for (i=0 ; i<MULTITHREAD; i++)
|
||||
portable_free(results[i].memblock[0]);
|
||||
#endif
|
||||
/* And last call any target specific code for finalizing */
|
||||
portable_fini(&(results[0].port));
|
||||
|
||||
return MAIN_RETURN_VAL;
|
||||
}
|
||||
|
||||
|
308
coremark/src/core_matrix.c
Executable file
308
coremark/src/core_matrix.c
Executable file
|
@ -0,0 +1,308 @@
|
|||
/*
|
||||
Author : Shay Gal-On, EEMBC
|
||||
|
||||
This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
|
||||
All rights reserved.
|
||||
|
||||
EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
|
||||
CoreMark License that is distributed with the official EEMBC COREMARK Software release.
|
||||
If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
|
||||
you must discontinue use and download the official release from www.coremark.org.
|
||||
|
||||
Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
|
||||
make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
|
||||
|
||||
EEMBC
|
||||
4354 Town Center Blvd. Suite 114-200
|
||||
El Dorado Hills, CA, 95762
|
||||
*/
|
||||
#include "coremark.h"
|
||||
/*
|
||||
Topic: Description
|
||||
Matrix manipulation benchmark
|
||||
|
||||
This very simple algorithm forms the basis of many more complex algorithms.
|
||||
|
||||
The tight inner loop is the focus of many optimizations (compiler as well as hardware based)
|
||||
and is thus relevant for embedded processing.
|
||||
|
||||
The total available data space will be divided to 3 parts:
|
||||
NxN Matrix A - initialized with small values (upper 3/4 of the bits all zero).
|
||||
NxN Matrix B - initialized with medium values (upper half of the bits all zero).
|
||||
NxN Matrix C - used for the result.
|
||||
|
||||
The actual values for A and B must be derived based on input that is not available at compile time.
|
||||
*/
|
||||
/* Forward declarations for the matrix kernels defined later in this file. */
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);

/* x+1; the argument is parenthesized so an expression argument keeps its grouping. */
#define matrix_test_next(x) ((x)+1)
/* Keep the low 8 bits of x when y is non-zero, otherwise the low 16 bits. */
#define matrix_clip(x,y) ((y) ? (x) & 0x0ff : (x) & 0x0ffff)
/* Force the top four bits of a 16-bit value on. */
#define matrix_big(x) (0xf000 | (x))
/* Shift x right by `from` bits and keep the low `to` bits of the result
   (`to` acts as a field width, not an end position). */
#define bit_extract(x,from,to) (((x)>>(from)) & (~(0xffffffff << (to))))
|
||||
|
||||
#if CORE_DEBUG
|
||||
/* Debug helper: print the N x N input matrix A row by row, values separated by commas. */
void printmat(MATDAT *A, ee_u32 N, char *name) {
	ee_u32 row, col;
	ee_u32 cell = 0; /* linear index, always equal to row*N+col */

	ee_printf("Matrix %s [%dx%d]:\n",name,N,N);
	for (row = 0; row < N; row++) {
		for (col = 0; col < N; col++) {
			if (col != 0)
				ee_printf(",");
			ee_printf("%d",A[cell]);
			cell++;
		}
		ee_printf("\n");
	}
}
|
||||
/* Debug helper: print the N x N result matrix C row by row, values separated by commas. */
void printmatC(MATRES *C, ee_u32 N, char *name) {
	ee_u32 row, col;
	ee_u32 cell = 0; /* linear index, always equal to row*N+col */

	ee_printf("Matrix %s [%dx%d]:\n",name,N,N);
	for (row = 0; row < N; row++) {
		for (col = 0; col < N; col++) {
			if (col != 0)
				ee_printf(",");
			ee_printf("%d",C[cell]);
			cell++;
		}
		ee_printf("\n");
	}
}
|
||||
#endif
|
||||
/* Function: core_bench_matrix
|
||||
Benchmark function
|
||||
|
||||
Iterate <matrix_test> N times,
|
||||
changing the matrix values slightly by a constant amount each time.
|
||||
*/
|
||||
ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) {
|
||||
ee_u32 N=p->N;
|
||||
MATRES *C=p->C;
|
||||
MATDAT *A=p->A;
|
||||
MATDAT *B=p->B;
|
||||
MATDAT val=(MATDAT)seed;
|
||||
|
||||
crc=crc16(matrix_test(N,C,A,B,val),crc);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function: matrix_test
|
||||
Perform matrix manipulation.
|
||||
|
||||
Parameters:
|
||||
N - Dimensions of the matrix.
|
||||
C - memory for result matrix.
|
||||
A - input matrix
|
||||
B - operator matrix (not changed during operations)
|
||||
|
||||
Returns:
|
||||
A CRC value that captures all results calculated in the function.
|
||||
In particular, crc of the value calculated on the result matrix
|
||||
after each step by <matrix_sum>.
|
||||
|
||||
Operation:
|
||||
|
||||
1 - Add a constant value to all elements of a matrix.
|
||||
2 - Multiply a matrix by a constant.
|
||||
3 - Multiply a matrix by a vector.
|
||||
4 - Multiply a matrix by a matrix.
|
||||
5 - Add a constant value to all elements of a matrix.
|
||||
|
||||
After the last step, matrix A is back to original contents.
|
||||
*/
|
||||
ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) {
|
||||
ee_u16 crc=0;
|
||||
MATDAT clipval=matrix_big(val);
|
||||
|
||||
matrix_add_const(N,A,val); /* make sure data changes */
|
||||
#if CORE_DEBUG
|
||||
printmat(A,N,"matrix_add_const");
|
||||
#endif
|
||||
matrix_mul_const(N,C,A,val);
|
||||
crc=crc16(matrix_sum(N,C,clipval),crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C,N,"matrix_mul_const");
|
||||
#endif
|
||||
matrix_mul_vect(N,C,A,B);
|
||||
crc=crc16(matrix_sum(N,C,clipval),crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C,N,"matrix_mul_vect");
|
||||
#endif
|
||||
matrix_mul_matrix(N,C,A,B);
|
||||
crc=crc16(matrix_sum(N,C,clipval),crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C,N,"matrix_mul_matrix");
|
||||
#endif
|
||||
matrix_mul_matrix_bitextract(N,C,A,B);
|
||||
crc=crc16(matrix_sum(N,C,clipval),crc);
|
||||
#if CORE_DEBUG
|
||||
printmatC(C,N,"matrix_mul_matrix_bitextract");
|
||||
#endif
|
||||
|
||||
matrix_add_const(N,A,-val); /* return matrix to initial value */
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Function : matrix_init
|
||||
Initialize the memory block for matrix benchmarking.
|
||||
|
||||
Parameters:
|
||||
blksize - Size of memory to be initialized.
|
||||
memblk - Pointer to memory block.
|
||||
seed - Actual values chosen depend on the seed parameter.
|
||||
p - pointers to <mat_params> containing initialized matrixes.
|
||||
|
||||
Returns:
|
||||
Matrix dimensions.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be determined at compile time
|
||||
*/
|
||||
ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) {
|
||||
ee_u32 N=0;
|
||||
MATDAT *A;
|
||||
MATDAT *B;
|
||||
ee_s32 order=1;
|
||||
MATDAT val;
|
||||
ee_u32 i=0,j=0;
|
||||
if (seed==0)
|
||||
seed=1;
|
||||
while (j<blksize) {
|
||||
i++;
|
||||
j=i*i*2*4;
|
||||
}
|
||||
N=i-1;
|
||||
A=(MATDAT *)align_mem(memblk);
|
||||
B=A+N*N;
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
for (j=0; j<N; j++) {
|
||||
seed = ( ( order * seed ) % 65536 );
|
||||
val = (seed + order);
|
||||
val=matrix_clip(val,0);
|
||||
B[i*N+j] = val;
|
||||
val = (val + order);
|
||||
val=matrix_clip(val,1);
|
||||
A[i*N+j] = val;
|
||||
order++;
|
||||
}
|
||||
}
|
||||
|
||||
p->A=A;
|
||||
p->B=B;
|
||||
p->C=(MATRES *)align_mem(B+N*N);
|
||||
p->N=N;
|
||||
#if CORE_DEBUG
|
||||
printmat(A,N,"A");
|
||||
printmat(B,N,"B");
|
||||
#endif
|
||||
return N;
|
||||
}
|
||||
|
||||
/* Function: matrix_sum
|
||||
Calculate a function that depends on the values of elements in the matrix.
|
||||
|
||||
For each element, accumulate into a temporary variable.
|
||||
|
||||
As long as this value is under the parameter clipval,
|
||||
add 1 to the result if the element is bigger then the previous.
|
||||
|
||||
Otherwise, reset the accumulator and add 10 to the result.
|
||||
*/
|
||||
ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) {
|
||||
MATRES tmp=0,prev=0,cur=0;
|
||||
ee_s16 ret=0;
|
||||
ee_u32 i,j;
|
||||
for (i=0; i<N; i++) {
|
||||
for (j=0; j<N; j++) {
|
||||
cur=C[i*N+j];
|
||||
tmp+=cur;
|
||||
if (tmp>clipval) {
|
||||
ret+=10;
|
||||
tmp=0;
|
||||
} else {
|
||||
ret += (cur>prev) ? 1 : 0;
|
||||
}
|
||||
prev=cur;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_const
|
||||
Multiply a matrix by a constant.
|
||||
This could be used as a scaler for instance.
|
||||
*/
|
||||
void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) {
|
||||
ee_u32 i,j;
|
||||
for (i=0; i<N; i++) {
|
||||
for (j=0; j<N; j++) {
|
||||
C[i*N+j]=(MATRES)A[i*N+j] * (MATRES)val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_add_const
|
||||
Add a constant value to all elements of a matrix.
|
||||
*/
|
||||
void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val) {
|
||||
ee_u32 i,j;
|
||||
for (i=0; i<N; i++) {
|
||||
for (j=0; j<N; j++) {
|
||||
A[i*N+j] += val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_vect
|
||||
Multiply a matrix by a vector.
|
||||
This is common in many simple filters (e.g. fir where a vector of coefficients is applied to the matrix.)
|
||||
*/
|
||||
void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
|
||||
ee_u32 i,j;
|
||||
for (i=0; i<N; i++) {
|
||||
C[i]=0;
|
||||
for (j=0; j<N; j++) {
|
||||
C[i]+=(MATRES)A[i*N+j] * (MATRES)B[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix
|
||||
Multiply a matrix by a matrix.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as scaling.
|
||||
*/
|
||||
void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
|
||||
ee_u32 i,j,k;
|
||||
for (i=0; i<N; i++) {
|
||||
for (j=0; j<N; j++) {
|
||||
C[i*N+j]=0;
|
||||
for(k=0;k<N;k++)
|
||||
{
|
||||
C[i*N+j]+=(MATRES)A[i*N+k] * (MATRES)B[k*N+j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Function: matrix_mul_matrix_bitextract
|
||||
Multiply a matrix by a matrix, and extract some bits from the result.
|
||||
Basic code is used in many algorithms, mostly with minor changes such as scaling.
|
||||
*/
|
||||
void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B) {
|
||||
ee_u32 i,j,k;
|
||||
for (i=0; i<N; i++) {
|
||||
for (j=0; j<N; j++) {
|
||||
C[i*N+j]=0;
|
||||
for(k=0;k<N;k++)
|
||||
{
|
||||
MATRES tmp=(MATRES)A[i*N+k] * (MATRES)B[k*N+j];
|
||||
C[i*N+j]+=bit_extract(tmp,2,4)*bit_extract(tmp,5,7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
109
coremark/src/core_portme.c
Executable file
109
coremark/src/core_portme.c
Executable file
|
@ -0,0 +1,109 @@
|
|||
#include "coremark.h"
|
||||
|
||||
#if VALIDATION_RUN
|
||||
volatile ee_s32 seed1_volatile=0x3415;
|
||||
volatile ee_s32 seed2_volatile=0x3415;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
#endif
|
||||
#if PERFORMANCE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x0;
|
||||
volatile ee_s32 seed2_volatile=0x0;
|
||||
volatile ee_s32 seed3_volatile=0x66;
|
||||
#endif
|
||||
#if PROFILE_RUN
|
||||
volatile ee_s32 seed1_volatile=0x8;
|
||||
volatile ee_s32 seed2_volatile=0x8;
|
||||
volatile ee_s32 seed3_volatile=0x8;
|
||||
#endif
|
||||
volatile ee_s32 seed4_volatile=ITERATIONS;
|
||||
volatile ee_s32 seed5_volatile=0;
|
||||
/* Porting : Timing functions
|
||||
How to capture time and convert to seconds must be ported to whatever is supported by the platform.
|
||||
e.g. Read value from on board RTC, read value from cpu clock cycles performance counter etc.
|
||||
Sample implementation for standard time.h and windows.h definitions included.
|
||||
*/
|
||||
/* Define : TIMER_RES_DIVIDER
|
||||
Divider to trade off timer resolution and total time that can be measured.
|
||||
|
||||
Use lower values to increase resolution, but make sure that overflow does not occur.
|
||||
If there are issues with the return value overflowing, increase this value.
|
||||
*/
|
||||
/* clock()-based timing definitions from the sample implementation.
   NOTE(review): the timing functions visible in this file read the AM uptime
   timer instead and do not reference these macros - confirm before relying
   on them. */
#define NSECS_PER_SEC CLOCKS_PER_SEC
#define CORETIMETYPE clock_t
/* Store the current clock() value through pointer _t; the argument is
   parenthesized so a pointer expression such as `base + 1` is dereferenced as a whole. */
#define GETMYTIME(_t) (*(_t)=clock())
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define TIMER_RES_DIVIDER 1
#define SAMPLE_TIME_IMPLEMENTATION 1
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
|
||||
/* Read the AM_TIMER_UPTIME register and convert its microsecond field to
   milliseconds. The result is narrowed to uint32_t on return.
   Declared with (void) so the definition is a proper prototype. */
static uint32_t uptime_ms(void) { return io_read(AM_TIMER_UPTIME).us / 1000; }
|
||||
|
||||
/** Timestamps (in milliseconds, as returned by uptime_ms) captured by
    start_time() and stop_time(). */
unsigned long start_time_val;
unsigned long stop_time_val;
|
||||
|
||||
/* Function : start_time
|
||||
This function will be called right before starting the timed portion of the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
|
||||
*/
|
||||
void start_time(void) {
|
||||
start_time_val = uptime_ms();
|
||||
}
|
||||
/* Function : stop_time
|
||||
This function will be called right after ending the timed portion of the benchmark.
|
||||
|
||||
Implementation may be capturing a system timer (as implemented in the example code)
|
||||
or other system parameters - e.g. reading the current value of cpu cycles counter.
|
||||
*/
|
||||
void stop_time(void) {
|
||||
stop_time_val = uptime_ms();
|
||||
}
|
||||
/* Function : get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
|
||||
Actual value returned may be cpu cycles, milliseconds or any other value,
|
||||
as long as it can be converted to seconds by <time_in_secs>.
|
||||
This methodology is taken to accomodate any hardware or simulated platform.
|
||||
The sample implementation returns millisecs by default,
|
||||
and the resolution is controlled by <TIMER_RES_DIVIDER>
|
||||
*/
|
||||
CORE_TICKS get_time(void) {
|
||||
return stop_time_val - start_time_val;
|
||||
}
|
||||
|
||||
/* Function : time_in_secs
|
||||
Convert the value returned by get_time to seconds.
|
||||
|
||||
The <secs_ret> type is used to accomodate systems with no support for floating point.
|
||||
Default implementation implemented by the EE_TICKS_PER_SEC macro above.
|
||||
*/
|
||||
secs_ret time_in_secs(CORE_TICKS ticks) {
|
||||
return ticks;
|
||||
}
|
||||
|
||||
/* Number of benchmark contexts main() iterates over when validating and reporting. */
ee_u32 default_num_contexts = 1;
|
||||
|
||||
/* Function : portable_init
|
||||
Target specific initialization code
|
||||
Test for some common mistakes.
|
||||
*/
|
||||
void portable_init(core_portable *p, int *argc, char *argv[])
|
||||
{
|
||||
if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) {
|
||||
ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n");
|
||||
}
|
||||
if (sizeof(ee_u32) != 4) {
|
||||
ee_printf("ERROR! Please define ee_u32 to a 32b unsigned type!\n");
|
||||
}
|
||||
p->portable_id=1;
|
||||
}
|
||||
/* Function : portable_fini
|
||||
Target specific final code
|
||||
*/
|
||||
void portable_fini(core_portable *p)
|
||||
{
|
||||
p->portable_id=0;
|
||||
}
|
||||
|
||||
|
277
coremark/src/core_state.c
Executable file
277
coremark/src/core_state.c
Executable file
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
Author : Shay Gal-On, EEMBC
|
||||
|
||||
This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
|
||||
All rights reserved.
|
||||
|
||||
EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
|
||||
CoreMark License that is distributed with the official EEMBC COREMARK Software release.
|
||||
If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
|
||||
you must discontinue use and download the official release from www.coremark.org.
|
||||
|
||||
Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
|
||||
make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
|
||||
|
||||
EEMBC
|
||||
4354 Town Center Blvd. Suite 114-200
|
||||
El Dorado Hills, CA, 95762
|
||||
*/
|
||||
#include "coremark.h"
|
||||
/* local functions */
|
||||
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count);
|
||||
|
||||
/*
|
||||
Topic: Description
|
||||
Simple state machines like this one are used in many embedded products.
|
||||
|
||||
For more complex state machines, sometimes a state transition table implementation is used instead,
|
||||
trading speed of direct coding for ease of maintenance.
|
||||
|
||||
Since the main goal of using a state machine in CoreMark is to excercise the switch/if behaviour,
|
||||
we are using a small moore machine.
|
||||
|
||||
In particular, this machine tests type of string input,
|
||||
trying to determine whether the input is a number or something else.
|
||||
(see core_state.png).
|
||||
*/
|
||||
|
||||
/* Function: core_bench_state
|
||||
Benchmark function
|
||||
|
||||
Go over the input twice, once direct, and once after introducing some corruption.
|
||||
*/
|
||||
ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock,
|
||||
ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc)
|
||||
{
|
||||
ee_u32 final_counts[NUM_CORE_STATES];
|
||||
ee_u32 track_counts[NUM_CORE_STATES];
|
||||
ee_u8 *p=memblock;
|
||||
ee_u32 i;
|
||||
|
||||
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Bench: %d,%d,%d,%04x\n",seed1,seed2,step,crc);
|
||||
#endif
|
||||
for (i=0; i<NUM_CORE_STATES; i++) {
|
||||
final_counts[i]=track_counts[i]=0;
|
||||
}
|
||||
/* run the state machine over the input */
|
||||
while (*p!=0) {
|
||||
enum CORE_STATE fstate=core_state_transition(&p,track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,",fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p=memblock;
|
||||
while (p < (memblock+blksize)) { /* insert some corruption */
|
||||
if (*p!=',')
|
||||
*p^=(ee_u8)seed1;
|
||||
p+=step;
|
||||
}
|
||||
p=memblock;
|
||||
/* run the state machine over the input again */
|
||||
while (*p!=0) {
|
||||
enum CORE_STATE fstate=core_state_transition(&p,track_counts);
|
||||
final_counts[fstate]++;
|
||||
#if CORE_DEBUG
|
||||
ee_printf("%d,",fstate);
|
||||
}
|
||||
ee_printf("\n");
|
||||
#else
|
||||
}
|
||||
#endif
|
||||
p=memblock;
|
||||
while (p < (memblock+blksize)) { /* undo corruption is seed1 and seed2 are equal */
|
||||
if (*p!=',')
|
||||
*p^=(ee_u8)seed2;
|
||||
p+=step;
|
||||
}
|
||||
/* end timing */
|
||||
for (i=0; i<NUM_CORE_STATES; i++) {
|
||||
crc=crcu32(final_counts[i],crc);
|
||||
crc=crcu32(track_counts[i],crc);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
/* Default initialization patterns */
|
||||
static ee_u8 *intpat[4] ={(ee_u8 *)"5012",(ee_u8 *)"1234",(ee_u8 *)"-874",(ee_u8 *)"+122"};
|
||||
static ee_u8 *floatpat[4]={(ee_u8 *)"35.54400",(ee_u8 *)".1234500",(ee_u8 *)"-110.700",(ee_u8 *)"+0.64400"};
|
||||
static ee_u8 *scipat[4] ={(ee_u8 *)"5.500e+3",(ee_u8 *)"-.123e-2",(ee_u8 *)"-87e+832",(ee_u8 *)"+0.6e-12"};
|
||||
static ee_u8 *errpat[4] ={(ee_u8 *)"T0.3e-1F",(ee_u8 *)"-T.T++Tq",(ee_u8 *)"1T3.4e4z",(ee_u8 *)"34.0e-T^"};
|
||||
|
||||
/* Function: core_init_state
|
||||
Initialize the input data for the state machine.
|
||||
|
||||
Populate the input with several predetermined strings, interspersed.
|
||||
Actual patterns chosen depend on the seed parameter.
|
||||
|
||||
Note:
|
||||
The seed parameter MUST be supplied from a source that cannot be determined at compile time
|
||||
*/
|
||||
void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p) {
|
||||
ee_u32 total=0,next=0,i;
|
||||
ee_u8 *buf=0;
|
||||
#if CORE_DEBUG
|
||||
ee_u8 *start=p;
|
||||
ee_printf("State: %d,%d\n",size,seed);
|
||||
#endif
|
||||
size--;
|
||||
next=0;
|
||||
while ((total+next+1)<size) {
|
||||
if (next>0) {
|
||||
for(i=0;i<next;i++)
|
||||
*(p+total+i)=buf[i];
|
||||
*(p+total+i)=',';
|
||||
total+=next+1;
|
||||
}
|
||||
seed++;
|
||||
switch (seed & 0x7) {
|
||||
case 0: /* int */
|
||||
case 1: /* int */
|
||||
case 2: /* int */
|
||||
buf=intpat[(seed>>3) & 0x3];
|
||||
next=4;
|
||||
break;
|
||||
case 3: /* float */
|
||||
case 4: /* float */
|
||||
buf=floatpat[(seed>>3) & 0x3];
|
||||
next=8;
|
||||
break;
|
||||
case 5: /* scientific */
|
||||
case 6: /* scientific */
|
||||
buf=scipat[(seed>>3) & 0x3];
|
||||
next=8;
|
||||
break;
|
||||
case 7: /* invalid */
|
||||
buf=errpat[(seed>>3) & 0x3];
|
||||
next=8;
|
||||
break;
|
||||
default: /* Never happen, just to make some compilers happy */
|
||||
break;
|
||||
}
|
||||
}
|
||||
size++;
|
||||
while (total<size) { /* fill the rest with 0 */
|
||||
*(p+total)=0;
|
||||
total++;
|
||||
}
|
||||
#if CORE_DEBUG
|
||||
ee_printf("State Input: %s\n",start);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Return 1 if c is an ASCII decimal digit ('0'..'9'), 0 otherwise.
   Both range tests are combined with bitwise &, so both are always evaluated. */
static ee_u8 ee_isdigit(ee_u8 c) {
	return (ee_u8)(((c>='0') & (c<='9')) ? 1 : 0);
}
|
||||
|
||||
/* Function: core_state_transition
|
||||
Actual state machine.
|
||||
|
||||
The state machine will continue scanning until either:
|
||||
1 - an invalid input is detected.
|
||||
2 - a valid number has been detected.
|
||||
|
||||
The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid).
|
||||
*/
|
||||
|
||||
enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) {
	ee_u8 *cursor=*instr;
	enum CORE_STATE state=CORE_START;

	/* Consume one symbol per pass. The cursor is advanced as the symbol is
	   read, so on exit it sits just past the last symbol examined
	   (including a terminating ',' or the symbol that made the token invalid). */
	while (*cursor && state != CORE_INVALID) {
		ee_u8 sym=*cursor++;
		if (sym==',') /* end of this input */
			break;
		switch (state) {
		case CORE_START:
			if (ee_isdigit(sym)) {
				state=CORE_INT;
			} else if (sym=='+' || sym=='-') {
				state=CORE_S1;
			} else if (sym=='.') {
				state=CORE_FLOAT;
			} else {
				state=CORE_INVALID;
				transition_count[CORE_INVALID]++;
			}
			transition_count[CORE_START]++;
			break;
		case CORE_S1:
			/* Every symbol seen in S1 leaves S1, so it is always counted. */
			transition_count[CORE_S1]++;
			if (ee_isdigit(sym))
				state=CORE_INT;
			else if (sym=='.')
				state=CORE_FLOAT;
			else
				state=CORE_INVALID;
			break;
		case CORE_INT:
			/* Further digits stay in INT and are not counted. */
			if (sym=='.') {
				state=CORE_FLOAT;
				transition_count[CORE_INT]++;
			} else if (!ee_isdigit(sym)) {
				state=CORE_INVALID;
				transition_count[CORE_INT]++;
			}
			break;
		case CORE_FLOAT:
			/* Further digits stay in FLOAT and are not counted. */
			if (sym=='E' || sym=='e') {
				state=CORE_S2;
				transition_count[CORE_FLOAT]++;
			} else if (!ee_isdigit(sym)) {
				state=CORE_INVALID;
				transition_count[CORE_FLOAT]++;
			}
			break;
		case CORE_S2:
			/* A sign is mandatory after the exponent marker. */
			transition_count[CORE_S2]++;
			if (sym=='+' || sym=='-')
				state=CORE_EXPONENT;
			else
				state=CORE_INVALID;
			break;
		case CORE_EXPONENT:
			transition_count[CORE_EXPONENT]++;
			if (ee_isdigit(sym))
				state=CORE_SCIENTIFIC;
			else
				state=CORE_INVALID;
			break;
		case CORE_SCIENTIFIC:
			/* Leaving SCIENTIFIC is counted against CORE_INVALID,
			   not against CORE_SCIENTIFIC. */
			if (!ee_isdigit(sym)) {
				state=CORE_INVALID;
				transition_count[CORE_INVALID]++;
			}
			break;
		default:
			break;
		}
	}
	*instr=cursor;
	return state;
}
|
210
coremark/src/core_util.c
Executable file
210
coremark/src/core_util.c
Executable file
|
@ -0,0 +1,210 @@
|
|||
/*
|
||||
Author : Shay Gal-On, EEMBC
|
||||
|
||||
This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009
|
||||
All rights reserved.
|
||||
|
||||
EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the
|
||||
CoreMark License that is distributed with the official EEMBC COREMARK Software release.
|
||||
If you received this EEMBC CoreMark Software without the accompanying CoreMark License,
|
||||
you must discontinue use and download the official release from www.coremark.org.
|
||||
|
||||
Also, if you are publicly displaying scores generated from the EEMBC CoreMark software,
|
||||
make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file.
|
||||
|
||||
EEMBC
|
||||
4354 Town Center Blvd. Suite 114-200
|
||||
El Dorado Hills, CA, 95762
|
||||
*/
|
||||
#include "coremark.h"
|
||||
/* Function: get_seed
|
||||
Get a value that cannot be determined at compile time.
|
||||
|
||||
Since different embedded systems and compilers are used, 3 different methods are provided:
|
||||
1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that
|
||||
reads the value of a volatile variable from memory at run time.
|
||||
Please note, if using this method, you would need to modify core_portme.c to generate training profile.
|
||||
2 - Command line arguments. This is the preferred method if command line arguments are supported.
|
||||
3 - System function. If none of the first 2 methods is available on the platform,
|
||||
a system function which is not a stub can be used.
|
||||
|
||||
e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions.
|
||||
*/
|
||||
#if (SEED_METHOD==SEED_VOLATILE)
|
||||
extern volatile ee_s32 seed1_volatile;
|
||||
extern volatile ee_s32 seed2_volatile;
|
||||
extern volatile ee_s32 seed3_volatile;
|
||||
extern volatile ee_s32 seed4_volatile;
|
||||
extern volatile ee_s32 seed5_volatile;
|
||||
ee_s32 get_seed_32(int i) {
|
||||
ee_s32 retval;
|
||||
switch (i) {
|
||||
case 1:
|
||||
retval=seed1_volatile;
|
||||
break;
|
||||
case 2:
|
||||
retval=seed2_volatile;
|
||||
break;
|
||||
case 3:
|
||||
retval=seed3_volatile;
|
||||
break;
|
||||
case 4:
|
||||
retval=seed4_volatile;
|
||||
break;
|
||||
case 5:
|
||||
retval=seed5_volatile;
|
||||
break;
|
||||
default:
|
||||
retval=0;
|
||||
break;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#elif (SEED_METHOD==SEED_ARG)
|
||||
/* Parses an integer from valstring.
   Accepted form: optional '-', optional "0x" prefix (lower-case hex digits
   only), digits, then an optional 'K' (x1024) or 'M' (x1024*1024) suffix.
   Parsing stops at the first character that does not fit; no error is reported. */
ee_s32 parseval(char *valstring) {
	const char *cur=valstring;
	ee_s32 value=0;
	int negative=0;
	int base=10;

	if (*cur=='-') {
		negative=1;
		cur++;
	}
	if (cur[0]=='0' && cur[1]=='x') {
		base=16;
		cur+=2;
	}
	/* first look for digits */
	for ( ; ; cur++) {
		ee_s32 digit;
		if (*cur>='0' && *cur<='9')
			digit=*cur-'0';
		else if (base==16 && *cur>='a' && *cur<='f')
			digit=10+*cur-'a';
		else
			break;
		value*=base;
		value+=digit;
	}
	/* now add qualifiers */
	if (*cur=='K')
		value*=1024;
	else if (*cur=='M')
		value*=1024*1024;

	return negative ? -value : value;
}
|
||||
|
||||
/* Returns command line argument i parsed with parseval(), or 0 when
   fewer than i+1 arguments were supplied. */
ee_s32 get_seed_args(int i, int argc, char *argv[]) {
	return (argc>i) ? parseval(argv[i]) : 0;
}
|
||||
|
||||
#elif (SEED_METHOD==SEED_FUNC)
|
||||
/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! */
|
||||
/* Returns seed number i (1..5) obtained from the matching port-provided
   portme_sysN() function; any other index yields 0. */
ee_s32 get_seed_32(int i) {
	switch (i) {
	case 1:  return portme_sys1();
	case 2:  return portme_sys2();
	case 3:  return portme_sys3();
	case 4:  return portme_sys4();
	case 5:  return portme_sys5();
	default: return 0;
	}
}
|
||||
#endif
|
||||
|
||||
/* Function: crc*
|
||||
Service functions to calculate 16b CRC code.
|
||||
|
||||
*/
|
||||
/* Folds the 8 bits of data, least significant bit first, into the running
   16b CRC and returns the updated CRC. */
ee_u16 crcu8(ee_u8 data, ee_u16 crc )
{
	ee_u8 bit;

	for (bit = 0; bit < 8; bit++)
	{
		/* Feedback is the XOR of the current data bit and the CRC's low bit. */
		ee_u8 feedback = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1));
		data >>= 1;

		/* The polynomial taps are applied before the shift; bit 15 is
		   then forced to the feedback value after it. */
		if (feedback == 1)
			crc ^= 0x4002;
		crc >>= 1;
		if (feedback == 1)
			crc |= 0x8000;
		else
			crc &= 0x7fff;
	}
	return crc;
}
|
||||
/* Folds a 16b value into the CRC, low byte first, then high byte. */
ee_u16 crcu16(ee_u16 newval, ee_u16 crc) {
	ee_u16 after_low = crcu8((ee_u8)(newval), crc);
	return crcu8((ee_u8)((newval)>>8), after_low);
}
|
||||
/* Folds a 32b value into the CRC, low 16 bits first, then high 16 bits. */
ee_u16 crcu32(ee_u32 newval, ee_u16 crc) {
	ee_u16 after_low = crc16((ee_s16) newval, crc);
	return crc16((ee_s16) (newval>>16), after_low);
}
|
||||
/* Signed 16b entry point: converts the value to unsigned and defers to crcu16(). */
ee_u16 crc16(ee_s16 newval, ee_u16 crc) {
	ee_u16 as_unsigned = (ee_u16)newval;
	return crcu16(as_unsigned, crc);
}
|
||||
|
||||
ee_u8 check_data_types() {
|
||||
ee_u8 retval=0;
|
||||
if (sizeof(ee_u8) != 1) {
|
||||
ee_printf("ERROR: ee_u8 is not an 8b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u16) != 2) {
|
||||
ee_printf("ERROR: ee_u16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s16) != 2) {
|
||||
ee_printf("ERROR: ee_s16 is not a 16b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_s32) != 4) {
|
||||
ee_printf("ERROR: ee_s32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_u32) != 4) {
|
||||
ee_printf("ERROR: ee_u32 is not a 32b datatype!\n");
|
||||
retval++;
|
||||
}
|
||||
if (sizeof(ee_ptr_int) != sizeof(int *)) {
|
||||
ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n");
|
||||
retval++;
|
||||
}
|
||||
if (retval>0) {
|
||||
ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n");
|
||||
}
|
||||
return retval;
|
||||
}
|
3
dhrystone/Makefile
Normal file
3
dhrystone/Makefile
Normal file
|
@ -0,0 +1,3 @@
|
|||
NAME = dhrystone
|
||||
SRCS = dry.c
|
||||
include $(AM_HOME)/Makefile
|
950
dhrystone/dry.c
Normal file
950
dhrystone/dry.c
Normal file
|
@ -0,0 +1,950 @@
|
|||
/****************** "DHRYSTONE" Benchmark Program ***************************/
|
||||
#define Version "C, Version 2.2"
|
||||
/* File: dhry_1.c (part 2 of 3)
|
||||
* Author: Reinhold P. Weicker
|
||||
* Siemens Nixdorf, Paderborn/Germany
|
||||
* weicker@specbench.org
|
||||
* Date: May 25, 1988
|
||||
* Modified: Steven Pemberton, CWI, Amsterdam; Steven.Pemberton@cwi.nl
|
||||
* Date: October, 1993; March 1995
|
||||
* Included both files into one source, that gets compiled
|
||||
* in two passes. Made program auto-compiling, and auto-running,
|
||||
* and generally made it much easier to use.
|
||||
*
|
||||
* Original Version (in Ada) published in
|
||||
* "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
|
||||
* pp. 1013 - 1030, together with the statistics
|
||||
* on which the distribution of statements etc. is based.
|
||||
*
|
||||
* In this C version, the following C library functions are used:
|
||||
* - strcpy, strcmp (inside the measurement loop)
|
||||
* - printf, scanf (outside the measurement loop)
|
||||
* In addition, Berkeley UNIX system calls "times ()" or "time ()"
|
||||
* are used for execution time measurement. For measurements
|
||||
* on other systems, these calls have to be changed.
|
||||
*
|
||||
* Collection of Results:
|
||||
* Reinhold Weicker (address see above) and
|
||||
*
|
||||
* Rick Richardson
|
||||
* PC Research. Inc.
|
||||
* 94 Apple Orchard Drive
|
||||
* Tinton Falls, NJ 07724
|
||||
* Phone: (201) 389-8963 (9-17 EST)
|
||||
* Usenet: ...!uunet!pcrat!rick
|
||||
*
|
||||
* Please send results to Rick Richardson and/or Reinhold Weicker.
|
||||
* Complete information should be given on hardware and software used.
|
||||
* Hardware information includes: Machine type, CPU, type and size
|
||||
* of caches; for microprocessors: clock frequency, memory speed
|
||||
* (number of wait states).
|
||||
* Software information includes: Compiler (and runtime library)
|
||||
* manufacturer and version, compilation switches, OS version.
|
||||
* The Operating System version may give an indication about the compiler;
|
||||
* Dhrystone itself performs no OS calls in the measurement loop.
|
||||
*
|
||||
* The complete output generated by the program should be mailed
|
||||
* such that at least some checks for correctness can be made.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Defines: The following "Defines" are possible:
|
||||
* -DREG (default: Not defined)
|
||||
* As an approximation to what an average C programmer
|
||||
* might do, causes the "register" storage class to be applied
|
||||
* - for local variables, if they are used (dynamically)
|
||||
* five or more times
|
||||
* - for parameters if they are used (dynamically)
|
||||
* six or more times
|
||||
* Note that an optimal "register" strategy is
|
||||
* compiler-dependent, and that "register" declarations
|
||||
* do not necessarily lead to faster execution.
|
||||
* -DNOSTRUCTASSIGN (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* assignment of structures.
|
||||
* -DNOENUMS (default: Not defined)
|
||||
* Define if the C compiler does not support
|
||||
* enumeration types.
|
||||
* -DTIMES (default)
|
||||
* -DTIME
|
||||
* The "times" function of UNIX (returning process times)
|
||||
* or the "time" function (returning wallclock time)
|
||||
* is used for measurement.
|
||||
* For single user machines, "time ()" is adequate. For
|
||||
* multi-user machines where you cannot get single-user
|
||||
* access, use the "times ()" function. If you have
|
||||
* neither, use a stopwatch in the dead of night.
|
||||
* "printf"s are provided marking the points "Start Timer"
|
||||
* and "Stop Timer". DO NOT use the UNIX "time(1)"
|
||||
* command, as this will measure the total time to
|
||||
* run this program, which will (erroneously) include
|
||||
* the time to allocate storage (malloc) and to perform
|
||||
* the initialization.
|
||||
* -DHZ=nnn
|
||||
* In Berkeley UNIX, the function "times" returns process
|
||||
* time in 1/HZ seconds, with HZ = 60 for most systems.
|
||||
* CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY
|
||||
* A VALUE.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* History: Version C/2.1 was made for two reasons:
|
||||
*
|
||||
* 1) There was an obvious need for a common C version of
|
||||
* Dhrystone, since C is at present the most popular system
|
||||
* programming language for the class of processors
|
||||
* (microcomputers, minicomputers) where Dhrystone is used most.
|
||||
* There should be, as far as possible, only one C version of
|
||||
* Dhrystone such that results can be compared without
|
||||
* restrictions. In the past, the C versions distributed
|
||||
* by Rick Richardson (Version 1.1) and by Reinhold Weicker
|
||||
* had small (though not significant) differences.
|
||||
*
|
||||
* 2) As far as it is possible without changes to the Dhrystone
|
||||
* statistics, optimizing compilers should be prevented from
|
||||
* removing significant statements.
|
||||
*
|
||||
* This C version has been developed in cooperation with
|
||||
* Rick Richardson (Tinton Falls, NJ), it incorporates many
|
||||
* ideas from the "Version 1.1" distributed previously by
|
||||
* him over the UNIX network Usenet.
|
||||
* I also thank Chaim Benedelac (National Semiconductor),
|
||||
* David Ditzel (SUN), Earl Killian and John Mashey (MIPS),
|
||||
* Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley)
|
||||
* for their help with comments on earlier versions of the
|
||||
* benchmark.
|
||||
*
|
||||
* Changes: In the initialization part, this version follows mostly
|
||||
* Rick Richardson's version distributed via Usenet, not the
|
||||
* version distributed earlier via floppy disk by Reinhold Weicker.
|
||||
* As a concession to older compilers, names have been made
|
||||
* unique within the first 8 characters.
|
||||
* Inside the measurement loop, this version follows the
|
||||
* version previously distributed by Reinhold Weicker.
|
||||
*
|
||||
* At several places in the benchmark, code has been added,
|
||||
* but within the measurement loop only in branches that
|
||||
* are not executed. The intention is that optimizing compilers
|
||||
* should be prevented from moving code out of the measurement
|
||||
* loop, or from removing code altogether. Since the statements
|
||||
* that are executed within the measurement loop have NOT been
|
||||
* changed, the numbers defining the "Dhrystone distribution"
|
||||
* (distribution of statements, operand types and locality)
|
||||
* still hold. Except for sophisticated optimizing compilers,
|
||||
* execution times for this version should be the same as
|
||||
* for previous versions.
|
||||
*
|
||||
* Since it has proven difficult to subtract the time for the
|
||||
* measurement loop overhead in a correct way, the loop check
|
||||
* has been made a part of the benchmark. This does have
|
||||
* an impact - though a very minor one - on the distribution
|
||||
* statistics which have been updated for this version.
|
||||
*
|
||||
* All changes within the measurement loop are described
|
||||
* and discussed in the companion paper "Rationale for
|
||||
* Dhrystone version 2".
|
||||
*
|
||||
* Because of the self-imposed limitation that the order and
|
||||
* distribution of the executed statements should not be
|
||||
* changed, there are still cases where optimizing compilers
|
||||
* may not generate code for some statements. To a certain
|
||||
* degree, this is unavoidable for small synthetic benchmarks.
|
||||
* Users of the benchmark are advised to check code listings
|
||||
* whether code is generated for all statements of Dhrystone.
|
||||
*
|
||||
* Version 2.1 is identical to version 2.0 distributed via
|
||||
* the UNIX network Usenet in March 1988 except that it corrects
|
||||
* some minor deficiencies that were found by users of version 2.0.
|
||||
* The only change within the measurement loop is that a
|
||||
* non-executed "else" part was added to the "if" statement in
|
||||
* Func_3, and a non-executed "else" part removed from Proc_3.
|
||||
*
|
||||
* Version C/2.2, Steven Pemberton, October 1993
|
||||
* Functionally, identical to version 2.2; the changes are in
|
||||
* how you compile and use it:
|
||||
* - Everything is in one file now, but compiled in 2 passes
|
||||
* - Compile (and run) by running the file through the shell: "sh dhry.c"
|
||||
* - Uses the system definition of HZ if one can be found
|
||||
* - HZ must be defined, otherwise it won't compile (no defaults here)
|
||||
* - The (uninteresting) output is printed to stderr (dhry2 > /dev/null)
|
||||
* - The number of loops is passed as a parameter, rather than read
|
||||
* (dhry2 500000)
|
||||
* - If the number of loops is insufficient to get a good result,
|
||||
* it repeats it with loops*10 until it is enough (rather than just
|
||||
* stopping)
|
||||
* - Output says which sort of clock it is using, and the HZ value
|
||||
* - You can use -DREG instead of the -DREG=register of previous versions
|
||||
* - Some stylistic cleanups.
|
||||
*
|
||||
***************************************************************************
|
||||
*
|
||||
* Compilation model and measurement (IMPORTANT):
|
||||
*
|
||||
* The following "ground rules" apply for measurements:
|
||||
* - Separate compilation
|
||||
* - No procedure merging
|
||||
* - Otherwise, compiler optimizations are allowed but should be indicated
|
||||
* - Default results are those without register declarations
|
||||
* See the companion paper "Rationale for Dhrystone Version 2" for a more
|
||||
* detailed discussion of these ground rules.
|
||||
*
|
||||
* For 16-Bit processors (e.g. 80186, 80286), times for all compilation
|
||||
* models ("small", "medium", "large" etc.) should be given if possible,
|
||||
* together with a definition of these models for the compiler system used.
|
||||
*
|
||||
**************************************************************************
|
||||
*
|
||||
* Dhrystone (C version) statistics:
|
||||
*
|
||||
* [Comment from the first distribution, updated for version 2.
|
||||
* Note that because of language differences, the numbers are slightly
|
||||
* different from the Ada version.]
|
||||
*
|
||||
* The following program contains statements of a high level programming
|
||||
* language (here: C) in a distribution considered representative:
|
||||
*
|
||||
* assignments 52 (51.0 %)
|
||||
* control statements 33 (32.4 %)
|
||||
* procedure, function calls 17 (16.7 %)
|
||||
*
|
||||
* 103 statements are dynamically executed. The program is balanced with
|
||||
* respect to the three aspects:
|
||||
*
|
||||
* - statement type
|
||||
* - operand type
|
||||
* - operand locality
|
||||
* operand global, local, parameter, or constant.
|
||||
*
|
||||
* The combination of these three aspects is balanced only approximately.
|
||||
*
|
||||
* 1. Statement Type:
|
||||
* ----------------- number
|
||||
*
|
||||
* V1 = V2 9
|
||||
* (incl. V1 = F(..)
|
||||
* V = Constant 12
|
||||
* Assignment, 7
|
||||
* with array element
|
||||
* Assignment, 6
|
||||
* with record component
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* X = Y +|-|"&&"|"|" Z 5
|
||||
* X = Y +|-|"==" Constant 6
|
||||
* X = X +|- 1 3
|
||||
* X = Y *|/ Z 2
|
||||
* X = Expression, 1
|
||||
* two operators
|
||||
* X = Expression, 1
|
||||
* three operators
|
||||
* --
|
||||
* 18 18
|
||||
*
|
||||
* if .... 14
|
||||
* with "else" 7
|
||||
* without "else" 7
|
||||
* executed 3
|
||||
* not executed 4
|
||||
* for ... 7 | counted every time
|
||||
* while ... 4 | the loop condition
|
||||
* do ... while 1 | is evaluated
|
||||
* switch ... 1
|
||||
* break 1
|
||||
* declaration with 1
|
||||
* initialization
|
||||
* --
|
||||
* 34 34
|
||||
*
|
||||
* P (...) procedure call 11
|
||||
* user procedure 10
|
||||
* library procedure 1
|
||||
* X = F (...)
|
||||
* function call 6
|
||||
* user function 5
|
||||
* library function 1
|
||||
* --
|
||||
* 17 17
|
||||
* ---
|
||||
* 103
|
||||
*
|
||||
* The average number of parameters in procedure or function calls
|
||||
* is 1.82 (not counting the function values as implicit parameters).
|
||||
*
|
||||
* 2. Operators
|
||||
* ------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Arithmetic 32 50.8
|
||||
*
|
||||
* + 21 33.3
|
||||
* - 7 11.1
|
||||
* * 3 4.8
|
||||
* / (int div) 1 1.6
|
||||
*
|
||||
* Comparison 27 42.8
|
||||
*
|
||||
* == 9 14.3
|
||||
* /= 4 6.3
|
||||
* > 1 1.6
|
||||
* < 3 4.8
|
||||
* >= 1 1.6
|
||||
* <= 9 14.3
|
||||
*
|
||||
* Logic 4 6.3
|
||||
*
|
||||
* && (AND-THEN) 1 1.6
|
||||
* | (OR) 1 1.6
|
||||
* ! (NOT) 2 3.2
|
||||
*
|
||||
* -- -----
|
||||
* 63 100.1
|
||||
*
|
||||
*
|
||||
* 3. Operand Type (counted once per operand reference):
|
||||
* ---------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* Integer 175 72.3 %
|
||||
* Character 45 18.6 %
|
||||
* Pointer 12 5.0 %
|
||||
* String30 6 2.5 %
|
||||
* Array 2 0.8 %
|
||||
* Record 2 0.8 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
* When there is an access path leading to the final operand (e.g. a record
|
||||
* component), only the final data type on the access path is counted.
|
||||
*
|
||||
*
|
||||
* 4. Operand Locality:
|
||||
* -------------------
|
||||
* number approximate
|
||||
* percentage
|
||||
*
|
||||
* local variable 114 47.1 %
|
||||
* global variable 22 9.1 %
|
||||
* parameter 45 18.6 %
|
||||
* value 23 9.5 %
|
||||
* reference 22 9.1 %
|
||||
* function result 6 2.5 %
|
||||
* constant 55 22.7 %
|
||||
* --- -------
|
||||
* 242 100.0 %
|
||||
*
|
||||
* The program does not compute anything meaningful, but it is syntactically
|
||||
* and semantically correct. All variables have a value assigned to them
|
||||
* before they are used as a source operand.
|
||||
*
|
||||
* There has been no explicit effort to account for the effects of a
|
||||
* cache, or to balance the use of long or short displacements for code or
|
||||
* data.
|
||||
*
|
||||
***************************************************************************
|
||||
*/
|
||||
|
||||
/* Compiler and system dependent definitions: */
|
||||
|
||||
/* variables for time measurement: */
|
||||
|
||||
#include <am.h>
|
||||
#include <klib.h>
|
||||
#include <klib-macros.h>
|
||||
|
||||
static uint32_t uptime_ms() { return io_read(AM_TIMER_UPTIME).us / 1000; }
|
||||
#define Start_Timer() Begin_Time = uptime_ms()
|
||||
#define Stop_Timer() End_Time = uptime_ms()
|
||||
|
||||
#define NUMBER_OF_RUNS 500000 /* Default number of runs */
|
||||
#define PASS2
|
||||
|
||||
/* structassign(d, s): copy structure s into structure d.
 * Uses memcpy when the compiler cannot assign structures (NOSTRUCTASSIGN),
 * plain assignment otherwise. Arguments and the whole expansion are
 * parenthesized so the macro behaves like a single expression at any
 * call site. */
#ifdef NOSTRUCTASSIGN
#define structassign(d, s)      memcpy(&(d), &(s), sizeof(d))
#else
#define structassign(d, s)      ((d) = (s))
#endif
|
||||
|
||||
/* Enumeration type with the five identifiers Ident_1 .. Ident_5 (values 0..4).
 * When the compiler has no enum support the identifiers become plain
 * integer macros. The documentation at the top of this file names the
 * switch -DNOENUMS while the code historically tested NOENUM, so both
 * spellings are accepted. */
#if defined(NOENUM) || defined(NOENUMS)
#define Ident_1 0
#define Ident_2 1
#define Ident_3 2
#define Ident_4 3
#define Ident_5 4
typedef int   Enumeration;
#else
typedef       enum    {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
                Enumeration;
#endif
|
||||
/* for boolean and enumeration types in Ada, Pascal */
|
||||
|
||||
/* General definitions: */
|
||||
|
||||
|
||||
#define Null 0
|
||||
/* Value of a Null pointer */
|
||||
|
||||
typedef int One_Thirty;
|
||||
typedef int One_Fifty;
|
||||
typedef char Capital_Letter;
|
||||
typedef int Boolean;
|
||||
typedef char Str_30 [31];
|
||||
typedef int Arr_1_Dim [50];
|
||||
typedef int Arr_2_Dim [50] [50];
|
||||
|
||||
typedef struct record
|
||||
{
|
||||
struct record *Ptr_Comp;
|
||||
Enumeration Discr;
|
||||
union {
|
||||
struct {
|
||||
Enumeration Enum_Comp;
|
||||
int Int_Comp;
|
||||
char Str_Comp [31];
|
||||
} var_1;
|
||||
struct {
|
||||
Enumeration E_Comp_2;
|
||||
char Str_2_Comp [31];
|
||||
} var_2;
|
||||
struct {
|
||||
char Ch_1_Comp;
|
||||
char Ch_2_Comp;
|
||||
} var_3;
|
||||
} variant;
|
||||
} Rec_Type, *Rec_Pointer;
|
||||
|
||||
/* Global Variables: */
|
||||
|
||||
Rec_Pointer Ptr_Glob,
|
||||
Next_Ptr_Glob;
|
||||
int Int_Glob;
|
||||
Boolean Bool_Glob;
|
||||
char Ch_1_Glob,
|
||||
Ch_2_Glob;
|
||||
int Arr_1_Glob [50];
|
||||
int Arr_2_Glob [50] [50];
|
||||
|
||||
Enumeration Func_1 ();
|
||||
/* forward declaration necessary since Enumeration may not simply be int */
|
||||
|
||||
#ifndef REG
|
||||
Boolean Reg = false;
|
||||
#define REG
|
||||
/* REG becomes defined as empty */
|
||||
/* i.e. no register variables */
|
||||
#else
|
||||
Boolean Reg = true;
|
||||
#undef REG
|
||||
#define REG register
|
||||
#endif
|
||||
|
||||
Boolean Done;
|
||||
|
||||
long Begin_Time,
|
||||
End_Time,
|
||||
User_Time;
|
||||
float Microseconds,
|
||||
Dhrystones_Per_Second;
|
||||
|
||||
/* end of variables for time measurement */
|
||||
|
||||
/* Backing store and bump pointer for myalloc(). Nothing is ever freed. */
static char memory[1024];
static char *free_mem = &memory[0];

/* Minimal bump allocator over `memory`.
 *
 * Returns a block of `size` bytes aligned to the larger of 4 bytes and
 * sizeof(void *), or NULL when the request does not fit in what is left
 * of the arena.
 *
 * The original aligned to 4 bytes only, which is too weak on 64-bit
 * targets for records that contain a pointer, and it never checked the
 * arena bounds. */
static char* myalloc(size_t size) {
  const size_t align = sizeof(void *) > 4 ? sizeof(void *) : 4;
  size_t used = (size_t)(free_mem - memory);
  size_t room = sizeof(memory) - used;
  /* Bytes to skip so that the returned address is a multiple of align. */
  size_t pad = (align - (size_t)((uintptr_t)free_mem % align)) % align;

  if (pad > room || size > room - pad)
    return NULL;

  char *ret = free_mem + pad;
  free_mem = ret + size;
  return ret;
}
|
||||
|
||||
void Proc_6 (Enumeration, Enumeration*);
|
||||
void Proc_3 (Rec_Pointer*);
|
||||
void Proc_7 (One_Fifty a, One_Fifty b, One_Fifty* c);
|
||||
Boolean Func_2 (Str_30, Str_30);
|
||||
void Proc_8(Arr_1_Dim, Arr_2_Dim, int, int);
|
||||
Boolean Func_3 (Enumeration);
|
||||
|
||||
void Proc_1 (Ptr_Val_Par)
|
||||
/******************/
|
||||
|
||||
REG Rec_Pointer Ptr_Val_Par;
|
||||
/* executed once */
|
||||
{
|
||||
REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
|
||||
/* == Ptr_Glob_Next */
|
||||
/* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
|
||||
/* corresponds to "rename" in Ada, "with" in Pascal */
|
||||
|
||||
structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob);
|
||||
Ptr_Val_Par->variant.var_1.Int_Comp = 5;
|
||||
Next_Record->variant.var_1.Int_Comp
|
||||
= Ptr_Val_Par->variant.var_1.Int_Comp;
|
||||
Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
|
||||
Proc_3 (&Next_Record->Ptr_Comp);
|
||||
/* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
|
||||
== Ptr_Glob->Ptr_Comp */
|
||||
if (Next_Record->Discr == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Next_Record->variant.var_1.Int_Comp = 6;
|
||||
Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
|
||||
&Next_Record->variant.var_1.Enum_Comp);
|
||||
Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
|
||||
&Next_Record->variant.var_1.Int_Comp);
|
||||
}
|
||||
else /* not executed */
|
||||
structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp);
|
||||
} /* Proc_1 */
|
||||
|
||||
|
||||
void Proc_2 (Int_Par_Ref)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* *Int_Par_Ref == 1, becomes 4 */
|
||||
|
||||
One_Fifty *Int_Par_Ref;
|
||||
{
|
||||
One_Fifty Int_Loc;
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Int_Loc = *Int_Par_Ref + 10;
|
||||
do /* executed once */
|
||||
if (Ch_1_Glob == 'A')
|
||||
/* then, executed */
|
||||
{
|
||||
Int_Loc -= 1;
|
||||
*Int_Par_Ref = Int_Loc - Int_Glob;
|
||||
Enum_Loc = Ident_1;
|
||||
} /* if */
|
||||
while (Enum_Loc != Ident_1); /* true */
|
||||
} /* Proc_2 */
|
||||
|
||||
|
||||
void Proc_3 (Ptr_Ref_Par)
|
||||
/******************/
|
||||
/* executed once */
|
||||
/* Ptr_Ref_Par becomes Ptr_Glob */
|
||||
|
||||
Rec_Pointer *Ptr_Ref_Par;
|
||||
|
||||
{
|
||||
if (Ptr_Glob != Null)
|
||||
/* then, executed */
|
||||
*Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
|
||||
Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
|
||||
} /* Proc_3 */
|
||||
|
||||
|
||||
void Proc_4 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Boolean Bool_Loc;
|
||||
|
||||
Bool_Loc = Ch_1_Glob == 'A';
|
||||
Bool_Glob = Bool_Loc | Bool_Glob;
|
||||
Ch_2_Glob = 'B';
|
||||
} /* Proc_4 */
|
||||
|
||||
|
||||
void Proc_5 () /* without parameters */
|
||||
/*******/
|
||||
/* executed once */
|
||||
{
|
||||
Ch_1_Glob = 'A';
|
||||
Bool_Glob = false;
|
||||
} /* Proc_5 */
|
||||
|
||||
|
||||
/* Procedure for the assignment of structures, */
|
||||
/* if the C compiler doesn't support this feature */
|
||||
#ifdef NOSTRUCTASSIGN
|
||||
/* Byte-wise copy of l bytes from s to d, used by structassign() when the
 * compiler cannot assign structures. Returns d.
 *
 * Uses the standard memcpy signature: the original relied on an implicit
 * int return type and an old-style parameter list, neither of which
 * modern C accepts, and its signature conflicted with the standard
 * declaration.
 * NOTE(review): if the C library in use already provides memcpy, enabling
 * NOSTRUCTASSIGN may produce a duplicate definition - confirm. */
void *memcpy (void *d, const void *s, size_t l)
{
  char *dst = d;
  const char *src = s;

  while (l--) *dst++ = *src++;
  return d;
}
|
||||
#endif
|
||||
|
||||
|
||||
/* REG is left exactly as it was set up earlier in this file.
 * This source merges what used to be two separately compiled passes. An
 * earlier block in the file already normalises REG to either empty or
 * `register`, so REG is always defined by this point. Repeating the full
 * #ifndef/#else normalisation here therefore always took the #else branch
 * and forced `register` for everything below, even when register variables
 * were not requested. Only the "not defined at all" fallback is kept. */
#ifndef REG
#define REG
        /* REG becomes defined as empty */
        /* i.e. no register variables   */
#endif
|
||||
|
||||
extern int Int_Glob;
|
||||
extern char Ch_1_Glob;
|
||||
|
||||
|
||||
void Proc_6 (Enum_Val_Par, Enum_Ref_Par)
|
||||
/*********************************/
|
||||
/* executed once */
|
||||
/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
|
||||
|
||||
Enumeration Enum_Val_Par;
|
||||
Enumeration *Enum_Ref_Par;
|
||||
{
|
||||
*Enum_Ref_Par = Enum_Val_Par;
|
||||
if (! Func_3 (Enum_Val_Par))
|
||||
/* then, not executed */
|
||||
*Enum_Ref_Par = Ident_4;
|
||||
switch (Enum_Val_Par)
|
||||
{
|
||||
case Ident_1:
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
break;
|
||||
case Ident_2:
|
||||
if (Int_Glob > 100)
|
||||
/* then */
|
||||
*Enum_Ref_Par = Ident_1;
|
||||
else *Enum_Ref_Par = Ident_4;
|
||||
break;
|
||||
case Ident_3: /* executed */
|
||||
*Enum_Ref_Par = Ident_2;
|
||||
break;
|
||||
case Ident_4: break;
|
||||
case Ident_5:
|
||||
*Enum_Ref_Par = Ident_3;
|
||||
break;
|
||||
} /* switch */
|
||||
} /* Proc_6 */
|
||||
|
||||
|
||||
/* Proc_7: stores Int_2_Par_Val + (Int_1_Par_Val + 2) in *Int_Par_Ref. */
void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref)
{
  One_Fifty Offset_Val;

  Offset_Val = Int_1_Par_Val + 2;
  *Int_Par_Ref = Int_2_Par_Val + Offset_Val;
} /* Proc_7 */
|
||||
|
||||
|
||||
void Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val)
|
||||
/*********************************************************************/
|
||||
/* executed once */
|
||||
/* Int_Par_Val_1 == 3 */
|
||||
/* Int_Par_Val_2 == 7 */
|
||||
Arr_1_Dim Arr_1_Par_Ref;
|
||||
Arr_2_Dim Arr_2_Par_Ref;
|
||||
int Int_1_Par_Val;
|
||||
int Int_2_Par_Val;
|
||||
{
|
||||
REG One_Fifty Int_Index;
|
||||
REG One_Fifty Int_Loc;
|
||||
|
||||
Int_Loc = Int_1_Par_Val + 5;
|
||||
Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
|
||||
Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
|
||||
Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
|
||||
for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
|
||||
Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
|
||||
Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
|
||||
Int_Glob = 5;
|
||||
} /* Proc_8 */
|
||||
|
||||
|
||||
Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val)
|
||||
/*************************************************/
|
||||
/* executed three times */
|
||||
/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
|
||||
/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
|
||||
/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
|
||||
|
||||
Capital_Letter Ch_1_Par_Val;
|
||||
Capital_Letter Ch_2_Par_Val;
|
||||
{
|
||||
Capital_Letter Ch_1_Loc;
|
||||
Capital_Letter Ch_2_Loc;
|
||||
|
||||
Ch_1_Loc = Ch_1_Par_Val;
|
||||
Ch_2_Loc = Ch_1_Loc;
|
||||
if (Ch_2_Loc != Ch_2_Par_Val)
|
||||
/* then, executed */
|
||||
return (Ident_1);
|
||||
else /* not executed */
|
||||
{
|
||||
Ch_1_Glob = Ch_1_Loc;
|
||||
return (Ident_2);
|
||||
}
|
||||
} /* Func_1 */
|
||||
|
||||
|
||||
Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref)
|
||||
/*************************************************/
|
||||
/* executed once */
|
||||
/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
|
||||
/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
|
||||
|
||||
Str_30 Str_1_Par_Ref;
|
||||
Str_30 Str_2_Par_Ref;
|
||||
{
|
||||
REG One_Thirty Int_Loc;
|
||||
Capital_Letter Ch_Loc;
|
||||
|
||||
Int_Loc = 2;
|
||||
while (Int_Loc <= 2) /* loop body executed once */
|
||||
if (Func_1 (Str_1_Par_Ref[Int_Loc],
|
||||
Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
|
||||
/* then, executed */
|
||||
{
|
||||
Ch_Loc = 'A';
|
||||
Int_Loc += 1;
|
||||
} /* if, while */
|
||||
if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
|
||||
/* then, not executed */
|
||||
Int_Loc = 7;
|
||||
if (Ch_Loc == 'R') {
|
||||
/* then, not executed */
|
||||
return (true);
|
||||
}
|
||||
else /* executed */
|
||||
{
|
||||
if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
|
||||
{
|
||||
Int_Loc += 7;
|
||||
Int_Glob = Int_Loc;
|
||||
return (true);
|
||||
}
|
||||
else /* executed */
|
||||
return (false);
|
||||
} /* if Ch_Loc */
|
||||
} /* Func_2 */
|
||||
|
||||
|
||||
Boolean Func_3 (Enum_Par_Val)
|
||||
/***************************/
|
||||
/* executed once */
|
||||
/* Enum_Par_Val == Ident_3 */
|
||||
Enumeration Enum_Par_Val;
|
||||
{
|
||||
Enumeration Enum_Loc;
|
||||
|
||||
Enum_Loc = Enum_Par_Val;
|
||||
if (Enum_Loc == Ident_3)
|
||||
/* then, executed */
|
||||
return (true);
|
||||
else /* not executed */
|
||||
return (false);
|
||||
} /* Func_3 */
|
||||
|
||||
|
||||
Boolean pass = true;
|
||||
Boolean check(int cond) {
|
||||
if (!cond) pass = false;
|
||||
return cond;
|
||||
}
|
||||
int main ()
|
||||
/*****/
|
||||
|
||||
/* main program, corresponds to procedures */
|
||||
/* Main and Proc_0 in the Ada version */
|
||||
{
|
||||
One_Fifty Int_1_Loc;
|
||||
REG One_Fifty Int_2_Loc;
|
||||
One_Fifty Int_3_Loc;
|
||||
REG char Ch_Index;
|
||||
Enumeration Enum_Loc;
|
||||
Str_30 Str_1_Loc;
|
||||
Str_30 Str_2_Loc;
|
||||
REG int Run_Index;
|
||||
REG int Number_Of_Runs;
|
||||
|
||||
ioe_init();
|
||||
|
||||
Number_Of_Runs = NUMBER_OF_RUNS;
|
||||
|
||||
/* Initializations */
|
||||
|
||||
Next_Ptr_Glob = (Rec_Pointer) myalloc (sizeof (Rec_Type));
|
||||
Ptr_Glob = (Rec_Pointer) myalloc (sizeof (Rec_Type));
|
||||
|
||||
Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
|
||||
Ptr_Glob->Discr = Ident_1;
|
||||
Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
|
||||
Ptr_Glob->variant.var_1.Int_Comp = 40;
|
||||
strcpy (Ptr_Glob->variant.var_1.Str_Comp,
|
||||
"DHRYSTONE PROGRAM, SOME STRING");
|
||||
strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
|
||||
|
||||
Arr_2_Glob [8][7] = 10;
|
||||
/* Was missing in published program. Without this statement, */
|
||||
/* Arr_2_Glob [8][7] would have an undefined value. */
|
||||
/* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
|
||||
/* overflow may occur for this array element. */
|
||||
|
||||
printf ("Dhrystone Benchmark, Version %s\n", Version);
|
||||
|
||||
Done = false;
|
||||
while (!Done) {
|
||||
|
||||
printf ("Trying %d runs through Dhrystone.\n", Number_Of_Runs);
|
||||
|
||||
/***************/
|
||||
/* Start timer */
|
||||
/***************/
|
||||
|
||||
Start_Timer();
|
||||
|
||||
for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
|
||||
{
|
||||
|
||||
Proc_5();
|
||||
Proc_4();
|
||||
/* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
|
||||
Int_1_Loc = 2;
|
||||
Int_2_Loc = 3;
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
|
||||
Enum_Loc = Ident_2;
|
||||
Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
|
||||
/* Bool_Glob == 1 */
|
||||
while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
|
||||
{
|
||||
Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
|
||||
/* Int_3_Loc == 7 */
|
||||
Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
|
||||
/* Int_3_Loc == 7 */
|
||||
Int_1_Loc += 1;
|
||||
} /* while */
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
|
||||
/* Int_Glob == 5 */
|
||||
Proc_1 (Ptr_Glob);
|
||||
for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
|
||||
/* loop body executed twice */
|
||||
{
|
||||
if (Enum_Loc == Func_1 (Ch_Index, 'C'))
|
||||
/* then, not executed */
|
||||
{
|
||||
Proc_6 (Ident_1, &Enum_Loc);
|
||||
strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
|
||||
Int_2_Loc = Run_Index;
|
||||
Int_Glob = Run_Index;
|
||||
}
|
||||
}
|
||||
/* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
|
||||
Int_2_Loc = Int_2_Loc * Int_1_Loc;
|
||||
Int_1_Loc = Int_2_Loc / Int_3_Loc;
|
||||
Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
|
||||
/* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
|
||||
Proc_2 (&Int_1_Loc);
|
||||
/* Int_1_Loc == 5 */
|
||||
|
||||
} /* loop "for Run_Index" */
|
||||
|
||||
/**************/
|
||||
/* Stop timer */
|
||||
/**************/
|
||||
|
||||
Stop_Timer();
|
||||
|
||||
User_Time = End_Time - Begin_Time;
|
||||
|
||||
Done = true;
|
||||
}
|
||||
|
||||
if (!check(Int_Glob == 5)) {
|
||||
printf("Int_Glob: %d\n", Int_Glob);
|
||||
printf(" should be: %d\n", 5);
|
||||
}
|
||||
if (!check(Bool_Glob == 1)) {
|
||||
printf("Bool_Glob: %d\n", Bool_Glob);
|
||||
printf(" should be: %d\n", 1);
|
||||
}
|
||||
if (!check(Ch_1_Glob == 'A')) {
|
||||
printf("Ch_1_Glob: %c\n", Ch_1_Glob);
|
||||
printf(" should be: %c\n", 'A');
|
||||
}
|
||||
if (!check(Ch_2_Glob == 'B')) {
|
||||
printf("Ch_2_Glob: %c\n", Ch_2_Glob);
|
||||
printf(" should be: %c\n", 'B');
|
||||
}
|
||||
if (!check(Arr_1_Glob[8] == 7)) {
|
||||
printf("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]);
|
||||
printf(" should be: %d\n", 7);
|
||||
}
|
||||
if (!check(Arr_2_Glob[8][7] == Number_Of_Runs + 10)) {
|
||||
printf("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]);
|
||||
printf(" should be: Number_Of_Runs + 10\n");
|
||||
}
|
||||
|
||||
if (!check((int)Ptr_Glob->Discr == 0)) {
|
||||
printf("Ptr_Glob->Discr: %d\n", Ptr_Glob->Discr);
|
||||
printf(" should be: %d\n", 0);
|
||||
}
|
||||
if (!check(Ptr_Glob->variant.var_1.Enum_Comp == 2)) {
|
||||
printf("Ptr_Glob->Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf(" should be: %d\n", 2);
|
||||
}
|
||||
if (!check(Ptr_Glob->variant.var_1.Int_Comp == 17)) {
|
||||
printf("Ptr_Glob->Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf(" should be: %d\n", 17);
|
||||
}
|
||||
if (!check(strcmp(Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING") == 0)) {
|
||||
printf("Ptr_Glob->Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
}
|
||||
|
||||
if (!check((int)Next_Ptr_Glob->Discr == 0)) {
|
||||
printf("Next_Ptr_Glob->Discr: %d\n", Next_Ptr_Glob->Discr);
|
||||
printf(" should be: %d\n", 0);
|
||||
}
|
||||
if (!check(Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)) {
|
||||
printf("Next_Ptr_Glob->Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
|
||||
printf(" should be: %d\n", 1);
|
||||
}
|
||||
if (!check(Next_Ptr_Glob->variant.var_1.Int_Comp == 18)) {
|
||||
printf("Next_Ptr_Glob->Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
|
||||
printf(" should be: %d\n", 18);
|
||||
}
|
||||
if (!check(strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING") == 0)) {
|
||||
printf("Next_Ptr_Glob->Str_Comp: %s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
|
||||
printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n");
|
||||
}
|
||||
|
||||
if (!check(Int_1_Loc == 5)) {
|
||||
printf("Int_1_Loc: %d\n", Int_1_Loc);
|
||||
printf(" should be: %d\n", 5);
|
||||
}
|
||||
if (!check(Int_2_Loc == 13)) {
|
||||
printf("Int_2_Loc: %d\n", Int_2_Loc);
|
||||
printf(" should be: %d\n", 13);
|
||||
}
|
||||
if (!check(Int_3_Loc == 7)) {
|
||||
printf("Int_3_Loc: %d\n", Int_3_Loc);
|
||||
printf(" should be: %d\n", 7);
|
||||
}
|
||||
if (!check(Enum_Loc == 1)) {
|
||||
printf("Enum_Loc: %d\n", Enum_Loc);
|
||||
printf(" should be: %d\n", 1);
|
||||
}
|
||||
|
||||
if (!check(strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)) {
|
||||
printf("Str_1_Loc: %s\n", Str_1_Loc);
|
||||
printf(" should be: DHRYSTONE PROGRAM, 1'ST STRING\n");
|
||||
}
|
||||
if (!check(strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)) {
|
||||
printf("Str_2_Loc: %s\n", Str_2_Loc);
|
||||
printf(" should be: DHRYSTONE PROGRAM, 2'ND STRING\n");
|
||||
}
|
||||
|
||||
printf ("Finished in %d ms\n", (int)User_Time);
|
||||
printf("==================================================\n");
|
||||
printf("Dhrystone %s %d Marks\n", pass ? "PASS" : "FAIL",
|
||||
880900 / (int)User_Time * NUMBER_OF_RUNS/ 500000);
|
||||
printf(" vs. 100000 Marks (i7-7700K @ 4.20GHz)\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
3
microbench/Makefile
Normal file
3
microbench/Makefile
Normal file
|
@ -0,0 +1,3 @@
|
|||
NAME = microbench
|
||||
SRCS = $(shell find -L ./src/ -name "*.c" -o -name "*.cc")
|
||||
include $(AM_HOME)/Makefile
|
113
microbench/include/benchmark.h
Normal file
113
microbench/include/benchmark.h
Normal file
|
@ -0,0 +1,113 @@
|
|||
#ifndef __BENCHMARK_H__
|
||||
#define __BENCHMARK_H__
|
||||
|
||||
#include <am.h>
|
||||
#include <klib.h>
|
||||
#include <klib-macros.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MB * 1024 * 1024
|
||||
#define KB * 1024
|
||||
|
||||
#define REF_CPU "i7-7700K @ 4.20GHz"
|
||||
#define REF_SCORE 100000
|
||||
|
||||
#define REPEAT 1
|
||||
|
||||
// size | heap | time | checksum
|
||||
#define QSORT_S { 100, 1 KB, 0, 0x08467105}
|
||||
#define QSORT_M { 30000, 128 KB, 0, 0xa3e99fe4}
|
||||
#define QSORT_L { 100000, 640 KB, 5114, 0xed8cff89}
|
||||
#define QUEEN_S { 8, 0 KB, 0, 0x0000005c}
|
||||
#define QUEEN_M { 11, 0 KB, 0, 0x00000a78}
|
||||
#define QUEEN_L { 12, 0 KB, 4707, 0x00003778}
|
||||
#define BF_S { 4, 32 KB, 0, 0xa6f0079e}
|
||||
#define BF_M { 25, 32 KB, 0, 0xa88f8a65}
|
||||
#define BF_L { 180, 32 KB, 23673, 0x9221e2b3}
|
||||
#define FIB_S { 2, 1 KB, 0, 0x7cfeddf0}
|
||||
#define FIB_M { 23, 16 KB, 0, 0x94ad8800}
|
||||
#define FIB_L { 91, 256 KB, 28318, 0xebdc5f80}
|
||||
#define SIEVE_S { 100, 1 KB, 0, 0x00000019}
|
||||
#define SIEVE_M { 200000, 32 KB, 0, 0x00004640}
|
||||
#define SIEVE_L {10000000, 2 MB, 39361, 0x000a2403}
|
||||
#define PZ15_S { 0, 1 KB, 0, 0x00000006}
|
||||
#define PZ15_M { 1, 256 KB, 0, 0x0000b0df}
|
||||
#define PZ15_L { 2, 2 MB, 4486, 0x00068b8c}
|
||||
#define DINIC_S { 10, 8 KB, 0, 0x0000019c}
|
||||
#define DINIC_M { 80, 512 KB, 0, 0x00004f99}
|
||||
#define DINIC_L { 128, 1 MB, 10882, 0x0000c248}
|
||||
#define LZIP_S { 128, 128 KB, 0, 0xe05fc832}
|
||||
#define LZIP_M { 50000, 1 MB, 0, 0xdc93e90c}
|
||||
#define LZIP_L { 1048576, 4 MB, 7593, 0x8d62c81f}
|
||||
#define SSORT_S { 100, 4 KB, 0, 0x4c555e09}
|
||||
#define SSORT_M { 10000, 512 KB, 0, 0x0db7909b}
|
||||
#define SSORT_L { 100000, 4 MB, 4504, 0x4f0ab431}
|
||||
#define MD5_S { 100, 1 KB, 0, 0xf902f28f}
|
||||
#define MD5_M { 200000, 256 KB, 0, 0xd4f9bc6d}
|
||||
#define MD5_L {10000000, 16 MB, 17239, 0x27286a42}
|
||||
|
||||
#define BENCHMARK_LIST(def) \
|
||||
def(qsort, "qsort", QSORT_S, QSORT_M, QSORT_L, "Quick sort") \
|
||||
def(queen, "queen", QUEEN_S, QUEEN_M, QUEEN_L, "Queen placement") \
|
||||
def( bf, "bf", BF_S, BF_M, BF_L, "Brainf**k interpreter") \
|
||||
def( fib, "fib", FIB_S, FIB_M, FIB_L, "Fibonacci number") \
|
||||
def(sieve, "sieve", SIEVE_S, SIEVE_M, SIEVE_L, "Eratosthenes sieve") \
|
||||
def( 15pz, "15pz", PZ15_S, PZ15_M, PZ15_L, "A* 15-puzzle search") \
|
||||
def(dinic, "dinic", DINIC_S, DINIC_M, DINIC_L, "Dinic's maxflow algorithm") \
|
||||
def( lzip, "lzip", LZIP_S, LZIP_M, LZIP_L, "Lzip compression") \
|
||||
def(ssort, "ssort", SSORT_S, SSORT_M, SSORT_L, "Suffix sort") \
|
||||
def( md5, "md5", MD5_S, MD5_M, MD5_L, "MD5 digest") \
|
||||
|
||||
// Each benchmark will run REPEAT times
|
||||
|
||||
#define DECL(_name, _sname, _s, _m, _l, _desc) \
|
||||
void bench_##_name##_prepare(); \
|
||||
void bench_##_name##_run(); \
|
||||
int bench_##_name##_validate();
|
||||
|
||||
BENCHMARK_LIST(DECL)
|
||||
|
||||
// One size configuration of a benchmark.  The *_S / *_M / *_L initializers
// fill the fields in this order: size | heap | time | checksum.
typedef struct Setting {
  int size;            // benchmark-specific size parameter
  unsigned long mlim;  // the "heap" column of the initializers
  unsigned long ref;   // the "time" column of the initializers
  uint32_t checksum;   // value a benchmark's validate step compares against
} Setting;
|
||||
|
||||
typedef struct Benchmark {
|
||||
void (*prepare)();
|
||||
void (*run)();
|
||||
int (*validate)();
|
||||
const char *name, *desc;
|
||||
Setting settings[3];
|
||||
} Benchmark;
|
||||
|
||||
extern Benchmark *current;
|
||||
extern Setting *setting;
|
||||
|
||||
// Outcome of one benchmark run, filled in through prepare()/done().
typedef struct Result {
  int pass;            // presumably the validate() verdict -- confirm at the call site
  unsigned long tsc;   // NOTE(review): name suggests a cycle/timestamp counter; confirm
  unsigned long msec;  // NOTE(review): name suggests elapsed milliseconds; confirm
} Result;
|
||||
|
||||
void prepare(Result *res);
|
||||
void done(Result *res);
|
||||
|
||||
// memory allocation
|
||||
void* bench_alloc(size_t size);
|
||||
void bench_free(void *ptr);
|
||||
|
||||
// random number generator
|
||||
void bench_srand(uint32_t seed);
|
||||
uint32_t bench_rand(); // return a random number between 0..32767
|
||||
|
||||
// checksum
|
||||
uint32_t checksum(void *start, void *end);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
88
microbench/src/15pz/15pz.cc
Normal file
88
microbench/src/15pz/15pz.cc
Normal file
|
@ -0,0 +1,88 @@
|
|||
#include <benchmark.h>
|
||||
#include "puzzle.h"
|
||||
#include "heap.h"
|
||||
|
||||
const int N = 4;
|
||||
|
||||
static int PUZZLE_S[N*N] = {
|
||||
1, 2, 3, 4,
|
||||
5, 6, 7, 8,
|
||||
9, 10, 0, 11,
|
||||
13, 14, 15, 12,
|
||||
};
|
||||
|
||||
static int PUZZLE_M[N*N] = {
|
||||
1, 2, 3, 4,
|
||||
5, 6, 7, 8,
|
||||
12, 0, 14, 13,
|
||||
11, 15, 10, 9,
|
||||
};
|
||||
|
||||
static int PUZZLE_L[N*N] = {
|
||||
0, 2, 3, 4,
|
||||
9, 6, 7, 8,
|
||||
5, 11, 10, 12,
|
||||
1, 15, 13, 14,
|
||||
};
|
||||
|
||||
static int ans;
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Prepare hook of the 15-puzzle benchmark.
void bench_15pz_prepare() {
  // intentionally empty: bench_15pz_run() picks its start board itself
}
|
||||
|
||||
void bench_15pz_run() {
|
||||
N_puzzle<N> puzzle;
|
||||
int MAXN;
|
||||
|
||||
switch (setting->size) {
|
||||
case 0: puzzle = N_puzzle<N>(PUZZLE_S); MAXN = 10; break;
|
||||
case 1: puzzle = N_puzzle<N>(PUZZLE_M); MAXN = 2048; break;
|
||||
case 2: puzzle = N_puzzle<N>(PUZZLE_L); MAXN = 16384; break;
|
||||
default: assert(0);
|
||||
}
|
||||
assert(puzzle.solvable());
|
||||
|
||||
auto *heap = (Updatable_heap<N_puzzle<N>> *) bench_alloc(sizeof(Updatable_heap<N_puzzle<N>>));
|
||||
heap->init(MAXN);
|
||||
heap->push( puzzle, 0 );
|
||||
|
||||
int n = 0;
|
||||
ans = -1;
|
||||
|
||||
while( heap->size() != 0 && n != MAXN ) {
|
||||
N_puzzle<N> top = heap->pop();
|
||||
++n;
|
||||
|
||||
if ( top == N_puzzle<N>::solution() ) {
|
||||
// We are done
|
||||
ans = heap->length(top) * n;
|
||||
return;
|
||||
}
|
||||
|
||||
if ( top.tile_left_possible() ) {
|
||||
heap->push( top.tile_left(), heap->length( top ) + 1 );
|
||||
}
|
||||
|
||||
if ( top.tile_right_possible() ) {
|
||||
heap->push( top.tile_right(), heap->length( top ) + 1 );
|
||||
}
|
||||
|
||||
if ( top.tile_up_possible() ) {
|
||||
heap->push( top.tile_up(), heap->length( top ) + 1 );
|
||||
}
|
||||
|
||||
if ( top.tile_down_possible() ) {
|
||||
heap->push( top.tile_down(), heap->length( top ) + 1 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int bench_15pz_validate() {
|
||||
return (uint32_t)ans == setting->checksum;
|
||||
}
|
||||
|
||||
}
|
||||
|
227
microbench/src/15pz/heap.h
Normal file
227
microbench/src/15pz/heap.h
Normal file
|
@ -0,0 +1,227 @@
|
|||
// Author: Douglas Wilhelm Harder
|
||||
// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved.
|
||||
|
||||
// Returns a when a > b, otherwise b.
template <typename T>
T max(T a, T b) {
  if ( a > b ) {
    return a;
  }
  return b;
}
|
||||
|
||||
// Min-heap of search steps ordered by Step::weight(), paired with a chained
// hash table so a step can be found again from its element.  The heap array
// is 1-based: the root lives in heap[1].
template <typename T>
class Updatable_heap {
  private:
    class Step;              // heap / hash node, defined after this class

    int M;                   // value given to init(); (M - 1) masks the hash into a bucket
    Step **hash_table;       // bucket heads, chained through Step::next
    Step **heap;             // heap storage; slots 1..heap_size are in use
    int heap_size;           // number of steps currently in the heap
    int maximum_heap_size;   // largest heap_size seen right after a push

    inline void swap( int, int );
    void percolate_down();
    void percolate_up( int );
    Step *pointer( T const & ) const;

  public:
    void init(int m);
    ~Updatable_heap();

    T pop();
    void push( T const &, int );

    int size() const;
    int maximum_size() const;
    int length( T const & ) const;
};
|
||||
|
||||
template <typename T>
|
||||
class Updatable_heap<T>::Step {
|
||||
public:
|
||||
T element;
|
||||
Step *next;
|
||||
int heap_index;
|
||||
int path_length;
|
||||
int path_weight;
|
||||
bool visited;
|
||||
Step *previous_step;
|
||||
|
||||
void init( T const &, Step *, int, int );
|
||||
int length() const;
|
||||
int weight() const;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::init(int m) {
|
||||
M = m;
|
||||
heap = (Step **)bench_alloc(sizeof(void *) * M);
|
||||
hash_table = (Step **)bench_alloc(sizeof(void *) * (M + 1));
|
||||
|
||||
heap_size = 0;
|
||||
maximum_heap_size = 0;
|
||||
for ( int i = 0; i < M; ++i ) {
|
||||
hash_table[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Updatable_heap<T>::~Updatable_heap() {
|
||||
for ( int i = 0; i < M; ++i ) {
|
||||
Step *ptr = hash_table[i];
|
||||
|
||||
while ( ptr != 0 ) {
|
||||
Step *tmp = ptr;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T Updatable_heap<T>::pop() {
|
||||
if ( size() == 0 ) {
|
||||
return T();
|
||||
}
|
||||
|
||||
T top = heap[1]->element;
|
||||
|
||||
if ( size() == 1 ) {
|
||||
heap_size = 0;
|
||||
} else {
|
||||
assert( size() > 1 );
|
||||
|
||||
heap[1] = heap[size()];
|
||||
heap[1]->heap_index = 1;
|
||||
|
||||
--heap_size;
|
||||
percolate_down();
|
||||
}
|
||||
|
||||
return top;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void inline Updatable_heap<T>::swap( int i, int j ) {
|
||||
Step *tmp = heap[j];
|
||||
heap[j] = heap[i];
|
||||
heap[i] = tmp;
|
||||
|
||||
heap[i]->heap_index = i;
|
||||
heap[j]->heap_index = j;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::percolate_down() {
|
||||
int n = 1;
|
||||
|
||||
while ( 2*n + 1 <= size() ) {
|
||||
if ( heap[n]->weight() < heap[2*n]->weight() && heap[n]->weight() < heap[2*n + 1]->weight() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ( heap[2*n]->weight() < heap[2*n + 1]->weight() ) {
|
||||
swap( n, 2*n );
|
||||
n = 2*n;
|
||||
} else {
|
||||
assert( heap[2*n]->weight() >= heap[2*n + 1]->weight() );
|
||||
|
||||
swap( n, 2*n + 1 );
|
||||
n = 2*n + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( 2*n == size() && heap[2*n]->weight() < heap[n]->weight() ) {
|
||||
swap( n, 2*n );
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::percolate_up( int n ) {
|
||||
while ( n != 1 ) {
|
||||
int parent = n/2;
|
||||
|
||||
if ( heap[parent]->weight() > heap[n]->weight() ) {
|
||||
swap( parent, n );
|
||||
n = parent;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::push( T const &pz, int path_length ) {
|
||||
Step *ptr = pointer( pz );
|
||||
|
||||
if ( ptr == 0 ) {
|
||||
assert( heap_size <= M );
|
||||
++heap_size;
|
||||
|
||||
Step *ptr = (Step*)bench_alloc(sizeof(Step));
|
||||
ptr->init( pz, hash_table[pz.hash() & (M - 1)], size(), path_length );
|
||||
hash_table[pz.hash() & (M - 1)] = ptr;
|
||||
heap[size()] = ptr;
|
||||
|
||||
percolate_up( size() );
|
||||
|
||||
maximum_heap_size = max( maximum_heap_size, size() );
|
||||
} else {
|
||||
if ( !ptr->visited ) {
|
||||
if ( path_length + ptr->element.lower_bound() < ptr->weight() ) {
|
||||
ptr->path_weight = path_length + ptr->element.lower_bound();
|
||||
percolate_up( ptr->heap_index );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::size() const {
|
||||
return heap_size;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::maximum_size() const {
|
||||
return maximum_heap_size;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::length( T const &pz ) const {
|
||||
Step *ptr = pointer( pz );
|
||||
|
||||
return ( ptr == 0 ) ? 2147483647 : ptr->length();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename Updatable_heap<T>::Step *Updatable_heap<T>::pointer( T const &pz ) const {
|
||||
for ( Step *ptr = hash_table[pz.hash() & (M - 1)]; ptr != 0; ptr = ptr->next ) {
|
||||
if ( ptr->element == pz ) {
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/****************************************************
|
||||
* ************************************************ *
|
||||
* * Iterator * *
|
||||
* ************************************************ *
|
||||
****************************************************/
|
||||
|
||||
template <typename T>
|
||||
void Updatable_heap<T>::Step::init( T const &pz, Step *n, int hi, int dist ) {
|
||||
element = pz;
|
||||
next = n;
|
||||
heap_index = hi;
|
||||
path_length = dist;
|
||||
path_weight = dist + element.lower_bound();
|
||||
visited = false;
|
||||
previous_step = 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::Step::length() const {
|
||||
return path_length;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int Updatable_heap<T>::Step::weight() const {
|
||||
return path_weight;
|
||||
}
|
||||
|
475
microbench/src/15pz/puzzle.h
Normal file
475
microbench/src/15pz/puzzle.h
Normal file
|
@ -0,0 +1,475 @@
|
|||
// Author: Douglas Wilhelm Harder
|
||||
// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved.
|
||||
// Url: https://ece.uwaterloo.ca/~dwharder/aads/Algorithms/N_puzzles/
|
||||
|
||||
// One position of the N x N sliding-tile puzzle.  Tile 0 is the blank.
template <int N>
class N_puzzle {
	private:
		bool puzzle_valid;            // false for rejected input or an impossible move
		uint8_t zero_i;               // row of the blank
		uint8_t zero_j;               // column of the blank
		int8_t manhattan_distance;    // summed row + column offsets of the non-blank tiles from their home cells
		int8_t puzzle[N][N];          // tiles, row by row
		int hash_value;               // cached by determine_hash()

		void determine_hash();

		static int abs( int n ) {
			if ( n < 0 ) {
				return -n;
			}
			return n;
		}

	public:
		N_puzzle();
		N_puzzle( int array[N*N] );
		N_puzzle( N_puzzle const & );
		N_puzzle &operator=( N_puzzle const & );

		bool solvable() const;
		bool valid() const;
		int lower_bound() const;
		unsigned int hash() const;

		// Each move slides the named neighbour of the blank into the blank.
		bool tile_up_possible() const;
		bool tile_down_possible() const;
		bool tile_left_possible() const;
		bool tile_right_possible() const;

		N_puzzle tile_up() const;
		N_puzzle tile_down() const;
		N_puzzle tile_left() const;
		N_puzzle tile_right() const;

		bool operator==( N_puzzle const & ) const;
		bool operator!=( N_puzzle const & ) const;

		static N_puzzle solution();
};
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N>::N_puzzle():
|
||||
puzzle_valid( true ),
|
||||
manhattan_distance( 0 ) {
|
||||
int array[N*N];
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
array[i] = i;
|
||||
}
|
||||
|
||||
int n = 0;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
int k = bench_rand() % (N*N - n);
|
||||
puzzle[i][j] = array[k];
|
||||
|
||||
if ( array[k] == 0 ) {
|
||||
zero_i = i;
|
||||
zero_j = j;
|
||||
} else {
|
||||
manhattan_distance += abs( ((array[k] - 1) / N) - i );
|
||||
manhattan_distance += abs( ((array[k] - 1) % N) - j );
|
||||
}
|
||||
|
||||
++n;
|
||||
array[k] = array[N*N - n];
|
||||
}
|
||||
}
|
||||
|
||||
determine_hash();
|
||||
}
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N>::N_puzzle( int array[N*N] ):
|
||||
puzzle_valid( true ),
|
||||
manhattan_distance( 0 ) {
|
||||
bool check[N*N];
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
check[i] = false;
|
||||
}
|
||||
|
||||
int n = 0;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
puzzle[i][j] = array[n];
|
||||
check[array[n]] = true;
|
||||
|
||||
if ( array[n] == 0 ) {
|
||||
zero_i = i;
|
||||
zero_j = j;
|
||||
} else {
|
||||
manhattan_distance += abs( ((array[n] - 1) / N) - i );
|
||||
manhattan_distance += abs( ((array[n] - 1) % N) - j );
|
||||
}
|
||||
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
if ( !check[i] ) {
|
||||
puzzle_valid = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
determine_hash();
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine a hash value for the puzzle.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
void N_puzzle<N>::determine_hash() {
|
||||
hash_value = 0;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
hash_value = hash_value*1973 + puzzle[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N>::N_puzzle( N_puzzle const &pz ):
|
||||
puzzle_valid( pz.puzzle_valid ),
|
||||
zero_i( pz.zero_i ),
|
||||
zero_j( pz.zero_j ),
|
||||
manhattan_distance( pz.manhattan_distance ),
|
||||
hash_value( pz.hash_value ) {
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
puzzle[i][j] = pz.puzzle[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template < int N >
|
||||
N_puzzle<N> &N_puzzle<N>::operator=( N_puzzle const &rhs ) {
|
||||
puzzle_valid = rhs.puzzle_valid;
|
||||
zero_i = rhs.zero_i;
|
||||
zero_j = rhs.zero_j;
|
||||
manhattan_distance = rhs.manhattan_distance;
|
||||
hash_value = rhs.hash_value;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
puzzle[i][j] = rhs.puzzle[i][j];
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Moving a tile up is possible as long as
|
||||
* the blank is not in the last row.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_up_possible() const {
|
||||
return puzzle_valid && (zero_i != N - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Moving a tile down is possible as long as
|
||||
* the blank is not in the first row.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_down_possible() const {
|
||||
return puzzle_valid && (zero_i != 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Moving a tile left is possible as long as
|
||||
* the blank is not in the last column.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_left_possible() const {
|
||||
return puzzle_valid && (zero_j != N - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Moving a tile right is possible as long as
|
||||
* the blank is not in the first column.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::tile_right_possible() const {
|
||||
return puzzle_valid && (zero_j != 0);
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_up() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_i == N - 1 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - zero_i ) -
|
||||
abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - (zero_i + 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i + 1][zero_j];
|
||||
++result.zero_i;
|
||||
result.puzzle[result.zero_i][zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_down() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_i == 0 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - zero_i ) -
|
||||
abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - (zero_i - 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i - 1][zero_j];
|
||||
--result.zero_i;
|
||||
result.puzzle[result.zero_i][zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_left() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_j == N - 1 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - zero_j ) -
|
||||
abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - (zero_j + 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j + 1];
|
||||
++result.zero_j;
|
||||
result.puzzle[zero_i][result.zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::tile_right() const {
|
||||
if ( !puzzle_valid ) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
N_puzzle result( *this );
|
||||
|
||||
if ( zero_j == 0 ) {
|
||||
result.puzzle_valid = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
result.manhattan_distance +=
|
||||
abs( ((puzzle[zero_i][zero_j - 1] - 1) % N) - zero_j ) -
|
||||
abs( ((puzzle[zero_i][zero_j - 1] - 1) % N) - (zero_j - 1) );
|
||||
|
||||
result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j - 1];
|
||||
--result.zero_j;
|
||||
result.puzzle[zero_i][result.zero_j] = 0;
|
||||
|
||||
result.determine_hash();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the puzzle is solvable: that is, check the
|
||||
* number of inversions pluse the Manhattan distance of
|
||||
* the black from the lower-right corner.
|
||||
*
|
||||
* Run time: O(n^2)
|
||||
* Memory: O(n)
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::solvable() const {
|
||||
if ( !valid() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int entries[N*N];
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] == 0 ) {
|
||||
entries[N*i + j] = N*N;
|
||||
} else {
|
||||
entries[N*i + j] = puzzle[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int parity = 0;
|
||||
|
||||
for ( int i = 0; i < N*N; ++i ) {
|
||||
for ( int j = i + 1; j < N*N; ++j ) {
|
||||
if ( entries[i] > entries[j] ) {
|
||||
++parity;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parity += 2*N - 2 - zero_i - zero_j;
|
||||
|
||||
return ( (parity & 1) == 0 );
|
||||
}
|
||||
|
||||
template <int N>
|
||||
bool N_puzzle<N>::valid() const {
|
||||
return puzzle_valid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return either the Manhattan, Hamming, or discrete distance
|
||||
* between the puzzle and the solution.
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
int N_puzzle<N>::lower_bound() const {
|
||||
// The Manhattan distance
|
||||
return valid() ? manhattan_distance : N*N*N;
|
||||
|
||||
int result = 0;
|
||||
int count = 1;
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] != (count % N*N) ) {
|
||||
++result;
|
||||
}
|
||||
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
// The Hamming distance, or
|
||||
return result;
|
||||
|
||||
// The discrete distance: converts the A* search to Dijkstra's algorithm
|
||||
// return ( result == 0 ) ? 0 : 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* puzzle1 == puzzle2
|
||||
*
|
||||
* Two puzzles are considered to be equal if their entries
|
||||
* are equal:
|
||||
* If either puzzle is not valid, return false.
|
||||
* If the hash values are different, they are different; return false.
|
||||
* Otherwise, check all entries to see if they are the same.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
bool N_puzzle<N>::operator==( N_puzzle const &rhs ) const {
|
||||
if ( !valid() || !rhs.valid() || hash() != rhs.hash() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] != rhs.puzzle[i][j] ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* puzzle1 != puzzle2
|
||||
*
|
||||
* Two puzzles are considered to be unequal if any of the entries
|
||||
* different:
|
||||
* If either puzzle is not valid, return false.
|
||||
* If the hash values are different, they are different; return true.
|
||||
* Otherwise, check all entries to see if they are the same.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
bool N_puzzle<N>::operator!=( N_puzzle const &rhs ) const {
|
||||
if ( !valid() || !rhs.valid() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( hash() != rhs.hash() ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for ( int i = 0; i < N; ++i ) {
|
||||
for ( int j = 0; j < N; ++j ) {
|
||||
if ( puzzle[i][j] != rhs.puzzle[i][j] ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* unsigned int hash() const
|
||||
*
|
||||
* Returns the pre-calculated hash value.
|
||||
*/
|
||||
|
||||
template < int N >
|
||||
unsigned int N_puzzle<N>::hash() const {
|
||||
return valid() ? hash_value : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* N_puzzle<N> solution()
|
||||
*
|
||||
* Returns the correct solution to the N puzzle:
|
||||
*
|
||||
* 1 2 3 1 2 3 4
|
||||
* 3x3: 4 5 6 4x4: 5 6 7 8
|
||||
* 7 8 9 10 11 12
|
||||
* 13 14 15
|
||||
*/
|
||||
|
||||
template <int N>
|
||||
N_puzzle<N> N_puzzle<N>::solution() {
|
||||
int array[N*N];
|
||||
|
||||
for ( int i = 0; i < N*N - 1; ++i ) {
|
||||
array[i] = i + 1;
|
||||
}
|
||||
|
||||
array[N*N - 1] = 0;
|
||||
|
||||
return N_puzzle<N>( array );
|
||||
}
|
||||
|
181
microbench/src/bench.c
Normal file
181
microbench/src/bench.c
Normal file
|
@ -0,0 +1,181 @@
|
|||
#include <am.h>
|
||||
#include <benchmark.h>
|
||||
#include <limits.h>
|
||||
#include <klib-macros.h>
|
||||
|
||||
Benchmark *current;
|
||||
Setting *setting;
|
||||
|
||||
static char *hbrk;
|
||||
|
||||
// Current AM_TIMER_UPTIME reading scaled by 1/1000 (the .us field, presumably
// microseconds, becomes milliseconds) and truncated to 32 bits.
static uint32_t uptime_ms() {
  return (uint32_t)(io_read(AM_TIMER_UPTIME).us / 1000);
}
|
||||
|
||||
// The benchmark list
//
// ENTRY turns one row of BENCHMARK_LIST into a designated initializer for a
// Benchmark: the hooks are bound to bench_<name>_prepare / _run / _validate,
// _sname and _desc become the printable name and description, and the three
// setting arguments fill .settings[0..2] (main selects one of them through
// setting_id: 0 = "test", 1 = "train", 2 = "ref").

#define ENTRY(_name, _sname, _s, _m, _l, _desc) \
  { .prepare = bench_##_name##_prepare, \
    .run = bench_##_name##_run, \
    .validate = bench_##_name##_validate, \
    .name = _sname, \
    .desc = _desc, \
    .settings = {_s, _m, _l}, },

// One element per BENCHMARK_LIST row, in list order.
Benchmark benchmarks[] = {
  BENCHMARK_LIST(ENTRY)
};
|
||||
|
||||
// Running a benchmark
|
||||
// Stamp res->msec with the current uptime; bench_done() later turns the
// stamp into an elapsed time.
static void bench_prepare(Result *res) {
  uint32_t started = uptime_ms();
  res->msec = started;
}
|
||||
|
||||
static void bench_reset() {
|
||||
hbrk = (void *)ROUNDUP(heap.start, 8);
|
||||
}
|
||||
|
||||
// Replace the start stamp stored by bench_prepare() with the elapsed
// milliseconds.
static void bench_done(Result *res) {
  uint32_t finished = uptime_ms();
  res->msec = finished - res->msec;
}
|
||||
|
||||
// Decide whether the selected setting fits in the heap.
// Returns NULL when it does, otherwise a short reason to print.
// (The bench argument is not consulted; the global `setting` is.)
static const char *bench_check(Benchmark *bench) {
  uintptr_t heap_lo = (uintptr_t)heap.start;
  uintptr_t heap_hi = (uintptr_t)heap.end;
  uintptr_t avail = heap_hi - heap_lo;
  return (avail < setting->mlim) ? "(insufficient memory)" : NULL;
}
|
||||
|
||||
// Run one timed repetition of the current benchmark and fill in *res
// (elapsed milliseconds and pass/fail).
// NOTE: the hooks are called through the global `current`; the parameter b
// is not used.
static void run_once(Benchmark *b, Result *res) {
  bench_reset();       // reset malloc state
  current->prepare();  // call benchmark's prepare function (not timed)
  bench_prepare(res);  // start timer
  current->run();      // run it
  bench_done(res);     // stop timer, store elapsed time
  res->pass = current->validate();
}
|
||||
|
||||
// Convert a run time into a score: (REF_SCORE / 1000) * setting->ref / msec,
// or 0 when msec is 0. The b and tsc arguments are not used.
static unsigned long score(Benchmark *b, unsigned long tsc, unsigned long msec) {
  unsigned long marks = 0;
  if (msec != 0) {
    marks = (REF_SCORE / 1000) * setting->ref / msec;
  }
  return marks;
}
|
||||
|
||||
int main(const char *args) {
|
||||
const char *setting_name = args;
|
||||
if (args == NULL || strcmp(args, "") == 0) {
|
||||
printf("Empty mainargs. Use \"ref\" by default\n");
|
||||
setting_name = "ref";
|
||||
}
|
||||
int setting_id = -1;
|
||||
|
||||
if (strcmp(setting_name, "test" ) == 0) setting_id = 0;
|
||||
else if (strcmp(setting_name, "train") == 0) setting_id = 1;
|
||||
else if (strcmp(setting_name, "ref" ) == 0) setting_id = 2;
|
||||
else {
|
||||
printf("Invalid mainargs: \"%s\"; "
|
||||
"must be in {test, train, ref}\n", setting_name);
|
||||
halt(1);
|
||||
}
|
||||
|
||||
ioe_init();
|
||||
|
||||
printf("======= Running MicroBench [input *%s*] =======\n", setting_name);
|
||||
|
||||
unsigned long bench_score = 0;
|
||||
int pass = 1;
|
||||
uint32_t t0 = uptime_ms();
|
||||
|
||||
for (int i = 0; i < LENGTH(benchmarks); i ++) {
|
||||
Benchmark *bench = &benchmarks[i];
|
||||
current = bench;
|
||||
setting = &bench->settings[setting_id];
|
||||
const char *msg = bench_check(bench);
|
||||
printf("[%s] %s: ", bench->name, bench->desc);
|
||||
if (msg != NULL) {
|
||||
printf("Ignored %s\n", msg);
|
||||
} else {
|
||||
unsigned long msec = ULONG_MAX;
|
||||
int succ = 1;
|
||||
for (int i = 0; i < REPEAT; i ++) {
|
||||
Result res;
|
||||
run_once(bench, &res);
|
||||
printf(res.pass ? "*" : "X");
|
||||
succ &= res.pass;
|
||||
if (res.msec < msec) msec = res.msec;
|
||||
}
|
||||
|
||||
if (succ) printf(" Passed.");
|
||||
else printf(" Failed.");
|
||||
|
||||
pass &= succ;
|
||||
|
||||
unsigned long cur = score(bench, 0, msec);
|
||||
|
||||
printf("\n");
|
||||
if (setting_id != 0) {
|
||||
printf(" min time: %d ms [%d]\n", (unsigned int)msec, (unsigned int)cur);
|
||||
}
|
||||
|
||||
bench_score += cur;
|
||||
}
|
||||
}
|
||||
uint32_t t1 = uptime_ms();
|
||||
|
||||
bench_score /= LENGTH(benchmarks);
|
||||
|
||||
printf("==================================================\n");
|
||||
printf("MicroBench %s", pass ? "PASS" : "FAIL");
|
||||
if (setting_id == 2) {
|
||||
printf(" %d Marks\n", (unsigned int)bench_score);
|
||||
printf(" vs. %d Marks (%s)\n", REF_SCORE, REF_CPU);
|
||||
} else {
|
||||
printf("\n");
|
||||
}
|
||||
printf("Total time: %d ms\n", t1 - t0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Libraries
|
||||
|
||||
void* bench_alloc(size_t size) {
|
||||
size = (size_t)ROUNDUP(size, 8);
|
||||
char *old = hbrk;
|
||||
hbrk += size;
|
||||
assert((uintptr_t)heap.start <= (uintptr_t)hbrk && (uintptr_t)hbrk < (uintptr_t)heap.end);
|
||||
for (uint64_t *p = (uint64_t *)old; p != (uint64_t *)hbrk; p ++) {
|
||||
*p = 0;
|
||||
}
|
||||
assert((uintptr_t)hbrk - (uintptr_t)heap.start <= setting->mlim);
|
||||
return old;
|
||||
}
|
||||
|
||||
// No-op: individual blocks are never released. The whole heap pointer is
// rewound by bench_reset() before each run instead.
void bench_free(void *ptr) {
}
|
||||
|
||||
// State of the linear congruential generator below.
static uint32_t seed = 1;

// Re-seed the generator; only the low 15 bits of _seed are kept.
void bench_srand(uint32_t _seed) {
  uint32_t low_bits = _seed & 0x7fff;
  seed = low_bits;
}

// Advance the generator (seed = seed * 214013 + 2531011, modulo 2^32) and
// return bits 16..30 of the new state, i.e. a value in [0, 0x7fff].
uint32_t bench_rand() {
  const uint32_t multiplier = (uint32_t)214013L;
  const uint32_t increment = (uint32_t)2531011L;
  seed = seed * multiplier + increment;
  return (seed >> 16) & 0x7fff;
}
|
||||
|
||||
// FNV hash
//
// Hash the bytes in [start, end) FNV-1a style (xor the byte, multiply by the
// 32-bit FNV prime, starting from the FNV offset basis), then apply a final
// shift/xor mixing sequence.
//
// The input is consumed in groups of four bytes while p + 4 < end, so the
// final one to four bytes of the range are never hashed.
//
// NOTE(review): the final mixing is done on a signed int32_t. Right shifts
// of negative values are implementation-defined and the left shifts can
// overflow. The reference checksums depend on the current results, so the
// arithmetic must not be "cleaned up" without regenerating them.
uint32_t checksum(void *start, void *end) {
  const uint32_t x = 16777619;     // 32-bit FNV prime
  uint32_t h1 = 2166136261u;       // 32-bit FNV offset basis
  for (uint8_t *p = (uint8_t*)start; p + 4 < (uint8_t*)end; p += 4) {
    for (int i = 0; i < 4; i ++) {
      h1 = (h1 ^ p[i]) * x;
    }
  }
  int32_t hash = (uint32_t)h1;
  hash += hash << 13;
  hash ^= hash >> 7;
  hash += hash << 3;
  hash ^= hash >> 17;
  hash += hash << 5;
  return hash;
}
|
151
microbench/src/bf/bf.c
Normal file
151
microbench/src/bf/bf.c
Normal file
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
Brainfuck-C ( http://github.com/kgabis/brainfuck-c )
|
||||
Copyright (c) 2012 Krzysztof Gabis
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <benchmark.h>
|
||||
|
||||
// Number of input characters for the current setting (set in bench_bf_prepare).
static int ARR_SIZE;

// The Brainfuck program that the benchmark compiles and executes.
#define CODE ">>+>>>>>,[>+>>,]>+[--[+<<<-]<[<+>-]<[<[->[<<<+>>>>+<-]<<[>>+>[->]<<[<]" \
             "<-]>]>>>+<[[-]<[>+<-]<]>[[>>>]+<<<-<[<<[<<<]>>+>[>>>]<-]<<[<<<]>[>>[>>" \
             ">]<+<<[<<<]>-]]+<<<]+[->>>]>>]>>[.>>>]"

// Opcodes stored in instruction_t.operator by compile_bf().
#define OP_END          0   // end of program
#define OP_INC_DP       1   // '>'
#define OP_DEC_DP       2   // '<'
#define OP_INC_VAL      3   // '+'
#define OP_DEC_VAL      4   // '-'
#define OP_OUT          5   // '.'
#define OP_IN           6   // ','
#define OP_JMP_FWD      7   // '['
#define OP_JMP_BCK      8   // ']'

// Return codes of compile_bf().
#define SUCCESS         0
#define FAILURE         1

// Capacities (in elements) of the PROGRAM, STACK and data arrays.
#define PROGRAM_SIZE    4096
#define STACK_SIZE      512
#define DATA_SIZE       4096

// Bracket-matching stack used while compiling; STACK and SP are file globals.
#define STACK_PUSH(A)   (STACK[SP++] = A)
#define STACK_POP()     (STACK[--SP])
#define STACK_EMPTY()   (SP == 0)
#define STACK_FULL()    (SP == STACK_SIZE)
|
||||
|
||||
// One compiled Brainfuck instruction.
struct instruction_t {
  unsigned short operator;   // one of the OP_* codes
  unsigned short operand;    // for OP_JMP_FWD / OP_JMP_BCK: index of the matching bracket
};

static struct instruction_t *PROGRAM;   // compiled program, PROGRAM_SIZE entries
static unsigned short *STACK;           // indices of '[' still waiting for their ']'
static unsigned int SP;                 // number of entries currently on STACK
static const char *code;                // read cursor into the source text (advanced by compile_bf)
static char *input;                     // read cursor into the input consumed by OP_IN
|
||||
|
||||
static int compile_bf() {
|
||||
unsigned short pc = 0, jmp_pc;
|
||||
for (; *code; code ++) {
|
||||
int c = *code;
|
||||
if (pc >= PROGRAM_SIZE) break;
|
||||
switch (c) {
|
||||
case '>': PROGRAM[pc].operator = OP_INC_DP; break;
|
||||
case '<': PROGRAM[pc].operator = OP_DEC_DP; break;
|
||||
case '+': PROGRAM[pc].operator = OP_INC_VAL; break;
|
||||
case '-': PROGRAM[pc].operator = OP_DEC_VAL; break;
|
||||
case '.': PROGRAM[pc].operator = OP_OUT; break;
|
||||
case ',': PROGRAM[pc].operator = OP_IN; break;
|
||||
case '[':
|
||||
PROGRAM[pc].operator = OP_JMP_FWD;
|
||||
if (STACK_FULL()) {
|
||||
return FAILURE;
|
||||
}
|
||||
STACK_PUSH(pc);
|
||||
break;
|
||||
case ']':
|
||||
if (STACK_EMPTY()) {
|
||||
return FAILURE;
|
||||
}
|
||||
jmp_pc = STACK_POP();
|
||||
PROGRAM[pc].operator = OP_JMP_BCK;
|
||||
PROGRAM[pc].operand = jmp_pc;
|
||||
PROGRAM[jmp_pc].operand = pc;
|
||||
break;
|
||||
default: pc--; break;
|
||||
}
|
||||
pc++;
|
||||
}
|
||||
if (!STACK_EMPTY() || pc == PROGRAM_SIZE) {
|
||||
return FAILURE;
|
||||
}
|
||||
PROGRAM[pc].operator = OP_END;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
static unsigned short *data;
|
||||
static char *output;
|
||||
static int noutput;
|
||||
|
||||
static void execute_bf() {
|
||||
unsigned int pc = 0, ptr = 0;
|
||||
while (PROGRAM[pc].operator != OP_END && ptr < DATA_SIZE) {
|
||||
switch (PROGRAM[pc].operator) {
|
||||
case OP_INC_DP: ptr++; break;
|
||||
case OP_DEC_DP: ptr--; break;
|
||||
case OP_INC_VAL: data[ptr]++; break;
|
||||
case OP_DEC_VAL: data[ptr]--; break;
|
||||
case OP_OUT: output[noutput ++] = data[ptr]; break;
|
||||
case OP_IN: data[ptr] = *(input ++); break;
|
||||
case OP_JMP_FWD: if(!data[ptr]) { pc = PROGRAM[pc].operand; } break;
|
||||
case OP_JMP_BCK: if(data[ptr]) { pc = PROGRAM[pc].operand; } break;
|
||||
default: return;
|
||||
}
|
||||
pc++;
|
||||
}
|
||||
}
|
||||
|
||||
void bench_bf_prepare() {
|
||||
ARR_SIZE = setting->size;
|
||||
SP = 0;
|
||||
PROGRAM = bench_alloc(sizeof(PROGRAM[0]) * PROGRAM_SIZE);
|
||||
STACK = bench_alloc(sizeof(STACK[0]) * STACK_SIZE);
|
||||
data = bench_alloc(sizeof(data[0]) * DATA_SIZE);
|
||||
code = CODE;
|
||||
input = bench_alloc(ARR_SIZE + 1);
|
||||
output = bench_alloc(DATA_SIZE);
|
||||
noutput = 0;
|
||||
|
||||
bench_srand(1);
|
||||
for (int i = 0; i < ARR_SIZE; i ++) {
|
||||
input[i] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"[bench_rand() % 62];
|
||||
}
|
||||
}
|
||||
|
||||
void bench_bf_run() {
|
||||
compile_bf();
|
||||
execute_bf();
|
||||
}
|
||||
|
||||
int bench_bf_validate() {
|
||||
uint32_t cs = checksum(output, output + noutput);
|
||||
return noutput == ARR_SIZE && cs == setting->checksum;
|
||||
}
|
138
microbench/src/dinic/dinic.cc
Normal file
138
microbench/src/dinic/dinic.cc
Normal file
|
@ -0,0 +1,138 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
const int INF = 0x3f3f3f;
|
||||
|
||||
// Directed edge of the flow network: endpoints, capacity and current flow.
struct Edge {
  int from, to, cap, flow;

  // Leaves all members uninitialised.
  Edge() {}

  Edge(int from, int to, int cap, int flow)
    : from(from), to(to), cap(cap), flow(flow) {}
};
|
||||
|
||||
// Smaller of two values; y is returned when neither compares less.
template<typename T>
static inline T min(T x, T y) {
  if (x < y) {
    return x;
  }
  return y;
}
|
||||
|
||||
// Max-flow solver (BFS level graph + DFS augmentation) over a graph kept as
// linked adjacency lists in flat arrays obtained from bench_alloc().
struct Dinic {
  int n, m, s, t;                       // node count, edge count, source, sink
  Edge *edges;                          // edge pool; AddEdge stores each edge followed by its reverse
  int *head, *nxt, *d, *cur, *queue;    // head[u]: first edge of u; nxt[e]: next edge in the same list;
                                        // d[u]: BFS level; cur[u]: list start used by DFS; queue: BFS queue
  bool *vis;                            // visited flags of the latest BFS

  // Allocate all arrays for a graph of n nodes and mark every list empty.
  // bench_dinic_prepare calls this with n = 2*N + 2, so nold is N and maxm is
  // 2 * (N*N + 2*N): every edge that can be added plus its reverse.
  void init(int n) {
    int nold = (n - 2) / 2;
    int maxm = (nold * nold + nold * 2) * 2;

    edges = (Edge *)bench_alloc(sizeof(Edge) * maxm);
    head = (int *)bench_alloc(sizeof(int) * n);
    nxt = (int *)bench_alloc(sizeof(int) * maxm);
    vis = (bool *)bench_alloc(sizeof(bool) * n);
    d = (int *)bench_alloc(sizeof(int) * n);
    cur = (int *)bench_alloc(sizeof(int) * n);
    queue = (int *)bench_alloc(sizeof(int) * n);

    this->n = n;
    for (int i = 0; i < n; i ++) {
      head[i] = -1;   // -1 terminates an adjacency list
    }
    m = 0;
  }

  // Add edge u->v with capacity c plus a zero-capacity reverse edge v->u.
  // Edges with c == 0 are dropped. The pair occupies consecutive indices
  // (even, odd), which is what lets DFS reach the reverse edge as i^1.
  void AddEdge(int u, int v, int c) {
    if (c == 0) return;
    edges[m] = Edge(u, v, c, 0);
    nxt[m] = head[u];
    head[u] = m++;
    edges[m] = Edge(v, u, 0, 0);
    nxt[m] = head[v];
    head[v] = m++;
  }

  // Label nodes reachable from s through edges with residual capacity with
  // their BFS distance d[]. Returns whether t was reached.
  bool BFS() {
    for (int i = 0; i < n; i ++) vis[i] = 0;
    int qf = 0, qr = 0;   // queue front / rear
    queue[qr ++] = s;
    d[s] = 0;
    vis[s] = 1;
    while (qf != qr) {
      int x = queue[qf ++];
      for (int i = head[x]; i != -1; i = nxt[i]) {
        Edge& e = edges[i];
        if (!vis[e.to] && e.cap > e.flow) {
          vis[e.to] = 1;
          d[e.to] = d[x] + 1;
          queue[qr ++] = e.to;
        }
      }
    }
    return vis[t];
  }

  // Push up to a units of flow from x towards t along edges that go exactly
  // one BFS level deeper. Returns the amount actually pushed.
  // NOTE(review): i is a copy of cur[x], so cur[x] is never advanced here and
  // every call rescans the list from its start. Confirm whether that is
  // intended before changing it: it would alter the measured workload.
  int DFS(int x, int a) {
    if (x == t || a == 0) return a;
    int flow = 0, f;
    for (int i = cur[x]; i != -1; i = nxt[i]) {
      Edge& e = edges[i];
      if (d[x] + 1 == d[e.to] && (f = DFS(e.to, min(a, e.cap-e.flow))) > 0) {
        e.flow += f;
        edges[i^1].flow -= f;   // the reverse edge is the partner index
        flow += f;
        a -= f;
        if (a == 0) break;
      }
    }
    return flow;
  }

  // Compute the maximum flow from s to t: repeat BFS + DFS until t is no
  // longer reachable in the residual graph.
  int Maxflow(int s, int t) {
    this -> s = s; this -> t = t;
    int flow = 0;
    while (BFS()) {
      for (int i = 0; i < n; i++)
        cur[i] = head[i];
      flow += DFS(s, INF);
    }
    return flow;
  }
};
|
||||
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
||||
static Dinic *G;
|
||||
static int ans;
|
||||
|
||||
void bench_dinic_prepare() {
|
||||
N = setting->size;
|
||||
bench_srand(1);
|
||||
int s = 2 * N, t = 2 * N + 1;
|
||||
G = (Dinic*)bench_alloc(sizeof(Dinic));
|
||||
G->init(2 * N + 2);
|
||||
for (int i = 0; i < N; i ++)
|
||||
for (int j = 0; j < N; j ++) {
|
||||
G->AddEdge(i, N + j, bench_rand() % 10);
|
||||
}
|
||||
|
||||
for (int i = 0; i < N; i ++) {
|
||||
G->AddEdge(s, i, bench_rand() % 1000);
|
||||
G->AddEdge(N + i, t, bench_rand() % 1000);
|
||||
}
|
||||
}
|
||||
|
||||
// Timed part: compute the maximum flow from node 2N to node 2N+1.
void bench_dinic_run() {
  const int source = 2 * N;
  const int sink = 2 * N + 1;
  ans = G->Maxflow(source, sink);
}
|
||||
|
||||
int bench_dinic_validate() {
|
||||
return (uint32_t)ans == setting->checksum;
|
||||
}
|
||||
}
|
||||
|
||||
|
64
microbench/src/fib/fib.c
Normal file
64
microbench/src/fib/fib.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
// f(n) = (f(n-1) + f(n-2) + .. f(n-m)) mod 2^32
|
||||
|
||||
#define N 2147483603
|
||||
static int M;
|
||||
|
||||
static void put(uint32_t *m, int i, int j, uint32_t data) {
|
||||
m[i * M + j] = data;
|
||||
}
|
||||
|
||||
static uint32_t get(uint32_t *m, int i, int j) {
|
||||
return m[i * M + j];
|
||||
}
|
||||
|
||||
static inline void mult(uint32_t *c, uint32_t *a, uint32_t *b) {
|
||||
for (int i = 0; i < M; i ++)
|
||||
for (int j = 0; j < M; j ++) {
|
||||
put(c, i, j, 0);
|
||||
for (int k = 0; k < M; k ++) {
|
||||
put(c, i, j, get(c, i, j) + get(a, i, k) * get(b, k, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void assign(uint32_t *a, uint32_t *b) {
|
||||
for (int i = 0; i < M; i ++)
|
||||
for (int j = 0; j < M; j ++)
|
||||
put(a, i, j, get(b, i, j));
|
||||
}
|
||||
|
||||
static uint32_t *A, *ans, *T, *tmp;
|
||||
|
||||
void bench_fib_prepare() {
|
||||
M = setting->size;
|
||||
int sz = sizeof(uint32_t) * M * M;
|
||||
A = bench_alloc(sz);
|
||||
T = bench_alloc(sz);
|
||||
ans = bench_alloc(sz);
|
||||
tmp = bench_alloc(sz);
|
||||
}
|
||||
|
||||
void bench_fib_run() {
|
||||
for (int i = 0; i < M; i ++)
|
||||
for (int j = 0; j < M; j ++) {
|
||||
uint32_t x = (i == M - 1 || j == i + 1);
|
||||
put(A, i, j, x);
|
||||
put(T, i, j, x);
|
||||
put(ans, i, j, i == j);
|
||||
}
|
||||
|
||||
for (int n = N; n > 0; n >>= 1) {
|
||||
if (n & 1) {
|
||||
mult(tmp, ans, T);
|
||||
assign(ans, tmp);
|
||||
}
|
||||
mult(tmp, T, T);
|
||||
assign(T, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
int bench_fib_validate() {
|
||||
return get(ans, M-1, M-1) == setting->checksum;
|
||||
}
|
29
microbench/src/lzip/lzip.c
Normal file
29
microbench/src/lzip/lzip.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
#include "quicklz.h"
|
||||
#include <benchmark.h>
|
||||
|
||||
static int SIZE;
|
||||
|
||||
static qlz_state_compress *state;
|
||||
static char *blk;
|
||||
static char *compress;
|
||||
static int len;
|
||||
|
||||
void bench_lzip_prepare() {
|
||||
SIZE = setting->size;
|
||||
bench_srand(1);
|
||||
state = bench_alloc(sizeof(qlz_state_compress));
|
||||
blk = bench_alloc(SIZE);
|
||||
compress = bench_alloc(SIZE + 400);
|
||||
for (int i = 0; i < SIZE; i ++) {
|
||||
blk[i] = 'a' + bench_rand() % 26;
|
||||
}
|
||||
}
|
||||
|
||||
void bench_lzip_run() {
|
||||
len = qlz_compress(blk, compress, SIZE, state);
|
||||
}
|
||||
|
||||
int bench_lzip_validate() {
|
||||
return checksum(compress, compress + len) == setting->checksum;
|
||||
}
|
||||
|
761
microbench/src/lzip/quicklz.c
Normal file
761
microbench/src/lzip/quicklz.c
Normal file
|
@ -0,0 +1,761 @@
|
|||
// Fast data compression library
|
||||
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
|
||||
// lar@quicklz.com
|
||||
//
|
||||
// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
|
||||
// released into public must be open source) or under a commercial license if such
|
||||
// has been acquired (see http://www.quicklz.com/order.html). The commercial license
|
||||
// does not cover derived or ported versions created by third parties under GPL.
|
||||
|
||||
// 1.5.0 final
|
||||
|
||||
#include "quicklz.h"
|
||||
|
||||
#if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0
|
||||
#error quicklz.c and quicklz.h have different versions
|
||||
#endif
|
||||
|
||||
#define MINOFFSET 2
|
||||
#define UNCONDITIONAL_MATCHLEN 6
|
||||
#define UNCOMPRESSED_END 4
|
||||
#define CWORD_LEN 4
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
|
||||
#define OFFSET_BASE source
|
||||
#define CAST (ui32)(size_t)
|
||||
#else
|
||||
#define OFFSET_BASE 0
|
||||
#define CAST
|
||||
#endif
|
||||
|
||||
int qlz_get_setting(int setting)
|
||||
{
|
||||
switch (setting)
|
||||
{
|
||||
case 0: return QLZ_COMPRESSION_LEVEL;
|
||||
case 1: return sizeof(qlz_state_compress);
|
||||
case 2: return sizeof(qlz_state_decompress);
|
||||
case 3: return QLZ_STREAMING_BUFFER;
|
||||
#ifdef QLZ_MEMORY_SAFE
|
||||
case 6: return 1;
|
||||
#else
|
||||
case 6: return 0;
|
||||
#endif
|
||||
case 7: return QLZ_VERSION_MAJOR;
|
||||
case 8: return QLZ_VERSION_MINOR;
|
||||
case 9: return QLZ_VERSION_REVISION;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
// Return 1 when src[1] .. src[n] all equal src[0], otherwise 0.
// The bytes are checked from src[n] downwards; n == 0 yields 1.
static int same(const unsigned char *src, size_t n)
{
	for (; n > 0; n--)
	{
		if (src[n] != src[0])
			return 0;
	}
	return 1;
}
|
||||
#endif
|
||||
|
||||
// Clear the compressor's hash table: at level 1 every slot's offset is
// zeroed, at the other levels every slot's counter is zeroed.
static void reset_table_compress(qlz_state_compress *state)
{
	int slot = 0;
	while (slot < QLZ_HASH_VALUES)
	{
#if QLZ_COMPRESSION_LEVEL == 1
		state->hash[slot].offset = 0;
#else
		state->hash_counter[slot] = 0;
#endif
		slot++;
	}
}
|
||||
|
||||
// Clear the decompressor's hash counters. Only level 2 has anything to
// reset; at the other levels the function does nothing.
static void reset_table_decompress(qlz_state_decompress *state)
{
#if QLZ_COMPRESSION_LEVEL == 2
	int slot;
	for(slot = 0; slot < QLZ_HASH_VALUES; slot++)
		state->hash_counter[slot] = 0;
#else
	(void)state;
#endif
}
|
||||
|
||||
// Fold the fetched bytes into a hash-table index in [0, QLZ_HASH_VALUES).
static __inline ui32 hash_func(ui32 i)
{
#if QLZ_COMPRESSION_LEVEL == 2
	ui32 mixed = (i >> 9) ^ (i >> 13) ^ i;
#else
	ui32 mixed = (i >> 12) ^ i;
#endif
	return mixed & (QLZ_HASH_VALUES - 1);
}
|
||||
|
||||
// Assemble `bytes` bytes starting at src into a little-endian value, one byte
// at a time, so no unaligned word access is made.
// bytes must be 1..4; any other count yields 0.
static __inline ui32 fast_read(void const *src, ui32 bytes)
{
	const uint8_t *in = (const uint8_t *)src;
	uint32_t ret = 0;
	if (bytes >= 1 && bytes <= 4) {
		for (uint32_t i = 0; i < bytes; i ++) {
			// Widen before shifting: a uint8_t promotes to int, and shifting a
			// byte >= 0x80 left by 24 in int is undefined behaviour.
			ret |= (uint32_t)in[i] << (i * 8);
		}
	}
	return ret;
}
|
||||
|
||||
// Hash-table index for the three bytes starting at src.
static __inline ui32 hashat(const unsigned char *src)
{
	return hash_func(fast_read(src, 3));
}
|
||||
|
||||
// Store the low `bytes` bytes of f at dst, least-significant byte first, one
// byte at a time, so no unaligned word access is made.
// bytes must not exceed 4 (the callers in this file pass 2, 3, 4 or CWORD_LEN).
//
// The bytes are produced by shifting instead of copying f's in-memory
// representation: the output is then little-endian on every host, matching
// fast_read(), and is unchanged on little-endian hosts.
static __inline void fast_write(ui32 f, void *dst, size_t bytes)
{
	unsigned char *out = (unsigned char *)dst;
	for (size_t i = 0; i != bytes; i ++) {
		out[i] = (unsigned char)(f >> (8 * i));
	}
}
|
||||
|
||||
|
||||
size_t qlz_size_decompressed(const char *source)
|
||||
{
|
||||
ui32 n, r;
|
||||
n = (((*source) & 2) == 2) ? 4 : 1;
|
||||
r = fast_read(source + 1 + n, n);
|
||||
r = r & (0xffffffff >> ((4 - n)*8));
|
||||
return r;
|
||||
}
|
||||
|
||||
size_t qlz_size_compressed(const char *source)
|
||||
{
|
||||
ui32 n, r;
|
||||
n = (((*source) & 2) == 2) ? 4 : 1;
|
||||
r = fast_read(source + 1, n);
|
||||
r = r & (0xffffffff >> ((4 - n)*8));
|
||||
return r;
|
||||
}
|
||||
|
||||
// Header length in bytes: one flag byte plus two size fields, each 4 bytes
// wide when bit 1 of the flag byte is set and 1 byte wide otherwise
// (so 9 or 3).
size_t qlz_size_header(const char *source)
{
	size_t field_width = 1;
	if (((*source) & 2) == 2)
		field_width = 4;
	return 2*field_width + 1;
}
|
||||
|
||||
|
||||
// Stub: this port does not provide an implementation (the note below gives
// unaligned memory access as the reason). Any call trips the assertion; the
// arguments are ignored.
static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n)
{
	assert(0); // unaligned memory access
}
|
||||
|
||||
// Record position s in the decompressor's hash table under the hash of the
// three bytes at s.
//   level 1: the slot keeps just this position and its counter is set to 1;
//   level 2: the position goes into the slot's ring of QLZ_POINTERS entries,
//            indexed by the slot counter, which is then incremented;
//   other levels: nothing is recorded.
static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s)
{
#if QLZ_COMPRESSION_LEVEL == 1
	ui32 hash;
	hash = hashat(s);
	state->hash[hash].offset = s;
	state->hash_counter[hash] = 1;
#elif QLZ_COMPRESSION_LEVEL == 2
	ui32 hash;
	unsigned char c;
	hash = hashat(s);
	c = state->hash_counter[hash];
	state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = s;
	c++;
	state->hash_counter[hash] = c;
#endif
	// Keep the parameters "used" in configurations where no branch above applies.
	(void)state;
	(void)s;
}
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL <= 2
|
||||
// Advance *lh one byte at a time until it reaches max, registering every new
// position with update_hash(). *lh is left equal to max (when it started below it).
static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max)
{
	while(*lh < max)
	{
		unsigned char *next = *lh + 1;
		*lh = next;
		update_hash(state, next);
	}
}
|
||||
#endif
|
||||
|
||||
// Compress `size` bytes from source into destination and return the number of
// bytes written (never less than 9), or 0 when the data is not compressing
// well enough and should be stored uncompressed instead.
//
// Output layout: a CWORD_LEN-byte control word followed by the items it
// describes, repeated. Each item contributes one bit to the control word
// (1 = match reference, 0 = literal byte). cword_val starts as 1U << 31 and is
// shifted right once per item, so its lowest bit becomes 1 when the word is
// full and must be flushed to cword_ptr.
//
// The match search and the match encoding depend on QLZ_COMPRESSION_LEVEL.
static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state)
{
	const unsigned char *last_byte = source + size - 1;
	const unsigned char *src = source;
	unsigned char *cword_ptr = destination;          // where the current control word will be stored
	unsigned char *dst = destination + CWORD_LEN;    // next output byte (space for the control word is reserved)
	ui32 cword_val = 1U << 31;
	// Matches are only searched for up to this position; the rest is emitted as literals.
	const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
	ui32 fetch = 0;          // the three input bytes at src, little-endian
	unsigned int lits = 0;   // literals emitted since the last match (used at level 1)

	(void) lits;

	if(src <= last_matchstart)
		fetch = fast_read(src, 3);

	// Main loop: one literal or one match per iteration.
	while(src <= last_matchstart)
	{
		// Control word full: check the ratio, flush it and start a new one.
		if ((cword_val & 1) == 1)
		{
			// store uncompressed if compression ratio is too low
			if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5))
				return 0;

			fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);

			cword_ptr = dst;
			dst += CWORD_LEN;
			cword_val = 1U << 31;
			fetch = fast_read(src, 3);
		}
#if QLZ_COMPRESSION_LEVEL == 1
		{
			const unsigned char *o;   // previous position stored in this hash slot
			ui32 hash, cached;

			// Look up the slot, remember the old entry, then overwrite it with
			// the current position and bytes.
			hash = hash_func(fetch);
			cached = fetch ^ state->hash[hash].cache;   // 0 when the slot cached the same three bytes
			state->hash[hash].cache = fetch;

			o = state->hash[hash].offset + OFFSET_BASE;
			state->hash[hash].offset = CAST(src - OFFSET_BASE);

			// Accept the candidate when the cached bytes match, the slot was
			// in use, and it is either far enough back or a run of one
			// repeated byte (checked with same()).
			if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6))))
			{
				if (*(o + 3) != *(src + 3))
				{
					// Exactly three bytes match: 2-byte item, hash in the
					// upper bits, (length - 2) in the low 4 bits.
					hash <<= 4;
					cword_val = (cword_val >> 1) | (1U << 31);
					fast_write((3 - 2) | hash, dst, 2);
					src += 3;
					dst += 2;
				}
				else
				{
					// At least four bytes match: extend the match as far as allowed.
					const unsigned char *old_src = src;
					size_t matchlen;
					hash <<= 4;

					cword_val = (cword_val >> 1) | (1U << 31);
					src += 4;

					if(*(o + (src - old_src)) == *src)
					{
						src++;
						if(*(o + (src - old_src)) == *src)
						{
							// Cap the match at 255 bytes and short of the uncompressed tail.
							size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1;
							size_t remaining = q > 255 ? 255 : q;
							src++;
							while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining)
								src++;
						}
					}

					matchlen = src - old_src;
					if (matchlen < 18)
					{
						// Short match: length fits in the low 4 bits of a 2-byte item.
						fast_write((ui32)(matchlen - 2) | hash, dst, 2);
						dst += 2;
					}
					else
					{
						// Long match: 3-byte item with the length in the third byte.
						fast_write((ui32)(matchlen << 16) | hash, dst, 3);
						dst += 3;
					}
				}
				fetch = fast_read(src, 3);
				lits = 0;
			}
			else
			{
				// No usable match: emit one literal and slide the 3-byte window by one.
				lits++;
				*dst = *src;
				src++;
				dst++;
				cword_val = (cword_val >> 1);
				fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16);
			}
		}
#elif QLZ_COMPRESSION_LEVEL >= 2
		{
			const unsigned char *o, *offset2;   // candidate under test / best candidate so far
			ui32 hash, matchlen, k, m, best_k = 0;
			unsigned char c;                    // number of positions recorded in this slot (wraps)
			size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 255 : (last_byte - UNCOMPRESSED_END - src + 1);
			(void)best_k;


			//hash = hashat(src);
			fetch = fast_read(src, 3);
			hash = hash_func(fetch);

			c = state->hash_counter[hash];

			// Candidate 0 of the slot.
			offset2 = state->hash[hash].offset[0];
			if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0)
			{
				matchlen = 3;
				if(*(offset2 + matchlen) == *(src + matchlen))
				{
					matchlen = 4;
					while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining)
						matchlen++;
				}
			}
			else
				matchlen = 0;
			// Remaining candidates of the slot: keep the longest match.
			for(k = 1; k < QLZ_POINTERS && c > k; k++)
			{
				o = state->hash[hash].offset[k];
#if QLZ_COMPRESSION_LEVEL == 3
				if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
#elif QLZ_COMPRESSION_LEVEL == 2
				if(*(src + matchlen) == *(o + matchlen) && ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
#endif
				{
					m = 3;
					while(*(o + m) == *(src + m) && m < remaining)
						m++;
#if QLZ_COMPRESSION_LEVEL == 3
					if ((m > matchlen) || (m == matchlen && o > offset2))
#elif QLZ_COMPRESSION_LEVEL == 2
					if (m > matchlen)
#endif
					{
						offset2 = o;
						matchlen = m;
						best_k = k;
					}
				}
			}
			o = offset2;
			// Record the current position in the slot's ring of QLZ_POINTERS entries.
			state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
			c++;
			state->hash_counter[hash] = c;

#if QLZ_COMPRESSION_LEVEL == 3
			if(matchlen > 2 && src - o < 131071)
			{
				ui32 u;
				size_t offset = src - o;

				// Also record every position covered by the match.
				for(u = 1; u < matchlen; u++)
				{
					hash = hashat(src + u);
					c = state->hash_counter[hash]++;
					state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u;
				}

				cword_val = (cword_val >> 1) | (1U << 31);
				src += matchlen;

				// Pick the shortest item (1 to 4 bytes) that can hold this
				// length/offset pair; the low two bits select the format.
				if(matchlen == 3 && offset <= 63)
				{
					*dst = (unsigned char)(offset << 2);
					dst++;
				}
				else if (matchlen == 3 && offset <= 16383)
				{
					ui32 f = (ui32)((offset << 2) | 1);
					fast_write(f, dst, 2);
					dst += 2;
				}
				else if (matchlen <= 18 && offset <= 1023)
				{
					ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2;
					fast_write(f, dst, 2);
					dst += 2;
				}

				else if(matchlen <= 33)
				{
					ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3;
					fast_write(f, dst, 3);
					dst += 3;
				}
				else
				{
					ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3;
					fast_write(f, dst, 4);
					dst += 4;
				}
			}
			else
			{
				// Literal.
				*dst = *src;
				src++;
				dst++;
				cword_val = (cword_val >> 1);
			}
#elif QLZ_COMPRESSION_LEVEL == 2

			if(matchlen > 2)
			{
				cword_val = (cword_val >> 1) | (1U << 31);
				src += matchlen;

				// The item holds the ring index (best_k), the length and the hash.
				if (matchlen < 10)
				{
					ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5);
					fast_write(f, dst, 2);
					dst += 2;
				}
				else
				{
					ui32 f = best_k | (matchlen << 16) | (hash << 5);
					fast_write(f, dst, 3);
					dst += 3;
				}
			}
			else
			{
				// Literal.
				*dst = *src;
				src++;
				dst++;
				cword_val = (cword_val >> 1);
			}
#endif
		}
#endif
	}
	// Tail: the remaining bytes are always emitted as literals.
	while (src <= last_byte)
	{
		if ((cword_val & 1) == 1)
		{
			fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
			cword_ptr = dst;
			dst += CWORD_LEN;
			cword_val = 1U << 31;
		}
#if QLZ_COMPRESSION_LEVEL < 3
		// Keep the hash table updated while at least four bytes remain.
		if (src <= last_byte - 3)
		{
#if QLZ_COMPRESSION_LEVEL == 1
			ui32 hash, fetch;
			fetch = fast_read(src, 3);
			hash = hash_func(fetch);
			state->hash[hash].offset = CAST(src - OFFSET_BASE);
			state->hash[hash].cache = fetch;
#elif QLZ_COMPRESSION_LEVEL == 2
			ui32 hash;
			unsigned char c;
			hash = hashat(src);
			c = state->hash_counter[hash];
			state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
			c++;
			state->hash_counter[hash] = c;
#endif
		}
#endif
		*dst = *src;
		src++;
		dst++;
		cword_val = (cword_val >> 1);
	}

	// Shift the unused bits out of the last, partly filled control word and store it.
	while((cword_val & 1) != 1)
		cword_val = (cword_val >> 1);

	fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);

	// min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument
	return dst - destination < 9 ? 9 : dst - destination;
}
|
||||
|
||||
// Decode one compressed packet.
//   source      - start of the packet (header included; the payload begins
//                 qlz_size_header(source) bytes in)
//   destination - where the `size` decompressed bytes are written
//   size        - number of bytes to produce
//   state       - hash tables used by levels 1 and 2 to resolve matches
//   history     - lowest address a match may refer to; only checked when
//                 QLZ_MEMORY_SAFE is defined
// Returns `size` once the whole output has been written. Returns 0 only in
// QLZ_MEMORY_SAFE builds, when one of the bounds checks fails.
static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history)
|
||||
{
|
||||
const unsigned char *src = source + qlz_size_header((const char *)source);
|
||||
unsigned char *dst = destination;
|
||||
const unsigned char *last_destination_byte = destination + size - 1;
|
||||
// Control word: bit 0 describes the next item (1 = match, 0 = literal).
// The value 1 means "exhausted", which forces a reload on the first iteration.
ui32 cword_val = 1;
|
||||
// Once dst reaches this position the loop switches to the byte-by-byte tail
// copy below instead of the 4-byte literal fast path.
const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
|
||||
unsigned char *last_hashed = destination - 1;
|
||||
const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1;
|
||||
// bitlut[x] = number of trailing zero bits in the 4-bit value x (4 for x == 0),
// i.e. how many consecutive literal flags sit at the bottom of the control word.
static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
|
||||
|
||||
// These are unused in some build configurations; the casts silence warnings.
(void) last_source_byte;
|
||||
(void) last_hashed;
|
||||
(void) state;
|
||||
(void) history;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
ui32 fetch;
|
||||
|
||||
// Control word used up: read the next one from the input stream.
if (cword_val == 1)
|
||||
{
|
||||
#ifdef QLZ_MEMORY_SAFE
|
||||
if(src + CWORD_LEN - 1 > last_source_byte)
|
||||
return 0;
|
||||
#endif
|
||||
cword_val = fast_read(src, CWORD_LEN);
|
||||
src += CWORD_LEN;
|
||||
}
|
||||
|
||||
#ifdef QLZ_MEMORY_SAFE
|
||||
if(src + 4 - 1 > last_source_byte)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
// Read 4 input bytes up front; how many are consumed depends on the item.
fetch = fast_read(src, 4);
|
||||
|
||||
// Flag bit set: the item is a match (copy of earlier output).
if ((cword_val & 1) == 1)
|
||||
{
|
||||
ui32 matchlen;
|
||||
const unsigned char *offset2;
|
||||
|
||||
// Level 1 token: bits 4..15 index the hash table; the low nibble is
// matchlen - 2, or 0 when the length is stored in a third byte.
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
ui32 hash;
|
||||
cword_val = cword_val >> 1;
|
||||
hash = (fetch >> 4) & 0xfff;
|
||||
offset2 = (const unsigned char *)(size_t)state->hash[hash].offset;
|
||||
|
||||
if((fetch & 0xf) != 0)
|
||||
{
|
||||
matchlen = (fetch & 0xf) + 2;
|
||||
src += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
matchlen = *(src + 2);
|
||||
src += 3;
|
||||
}
|
||||
|
||||
// Level 2 token: bits 0..1 select one of the bucket's pointers, bits 2..4 are
// matchlen - 2 (0 = length stored in a third byte), bits 5..15 are the hash.
#elif QLZ_COMPRESSION_LEVEL == 2
|
||||
ui32 hash;
|
||||
unsigned char c;
|
||||
cword_val = cword_val >> 1;
|
||||
hash = (fetch >> 5) & 0x7ff;
|
||||
c = (unsigned char)(fetch & 0x3);
|
||||
offset2 = state->hash[hash].offset[c];
|
||||
|
||||
if((fetch & (28)) != 0)
|
||||
{
|
||||
matchlen = ((fetch >> 2) & 0x7) + 2;
|
||||
src += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
matchlen = *(src + 2);
|
||||
src += 3;
|
||||
}
|
||||
|
||||
// Level 3 token: the low bits select one of five encodings of
// (offset, matchlen), 1 to 4 bytes long; the match source is dst - offset.
#elif QLZ_COMPRESSION_LEVEL == 3
|
||||
ui32 offset;
|
||||
cword_val = cword_val >> 1;
|
||||
if ((fetch & 3) == 0)
|
||||
{
|
||||
offset = (fetch & 0xff) >> 2;
|
||||
matchlen = 3;
|
||||
src++;
|
||||
}
|
||||
else if ((fetch & 2) == 0)
|
||||
{
|
||||
offset = (fetch & 0xffff) >> 2;
|
||||
matchlen = 3;
|
||||
src += 2;
|
||||
}
|
||||
else if ((fetch & 1) == 0)
|
||||
{
|
||||
offset = (fetch & 0xffff) >> 6;
|
||||
matchlen = ((fetch >> 2) & 15) + 3;
|
||||
src += 2;
|
||||
}
|
||||
else if ((fetch & 127) != 3)
|
||||
{
|
||||
offset = (fetch >> 7) & 0x1ffff;
|
||||
matchlen = ((fetch >> 2) & 0x1f) + 2;
|
||||
src += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
offset = (fetch >> 15);
|
||||
matchlen = ((fetch >> 7) & 255) + 3;
|
||||
src += 4;
|
||||
}
|
||||
|
||||
offset2 = dst - offset;
|
||||
#endif
|
||||
|
||||
// Reject matches that start outside [history, dst - MINOFFSET - 1] or that
// would write past the permitted end of the output.
#ifdef QLZ_MEMORY_SAFE
|
||||
if(offset2 < history || offset2 > dst - MINOFFSET - 1)
|
||||
return 0;
|
||||
|
||||
if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1))
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
memcpy_up(dst, offset2, matchlen);
|
||||
dst += matchlen;
|
||||
|
||||
// Levels 1 and 2 keep the decoder's hash table in step with the output
// (update_hash_upto is defined elsewhere in this file).
#if QLZ_COMPRESSION_LEVEL <= 2
|
||||
update_hash_upto(state, &last_hashed, dst - matchlen);
|
||||
last_hashed = dst - 1;
|
||||
#endif
|
||||
}
|
||||
// Flag bit clear: literal data.
else
|
||||
{
|
||||
if (dst < last_matchstart)
|
||||
{
|
||||
// n = run of literal flags at the bottom of the control word (0..4).
// Four bytes are always copied, but only n of them are consumed.
unsigned int n = bitlut[cword_val & 0xf];
|
||||
memcpy_up(dst, src, 4);
|
||||
cword_val = cword_val >> n;
|
||||
dst += n;
|
||||
src += n;
|
||||
#if QLZ_COMPRESSION_LEVEL <= 2
|
||||
update_hash_upto(state, &last_hashed, dst - 3);
|
||||
#endif
|
||||
}
|
||||
// Tail of the output: copy the remaining bytes one at a time and finish.
else
|
||||
{
|
||||
while(dst <= last_destination_byte)
|
||||
{
|
||||
// Step over a control word without reading it; in this loop cword_val
// only counts down to the next control word position.
if (cword_val == 1)
|
||||
{
|
||||
src += CWORD_LEN;
|
||||
cword_val = 1U << 31;
|
||||
}
|
||||
#ifdef QLZ_MEMORY_SAFE
|
||||
if(src >= last_source_byte + 1)
|
||||
return 0;
|
||||
#endif
|
||||
*dst = *src;
|
||||
dst++;
|
||||
src++;
|
||||
cword_val = cword_val >> 1;
|
||||
}
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL <= 2
|
||||
update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant
|
||||
#endif
|
||||
return size;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compress `size` bytes from `source` into `destination` and return the
// length of the resulting packet (header included). Returns 0 when `size`
// is 0 or larger than 0xffffffff - 400.
//
// Packets shorter than 216 input bytes get a 3-byte header, all others a
// 9-byte header. If the core compressor reports 0 bytes, the input is stored
// verbatim after the header and the "compressed" flag stays clear.
//
// Header byte 0 layout (bit 7 .. bit 0): 0 1 S S L L H C
//   C = payload is compressed, H = 9-byte header, LL = compression level,
//   SS = streaming buffer setting, bit 6 always set.
size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state)
{
	size_t packet_len;
	ui32 is_compressed;
	size_t header_len;

	if (size == 0 || size > 0xffffffff - 400)
		return 0;

	header_len = (size < 216) ? 3 : 9;

#if QLZ_STREAMING_BUFFER > 0
	if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER)
#endif
	{
		// Non-streaming path (or the streaming buffer would overflow):
		// compress straight from the caller's buffer with a fresh table.
		reset_table_compress(state);
		packet_len = header_len + qlz_compress_core((const unsigned char *)source, (unsigned char *)destination + header_len, size, state);
#if QLZ_STREAMING_BUFFER > 0
		reset_table_compress(state);
#endif
		if (packet_len != header_len)
		{
			is_compressed = 1;
		}
		else
		{
			// Core produced nothing: store the data uncompressed.
			bench_memcpy(destination + header_len, source, size);
			packet_len = size + header_len;
			is_compressed = 0;
		}
		state->stream_counter = 0;
	}
#if QLZ_STREAMING_BUFFER > 0
	else
	{
		// Streaming path: append the input to the history buffer and
		// compress from there.
		unsigned char *staged = state->stream_buffer + state->stream_counter;

		bench_memcpy(staged, source, size);
		packet_len = header_len + qlz_compress_core(staged, (unsigned char *)destination + header_len, size, state);

		if (packet_len != header_len)
		{
			is_compressed = 1;
		}
		else
		{
			bench_memcpy(destination + header_len, staged, size);
			packet_len = size + header_len;
			is_compressed = 0;
			reset_table_compress(state);
		}
		state->stream_counter += size;
	}
#endif

	if (header_len != 3)
	{
		// Long header: two 32-bit length fields.
		destination[0] = (unsigned char)(2 | is_compressed);
		fast_write((ui32)packet_len, destination + 1, 4);
		fast_write((ui32)size, destination + 5, 4);
	}
	else
	{
		// Short header: both lengths fit in one byte each.
		destination[0] = (unsigned char)(0 | is_compressed);
		destination[1] = (unsigned char)packet_len;
		destination[2] = (unsigned char)size;
	}

	destination[0] |= (QLZ_COMPRESSION_LEVEL << 2);
	destination[0] |= (1 << 6);
	destination[0] |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 1 : (QLZ_STREAMING_BUFFER == 1000000 ? 2 : 3))) << 4);

	return packet_len;
}
|
||||
|
||||
// Decompress the packet at `source` into `destination` and return the number
// of bytes produced (the value comes from qlz_decompress_core for compressed
// packets, and from the packet header for stored ones).
//
// Bit 0 of the first header byte says whether the payload is compressed; a
// stored payload is simply copied out from behind the header.
size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state)
{
	size_t out_len = qlz_size_decompressed(source);

#if QLZ_STREAMING_BUFFER > 0
	if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER)
#endif
	{
		// Non-streaming path (or the streaming buffer would overflow).
		if ((*source & 1) != 1)
		{
			bench_memcpy(destination, source + qlz_size_header(source), out_len);
		}
		else
		{
			reset_table_decompress(state);
			out_len = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, out_len, state, (const unsigned char *)destination);
		}
		state->stream_counter = 0;
		reset_table_decompress(state);
	}
#if QLZ_STREAMING_BUFFER > 0
	else
	{
		// Streaming path: decode into the history buffer, then hand a copy
		// to the caller.
		unsigned char *staged = state->stream_buffer + state->stream_counter;
		if ((*source & 1) != 1)
		{
			bench_memcpy(staged, source + qlz_size_header(source), out_len);
			reset_table_decompress(state);
		}
		else
		{
			out_len = qlz_decompress_core((const unsigned char *)source, staged, out_len, state, (const unsigned char *)state->stream_buffer);
		}
		bench_memcpy(destination, staged, out_len);
		state->stream_counter += out_len;
	}
#endif
	return out_len;
}
|
||||
|
164
microbench/src/lzip/quicklz.h
Normal file
164
microbench/src/lzip/quicklz.h
Normal file
|
@ -0,0 +1,164 @@
|
|||
#ifndef QLZ_HEADER
|
||||
#define QLZ_HEADER
|
||||
|
||||
#include <am.h>
|
||||
#include <klib.h>
|
||||
|
||||
// Copy n bytes from src to dst and return dst.
//
// Despite the name this has memmove semantics: when dst starts inside the
// source range the bytes are copied from the end backwards, so overlapping
// regions are handled. Both pointers must be non-NULL (asserted).
//
// All arithmetic is done on char pointers; arithmetic and ordering on
// `void *` is a GNU extension and is not valid ISO C.
static inline void* bench_memcpy(void* dst, const void* src, size_t n){
  assert(dst&&src);
  char *d = (char *)dst;
  const char *s = (const char *)src;
  if (s < d && d < s + n) {
    // Destination overlaps the tail of the source: copy high-to-low so no
    // source byte is overwritten before it has been read.
    d += n;
    s += n;
    while (n-- > 0) *--d = *--s;
  }
  else {
    while (n-- > 0) *d++ = *s++;
  }
  return dst;
}
|
||||
|
||||
|
||||
// Fast data compression library
|
||||
// Copyright (C) 2006-2011 Lasse Mikkel Reinhold
|
||||
// lar@quicklz.com
|
||||
//
|
||||
// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything
|
||||
// released into public must be open source) or under a commercial license if such
|
||||
// has been acquired (see http://www.quicklz.com/order.html). The commercial license
|
||||
// does not cover derived or ported versions created by third parties under GPL.
|
||||
|
||||
// You can edit the following user settings. Data must be decompressed with the same
|
||||
// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed
|
||||
// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially
|
||||
// zeroed out (see manual). First #ifndef makes it possible to define settings from
|
||||
// the outside like the compiler command line.
|
||||
|
||||
// 1.5.0 final
|
||||
|
||||
#ifndef QLZ_COMPRESSION_LEVEL
|
||||
|
||||
// 1 gives fastest compression speed. 3 gives fastest decompression speed and best
|
||||
// compression ratio.
|
||||
//#define QLZ_COMPRESSION_LEVEL 1
|
||||
//#define QLZ_COMPRESSION_LEVEL 2
|
||||
//#define QLZ_COMPRESSION_LEVEL 3
|
||||
#define QLZ_COMPRESSION_LEVEL 2
|
||||
|
||||
// If > 0, zero out both states prior to first call to qlz_compress() or qlz_decompress()
|
||||
// and decompress packets in the same order as they were compressed
|
||||
#define QLZ_STREAMING_BUFFER 0
|
||||
//#define QLZ_STREAMING_BUFFER 100000
|
||||
//#define QLZ_STREAMING_BUFFER 1000000
|
||||
|
||||
// Guarantees that decompression of corrupted data cannot crash. Decreases decompression
|
||||
// speed 10-20%. Compression speed not affected.
|
||||
//#define QLZ_MEMORY_SAFE
|
||||
#endif
|
||||
|
||||
#define QLZ_VERSION_MAJOR 1
|
||||
#define QLZ_VERSION_MINOR 5
|
||||
#define QLZ_VERSION_REVISION 0
|
||||
|
||||
// Verify compression level
|
||||
#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3
|
||||
#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3
|
||||
#endif
|
||||
|
||||
typedef unsigned int ui32;
|
||||
typedef unsigned short int ui16;
|
||||
|
||||
// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values!
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
#define QLZ_POINTERS 1
|
||||
#define QLZ_HASH_VALUES 4096
|
||||
#elif QLZ_COMPRESSION_LEVEL == 2
|
||||
#define QLZ_POINTERS 4
|
||||
#define QLZ_HASH_VALUES 2048
|
||||
#elif QLZ_COMPRESSION_LEVEL == 3
|
||||
#define QLZ_POINTERS 16
|
||||
#define QLZ_HASH_VALUES 4096
|
||||
#endif
|
||||
|
||||
// hash entry
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
ui32 cache;
|
||||
#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
|
||||
unsigned int offset;
|
||||
#else
|
||||
const unsigned char *offset;
|
||||
#endif
|
||||
#else
|
||||
const unsigned char *offset[QLZ_POINTERS];
|
||||
#endif
|
||||
|
||||
} qlz_hash_compress;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_COMPRESSION_LEVEL == 1
|
||||
const unsigned char *offset;
|
||||
#else
|
||||
const unsigned char *offset[QLZ_POINTERS];
|
||||
#endif
|
||||
} qlz_hash_decompress;
|
||||
|
||||
|
||||
// states
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_STREAMING_BUFFER > 0
|
||||
unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
|
||||
#endif
|
||||
size_t stream_counter;
|
||||
qlz_hash_compress hash[QLZ_HASH_VALUES];
|
||||
unsigned char hash_counter[QLZ_HASH_VALUES];
|
||||
} qlz_state_compress;
|
||||
|
||||
|
||||
#if QLZ_COMPRESSION_LEVEL == 1 || QLZ_COMPRESSION_LEVEL == 2
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_STREAMING_BUFFER > 0
|
||||
unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
|
||||
#endif
|
||||
qlz_hash_decompress hash[QLZ_HASH_VALUES];
|
||||
unsigned char hash_counter[QLZ_HASH_VALUES];
|
||||
size_t stream_counter;
|
||||
} qlz_state_decompress;
|
||||
#elif QLZ_COMPRESSION_LEVEL == 3
|
||||
typedef struct
|
||||
{
|
||||
#if QLZ_STREAMING_BUFFER > 0
|
||||
unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
|
||||
#endif
|
||||
#if QLZ_COMPRESSION_LEVEL <= 2
|
||||
qlz_hash_decompress hash[QLZ_HASH_VALUES];
|
||||
#endif
|
||||
size_t stream_counter;
|
||||
} qlz_state_decompress;
|
||||
#endif
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Public functions of QuickLZ
|
||||
size_t qlz_size_decompressed(const char *source);
|
||||
size_t qlz_size_compressed(const char *source);
|
||||
size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state);
|
||||
size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state);
|
||||
int qlz_get_setting(int setting);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
159
microbench/src/md5/md5.c
Normal file
159
microbench/src/md5/md5.c
Normal file
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* Simple MD5 implementation (github.com/pod32g/md5)
|
||||
*
|
||||
*/
|
||||
|
||||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
|
||||
// Constants are the integer part of the sines of integers (in radians) * 2^32.
|
||||
const uint32_t k[64] = {
|
||||
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee ,
|
||||
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 ,
|
||||
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be ,
|
||||
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 ,
|
||||
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa ,
|
||||
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 ,
|
||||
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed ,
|
||||
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a ,
|
||||
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c ,
|
||||
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 ,
|
||||
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 ,
|
||||
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 ,
|
||||
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 ,
|
||||
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 ,
|
||||
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 ,
|
||||
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
|
||||
|
||||
// r specifies the per-round shift amounts
|
||||
static const uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
|
||||
5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
|
||||
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
|
||||
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
|
||||
|
||||
// leftrotate function definition
|
||||
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
|
||||
|
||||
// Store the 32-bit value `val` into bytes[0..3] in little-endian order
// (least significant byte first).
static void to_bytes(uint32_t val, uint8_t *bytes)
{
    for (int idx = 0; idx < 4; idx++) {
        bytes[idx] = (uint8_t) (val >> (8 * idx));
    }
}
|
||||
|
||||
// Assemble a 32-bit value from bytes[0..3], read as little-endian
// (bytes[0] is the least significant byte).
static uint32_t to_int32(const uint8_t *bytes)
{
    uint32_t word = 0;
    for (int idx = 3; idx >= 0; idx--) {
        word = (word << 8) | (uint32_t) bytes[idx];
    }
    return word;
}
|
||||
|
||||
static void md5(uint8_t *msg, size_t initial_len, uint8_t *digest) {
|
||||
|
||||
// These vars will contain the hash
|
||||
uint32_t h0, h1, h2, h3;
|
||||
|
||||
size_t new_len, offset;
|
||||
uint32_t w[16];
|
||||
uint32_t a, b, c, d, i, f, g, temp;
|
||||
|
||||
// Initialize variables - simple count in nibbles:
|
||||
h0 = 0x67452301;
|
||||
h1 = 0xefcdab89;
|
||||
h2 = 0x98badcfe;
|
||||
h3 = 0x10325476;
|
||||
|
||||
//Pre-processing:
|
||||
//append "1" bit to message
|
||||
//append "0" bits until message length in bits ≡ 448 (mod 512)
|
||||
//append length mod (2^64) to message
|
||||
|
||||
for (new_len = initial_len + 1; new_len % (512/8) != 448/8; new_len++)
|
||||
;
|
||||
|
||||
msg[initial_len] = 0x80; // append the "1" bit; most significant bit is "first"
|
||||
for (offset = initial_len + 1; offset < new_len; offset++)
|
||||
msg[offset] = 0; // append "0" bits
|
||||
|
||||
// append the len in bits at the end of the buffer.
|
||||
to_bytes(initial_len*8, msg + new_len);
|
||||
// initial_len>>29 == initial_len*8>>32, but avoids overflow.
|
||||
to_bytes(initial_len>>29, msg + new_len + 4);
|
||||
|
||||
// Process the message in successive 512-bit chunks:
|
||||
//for each 512-bit chunk of message:
|
||||
for(offset=0; offset<new_len; offset += (512/8)) {
|
||||
|
||||
// break chunk into sixteen 32-bit words w[j], 0 ≤ j ≤ 15
|
||||
for (i = 0; i < 16; i++)
|
||||
w[i] = to_int32(msg + offset + i*4);
|
||||
|
||||
// Initialize hash value for this chunk:
|
||||
a = h0;
|
||||
b = h1;
|
||||
c = h2;
|
||||
d = h3;
|
||||
|
||||
// Main loop:
|
||||
for(i = 0; i<64; i++) {
|
||||
|
||||
if (i < 16) {
|
||||
f = (b & c) | ((~b) & d);
|
||||
g = i;
|
||||
} else if (i < 32) {
|
||||
f = (d & b) | ((~d) & c);
|
||||
g = (5*i + 1) % 16;
|
||||
} else if (i < 48) {
|
||||
f = b ^ c ^ d;
|
||||
g = (3*i + 5) % 16;
|
||||
} else {
|
||||
f = c ^ (b | (~d));
|
||||
g = (7*i) % 16;
|
||||
}
|
||||
|
||||
temp = d;
|
||||
d = c;
|
||||
c = b;
|
||||
b = b + LEFTROTATE((a + f + k[i] + w[g]), r[i]);
|
||||
a = temp;
|
||||
|
||||
}
|
||||
|
||||
// Add this chunk's hash to result so far:
|
||||
h0 += a;
|
||||
h1 += b;
|
||||
h2 += c;
|
||||
h3 += d;
|
||||
|
||||
}
|
||||
|
||||
//var char digest[16] := h0 append h1 append h2 append h3 //(Output is in little-endian)
|
||||
to_bytes(h0, digest);
|
||||
to_bytes(h1, digest + 4);
|
||||
to_bytes(h2, digest + 8);
|
||||
to_bytes(h3, digest + 12);
|
||||
}
|
||||
|
||||
static uint8_t *str;
|
||||
static uint8_t *digest;
|
||||
|
||||
void bench_md5_prepare() {
|
||||
N = setting->size;
|
||||
bench_srand(1);
|
||||
str = bench_alloc(N);
|
||||
for (int i = 0; i < N; i ++) {
|
||||
str[i] = bench_rand();
|
||||
}
|
||||
digest = bench_alloc(16);
|
||||
}
|
||||
|
||||
// Timed part of the benchmark: hash the N prepared bytes in `str` and leave
// the 16-byte result in `digest`.
void bench_md5_run() {
|
||||
md5(str, N, digest);
|
||||
}
|
||||
|
||||
// Return nonzero when the checksum() of the 16 digest bytes equals the
// reference value stored in the current setting.
int bench_md5_validate() {
|
||||
return checksum(digest, digest + 16) == setting->checksum;
|
||||
}
|
44
microbench/src/qsort/qsort.c
Normal file
44
microbench/src/qsort/qsort.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
static int N, *data;
|
||||
|
||||
void bench_qsort_prepare() {
|
||||
bench_srand(1);
|
||||
|
||||
N = setting->size;
|
||||
|
||||
data = bench_alloc(N * sizeof(int));
|
||||
for (int i = 0; i < N; i ++) {
|
||||
int a = bench_rand();
|
||||
int b = bench_rand();
|
||||
data[i] = (a << 16) | b;
|
||||
}
|
||||
}
|
||||
|
||||
// Exchange the two ints that a and b point to.
static void swap(int *a, int *b) {
  const int first = *a;
  *a = *b;
  *b = first;
}

// Sort the half-open range a[l .. r) into ascending order.
// Uses a[l] as the pivot: everything smaller is moved to the front, the
// pivot is dropped between the two regions, and both sides are sorted
// recursively.
static void myqsort(int *a, int l, int r) {
  if (l >= r) {
    return;
  }

  const int pivot_value = a[l];
  int boundary = l;  // last index holding an element smaller than the pivot

  for (int scan = l + 1; scan < r; scan++) {
    if (a[scan] < pivot_value) {
      boundary++;
      swap(&a[boundary], &a[scan]);
    }
  }

  swap(&a[boundary], &a[l]);
  myqsort(a, l, boundary);
  myqsort(a, boundary + 1, r);
}
|
||||
|
||||
// Timed part of the benchmark: sort data[0 .. N) in place.
void bench_qsort_run() {
|
||||
myqsort(data, 0, N);
|
||||
}
|
||||
|
||||
// Return nonzero when the checksum() over the N sorted ints equals the
// reference value stored in the current setting.
int bench_qsort_validate() {
|
||||
return checksum(data, data + N) == setting->checksum;
|
||||
}
|
32
microbench/src/queen/queen.c
Normal file
32
microbench/src/queen/queen.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
// Bit mask with one bit per board column; set by bench_queen_prepare().
static unsigned int FULL;

// Count the ways to complete an N-queens placement.
//   row - columns that already hold a queen
//   ld  - columns attacked along one diagonal direction in the current row
//   rd  - columns attacked along the other diagonal direction
// A solution is complete once every column in FULL is occupied.
static unsigned int dfs(unsigned int row, unsigned int ld, unsigned int rd) {
  if (row == FULL) {
    return 1;
  }

  unsigned int solutions = 0;

  // Try every free column, lowest bit first; "free_cols &= free_cols - 1"
  // clears the bit that was just tried.
  for (unsigned int free_cols = FULL & ~(row | ld | rd);
       free_cols != 0;
       free_cols &= free_cols - 1) {
    const unsigned int bit = free_cols & (0u - free_cols);
    solutions += dfs(row | bit, (ld | bit) << 1, (rd | bit) >> 1);
  }

  return solutions;
}
|
||||
|
||||
static unsigned int ans;
|
||||
|
||||
void bench_queen_prepare() {
|
||||
ans = 0;
|
||||
FULL = (1 << setting->size) - 1;
|
||||
}
|
||||
|
||||
// Timed part of the benchmark: count all solutions starting from an empty
// board and store the count in `ans`.
void bench_queen_run() {
|
||||
ans = dfs(0, 0, 0);
|
||||
}
|
||||
|
||||
// Return nonzero when the solution count equals the reference value stored
// in the current setting.
int bench_queen_validate() {
|
||||
return ans == setting->checksum;
|
||||
}
|
42
microbench/src/sieve/sieve.c
Normal file
42
microbench/src/sieve/sieve.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
|
||||
static int ans;
|
||||
// Bitmap with one bit per number; bit n lives in word n >> 5 at position
// n & 31. A set bit means "not crossed out yet".
static uint32_t *primes;

// Return bit n of the bitmap (0 or 1).
static inline int get(int n) {
  const uint32_t word = primes[n >> 5];
  const int shift = n & 31;
  return (word >> shift) & 1;
}

// Clear bit n of the bitmap.
static inline void clear(int n) {
  const int shift = n & 31;
  uint32_t *word = &primes[n >> 5];
  *word &= ~(1ul << shift);
}
|
||||
|
||||
void bench_sieve_prepare() {
|
||||
N = setting->size;
|
||||
primes = (uint32_t*)bench_alloc(N / 8 + 128);
|
||||
for (int i = 0; i <= N / 32; i ++) {
|
||||
primes[i] = 0xffffffff;
|
||||
}
|
||||
}
|
||||
|
||||
void bench_sieve_run() {
|
||||
for (int i = 1; i <= N; i ++)
|
||||
if (!get(i)) return;
|
||||
for (int i = 2; i * i <= N; i ++) {
|
||||
if (get(i)) {
|
||||
for (int j = i + i; j <= N; j += i)
|
||||
clear(j);
|
||||
}
|
||||
}
|
||||
ans = 0;
|
||||
for (int i = 2; i <= N; i ++)
|
||||
if (get(i)) {
|
||||
ans ++;
|
||||
}
|
||||
}
|
||||
|
||||
// Return nonzero when the prime count equals the reference value stored in
// the current setting.
int bench_sieve_validate() {
|
||||
return ans == setting->checksum;
|
||||
}
|
111
microbench/src/ssort/ssort.cc
Normal file
111
microbench/src/ssort/ssort.cc
Normal file
|
@ -0,0 +1,111 @@
|
|||
// This is the Skew algorithm's reference implementation.
|
||||
|
||||
#include <benchmark.h>
|
||||
|
||||
static int N;
|
||||
|
||||
// Lexicographic "less than or equal" on pairs: (a1, a2) <= (b1, b2).
inline bool leq(int a1, int a2, int b1, int b2) {
  if (a1 != b1) {
    return a1 < b1;
  }
  return a2 <= b2;
}

// Lexicographic "less than or equal" on triples:
// (a1, a2, a3) <= (b1, b2, b3).
inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) {
  if (a1 != b1) {
    return a1 < b1;
  }
  return leq(a2, a3, b2, b3);
}
|
||||
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
|
||||
static void radixPass(int* a, int* b, int* r, int n, int K)
|
||||
{ // count occurrences
|
||||
int* c = (int*)bench_alloc(sizeof(int)*(K+1));
|
||||
for (int i = 0; i <= K; i++) c[i] = 0; // reset counters
|
||||
for (int i = 0; i < n; i++) c[r[a[i]]]++; // count occurences
|
||||
for (int i = 0, sum = 0; i <= K; i++) { // exclusive prefix sums
|
||||
int t = c[i]; c[i] = sum; sum += t;
|
||||
}
|
||||
for (int i = 0; i < n; i++) b[c[r[a[i]]]++] = a[i]; // sort
|
||||
}
|
||||
|
||||
// find the suffix array SA of s[0..n-1] in {1..K}^n
|
||||
// require s[n]=s[n+1]=s[n+2]=0, n>=2
|
||||
// Build the suffix array SA[0..n-1] of s[0..n-1] (skew algorithm):
//   1. radix-sort the suffixes starting at positions i with i%3 != 0 by their
//      first three characters and give each distinct triple a name,
//   2. if the names are not unique, recurse on the string of names,
//   3. sort the i%3 == 0 suffixes using the order obtained in steps 1-2,
//   4. merge the two sorted groups into SA.
// K is the largest character value. All scratch arrays come from bench_alloc
// and are not released here. The helper macro GetI defined in the merge loop
// is not #undef'd afterwards.
void suffixArray(int* s, int* SA, int n, int K) {
|
||||
// n0, n1, n2: how many positions i in [0, n) have i%3 == 0, 1, 2.
int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
|
||||
int* s12 = (int*)bench_alloc(sizeof(int)*(n02+3)); s12[n02]= s12[n02+1]= s12[n02+2]=0;
|
||||
int* SA12 = (int*)bench_alloc(sizeof(int)*(n02+3)); SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
|
||||
int* s0 = (int*)bench_alloc(sizeof(int)*n0);
|
||||
int* SA0 = (int*)bench_alloc(sizeof(int)*n0);
|
||||
|
||||
// generate positions of mod 1 and mod 2 suffixes
|
||||
// the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
|
||||
for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i;
|
||||
|
||||
// lsb radix sort the mod 1 and mod 2 triples
|
||||
radixPass(s12 , SA12, s+2, n02, K);
|
||||
radixPass(SA12, s12 , s+1, n02, K);
|
||||
radixPass(s12 , SA12, s , n02, K);
|
||||
|
||||
// find lexicographic names of triples
|
||||
int name = 0, c0 = -1, c1 = -1, c2 = -1;
|
||||
for (int i = 0; i < n02; i++) {
|
||||
// a triple that differs from the previous one gets the next name
if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
|
||||
name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2];
|
||||
}
|
||||
if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half
|
||||
else { s12[SA12[i]/3 + n0] = name; } // right half
|
||||
}
|
||||
|
||||
// recurse if names are not yet unique
|
||||
if (name < n02) {
|
||||
suffixArray(s12, SA12, n02, name);
|
||||
// store unique names in s12 using the suffix array
|
||||
for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1;
|
||||
} else // generate the suffix array of s12 directly
|
||||
for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i;
|
||||
|
||||
// stably sort the mod 0 suffixes from SA12 by their first character
|
||||
for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
|
||||
radixPass(s0, SA0, s, n0, K);
|
||||
|
||||
// merge sorted SA0 suffixes and sorted SA12 suffixes
|
||||
// p walks SA0, t walks SA12 (starting after the dummy suffix, if any),
// k is the next output slot in SA
for (int p=0, t=n0-n1, k=0; k < n; k++) {
|
||||
// GetI maps the SA12 entry at t back to a position in s
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
|
||||
int i = GetI(); // pos of current offset 12 suffix
|
||||
int j = SA0[p]; // pos of current offset 0 suffix
|
||||
// mod 1 suffixes are compared as pairs, mod 2 suffixes as triples
if (SA12[t] < n0 ?
|
||||
leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) :
|
||||
leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
|
||||
{ // suffix from SA12 is smaller
|
||||
SA[k] = i; t++;
|
||||
if (t == n02) { // done --- only SA0 suffixes left
|
||||
for (k++; p < n0; p++, k++) SA[k] = SA0[p];
|
||||
}
|
||||
} else {
|
||||
SA[k] = j; p++;
|
||||
if (p == n0) { // done --- only SA12 suffixes left
|
||||
for (k++; t < n02; t++, k++) SA[k] = GetI();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
static int *s, *sa;
|
||||
|
||||
void bench_ssort_prepare() {
|
||||
N = setting->size;
|
||||
bench_srand(1);
|
||||
s = (int*)bench_alloc(sizeof(int)*(N+10));
|
||||
sa = (int*)bench_alloc(sizeof(int)*(N+10));
|
||||
|
||||
for (int i = 0; i < N; i ++) {
|
||||
s[i] = bench_rand() % 26;
|
||||
}
|
||||
}
|
||||
|
||||
// Timed part of the benchmark: build the suffix array of s[0 .. N) into `sa`,
// with 26 as the largest character value.
void bench_ssort_run() {
|
||||
suffixArray(s, sa, N, 26);
|
||||
}
|
||||
|
||||
// Return nonzero when the checksum() over the N suffix array entries equals
// the reference value stored in the current setting.
int bench_ssort_validate() {
|
||||
return checksum(sa, sa + N) == setting->checksum;
|
||||
}
|
||||
|
||||
}
|
||||
|
3
thread-os/Makefile
Normal file
3
thread-os/Makefile
Normal file
|
@ -0,0 +1,3 @@
|
|||
NAME := thread-os
|
||||
SRCS := thread-os.c
|
||||
include $(AM_HOME)/Makefile
|
71
thread-os/thread-os.c
Normal file
71
thread-os/thread-os.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
#include <am.h>
|
||||
#include <klib.h>
|
||||
#include <klib-macros.h>
|
||||
|
||||
#define MAX_CPU 8
|
||||
|
||||
// A task and its stack in one 8 KiB object. The bookkeeping fields (name,
// next, entry, context) share storage with the start of `stack`; main() hands
// the bytes from just after `context` to the end of the union to kcontext()
// as the task's stack area.
typedef union task {
|
||||
struct {
|
||||
const char *name;
|
||||
// next task in the circular list built by main()
union task *next;
|
||||
void (*entry)(void *);
|
||||
// context saved at the last interrupt; on_interrupt() returns it to resume
Context *context;
|
||||
};
|
||||
uint8_t stack[8192];
|
||||
} Task;
|
||||
|
||||
Task *currents[MAX_CPU];
|
||||
#define current currents[cpu_current()]
|
||||
|
||||
// user-defined tasks
|
||||
|
||||
// Spinlock flag shared by all CPUs: 0 = free, 1 = held.
int locked = 0;
|
||||
// Acquire: keep swapping 1 into the flag until the previous value was 0.
void lock() { while (atomic_xchg(&locked, 1)); }
|
||||
// Release: swap 0 back into the flag.
void unlock() { atomic_xchg(&locked, 0); }
|
||||
|
||||
// Task body: forever print the task name and the CPU it is running on
// (serialised by the global spinlock), then burn some cycles.
// `arg` is the task name; main() passes task->name here.
void func(void *arg) {
  // %s needs a char pointer, so convert the void * once up front.
  const char *name = (const char *)arg;
  while (1) {
    lock();
    printf("Thread-%s on CPU #%d\n", name, cpu_current());
    unlock();
    // busy-wait delay; volatile keeps the empty loop from being removed
    for (int volatile i = 0; i < 100000; i++) ;
  }
}
|
||||
|
||||
Task tasks[] = {
|
||||
{ .name = "A", .entry = func },
|
||||
{ .name = "B", .entry = func },
|
||||
{ .name = "C", .entry = func },
|
||||
{ .name = "D", .entry = func },
|
||||
{ .name = "E", .entry = func },
|
||||
};
|
||||
|
||||
// ------------------
|
||||
|
||||
// Interrupt/yield handler and scheduler. Saves the interrupted context in the
// current task (if this CPU already runs one), then walks the circular task
// list until it reaches a task whose index modulo cpu_count() equals this
// CPU's number, and returns that task's context to resume it.
Context *on_interrupt(Event ev, Context *ctx) {
  extern Task tasks[];

  if (current) {
    current->context = ctx;
  } else {
    // First entry on this CPU: there is no task to save into yet, so start
    // the search from the head of the list.
    current = &tasks[0];
  }

  for (;;) {
    current = current->next;
    if ((current - tasks) % cpu_count() == cpu_current()) {
      break;
    }
  }

  return current->context;
}
|
||||
|
||||
// Entry point passed to mpe_init(): enable interrupts, then yield so that
// on_interrupt() can pick a task to run.
void mp_entry() {
|
||||
iset(true);
|
||||
yield();
|
||||
}
|
||||
|
||||
int main() {
|
||||
ioe_init();
|
||||
cte_init(on_interrupt);
|
||||
|
||||
for (int i = 0; i < LENGTH(tasks); i++) {
|
||||
Task *task = &tasks[i];
|
||||
Area stack = (Area) { &task->context + 1, task + 1 };
|
||||
task->context = kcontext(stack, task->entry, (void *)task->name);
|
||||
task->next = &tasks[(i + 1) % LENGTH(tasks)];
|
||||
}
|
||||
mpe_init(mp_entry);
|
||||
}
|
Loading…
Reference in a new issue