From 009f8b5fb056511cccda7fbbcc0a5645394e3c09 Mon Sep 17 00:00:00 2001 From: Yanyan Jiang Date: Tue, 11 Aug 2020 17:17:16 +0000 Subject: [PATCH] port some benchmarks --- coremark/Makefile | 3 + coremark/include/core_portme.h | 188 +++++++ coremark/include/coremark.h | 174 ++++++ coremark/src/core_list_join.c | 496 +++++++++++++++++ coremark/src/core_main.c | 339 ++++++++++++ coremark/src/core_matrix.c | 308 +++++++++++ coremark/src/core_portme.c | 109 ++++ coremark/src/core_state.c | 277 ++++++++++ coremark/src/core_util.c | 210 ++++++++ dhrystone/Makefile | 3 + dhrystone/dry.c | 950 +++++++++++++++++++++++++++++++++ microbench/Makefile | 3 + microbench/include/benchmark.h | 113 ++++ microbench/src/15pz/15pz.cc | 88 +++ microbench/src/15pz/heap.h | 227 ++++++++ microbench/src/15pz/puzzle.h | 475 +++++++++++++++++ microbench/src/bench.c | 181 +++++++ microbench/src/bf/bf.c | 151 ++++++ microbench/src/dinic/dinic.cc | 138 +++++ microbench/src/fib/fib.c | 64 +++ microbench/src/lzip/lzip.c | 29 + microbench/src/lzip/quicklz.c | 761 ++++++++++++++++++++++++++ microbench/src/lzip/quicklz.h | 164 ++++++ microbench/src/md5/md5.c | 159 ++++++ microbench/src/qsort/qsort.c | 44 ++ microbench/src/queen/queen.c | 32 ++ microbench/src/sieve/sieve.c | 42 ++ microbench/src/ssort/ssort.cc | 111 ++++ thread-os/Makefile | 3 + thread-os/thread-os.c | 71 +++ 30 files changed, 5913 insertions(+) create mode 100644 coremark/Makefile create mode 100755 coremark/include/core_portme.h create mode 100755 coremark/include/coremark.h create mode 100755 coremark/src/core_list_join.c create mode 100755 coremark/src/core_main.c create mode 100755 coremark/src/core_matrix.c create mode 100755 coremark/src/core_portme.c create mode 100755 coremark/src/core_state.c create mode 100755 coremark/src/core_util.c create mode 100644 dhrystone/Makefile create mode 100644 dhrystone/dry.c create mode 100644 microbench/Makefile create mode 100644 microbench/include/benchmark.h create mode 100644 
microbench/src/15pz/15pz.cc create mode 100644 microbench/src/15pz/heap.h create mode 100644 microbench/src/15pz/puzzle.h create mode 100644 microbench/src/bench.c create mode 100644 microbench/src/bf/bf.c create mode 100644 microbench/src/dinic/dinic.cc create mode 100644 microbench/src/fib/fib.c create mode 100644 microbench/src/lzip/lzip.c create mode 100644 microbench/src/lzip/quicklz.c create mode 100644 microbench/src/lzip/quicklz.h create mode 100644 microbench/src/md5/md5.c create mode 100644 microbench/src/qsort/qsort.c create mode 100644 microbench/src/queen/queen.c create mode 100644 microbench/src/sieve/sieve.c create mode 100644 microbench/src/ssort/ssort.cc create mode 100644 thread-os/Makefile create mode 100644 thread-os/thread-os.c diff --git a/coremark/Makefile b/coremark/Makefile new file mode 100644 index 0000000..5d09eca --- /dev/null +++ b/coremark/Makefile @@ -0,0 +1,3 @@ +NAME = coremark +SRCS = $(shell find -L ./src/ -name "*.c") +include $(AM_HOME)/Makefile diff --git a/coremark/include/core_portme.h b/coremark/include/core_portme.h new file mode 100755 index 0000000..65278e3 --- /dev/null +++ b/coremark/include/core_portme.h @@ -0,0 +1,188 @@ +/* Topic : Description + This file contains configuration constants required to execute on different platforms +*/ + + +#ifndef CORE_PORTME_H +#define CORE_PORTME_H + +#include +#include +#include + +#define ITERATIONS 1000 +#define MEM_METHOD MEM_STATIC + +/************************/ +/* Data types and settings */ +/************************/ +/* Configuration : HAS_FLOAT + Define to 1 if the platform supports floating point. +*/ +#ifndef HAS_FLOAT +#define HAS_FLOAT 0 +#endif +/* Configuration : HAS_TIME_H + Define to 1 if platform has the time.h header file, + and implementation of functions thereof. +*/ +#ifndef HAS_TIME_H +#define HAS_TIME_H 0 +#endif +/* Configuration : USE_CLOCK + Define to 1 if platform has the time.h header file, + and implementation of functions thereof. 
+*/ +#ifndef USE_CLOCK +#define USE_CLOCK 0 +#endif +/* Configuration : HAS_STDIO + Define to 1 if the platform has stdio.h. +*/ +#ifndef HAS_STDIO +#define HAS_STDIO 0 +#endif +/* Configuration : HAS_PRINTF + Define to 1 if the platform has stdio.h and implements the printf function. +*/ +#ifndef HAS_PRINTF +#define HAS_PRINTF 1 +#endif + +/* Configuration : CORE_TICKS + Define type of return from the timing functions. + */ +typedef uint32_t CORE_TICKS; + +/* Definitions : COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION + Initialize these strings per platform +*/ +#ifndef COMPILER_VERSION + #ifdef __GNUC__ + #define COMPILER_VERSION "GCC"__VERSION__ + #else + #define COMPILER_VERSION "Please put compiler version here (e.g. gcc 4.1)" + #endif +#endif +#ifndef COMPILER_FLAGS + #define COMPILER_FLAGS +#endif +#ifndef MEM_LOCATION + #define MEM_LOCATION "STACK" +#endif + +/* Data Types : + To avoid compiler issues, define the data types that need ot be used for 8b, 16b and 32b in . + + *Imprtant* : + ee_ptr_int needs to be the data type used to hold pointers, otherwise coremark may fail!!! +*/ +typedef signed short ee_s16; +typedef unsigned short ee_u16; +typedef signed int ee_s32; +typedef double ee_f32; +typedef unsigned char ee_u8; +typedef unsigned int ee_u32; +typedef unsigned long ee_ptr_int; +typedef size_t ee_size_t; +/* align_mem : + This macro is used to align an offset to point to a 32b value. It is used in the Matrix algorithm to initialize the input memory blocks. +*/ +#define align_mem(x) (void *)(4 + (((unsigned long)(x) - 1) & ~3)) + +/* Configuration : SEED_METHOD + Defines method to get seed values that cannot be computed at compile time. + + Valid values : + SEED_ARG - from command line. + SEED_FUNC - from a system function. + SEED_VOLATILE - from volatile variables. +*/ +#ifndef SEED_METHOD +#define SEED_METHOD SEED_VOLATILE +#endif + +/* Configuration : MEM_METHOD + Defines method to get a block of memry. 
+ + Valid values : + MEM_MALLOC - for platforms that implement malloc and have malloc.h. + MEM_STATIC - to use a static memory array. + MEM_STACK - to allocate the data block on the stack (NYI). +*/ +#ifndef MEM_METHOD +#define MEM_METHOD MEM_STACK +#endif + +/* Configuration : MULTITHREAD + Define for parallel execution + + Valid values : + 1 - only one context (default). + N>1 - will execute N copies in parallel. + + Note : + If this flag is defined to more than 1, an implementation for launching parallel contexts must be defined. + + Two sample implementations are provided. Use USE_PTHREAD or USE_FORK to enable them. + + It is valid to have a different implementation of core_start_parallel and core_stop_parallel in core_portme.c, + to fit a particular architecture. +*/ +#ifndef MULTITHREAD +#define MULTITHREAD 1 +#define USE_PTHREAD 0 +#define USE_FORK 0 +#define USE_SOCKET 0 +#endif + +/* Configuration : MAIN_HAS_NOARGC + Needed if platform does not support getting arguments to main. + + Valid values : + 0 - argc/argv to main is supported + 1 - argc/argv to main is not supported + + Note : + This flag only matters if MULTITHREAD has been defined to a value greater than 1. +*/ +#ifndef MAIN_HAS_NOARGC +#define MAIN_HAS_NOARGC 0 +#endif + +/* Configuration : MAIN_HAS_NORETURN + Needed if platform does not support returning a value from main. + + Valid values : + 0 - main returns an int, and return value will be 0. + 1 - platform does not support returning a value from main +*/ +#ifndef MAIN_HAS_NORETURN +#define MAIN_HAS_NORETURN 0 +#endif + +/* Variable : default_num_contexts + Not used for this simple port, must contain the value 1. 
+*/ +extern ee_u32 default_num_contexts; + +typedef struct CORE_PORTABLE_S { + ee_u8 portable_id; +} core_portable; + +/* target specific init/fini */ +void portable_init(core_portable *p, int *argc, char *argv[]); +void portable_fini(core_portable *p); + +#if !defined(PROFILE_RUN) && !defined(PERFORMANCE_RUN) && !defined(VALIDATION_RUN) +#if (TOTAL_DATA_SIZE==1200) +#define PROFILE_RUN 1 +#elif (TOTAL_DATA_SIZE==2000) +#define PERFORMANCE_RUN 1 +#else +#define VALIDATION_RUN 1 +#endif +#endif + + +#endif /* CORE_PORTME_H */ diff --git a/coremark/include/coremark.h b/coremark/include/coremark.h new file mode 100755 index 0000000..14cb252 --- /dev/null +++ b/coremark/include/coremark.h @@ -0,0 +1,174 @@ +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. + +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +/* Topic: Description + This file contains declarations of the various benchmark functions. 
+*/ + +/* Configuration: TOTAL_DATA_SIZE + Define total size for data algorithms will operate on +*/ +#ifndef TOTAL_DATA_SIZE +#define TOTAL_DATA_SIZE 2*1000 +#endif + +#define SEED_ARG 0 +#define SEED_FUNC 1 +#define SEED_VOLATILE 2 + +#define MEM_STATIC 0 +#define MEM_MALLOC 1 +#define MEM_STACK 2 + +#include "core_portme.h" + +#if HAS_STDIO +#include +#endif +#if HAS_PRINTF +#define ee_printf printf +#endif + +/* Actual benchmark execution in iterate */ +void *iterate(void *pres); + +/* Typedef: secs_ret + For machines that have floating point support, get number of seconds as a double. + Otherwise an unsigned int. +*/ +#if HAS_FLOAT +typedef double secs_ret; +#else +typedef ee_u32 secs_ret; +#endif + +#if MAIN_HAS_NORETURN +#define MAIN_RETURN_VAL +#define MAIN_RETURN_TYPE void +#else +#define MAIN_RETURN_VAL 0 +#define MAIN_RETURN_TYPE int +#endif + +void start_time(void); +void stop_time(void); +CORE_TICKS get_time(void); +secs_ret time_in_secs(CORE_TICKS ticks); + +/* Misc useful functions */ +ee_u16 crcu8(ee_u8 data, ee_u16 crc); +ee_u16 crc16(ee_s16 newval, ee_u16 crc); +ee_u16 crcu16(ee_u16 newval, ee_u16 crc); +ee_u16 crcu32(ee_u32 newval, ee_u16 crc); +ee_u8 check_data_types(); +void *portable_malloc(ee_size_t size); +void portable_free(void *p); +ee_s32 parseval(char *valstring); + +/* Algorithm IDS */ +#define ID_LIST (1<<0) +#define ID_MATRIX (1<<1) +#define ID_STATE (1<<2) +#define ALL_ALGORITHMS_MASK (ID_LIST|ID_MATRIX|ID_STATE) +#define NUM_ALGORITHMS 3 + +/* list data structures */ +typedef struct list_data_s { + ee_s16 data16; + ee_s16 idx; +} list_data; + +typedef struct list_head_s { + struct list_head_s *next; + struct list_data_s *info; +} list_head; + + +/*matrix benchmark related stuff */ +#define MATDAT_INT 1 +#if MATDAT_INT +typedef ee_s16 MATDAT; +typedef ee_s32 MATRES; +#else +typedef ee_f16 MATDAT; +typedef ee_f32 MATRES; +#endif + +typedef struct MAT_PARAMS_S { + int N; + MATDAT *A; + MATDAT *B; + MATRES *C; +} mat_params; + +/* 
state machine related stuff */ +/* List of all the possible states for the FSM */ +typedef enum CORE_STATE { + CORE_START=0, + CORE_INVALID, + CORE_S1, + CORE_S2, + CORE_INT, + CORE_FLOAT, + CORE_EXPONENT, + CORE_SCIENTIFIC, + NUM_CORE_STATES +} core_state_e ; + + +/* Helper structure to hold results */ +typedef struct RESULTS_S { + /* inputs */ + ee_s16 seed1; /* Initializing seed */ + ee_s16 seed2; /* Initializing seed */ + ee_s16 seed3; /* Initializing seed */ + void *memblock[4]; /* Pointer to safe memory location */ + ee_u32 size; /* Size of the data */ + ee_u32 iterations; /* Number of iterations to execute */ + ee_u32 execs; /* Bitmask of operations to execute */ + struct list_head_s *list; + mat_params mat; + /* outputs */ + ee_u16 crc; + ee_u16 crclist; + ee_u16 crcmatrix; + ee_u16 crcstate; + ee_s16 err; + /* ultithread specific */ + core_portable port; +} core_results; + +/* Multicore execution handling */ +#if (MULTITHREAD>1) +ee_u8 core_start_parallel(core_results *res); +ee_u8 core_stop_parallel(core_results *res); +#endif + +/* list benchmark functions */ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); + +/* state benchmark functions */ +void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); +ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock, + ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc); + +/* matrix benchmark functions */ +ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p); +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); + diff --git a/coremark/src/core_list_join.c b/coremark/src/core_list_join.c new file mode 100755 index 0000000..f33cdea --- /dev/null +++ b/coremark/src/core_list_join.c @@ -0,0 +1,496 @@ +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. 
+ +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ + +#include "coremark.h" +/* +Topic: Description + Benchmark using a linked list. + + Linked list is a common data structure used in many applications. + + For our purposes, this will exercise the memory units of the processor. + In particular, usage of the list pointers to find and alter data. + + We are not using Malloc since some platforms do not support this library. + + Instead, the memory block being passed in is used to create a list, + and the benchmark takes care not to add more items than can be + accommodated by the memory block. The porting layer will make sure + that we have a valid memory block. + + All operations are done in place, without using any extra memory. + + The list itself contains list pointers and pointers to data items. + Data items contain the following: + + idx - An index that captures the initial order of the list. + data - Variable data initialized based on the input parameters. The 16b are divided as follows: + o Upper 8b are backup of original data. + o Bit 7 indicates if the lower 7 bits are to be used as is or calculated. + o Bits 0-2 indicate type of operation to perform to get a 7b value. + o Bits 3-6 provide input for the operation. 
+ +*/ + +/* local functions */ + +list_head *core_list_find(list_head *list,list_data *info); +list_head *core_list_reverse(list_head *list); +list_head *core_list_remove(list_head *item); +list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified); +list_head *core_list_insert_new(list_head *insert_point + , list_data *info, list_head **memblock, list_data **datablock + , list_head *memblock_end, list_data *datablock_end); +typedef ee_s32(*list_cmp)(list_data *a, list_data *b, core_results *res); +list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res); + +ee_s16 calc_func(ee_s16 *pdata, core_results *res) { + ee_s16 data=*pdata; + ee_s16 retval; + ee_u8 optype=(data>>7) & 1; /* bit 7 indicates if the function result has been cached */ + if (optype) /* if cached, use cache */ + return (data & 0x007f); + else { /* otherwise calculate and cache the result */ + ee_s16 flag=data & 0x7; /* bits 0-2 is type of function to perform */ + ee_s16 dtype=((data>>3) & 0xf); /* bits 3-6 is specific data for the operation */ + dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */ + switch (flag) { + case 0: + if (dtype<0x22) /* set min period for bit corruption */ + dtype=0x22; + retval=core_bench_state(res->size,res->memblock[3],res->seed1,res->seed2,dtype,res->crc); + if (res->crcstate==0) + res->crcstate=retval; + break; + case 1: + retval=core_bench_matrix(&(res->mat),dtype,res->crc); + if (res->crcmatrix==0) + res->crcmatrix=retval; + break; + default: + retval=data; + break; + } + res->crc=crcu16(retval,res->crc); + retval &= 0x007f; + *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */ + return retval; + } +} +/* Function: cmp_complex + Compare the data item in a list cell. + + Can be used by mergesort. 
+*/ +ee_s32 cmp_complex(list_data *a, list_data *b, core_results *res) { + ee_s16 val1=calc_func(&(a->data16),res); + ee_s16 val2=calc_func(&(b->data16),res); + return val1 - val2; +} + +/* Function: cmp_idx + Compare the idx item in a list cell, and regen the data. + + Can be used by mergesort. +*/ +ee_s32 cmp_idx(list_data *a, list_data *b, core_results *res) { + if (res==NULL) { + a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16>>8)); + b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16>>8)); + } + return a->idx - b->idx; +} + +void copy_info(list_data *to,list_data *from) { + to->data16=from->data16; + to->idx=from->idx; +} + +/* Benchmark for linked list: + - Try to find multiple data items. + - List sort + - Operate on data from list (crc) + - Single remove/reinsert + * At the end of this function, the list is back to original state +*/ +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx) { + ee_u16 retval=0; + ee_u16 found=0,missed=0; + list_head *list=res->list; + ee_s16 find_num=res->seed3; + list_head *this_find; + list_head *finder, *remover; + list_data info = {}; + ee_s16 i; + + info.idx=finder_idx; + /* find values in the list, and change the list each time (reverse and cache if value found) */ + for (i=0; inext->info->data16 >> 8) & 1; + } + else { + found++; + if (this_find->info->data16 & 0x1) /* use found value */ + retval+=(this_find->info->data16 >> 9) & 1; + /* and cache next item at the head of the list (if any) */ + if (this_find->next != NULL) { + finder = this_find->next; + this_find->next = finder->next; + finder->next=list->next; + list->next=finder; + } + } + if (info.idx>=0) + info.idx++; +#if CORE_DEBUG + ee_printf("List find %d: [%d,%d,%d]\n",i,retval,missed,found); +#endif + } + retval+=found*4-missed; + /* sort the list by data content and remove one item*/ + if (finder_idx>0) + list=core_list_mergesort(list,cmp_complex,res); + remover=core_list_remove(list->next); + /* CRC data content of list from location of 
index N forward, and then undo remove */ + finder=core_list_find(list,&info); + if (!finder) + finder=list->next; + while (finder) { + retval=crc16(list->info->data16,retval); + finder=finder->next; + } +#if CORE_DEBUG + ee_printf("List sort 1: %04x\n",retval); +#endif + remover=core_list_undo_remove(remover,list->next); + /* sort the list by index, in effect returning the list to original state */ + list=core_list_mergesort(list,cmp_idx,NULL); + /* CRC data content of list */ + finder=list->next; + while (finder) { + retval=crc16(list->info->data16,retval); + finder=finder->next; + } +#if CORE_DEBUG + ee_printf("List sort 2: %04x\n",retval); +#endif + return retval; +} +/* Function: core_list_init + Initialize list with data. + + Parameters: + blksize - Size of memory to be initialized. + memblock - Pointer to memory block. + seed - Actual values chosen depend on the seed parameter. + The seed parameter MUST be supplied from a source that cannot be determined at compile time + + Returns: + Pointer to the head of the list. 
+ +*/ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed) { + /* calculated pointers for the list */ + ee_u32 per_item=16+sizeof(struct list_data_s); + ee_u32 size=(blksize/per_item)-2; /* to accomodate systems with 64b pointers, and make sure same code is executed, set max list elements */ + list_head *memblock_end=memblock+size; + list_data *datablock=(list_data *)(memblock_end); + list_data *datablock_end=datablock+size; + /* some useful variables */ + ee_u32 i; + list_head *finder,*list=memblock; + list_data info; + + /* create a fake items for the list head and tail */ + list->next=NULL; + list->info=datablock; + list->info->idx=0x0000; + list->info->data16=(ee_s16)0x8080; + memblock++; + datablock++; + info.idx=0x7fff; + info.data16=(ee_s16)0xffff; + core_list_insert_new(list,&info,&memblock,&datablock,memblock_end,datablock_end); + + /* then insert size items */ + for (i=0; inext; + i=1; + while (finder->next!=NULL) { + if (iinfo->idx=i++; + else { + ee_u16 pat=(ee_u16)(i++ ^ seed); /* get a pseudo random number */ + finder->info->idx=0x3fff & (((i & 0x07) << 8) | pat); /* make sure the mixed items end up after the ones in sequence */ + } + finder=finder->next; + } + list = core_list_mergesort(list,cmp_idx,NULL); +#if CORE_DEBUG + ee_printf("Initialized list:\n"); + finder=list; + while (finder) { + ee_printf("[%04x,%04x]",finder->info->idx,(ee_u16)finder->info->data16); + finder=finder->next; + } + ee_printf("\n"); +#endif + return list; +} + +/* Function: core_list_insert + Insert an item to the list + + Parameters: + insert_point - where to insert the item. + info - data for the cell. + memblock - pointer for the list header + datablock - pointer for the list data + memblock_end - end of region for list headers + datablock_end - end of region for list data + + Returns: + Pointer to new item. 
+*/ +list_head *core_list_insert_new(list_head *insert_point, list_data *info, list_head **memblock, list_data **datablock + , list_head *memblock_end, list_data *datablock_end) { + list_head *newitem; + + if ((*memblock+1) >= memblock_end) + return NULL; + if ((*datablock+1) >= datablock_end) + return NULL; + + newitem=*memblock; + (*memblock)++; + newitem->next=insert_point->next; + insert_point->next=newitem; + + newitem->info=*datablock; + (*datablock)++; + copy_info(newitem->info,info); + + return newitem; +} + +/* Function: core_list_remove + Remove an item from the list. + + Operation: + For a singly linked list, remove by copying the data from the next item + over to the current cell, and unlinking the next item. + + Note: + since there is always a fake item at the end of the list, no need to check for NULL. + + Returns: + Removed item. +*/ +list_head *core_list_remove(list_head *item) { + list_data *tmp; + list_head *ret=item->next; + /* swap data pointers */ + tmp=item->info; + item->info=ret->info; + ret->info=tmp; + /* and eliminate item */ + item->next=item->next->next; + ret->next=NULL; + return ret; +} + +/* Function: core_list_undo_remove + Undo a remove operation. + + Operation: + Since we want each iteration of the benchmark to be exactly the same, + we need to be able to undo a remove. + Link the removed item back into the list, and switch the info items. + + Parameters: + item_removed - Return value from the + item_modified - List item that was modified during + + Returns: + The item that was linked back to the list. 
+ +*/ +list_head *core_list_undo_remove(list_head *item_removed, list_head *item_modified) { + list_data *tmp; + /* swap data pointers */ + tmp=item_removed->info; + item_removed->info=item_modified->info; + item_modified->info=tmp; + /* and insert item */ + item_removed->next=item_modified->next; + item_modified->next=item_removed; + return item_removed; +} + +/* Function: core_list_find + Find an item in the list + + Operation: + Find an item by idx (if not 0) or specific data value + + Parameters: + list - list head + info - idx or data to find + + Returns: + Found item, or NULL if not found. +*/ +list_head *core_list_find(list_head *list,list_data *info) { + if (info->idx>=0) { + while (list && (list->info->idx != info->idx)) + list=list->next; + return list; + } else { + while (list && ((list->info->data16 & 0xff) != info->data16)) + list=list->next; + return list; + } +} +/* Function: core_list_reverse + Reverse a list + + Operation: + Rearrange the pointers so the list is reversed. + + Parameters: + list - list head + info - idx or data to find + + Returns: + Found item, or NULL if not found. +*/ + +list_head *core_list_reverse(list_head *list) { + list_head *next=NULL, *tmp; + while (list) { + tmp=list->next; + list->next=next; + next=list; + list=tmp; + } + return next; +} +/* Function: core_list_mergesort + Sort the list in place without recursion. + + Description: + Use mergesort, as for linked list this is a realistic solution. + Also, since this is aimed at embedded, care was taken to use iterative rather then recursive algorithm. + The sort can either return the list to original order (by idx) , + or use the data item to invoke other other algorithms and change the order of the list. + + Parameters: + list - list to be sorted. + cmp - cmp function to use + + Returns: + New head of the list. + + Note: + We have a special header for the list that will always be first, + but the algorithm could theoretically modify where the list starts. 
+ + */ +list_head *core_list_mergesort(list_head *list, list_cmp cmp, core_results *res) { + list_head *p, *q, *e, *tail; + ee_s32 insize, nmerges, psize, qsize, i; + + insize = 1; + + while (1) { + p = list; + list = NULL; + tail = NULL; + + nmerges = 0; /* count number of merges we do in this pass */ + + while (p) { + nmerges++; /* there exists a merge to be done */ + /* step `insize' places along from p */ + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next; + if (!q) break; + } + + /* if q hasn't fallen off end, we have two lists to merge */ + qsize = insize; + + /* now we have two lists; merge them */ + while (psize > 0 || (qsize > 0 && q)) { + + /* decide whether next element of merge comes from p or q */ + if (psize == 0) { + /* p is empty; e must come from q. */ + e = q; q = q->next; qsize--; + } else if (qsize == 0 || !q) { + /* q is empty; e must come from p. */ + e = p; p = p->next; psize--; + } else if (cmp(p->info,q->info,res) <= 0) { + /* First element of p is lower (or same); e must come from p. */ + e = p; p = p->next; psize--; + } else { + /* First element of q is lower; e must come from q. */ + e = q; q = q->next; qsize--; + } + + /* add the next element to the merged list */ + if (tail) { + tail->next = e; + } else { + list = e; + } + tail = e; + } + + /* now p has stepped `insize' places along, and q has too */ + p = q; + } + + tail->next = NULL; + + /* If we have done only one merge, we're finished. 
*/ + if (nmerges <= 1) /* allow for nmerges==0, the empty list case */ + return list; + + /* Otherwise repeat, merging lists twice the size */ + insize *= 2; + } +#if COMPILER_REQUIRES_SORT_RETURN + return list; +#endif +} diff --git a/coremark/src/core_main.c b/coremark/src/core_main.c new file mode 100755 index 0000000..ed062fa --- /dev/null +++ b/coremark/src/core_main.c @@ -0,0 +1,339 @@ +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. + +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +/* File: core_main.c + This file contains the framework to acquire a block of memory, seed initial parameters, tun t he benchmark and report the results. +*/ +#include "coremark.h" + +/* Function: iterate + Run the benchmark for a specified number of iterations. + + Operation: + For each type of benchmarked algorithm: + a - Initialize the data block for the algorithm. + b - Execute the algorithm N times. + + Returns: + NULL. 
+*/ +static ee_u16 list_known_crc[] = {(ee_u16)0xd4b0,(ee_u16)0x3340,(ee_u16)0x6a79,(ee_u16)0xe714,(ee_u16)0xe3c1}; +static ee_u16 matrix_known_crc[] = {(ee_u16)0xbe52,(ee_u16)0x1199,(ee_u16)0x5608,(ee_u16)0x1fd7,(ee_u16)0x0747}; +static ee_u16 state_known_crc[] = {(ee_u16)0x5e47,(ee_u16)0x39bf,(ee_u16)0xe5a4,(ee_u16)0x8e3a,(ee_u16)0x8d84}; +void *iterate(void *pres) { + ee_u32 i; + ee_u16 crc; + core_results *res=(core_results *)pres; + ee_u32 iterations=res->iterations; + res->crc=0; + res->crclist=0; + res->crcmatrix=0; + res->crcstate=0; + + for (i=0; icrc=crcu16(crc,res->crc); + crc=core_bench_list(res,-1); + res->crc=crcu16(crc,res->crc); + if (i==0) res->crclist=res->crc; + } + return NULL; +} + +#if (SEED_METHOD==SEED_ARG) +ee_s32 get_seed_args(int i, int argc, char *argv[]); +#define get_seed(x) (ee_s16)get_seed_args(x,argc,argv) +#define get_seed_32(x) get_seed_args(x,argc,argv) +#else /* via function or volatile */ +ee_s32 get_seed_32(int i); +#define get_seed(x) (ee_s16)get_seed_32(x) +#endif + +#if (MEM_METHOD==MEM_STATIC) +ee_u8 static_memblk[TOTAL_DATA_SIZE]; +#endif +char *mem_name[3] = {"Static","Heap","Stack"}; +/* Function: main + Main entry routine for the benchmark. + This function is responsible for the following steps: + + 1 - Initialize input seeds from a source that cannot be determined at compile time. + 2 - Initialize memory block for use. + 3 - Run and time the benchmark. + 4 - Report results, testing the validity of the output if the seeds are known. + + Arguments: + 1 - first seed : Any value + 2 - second seed : Must be identical to first for iterations to be identical + 3 - third seed : Any value, should be at least an order of magnitude less then the input size, but bigger then 32. 
+ 4 - Iterations : Special, if set to 0, iterations will be automatically determined such that the benchmark will run between 10 to 100 secs + +*/ + +#if MAIN_HAS_NOARGC +MAIN_RETURN_TYPE main(void) { + int argc=0; + char *argv[1]; +#else +MAIN_RETURN_TYPE main(int argc, char *argv[]) { +#endif + ee_u16 i,j=0,num_algorithms=0; + ee_s16 known_id=-1,total_errors=0; + ee_u16 seedcrc=0; + CORE_TICKS total_time; + core_results results[MULTITHREAD]; +#if (MEM_METHOD==MEM_STACK) + ee_u8 stack_memblock[TOTAL_DATA_SIZE*MULTITHREAD]; +#endif + + ioe_init(); + + ee_printf("Running CoreMark for %d iterations\n", ITERATIONS); + + /* first call any initializations needed */ + portable_init(&(results[0].port), &argc, argv); + /* First some checks to make sure benchmark will run ok */ + if (sizeof(struct list_head_s)>128) { + ee_printf("list_head structure too big for comparable data!\n"); + return MAIN_RETURN_VAL; + } + results[0].seed1=get_seed(1); + results[0].seed2=get_seed(2); + results[0].seed3=get_seed(3); + results[0].iterations=get_seed_32(4); +#if CORE_DEBUG + results[0].iterations=1; +#endif + results[0].execs=get_seed_32(5); + if (results[0].execs==0) { /* if not supplied, execute all algorithms */ + results[0].execs=ALL_ALGORITHMS_MASK; + } + /* put in some default values based on one seed only for easy testing */ + if ((results[0].seed1==0) && (results[0].seed2==0) && (results[0].seed3==0)) { /* validation run */ + results[0].seed1=0; + results[0].seed2=0; + results[0].seed3=0x66; + } + if ((results[0].seed1==1) && (results[0].seed2==0) && (results[0].seed3==0)) { /* perfromance run */ + results[0].seed1=0x3415; + results[0].seed2=0x3415; + results[0].seed3=0x66; + } +#if (MEM_METHOD==MEM_STATIC) + results[0].memblock[0]=(void *)static_memblk; + results[0].size=TOTAL_DATA_SIZE; + results[0].err=0; + #if (MULTITHREAD>1) + #error "Cannot use a static data area with multiple contexts!" 
+ #endif +#elif (MEM_METHOD==MEM_MALLOC) + for (i=0 ; i1) + if (default_num_contexts>MULTITHREAD) { + default_num_contexts=MULTITHREAD; + } + for (i=0 ; i=0) { + for (i=0 ; i 0) + ee_printf("Iterations/mSec : %f\n",default_num_contexts*results[0].iterations/time_in_secs(total_time)); +#else + ee_printf("Total time (ms) : %d\n",time_in_secs(total_time)); +#endif + ee_printf("Iterations : %d\n",(int)default_num_contexts*results[0].iterations); + ee_printf("Compiler version : %s\n",COMPILER_VERSION); +#if (MULTITHREAD>1) + ee_printf("Parallel %s : %d\n",PARALLEL_METHOD,default_num_contexts); +#endif + /* output for verification */ + ee_printf("seedcrc : 0x%04x\n",seedcrc); + if (results[0].execs & ID_LIST) + for (i=0 ; i0) + ee_printf("Errors detected\n"); + if (total_errors<0) + ee_printf("Cannot validate operation for these seed values, please compare with results on a known platform.\n"); + +#if (MEM_METHOD==MEM_MALLOC) + for (i=0 ; i>(from)) & (~(0xffffffff << (to)))) + +#if CORE_DEBUG +void printmat(MATDAT *A, ee_u32 N, char *name) { + ee_u32 i,j; + ee_printf("Matrix %s [%dx%d]:\n",name,N,N); + for (i=0; i N times, + changing the matrix values slightly by a constant amount each time. +*/ +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc) { + ee_u32 N=p->N; + MATRES *C=p->C; + MATDAT *A=p->A; + MATDAT *B=p->B; + MATDAT val=(MATDAT)seed; + + crc=crc16(matrix_test(N,C,A,B,val),crc); + + return crc; +} + +/* Function: matrix_test + Perform matrix manipulation. + + Parameters: + N - Dimensions of the matrix. + C - memory for result matrix. + A - input matrix + B - operator matrix (not changed during operations) + + Returns: + A CRC value that captures all results calculated in the function. + In particular, crc of the value calculated on the result matrix + after each step by . + + Operation: + + 1 - Add a constant value to all elements of a matrix. + 2 - Multiply a matrix by a constant. + 3 - Multiply a matrix by a vector. 
+ 4 - Multiply a matrix by a matrix. + 5 - Add a constant value to all elements of a matrix. + + After the last step, matrix A is back to original contents. +*/ +ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val) { + ee_u16 crc=0; + MATDAT clipval=matrix_big(val); + + matrix_add_const(N,A,val); /* make sure data changes */ +#if CORE_DEBUG + printmat(A,N,"matrix_add_const"); +#endif + matrix_mul_const(N,C,A,val); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_const"); +#endif + matrix_mul_vect(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_vect"); +#endif + matrix_mul_matrix(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_matrix"); +#endif + matrix_mul_matrix_bitextract(N,C,A,B); + crc=crc16(matrix_sum(N,C,clipval),crc); +#if CORE_DEBUG + printmatC(C,N,"matrix_mul_matrix_bitextract"); +#endif + + matrix_add_const(N,A,-val); /* return matrix to initial value */ + return crc; +} + +/* Function : matrix_init + Initialize the memory block for matrix benchmarking. + + Parameters: + blksize - Size of memory to be initialized. + memblk - Pointer to memory block. + seed - Actual values chosen depend on the seed parameter. + p - pointers to containing initialized matrixes. + + Returns: + Matrix dimensions. + + Note: + The seed parameter MUST be supplied from a source that cannot be determined at compile time +*/ +ee_u32 core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p) { + ee_u32 N=0; + MATDAT *A; + MATDAT *B; + ee_s32 order=1; + MATDAT val; + ee_u32 i=0,j=0; + if (seed==0) + seed=1; + while (jA=A; + p->B=B; + p->C=(MATRES *)align_mem(B+N*N); + p->N=N; +#if CORE_DEBUG + printmat(A,N,"A"); + printmat(B,N,"B"); +#endif + return N; +} + +/* Function: matrix_sum + Calculate a function that depends on the values of elements in the matrix. + + For each element, accumulate into a temporary variable. 
+ + As long as this value is under the parameter clipval, + add 1 to the result if the element is bigger then the previous. + + Otherwise, reset the accumulator and add 10 to the result. +*/ +ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval) { + MATRES tmp=0,prev=0,cur=0; + ee_s16 ret=0; + ee_u32 i,j; + for (i=0; iclipval) { + ret+=10; + tmp=0; + } else { + ret += (cur>prev) ? 1 : 0; + } + prev=cur; + } + } + return ret; +} + +/* Function: matrix_mul_const + Multiply a matrix by a constant. + This could be used as a scaler for instance. +*/ +void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val) { + ee_u32 i,j; + for (i=0; i. + This methodology is taken to accomodate any hardware or simulated platform. + The sample implementation returns millisecs by default, + and the resolution is controlled by +*/ +CORE_TICKS get_time(void) { + return stop_time_val - start_time_val; +} + +/* Function : time_in_secs + Convert the value returned by get_time to seconds. + + The type is used to accomodate systems with no support for floating point. + Default implementation implemented by the EE_TICKS_PER_SEC macro above. +*/ +secs_ret time_in_secs(CORE_TICKS ticks) { + return ticks; +} + +ee_u32 default_num_contexts=1; + +/* Function : portable_init + Target specific initialization code + Test for some common mistakes. +*/ +void portable_init(core_portable *p, int *argc, char *argv[]) +{ + if (sizeof(ee_ptr_int) != sizeof(ee_u8 *)) { + ee_printf("ERROR! Please define ee_ptr_int to a type that holds a pointer!\n"); + } + if (sizeof(ee_u32) != 4) { + ee_printf("ERROR! 
Please define ee_u32 to a 32b unsigned type!\n"); + } + p->portable_id=1; +} +/* Function : portable_fini + Target specific final code +*/ +void portable_fini(core_portable *p) +{ + p->portable_id=0; +} + + diff --git a/coremark/src/core_state.c b/coremark/src/core_state.c new file mode 100755 index 0000000..ef9e880 --- /dev/null +++ b/coremark/src/core_state.c @@ -0,0 +1,277 @@ +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. + +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. + +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +#include "coremark.h" +/* local functions */ +enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count); + +/* +Topic: Description + Simple state machines like this one are used in many embedded products. + + For more complex state machines, sometimes a state transition table implementation is used instead, + trading speed of direct coding for ease of maintenance. + + Since the main goal of using a state machine in CoreMark is to excercise the switch/if behaviour, + we are using a small moore machine. + + In particular, this machine tests type of string input, + trying to determine whether the input is a number or something else. + (see core_state.png). +*/ + +/* Function: core_bench_state + Benchmark function + + Go over the input twice, once direct, and once after introducing some corruption. 
+*/ +ee_u16 core_bench_state(ee_u32 blksize, ee_u8 *memblock, + ee_s16 seed1, ee_s16 seed2, ee_s16 step, ee_u16 crc) +{ + ee_u32 final_counts[NUM_CORE_STATES]; + ee_u32 track_counts[NUM_CORE_STATES]; + ee_u8 *p=memblock; + ee_u32 i; + + +#if CORE_DEBUG + ee_printf("State Bench: %d,%d,%d,%04x\n",seed1,seed2,step,crc); +#endif + for (i=0; i0) { + for(i=0;i>3) & 0x3]; + next=4; + break; + case 3: /* float */ + case 4: /* float */ + buf=floatpat[(seed>>3) & 0x3]; + next=8; + break; + case 5: /* scientific */ + case 6: /* scientific */ + buf=scipat[(seed>>3) & 0x3]; + next=8; + break; + case 7: /* invalid */ + buf=errpat[(seed>>3) & 0x3]; + next=8; + break; + default: /* Never happen, just to make some compilers happy */ + break; + } + } + size++; + while (total='0') & (c<='9')) ? 1 : 0; + return retval; +} + +/* Function: core_state_transition + Actual state machine. + + The state machine will continue scanning until either: + 1 - an invalid input is detcted. + 2 - a valid number has been detected. + + The input pointer is updated to point to the end of the token, and the end state is returned (either specific format determined or invalid). +*/ + +enum CORE_STATE core_state_transition( ee_u8 **instr , ee_u32 *transition_count) { + ee_u8 *str=*instr; + ee_u8 NEXT_SYMBOL; + enum CORE_STATE state=CORE_START; + for( ; *str && state != CORE_INVALID; str++ ) { + NEXT_SYMBOL = *str; + if (NEXT_SYMBOL==',') /* end of this input */ { + str++; + break; + } + switch(state) { + case CORE_START: + if(ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INT; + } + else if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { + state = CORE_S1; + } + else if( NEXT_SYMBOL == '.' ) { + state = CORE_FLOAT; + } + else { + state = CORE_INVALID; + transition_count[CORE_INVALID]++; + } + transition_count[CORE_START]++; + break; + case CORE_S1: + if(ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INT; + transition_count[CORE_S1]++; + } + else if( NEXT_SYMBOL == '.' 
) { + state = CORE_FLOAT; + transition_count[CORE_S1]++; + } + else { + state = CORE_INVALID; + transition_count[CORE_S1]++; + } + break; + case CORE_INT: + if( NEXT_SYMBOL == '.' ) { + state = CORE_FLOAT; + transition_count[CORE_INT]++; + } + else if(!ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INVALID; + transition_count[CORE_INT]++; + } + break; + case CORE_FLOAT: + if( NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e' ) { + state = CORE_S2; + transition_count[CORE_FLOAT]++; + } + else if(!ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INVALID; + transition_count[CORE_FLOAT]++; + } + break; + case CORE_S2: + if( NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-' ) { + state = CORE_EXPONENT; + transition_count[CORE_S2]++; + } + else { + state = CORE_INVALID; + transition_count[CORE_S2]++; + } + break; + case CORE_EXPONENT: + if(ee_isdigit(NEXT_SYMBOL)) { + state = CORE_SCIENTIFIC; + transition_count[CORE_EXPONENT]++; + } + else { + state = CORE_INVALID; + transition_count[CORE_EXPONENT]++; + } + break; + case CORE_SCIENTIFIC: + if(!ee_isdigit(NEXT_SYMBOL)) { + state = CORE_INVALID; + transition_count[CORE_INVALID]++; + } + break; + default: + break; + } + } + *instr=str; + return state; +} diff --git a/coremark/src/core_util.c b/coremark/src/core_util.c new file mode 100755 index 0000000..03f0fdc --- /dev/null +++ b/coremark/src/core_util.c @@ -0,0 +1,210 @@ +/* +Author : Shay Gal-On, EEMBC + +This file is part of EEMBC(R) and CoreMark(TM), which are Copyright (C) 2009 +All rights reserved. + +EEMBC CoreMark Software is a product of EEMBC and is provided under the terms of the +CoreMark License that is distributed with the official EEMBC COREMARK Software release. +If you received this EEMBC CoreMark Software without the accompanying CoreMark License, +you must discontinue use and download the official release from www.coremark.org. 
+ +Also, if you are publicly displaying scores generated from the EEMBC CoreMark software, +make sure that you are in compliance with Run and Reporting rules specified in the accompanying readme.txt file. + +EEMBC +4354 Town Center Blvd. Suite 114-200 +El Dorado Hills, CA, 95762 +*/ +#include "coremark.h" +/* Function: get_seed + Get a value that cannot be determined at compile time. + + Since different embedded systems and compilers are used, 3 different methods are provided: + 1 - Using a volatile variable. This method is only valid if the compiler is forced to generate code that + reads the value of a volatile variable from memory at run time. + Please note, if using this method, you would need to modify core_portme.c to generate a training profile. + 2 - Command line arguments. This is the preferred method if command line arguments are supported. + 3 - System function. If none of the first 2 methods is available on the platform, + a system function which is not a stub can be used. + + e.g. read the value on GPIO pins connected to switches, or invoke special simulator functions. 
+*/ +#if (SEED_METHOD==SEED_VOLATILE) + extern volatile ee_s32 seed1_volatile; + extern volatile ee_s32 seed2_volatile; + extern volatile ee_s32 seed3_volatile; + extern volatile ee_s32 seed4_volatile; + extern volatile ee_s32 seed5_volatile; + ee_s32 get_seed_32(int i) { + ee_s32 retval; + switch (i) { + case 1: + retval=seed1_volatile; + break; + case 2: + retval=seed2_volatile; + break; + case 3: + retval=seed3_volatile; + break; + case 4: + retval=seed4_volatile; + break; + case 5: + retval=seed5_volatile; + break; + default: + retval=0; + break; + } + return retval; + } +#elif (SEED_METHOD==SEED_ARG) +ee_s32 parseval(char *valstring) { + ee_s32 retval=0; + ee_s32 neg=1; + int hexmode=0; + if (*valstring == '-') { + neg=-1; + valstring++; + } + if ((valstring[0] == '0') && (valstring[1] == 'x')) { + hexmode=1; + valstring+=2; + } + /* first look for digits */ + if (hexmode) { + while (((*valstring >= '0') && (*valstring <= '9')) || ((*valstring >= 'a') && (*valstring <= 'f'))) { + ee_s32 digit=*valstring-'0'; + if (digit>9) + digit=10+*valstring-'a'; + retval*=16; + retval+=digit; + valstring++; + } + } else { + while ((*valstring >= '0') && (*valstring <= '9')) { + ee_s32 digit=*valstring-'0'; + retval*=10; + retval+=digit; + valstring++; + } + } + /* now add qualifiers */ + if (*valstring=='K') + retval*=1024; + if (*valstring=='M') + retval*=1024*1024; + + retval*=neg; + return retval; +} + +ee_s32 get_seed_args(int i, int argc, char *argv[]) { + if (argc>i) + return parseval(argv[i]); + return 0; +} + +#elif (SEED_METHOD==SEED_FUNC) +/* If using OS based function, you must define and implement the functions below in core_portme.h and core_portme.c ! 
*/ +ee_s32 get_seed_32(int i) { + ee_s32 retval; + switch (i) { + case 1: + retval=portme_sys1(); + break; + case 2: + retval=portme_sys2(); + break; + case 3: + retval=portme_sys3(); + break; + case 4: + retval=portme_sys4(); + break; + case 5: + retval=portme_sys5(); + break; + default: + retval=0; + break; + } + return retval; +} +#endif + +/* Function: crc* + Service functions to calculate 16b CRC code. + +*/ +ee_u16 crcu8(ee_u8 data, ee_u16 crc ) +{ + ee_u8 i=0,x16=0,carry=0; + + for (i = 0; i < 8; i++) + { + x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); + data >>= 1; + + if (x16 == 1) + { + crc ^= 0x4002; + carry = 1; + } + else + carry = 0; + crc >>= 1; + if (carry) + crc |= 0x8000; + else + crc &= 0x7fff; + } + return crc; +} +ee_u16 crcu16(ee_u16 newval, ee_u16 crc) { + crc=crcu8( (ee_u8) (newval) ,crc); + crc=crcu8( (ee_u8) ((newval)>>8) ,crc); + return crc; +} +ee_u16 crcu32(ee_u32 newval, ee_u16 crc) { + crc=crc16((ee_s16) newval ,crc); + crc=crc16((ee_s16) (newval>>16) ,crc); + return crc; +} +ee_u16 crc16(ee_s16 newval, ee_u16 crc) { + return crcu16((ee_u16)newval, crc); +} + +ee_u8 check_data_types() { + ee_u8 retval=0; + if (sizeof(ee_u8) != 1) { + ee_printf("ERROR: ee_u8 is not an 8b datatype!\n"); + retval++; + } + if (sizeof(ee_u16) != 2) { + ee_printf("ERROR: ee_u16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s16) != 2) { + ee_printf("ERROR: ee_s16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s32) != 4) { + ee_printf("ERROR: ee_s32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_u32) != 4) { + ee_printf("ERROR: ee_u32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_ptr_int) != sizeof(int *)) { + ee_printf("ERROR: ee_ptr_int is not a datatype that holds an int pointer!\n"); + retval++; + } + if (retval>0) { + ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n"); + } + return retval; +} diff --git a/dhrystone/Makefile b/dhrystone/Makefile new file mode 100644 index 
0000000..5021855 --- /dev/null +++ b/dhrystone/Makefile @@ -0,0 +1,3 @@ +NAME = dhrystone +SRCS = dry.c +include $(AM_HOME)/Makefile diff --git a/dhrystone/dry.c b/dhrystone/dry.c new file mode 100644 index 0000000..542e39c --- /dev/null +++ b/dhrystone/dry.c @@ -0,0 +1,950 @@ +/****************** "DHRYSTONE" Benchmark Program ***************************/ +#define Version "C, Version 2.2" +/* File: dhry_1.c (part 2 of 3) + * Author: Reinhold P. Weicker + * Siemens Nixdorf, Paderborn/Germany + * weicker@specbench.org + * Date: May 25, 1988 + * Modified: Steven Pemberton, CWI, Amsterdam; Steven.Pemberton@cwi.nl + * Date: October, 1993; March 1995 + * Included both files into one source, that gets compiled + * in two passes. Made program auto-compiling, and auto-running, + * and generally made it much easier to use. + * + * Original Version (in Ada) published in + * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984), + * pp. 1013 - 1030, together with the statistics + * on which the distribution of statements etc. is based. + * + * In this C version, the following C library functions are used: + * - strcpy, strcmp (inside the measurement loop) + * - printf, scanf (outside the measurement loop) + * In addition, Berkeley UNIX system calls "times ()" or "time ()" + * are used for execution time measurement. For measurements + * on other systems, these calls have to be changed. + * + * Collection of Results: + * Reinhold Weicker (address see above) and + * + * Rick Richardson + * PC Research. Inc. + * 94 Apple Orchard Drive + * Tinton Falls, NJ 07724 + * Phone: (201) 389-8963 (9-17 EST) + * Usenet: ...!uunet!pcrat!rick + * + * Please send results to Rick Richardson and/or Reinhold Weicker. + * Complete information should be given on hardware and software used. + * Hardware information includes: Machine type, CPU, type and size + * of caches; for microprocessors: clock frequency, memory speed + * (number of wait states). 
+ * Software information includes: Compiler (and runtime library) + * manufacturer and version, compilation switches, OS version. + * The Operating System version may give an indication about the compiler; + * Dhrystone itself performs no OS calls in the measurement loop. + * + * The complete output generated by the program should be mailed + * such that at least some checks for correctness can be made. + * + *************************************************************************** + * + * Defines: The following "Defines" are possible: + * -DREG (default: Not defined) + * As an approximation to what an average C programmer + * might do, causes the "register" storage class to be applied + * - for local variables, if they are used (dynamically) + * five or more times + * - for parameters if they are used (dynamically) + * six or more times + * Note that an optimal "register" strategy is + * compiler-dependent, and that "register" declarations + * do not necessarily lead to faster execution. + * -DNOSTRUCTASSIGN (default: Not defined) + * Define if the C compiler does not support + * assignment of structures. + * -DNOENUMS (default: Not defined) + * Define if the C compiler does not support + * enumeration types. + * -DTIMES (default) + * -DTIME + * The "times" function of UNIX (returning process times) + * or the "time" function (returning wallclock time) + * is used for measurement. + * For single user machines, "time ()" is adequate. For + * multi-user machines where you cannot get single-user + * access, use the "times ()" function. If you have + * neither, use a stopwatch in the dead of night. + * "printf"s are provided marking the points "Start Timer" + * and "Stop Timer". DO NOT use the UNIX "time(1)" + * command, as this will measure the total time to + * run this program, which will (erroneously) include + * the time to allocate storage (malloc) and to perform + * the initialization. 
+ * -DHZ=nnn + * In Berkeley UNIX, the function "times" returns process + * time in 1/HZ seconds, with HZ = 60 for most systems. + * CHECK YOUR SYSTEM DESCRIPTION BEFORE YOU JUST APPLY + * A VALUE. + * + *************************************************************************** + * + * History: Version C/2.1 was made for two reasons: + * + * 1) There was an obvious need for a common C version of + * Dhrystone, since C is at present the most popular system + * programming language for the class of processors + * (microcomputers, minicomputers) where Dhrystone is used most. + * There should be, as far as possible, only one C version of + * Dhrystone such that results can be compared without + * restrictions. In the past, the C versions distributed + * by Rick Richardson (Version 1.1) and by Reinhold Weicker + * had small (though not significant) differences. + * + * 2) As far as it is possible without changes to the Dhrystone + * statistics, optimizing compilers should be prevented from + * removing significant statements. + * + * This C version has been developed in cooperation with + * Rick Richardson (Tinton Falls, NJ), it incorporates many + * ideas from the "Version 1.1" distributed previously by + * him over the UNIX network Usenet. + * I also thank Chaim Benedelac (National Semiconductor), + * David Ditzel (SUN), Earl Killian and John Mashey (MIPS), + * Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley) + * for their help with comments on earlier versions of the + * benchmark. + * + * Changes: In the initialization part, this version follows mostly + * Rick Richardson's version distributed via Usenet, not the + * version distributed earlier via floppy disk by Reinhold Weicker. + * As a concession to older compilers, names have been made + * unique within the first 8 characters. + * Inside the measurement loop, this version follows the + * version previously distributed by Reinhold Weicker. 
+ * + * At several places in the benchmark, code has been added, + * but within the measurement loop only in branches that + * are not executed. The intention is that optimizing compilers + * should be prevented from moving code out of the measurement + * loop, or from removing code altogether. Since the statements + * that are executed within the measurement loop have NOT been + * changed, the numbers defining the "Dhrystone distribution" + * (distribution of statements, operand types and locality) + * still hold. Except for sophisticated optimizing compilers, + * execution times for this version should be the same as + * for previous versions. + * + * Since it has proven difficult to subtract the time for the + * measurement loop overhead in a correct way, the loop check + * has been made a part of the benchmark. This does have + * an impact - though a very minor one - on the distribution + * statistics which have been updated for this version. + * + * All changes within the measurement loop are described + * and discussed in the companion paper "Rationale for + * Dhrystone version 2". + * + * Because of the self-imposed limitation that the order and + * distribution of the executed statements should not be + * changed, there are still cases where optimizing compilers + * may not generate code for some statements. To a certain + * degree, this is unavoidable for small synthetic benchmarks. + * Users of the benchmark are advised to check code listings + * whether code is generated for all statements of Dhrystone. + * + * Version 2.1 is identical to version 2.0 distributed via + * the UNIX network Usenet in March 1988 except that it corrects + * some minor deficiencies that were found by users of version 2.0. + * The only change within the measurement loop is that a + * non-executed "else" part was added to the "if" statement in + * Func_3, and a non-executed "else" part removed from Proc_3. 
+ * + * Version C/2.2, Steven Pemberton, October 1993 + * Functionally, identical to version 2.2; the changes are in + * how you compile and use it: + * - Everything is in one file now, but compiled in 2 passes + * - Compile (and run) by running the file through the shell: 'sh dhry.c' + * - Uses the system definition of HZ if one can be found + * - HZ must be defined, otherwise it won't compile (no defaults here) + * - The (uninteresting) output is printed to stderr (dhry2 > /dev/null) + * - The number of loops is passed as a parameter, rather than read + * (dhry2 500000) + * - If the number of loops is insufficient to get a good result, + * it repeats it with loops*10 until it is enough (rather than just + * stopping) + * - Output says which sort of clock it is using, and the HZ value + * - You can use -DREG instead of the -DREG=register of previous versions + * - Some stylistic cleanups. + * + *************************************************************************** + * + * Compilation model and measurement (IMPORTANT): + * + * The following "ground rules" apply for measurements: + * - Separate compilation + * - No procedure merging + * - Otherwise, compiler optimizations are allowed but should be indicated + * - Default results are those without register declarations + * See the companion paper "Rationale for Dhrystone Version 2" for a more + * detailed discussion of these ground rules. + * + * For 16-Bit processors (e.g. 80186, 80286), times for all compilation + * models ("small", "medium", "large" etc.) should be given if possible, + * together with a definition of these models for the compiler system used. + * + ************************************************************************** + * + * Dhrystone (C version) statistics: + * + * [Comment from the first distribution, updated for version 2. + * Note that because of language differences, the numbers are slightly + * different from the Ada version.] 
+ * + * The following program contains statements of a high level programming + * language (here: C) in a distribution considered representative: + * + * assignments 52 (51.0 %) + * control statements 33 (32.4 %) + * procedure, function calls 17 (16.7 %) + * + * 103 statements are dynamically executed. The program is balanced with + * respect to the three aspects: + * + * - statement type + * - operand type + * - operand locality + * operand global, local, parameter, or constant. + * + * The combination of these three aspects is balanced only approximately. + * + * 1. Statement Type: + * ----------------- number + * + * V1 = V2 9 + * (incl. V1 = F(..) + * V = Constant 12 + * Assignment, 7 + * with array element + * Assignment, 6 + * with record component + * -- + * 34 34 + * + * X = Y +|-|"&&"|"|" Z 5 + * X = Y +|-|"==" Constant 6 + * X = X +|- 1 3 + * X = Y *|/ Z 2 + * X = Expression, 1 + * two operators + * X = Expression, 1 + * three operators + * -- + * 18 18 + * + * if .... 14 + * with "else" 7 + * without "else" 7 + * executed 3 + * not executed 4 + * for ... 7 | counted every time + * while ... 4 | the loop condition + * do ... while 1 | is evaluated + * switch ... 1 + * break 1 + * declaration with 1 + * initialization + * -- + * 34 34 + * + * P (...) procedure call 11 + * user procedure 10 + * library procedure 1 + * X = F (...) + * function call 6 + * user function 5 + * library function 1 + * -- + * 17 17 + * --- + * 103 + * + * The average number of parameters in procedure or function calls + * is 1.82 (not counting the function values as implicit parameters). + * + * 2. Operators + * ------------ + * number approximate + * percentage + * + * Arithmetic 32 50.8 + * + * + 21 33.3 + * - 7 11.1 + * * 3 4.8 + * / (int div) 1 1.6 + * + * Comparison 27 42.8 + * + * == 9 14.3 + * /= 4 6.3 + * > 1 1.6 + * < 3 4.8 + * >= 1 1.6 + * <= 9 14.3 + * + * Logic 4 6.3 + * + * && (AND-THEN) 1 1.6 + * | (OR) 1 1.6 + * ! (NOT) 2 3.2 + * + * -- ----- + * 63 100.1 + * + * + * 3. 
Operand Type (counted once per operand reference): + * --------------- + * number approximate + * percentage + * + * Integer 175 72.3 % + * Character 45 18.6 % + * Pointer 12 5.0 % + * String30 6 2.5 % + * Array 2 0.8 % + * Record 2 0.8 % + * --- ------- + * 242 100.0 % + * + * When there is an access path leading to the final operand (e.g. a record + * component), only the final data type on the access path is counted. + * + * + * 4. Operand Locality: + * ------------------- + * number approximate + * percentage + * + * local variable 114 47.1 % + * global variable 22 9.1 % + * parameter 45 18.6 % + * value 23 9.5 % + * reference 22 9.1 % + * function result 6 2.5 % + * constant 55 22.7 % + * --- ------- + * 242 100.0 % + * + * The program does not compute anything meaningful, but it is syntactically + * and semantically correct. All variables have a value assigned to them + * before they are used as a source operand. + * + * There has been no explicit effort to account for the effects of a + * cache, or to balance the use of long or short displacements for code or + * data. 
+ * + *************************************************************************** + */ + +/* Compiler and system dependent definitions: */ + +/* variables for time measurement: */ + +#include +#include +#include + +static uint32_t uptime_ms() { return io_read(AM_TIMER_UPTIME).us / 1000; } +#define Start_Timer() Begin_Time = uptime_ms() +#define Stop_Timer() End_Time = uptime_ms() + +#define NUMBER_OF_RUNS 500000 /* Default number of runs */ +#define PASS2 + +#ifdef NOSTRUCTASSIGN +#define structassign(d, s) memcpy(&(d), &(s), sizeof(d)) +#else +#define structassign(d, s) d = s +#endif + +#ifdef NOENUM +#define Ident_1 0 +#define Ident_2 1 +#define Ident_3 2 +#define Ident_4 3 +#define Ident_5 4 + typedef int Enumeration; +#else + typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5} + Enumeration; +#endif + /* for boolean and enumeration types in Ada, Pascal */ + +/* General definitions: */ + + +#define Null 0 + /* Value of a Null pointer */ + +typedef int One_Thirty; +typedef int One_Fifty; +typedef char Capital_Letter; +typedef int Boolean; +typedef char Str_30 [31]; +typedef int Arr_1_Dim [50]; +typedef int Arr_2_Dim [50] [50]; + +typedef struct record + { + struct record *Ptr_Comp; + Enumeration Discr; + union { + struct { + Enumeration Enum_Comp; + int Int_Comp; + char Str_Comp [31]; + } var_1; + struct { + Enumeration E_Comp_2; + char Str_2_Comp [31]; + } var_2; + struct { + char Ch_1_Comp; + char Ch_2_Comp; + } var_3; + } variant; + } Rec_Type, *Rec_Pointer; + +/* Global Variables: */ + +Rec_Pointer Ptr_Glob, + Next_Ptr_Glob; +int Int_Glob; +Boolean Bool_Glob; +char Ch_1_Glob, + Ch_2_Glob; +int Arr_1_Glob [50]; +int Arr_2_Glob [50] [50]; + +Enumeration Func_1 (); + /* forward declaration necessary since Enumeration may not simply be int */ + +#ifndef REG + Boolean Reg = false; +#define REG + /* REG becomes defined as empty */ + /* i.e. 
no register variables */ +#else + Boolean Reg = true; +#undef REG +#define REG register +#endif + +Boolean Done; + +long Begin_Time, + End_Time, + User_Time; +float Microseconds, + Dhrystones_Per_Second; + +/* end of variables for time measurement */ + +static char memory[1024]; +static char *free_mem = &memory[0]; + +static char* myalloc(size_t size) { + while ((unsigned long)free_mem % 4 != 0) free_mem ++; + char *ret = free_mem; + free_mem += size; + return ret; +} + +void Proc_6 (Enumeration, Enumeration*); +void Proc_3 (Rec_Pointer*); +void Proc_7 (One_Fifty a, One_Fifty b, One_Fifty* c); +Boolean Func_2 (Str_30, Str_30); +void Proc_8(Arr_1_Dim, Arr_2_Dim, int, int); +Boolean Func_3 (Enumeration); + +void Proc_1 (Ptr_Val_Par) +/******************/ + +REG Rec_Pointer Ptr_Val_Par; + /* executed once */ +{ + REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp; + /* == Ptr_Glob_Next */ + /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */ + /* corresponds to "rename" in Ada, "with" in Pascal */ + + structassign (*Ptr_Val_Par->Ptr_Comp, *Ptr_Glob); + Ptr_Val_Par->variant.var_1.Int_Comp = 5; + Next_Record->variant.var_1.Int_Comp + = Ptr_Val_Par->variant.var_1.Int_Comp; + Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp; + Proc_3 (&Next_Record->Ptr_Comp); + /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp + == Ptr_Glob->Ptr_Comp */ + if (Next_Record->Discr == Ident_1) + /* then, executed */ + { + Next_Record->variant.var_1.Int_Comp = 6; + Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp, + &Next_Record->variant.var_1.Enum_Comp); + Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp; + Proc_7 (Next_Record->variant.var_1.Int_Comp, 10, + &Next_Record->variant.var_1.Int_Comp); + } + else /* not executed */ + structassign (*Ptr_Val_Par, *Ptr_Val_Par->Ptr_Comp); +} /* Proc_1 */ + + +void Proc_2 (Int_Par_Ref) +/******************/ + /* executed once */ + /* *Int_Par_Ref == 1, becomes 4 */ + +One_Fifty *Int_Par_Ref; +{ + One_Fifty Int_Loc; + Enumeration Enum_Loc; + + Int_Loc = *Int_Par_Ref + 
10; + do /* executed once */ + if (Ch_1_Glob == 'A') + /* then, executed */ + { + Int_Loc -= 1; + *Int_Par_Ref = Int_Loc - Int_Glob; + Enum_Loc = Ident_1; + } /* if */ + while (Enum_Loc != Ident_1); /* true */ +} /* Proc_2 */ + + +void Proc_3 (Ptr_Ref_Par) +/******************/ + /* executed once */ + /* Ptr_Ref_Par becomes Ptr_Glob */ + +Rec_Pointer *Ptr_Ref_Par; + +{ + if (Ptr_Glob != Null) + /* then, executed */ + *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp; + Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); +} /* Proc_3 */ + + +void Proc_4 () /* without parameters */ +/*******/ + /* executed once */ +{ + Boolean Bool_Loc; + + Bool_Loc = Ch_1_Glob == 'A'; + Bool_Glob = Bool_Loc | Bool_Glob; + Ch_2_Glob = 'B'; +} /* Proc_4 */ + + +void Proc_5 () /* without parameters */ +/*******/ + /* executed once */ +{ + Ch_1_Glob = 'A'; + Bool_Glob = false; +} /* Proc_5 */ + + + /* Procedure for the assignment of structures, */ + /* if the C compiler doesn't support this feature */ +#ifdef NOSTRUCTASSIGN +memcpy (d, s, l) +register char *d; +register char *s; +register int l; +{ + while (l--) *d++ = *s++; +} +#endif + + +#ifndef REG +#define REG + /* REG becomes defined as empty */ + /* i.e. no register variables */ +#else +#undef REG +#define REG register +#endif + +extern int Int_Glob; +extern char Ch_1_Glob; + + +void Proc_6 (Enum_Val_Par, Enum_Ref_Par) +/*********************************/ + /* executed once */ + /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */ + +Enumeration Enum_Val_Par; +Enumeration *Enum_Ref_Par; +{ + *Enum_Ref_Par = Enum_Val_Par; + if (! 
Func_3 (Enum_Val_Par)) + /* then, not executed */ + *Enum_Ref_Par = Ident_4; + switch (Enum_Val_Par) + { + case Ident_1: + *Enum_Ref_Par = Ident_1; + break; + case Ident_2: + if (Int_Glob > 100) + /* then */ + *Enum_Ref_Par = Ident_1; + else *Enum_Ref_Par = Ident_4; + break; + case Ident_3: /* executed */ + *Enum_Ref_Par = Ident_2; + break; + case Ident_4: break; + case Ident_5: + *Enum_Ref_Par = Ident_3; + break; + } /* switch */ +} /* Proc_6 */ + + +void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref) +{ + One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 2; + *Int_Par_Ref = Int_2_Par_Val + Int_Loc; +} /* Proc_7 */ + + +void Proc_8 (Arr_1_Par_Ref, Arr_2_Par_Ref, Int_1_Par_Val, Int_2_Par_Val) +/*********************************************************************/ + /* executed once */ + /* Int_Par_Val_1 == 3 */ + /* Int_Par_Val_2 == 7 */ +Arr_1_Dim Arr_1_Par_Ref; +Arr_2_Dim Arr_2_Par_Ref; +int Int_1_Par_Val; +int Int_2_Par_Val; +{ + REG One_Fifty Int_Index; + REG One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 5; + Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val; + Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc]; + Arr_1_Par_Ref [Int_Loc+30] = Int_Loc; + for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index) + Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc; + Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1; + Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc]; + Int_Glob = 5; +} /* Proc_8 */ + + +Enumeration Func_1 (Ch_1_Par_Val, Ch_2_Par_Val) +/*************************************************/ + /* executed three times */ + /* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */ + /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */ + /* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */ + +Capital_Letter Ch_1_Par_Val; +Capital_Letter Ch_2_Par_Val; +{ + Capital_Letter Ch_1_Loc; + Capital_Letter Ch_2_Loc; + + Ch_1_Loc = Ch_1_Par_Val; + Ch_2_Loc = Ch_1_Loc; + if (Ch_2_Loc != Ch_2_Par_Val) + /* then, 
executed */ + return (Ident_1); + else /* not executed */ + { + Ch_1_Glob = Ch_1_Loc; + return (Ident_2); + } +} /* Func_1 */ + + +Boolean Func_2 (Str_1_Par_Ref, Str_2_Par_Ref) +/*************************************************/ + /* executed once */ + /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */ + /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */ + +Str_30 Str_1_Par_Ref; +Str_30 Str_2_Par_Ref; +{ + REG One_Thirty Int_Loc; + Capital_Letter Ch_Loc; + + Int_Loc = 2; + while (Int_Loc <= 2) /* loop body executed once */ + if (Func_1 (Str_1_Par_Ref[Int_Loc], + Str_2_Par_Ref[Int_Loc+1]) == Ident_1) + /* then, executed */ + { + Ch_Loc = 'A'; + Int_Loc += 1; + } /* if, while */ + if (Ch_Loc >= 'W' && Ch_Loc < 'Z') + /* then, not executed */ + Int_Loc = 7; + if (Ch_Loc == 'R') { + /* then, not executed */ + return (true); + } + else /* executed */ + { + if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0) + { + Int_Loc += 7; + Int_Glob = Int_Loc; + return (true); + } + else /* executed */ + return (false); + } /* if Ch_Loc */ +} /* Func_2 */ + + +Boolean Func_3 (Enum_Par_Val) +/***************************/ + /* executed once */ + /* Enum_Par_Val == Ident_3 */ +Enumeration Enum_Par_Val; +{ + Enumeration Enum_Loc; + + Enum_Loc = Enum_Par_Val; + if (Enum_Loc == Ident_3) + /* then, executed */ + return (true); + else /* not executed */ + return (false); +} /* Func_3 */ + + +Boolean pass = true; +Boolean check(int cond) { + if (!cond) pass = false; + return cond; +} +int main () +/*****/ + + /* main program, corresponds to procedures */ + /* Main and Proc_0 in the Ada version */ +{ + One_Fifty Int_1_Loc; + REG One_Fifty Int_2_Loc; + One_Fifty Int_3_Loc; + REG char Ch_Index; + Enumeration Enum_Loc; + Str_30 Str_1_Loc; + Str_30 Str_2_Loc; + REG int Run_Index; + REG int Number_Of_Runs; + + ioe_init(); + + Number_Of_Runs = NUMBER_OF_RUNS; + + /* Initializations */ + + Next_Ptr_Glob = (Rec_Pointer) myalloc (sizeof (Rec_Type)); + Ptr_Glob = (Rec_Pointer) myalloc (sizeof 
(Rec_Type)); + + Ptr_Glob->Ptr_Comp = Next_Ptr_Glob; + Ptr_Glob->Discr = Ident_1; + Ptr_Glob->variant.var_1.Enum_Comp = Ident_3; + Ptr_Glob->variant.var_1.Int_Comp = 40; + strcpy (Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING"); + strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); + + Arr_2_Glob [8][7] = 10; + /* Was missing in published program. Without this statement, */ + /* Arr_2_Glob [8][7] would have an undefined value. */ + /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */ + /* overflow may occur for this array element. */ + + printf ("Dhrystone Benchmark, Version %s\n", Version); + + Done = false; + while (!Done) { + + printf ("Trying %d runs through Dhrystone.\n", Number_Of_Runs); + + /***************/ + /* Start timer */ + /***************/ + + Start_Timer(); + + for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) + { + + Proc_5(); + Proc_4(); + /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */ + Int_1_Loc = 2; + Int_2_Loc = 3; + strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); + Enum_Loc = Ident_2; + Bool_Glob = ! 
Func_2 (Str_1_Loc, Str_2_Loc); + /* Bool_Glob == 1 */ + while (Int_1_Loc < Int_2_Loc) /* loop body executed once */ + { + Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc; + /* Int_3_Loc == 7 */ + Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc); + /* Int_3_Loc == 7 */ + Int_1_Loc += 1; + } /* while */ + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc); + /* Int_Glob == 5 */ + Proc_1 (Ptr_Glob); + for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index) + /* loop body executed twice */ + { + if (Enum_Loc == Func_1 (Ch_Index, 'C')) + /* then, not executed */ + { + Proc_6 (Ident_1, &Enum_Loc); + strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING"); + Int_2_Loc = Run_Index; + Int_Glob = Run_Index; + } + } + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Int_2_Loc = Int_2_Loc * Int_1_Loc; + Int_1_Loc = Int_2_Loc / Int_3_Loc; + Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc; + /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */ + Proc_2 (&Int_1_Loc); + /* Int_1_Loc == 5 */ + + } /* loop "for Run_Index" */ + + /**************/ + /* Stop timer */ + /**************/ + + Stop_Timer(); + + User_Time = End_Time - Begin_Time; + + Done = true; + } + + if (!check(Int_Glob == 5)) { + printf("Int_Glob: %d\n", Int_Glob); + printf(" should be: %d\n", 5); + } + if (!check(Bool_Glob == 1)) { + printf("Bool_Glob: %d\n", Bool_Glob); + printf(" should be: %d\n", 1); + } + if (!check(Ch_1_Glob == 'A')) { + printf("Ch_1_Glob: %c\n", Ch_1_Glob); + printf(" should be: %c\n", 'A'); + } + if (!check(Ch_2_Glob == 'B')) { + printf("Ch_2_Glob: %c\n", Ch_2_Glob); + printf(" should be: %c\n", 'B'); + } + if (!check(Arr_1_Glob[8] == 7)) { + printf("Arr_1_Glob[8]: %d\n", Arr_1_Glob[8]); + printf(" should be: %d\n", 7); + } + if (!check(Arr_2_Glob[8][7] == Number_Of_Runs + 10)) { + printf("Arr_2_Glob[8][7]: %d\n", Arr_2_Glob[8][7]); + printf(" should be: Number_Of_Runs + 10\n"); + } + + if (!check((int)Ptr_Glob->Discr == 0)) { + 
printf("Ptr_Glob->Discr: %d\n", Ptr_Glob->Discr); + printf(" should be: %d\n", 0); + } + if (!check(Ptr_Glob->variant.var_1.Enum_Comp == 2)) { + printf("Ptr_Glob->Enum_Comp: %d\n", Ptr_Glob->variant.var_1.Enum_Comp); + printf(" should be: %d\n", 2); + } + if (!check(Ptr_Glob->variant.var_1.Int_Comp == 17)) { + printf("Ptr_Glob->Int_Comp: %d\n", Ptr_Glob->variant.var_1.Int_Comp); + printf(" should be: %d\n", 17); + } + if (!check(strcmp(Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING") == 0)) { + printf("Ptr_Glob->Str_Comp: %s\n", Ptr_Glob->variant.var_1.Str_Comp); + printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n"); + } + + if (!check((int)Next_Ptr_Glob->Discr == 0)) { + printf("Next_Ptr_Glob->Discr: %d\n", Next_Ptr_Glob->Discr); + printf(" should be: %d\n", 0); + } + if (!check(Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)) { + printf("Next_Ptr_Glob->Enum_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp); + printf(" should be: %d\n", 1); + } + if (!check(Next_Ptr_Glob->variant.var_1.Int_Comp == 18)) { + printf("Next_Ptr_Glob->Int_Comp: %d\n", Next_Ptr_Glob->variant.var_1.Int_Comp); + printf(" should be: %d\n", 18); + } + if (!check(strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp, "DHRYSTONE PROGRAM, SOME STRING") == 0)) { + printf("Next_Ptr_Glob->Str_Comp: %s\n", Next_Ptr_Glob->variant.var_1.Str_Comp); + printf(" should be: DHRYSTONE PROGRAM, SOME STRING\n"); + } + + if (!check(Int_1_Loc == 5)) { + printf("Int_1_Loc: %d\n", Int_1_Loc); + printf(" should be: %d\n", 5); + } + if (!check(Int_2_Loc == 13)) { + printf("Int_2_Loc: %d\n", Int_2_Loc); + printf(" should be: %d\n", 13); + } + if (!check(Int_3_Loc == 7)) { + printf("Int_3_Loc: %d\n", Int_3_Loc); + printf(" should be: %d\n", 7); + } + if (!check(Enum_Loc == 1)) { + printf("Enum_Loc: %d\n", Enum_Loc); + printf(" should be: %d\n", 1); + } + + if (!check(strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)) { + printf("Str_1_Loc: %s\n", Str_1_Loc); + printf(" should be: DHRYSTONE 
PROGRAM, 1'ST STRING\n"); + } + if (!check(strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)) { + printf("Str_2_Loc: %s\n", Str_2_Loc); + printf(" should be: DHRYSTONE PROGRAM, 2'ND STRING\n"); + } + + printf ("Finished in %d ms\n", (int)User_Time); + printf("==================================================\n"); + printf("Dhrystone %s %d Marks\n", pass ? "PASS" : "FAIL", + (int)User_Time > 0 ? 880900 / (int)User_Time * NUMBER_OF_RUNS/ 500000 : 0); /* report 0 Marks instead of dividing by zero when the run took 0 ms */ + printf(" vs. 100000 Marks (i7-7700K @ 4.20GHz)\n"); + + return 0; +} + + diff --git a/microbench/Makefile b/microbench/Makefile new file mode 100644 index 0000000..d815e4f --- /dev/null +++ b/microbench/Makefile @@ -0,0 +1,3 @@ +NAME = microbench +SRCS = $(shell find -L ./src/ -name "*.c" -o -name "*.cc") +include $(AM_HOME)/Makefile diff --git a/microbench/include/benchmark.h b/microbench/include/benchmark.h new file mode 100644 index 0000000..48a9d9f --- /dev/null +++ b/microbench/include/benchmark.h @@ -0,0 +1,113 @@ +#ifndef __BENCHMARK_H__ +#define __BENCHMARK_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define MB * 1024 * 1024 +#define KB * 1024 + +#define REF_CPU "i7-7700K @ 4.20GHz" +#define REF_SCORE 100000 + +#define REPEAT 1 + +// size | heap | time | checksum +#define QSORT_S { 100, 1 KB, 0, 0x08467105} +#define QSORT_M { 30000, 128 KB, 0, 0xa3e99fe4} +#define QSORT_L { 100000, 640 KB, 5114, 0xed8cff89} +#define QUEEN_S { 8, 0 KB, 0, 0x0000005c} +#define QUEEN_M { 11, 0 KB, 0, 0x00000a78} +#define QUEEN_L { 12, 0 KB, 4707, 0x00003778} +#define BF_S { 4, 32 KB, 0, 0xa6f0079e} +#define BF_M { 25, 32 KB, 0, 0xa88f8a65} +#define BF_L { 180, 32 KB, 23673, 0x9221e2b3} +#define FIB_S { 2, 1 KB, 0, 0x7cfeddf0} +#define FIB_M { 23, 16 KB, 0, 0x94ad8800} +#define FIB_L { 91, 256 KB, 28318, 0xebdc5f80} +#define SIEVE_S { 100, 1 KB, 0, 0x00000019} +#define SIEVE_M { 200000, 32 KB, 0, 0x00004640} +#define SIEVE_L {10000000, 2 MB, 39361, 0x000a2403} +#define PZ15_S { 0, 1 KB, 0, 0x00000006}
+#define PZ15_M { 1, 256 KB, 0, 0x0000b0df} +#define PZ15_L { 2, 2 MB, 4486, 0x00068b8c} +#define DINIC_S { 10, 8 KB, 0, 0x0000019c} +#define DINIC_M { 80, 512 KB, 0, 0x00004f99} +#define DINIC_L { 128, 1 MB, 10882, 0x0000c248} +#define LZIP_S { 128, 128 KB, 0, 0xe05fc832} +#define LZIP_M { 50000, 1 MB, 0, 0xdc93e90c} +#define LZIP_L { 1048576, 4 MB, 7593, 0x8d62c81f} +#define SSORT_S { 100, 4 KB, 0, 0x4c555e09} +#define SSORT_M { 10000, 512 KB, 0, 0x0db7909b} +#define SSORT_L { 100000, 4 MB, 4504, 0x4f0ab431} +#define MD5_S { 100, 1 KB, 0, 0xf902f28f} +#define MD5_M { 200000, 256 KB, 0, 0xd4f9bc6d} +#define MD5_L {10000000, 16 MB, 17239, 0x27286a42} + +#define BENCHMARK_LIST(def) \ + def(qsort, "qsort", QSORT_S, QSORT_M, QSORT_L, "Quick sort") \ + def(queen, "queen", QUEEN_S, QUEEN_M, QUEEN_L, "Queen placement") \ + def( bf, "bf", BF_S, BF_M, BF_L, "Brainf**k interpreter") \ + def( fib, "fib", FIB_S, FIB_M, FIB_L, "Fibonacci number") \ + def(sieve, "sieve", SIEVE_S, SIEVE_M, SIEVE_L, "Eratosthenes sieve") \ + def( 15pz, "15pz", PZ15_S, PZ15_M, PZ15_L, "A* 15-puzzle search") \ + def(dinic, "dinic", DINIC_S, DINIC_M, DINIC_L, "Dinic's maxflow algorithm") \ + def( lzip, "lzip", LZIP_S, LZIP_M, LZIP_L, "Lzip compression") \ + def(ssort, "ssort", SSORT_S, SSORT_M, SSORT_L, "Suffix sort") \ + def( md5, "md5", MD5_S, MD5_M, MD5_L, "MD5 digest") \ + +// Each benchmark will run REPEAT times + +#define DECL(_name, _sname, _s, _m, _l, _desc) \ + void bench_##_name##_prepare(); \ + void bench_##_name##_run(); \ + int bench_##_name##_validate(); + +BENCHMARK_LIST(DECL) + +typedef struct Setting { + int size; + unsigned long mlim, ref; + uint32_t checksum; +} Setting; + +typedef struct Benchmark { + void (*prepare)(); + void (*run)(); + int (*validate)(); + const char *name, *desc; + Setting settings[3]; +} Benchmark; + +extern Benchmark *current; +extern Setting *setting; + +typedef struct Result { + int pass; + unsigned long tsc, msec; +} Result; + +void prepare(Result *res); 
+void done(Result *res); + +// memory allocation +void* bench_alloc(size_t size); +void bench_free(void *ptr); + +// random number generator +void bench_srand(uint32_t seed); +uint32_t bench_rand(); // return a random number between 0..32767 + +// checksum +uint32_t checksum(void *start, void *end); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/microbench/src/15pz/15pz.cc b/microbench/src/15pz/15pz.cc new file mode 100644 index 0000000..3274699 --- /dev/null +++ b/microbench/src/15pz/15pz.cc @@ -0,0 +1,88 @@ +#include +#include "puzzle.h" +#include "heap.h" + +const int N = 4; + +static int PUZZLE_S[N*N] = { + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 0, 11, + 13, 14, 15, 12, +}; + +static int PUZZLE_M[N*N] = { + 1, 2, 3, 4, + 5, 6, 7, 8, + 12, 0, 14, 13, + 11, 15, 10, 9, +}; + +static int PUZZLE_L[N*N] = { + 0, 2, 3, 4, + 9, 6, 7, 8, + 5, 11, 10, 12, + 1, 15, 13, 14, +}; + +static int ans; + +extern "C" { + +void bench_15pz_prepare() { +} + +void bench_15pz_run() { + N_puzzle puzzle; + int MAXN; + + switch (setting->size) { + case 0: puzzle = N_puzzle(PUZZLE_S); MAXN = 10; break; + case 1: puzzle = N_puzzle(PUZZLE_M); MAXN = 2048; break; + case 2: puzzle = N_puzzle(PUZZLE_L); MAXN = 16384; break; + default: assert(0); + } + assert(puzzle.solvable()); + + auto *heap = (Updatable_heap> *) bench_alloc(sizeof(Updatable_heap>)); + heap->init(MAXN); + heap->push( puzzle, 0 ); + + int n = 0; + ans = -1; + + while( heap->size() != 0 && n != MAXN ) { + N_puzzle top = heap->pop(); + ++n; + + if ( top == N_puzzle::solution() ) { + // We are done + ans = heap->length(top) * n; + return; + } + + if ( top.tile_left_possible() ) { + heap->push( top.tile_left(), heap->length( top ) + 1 ); + } + + if ( top.tile_right_possible() ) { + heap->push( top.tile_right(), heap->length( top ) + 1 ); + } + + if ( top.tile_up_possible() ) { + heap->push( top.tile_up(), heap->length( top ) + 1 ); + } + + if ( top.tile_down_possible() ) { + heap->push( top.tile_down(), heap->length( top ) + 
1 ); + } + } +} + + +int bench_15pz_validate() { + return (uint32_t)ans == setting->checksum; +} + +} + diff --git a/microbench/src/15pz/heap.h b/microbench/src/15pz/heap.h new file mode 100644 index 0000000..9fa1e6a --- /dev/null +++ b/microbench/src/15pz/heap.h @@ -0,0 +1,227 @@ +// Author: Douglas Wilhelm Harder +// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved. + +template +T max(T a, T b) { + return a > b ? a : b; +} + +template +class Updatable_heap { + private: + int M; + class Step; + Step **hash_table; + Step **heap; + int heap_size; + int maximum_heap_size; + + void inline swap( int, int ); + void percolate_down(); + void percolate_up( int ); + Step *pointer( T const & ) const; + + public: + void init(int m); + ~Updatable_heap(); + T pop(); + void push( T const &, int ); + int size() const; + int maximum_size() const; + int length( T const & ) const; +}; + +template +class Updatable_heap::Step { + public: + T element; + Step *next; + int heap_index; + int path_length; + int path_weight; + bool visited; + Step *previous_step; + + void init( T const &, Step *, int, int ); + int length() const; + int weight() const; +}; + +template +void Updatable_heap::init(int m) { + M = m; + heap = (Step **)bench_alloc(sizeof(void *) * M); + hash_table = (Step **)bench_alloc(sizeof(void *) * (M + 1)); + + heap_size = 0; + maximum_heap_size = 0; + for ( int i = 0; i < M; ++i ) { + hash_table[i] = 0; + } +} + +template +Updatable_heap::~Updatable_heap() { + for ( int i = 0; i < M; ++i ) { + Step *ptr = hash_table[i]; + + while ( ptr != 0 ) { + Step *tmp = ptr; + ptr = ptr->next; + } + } +} + +template +T Updatable_heap::pop() { + if ( size() == 0 ) { + return T(); + } + + T top = heap[1]->element; + + if ( size() == 1 ) { + heap_size = 0; + } else { + assert( size() > 1 ); + + heap[1] = heap[size()]; + heap[1]->heap_index = 1; + + --heap_size; + percolate_down(); + } + + return top; +} + +template +void inline Updatable_heap::swap( int i, int j ) { + 
Step *tmp = heap[j]; + heap[j] = heap[i]; + heap[i] = tmp; + + heap[i]->heap_index = i; /* keep each Step's back-pointer in sync with its new slot */ + heap[j]->heap_index = j; +} + +template +void Updatable_heap::percolate_down() { /* sift the root (slot 1) down until neither child has a smaller weight */ + int n = 1; + + while ( 2*n + 1 <= size() ) { /* node n has two children */ + if ( heap[n]->weight() < heap[2*n]->weight() && heap[n]->weight() < heap[2*n + 1]->weight() ) { + return; + } + + if ( heap[2*n]->weight() < heap[2*n + 1]->weight() ) { + swap( n, 2*n ); + n = 2*n; + } else { + assert( heap[2*n]->weight() >= heap[2*n + 1]->weight() ); + + swap( n, 2*n + 1 ); + n = 2*n + 1; + } + } + + if ( 2*n == size() && heap[2*n]->weight() < heap[n]->weight() ) { /* node n has only a left child */ + swap( n, 2*n ); + } +} + +template +void Updatable_heap::percolate_up( int n ) { /* sift slot n up while its parent has a larger weight */ + while ( n != 1 ) { + int parent = n/2; + + if ( heap[parent]->weight() > heap[n]->weight() ) { + swap( parent, n ); + n = parent; + } else { + return; + } + } +} + +template +void Updatable_heap::push( T const &pz, int path_length ) { /* insert pz, or lower the weight of an existing entry if this path is shorter */ + Step *ptr = pointer( pz ); + + if ( ptr == 0 ) { + assert( heap_size <= M ); /* NOTE(review): the heap is 1-indexed and init() allocates only M slots, so this bound still admits writes to heap[M] and heap[M+1] - confirm the intended capacity */ + ++heap_size; + + Step *ptr = (Step*)bench_alloc(sizeof(Step)); /* shadows the outer ptr, which is null on this branch */ + ptr->init( pz, hash_table[pz.hash() & (M - 1)], size(), path_length ); /* new Step becomes the head of its hash chain */ + hash_table[pz.hash() & (M - 1)] = ptr; + heap[size()] = ptr; + + percolate_up( size() ); + + maximum_heap_size = max( maximum_heap_size, size() ); + } else { + if ( !ptr->visited ) { + if ( path_length + ptr->element.lower_bound() < ptr->weight() ) { + ptr->path_weight = path_length + ptr->element.lower_bound(); /* only path_weight is updated here; path_length keeps its old value */ + percolate_up( ptr->heap_index ); + } + } + } +} + +template +int Updatable_heap::size() const { + return heap_size; +} + +template +int Updatable_heap::maximum_size() const { + return maximum_heap_size; +} + +template +int Updatable_heap::length( T const &pz ) const { + Step *ptr = pointer( pz ); + + return ( ptr == 0 ) ?
2147483647 : ptr->length(); +} + +template +typename Updatable_heap::Step *Updatable_heap::pointer( T const &pz ) const { + for ( Step *ptr = hash_table[pz.hash() & (M - 1)]; ptr != 0; ptr = ptr->next ) { + if ( ptr->element == pz ) { + return ptr; + } + } + + return 0; +} + +/**************************************************** + * ************************************************ * + * * Iterator * * + * ************************************************ * + ****************************************************/ + +template +void Updatable_heap::Step::init( T const &pz, Step *n, int hi, int dist ) { + element = pz; + next = n; + heap_index = hi; + path_length = dist; + path_weight = dist + element.lower_bound(); + visited = false; + previous_step = 0; +} + +template +int Updatable_heap::Step::length() const { + return path_length; +} + +template +int Updatable_heap::Step::weight() const { + return path_weight; +} + diff --git a/microbench/src/15pz/puzzle.h b/microbench/src/15pz/puzzle.h new file mode 100644 index 0000000..c0607d5 --- /dev/null +++ b/microbench/src/15pz/puzzle.h @@ -0,0 +1,475 @@ +// Author: Douglas Wilhelm Harder +// Copyright (c) 2009 by Douglas Wilhelm Harder. All rights reserved. +// Url: https://ece.uwaterloo.ca/~dwharder/aads/Algorithms/N_puzzles/ + +template +class N_puzzle { + private: + bool puzzle_valid; + uint8_t zero_i, zero_j; + int8_t manhattan_distance; + int8_t puzzle[N][N]; + int hash_value; + + void determine_hash(); + + static int abs( int n ) { return ( n < 0 ) ? 
-n : n; } + + public: + N_puzzle(); + N_puzzle( int array[N*N] ); + N_puzzle( N_puzzle const & ); + N_puzzle &operator=( N_puzzle const & ); + + bool solvable() const; + bool valid() const; + int lower_bound() const; + unsigned int hash() const; + + bool tile_up_possible() const; + bool tile_down_possible() const; + bool tile_left_possible() const; + bool tile_right_possible() const; + + N_puzzle tile_up() const; + N_puzzle tile_down() const; + N_puzzle tile_left() const; + N_puzzle tile_right() const; + + bool operator==( N_puzzle const & ) const; + bool operator!=( N_puzzle const & ) const; + + N_puzzle static solution(); +}; + +template < int N > +N_puzzle::N_puzzle(): +puzzle_valid( true ), +manhattan_distance( 0 ) { + int array[N*N]; + + for ( int i = 0; i < N*N; ++i ) { + array[i] = i; + } + + int n = 0; + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + int k = bench_rand() % (N*N - n); + puzzle[i][j] = array[k]; + + if ( array[k] == 0 ) { + zero_i = i; + zero_j = j; + } else { + manhattan_distance += abs( ((array[k] - 1) / N) - i ); + manhattan_distance += abs( ((array[k] - 1) % N) - j ); + } + + ++n; + array[k] = array[N*N - n]; + } + } + + determine_hash(); +} + +template < int N > +N_puzzle::N_puzzle( int array[N*N] ): +puzzle_valid( true ), +manhattan_distance( 0 ) { + bool check[N*N]; + + for ( int i = 0; i < N*N; ++i ) { + check[i] = false; + } + + int n = 0; + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + puzzle[i][j] = array[n]; + check[array[n]] = true; + + if ( array[n] == 0 ) { + zero_i = i; + zero_j = j; + } else { + manhattan_distance += abs( ((array[n] - 1) / N) - i ); + manhattan_distance += abs( ((array[n] - 1) % N) - j ); + } + + ++n; + } + } + + for ( int i = 0; i < N*N; ++i ) { + if ( !check[i] ) { + puzzle_valid = false; + return; + } + } + + determine_hash(); +} + +/* + * Determine a hash value for the puzzle. 
+ */ + +template < int N > +void N_puzzle::determine_hash() { + hash_value = 0; + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + hash_value = hash_value*1973 + puzzle[i][j]; + } + } +} + +template < int N > +N_puzzle::N_puzzle( N_puzzle const &pz ): +puzzle_valid( pz.puzzle_valid ), +zero_i( pz.zero_i ), +zero_j( pz.zero_j ), +manhattan_distance( pz.manhattan_distance ), +hash_value( pz.hash_value ) { + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + puzzle[i][j] = pz.puzzle[i][j]; + } + } +} + +template < int N > +N_puzzle &N_puzzle::operator=( N_puzzle const &rhs ) { + puzzle_valid = rhs.puzzle_valid; + zero_i = rhs.zero_i; + zero_j = rhs.zero_j; + manhattan_distance = rhs.manhattan_distance; + hash_value = rhs.hash_value; + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + puzzle[i][j] = rhs.puzzle[i][j]; + } + } + return *this; +} + + +/* + * Moving a tile up is possible as long as + * the blank is not in the last row. + */ + +template +bool N_puzzle::tile_up_possible() const { + return puzzle_valid && (zero_i != N - 1); +} + +/* + * Moving a tile down is possible as long as + * the blank is not in the first row. + */ + +template +bool N_puzzle::tile_down_possible() const { + return puzzle_valid && (zero_i != 0); +} + +/* + * Moving a tile left is possible as long as + * the blank is not in the last column. + */ + +template +bool N_puzzle::tile_left_possible() const { + return puzzle_valid && (zero_j != N - 1); +} + +/* + * Moving a tile right is possible as long as + * the blank is not in the first column. 
+ */ + +template +bool N_puzzle::tile_right_possible() const { + return puzzle_valid && (zero_j != 0); +} + +template +N_puzzle N_puzzle::tile_up() const { + if ( !puzzle_valid ) { + return *this; + } + + N_puzzle result( *this ); + + if ( zero_i == N - 1 ) { + result.puzzle_valid = false; + return result; + } + + result.manhattan_distance += + abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - zero_i ) - + abs( ((puzzle[zero_i + 1][zero_j] - 1) / N) - (zero_i + 1) ); + + result.puzzle[zero_i][zero_j] = puzzle[zero_i + 1][zero_j]; + ++result.zero_i; + result.puzzle[result.zero_i][zero_j] = 0; + + result.determine_hash(); + + return result; +} + +template +N_puzzle N_puzzle::tile_down() const { + if ( !puzzle_valid ) { + return *this; + } + + N_puzzle result( *this ); + + if ( zero_i == 0 ) { + result.puzzle_valid = false; + return result; + } + + result.manhattan_distance += + abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - zero_i ) - + abs( ((puzzle[zero_i - 1][zero_j] - 1) / N) - (zero_i - 1) ); + + result.puzzle[zero_i][zero_j] = puzzle[zero_i - 1][zero_j]; + --result.zero_i; + result.puzzle[result.zero_i][zero_j] = 0; + + result.determine_hash(); + + return result; +} + +template +N_puzzle N_puzzle::tile_left() const { + if ( !puzzle_valid ) { + return *this; + } + + N_puzzle result( *this ); + + if ( zero_j == N - 1 ) { + result.puzzle_valid = false; + return result; + } + + result.manhattan_distance += + abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - zero_j ) - + abs( ((puzzle[zero_i][zero_j + 1] - 1) % N) - (zero_j + 1) ); + + result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j + 1]; + ++result.zero_j; + result.puzzle[zero_i][result.zero_j] = 0; + + result.determine_hash(); + + return result; +} + +template +N_puzzle N_puzzle::tile_right() const { + if ( !puzzle_valid ) { + return *this; + } + + N_puzzle result( *this ); + + if ( zero_j == 0 ) { + result.puzzle_valid = false; + return result; + } + + result.manhattan_distance += + abs( ((puzzle[zero_i][zero_j - 1] 
- 1) % N) - zero_j ) - + abs( ((puzzle[zero_i][zero_j - 1] - 1) % N) - (zero_j - 1) ); + + result.puzzle[zero_i][zero_j] = puzzle[zero_i][zero_j - 1]; + --result.zero_j; + result.puzzle[zero_i][result.zero_j] = 0; + + result.determine_hash(); + + return result; +} + +/* + * Check if the puzzle is solvable: that is, check the + * number of inversions plus the Manhattan distance of + * the blank from the lower-right corner. + * + * Run time: O(n^2) + * Memory: O(n) + */ + +template +bool N_puzzle::solvable() const { + if ( !valid() ) { + return false; + } + + int entries[N*N]; + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + if ( puzzle[i][j] == 0 ) { + entries[N*i + j] = N*N; + } else { + entries[N*i + j] = puzzle[i][j]; + } + } + } + + int parity = 0; + + for ( int i = 0; i < N*N; ++i ) { + for ( int j = i + 1; j < N*N; ++j ) { + if ( entries[i] > entries[j] ) { + ++parity; + } + } + } + + parity += 2*N - 2 - zero_i - zero_j; + + return ( (parity & 1) == 0 ); +} + +template +bool N_puzzle::valid() const { + return puzzle_valid; +} + +/* + * Return the Manhattan distance to the solution (N*N*N if the puzzle is + * invalid); the Hamming/discrete code after the first return is unreachable. + */ + +template +int N_puzzle::lower_bound() const { + // The Manhattan distance + return valid() ? manhattan_distance : N*N*N; + + int result = 0; + int count = 1; + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + if ( puzzle[i][j] != (count % N*N) ) { + ++result; + } + + ++count; + } + } + + // The Hamming distance, or + return result; + + // The discrete distance: converts the A* search to Dijkstra's algorithm + // return ( result == 0 ) ? 0 : 1; +} + +/* + * puzzle1 == puzzle2 + * + * Two puzzles are considered to be equal if their entries + * are equal: + * If either puzzle is not valid, return false. + * If the hash values are different, they are different; return false. + * Otherwise, check all entries to see if they are the same.
+ */ + +template < int N > +bool N_puzzle::operator==( N_puzzle const &rhs ) const { + if ( !valid() || !rhs.valid() || hash() != rhs.hash() ) { + return false; + } + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + if ( puzzle[i][j] != rhs.puzzle[i][j] ) { + return false; + } + } + } + return true; +} + +/* + * puzzle1 != puzzle2 + * + * Two puzzles are considered to be unequal if any of the entries + * different: + * If either puzzle is not valid, return false. + * If the hash values are different, they are different; return true. + * Otherwise, check all entries to see if they are the same. + */ + +template < int N > +bool N_puzzle::operator!=( N_puzzle const &rhs ) const { + if ( !valid() || !rhs.valid() ) { + return false; + } + + if ( hash() != rhs.hash() ) { + return true; + } + + for ( int i = 0; i < N; ++i ) { + for ( int j = 0; j < N; ++j ) { + if ( puzzle[i][j] != rhs.puzzle[i][j] ) { + return true; + } + } + } + + return false; +} + +/* + * unsigned int hash() const + * + * Returns the pre-calculated hash value. + */ + +template < int N > +unsigned int N_puzzle::hash() const { + return valid() ? 
hash_value : 0; +} + +/* + * N_puzzle solution() + * + * Returns the correct solution to the N puzzle: + * + * 1 2 3 1 2 3 4 + * 3x3: 4 5 6 4x4: 5 6 7 8 + * 7 8 9 10 11 12 + * 13 14 15 + */ + +template +N_puzzle N_puzzle::solution() { + int array[N*N]; + + for ( int i = 0; i < N*N - 1; ++i ) { + array[i] = i + 1; + } + + array[N*N - 1] = 0; + + return N_puzzle( array ); +} + diff --git a/microbench/src/bench.c b/microbench/src/bench.c new file mode 100644 index 0000000..e34ca74 --- /dev/null +++ b/microbench/src/bench.c @@ -0,0 +1,181 @@ +#include +#include +#include +#include + +Benchmark *current; +Setting *setting; + +static char *hbrk; + +static uint32_t uptime_ms() { return io_read(AM_TIMER_UPTIME).us / 1000; } + +// The benchmark list + +#define ENTRY(_name, _sname, _s, _m, _l, _desc) \ + { .prepare = bench_##_name##_prepare, \ + .run = bench_##_name##_run, \ + .validate = bench_##_name##_validate, \ + .name = _sname, \ + .desc = _desc, \ + .settings = {_s, _m, _l}, }, + +Benchmark benchmarks[] = { + BENCHMARK_LIST(ENTRY) +}; + +// Running a benchmark +static void bench_prepare(Result *res) { + res->msec = uptime_ms(); +} + +static void bench_reset() { + hbrk = (void *)ROUNDUP(heap.start, 8); +} + +static void bench_done(Result *res) { + res->msec = uptime_ms() - res->msec; +} + +static const char *bench_check(Benchmark *bench) { + uintptr_t freesp = (uintptr_t)heap.end - (uintptr_t)heap.start; + if (freesp < setting->mlim) { + return "(insufficient memory)"; + } + return NULL; +} + +static void run_once(Benchmark *b, Result *res) { + bench_reset(); // reset malloc state + current->prepare(); // call bechmark's prepare function + bench_prepare(res); // clean everything, start timer + current->run(); // run it + bench_done(res); // collect results + res->pass = current->validate(); +} + +static unsigned long score(Benchmark *b, unsigned long tsc, unsigned long msec) { + if (msec == 0) return 0; + return (REF_SCORE / 1000) * setting->ref / msec; +} + +int 
main(const char *args) { + const char *setting_name = args; + if (args == NULL || strcmp(args, "") == 0) { + printf("Empty mainargs. Use \"ref\" by default\n"); + setting_name = "ref"; + } + int setting_id = -1; + + if (strcmp(setting_name, "test" ) == 0) setting_id = 0; + else if (strcmp(setting_name, "train") == 0) setting_id = 1; + else if (strcmp(setting_name, "ref" ) == 0) setting_id = 2; + else { + printf("Invalid mainargs: \"%s\"; " + "must be in {test, train, ref}\n", setting_name); + halt(1); + } + + ioe_init(); + + printf("======= Running MicroBench [input *%s*] =======\n", setting_name); + + unsigned long bench_score = 0; + int pass = 1; + uint32_t t0 = uptime_ms(); + + for (int i = 0; i < LENGTH(benchmarks); i ++) { + Benchmark *bench = &benchmarks[i]; + current = bench; + setting = &bench->settings[setting_id]; + const char *msg = bench_check(bench); + printf("[%s] %s: ", bench->name, bench->desc); + if (msg != NULL) { + printf("Ignored %s\n", msg); + } else { + unsigned long msec = ULONG_MAX; + int succ = 1; + for (int i = 0; i < REPEAT; i ++) { + Result res; + run_once(bench, &res); + printf(res.pass ? "*" : "X"); + succ &= res.pass; + if (res.msec < msec) msec = res.msec; + } + + if (succ) printf(" Passed."); + else printf(" Failed."); + + pass &= succ; + + unsigned long cur = score(bench, 0, msec); + + printf("\n"); + if (setting_id != 0) { + printf(" min time: %d ms [%d]\n", (unsigned int)msec, (unsigned int)cur); + } + + bench_score += cur; + } + } + uint32_t t1 = uptime_ms(); + + bench_score /= LENGTH(benchmarks); + + printf("==================================================\n"); + printf("MicroBench %s", pass ? "PASS" : "FAIL"); + if (setting_id == 2) { + printf(" %d Marks\n", (unsigned int)bench_score); + printf(" vs. 
%d Marks (%s)\n", REF_SCORE, REF_CPU); + } else { + printf("\n"); + } + printf("Total time: %d ms\n", t1 - t0); + return 0; +} + +// Libraries + +void* bench_alloc(size_t size) { + size = (size_t)ROUNDUP(size, 8); + char *old = hbrk; + hbrk += size; + assert((uintptr_t)heap.start <= (uintptr_t)hbrk && (uintptr_t)hbrk < (uintptr_t)heap.end); + for (uint64_t *p = (uint64_t *)old; p != (uint64_t *)hbrk; p ++) { + *p = 0; + } + assert((uintptr_t)hbrk - (uintptr_t)heap.start <= setting->mlim); + return old; +} + +void bench_free(void *ptr) { +} + +static uint32_t seed = 1; + +void bench_srand(uint32_t _seed) { + seed = _seed & 0x7fff; +} + +uint32_t bench_rand() { + seed = (seed * (uint32_t)214013L + (uint32_t)2531011L); + return (seed >> 16) & 0x7fff; +} + +// FNV hash +uint32_t checksum(void *start, void *end) { + const uint32_t x = 16777619; + uint32_t h1 = 2166136261u; + for (uint8_t *p = (uint8_t*)start; p + 4 < (uint8_t*)end; p += 4) { + for (int i = 0; i < 4; i ++) { + h1 = (h1 ^ p[i]) * x; + } + } + int32_t hash = (uint32_t)h1; + hash += hash << 13; + hash ^= hash >> 7; + hash += hash << 3; + hash ^= hash >> 17; + hash += hash << 5; + return hash; +} diff --git a/microbench/src/bf/bf.c b/microbench/src/bf/bf.c new file mode 100644 index 0000000..f07ba1e --- /dev/null +++ b/microbench/src/bf/bf.c @@ -0,0 +1,151 @@ +/* + Brainfuck-C ( http://github.com/kgabis/brainfuck-c ) + Copyright (c) 2012 Krzysztof Gabis + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of 
the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include + +static int ARR_SIZE; + +#define CODE ">>+>>>>>,[>+>>,]>+[--[+<<<-]<[<+>-]<[<[->[<<<+>>>>+<-]<<[>>+>[->]<<[<]" \ + "<-]>]>>>+<[[-]<[>+<-]<]>[[>>>]+<<<-<[<<[<<<]>>+>[>>>]<-]<<[<<<]>[>>[>>" \ + ">]<+<<[<<<]>-]]+<<<]+[->>>]>>]>>[.>>>]" + +#define OP_END 0 +#define OP_INC_DP 1 +#define OP_DEC_DP 2 +#define OP_INC_VAL 3 +#define OP_DEC_VAL 4 +#define OP_OUT 5 +#define OP_IN 6 +#define OP_JMP_FWD 7 +#define OP_JMP_BCK 8 + +#define SUCCESS 0 +#define FAILURE 1 + +#define PROGRAM_SIZE 4096 +#define STACK_SIZE 512 +#define DATA_SIZE 4096 + +#define STACK_PUSH(A) (STACK[SP++] = A) +#define STACK_POP() (STACK[--SP]) +#define STACK_EMPTY() (SP == 0) +#define STACK_FULL() (SP == STACK_SIZE) + +struct instruction_t { + unsigned short operator; + unsigned short operand; +}; + +static struct instruction_t *PROGRAM; +static unsigned short *STACK; +static unsigned int SP; +static const char *code; +static char *input; + +static int compile_bf() { + unsigned short pc = 0, jmp_pc; + for (; *code; code ++) { + int c = *code; + if (pc >= PROGRAM_SIZE) break; + switch (c) { + case '>': PROGRAM[pc].operator = OP_INC_DP; break; + case '<': PROGRAM[pc].operator = OP_DEC_DP; break; + case '+': PROGRAM[pc].operator = OP_INC_VAL; break; + case '-': PROGRAM[pc].operator = OP_DEC_VAL; break; + case '.': PROGRAM[pc].operator = OP_OUT; break; + case ',': PROGRAM[pc].operator = OP_IN; break; + case '[': + PROGRAM[pc].operator = OP_JMP_FWD; + if (STACK_FULL()) { + return FAILURE; + } + 
STACK_PUSH(pc); + break; + case ']': + if (STACK_EMPTY()) { + return FAILURE; + } + jmp_pc = STACK_POP(); + PROGRAM[pc].operator = OP_JMP_BCK; + PROGRAM[pc].operand = jmp_pc; + PROGRAM[jmp_pc].operand = pc; + break; + default: pc--; break; + } + pc++; + } + if (!STACK_EMPTY() || pc == PROGRAM_SIZE) { + return FAILURE; + } + PROGRAM[pc].operator = OP_END; + return SUCCESS; +} + +static unsigned short *data; +static char *output; +static int noutput; + +static void execute_bf() { + unsigned int pc = 0, ptr = 0; + while (PROGRAM[pc].operator != OP_END && ptr < DATA_SIZE) { + switch (PROGRAM[pc].operator) { + case OP_INC_DP: ptr++; break; + case OP_DEC_DP: ptr--; break; + case OP_INC_VAL: data[ptr]++; break; + case OP_DEC_VAL: data[ptr]--; break; + case OP_OUT: output[noutput ++] = data[ptr]; break; + case OP_IN: data[ptr] = *(input ++); break; + case OP_JMP_FWD: if(!data[ptr]) { pc = PROGRAM[pc].operand; } break; + case OP_JMP_BCK: if(data[ptr]) { pc = PROGRAM[pc].operand; } break; + default: return; + } + pc++; + } +} + +void bench_bf_prepare() { + ARR_SIZE = setting->size; + SP = 0; + PROGRAM = bench_alloc(sizeof(PROGRAM[0]) * PROGRAM_SIZE); + STACK = bench_alloc(sizeof(STACK[0]) * STACK_SIZE); + data = bench_alloc(sizeof(data[0]) * DATA_SIZE); + code = CODE; + input = bench_alloc(ARR_SIZE + 1); + output = bench_alloc(DATA_SIZE); + noutput = 0; + + bench_srand(1); + for (int i = 0; i < ARR_SIZE; i ++) { + input[i] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"[bench_rand() % 62]; + } +} + +void bench_bf_run() { + compile_bf(); + execute_bf(); +} + +int bench_bf_validate() { + uint32_t cs = checksum(output, output + noutput); + return noutput == ARR_SIZE && cs == setting->checksum; +} diff --git a/microbench/src/dinic/dinic.cc b/microbench/src/dinic/dinic.cc new file mode 100644 index 0000000..926458c --- /dev/null +++ b/microbench/src/dinic/dinic.cc @@ -0,0 +1,138 @@ +#include + +static int N; +const int INF = 0x3f3f3f; + +struct Edge { + int 
from, to, cap, flow; + Edge(){} + Edge(int from, int to, int cap, int flow) { + this->from = from; + this->to = to; + this->cap = cap; + this->flow = flow; + } +}; + +template +static inline T min(T x, T y) { + return x < y ? x : y; +} + +struct Dinic { + int n, m, s, t; + Edge *edges; + int *head, *nxt, *d, *cur, *queue; + bool *vis; + + void init(int n) { + int nold = (n - 2) / 2; + int maxm = (nold * nold + nold * 2) * 2; + + edges = (Edge *)bench_alloc(sizeof(Edge) * maxm); + head = (int *)bench_alloc(sizeof(int) * n); + nxt = (int *)bench_alloc(sizeof(int) * maxm); + vis = (bool *)bench_alloc(sizeof(bool) * n); + d = (int *)bench_alloc(sizeof(int) * n); + cur = (int *)bench_alloc(sizeof(int) * n); + queue = (int *)bench_alloc(sizeof(int) * n); + + this->n = n; + for (int i = 0; i < n; i ++) { + head[i] = -1; + } + m = 0; + } + + void AddEdge(int u, int v, int c) { + if (c == 0) return; + edges[m] = Edge(u, v, c, 0); + nxt[m] = head[u]; + head[u] = m++; + edges[m] = Edge(v, u, 0, 0); + nxt[m] = head[v]; + head[v] = m++; + } + + bool BFS() { + for (int i = 0; i < n; i ++) vis[i] = 0; + int qf = 0, qr = 0; + queue[qr ++] = s; + d[s] = 0; + vis[s] = 1; + while (qf != qr) { + int x = queue[qf ++]; + for (int i = head[x]; i != -1; i = nxt[i]) { + Edge& e = edges[i]; + if (!vis[e.to] && e.cap > e.flow) { + vis[e.to] = 1; + d[e.to] = d[x] + 1; + queue[qr ++] = e.to; + } + } + } + return vis[t]; + } + + int DFS(int x, int a) { + if (x == t || a == 0) return a; + int flow = 0, f; + for (int i = cur[x]; i != -1; i = nxt[i]) { + Edge& e = edges[i]; + if (d[x] + 1 == d[e.to] && (f = DFS(e.to, min(a, e.cap-e.flow))) > 0) { + e.flow += f; + edges[i^1].flow -= f; + flow += f; + a -= f; + if (a == 0) break; + } + } + return flow; + } + + int Maxflow(int s, int t) { + this -> s = s; this -> t = t; + int flow = 0; + while (BFS()) { + for (int i = 0; i < n; i++) + cur[i] = head[i]; + flow += DFS(s, INF); + } + return flow; + } +}; + + +extern "C" { + + +static Dinic *G; +static 
int ans; + +void bench_dinic_prepare() { + N = setting->size; + bench_srand(1); + int s = 2 * N, t = 2 * N + 1; + G = (Dinic*)bench_alloc(sizeof(Dinic)); + G->init(2 * N + 2); + for (int i = 0; i < N; i ++) + for (int j = 0; j < N; j ++) { + G->AddEdge(i, N + j, bench_rand() % 10); + } + + for (int i = 0; i < N; i ++) { + G->AddEdge(s, i, bench_rand() % 1000); + G->AddEdge(N + i, t, bench_rand() % 1000); + } +} + +void bench_dinic_run() { + ans = G->Maxflow(2 * N, 2 * N + 1); +} + +int bench_dinic_validate() { + return (uint32_t)ans == setting->checksum; +} +} + + diff --git a/microbench/src/fib/fib.c b/microbench/src/fib/fib.c new file mode 100644 index 0000000..b96be1f --- /dev/null +++ b/microbench/src/fib/fib.c @@ -0,0 +1,64 @@ +#include + +// f(n) = (f(n-1) + f(n-2) + .. f(n-m)) mod 2^32 + +#define N 2147483603 +static int M; + +static void put(uint32_t *m, int i, int j, uint32_t data) { + m[i * M + j] = data; +} + +static uint32_t get(uint32_t *m, int i, int j) { + return m[i * M + j]; +} + +static inline void mult(uint32_t *c, uint32_t *a, uint32_t *b) { + for (int i = 0; i < M; i ++) + for (int j = 0; j < M; j ++) { + put(c, i, j, 0); + for (int k = 0; k < M; k ++) { + put(c, i, j, get(c, i, j) + get(a, i, k) * get(b, k, j)); + } + } +} + +static inline void assign(uint32_t *a, uint32_t *b) { + for (int i = 0; i < M; i ++) + for (int j = 0; j < M; j ++) + put(a, i, j, get(b, i, j)); +} + +static uint32_t *A, *ans, *T, *tmp; + +void bench_fib_prepare() { + M = setting->size; + int sz = sizeof(uint32_t) * M * M; + A = bench_alloc(sz); + T = bench_alloc(sz); + ans = bench_alloc(sz); + tmp = bench_alloc(sz); +} + +void bench_fib_run() { + for (int i = 0; i < M; i ++) + for (int j = 0; j < M; j ++) { + uint32_t x = (i == M - 1 || j == i + 1); + put(A, i, j, x); + put(T, i, j, x); + put(ans, i, j, i == j); + } + + for (int n = N; n > 0; n >>= 1) { + if (n & 1) { + mult(tmp, ans, T); + assign(ans, tmp); + } + mult(tmp, T, T); + assign(T, tmp); + } +} + +int 
bench_fib_validate() { + return get(ans, M-1, M-1) == setting->checksum; +} diff --git a/microbench/src/lzip/lzip.c b/microbench/src/lzip/lzip.c new file mode 100644 index 0000000..f55765b --- /dev/null +++ b/microbench/src/lzip/lzip.c @@ -0,0 +1,29 @@ +#include "quicklz.h" +#include + +static int SIZE; + +static qlz_state_compress *state; +static char *blk; +static char *compress; +static int len; + +void bench_lzip_prepare() { + SIZE = setting->size; + bench_srand(1); + state = bench_alloc(sizeof(qlz_state_compress)); + blk = bench_alloc(SIZE); + compress = bench_alloc(SIZE + 400); + for (int i = 0; i < SIZE; i ++) { + blk[i] = 'a' + bench_rand() % 26; + } +} + +void bench_lzip_run() { + len = qlz_compress(blk, compress, SIZE, state); +} + +int bench_lzip_validate() { + return checksum(compress, compress + len) == setting->checksum; +} + diff --git a/microbench/src/lzip/quicklz.c b/microbench/src/lzip/quicklz.c new file mode 100644 index 0000000..9200abe --- /dev/null +++ b/microbench/src/lzip/quicklz.c @@ -0,0 +1,761 @@ +// Fast data compression library +// Copyright (C) 2006-2011 Lasse Mikkel Reinhold +// lar@quicklz.com +// +// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything +// released into public must be open source) or under a commercial license if such +// has been acquired (see http://www.quicklz.com/order.html). The commercial license +// does not cover derived or ported versions created by third parties under GPL. 
+ +// 1.5.0 final + +#include "quicklz.h" + +#if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0 + #error quicklz.c and quicklz.h have different versions +#endif + +#define MINOFFSET 2 +#define UNCONDITIONAL_MATCHLEN 6 +#define UNCOMPRESSED_END 4 +#define CWORD_LEN 4 + +#if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0 + #define OFFSET_BASE source + #define CAST (ui32)(size_t) +#else + #define OFFSET_BASE 0 + #define CAST +#endif + +int qlz_get_setting(int setting) +{ + switch (setting) + { + case 0: return QLZ_COMPRESSION_LEVEL; + case 1: return sizeof(qlz_state_compress); + case 2: return sizeof(qlz_state_decompress); + case 3: return QLZ_STREAMING_BUFFER; +#ifdef QLZ_MEMORY_SAFE + case 6: return 1; +#else + case 6: return 0; +#endif + case 7: return QLZ_VERSION_MAJOR; + case 8: return QLZ_VERSION_MINOR; + case 9: return QLZ_VERSION_REVISION; + } + return -1; +} + +#if QLZ_COMPRESSION_LEVEL == 1 +static int same(const unsigned char *src, size_t n) +{ + while(n > 0 && *(src + n) == *src) + n--; + return n == 0 ? 
1 : 0; +} +#endif + +static void reset_table_compress(qlz_state_compress *state) +{ + int i; + for(i = 0; i < QLZ_HASH_VALUES; i++) + { +#if QLZ_COMPRESSION_LEVEL == 1 + state->hash[i].offset = 0; +#else + state->hash_counter[i] = 0; +#endif + } +} + +static void reset_table_decompress(qlz_state_decompress *state) +{ + int i; + (void)state; + (void)i; +#if QLZ_COMPRESSION_LEVEL == 2 + for(i = 0; i < QLZ_HASH_VALUES; i++) + { + state->hash_counter[i] = 0; + } +#endif +} + +static __inline ui32 hash_func(ui32 i) +{ +#if QLZ_COMPRESSION_LEVEL == 2 + return ((i >> 9) ^ (i >> 13) ^ i) & (QLZ_HASH_VALUES - 1); +#else + return ((i >> 12) ^ i) & (QLZ_HASH_VALUES - 1); +#endif +} + +static __inline ui32 fast_read(void const *src, ui32 bytes) +{ + uint32_t ret = 0; + if (bytes >= 1 && bytes <= 4) { + for (uint32_t i = 0; i < bytes; i ++) { + ret |= ((uint8_t*)src)[i] << (i * 8); + } + } + return ret; +} + +static __inline ui32 hashat(const unsigned char *src) +{ + ui32 fetch, hash; + fetch = fast_read(src, 3); + hash = hash_func(fetch); + return hash; +} + +static __inline void fast_write(ui32 f, void *dst, size_t bytes) +{ + for (size_t i = 0; i != bytes; i ++) { + ((char*)dst)[i] = ((char*)&f)[i]; + } +} + + +size_t qlz_size_decompressed(const char *source) +{ + ui32 n, r; + n = (((*source) & 2) == 2) ? 4 : 1; + r = fast_read(source + 1 + n, n); + r = r & (0xffffffff >> ((4 - n)*8)); + return r; +} + +size_t qlz_size_compressed(const char *source) +{ + ui32 n, r; + n = (((*source) & 2) == 2) ? 4 : 1; + r = fast_read(source + 1, n); + r = r & (0xffffffff >> ((4 - n)*8)); + return r; +} + +size_t qlz_size_header(const char *source) +{ + size_t n = 2*((((*source) & 2) == 2) ? 
4 : 1) + 1; + return n; +} + + +static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n) +{ + assert(0); // unaligned memory access +} + +static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s) +{ +#if QLZ_COMPRESSION_LEVEL == 1 + ui32 hash; + hash = hashat(s); + state->hash[hash].offset = s; + state->hash_counter[hash] = 1; +#elif QLZ_COMPRESSION_LEVEL == 2 + ui32 hash; + unsigned char c; + hash = hashat(s); + c = state->hash_counter[hash]; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = s; + c++; + state->hash_counter[hash] = c; +#endif + (void)state; + (void)s; +} + +#if QLZ_COMPRESSION_LEVEL <= 2 +static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max) +{ + while(*lh < max) + { + (*lh)++; + update_hash(state, *lh); + } +} +#endif + +static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state) +{ + const unsigned char *last_byte = source + size - 1; + const unsigned char *src = source; + unsigned char *cword_ptr = destination; + unsigned char *dst = destination + CWORD_LEN; + ui32 cword_val = 1U << 31; + const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END; + ui32 fetch = 0; + unsigned int lits = 0; + + (void) lits; + + if(src <= last_matchstart) + fetch = fast_read(src, 3); + + while(src <= last_matchstart) + { + if ((cword_val & 1) == 1) + { + // store uncompressed if compression ratio is too low + if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5)) + return 0; + + fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); + + cword_ptr = dst; + dst += CWORD_LEN; + cword_val = 1U << 31; + fetch = fast_read(src, 3); + } +#if QLZ_COMPRESSION_LEVEL == 1 + { + const unsigned char *o; + ui32 hash, cached; + + hash = hash_func(fetch); + cached = fetch ^ state->hash[hash].cache; + state->hash[hash].cache = 
fetch; + + o = state->hash[hash].offset + OFFSET_BASE; + state->hash[hash].offset = CAST(src - OFFSET_BASE); + + if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6)))) + { + if (*(o + 3) != *(src + 3)) + { + hash <<= 4; + cword_val = (cword_val >> 1) | (1U << 31); + fast_write((3 - 2) | hash, dst, 2); + src += 3; + dst += 2; + } + else + { + const unsigned char *old_src = src; + size_t matchlen; + hash <<= 4; + + cword_val = (cword_val >> 1) | (1U << 31); + src += 4; + + if(*(o + (src - old_src)) == *src) + { + src++; + if(*(o + (src - old_src)) == *src) + { + size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1; + size_t remaining = q > 255 ? 255 : q; + src++; + while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining) + src++; + } + } + + matchlen = src - old_src; + if (matchlen < 18) + { + fast_write((ui32)(matchlen - 2) | hash, dst, 2); + dst += 2; + } + else + { + fast_write((ui32)(matchlen << 16) | hash, dst, 3); + dst += 3; + } + } + fetch = fast_read(src, 3); + lits = 0; + } + else + { + lits++; + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); + fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16); + } + } +#elif QLZ_COMPRESSION_LEVEL >= 2 + { + const unsigned char *o, *offset2; + ui32 hash, matchlen, k, m, best_k = 0; + unsigned char c; + size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 
255 : (last_byte - UNCOMPRESSED_END - src + 1); + (void)best_k; + + + //hash = hashat(src); + fetch = fast_read(src, 3); + hash = hash_func(fetch); + + c = state->hash_counter[hash]; + + offset2 = state->hash[hash].offset[0]; + if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0) + { + matchlen = 3; + if(*(offset2 + matchlen) == *(src + matchlen)) + { + matchlen = 4; + while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining) + matchlen++; + } + } + else + matchlen = 0; + for(k = 1; k < QLZ_POINTERS && c > k; k++) + { + o = state->hash[hash].offset[k]; +#if QLZ_COMPRESSION_LEVEL == 3 + if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET) +#elif QLZ_COMPRESSION_LEVEL == 2 + if(*(src + matchlen) == *(o + matchlen) && ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET) +#endif + { + m = 3; + while(*(o + m) == *(src + m) && m < remaining) + m++; +#if QLZ_COMPRESSION_LEVEL == 3 + if ((m > matchlen) || (m == matchlen && o > offset2)) +#elif QLZ_COMPRESSION_LEVEL == 2 + if (m > matchlen) +#endif + { + offset2 = o; + matchlen = m; + best_k = k; + } + } + } + o = offset2; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src; + c++; + state->hash_counter[hash] = c; + +#if QLZ_COMPRESSION_LEVEL == 3 + if(matchlen > 2 && src - o < 131071) + { + ui32 u; + size_t offset = src - o; + + for(u = 1; u < matchlen; u++) + { + hash = hashat(src + u); + c = state->hash_counter[hash]++; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u; + } + + cword_val = (cword_val >> 1) | (1U << 31); + src += matchlen; + + if(matchlen == 3 && offset <= 63) + { + *dst = (unsigned char)(offset << 2); + dst++; + } + else if (matchlen == 3 && offset <= 16383) + { + ui32 f = (ui32)((offset << 2) | 1); + fast_write(f, dst, 2); + dst += 2; + } + else if (matchlen <= 18 && offset <= 1023) + { + ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2; + fast_write(f, dst, 2); + dst += 2; + } + + else 
if(matchlen <= 33) + { + ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3; + fast_write(f, dst, 3); + dst += 3; + } + else + { + ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3; + fast_write(f, dst, 4); + dst += 4; + } + } + else + { + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); + } +#elif QLZ_COMPRESSION_LEVEL == 2 + + if(matchlen > 2) + { + cword_val = (cword_val >> 1) | (1U << 31); + src += matchlen; + + if (matchlen < 10) + { + ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5); + fast_write(f, dst, 2); + dst += 2; + } + else + { + ui32 f = best_k | (matchlen << 16) | (hash << 5); + fast_write(f, dst, 3); + dst += 3; + } + } + else + { + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); + } +#endif + } +#endif + } + while (src <= last_byte) + { + if ((cword_val & 1) == 1) + { + fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); + cword_ptr = dst; + dst += CWORD_LEN; + cword_val = 1U << 31; + } +#if QLZ_COMPRESSION_LEVEL < 3 + if (src <= last_byte - 3) + { +#if QLZ_COMPRESSION_LEVEL == 1 + ui32 hash, fetch; + fetch = fast_read(src, 3); + hash = hash_func(fetch); + state->hash[hash].offset = CAST(src - OFFSET_BASE); + state->hash[hash].cache = fetch; +#elif QLZ_COMPRESSION_LEVEL == 2 + ui32 hash; + unsigned char c; + hash = hashat(src); + c = state->hash_counter[hash]; + state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src; + c++; + state->hash_counter[hash] = c; +#endif + } +#endif + *dst = *src; + src++; + dst++; + cword_val = (cword_val >> 1); + } + + while((cword_val & 1) != 1) + cword_val = (cword_val >> 1); + + fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN); + + // min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument + return dst - destination < 9 ? 
9 : dst - destination; +} + +static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history) +{ + const unsigned char *src = source + qlz_size_header((const char *)source); + unsigned char *dst = destination; + const unsigned char *last_destination_byte = destination + size - 1; + ui32 cword_val = 1; + const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END; + unsigned char *last_hashed = destination - 1; + const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1; + static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; + + (void) last_source_byte; + (void) last_hashed; + (void) state; + (void) history; + + for(;;) + { + ui32 fetch; + + if (cword_val == 1) + { +#ifdef QLZ_MEMORY_SAFE + if(src + CWORD_LEN - 1 > last_source_byte) + return 0; +#endif + cword_val = fast_read(src, CWORD_LEN); + src += CWORD_LEN; + } + +#ifdef QLZ_MEMORY_SAFE + if(src + 4 - 1 > last_source_byte) + return 0; +#endif + + fetch = fast_read(src, 4); + + if ((cword_val & 1) == 1) + { + ui32 matchlen; + const unsigned char *offset2; + +#if QLZ_COMPRESSION_LEVEL == 1 + ui32 hash; + cword_val = cword_val >> 1; + hash = (fetch >> 4) & 0xfff; + offset2 = (const unsigned char *)(size_t)state->hash[hash].offset; + + if((fetch & 0xf) != 0) + { + matchlen = (fetch & 0xf) + 2; + src += 2; + } + else + { + matchlen = *(src + 2); + src += 3; + } + +#elif QLZ_COMPRESSION_LEVEL == 2 + ui32 hash; + unsigned char c; + cword_val = cword_val >> 1; + hash = (fetch >> 5) & 0x7ff; + c = (unsigned char)(fetch & 0x3); + offset2 = state->hash[hash].offset[c]; + + if((fetch & (28)) != 0) + { + matchlen = ((fetch >> 2) & 0x7) + 2; + src += 2; + } + else + { + matchlen = *(src + 2); + src += 3; + } + +#elif QLZ_COMPRESSION_LEVEL == 3 + ui32 offset; + cword_val = cword_val >> 1; + if ((fetch & 3) == 0) + { + 
offset = (fetch & 0xff) >> 2; + matchlen = 3; + src++; + } + else if ((fetch & 2) == 0) + { + offset = (fetch & 0xffff) >> 2; + matchlen = 3; + src += 2; + } + else if ((fetch & 1) == 0) + { + offset = (fetch & 0xffff) >> 6; + matchlen = ((fetch >> 2) & 15) + 3; + src += 2; + } + else if ((fetch & 127) != 3) + { + offset = (fetch >> 7) & 0x1ffff; + matchlen = ((fetch >> 2) & 0x1f) + 2; + src += 3; + } + else + { + offset = (fetch >> 15); + matchlen = ((fetch >> 7) & 255) + 3; + src += 4; + } + + offset2 = dst - offset; +#endif + +#ifdef QLZ_MEMORY_SAFE + if(offset2 < history || offset2 > dst - MINOFFSET - 1) + return 0; + + if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1)) + return 0; +#endif + + memcpy_up(dst, offset2, matchlen); + dst += matchlen; + +#if QLZ_COMPRESSION_LEVEL <= 2 + update_hash_upto(state, &last_hashed, dst - matchlen); + last_hashed = dst - 1; +#endif + } + else + { + if (dst < last_matchstart) + { + unsigned int n = bitlut[cword_val & 0xf]; + memcpy_up(dst, src, 4); + cword_val = cword_val >> n; + dst += n; + src += n; +#if QLZ_COMPRESSION_LEVEL <= 2 + update_hash_upto(state, &last_hashed, dst - 3); +#endif + } + else + { + while(dst <= last_destination_byte) + { + if (cword_val == 1) + { + src += CWORD_LEN; + cword_val = 1U << 31; + } +#ifdef QLZ_MEMORY_SAFE + if(src >= last_source_byte + 1) + return 0; +#endif + *dst = *src; + dst++; + src++; + cword_val = cword_val >> 1; + } + +#if QLZ_COMPRESSION_LEVEL <= 2 + update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant +#endif + return size; + } + + } + } +} + +size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state) +{ + size_t r; + ui32 compressed; + size_t base; + + if(size == 0 || size > 0xffffffff - 400) + return 0; + + if(size < 216) + base = 3; + else + base = 9; + +#if QLZ_STREAMING_BUFFER > 0 + if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER) +#endif + { + 
reset_table_compress(state); + r = base + qlz_compress_core((const unsigned char *)source, (unsigned char*)destination + base, size, state); +#if QLZ_STREAMING_BUFFER > 0 + reset_table_compress(state); +#endif + if(r == base) + { + bench_memcpy(destination + base, source, size); + r = size + base; + compressed = 0; + } + else + { + compressed = 1; + } + state->stream_counter = 0; + } +#if QLZ_STREAMING_BUFFER > 0 + else + { + unsigned char *src = state->stream_buffer + state->stream_counter; + + bench_memcpy(src, source, size); + r = base + qlz_compress_core(src, (unsigned char*)destination + base, size, state); + + if(r == base) + { + bench_memcpy(destination + base, src, size); + r = size + base; + compressed = 0; + reset_table_compress(state); + } + else + { + compressed = 1; + } + state->stream_counter += size; + } +#endif + if(base == 3) + { + *destination = (unsigned char)(0 | compressed); + *(destination + 1) = (unsigned char)r; + *(destination + 2) = (unsigned char)size; + } + else + { + *destination = (unsigned char)(2 | compressed); + fast_write((ui32)r, destination + 1, 4); + fast_write((ui32)size, destination + 5, 4); + } + + *destination |= (QLZ_COMPRESSION_LEVEL << 2); + *destination |= (1 << 6); + *destination |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 1 : (QLZ_STREAMING_BUFFER == 1000000 ? 
2 : 3))) << 4); + +// 76543210 +// 01SSLLHC + + return r; +} + +size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state) +{ + size_t dsiz = qlz_size_decompressed(source); + +#if QLZ_STREAMING_BUFFER > 0 + if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER) +#endif + { + if((*source & 1) == 1) + { + reset_table_decompress(state); + dsiz = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, dsiz, state, (const unsigned char *)destination); + } + else + { + bench_memcpy(destination, source + qlz_size_header(source), dsiz); + } + state->stream_counter = 0; + reset_table_decompress(state); + } +#if QLZ_STREAMING_BUFFER > 0 + else + { + unsigned char *dst = state->stream_buffer + state->stream_counter; + if((*source & 1) == 1) + { + dsiz = qlz_decompress_core((const unsigned char *)source, dst, dsiz, state, (const unsigned char *)state->stream_buffer); + } + else + { + bench_memcpy(dst, source + qlz_size_header(source), dsiz); + reset_table_decompress(state); + } + bench_memcpy(destination, dst, dsiz); + state->stream_counter += dsiz; + } +#endif + return dsiz; +} + diff --git a/microbench/src/lzip/quicklz.h b/microbench/src/lzip/quicklz.h new file mode 100644 index 0000000..bb87ee6 --- /dev/null +++ b/microbench/src/lzip/quicklz.h @@ -0,0 +1,164 @@ +#ifndef QLZ_HEADER +#define QLZ_HEADER + +#include +#include + +static inline void* bench_memcpy(void* dst, const void* src, size_t n){ + assert(dst&&src); + const char* s; + char* d; + if(src+n>dst&&src0)*--d=*--s; + } + else{ + s=src; + d=dst; + while(n-->0)*d++=*s++; + } + return dst; +} + + +// Fast data compression library +// Copyright (C) 2006-2011 Lasse Mikkel Reinhold +// lar@quicklz.com +// +// QuickLZ can be used for free under the GPL 1, 2 or 3 license (where anything +// released into public must be open source) or under a commercial license if such +// has been acquired (see http://www.quicklz.com/order.html). 
The commercial license +// does not cover derived or ported versions created by third parties under GPL. + +// You can edit following user settings. Data must be decompressed with the same +// setting of QLZ_COMPRESSION_LEVEL and QLZ_STREAMING_BUFFER as it was compressed +// (see manual). If QLZ_STREAMING_BUFFER > 0, scratch buffers must be initially +// zeroed out (see manual). First #ifndef makes it possible to define settings from +// the outside like the compiler command line. + +// 1.5.0 final + +#ifndef QLZ_COMPRESSION_LEVEL + + // 1 gives fastest compression speed. 3 gives fastest decompression speed and best + // compression ratio. + //#define QLZ_COMPRESSION_LEVEL 1 + //#define QLZ_COMPRESSION_LEVEL 2 + //#define QLZ_COMPRESSION_LEVEL 3 + #define QLZ_COMPRESSION_LEVEL 2 + + // If > 0, zero out both states prior to first call to qlz_compress() or qlz_decompress() + // and decompress packets in the same order as they were compressed + #define QLZ_STREAMING_BUFFER 0 + //#define QLZ_STREAMING_BUFFER 100000 + //#define QLZ_STREAMING_BUFFER 1000000 + + // Guarantees that decompression of corrupted data cannot crash. Decreases decompression + // speed 10-20%. Compression speed not affected. + //#define QLZ_MEMORY_SAFE +#endif + +#define QLZ_VERSION_MAJOR 1 +#define QLZ_VERSION_MINOR 5 +#define QLZ_VERSION_REVISION 0 + +// Verify compression level +#if QLZ_COMPRESSION_LEVEL != 1 && QLZ_COMPRESSION_LEVEL != 2 && QLZ_COMPRESSION_LEVEL != 3 +#error QLZ_COMPRESSION_LEVEL must be 1, 2 or 3 +#endif + +typedef unsigned int ui32; +typedef unsigned short int ui16; + +// Decrease QLZ_POINTERS for level 3 to increase compression speed. Do not touch any other values! 
+#if QLZ_COMPRESSION_LEVEL == 1
+#define QLZ_POINTERS 1
+#define QLZ_HASH_VALUES 4096
+#elif QLZ_COMPRESSION_LEVEL == 2
+#define QLZ_POINTERS 4
+#define QLZ_HASH_VALUES 2048
+#elif QLZ_COMPRESSION_LEVEL == 3
+#define QLZ_POINTERS 16
+#define QLZ_HASH_VALUES 4096
+#endif
+
+// Hash entry used by the compressor. Level 1 stores a ui32 `cache` word and a single offset; levels 2 and 3 store QLZ_POINTERS offsets.
+typedef struct
+{
+#if QLZ_COMPRESSION_LEVEL == 1
+    ui32 cache;
+#if defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
+    unsigned int offset; // integer offset instead of a pointer when QLZ_PTR_64 is defined and streaming is off
+#else
+    const unsigned char *offset;
+#endif
+#else
+    const unsigned char *offset[QLZ_POINTERS];
+#endif
+
+} qlz_hash_compress;
+// Hash entry used by the decompressor: one offset at level 1, QLZ_POINTERS offsets otherwise.
+typedef struct
+{
+#if QLZ_COMPRESSION_LEVEL == 1
+    const unsigned char *offset;
+#else
+    const unsigned char *offset[QLZ_POINTERS];
+#endif
+} qlz_hash_decompress;
+
+
+// State objects handed to qlz_compress() and qlz_decompress() (see the prototypes below). The compression state comes first.
+typedef struct
+{
+    #if QLZ_STREAMING_BUFFER > 0
+    unsigned char stream_buffer[QLZ_STREAMING_BUFFER]; // present only when streaming is enabled
+    #endif
+    size_t stream_counter;
+    qlz_hash_compress hash[QLZ_HASH_VALUES];
+    unsigned char hash_counter[QLZ_HASH_VALUES];
+} qlz_state_compress;
+
+// Decompression state. Levels 1 and 2 carry a hash table and its counters; the level-3 variant has only the optional stream buffer and stream_counter.
+#if QLZ_COMPRESSION_LEVEL == 1 || QLZ_COMPRESSION_LEVEL == 2
+    typedef struct
+    {
+#if QLZ_STREAMING_BUFFER > 0
+        unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
+#endif
+        qlz_hash_decompress hash[QLZ_HASH_VALUES];
+        unsigned char hash_counter[QLZ_HASH_VALUES];
+        size_t stream_counter;
+    } qlz_state_decompress;
+#elif QLZ_COMPRESSION_LEVEL == 3
+    typedef struct
+    {
+#if QLZ_STREAMING_BUFFER > 0
+        unsigned char stream_buffer[QLZ_STREAMING_BUFFER];
+#endif
+#if QLZ_COMPRESSION_LEVEL <= 2 // never true inside the level-3 branch, so this member is always compiled out here
+        qlz_hash_decompress hash[QLZ_HASH_VALUES];
+#endif
+        size_t stream_counter;
+    } qlz_state_decompress;
+#endif
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+// Public functions of QuickLZ. The state argument must be of the type matching the call (compress vs. decompress).
+size_t qlz_size_decompressed(const char *source);
+size_t qlz_size_compressed(const char *source);
+size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state);
+size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state);
+int qlz_get_setting(int setting);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
+
+diff --git a/microbench/src/md5/md5.c b/microbench/src/md5/md5.c
new file mode 100644
index 0000000..5ce3d65
--- /dev/null
+++ b/microbench/src/md5/md5.c
@@ -0,0 +1,159 @@
+/*
+ * Simple MD5 implementation (github.com/pod32g/md5)
+ * Benchmark wrapper: bench_md5_run() hashes an N-byte buffer; bench_md5_validate() checksums the 16-byte digest.
+ */
+
+#include // NOTE(review): the header name is missing (an angle-bracket operand appears to have been stripped); restore it before building
+
+static int N; // byte count passed to md5() as initial_len by bench_md5_run()
+
+// Constants are the integer part of the sines of integers (in radians) * 2^32.
+const uint32_t k[64] = { // NOTE(review): not static, unlike r below, so this symbol has external linkage
+0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee ,
+0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501 ,
+0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be ,
+0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 ,
+0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa ,
+0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8 ,
+0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed ,
+0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a ,
+0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c ,
+0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70 ,
+0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05 ,
+0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 ,
+0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039 ,
+0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1 ,
+0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1 ,
+0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 };
+
+// r specifies the per-round shift amounts (64 entries, every value between 4 and 23)
+static const uint32_t r[] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
+                             5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
+                             4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
+                             6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
+
+// Rotate a 32-bit value x left by c bits. Only valid for 0 < c < 32: c == 0 would shift right by 32.
+#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
+// Store val into bytes[0..3], least significant byte first.
+static void to_bytes(uint32_t val, uint8_t *bytes)
+{
+    bytes[0] = (uint8_t) val;
+    bytes[1] = (uint8_t) (val >> 8);
+    bytes[2] = (uint8_t) (val >> 16);
+    bytes[3] = (uint8_t) (val >> 24);
+}
+// Assemble a uint32_t from bytes[0..3], least significant byte first (inverse of to_bytes).
+static uint32_t to_int32(const uint8_t *bytes)
+{
+    return (uint32_t) bytes[0]
+        | ((uint32_t) bytes[1] << 8)
+        | ((uint32_t) bytes[2] << 16)
+        | ((uint32_t) bytes[3] << 24);
+}
+// Hash msg[0..initial_len). Padding and the bit length are written in place, so msg must be writable through index new_len + 7. digest presumably receives the result; the code that writes it is not present here.
+static void md5(uint8_t *msg, size_t initial_len, uint8_t *digest) {
+
+    // These vars will contain the hash
+    uint32_t h0, h1, h2, h3;
+
+    size_t new_len, offset;
+    uint32_t w[16];
+    uint32_t a, b, c, d, i, f, g, temp;
+
+    // Initialize variables - simple count in nibbles:
+    h0 = 0x67452301;
+    h1 = 0xefcdab89;
+    h2 = 0x98badcfe;
+    h3 = 0x10325476;
+
+    //Pre-processing:
+    //append "1" bit to message
+    //append "0" bits until message length in bits ≡ 448 (mod 512)
+    //append length mod (2^64) to message
+    // new_len becomes the smallest value above initial_len that is congruent to 56 modulo 64 (in bytes)
+    for (new_len = initial_len + 1; new_len % (512/8) != 448/8; new_len++)
+        ;
+
+    msg[initial_len] = 0x80; // append the "1" bit; most significant bit is "first"
+    for (offset = initial_len + 1; offset < new_len; offset++)
+        msg[offset] = 0; // append "0" bits
+
+    // append the len in bits at the end of the buffer.
+    to_bytes(initial_len*8, msg + new_len);
+    // initial_len>>29 == initial_len*8>>32, but avoids overflow.
+    to_bytes(initial_len>>29, msg + new_len + 4);
+
+    // Process the message in successive 512-bit chunks:
+    //for each 512-bit chunk of message:
+    for(offset=0; offsetsize; // NOTE(review): text is missing from this point - the rest of this loop and of md5(), and the start of bench_md5_prepare() including the declarations of str and digest. Apparently everything between a '<' and a later '>' was dropped; restore it from the original md5.c.
+  bench_srand(1);
+  str = bench_alloc(N); // NOTE(review): only N bytes are requested, yet md5() writes padding and the length through str[new_len + 7]; confirm bench_alloc leaves that slack or enlarge the request
+  for (int i = 0; i < N; i ++) {
+    str[i] = bench_rand();
+  }
+  digest = bench_alloc(16);
+}
+// Timed section: one md5() call over the N-byte buffer filled during prepare.
+void bench_md5_run() {
+  md5(str, N, digest);
+}
+// Returns nonzero when the checksum of digest[0..16) equals setting->checksum.
+int bench_md5_validate() {
+  return checksum(digest, digest + 16) == setting->checksum;
+}
diff --git a/microbench/src/qsort/qsort.c b/microbench/src/qsort/qsort.c
new file mode 100644
index 0000000..da9c593
--- /dev/null
+++ b/microbench/src/qsort/qsort.c
@@ -0,0 +1,44 @@
+#include // NOTE(review): the header name is missing (an angle-bracket operand appears to have been stripped); restore it before building
+
+static int N, *data; // element count and the array that gets sorted
+// Allocates N ints and fills them from the seeded generator.
+void bench_qsort_prepare() {
+  bench_srand(1);
+
+  N = setting->size;
+
+  data = bench_alloc(N * sizeof(int));
+  for (int i = 0; i < N; i ++) {
+    int a = bench_rand();
+    int b = bench_rand();
+    data[i] = (a << 16) | b; // NOTE(review): signed left shift; assumes bench_rand() returns a non-negative value below 2^15 - confirm
+  }
+}
+// Exchanges the two ints that a and b point to.
+static void swap(int *a, int *b) {
+  int t = *a;
+  *a = *b;
+  *b = t;
+}
+// Sorts the half-open range a[l..r) in ascending order: partitions around the first element, then recurses on both sides.
+static void myqsort(int *a, int l, int r) {
+  if (l < r) {
+    int p = a[l], pivot = l, j; // p: pivot value; pivot: index of the last element known to be smaller than p
+    for (j = l + 1; j < r; j ++) {
+      if (a[j] < p) {
+        swap(&a[++pivot], &a[j]);
+      }
+    }
+    swap(&a[pivot], &a[l]); // move the pivot value to its final position
+    myqsort(a, l, pivot);
+    myqsort(a, pivot + 1, r);
+  }
+}
+// Timed section: sorts data[0..N).
+void bench_qsort_run() {
+  myqsort(data, 0, N);
+}
+// Returns nonzero when the checksum of the sorted array equals setting->checksum.
+int bench_qsort_validate() {
+  return checksum(data, data + N) == setting->checksum;
+}
diff --git a/microbench/src/queen/queen.c b/microbench/src/queen/queen.c
new file mode 100644
index 0000000..91c0f32
--- /dev/null
+++ b/microbench/src/queen/queen.c
@@ -0,0 +1,32 @@
+#include // NOTE(review): the header name is missing (an angle-bracket operand appears to have been stripped); restore it before building
+
+static unsigned int FULL; // mask with the low setting->size bits set
+// Counts the ways to finish a placement. row: bits already chosen; ld / rd: bits blocked at this level, shifted one position left / right per level.
+static unsigned int dfs(unsigned int row, unsigned int ld, unsigned int rd) {
+  if (row == FULL) {
+    return 1;
+  } else {
+    unsigned int pos = FULL & (~(row | ld | rd)), ans = 0; // pos: positions still free at this level
+    while (pos) {
+      unsigned int p = (pos & (~pos + 1)); // isolate the lowest set bit of pos
+      pos -= p;
+      ans += dfs(row | p, (ld | p) << 1, (rd | p) >> 1);
+    }
+    return ans;
+  }
+}
+
+static unsigned int ans; // result of the last bench_queen_run()
+// Resets the result and builds FULL from the configured size.
+void bench_queen_prepare() {
+  ans = 0;
+  FULL = (1 << setting->size) - 1; // NOTE(review): shift of a signed 1; assumes setting->size is below 31 - confirm
+}
+// Timed section: counts all complete placements starting from an empty state.
+void bench_queen_run() {
+  ans = dfs(0, 0, 0);
+}
+// Returns nonzero when the count equals setting->checksum.
+int bench_queen_validate() {
+  return ans == setting->checksum;
+}
diff --git a/microbench/src/sieve/sieve.c b/microbench/src/sieve/sieve.c
new file mode 100644
index 0000000..01755f9
--- /dev/null
+++ b/microbench/src/sieve/sieve.c
@@ -0,0 +1,42 @@
+#include // NOTE(review): the header name is missing (an angle-bracket operand appears to have been stripped); restore it before building
+
+static int N; // upper bound of the sieve, inclusive
+
+static int ans; // number of set bits counted in 2..N by the last run
+static uint32_t *primes; // bitmap: bit (n & 31) of word (n >> 5) belongs to the number n
+// Returns bit n of the bitmap (0 or 1).
+static inline int get(int n) {
+  return (primes[n >> 5] >> (n & 31)) & 1;
+}
+// Clears bit n of the bitmap.
+static inline void clear(int n) {
+  primes[n >> 5] &= ~(1ul << (n & 31));
+}
+// Allocates N / 8 + 128 bytes and sets words 0..N/32 to all ones.
+void bench_sieve_prepare() {
+  N = setting->size;
+  primes = (uint32_t*)bench_alloc(N / 8 + 128);
+  for (int i = 0; i <= N / 32; i ++) {
+    primes[i] = 0xffffffff;
+  }
+}
+// Timed section: clears every multiple of each i with i * i <= N, then counts the bits still set in 2..N.
+void bench_sieve_run() {
+  for (int i = 1; i <= N; i ++)
+    if (!get(i)) return; // bail out, leaving ans untouched, unless every bit in 1..N is still set
+  for (int i = 2; i * i <= N; i ++) {
+    if (get(i)) {
+      for (int j = i + i; j <= N; j += i)
+        clear(j);
+    }
+  }
+  ans = 0;
+  for (int i = 2; i <= N; i ++)
+    if (get(i)) {
+      ans ++;
+    }
+}
+// Returns nonzero when the count equals setting->checksum.
+int bench_sieve_validate() {
+  return ans == setting->checksum;
+}
diff --git a/microbench/src/ssort/ssort.cc b/microbench/src/ssort/ssort.cc
new file mode 100644
index 0000000..12b8f24
--- /dev/null
+++ b/microbench/src/ssort/ssort.cc
@@ -0,0 +1,111 @@
+// This is the Skew algorithm's reference implementation.
+
+#include // NOTE(review): the header name is missing (an angle-bracket operand appears to have been stripped); restore it before building
+
+static int N; // input length; set in bench_ssort_prepare() and passed to suffixArray()
+
+inline bool leq(int a1, int a2, int b1, int b2) { // lexic. order for pairs
+  return(a1 < b1 || (a1 == b1 && a2 <= b2));
+} // and triples
+inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) {
+  return(a1 < b1 || (a1 == b1 && leq(a2,a3, b2,b3)));
+}
+// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
+static void radixPass(int* a, int* b, int* r, int n, int K)
+{ // count occurrences
+  int* c = (int*)bench_alloc(sizeof(int)*(K+1)); // counter array; a fresh bench_alloc on every call, never released in this function
+  for (int i = 0; i <= K; i++) c[i] = 0; // reset counters
+  for (int i = 0; i < n; i++) c[r[a[i]]]++; // count occurrences
+  for (int i = 0, sum = 0; i <= K; i++) { // exclusive prefix sums
+    int t = c[i]; c[i] = sum; sum += t;
+  }
+  for (int i = 0; i < n; i++) b[c[r[a[i]]]++] = a[i]; // sort
+}
+
+// find the suffix array SA of s[0..n-1] in {1..K}^n
+// require s[n]=s[n+1]=s[n+2]=0, n>=2
+void suffixArray(int* s, int* SA, int n, int K) {
+  int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
+  int* s12 = (int*)bench_alloc(sizeof(int)*(n02+3)); s12[n02]= s12[n02+1]= s12[n02+2]=0;
+  int* SA12 = (int*)bench_alloc(sizeof(int)*(n02+3)); SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
+  int* s0 = (int*)bench_alloc(sizeof(int)*n0);
+  int* SA0 = (int*)bench_alloc(sizeof(int)*n0);
+
+  // generate positions of mod 1 and mod 2 suffixes
+  // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
+  for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i;
+
+  // lsb radix sort the mod 1 and mod 2 triples
+  radixPass(s12 , SA12, s+2, n02, K);
+  radixPass(SA12, s12 , s+1, n02, K);
+  radixPass(s12 , SA12, s , n02, K);
+
+  // find lexicographic names of triples
+  int name = 0, c0 = -1, c1 = -1, c2 = -1;
+  for (int i = 0; i < n02; i++) {
+    if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
+      name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2];
+    }
+    if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half
+    else { s12[SA12[i]/3 + n0] = name; } // right half
+  }
+
+  // recurse if names are not yet unique
+  if (name < n02) {
+    suffixArray(s12, SA12, n02, name);
+    // store unique names in s12 using the suffix array
+    for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1;
+  } else // generate the suffix array of s12 directly
+    for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i;
+
+  // stably sort the mod 0 suffixes from SA12 by their first character
+  for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
+  radixPass(s0, SA0, s, n0, K);
+
+  // merge sorted SA0 suffixes and sorted SA12 suffixes
+  for (int p=0, t=n0-n1, k=0; k < n; k++) {
+#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2) // defined inside the function and never #undef'd, so it stays visible for the rest of the file
+    int i = GetI(); // pos of current offset 12 suffix
+    int j = SA0[p]; // pos of current offset 0 suffix
+    if (SA12[t] < n0 ?
+        leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) :
+        leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
+    { // suffix from SA12 is smaller
+      SA[k] = i; t++;
+      if (t == n02) { // done --- only SA0 suffixes left
+        for (k++; p < n0; p++, k++) SA[k] = SA0[p];
+      }
+    } else {
+      SA[k] = j; p++;
+      if (p == n0) { // done --- only SA12 suffixes left
+        for (k++; t < n02; t++, k++) SA[k] = GetI();
+      }
+    }
+  }
+}
+
+extern "C" {
+
+static int *s, *sa; // input symbols and the resulting suffix array, each allocated with N + 10 ints
+// Allocates both arrays and fills s[0..N) with bench_rand() % 26.
+void bench_ssort_prepare() {
+  N = setting->size;
+  bench_srand(1);
+  s = (int*)bench_alloc(sizeof(int)*(N+10));
+  sa = (int*)bench_alloc(sizeof(int)*(N+10));
+
+  for (int i = 0; i < N; i ++) {
+    s[i] = bench_rand() % 26; // NOTE(review): yields 0..25 while suffixArray documents symbols in 1..K with 0 as terminator, and s[N..N+2] are not zeroed here - confirm bench_alloc returns zeroed memory
+  }
+}
+// Timed section: builds the suffix array of s[0..N) with K = 26.
+void bench_ssort_run() {
+  suffixArray(s, sa, N, 26);
+}
+// Returns nonzero when the checksum of sa[0..N) equals setting->checksum.
+int bench_ssort_validate() {
+  return checksum(sa, sa + N) == setting->checksum;
+}
+
+}
+
+diff --git a/thread-os/Makefile b/thread-os/Makefile
new file mode 100644
index 0000000..dbe99b4
--- /dev/null
+++ b/thread-os/Makefile
@@ -0,0 +1,3 @@
+NAME := thread-os
+SRCS := thread-os.c
+include $(AM_HOME)/Makefile
diff --git a/thread-os/thread-os.c b/thread-os/thread-os.c
new file mode 100644
index 0000000..dbfdff9
--- /dev/null
+++ b/thread-os/thread-os.c
@@ -0,0 +1,71 @@
+#include // NOTE(review): the header name is missing here and on the next two lines (angle-bracket operands appear to have been stripped); restore them before building
+#include
+#include
+
+#define MAX_CPU 8
+// A task and its stack share one 8192-byte union: the four header fields overlay the first bytes of stack[].
+typedef union task {
+  struct {
+    const char *name;
+    union task *next; // successor in the ring that main() builds
+    void (*entry)(void *);
+    Context *context; // set by main() via kcontext(), then updated by on_interrupt()
+  };
+  uint8_t stack[8192];
+} Task;
+
+Task *currents[MAX_CPU]; // task currently selected on each CPU, indexed by cpu_current()
+#define current currents[cpu_current()] // every use calls cpu_current() again
+
+// user-defined tasks
+
+int locked = 0; // spin-lock word: 0 = free, 1 = held
+void lock() { while (atomic_xchg(&locked, 1)); } // spin until the previous value was 0
+void unlock() { atomic_xchg(&locked, 0); }
+// Task body: prints the task name and CPU number under the lock forever, with a busy-wait delay between prints.
+void func(void *arg) {
+  while (1) {
+    lock();
+    printf("Thread-%s on CPU #%d\n", arg, cpu_current()); // NOTE(review): arg is a void * but %s expects a char *; consider casting
+    unlock();
+    for (int volatile i = 0; i < 100000; i++) ;
+  }
+}
+// Static task table; .next and .context are filled in by main().
+Task tasks[] = {
+  { .name = "A", .entry = func },
+  { .name = "B", .entry = func },
+  { .name = "C", .entry = func },
+  { .name = "D", .entry = func },
+  { .name = "E", .entry = func },
+};
+
+// ------------------ scheduler and boot code ------------------
+// Handler passed to cte_init(). Saves ctx into this CPU's current task (if it has one), then walks the ->next ring until it reaches a task whose index modulo cpu_count() equals cpu_current(), and returns that task's context. ev is not used.
+Context *on_interrupt(Event ev, Context *ctx) {
+  extern Task tasks[];
+  if (!current) current = &tasks[0]; // first entry on this CPU: there is no task context to save yet
+  else current->context = ctx; // remember where the running task was stopped
+  do {
+    current = current->next;
+  } while ((current - tasks) % cpu_count() != cpu_current()); // NOTE(review): never terminates on a CPU whose id matches no task index, e.g. when cpu_count() exceeds LENGTH(tasks) - confirm this cannot happen
+  return current->context;
+}
+// Entry point handed to mpe_init(): calls iset(true), then yield(). NOTE(review): presumably this enables interrupts and traps into on_interrupt() - confirm against the platform API.
+void mp_entry() {
+  iset(true);
+  yield();
+}
+// Boot sequence: calls ioe_init(), passes on_interrupt to cte_init(), prepares every entry of tasks[], then hands mp_entry to mpe_init().
+int main() {
+  ioe_init();
+  cte_init(on_interrupt);
+  // Give each task a context and link the table into a ring.
+  for (int i = 0; i < LENGTH(tasks); i++) {
+    Task *task = &tasks[i];
+    Area stack = (Area) { &task->context + 1, task + 1 }; // from just past the context field to the end of this Task union
+    task->context = kcontext(stack, task->entry, (void *)task->name); // the name is passed along, presumably as entry's argument
+    task->next = &tasks[(i + 1) % LENGTH(tasks)]; // the last task points back to the first
+  }
+  mpe_init(mp_entry);
+}