0.5.0 -- Limit processed beamlets, test cases, new configuration method

Features: A subset of the beamlets in the input ports can now be selected, such that when 488 beamlets are recorded, we can process all of them or just a subset on the range [lower, upper). The `docs/newProcessingMode.md` has been updated to reflect the changes in the kernels to support this. Implemented a set of test cases in the makefile under target `test`, which generates outputs and compares them to a set of hashes. The hashes on hand were generated using -ffast-math, so debug builds will not pass tests. The lofar_udp_reader option can now be set up using a struct rather than a large number of input parameters. The CLIs can now start processing data from a non-0 base port. Some default structs have been provided for configuring the reader. Fixes: Documentation pass across the repo. Makefile will no longer always try to use ICC if it is available, and will default to whatever is passed in by CC and CXX. GCC compiles now use -ffast-math as well. Makefile now uses `-command` syntax rather than `command; exit 0` to work around calls expected to fail. Fixed lofar_udp_extractor passing the wrong number of beamlets to mockHeader for some processing modes. CLIs now determine the number of output files from the lofar_udp_reader struct rather than trying (and often failing, whoops) to predict it themselves. CLIs will now raise an error if the input filename does not update when iterating over input filepaths. Removed inconsistent documentation about the GUPPI RAW CLI. Fixed some of the "Full Stokes Vector" processing modes (151-153) generating garbage on ICC by splitting the loop into two separate sets of function calls. (A compiler bug?) 
Fixed an incorrect base offset in some of the Stokes decimation modes (1,2,3). Fixed the Time-Major, Dual Pols (32) mode incorrectly calculating the output offset for the second half of data (missing bracket). The main processing loop is now a compile-time fixed if-else statement rather than a runtime switch (I thought it was fixed at compile time, I was mistaken). Added safeguards to prevent memory leaks from the CLI. Fixed the Useful Stokes mode (160, did not affect decimated versions) having an incorrect number of output files. Fixed the reader_step return value not being updated due to a missing OMP pragma. Fixed reader_step attempting to iterate when there was no work to perform (cosmetic change).
David-McKenna · Nov 6, 2020 · cf118b0 · cf118b0
1 parent 39ff9b8
commit cf118b0
Show file tree

Hide file tree

Showing 17 changed files with 859 additions and 363 deletions.
diff --git a/Makefile b/Makefile
@@ -1,3 +1,8 @@
+# Don't default to sh/dash
+SHELL = bash
+
+# If we haven't been provided a compiler, check for icc/gcc
+ifeq (,$(CC))
 ifneq (,$(shell which icc))
 CC		= icc
 CXX		= icpc
@@ -8,16 +13,21 @@ else
 CC		= gcc
 CXX		= g++
 endif
+endif
 
-LIB_VER = 0.4
+# Library versions
+LIB_VER = 0.5
 LIB_VER_MINOR = 0
-CLI_VER = 0.2
+CLI_VER = 0.3
 
 # Determine the max threads per socket to speed up execution via OpenMP with ICC (GCC falls over if we set too many)
 THREADS = $(shell cat /proc/cpuinfo | uniq | grep -m 2 "siblings" | cut -d ":" -f 2 | sort --numeric --unique | awk '{printf("%d", $$1);}')
 
-CFLAGS 	+= -march=native -W -Wall -O3 -march=native -DVERSION=$(LIB_VER) -DVERSIONCLI=$(CLI_VER) -fPIC # -DBENCHMARK -g -DALLOW_VERBOSE #-D__SLOWDOWN
+CFLAGS 	+= -W -Wall -Ofast -march=native -DVERSION=$(LIB_VER) -DVERSIONCLI=$(CLI_VER) -fPIC # -DBENCHMARKING -g -DALLOW_VERBOSE #-D__SLOWDOWN
+# -fopt-info-missed=compiler_report_missed.log -fopt-info-vec=compiler_report_vec.log -fopt-info-loop=compiler_report_loop.log -fopt-info-inline=compiler_report_inline.log -fopt-info-omp=compiler_report_omp.log
 
+# Adjust flags based on the compiler
+# ICC has a different code path and can use more threads as a result
 ifeq ($(CC), icc)
 AR = xiar
 CFLAGS += -fast -static -static-intel -qopenmp-link=static -DOMP_THREADS=$(THREADS)
@@ -26,11 +36,12 @@ AR = ar
 CFLAGS += -funswitch-loops -DOMP_THREADS=5
 endif
 
+# Ensure we're using C++17
 CXXFLAGS += $(CFLAGS) -std=c++17
-# -fopt-info-missed=compiler_report_missed.log -fopt-info-vec=compiler_report_vec.log -fopt-info-loop=compiler_report_loop.log -fopt-info-inline=compiler_report_inline.log -fopt-info-omp=compiler_report_omp.log
 
 LFLAGS 	+= -I./src -I./src/lib -I./src/CLI -I/usr/include/ -lzstd -fopenmp #-lefence
 
+# Define our general build targets
 OBJECTS = src/lib/lofar_udp_reader.o src/lib/lofar_udp_misc.o src/lib/lofar_udp_backends.o
 CLI_META_OBJECTS = src/CLI/lofar_cli_meta.o src/CLI/ascii_hdr_manager.o
 CLI_OBJECTS = $(OBJECTS) $(CLI_META_OBJECTS) src/CLI/lofar_cli_extractor.o src/CLI/lofar_cli_guppi_raw.o
@@ -39,30 +50,38 @@ LIBRARY_TARGET = liblofudpman.a
 
 PREFIX = /usr/local
 
+.INTERMEDIATE : ./tests/obj-generated-$(LIB_VER).$(LIB_VER_MINOR)
+
+# C -> CC
 %.o: %.c
 	$(CC) -c $(CFLAGS) -o ./$@ $< $(LFLAGS)
 
+# C++ -> CXX
 %.o: %.cpp
 	$(CXX) -c $(CXXFLAGS) -o ./$@ $< $(LFLAGS)
 
+# CLI -> link with C++
 all: $(CLI_OBJECTS) library
 	$(CXX) $(CXXFLAGS) src/CLI/lofar_cli_extractor.o $(CLI_META_OBJECTS) $(LIBRARY_TARGET)  -o ./lofar_udp_extractor $(LFLAGS)
 	$(CXX) $(CXXFLAGS) src/CLI/lofar_cli_guppi_raw.o $(CLI_META_OBJECTS) $(LIBRARY_TARGET) -o ./lofar_udp_guppi_raw $(LFLAGS)
 
+# Library -> *ar
 library: $(OBJECTS)
 	$(AR) rc $(LIBRARY_TARGET).$(LIB_VER).$(LIB_VER_MINOR) $(OBJECTS)
 	ln -sf ./$(LIBRARY_TARGET).$(LIB_VER).$(LIB_VER_MINOR) ./$(LIBRARY_TARGET)
 
+# Install CLI, headers, library
 install: all
 	mkdir -p $(PREFIX)/bin/ && mkdir -p $(PREFIX)/include/
 	cp ./lofar_udp_extractor $(PREFIX)/bin/
 	cp ./lofar_udp_guppi_raw $(PREFIX)/bin/
 	cp ./src/lib/*.h $(PREFIX)/include/
 	cp ./src/lib/*.hpp $(PREFIX)/include/
 	cp -P ./*.a* ${PREFIX}/lib/
-	cp -P ./*.a ${PREFIX}/lib/	
-	cp ./mockHeader/mockHeader $(PREFIX)/bin/; exit 0;
+	cp -P ./*.a ${PREFIX}/lib/
+	-cp ./mockHeader/mockHeader $(PREFIX)/bin/
 
+# Install CLI, headers, library, locally
 install-local: all
 	mkdir -p ~/.local/bin/ && mkdir -p ~/.local/include/
 	cp ./lofar_udp_extractor ~/.local/bin/
@@ -71,17 +90,20 @@ install-local: all
 	cp ./src/lib/*.hpp ~/.local/include/
 	cp -P ./*.a* ~/.local/lib/
 	cp -P ./*.a ~/.local/lib/
-	cp ./mockHeader/mockHeader ~/.local/bin/; exit 0;
+	-cp ./mockHeader/mockHeader ~/.local/bin/
 
+# Remove local build artifacts
 clean:
-	rm ./src/CLI/*.o; exit 0;
-	rm ./src/lib/*.o; exit 0;
-	rm ./*.a; exit 0;
-	rm ./*.a.*; exit 0;
-	rm ./compiler_report_*.log; exit 0;
-	rm ./lofar_udp_extractor; exit 0;
-	rm ./lofar_udp_guppi_raw; exit 0;
-
+	-rm ./src/CLI/*.o
+	-rm ./src/lib/*.o
+	-rm ./*.a
+	-rm ./*.a.*
+	-rm ./compiler_report_*.log
+	-rm ./lofar_udp_extractor
+	-rm ./lofar_udp_guppi_raw
+	-rm ./tests/output_*
+
+# Uninstall the software from the system
 remove:
 	rm $(PREFIX)/bin/lofar_udp_extractor
 	rm $(PREFIX)/bin/lofar_udp_guppi_raw
@@ -91,6 +113,7 @@ remove:
 	find . -name "*.a.*" -exec rm $(PREFIX)/lib/{} \;
 	make clean
 
+# Uninstall the software from the local user
 remove-local:
 	rm ~/.local/bin/lofar_udp_extractor
 	rm ~/.local/bin/lofar_udp_guppi_raw
@@ -101,7 +124,72 @@ remove-local:
 	make clean
 
 
-
+# Verify the generated test outputs against the stored reference hashes.
+# Sources ./tests/hashVariables.txt and compares each ./tests/output* md5
+# hash to the variable named after that file, exiting with status 1 on a
+# mismatch. The hashes should be stable between versions and builds.
+# The reference hashes were generated using -ffast-math and will
+# not be correct without that flag.
+test: ./tests/obj-generated-$(LIB_VER).$(LIB_VER_MINOR)
+	# . === source
+	. ./tests/hashVariables.txt; for output in ./tests/output*; do \
+		base=$$(basename $$output); \
+		md5hash=($$(md5sum $$output)); \
+		echo "$$base: $${md5hash[0]}, $${!base}"; \
+		if [[ "$${md5hash[0]}" != "$${!base}" ]]; then \
+			echo "Processed output $$output does not match expected hash. Exiting."; \
+			exit 1; \
+		fi; \
+	done; \
+	\
+	# TODO: check overlapping outputs, eg 100, 150, 160 against each other.
+
+	rm ./tests/output*
+
+
+# Build the objects to test: run lofar_udp_extractor for every processing mode (first loop reads the uncompressed samples, second loop the .zst samples)
+# Stress test: multiple ports, compressed and uncompressed, every processing mode, an odd/small number of packets
+# In future: consider dropping a few packets from the test case
+./tests/obj-generated-$(LIB_VER).$(LIB_VER_MINOR): test-samples
+	-rm ./tests/output*
+
+
+	for procMode in 0 1 2 10 11 20 21 30 31 32; do \
+		echo "Running lofar_udp_extractor -i ./tests/udp_1613%d_sample -o './tests/output_'$$procMode'_%d' -p $$procMode -m 501 -u 2"; \
+		lofar_udp_extractor -i ./tests/udp_1613%d_sample -o './tests/output_'$$procMode'_%d' -p $$procMode -m 501 -u 2; \
+	done
+
+	for procMode in 100 110 120 130 150 160; do \
+		for offset in 0 1 2 3 4; do \
+			procModeStokes="`expr $$procMode + $$offset`"; \
+			echo "Running lofar_udp_extractor -i ./tests/udp_1613%d_sample.zst -o './tests/output_'$$procModeStokes'_%d' -p $$procModeStokes -m 501 -u 2"; \
+			lofar_udp_extractor -i ./tests/udp_1613%d_sample.zst -o './tests/output_'$$procModeStokes'_%d' -p $$procModeStokes -m 501 -u 2; \
+		done; \
+	done
+
+	touch ./tests/obj-generated-$(LIB_VER).$(LIB_VER_MINOR)
+	rm ./tests/udp_*_sample
+
+# Decompress each ./tests/*zst input sample with zstd so the uncompressed reader path can be tested
+test-samples:
+	for fil in ./tests/*zst; do \
+		zstd -d $$fil; \
+	done;
+
+# Regenerate ./tests/hashVariables.txt from scratch: one `name="md5"` line per current ./tests/output* file
+test-make-hashes: ./tests/obj-generated-$(LIB_VER).$(LIB_VER_MINOR)
+	-rm ./tests/hashVariables.txt
+	touch ./tests/hashVariables.txt
+	for fil in ./tests/output*; \
+		do outp=($$(md5sum $$fil)); \
+		base=$$(basename $$fil); \
+		echo $$base='"'"$${outp[0]}"'"' >> ./tests/hashVariables.txt; \
+	done
+
+
+
+
+# Optional: build mockHeader
 mockHeader:
 	git clone https://github.com/David-McKenna/mockHeader && \
 	cd mockHeader && \

diff --git a/README.md b/README.md
@@ -18,7 +18,8 @@ While using the library, do be aware
 Future work should not break the existing load/process iteration loop, and may consist of
 - Creating a wrapper python library to allow for easier interfacing within python scripts rather than requiring a C program (CFFI if I can strip out ifdefs?)
 - Investigating [blosc](https://github.com/Blosc/) [(examples link)](https://github.com/Blosc/c-blosc2/tree/master/examples) as an option to speed up some processing modes
-- Specifying specific beamlets to process rather than entire ports
+- Integrate mscorpol/dreamBeam for pointing corrections
+-- Complicated. If done, we'll need float outputs rather than char/shorts for many processing modes.
 
 Requirements
 ------------

diff --git a/docs/README_CLI.md b/docs/README_CLI.md
@@ -48,6 +48,11 @@ Arguments
 #### -u (int) [default: 4]
 - Number of input ports to iterate over
 
+#### -b (int),(int) [default: 0,0 === all inputs]
+- Indices of beamlets to extract from the input dataset. Lower value is inclusive, higher value is exclusive
+- Eg, `-b 0,300` will return 300 beamlets, at indices 0:299.
+- I wanted this to be inclusive on both ends but couldn't find a solid way to just index it as intended.
+
 #### -t (str) [default: T=0]
 - Starting time string, in UTC+0 and ISOT format (YYYY-MM-DDTHH:mm:ss)
 

diff --git a/docs/README_CLI_GUPPI_RAW.md b/docs/README_CLI_GUPPI_RAW.md
@@ -49,8 +49,7 @@ While most of the arguments are the same as the default CLI, as described in [*R
 
 #### -o (str) [default: ./output_%d]
 - Output file name, must contain at least *%d* when generating multiple outputs
-- *%s* will include the starting time stamp, *%ld* for the packet number **is not supported by this CLI**
-- These values must be added in order, so *%d_%s* is allowed to not print the packet number but *%ld_%d_%s* will not work.
+- Both *%s* for the starting time stamp and *%ld* for the packet number **are not supported by this CLI**
 
 #### -e (int) [default: INT_MAX]
 - Number of iterations to perform before closing the current file and opening a new one

diff --git a/docs/README_INTEGRATION.md b/docs/README_INTEGRATION.md
@@ -8,15 +8,6 @@ Include the reader header
 #include "lofar_udp_reader.h"
 ```
 
-
-You may need to manually include some C functions for some compilers (CUDA needed this in my case, I haven't caught all of the prototypes for C++). For a basic implementation, all you need is
-```
-extern "C"  {
-  int lofar_udp_reader_step(lofar_udp_reader *reader);
-  lofar_udp_reader* lofar_udp_meta_file_reader_setup(FILE **inputFiles, const int numPorts, const int replayDroppedPackets, const int processingMode, const int verbose, const long packetsPerIteration, const long startingPacket, const long packetsReadMax, const int compressedReader);
-}
-```
-
 Generate your reader
 ```
 lofar_udp_reader *reader;

diff --git a/docs/TODO b/docs/TODO
@@ -1,4 +1,5 @@
 TODO:
 
 Better approach to out of order packets?
-fread -> mmap operations
+fread -> mmap operations
+Test common outputs against each other (100 vs 150 vs 160, etc)
diff --git a/docs/newProcessingMode.md b/docs/newProcessingMode.md
@@ -29,7 +29,7 @@ int lofar_udp_raw_udp_my_new_kernel(lofar_udp_meta *meta) {
 
 ```
 
-3. Create the task kernel in `lofar_udp_backends.hpp`, following the format of
+3. Create the task kernel in `lofar_udp_backends.hpp`, following the format below. Have a look at the existing kernels and you'll likely be able to find an input/output idx calculation that suits what you are doing.
 
 ```
 template<typename I, typename O>
@@ -41,7 +41,7 @@ void inline udp_myNewKernel(params) {
 	#else
 	#pragma GCC unroll 61
 	#endif
-	for (int beamlet = 0; beamlet < portBeamlets; beamlet++) {
+	for (int beamlet = lowerBeamlet; beamlet < upperBeamlet; beamlet++) {
 		tsInOffset = <inIdx>;
 		tsOutOffset = <outIdx>;
 
@@ -65,12 +65,10 @@ void inline udp_myNewKernel(params) {
 4. Include the kernel in the if-else chain of `int lofar_udp_raw_loop(lofar_udp_meta *meta)`
 
 ```
-case KERNEL_ENUM_VAL:
-	#ifdef __INTEL_COMPILER
-	#pragma omp task firstprivate(iLoop, lastInputPacketOffset)
-	#endif
+else if (trueState == KERNEL_ENUM_VAL) {
 	udp_myNewKernel<I, O>(iLoop, lastInputPacketOffset, timeStepSize....);
-	break;
+}
+...
 
 ```
 
@@ -84,4 +82,6 @@ case KERNEL_ENUM_VAL:
 
 6. Staying in `int lofar_udp_setup_processing(lofar_udp_meta *meta)`, run the maths on the input / output data sizes and add your case to the switch statement. If adding a completely new calculation, be sure to add a `break;` statement afterwards, as the compiler warning is disabled for this switch statement. In the case of a re-ordering operation, you will just need to define the number of output arrays.
 
-7. Add documentation to `README_CLI.md` and `lofar_cli_meta.c`.
+7. Add documentation to `README_CLI.md` and `lofar_cli_meta.c`.
+
+8. Generate hashes for the output mode by adding it to the Makefile's `./tests/obj-generated-$(LIB_VER).$(LIB_VER_MINOR)` target, either in the compressed or uncompressed loop. `make test-make-hashes` will generate an output and add a hash to the tests/hashVariables.txt file in the git repo. Ensure no other mode hashes change.