From 7ba7400852ee27330c0ed1b4dfd7b9b13939a2bc Mon Sep 17 00:00:00 2001
From: xlauko
Date: Tue, 1 Oct 2024 13:30:01 +0200
Subject: [PATCH] test: Add parser benchmarks.

---
 test/parsers/csv-a.c     | 106 +++++++++++++++++++++
 test/parsers/csv-b.c     |  94 +++++++++++++++++++
 test/parsers/expr-a.c    |  43 +++++++++
 test/parsers/expr-b.c    | 106 +++++++++++++++++++++
 test/parsers/img-a.c     |  97 ++++++++++++++++++++
 test/parsers/json-a.c    | 194 +++++++++++++++++++++++++++++++++++++++
 test/parsers/key-value.c | 101 ++++++++++++++++++++
 test/parsers/png-a.c     | 103 +++++++++++++++++++++
 test/parsers/rle-a.c     | 108 ++++++++++++++++++++++
 9 files changed, 952 insertions(+)
 create mode 100644 test/parsers/csv-a.c
 create mode 100644 test/parsers/csv-b.c
 create mode 100644 test/parsers/expr-a.c
 create mode 100644 test/parsers/expr-b.c
 create mode 100644 test/parsers/img-a.c
 create mode 100644 test/parsers/json-a.c
 create mode 100644 test/parsers/key-value.c
 create mode 100644 test/parsers/png-a.c
 create mode 100644 test/parsers/rle-a.c

diff --git a/test/parsers/csv-a.c b/test/parsers/csv-a.c
new file mode 100644
index 0000000000..8352a64fdf
--- /dev/null
+++ b/test/parsers/csv-a.c
@@ -0,0 +1,106 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_LINE_LEN 1024
+#define MAX_FIELD_LEN 256
+
+int parse_csv_line(char *line, char *fields[]);
+
+// Non-parsing part: file handling and utility functions
+// Prints the fields of every line of `filename`; exits if it cannot be opened.
+// HL: hl.func @read_csv_file
+void read_csv_file(const char *filename) {
+    FILE *file = fopen(filename, "r");
+    if (!file) {
+        perror("Could not open file");
+        exit(EXIT_FAILURE);
+    }
+
+    char line[MAX_LINE_LEN];
+
+    // Parsing part: parsing lines and extracting fields
+    while (fgets(line, sizeof(line), file)) {
+        char *fields[MAX_FIELD_LEN];
+        int field_count = parse_csv_line(line, fields);
+
+        // Example use of parsed fields
+        printf("Parsed %d fields:\n", field_count);
+        for (int i = 0; i < field_count; ++i) {
+            printf("Field %d: %s\n", i, fields[i]);
+        }
+        printf("\n");
+
+        // Free the allocated memory for fields
+        for (int i = 0; i < field_count; ++i) {
+            free(fields[i]);
+        }
+    }
+
+    fclose(file);
+}
+
+// Parsing part: core CSV parsing logic
+// Splits `line` into at most MAX_FIELD_LEN heap-allocated fields stored in
+// `fields` and returns their count; the caller frees every stored field.
+// Inside a quoted field commas are literal and "" stands for one quote.
+int parse_csv_line(char *line, char *fields[]) {
+    int count = 0;
+    char *start = line;
+
+    // Stop at the line terminator and never write past the caller's array
+    while (*start && *start != '\n' && *start != '\r' && count < MAX_FIELD_LEN) {
+        // Skip whitespace
+        while (*start == ' ' || *start == '\t') start++;
+
+        // Handle quotes
+        int in_quotes = 0;
+        if (*start == '\"') {
+            in_quotes = 1;
+            start++;
+        }
+
+        // Unquoting and unescaping only ever shorten a field, so the rest of
+        // the line is a safe upper bound for its size
+        char *field = malloc(strlen(start) + 1);
+        if (!field) {
+            perror("Out of memory");
+            exit(EXIT_FAILURE);
+        }
+        size_t length = 0;
+
+        // Extract the field
+        while (*start && (in_quotes || (*start != ',' && *start != '\n' && *start != '\r'))) {
+            if (in_quotes && *start == '\"') {
+                if (*(start + 1) == '\"') {
+                    field[length++] = '\"'; // Escaped quote yields one quote
+                    start += 2;
+                } else {
+                    in_quotes = 0; // Closing quote is not part of the field
+                    start++;
+                }
+            } else {
+                field[length++] = *start++;
+            }
+        }
+        field[length] = '\0';
+        fields[count++] = field;
+
+        // Skip the comma that separates this field from the next one
+        if (*start == ',') start++;
+    }
+
+    return count;
+}
+
+int main(int argc, char *argv[]) {
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    read_csv_file(argv[1]);
+    return 0;
+}
diff --git a/test/parsers/csv-b.c b/test/parsers/csv-b.c
new file mode 100644
index 0000000000..5cace834be
--- /dev/null
+++ b/test/parsers/csv-b.c
@@ -0,0 +1,94 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Parsing part: A function to split a CSV line into tokens
+// Returns a malloc'ed array holding `*count` strdup'ed fields; the caller frees
+// the fields and the array. strtok modifies `line` and skips empty fields.
+// Parsing stops early when memory runs out; NULL is returned only when not even
+// the array could be allocated.
+// HL: hl.func @parse_csv_line
+char **parse_csv_line(char *line, int *count) {
+    int capacity = 10; // Initial capacity for fields
+    char **fields = malloc(capacity * sizeof(char *));
+    *count = 0;
+    if (fields == NULL) {
+        return NULL;
+    }
+
+    char *token = strtok(line, ",");
+    while (token != NULL) {
+        if (*count >= capacity) {
+            // Keep the old array reachable in case realloc fails
+            char **grown = realloc(fields, 2 * capacity * sizeof(char *));
+            if (grown == NULL) {
+                break;
+            }
+            fields = grown;
+            capacity *= 2;
+        }
+        // Add a copy of the token to fields
+        char *copy = strdup(token);
+        if (copy == NULL) {
+            break;
+        }
+        fields[(*count)++] = copy;
+
+        token = strtok(NULL, ",");
+    }
+    return fields;
+}
+
+// Non-parsing part: A function to handle the parsed CSV fields
+// HL: hl.func @handle_csv_fields
+void handle_csv_fields(char **fields, int count) {
+    printf("Parsed fields:\n");
+    for (int i = 0; i < count; ++i) {
+        printf("Field %d: %s\n", i + 1, fields[i]);
+    }
+}
+
+// Parsing part: Read the file line by line
+void parse_csv_file(const char *filename) {
+    FILE *file = fopen(filename, "r");
+    if (file == NULL) {
+        perror("Error opening file");
+        return;
+    }
+
+    char line[256];
+    while (fgets(line, sizeof(line), file)) {
+        // Remove newline character from the line
+        line[strcspn(line, "\n")] = '\0';
+
+        int count;
+        // Parse the line to extract CSV fields
+        char **fields = parse_csv_line(line, &count);
+
+        // Handle the parsed fields
+        handle_csv_fields(fields, count);
+
+        // Free allocated memory for fields
+        for (int i = 0; i < count; ++i) {
+            free(fields[i]);
+        }
+        free(fields);
+    }
+
+    fclose(file);
+}
+
+// Example usage
+int main(int argc, char *argv[]) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    // Parsing part: Parse the CSV file
+    parse_csv_file(argv[1]);
+
+    return 0;
+}
diff --git a/test/parsers/expr-a.c b/test/parsers/expr-a.c
new file mode 100644
index 0000000000..c391d24ae2
--- /dev/null
+++ b/test/parsers/expr-a.c
@@ -0,0 +1,43 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <ctype.h>
+
+// Parses the unsigned decimal number at `*input` and advances `*input` past
+// it; yields 0 when `*input` does not start with a digit.
+// HL: hl.func @parse_number
+int parse_number(const char **input) {
+    int value = 0;
+    // <ctype.h> functions require a value representable as unsigned char
+    while (isdigit((unsigned char)**input)) {
+        value = value * 10 + (**input - '0');
+        (*input)++;
+    }
+    return value;
+}
+
+// Evaluates "<number> + <number>"; spaces around each part are optional.
+// HL: hl.func @add_two_numbers
+int add_two_numbers(const char *input) {
+    while (isspace((unsigned char)*input)) input++; // Skip leading spaces
+    int num1 = parse_number(&input);
+    while (isspace((unsigned char)*input)) input++; // Skip spaces
+    if (*input == '+') input++; // Skip '+' without running past the end
+    while (isspace((unsigned char)*input)) input++; // Skip spaces after '+'
+    int num2 = parse_number(&input);
+    return num1 + num2;
+}
+
+int main() {
+    char input[100];
+
+    printf("Enter an expression (e.g., 3 + 5): ");
+    if (!fgets(input, sizeof(input), stdin)) {
+        return 1;
+    }
+
+    int result = add_two_numbers(input);
+    printf("Result: %d\n", result);
+
+    return 0;
+}
diff --git a/test/parsers/expr-b.c b/test/parsers/expr-b.c
new file mode 100644
index 0000000000..d8b56160e5
--- /dev/null
+++ b/test/parsers/expr-b.c
@@ -0,0 +1,106 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+// NUMBER is distinct from END so a literal is never mistaken for end of input
+typedef enum { PLUS = '+', MINUS = '-', MUL = '*', DIV = '/', NUMBER = '0', END = '\0' } TokenType;
+
+typedef struct {
+    TokenType type;
+    int value;
+} Token;
+
+const char *input;
+
+// Consumes and returns the next token of `input`; anything that is neither a
+// number nor an operator ends the token stream.
+// HL: hl.func @get_next_token
+Token get_next_token() {
+    while (isspace((unsigned char)*input)) input++; // Skip spaces
+
+    if (isdigit((unsigned char)*input)) {
+        int value = 0;
+        while (isdigit((unsigned char)*input)) {
+            value = value * 10 + (*input - '0');
+            input++;
+        }
+        return (Token){ .type = NUMBER, .value = value }; // Number token
+    } else if (*input == '+' || *input == '-' || *input == '*' || *input == '/') {
+        TokenType type = *input;
+        input++;
+        return (Token){ .type = type }; // Operator token
+    }
+
+    return (Token){ .type = END }; // End of input
+}
+
+int parse_factor(); // Forward declaration
+
+// Parsing a term (factor possibly with '*' or '/')
+int parse_term() {
+    int result = parse_factor();
+
+    for (;;) {
+        const char *mark = input; // Lets us un-read a token that is not ours
+        Token token = get_next_token();
+
+        if (token.type == MUL) {
+            result *= parse_factor();
+        } else if (token.type == DIV) {
+            int divisor = parse_factor();
+            if (divisor == 0) {
+                fprintf(stderr, "Division by zero\n");
+                exit(EXIT_FAILURE);
+            }
+            result /= divisor;
+        } else {
+            input = mark; // Leave '+', '-' or the end for parse_expression
+            return result;
+        }
+    }
+}
+
+// HL: hl.func @parse_factor
+// Parsing a factor (number)
+int parse_factor() {
+    Token token = get_next_token();
+    if (token.type == NUMBER) {
+        return token.value;
+    }
+    return 0; // Fallback
+}
+
+// HL: hl.func @parse_expression
+// Parsing an expression (term possibly with '+' or '-')
+int parse_expression() {
+    int result = parse_term();
+    Token token = get_next_token();
+
+    while (token.type == PLUS || token.type == MINUS) {
+        if (token.type == PLUS) {
+            result += parse_term();
+        } else if (token.type == MINUS) {
+            result -= parse_term();
+        }
+        token = get_next_token();
+    }
+
+    return result;
+}
+
+int main() {
+    char buffer[100];
+
+    printf("Enter an arithmetic expression (e.g., 3 + 5 * 2): ");
+    if (!fgets(buffer, sizeof(buffer), stdin)) {
+        return 1;
+    }
+
+    input = buffer;
+    int result = parse_expression();
+    printf("Result: %d\n", result);
+
+    return 0;
+}
diff --git a/test/parsers/img-a.c b/test/parsers/img-a.c
new file mode 100644
index 0000000000..b6439b2887
--- /dev/null
+++ b/test/parsers/img-a.c
@@ -0,0 +1,97 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+// Function to display the image as ASCII (for grayscale images)
+// `pixel_data` holds `width * height` bytes stored row by row.
+// HL: hl.func @display_ascii_image
+void display_ascii_image(uint8_t *pixel_data, uint16_t width, uint16_t height) {
+    printf("Displaying image as ASCII:\n");
+    for (uint16_t y = 0; y < height; ++y) {
+        for (uint16_t x = 0; x < width; ++x) {
+            // Map pixel values (0-255) to ASCII characters; index in size_t
+            // because uint16_t operands promote to int, which can overflow
+            uint8_t pixel = pixel_data[(size_t)y * width + x];
+            char ascii_char = (pixel < 128) ? '#' : ' ';
+            printf("%c", ascii_char);
+        }
+        printf("\n");
+    }
+}
+
+// Function to parse a binary file containing a SIMPL image
+// Layout: "SIML" magic, width and height (2 bytes each, host byte order),
+// bits per pixel (1 byte), then width * height pixel bytes.
+// HL: hl.func @parse_simpl_image
+void parse_simpl_image(const char *filename) {
+    FILE *file = fopen(filename, "rb");
+    if (!file) {
+        perror("Error opening file");
+        return;
+    }
+
+    // Read and validate the magic number (4 bytes)
+    char magic_number[5] = {0};
+    if (fread(magic_number, sizeof(char), 4, file) != 4 ||
+        strncmp(magic_number, "SIML", 4) != 0) {
+        printf("Invalid file format!\n");
+        fclose(file);
+        return;
+    }
+    printf("Valid SIMPL image format detected.\n");
+
+    // Read the width (2 bytes), height (2 bytes) and bits per pixel (1 byte)
+    uint16_t width, height;
+    uint8_t bpp;
+    if (fread(&width, sizeof(uint16_t), 1, file) != 1 ||
+        fread(&height, sizeof(uint16_t), 1, file) != 1 ||
+        fread(&bpp, sizeof(uint8_t), 1, file) != 1) {
+        printf("Truncated image header!\n");
+        fclose(file);
+        return;
+    }
+
+    // Only support 8-bit grayscale images in this example
+    if (bpp != 8) {
+        printf("Unsupported bits per pixel: %u\n", bpp);
+        fclose(file);
+        return;
+    }
+
+    printf("Width: %u, Height: %u, Bits per Pixel: %u\n", width, height, bpp);
+
+    // Read pixel data (width * height bytes); never request 0 bytes because
+    // malloc(0) may legitimately return NULL
+    size_t pixel_data_size = (size_t)width * height;
+    uint8_t *pixel_data = malloc(pixel_data_size ? pixel_data_size : 1);
+    if (!pixel_data) {
+        perror("Error allocating pixel data");
+        fclose(file);
+        return;
+    }
+    size_t pixels_read = fread(pixel_data, sizeof(uint8_t), pixel_data_size, file);
+
+    // Close the file
+    fclose(file);
+
+    if (pixels_read != pixel_data_size) {
+        printf("Truncated pixel data!\n");
+        free(pixel_data);
+        return;
+    }
+
+    // Display the image as ASCII (Non-Parsing Part)
+    display_ascii_image(pixel_data, width, height);
+
+    // Free memory
+    free(pixel_data);
+}
+
+int main() {
+    const char *filename = "image.simpl";
+    parse_simpl_image(filename);
+    return 0;
+}
diff --git a/test/parsers/json-a.c b/test/parsers/json-a.c
new file mode 100644
index 0000000000..c2e509d353
--- /dev/null
+++ b/test/parsers/json-a.c
@@ -0,0 +1,194 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+// Structure to store a key-value pair
+typedef struct {
+    char *key;
+    char *value;
+} KeyValuePair;
+
+// Structure to store an entire JSON-like object
+typedef struct {
+    KeyValuePair *pairs;
+    size_t pair_count;
+} JsonObject;
+
+// Parsing part: Function to trim whitespace from a string
+// Trims in place and returns a pointer into `str`, not a new allocation.
+char *trim_whitespace(char *str) {
+    char *end;
+
+    // Trim leading space
+    while (isspace((unsigned char)*str)) str++;
+
+    if (*str == 0) // All spaces
+        return str;
+
+    // Trim trailing space
+    end = str + strlen(str) - 1;
+    while (end > str && isspace((unsigned char)*end)) end--;
+
+    // Write new null terminator
+    *(end + 1) = '\0';
+
+    return str;
+}
+
+// Parsing part: Drop one leading and one trailing double quote, if present
+static char *strip_quotes(char *str) {
+    if (str[0] == '\"') str++;
+
+    // Check the length first: an empty string has no last character
+    size_t length = strlen(str);
+    if (length > 0 && str[length - 1] == '\"') str[length - 1] = '\0';
+
+    return str;
+}
+
+// Parsing part: Parse a key-value pair from a line like '"key": "value"'
+// Returns heap copies owned by the caller, or {NULL, NULL} when the line has
+// no ':' or memory runs out. `line` is modified in place.
+// HL: hl.func @parse_key_value_pair
+KeyValuePair parse_key_value_pair(char *line) {
+    KeyValuePair kvp = {NULL, NULL};
+
+    // Find the colon separator
+    char *delimiter = strchr(line, ':');
+    if (delimiter == NULL) {
+        return kvp; // Return empty pair if format is invalid
+    }
+
+    // Split the line into key and value
+    *delimiter = '\0';
+    char *key = trim_whitespace(line);
+    char *value = trim_whitespace(delimiter + 1);
+
+    // Drop the comma that separates this pair from the next one
+    size_t value_length = strlen(value);
+    if (value_length > 0 && value[value_length - 1] == ',') {
+        value[value_length - 1] = '\0';
+        value = trim_whitespace(value);
+    }
+
+    // Store copies of the unquoted key and value
+    kvp.key = strdup(strip_quotes(key));
+    kvp.value = strdup(strip_quotes(value));
+    if (kvp.key == NULL || kvp.value == NULL) {
+        free(kvp.key);
+        free(kvp.value);
+        kvp.key = NULL;
+        kvp.value = NULL;
+    }
+
+    return kvp;
+}
+
+// Parsing part: Parse a JSON-like object from multiple lines
+// Reads up to and including the next line that starts with '}' (or to end of
+// file). `pairs` is NULL only if its allocation failed; release the result
+// with free_json_object.
+// HL: hl.func @parse_json_object
+JsonObject parse_json_object(FILE *file) {
+    JsonObject obj;
+    obj.pair_count = 0;
+
+    char line[256];
+    size_t capacity = 10;
+    obj.pairs = malloc(capacity * sizeof(KeyValuePair));
+    if (obj.pairs == NULL) {
+        return obj;
+    }
+
+    // Read the file line by line until the closing '}'
+    while (fgets(line, sizeof(line), file)) {
+        char *trimmed = trim_whitespace(line);
+        if (trimmed[0] == '}') {
+            break; // End of this object
+        }
+        if (trimmed[0] == '\0' || trimmed[0] == '{') {
+            continue; // Skip empty lines and the opening brace
+        }
+
+        // Parse a key-value pair from the line; skip malformed lines
+        KeyValuePair kvp = parse_key_value_pair(trimmed);
+        if (kvp.key == NULL) {
+            continue;
+        }
+
+        // Resize array if necessary, keeping the old one if realloc fails
+        if (obj.pair_count >= capacity) {
+            KeyValuePair *grown = realloc(obj.pairs, 2 * capacity * sizeof(KeyValuePair));
+            if (grown == NULL) {
+                free(kvp.key);
+                free(kvp.value);
+                break;
+            }
+            obj.pairs = grown;
+            capacity *= 2;
+        }
+
+        // Add key-value pair to object
+        obj.pairs[obj.pair_count++] = kvp;
+    }
+
+    return obj;
+}
+
+// Non-parsing part: Function to handle a parsed JsonObject
+// HL: hl.func @handle_json_object
+void handle_json_object(JsonObject obj) {
+    printf("Parsed JSON Object:\n");
+    for (size_t i = 0; i < obj.pair_count; ++i) {
+        printf(" %s: %s\n", obj.pairs[i].key, obj.pairs[i].value);
+    }
+    printf("\n");
+}
+
+// Non-parsing part: Free memory allocated for a JsonObject
+// HL: hl.func @free_json_object
+void free_json_object(JsonObject obj) {
+    for (size_t i = 0; i < obj.pair_count; ++i) {
+        free(obj.pairs[i].key);
+        free(obj.pairs[i].value);
+    }
+    free(obj.pairs);
+}
+
+// Main function: Parse a JSON-like file
+int main(int argc, char *argv[]) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    // Open the file
+    FILE *file = fopen(argv[1], "r");
+    if (file == NULL) {
+        perror("Error opening file");
+        return 1;
+    }
+
+    // Read and parse each JSON-like object in the file
+    while (!feof(file)) {
+        JsonObject obj = parse_json_object(file);
+        if (obj.pairs == NULL) {
+            fprintf(stderr, "Out of memory\n");
+            break; // Nothing was read, so looping again cannot make progress
+        }
+
+        // Handle the parsed JSON-like object
+        if (obj.pair_count > 0) {
+            handle_json_object(obj);
+        }
+
+        // Release every object: an empty one still owns its pair array
+        free_json_object(obj);
+    }
+
+    fclose(file);
+    return 0;
+}
diff --git a/test/parsers/key-value.c b/test/parsers/key-value.c
new file mode 100644
index 0000000000..d3fb6b9af7
--- /dev/null
+++ b/test/parsers/key-value.c
@@ -0,0 +1,101 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+// A simple structure to store key-value pairs
+typedef struct {
+    char *key;
+    char *value;
+} KeyValuePair;
+
+// Parsing part: A function to trim whitespace from a string
+// Trims in place and returns a pointer into `str`, not a new allocation.
+char *trim_whitespace(char *str) {
+    char *end;
+
+    // Trim leading space
+    while (isspace((unsigned char)*str)) str++;
+
+    if (*str == 0) // All spaces
+        return str;
+
+    // Trim trailing space
+    end = str + strlen(str) - 1;
+    while (end > str && isspace((unsigned char)*end)) end--;
+
+    // Write new null terminator
+    *(end + 1) = '\0';
+
+    return str;
+}
+
+// Parsing part: A function to parse a key-value pair
+// Splits `line` at the first '=' in place; the returned key and value point
+// into `line` and are valid only as long as it is. Both are NULL without '='.
+// HL: hl.func @parse_key_value
+KeyValuePair parse_key_value(char *line) {
+    KeyValuePair kvp;
+    char *delimiter = strchr(line, '=');
+
+    if (delimiter != NULL) {
+        // Split the string at '='
+        *delimiter = '\0';
+        kvp.key = trim_whitespace(line);
+        kvp.value = trim_whitespace(delimiter + 1);
+    } else {
+        // Invalid format, set key and value to NULL
+        kvp.key = NULL;
+        kvp.value = NULL;
+    }
+
+    return kvp;
+}
+
+// Non-parsing part: Handling the parsed data
+// HL: hl.func @handle_key_value
+void handle_key_value(KeyValuePair kvp) {
+    if (kvp.key && kvp.value) {
+        printf("Key: %s, Value: %s\n", kvp.key, kvp.value);
+    } else {
+        printf("Invalid key-value pair.\n");
+    }
+}
+
+// Parsing part: Read the file line by line
+void parse_file(const char *filename) {
+    FILE *file = fopen(filename, "r");
+    if (file == NULL) {
+        perror("Error opening file");
+        return;
+    }
+
+    char line[256];
+    while (fgets(line, sizeof(line), file)) {
+        // Remove newline character from the line
+        line[strcspn(line, "\n")] = '\0';
+
+        // Parse the line to extract key-value
+        KeyValuePair kvp = parse_key_value(line);
+
+        // Handle the parsed key-value pair
+        handle_key_value(kvp);
+    }
+
+    fclose(file);
+}
+
+// Example usage
+int main(int argc, char *argv[]) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argv[0]);
+        return 1;
+    }
+
+    // Parsing part: Parse the input file
+    parse_file(argv[1]);
+
+    return 0;
+}
diff --git a/test/parsers/png-a.c b/test/parsers/png-a.c
new file mode 100644
index 0000000000..fe271916f3
--- /dev/null
+++ b/test/parsers/png-a.c
@@ -0,0 +1,103 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+// Function to read a 4-byte big-endian integer from a buffer
+// HL: hl.func @read_uint32_be
+uint32_t read_uint32_be(const uint8_t *buffer) {
+    // Widen before shifting: a uint8_t promotes to int, and shifting a value
+    // of 128 or more left by 24 would overflow that signed int
+    return ((uint32_t)buffer[0] << 24) | ((uint32_t)buffer[1] << 16) |
+           ((uint32_t)buffer[2] << 8) | (uint32_t)buffer[3];
+}
+
+// Function to parse the IHDR chunk of a PNG file
+// Expects `file` to be positioned right after the 8-byte PNG signature.
+// HL: hl.func @parse_png_ihdr_chunk
+void parse_png_ihdr_chunk(FILE *file) {
+    uint8_t length_bytes[4];
+    char chunk_type[5] = {0};
+    if (fread(length_bytes, sizeof(uint8_t), 4, file) != 4 ||
+        fread(chunk_type, sizeof(char), 4, file) != 4) {
+        printf("Truncated chunk header. Invalid PNG file.\n");
+        return;
+    }
+    uint32_t length = read_uint32_be(length_bytes);
+
+    if (strcmp(chunk_type, "IHDR") != 0) {
+        printf("First chunk is not IHDR. Invalid PNG file.\n");
+        return;
+    }
+
+    // Read IHDR data; the fields decoded below need exactly 13 bytes
+    uint8_t ihdr_data[13];
+    if (length != sizeof(ihdr_data)) {
+        printf("Unexpected IHDR length: %u. Invalid PNG file.\n", (unsigned)length);
+        return;
+    }
+    if (fread(ihdr_data, sizeof(uint8_t), sizeof(ihdr_data), file) != sizeof(ihdr_data)) {
+        printf("Truncated IHDR chunk. Invalid PNG file.\n");
+        return;
+    }
+
+    // Read the CRC (4 bytes, we will skip verifying it for simplicity)
+    uint8_t crc[4];
+    if (fread(crc, sizeof(uint8_t), 4, file) != 4) {
+        printf("Truncated IHDR chunk. Invalid PNG file.\n");
+        return;
+    }
+
+    // Extract IHDR information
+    uint32_t width = read_uint32_be(&ihdr_data[0]);
+    uint32_t height = read_uint32_be(&ihdr_data[4]);
+    uint8_t bit_depth = ihdr_data[8];
+    uint8_t color_type = ihdr_data[9];
+    uint8_t compression_method = ihdr_data[10];
+    uint8_t filter_method = ihdr_data[11];
+    uint8_t interlace_method = ihdr_data[12];
+
+    printf("PNG IHDR Chunk:\n");
+    printf(" - Width: %u pixels\n", width);
+    printf(" - Height: %u pixels\n", height);
+    printf(" - Bit Depth: %u\n", bit_depth);
+    printf(" - Color Type: %u\n", color_type);
+    printf(" - Compression Method: %u\n", compression_method);
+    printf(" - Filter Method: %u\n", filter_method);
+    printf(" - Interlace Method: %u\n", interlace_method);
+}
+
+// Function to parse a basic PNG file
+// HL: hl.func @parse_png
+void parse_png(const char *filename) {
+    FILE *file = fopen(filename, "rb");
+    if (!file) {
+        perror("Error opening file");
+        return;
+    }
+
+    // Read and validate the PNG signature (8 bytes)
+    uint8_t signature[8];
+    uint8_t png_signature[8] = {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A};
+    if (fread(signature, sizeof(uint8_t), 8, file) != 8 ||
+        memcmp(signature, png_signature, 8) != 0) {
+        printf("Invalid PNG signature.\n");
+        fclose(file);
+        return;
+    }
+    printf("Valid PNG signature detected.\n");
+
+    // Parse chunks, focusing on IHDR for simplicity
+    parse_png_ihdr_chunk(file);
+
+    // Close the file
+    fclose(file);
+}
+
+int main() {
+    const char *filename = "image.png";
+    parse_png(filename);
+    return 0;
+}
diff --git a/test/parsers/rle-a.c b/test/parsers/rle-a.c
new file mode 100644
index 0000000000..0905dd9841
--- /dev/null
+++ b/test/parsers/rle-a.c
@@ -0,0 +1,108 @@
+// RUN: %vast-front -vast-emit-mlir=hl %s -o - | %file-check %s -check-prefix=HL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+// Function to decompress RLE compressed data
+// The input is a sequence of (count, value) byte pairs; an unpaired trailing
+// byte is ignored. On return `*decompressed` is a malloc'ed buffer the caller
+// frees, or NULL (with a length of 0) if the allocation failed.
+// HL: hl.func @decompress_rle
+void decompress_rle(uint8_t *compressed, size_t length, uint8_t **decompressed, size_t *decompressed_length) {
+    // Only whole pairs can be decoded
+    size_t pairs_end = length - (length % 2);
+
+    // A run holds up to 255 bytes, so the exact output size is the sum of the
+    // counts rather than a multiple of the input length
+    size_t total = 0;
+    for (size_t idx = 0; idx < pairs_end; idx += 2) {
+        total += compressed[idx];
+    }
+
+    *decompressed_length = 0;
+    *decompressed = malloc(total ? total : 1); // malloc(0) may return NULL
+    if (*decompressed == NULL) {
+        return;
+    }
+
+    size_t out_idx = 0;
+    for (size_t idx = 0; idx < pairs_end; idx += 2) {
+        uint8_t count = compressed[idx];
+        uint8_t value = compressed[idx + 1];
+
+        // Write 'count' instances of 'value' to the output
+        memset(*decompressed + out_idx, value, count);
+        out_idx += count;
+    }
+    *decompressed_length = out_idx; // Update with the actual length
+}
+
+// Function to parse a binary file containing compressed data
+// Layout: a 4-byte length in host byte order, then that many RLE bytes.
+// HL: hl.func @parse_binary_file
+void parse_binary_file(const char *filename) {
+    FILE *file = fopen(filename, "rb");
+    if (!file) {
+        perror("Error opening file");
+        return;
+    }
+
+    // Read header (4 bytes) - contains length of compressed data
+    uint32_t compressed_length;
+    if (fread(&compressed_length, sizeof(uint32_t), 1, file) != 1) {
+        printf("Truncated header.\n");
+        fclose(file);
+        return;
+    }
+
+    // Allocate memory for compressed data
+    uint8_t *compressed_data = malloc(compressed_length ? compressed_length : 1);
+    if (!compressed_data) {
+        perror("Error allocating compressed data");
+        fclose(file);
+        return;
+    }
+    size_t bytes_read = fread(compressed_data, sizeof(uint8_t), compressed_length, file);
+
+    // Close the file after reading
+    fclose(file);
+
+    if (bytes_read != compressed_length) {
+        printf("Truncated compressed data.\n");
+        free(compressed_data);
+        return;
+    }
+
+    printf("Read compressed data of length: %u bytes\n", compressed_length);
+
+    // Decompression (Non-Parsing Part)
+    uint8_t *decompressed_data;
+    size_t decompressed_length;
+    decompress_rle(compressed_data, compressed_length, &decompressed_data, &decompressed_length);
+    if (!decompressed_data) {
+        perror("Error allocating decompressed data");
+        free(compressed_data);
+        return;
+    }
+
+    printf("Decompressed data length: %zu bytes\n", decompressed_length);
+
+    // Print decompressed data
+    printf("Decompressed Data: ");
+    for (size_t i = 0; i < decompressed_length; ++i) {
+        printf("%c", decompressed_data[i]);
+    }
+    printf("\n");
+
+    // Free memory
+    free(compressed_data);
+    free(decompressed_data);
+}
+
+int main() {
+    const char *filename = "compressed_data.bin";
+    parse_binary_file(filename);
+    return 0;
+}