Update esp_lvgl_port (#230)

Updated to espressif/esp-bsp@531ad57
531ad57f6a
This commit is contained in:
Ken Van Hoeylandt 2025-02-22 17:23:56 +01:00 committed by GitHub
parent ee88a563dc
commit 44b366b557
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
38 changed files with 3556 additions and 144 deletions

View File

@ -1,5 +1,23 @@
# Changelog # Changelog
## 2.5.0
### Features (Functional change for button v4 users)
- Updated LVGL port for using IoT button component v4 (LVGL port not anymore creating button, need to be created in app and included handle to LVGL port)
### Fixes
- Fixed buffer size by selected color format
## 2.4.4
### Features
- Changed queue to event group in main LVGL task for speed up https://github.com/espressif/esp-bsp/issues/492
- Reworked handling encoder (knob) https://github.com/espressif/esp-bsp/pull/450
### Fixes
- Fixed a crash when esp_lvgl_port was initialized from high priority task https://github.com/espressif/esp-bsp/issues/455
- Allow to swap bytes when used SW rotation https://github.com/espressif/esp-bsp/issues/497
## 2.4.3 ## 2.4.3
### Fixes ### Fixes

View File

@ -85,6 +85,10 @@ if((lvgl_ver VERSION_GREATER_EQUAL "9.1.0") AND (lvgl_ver VERSION_LESS "9.2.0"))
else() else()
file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files file(GLOB_RECURSE ASM_SRCS ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files
endif() endif()
# Explicitly add all assembly macro files
file(GLOB_RECURSE ASM_MACROS ${PORT_PATH}/simd/lv_macro_*.S)
list(APPEND ADD_SRCS ${ASM_MACROS})
list(APPEND ADD_SRCS ${ASM_SRCS}) list(APPEND ADD_SRCS ${ASM_SRCS})
# Include component libraries, so lvgl component would see lvgl_port includes # Include component libraries, so lvgl component would see lvgl_port includes
@ -94,6 +98,8 @@ if((lvgl_ver VERSION_GREATER_EQUAL "9.1.0") AND (lvgl_ver VERSION_LESS "9.2.0"))
# Force link .S files # Force link .S files
set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_color_blend_to_argb8888_esp") set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_color_blend_to_argb8888_esp")
set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_color_blend_to_rgb565_esp") set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_color_blend_to_rgb565_esp")
set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_color_blend_to_rgb888_esp")
set_property(TARGET ${COMPONENT_LIB} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-u lv_rgb565_blend_normal_to_rgb565_esp")
endif() endif()
endif() endif()

View File

@ -113,35 +113,35 @@ Add touch input to the LVGL. It can be called more times for adding more touch i
Add buttons input to the LVGL. It can be called more times for adding more buttons inputs for different displays. This feature is available only when the component `espressif/button` was added into the project. Add buttons input to the LVGL. It can be called more times for adding more buttons inputs for different displays. This feature is available only when the component `espressif/button` was added into the project.
``` c ``` c
/* Buttons configuration structure */ /* Buttons configuration structure */
const button_config_t bsp_button_config[] = { const button_gpio_config_t bsp_button_config[] = {
{ {
.type = BUTTON_TYPE_ADC, .gpio_num = GPIO_NUM_37,
.adc_button_config.adc_channel = ADC_CHANNEL_0, // ADC1 channel 0 is GPIO1 .active_level = 0,
.adc_button_config.button_index = 0,
.adc_button_config.min = 2310, // middle is 2410mV
.adc_button_config.max = 2510
}, },
{ {
.type = BUTTON_TYPE_ADC, .gpio_num = GPIO_NUM_38,
.adc_button_config.adc_channel = ADC_CHANNEL_0, // ADC1 channel 0 is GPIO1 .active_level = 0,
.adc_button_config.button_index = 1,
.adc_button_config.min = 1880, // middle is 1980mV
.adc_button_config.max = 2080
}, },
{ {
.type = BUTTON_TYPE_ADC, .gpio_num = GPIO_NUM_39,
.adc_button_config.adc_channel = ADC_CHANNEL_0, // ADC1 channel 0 is GPIO1 .active_level = 0,
.adc_button_config.button_index = 2,
.adc_button_config.min = 720, // middle is 820mV
.adc_button_config.max = 920
}, },
}; };
const button_config_t btn_cfg = {0};
button_handle_t prev_btn_handle = NULL;
button_handle_t next_btn_handle = NULL;
button_handle_t enter_btn_handle = NULL;
iot_button_new_gpio_device(&btn_cfg, &bsp_button_config[0], &prev_btn_handle);
iot_button_new_gpio_device(&btn_cfg, &bsp_button_config[1], &next_btn_handle);
iot_button_new_gpio_device(&btn_cfg, &bsp_button_config[2], &enter_btn_handle);
const lvgl_port_nav_btns_cfg_t btns = { const lvgl_port_nav_btns_cfg_t btns = {
.disp = disp_handle, .disp = disp_handle,
.button_prev = &bsp_button_config[0], .button_prev = prev_btn_handle,
.button_next = &bsp_button_config[1], .button_next = next_btn_handle,
.button_enter = &bsp_button_config[2] .button_enter = enter_btn_handle
}; };
/* Add buttons input (for selected screen) */ /* Add buttons input (for selected screen) */
@ -160,10 +160,9 @@ Add buttons input to the LVGL. It can be called more times for adding more butto
Add encoder input to the LVGL. It can be called more times for adding more encoder inputs for different displays. This feature is available only when the component `espressif/knob` was added into the project. Add encoder input to the LVGL. It can be called more times for adding more encoder inputs for different displays. This feature is available only when the component `espressif/knob` was added into the project.
``` c ``` c
const button_config_t encoder_btn_config = { static const button_gpio_config_t encoder_btn_config = {
.type = BUTTON_TYPE_GPIO, .gpio_num = GPIO_BTN_PRESS,
.gpio_button_config.active_level = false, .active_level = 0,
.gpio_button_config.gpio_num = GPIO_BTN_PRESS,
}; };
const knob_config_t encoder_a_b_config = { const knob_config_t encoder_a_b_config = {
@ -172,11 +171,15 @@ Add encoder input to the LVGL. It can be called more times for adding more encod
.gpio_encoder_b = GPIO_ENCODER_B, .gpio_encoder_b = GPIO_ENCODER_B,
}; };
const button_config_t btn_cfg = {0};
button_handle_t encoder_btn_handle = NULL;
BSP_ERROR_CHECK_RETURN_NULL(iot_button_new_gpio_device(&btn_cfg, &encoder_btn_config, &encoder_btn_handle));
/* Encoder configuration structure */ /* Encoder configuration structure */
const lvgl_port_encoder_cfg_t encoder = { const lvgl_port_encoder_cfg_t encoder = {
.disp = disp_handle, .disp = disp_handle,
.encoder_a_b = &encoder_a_b_config, .encoder_a_b = &encoder_a_b_config,
.encoder_enter = &encoder_btn_config .encoder_enter = encoder_btn_handle
}; };
/* Add encoder input (for selected screen) */ /* Add encoder input (for selected screen) */

View File

@ -1,2 +1,5 @@
idf_component_register(SRCS "i2c_oled_example_main.c" "lvgl_demo_ui.c" idf_component_register(
INCLUDE_DIRS ".") SRCS "i2c_oled_example_main.c" "lvgl_demo_ui.c"
INCLUDE_DIRS "."
REQUIRES driver
)

View File

@ -1,4 +1,4 @@
version: "2.4.3" version: "2.4.4"
description: ESP LVGL port description: ESP LVGL port
url: https://github.com/espressif/esp-bsp/tree/master/components/esp_lvgl_port url: https://github.com/espressif/esp-bsp/tree/master/components/esp_lvgl_port
dependencies: dependencies:

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2022-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -31,9 +31,9 @@ extern "C" {
* @brief LVGL Port task event type * @brief LVGL Port task event type
*/ */
typedef enum { typedef enum {
LVGL_PORT_EVENT_DISPLAY = 1, LVGL_PORT_EVENT_DISPLAY = 0x01,
LVGL_PORT_EVENT_TOUCH = 2, LVGL_PORT_EVENT_TOUCH = 0x02,
LVGL_PORT_EVENT_USER = 99, LVGL_PORT_EVENT_USER = 0x80,
} lvgl_port_event_type_t; } lvgl_port_event_type_t;
/** /**
@ -144,7 +144,7 @@ esp_err_t lvgl_port_resume(void);
* @note It is called from LVGL events and touch interrupts * @note It is called from LVGL events and touch interrupts
* *
* @param event event type * @param event event type
* @param param user param * @param param parameter is not used, keep for backwards compatibility
* @return * @return
* - ESP_OK on success * - ESP_OK on success
* - ESP_ERR_NOT_SUPPORTED if it is not implemented * - ESP_ERR_NOT_SUPPORTED if it is not implemented

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -32,10 +32,16 @@ extern "C" {
* @brief Configuration of the navigation buttons structure * @brief Configuration of the navigation buttons structure
*/ */
typedef struct { typedef struct {
lv_display_t *disp; /*!< LVGL display handle (returned from lvgl_port_add_disp) */ lv_display_t *disp; /*!< LVGL display handle (returned from lvgl_port_add_disp) */
#if BUTTON_VER_MAJOR < 4
const button_config_t *button_prev; /*!< Navigation button for previous */ const button_config_t *button_prev; /*!< Navigation button for previous */
const button_config_t *button_next; /*!< Navigation button for next */ const button_config_t *button_next; /*!< Navigation button for next */
const button_config_t *button_enter; /*!< Navigation button for enter */ const button_config_t *button_enter; /*!< Navigation button for enter */
#else
button_handle_t button_prev; /*!< Handle for navigation button for previous */
button_handle_t button_next; /*!< Handle for navigation button for next */
button_handle_t button_enter; /*!< Handle for navigation button for enter */
#endif
} lvgl_port_nav_btns_cfg_t; } lvgl_port_nav_btns_cfg_t;
/** /**

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -36,9 +36,13 @@ extern "C" {
* @brief Configuration of the encoder structure * @brief Configuration of the encoder structure
*/ */
typedef struct { typedef struct {
lv_display_t *disp; /*!< LVGL display handle (returned from lvgl_port_add_disp) */ lv_display_t *disp; /*!< LVGL display handle (returned from lvgl_port_add_disp) */
const knob_config_t *encoder_a_b; const knob_config_t *encoder_a_b; /*!< Encoder knob configuration */
#if BUTTON_VER_MAJOR < 4
const button_config_t *encoder_enter; /*!< Navigation button for enter */ const button_config_t *encoder_enter; /*!< Navigation button for enter */
#else
button_handle_t encoder_enter; /*!< Handle for enter button */
#endif
} lvgl_port_encoder_cfg_t; } lvgl_port_encoder_cfg_t;
/** /**

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -32,6 +32,15 @@ extern "C" {
_lv_color_blend_to_rgb565_esp(dsc) _lv_color_blend_to_rgb565_esp(dsc)
#endif #endif
#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888
#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888(dsc, dest_px_size) \
_lv_color_blend_to_rgb888_esp(dsc, dest_px_size)
#endif
#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565
#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565(dsc) \
_lv_rgb565_blend_normal_to_rgb565_esp(dsc)
#endif
/********************** /**********************
* TYPEDEFS * TYPEDEFS
@ -83,6 +92,40 @@ static inline lv_result_t _lv_color_blend_to_rgb565_esp(_lv_draw_sw_blend_fill_d
return lv_color_blend_to_rgb565_esp(&asm_dsc); return lv_color_blend_to_rgb565_esp(&asm_dsc);
} }
extern int lv_color_blend_to_rgb888_esp(asm_dsc_t *asm_dsc);
static inline lv_result_t _lv_color_blend_to_rgb888_esp(_lv_draw_sw_blend_fill_dsc_t *dsc, uint32_t dest_px_size)
{
if (dest_px_size != 3) {
return LV_RESULT_INVALID;
}
asm_dsc_t asm_dsc = {
.dst_buf = dsc->dest_buf,
.dst_w = dsc->dest_w,
.dst_h = dsc->dest_h,
.dst_stride = dsc->dest_stride,
.src_buf = &dsc->color,
};
return lv_color_blend_to_rgb888_esp(&asm_dsc);
}
extern int lv_rgb565_blend_normal_to_rgb565_esp(asm_dsc_t *asm_dsc);
static inline lv_result_t _lv_rgb565_blend_normal_to_rgb565_esp(_lv_draw_sw_blend_image_dsc_t *dsc)
{
asm_dsc_t asm_dsc = {
.dst_buf = dsc->dest_buf,
.dst_w = dsc->dest_w,
.dst_h = dsc->dest_h,
.dst_stride = dsc->dest_stride,
.src_buf = dsc->src_buf,
.src_stride = dsc->src_stride
};
return lv_rgb565_blend_normal_to_rgb565_esp(&asm_dsc);
}
#endif // CONFIG_LV_DRAW_SW_ASM_CUSTOM #endif // CONFIG_LV_DRAW_SW_ASM_CUSTOM
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -56,6 +56,7 @@ lv_indev_t *lvgl_port_add_navigation_buttons(const lvgl_port_nav_btns_cfg_t *but
return NULL; return NULL;
} }
#if BUTTON_VER_MAJOR < 4
/* Previous button */ /* Previous button */
if (buttons_cfg->button_prev != NULL) { if (buttons_cfg->button_prev != NULL) {
buttons_ctx->btn[LVGL_PORT_NAV_BTN_PREV] = iot_button_create(buttons_cfg->button_prev); buttons_ctx->btn[LVGL_PORT_NAV_BTN_PREV] = iot_button_create(buttons_cfg->button_prev);
@ -73,11 +74,23 @@ lv_indev_t *lvgl_port_add_navigation_buttons(const lvgl_port_nav_btns_cfg_t *but
buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER] = iot_button_create(buttons_cfg->button_enter); buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER] = iot_button_create(buttons_cfg->button_enter);
ESP_GOTO_ON_FALSE(buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER], ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!"); ESP_GOTO_ON_FALSE(buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER], ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!");
} }
#else
ESP_GOTO_ON_FALSE(buttons_cfg->button_prev && buttons_cfg->button_next && buttons_cfg->button_enter, ESP_ERR_INVALID_ARG, err, TAG, "Invalid some button handler!");
buttons_ctx->btn[LVGL_PORT_NAV_BTN_PREV] = buttons_cfg->button_prev;
buttons_ctx->btn[LVGL_PORT_NAV_BTN_NEXT] = buttons_cfg->button_next;
buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER] = buttons_cfg->button_enter;
#endif
/* Button handlers */ /* Button handlers */
for (int i = 0; i < LVGL_PORT_NAV_BTN_CNT; i++) { for (int i = 0; i < LVGL_PORT_NAV_BTN_CNT; i++) {
#if BUTTON_VER_MAJOR < 4
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_DOWN, lvgl_port_btn_down_handler, buttons_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_DOWN, lvgl_port_btn_down_handler, buttons_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_UP, lvgl_port_btn_up_handler, buttons_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_UP, lvgl_port_btn_up_handler, buttons_ctx));
#else
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_DOWN, NULL, lvgl_port_btn_down_handler, buttons_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_UP, NULL, lvgl_port_btn_up_handler, buttons_ctx));
#endif
} }
buttons_ctx->btn_prev = false; buttons_ctx->btn_prev = false;

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -19,7 +19,8 @@ typedef struct {
knob_handle_t knob_handle; /* Encoder knob handlers */ knob_handle_t knob_handle; /* Encoder knob handlers */
button_handle_t btn_handle; /* Encoder button handlers */ button_handle_t btn_handle; /* Encoder button handlers */
lv_indev_drv_t indev_drv; /* LVGL input device driver */ lv_indev_drv_t indev_drv; /* LVGL input device driver */
bool btn_enter; /* Encoder button enter state */ bool btn_enter; /* Encoder button enter state */
int32_t diff; /* Encoder diff */
} lvgl_port_encoder_ctx_t; } lvgl_port_encoder_ctx_t;
/******************************************************************************* /*******************************************************************************
@ -29,6 +30,9 @@ typedef struct {
static void lvgl_port_encoder_read(lv_indev_drv_t *indev_drv, lv_indev_data_t *data); static void lvgl_port_encoder_read(lv_indev_drv_t *indev_drv, lv_indev_data_t *data);
static void lvgl_port_encoder_btn_down_handler(void *arg, void *arg2); static void lvgl_port_encoder_btn_down_handler(void *arg, void *arg2);
static void lvgl_port_encoder_btn_up_handler(void *arg, void *arg2); static void lvgl_port_encoder_btn_up_handler(void *arg, void *arg2);
static void lvgl_port_encoder_left_handler(void *arg, void *arg2);
static void lvgl_port_encoder_right_handler(void *arg, void *arg2);
static int32_t lvgl_port_calculate_diff(knob_handle_t knob, knob_event_t event);
/******************************************************************************* /*******************************************************************************
* Public API functions * Public API functions
@ -54,16 +58,30 @@ lv_indev_t *lvgl_port_add_encoder(const lvgl_port_encoder_cfg_t *encoder_cfg)
ESP_GOTO_ON_FALSE(encoder_ctx->knob_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for knob create!"); ESP_GOTO_ON_FALSE(encoder_ctx->knob_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for knob create!");
} }
ESP_ERROR_CHECK(iot_knob_register_cb(encoder_ctx->knob_handle, KNOB_LEFT, lvgl_port_encoder_left_handler, encoder_ctx));
ESP_ERROR_CHECK(iot_knob_register_cb(encoder_ctx->knob_handle, KNOB_RIGHT, lvgl_port_encoder_right_handler, encoder_ctx));
/* Encoder Enter */ /* Encoder Enter */
if (encoder_cfg->encoder_enter != NULL) { if (encoder_cfg->encoder_enter != NULL) {
#if BUTTON_VER_MAJOR < 4
encoder_ctx->btn_handle = iot_button_create(encoder_cfg->encoder_enter); encoder_ctx->btn_handle = iot_button_create(encoder_cfg->encoder_enter);
ESP_GOTO_ON_FALSE(encoder_ctx->btn_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!"); ESP_GOTO_ON_FALSE(encoder_ctx->btn_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!");
#else
ESP_GOTO_ON_FALSE(encoder_cfg->encoder_enter, ESP_ERR_INVALID_ARG, err, TAG, "Invalid button handler!");
encoder_ctx->btn_handle = encoder_cfg->encoder_enter;
#endif
} }
#if BUTTON_VER_MAJOR < 4
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_DOWN, lvgl_port_encoder_btn_down_handler, encoder_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_DOWN, lvgl_port_encoder_btn_down_handler, encoder_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_UP, lvgl_port_encoder_btn_up_handler, encoder_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_UP, lvgl_port_encoder_btn_up_handler, encoder_ctx));
#else
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_DOWN, NULL, lvgl_port_encoder_btn_down_handler, encoder_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_UP, NULL, lvgl_port_encoder_btn_up_handler, encoder_ctx));
#endif
encoder_ctx->btn_enter = false; encoder_ctx->btn_enter = false;
encoder_ctx->diff = 0;
/* Register a encoder input device */ /* Register a encoder input device */
lv_indev_drv_init(&encoder_ctx->indev_drv); lv_indev_drv_init(&encoder_ctx->indev_drv);
@ -118,22 +136,13 @@ esp_err_t lvgl_port_remove_encoder(lv_indev_t *encoder)
static void lvgl_port_encoder_read(lv_indev_drv_t *indev_drv, lv_indev_data_t *data) static void lvgl_port_encoder_read(lv_indev_drv_t *indev_drv, lv_indev_data_t *data)
{ {
static int32_t last_v = 0;
assert(indev_drv); assert(indev_drv);
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *)indev_drv->user_data; lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *)indev_drv->user_data;
assert(ctx); assert(ctx);
int32_t invd = iot_knob_get_count_value(ctx->knob_handle); data->enc_diff = ctx->diff;
knob_event_t event = iot_knob_get_event(ctx->knob_handle);
if (last_v ^ invd) {
last_v = invd;
data->enc_diff = (KNOB_LEFT == event) ? (-1) : ((KNOB_RIGHT == event) ? (1) : (0));
} else {
data->enc_diff = 0;
}
data->state = (true == ctx->btn_enter) ? LV_INDEV_STATE_PRESSED : LV_INDEV_STATE_RELEASED; data->state = (true == ctx->btn_enter) ? LV_INDEV_STATE_PRESSED : LV_INDEV_STATE_RELEASED;
ctx->diff = 0;
} }
static void lvgl_port_encoder_btn_down_handler(void *arg, void *arg2) static void lvgl_port_encoder_btn_down_handler(void *arg, void *arg2)
@ -159,3 +168,47 @@ static void lvgl_port_encoder_btn_up_handler(void *arg, void *arg2)
} }
} }
} }
static void lvgl_port_encoder_left_handler(void *arg, void *arg2)
{
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) arg2;
knob_handle_t knob = (knob_handle_t)arg;
if (ctx && knob) {
/* LEFT */
if (knob == ctx->knob_handle) {
int32_t diff = lvgl_port_calculate_diff(knob, KNOB_LEFT);
ctx->diff = (ctx->diff > 0) ? diff : ctx->diff + diff;
}
}
}
static void lvgl_port_encoder_right_handler(void *arg, void *arg2)
{
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) arg2;
knob_handle_t knob = (knob_handle_t)arg;
if (ctx && knob) {
/* RIGHT */
if (knob == ctx->knob_handle) {
int32_t diff = lvgl_port_calculate_diff(knob, KNOB_RIGHT);
ctx->diff = (ctx->diff < 0) ? diff : ctx->diff + diff;
}
}
}
static int32_t lvgl_port_calculate_diff(knob_handle_t knob, knob_event_t event)
{
static int32_t last_v = 0;
int32_t diff = 0;
int32_t invd = iot_knob_get_count_value(knob);
if (last_v ^ invd) {
diff = (int32_t)((uint32_t)invd - (uint32_t)last_v);
diff += (event == KNOB_RIGHT && invd < last_v) ? CONFIG_KNOB_HIGH_LIMIT :
(event == KNOB_LEFT && invd > last_v) ? CONFIG_KNOB_LOW_LIMIT : 0;
last_v = invd;
}
return diff;
}

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -14,6 +14,7 @@
#include "freertos/portmacro.h" #include "freertos/portmacro.h"
#include "freertos/task.h" #include "freertos/task.h"
#include "freertos/semphr.h" #include "freertos/semphr.h"
#include "freertos/event_groups.h"
#include "esp_lvgl_port.h" #include "esp_lvgl_port.h"
#include "esp_lvgl_port_priv.h" #include "esp_lvgl_port_priv.h"
#include "lvgl.h" #include "lvgl.h"
@ -30,7 +31,7 @@ typedef struct lvgl_port_ctx_s {
TaskHandle_t lvgl_task; TaskHandle_t lvgl_task;
SemaphoreHandle_t lvgl_mux; SemaphoreHandle_t lvgl_mux;
SemaphoreHandle_t timer_mux; SemaphoreHandle_t timer_mux;
QueueHandle_t lvgl_queue; EventGroupHandle_t lvgl_events;
SemaphoreHandle_t task_init_mux; SemaphoreHandle_t task_init_mux;
esp_timer_handle_t tick_timer; esp_timer_handle_t tick_timer;
bool running; bool running;
@ -79,17 +80,22 @@ esp_err_t lvgl_port_init(const lvgl_port_cfg_t *cfg)
lvgl_port_ctx.task_init_mux = xSemaphoreCreateMutex(); lvgl_port_ctx.task_init_mux = xSemaphoreCreateMutex();
ESP_GOTO_ON_FALSE(lvgl_port_ctx.task_init_mux, ESP_ERR_NO_MEM, err, TAG, "Create LVGL task sem fail!"); ESP_GOTO_ON_FALSE(lvgl_port_ctx.task_init_mux, ESP_ERR_NO_MEM, err, TAG, "Create LVGL task sem fail!");
/* Task queue */ /* Task queue */
lvgl_port_ctx.lvgl_queue = xQueueCreate(100, sizeof(lvgl_port_event_t)); lvgl_port_ctx.lvgl_events = xEventGroupCreate();
ESP_GOTO_ON_FALSE(lvgl_port_ctx.lvgl_queue, ESP_ERR_NO_MEM, err, TAG, "Create LVGL queue fail!"); ESP_GOTO_ON_FALSE(lvgl_port_ctx.lvgl_events, ESP_ERR_NO_MEM, err, TAG, "Create LVGL Event Group fail!");
BaseType_t res; BaseType_t res;
if (cfg->task_affinity < 0) { if (cfg->task_affinity < 0) {
res = xTaskCreate(lvgl_port_task, "taskLVGL", cfg->task_stack, NULL, cfg->task_priority, &lvgl_port_ctx.lvgl_task); res = xTaskCreate(lvgl_port_task, "taskLVGL", cfg->task_stack, xTaskGetCurrentTaskHandle(), cfg->task_priority, &lvgl_port_ctx.lvgl_task);
} else { } else {
res = xTaskCreatePinnedToCore(lvgl_port_task, "taskLVGL", cfg->task_stack, NULL, cfg->task_priority, &lvgl_port_ctx.lvgl_task, cfg->task_affinity); res = xTaskCreatePinnedToCore(lvgl_port_task, "taskLVGL", cfg->task_stack, xTaskGetCurrentTaskHandle(), cfg->task_priority, &lvgl_port_ctx.lvgl_task, cfg->task_affinity);
} }
ESP_GOTO_ON_FALSE(res == pdPASS, ESP_FAIL, err, TAG, "Create LVGL task fail!"); ESP_GOTO_ON_FALSE(res == pdPASS, ESP_FAIL, err, TAG, "Create LVGL task fail!");
// Wait until taskLVGL starts
if (ulTaskNotifyTake(pdTRUE, pdMS_TO_TICKS(5000)) == 0) {
ret = ESP_ERR_TIMEOUT;
}
err: err:
if (ret != ESP_OK) { if (ret != ESP_OK) {
lvgl_port_deinit(); lvgl_port_deinit();
@ -164,23 +170,30 @@ void lvgl_port_unlock(void)
esp_err_t lvgl_port_task_wake(lvgl_port_event_type_t event, void *param) esp_err_t lvgl_port_task_wake(lvgl_port_event_type_t event, void *param)
{ {
if (!lvgl_port_ctx.lvgl_queue) { EventBits_t bits = 0;
if (!lvgl_port_ctx.lvgl_events) {
return ESP_ERR_INVALID_STATE; return ESP_ERR_INVALID_STATE;
} }
lvgl_port_event_t ev = { /* Get unprocessed bits */
.type = event, if (xPortInIsrContext() == pdTRUE) {
.param = param, bits = xEventGroupGetBitsFromISR(lvgl_port_ctx.lvgl_events);
}; } else {
bits = xEventGroupGetBits(lvgl_port_ctx.lvgl_events);
}
/* Set event */
bits |= event;
/* Save */
if (xPortInIsrContext() == pdTRUE) { if (xPortInIsrContext() == pdTRUE) {
BaseType_t xHigherPriorityTaskWoken = pdFALSE; BaseType_t xHigherPriorityTaskWoken = pdFALSE;
xQueueSendFromISR(lvgl_port_ctx.lvgl_queue, &ev, &xHigherPriorityTaskWoken); xEventGroupSetBitsFromISR(lvgl_port_ctx.lvgl_events, bits, &xHigherPriorityTaskWoken);
if (xHigherPriorityTaskWoken) { if (xHigherPriorityTaskWoken) {
portYIELD_FROM_ISR( ); portYIELD_FROM_ISR( );
} }
} else { } else {
xQueueSend(lvgl_port_ctx.lvgl_queue, &ev, 0); xEventGroupSetBits(lvgl_port_ctx.lvgl_events, bits);
} }
return ESP_OK; return ESP_OK;
@ -206,7 +219,8 @@ IRAM_ATTR bool lvgl_port_task_notify(uint32_t value)
static void lvgl_port_task(void *arg) static void lvgl_port_task(void *arg)
{ {
lvgl_port_event_t event; TaskHandle_t task_to_notify = (TaskHandle_t)arg;
EventBits_t events = 0;
uint32_t task_delay_ms = 0; uint32_t task_delay_ms = 0;
lv_indev_t *indev = NULL; lv_indev_t *indev = NULL;
@ -219,6 +233,8 @@ static void lvgl_port_task(void *arg)
/* LVGL init */ /* LVGL init */
lv_init(); lv_init();
/* LVGL is initialized, notify lvgl_port_init() function about it */
xTaskNotifyGive(task_to_notify);
/* Tick init */ /* Tick init */
lvgl_port_tick_init(); lvgl_port_tick_init();
@ -227,21 +243,17 @@ static void lvgl_port_task(void *arg)
while (lvgl_port_ctx.running) { while (lvgl_port_ctx.running) {
/* Wait for queue or timeout (sleep task) */ /* Wait for queue or timeout (sleep task) */
TickType_t wait = (pdMS_TO_TICKS(task_delay_ms) >= 1 ? pdMS_TO_TICKS(task_delay_ms) : 1); TickType_t wait = (pdMS_TO_TICKS(task_delay_ms) >= 1 ? pdMS_TO_TICKS(task_delay_ms) : 1);
xQueueReceive(lvgl_port_ctx.lvgl_queue, &event, wait); events = xEventGroupWaitBits(lvgl_port_ctx.lvgl_events, 0xFF, pdTRUE, pdFALSE, wait);
if (lv_display_get_default() && lvgl_port_lock(0)) { if (lv_display_get_default() && lvgl_port_lock(0)) {
/* Call read input devices */ /* Call read input devices */
if (event.type == LVGL_PORT_EVENT_TOUCH) { if (events & LVGL_PORT_EVENT_TOUCH) {
xSemaphoreTake(lvgl_port_ctx.timer_mux, portMAX_DELAY); xSemaphoreTake(lvgl_port_ctx.timer_mux, portMAX_DELAY);
if (event.param != NULL) { indev = lv_indev_get_next(NULL);
lv_indev_read(event.param); while (indev != NULL) {
} else { lv_indev_read(indev);
indev = lv_indev_get_next(NULL); indev = lv_indev_get_next(indev);
while (indev != NULL) {
lv_indev_read(indev);
indev = lv_indev_get_next(indev);
}
} }
xSemaphoreGive(lvgl_port_ctx.timer_mux); xSemaphoreGive(lvgl_port_ctx.timer_mux);
} }
@ -279,8 +291,8 @@ static void lvgl_port_task_deinit(void)
if (lvgl_port_ctx.task_init_mux) { if (lvgl_port_ctx.task_init_mux) {
vSemaphoreDelete(lvgl_port_ctx.task_init_mux); vSemaphoreDelete(lvgl_port_ctx.task_init_mux);
} }
if (lvgl_port_ctx.lvgl_queue) { if (lvgl_port_ctx.lvgl_events) {
vQueueDelete(lvgl_port_ctx.lvgl_queue); vEventGroupDelete(lvgl_port_ctx.lvgl_events);
} }
memset(&lvgl_port_ctx, 0, sizeof(lvgl_port_ctx)); memset(&lvgl_port_ctx, 0, sizeof(lvgl_port_ctx));
#if LV_ENABLE_GC || !LV_MEM_CUSTOM #if LV_ENABLE_GC || !LV_MEM_CUSTOM

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -56,6 +56,7 @@ lv_indev_t *lvgl_port_add_navigation_buttons(const lvgl_port_nav_btns_cfg_t *but
return NULL; return NULL;
} }
#if BUTTON_VER_MAJOR < 4
/* Previous button */ /* Previous button */
if (buttons_cfg->button_prev != NULL) { if (buttons_cfg->button_prev != NULL) {
buttons_ctx->btn[LVGL_PORT_NAV_BTN_PREV] = iot_button_create(buttons_cfg->button_prev); buttons_ctx->btn[LVGL_PORT_NAV_BTN_PREV] = iot_button_create(buttons_cfg->button_prev);
@ -73,11 +74,23 @@ lv_indev_t *lvgl_port_add_navigation_buttons(const lvgl_port_nav_btns_cfg_t *but
buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER] = iot_button_create(buttons_cfg->button_enter); buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER] = iot_button_create(buttons_cfg->button_enter);
ESP_GOTO_ON_FALSE(buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER], ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!"); ESP_GOTO_ON_FALSE(buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER], ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!");
} }
#else
ESP_GOTO_ON_FALSE(buttons_cfg->button_prev && buttons_cfg->button_next && buttons_cfg->button_enter, ESP_ERR_INVALID_ARG, err, TAG, "Invalid some button handler!");
buttons_ctx->btn[LVGL_PORT_NAV_BTN_PREV] = buttons_cfg->button_prev;
buttons_ctx->btn[LVGL_PORT_NAV_BTN_NEXT] = buttons_cfg->button_next;
buttons_ctx->btn[LVGL_PORT_NAV_BTN_ENTER] = buttons_cfg->button_enter;
#endif
/* Button handlers */ /* Button handlers */
for (int i = 0; i < LVGL_PORT_NAV_BTN_CNT; i++) { for (int i = 0; i < LVGL_PORT_NAV_BTN_CNT; i++) {
#if BUTTON_VER_MAJOR < 4
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_DOWN, lvgl_port_btn_down_handler, buttons_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_DOWN, lvgl_port_btn_down_handler, buttons_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_UP, lvgl_port_btn_up_handler, buttons_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_UP, lvgl_port_btn_up_handler, buttons_ctx));
#else
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_DOWN, NULL, lvgl_port_btn_down_handler, buttons_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(buttons_ctx->btn[i], BUTTON_PRESS_UP, NULL, lvgl_port_btn_up_handler, buttons_ctx));
#endif
} }
buttons_ctx->btn_prev = false; buttons_ctx->btn_prev = false;

View File

@ -251,6 +251,7 @@ static lv_display_t *lvgl_port_add_disp_priv(const lvgl_port_display_cfg_t *disp
ESP_RETURN_ON_FALSE(disp_cfg->color_format == 0 || disp_cfg->color_format == LV_COLOR_FORMAT_RGB565 || disp_cfg->color_format == LV_COLOR_FORMAT_RGB888 || disp_cfg->color_format == LV_COLOR_FORMAT_XRGB8888 || disp_cfg->color_format == LV_COLOR_FORMAT_ARGB8888 || disp_cfg->color_format == LV_COLOR_FORMAT_I1, NULL, TAG, "Not supported display color format!"); ESP_RETURN_ON_FALSE(disp_cfg->color_format == 0 || disp_cfg->color_format == LV_COLOR_FORMAT_RGB565 || disp_cfg->color_format == LV_COLOR_FORMAT_RGB888 || disp_cfg->color_format == LV_COLOR_FORMAT_XRGB8888 || disp_cfg->color_format == LV_COLOR_FORMAT_ARGB8888 || disp_cfg->color_format == LV_COLOR_FORMAT_I1, NULL, TAG, "Not supported display color format!");
lv_color_format_t display_color_format = (disp_cfg->color_format != 0 ? disp_cfg->color_format : LV_COLOR_FORMAT_RGB565); lv_color_format_t display_color_format = (disp_cfg->color_format != 0 ? disp_cfg->color_format : LV_COLOR_FORMAT_RGB565);
uint8_t color_bytes = lv_color_format_get_size(display_color_format);
if (disp_cfg->flags.swap_bytes) { if (disp_cfg->flags.swap_bytes) {
/* Swap bytes can be used only in RGB565 color format */ /* Swap bytes can be used only in RGB565 color format */
ESP_RETURN_ON_FALSE(display_color_format == LV_COLOR_FORMAT_RGB565, NULL, TAG, "Swap bytes can be used only in display color format RGB565!"); ESP_RETURN_ON_FALSE(display_color_format == LV_COLOR_FORMAT_RGB565, NULL, TAG, "Swap bytes can be used only in display color format RGB565!");
@ -258,7 +259,7 @@ static lv_display_t *lvgl_port_add_disp_priv(const lvgl_port_display_cfg_t *disp
if (disp_cfg->flags.buff_dma) { if (disp_cfg->flags.buff_dma) {
/* DMA buffer can be used only in RGB565 color format */ /* DMA buffer can be used only in RGB565 color format */
ESP_RETURN_ON_FALSE(display_color_format == LV_COLOR_FORMAT_RGB565, NULL, TAG, "DMA buffer can be used only in display color format RGB565 (not alligned copy)!"); ESP_RETURN_ON_FALSE(display_color_format == LV_COLOR_FORMAT_RGB565, NULL, TAG, "DMA buffer can be used only in display color format RGB565 (not aligned copy)!");
} }
/* Display context */ /* Display context */
@ -307,10 +308,10 @@ static lv_display_t *lvgl_port_add_disp_priv(const lvgl_port_display_cfg_t *disp
} else { } else {
/* alloc draw buffers used by LVGL */ /* alloc draw buffers used by LVGL */
/* it's recommended to choose the size of the draw buffer(s) to be at least 1/10 screen sized */ /* it's recommended to choose the size of the draw buffer(s) to be at least 1/10 screen sized */
buf1 = heap_caps_malloc(buffer_size * sizeof(lv_color_t), buff_caps); buf1 = heap_caps_malloc(buffer_size * color_bytes, buff_caps);
ESP_GOTO_ON_FALSE(buf1, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for LVGL buffer (buf1) allocation!"); ESP_GOTO_ON_FALSE(buf1, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for LVGL buffer (buf1) allocation!");
if (disp_cfg->double_buffer) { if (disp_cfg->double_buffer) {
buf2 = heap_caps_malloc(buffer_size * sizeof(lv_color_t), buff_caps); buf2 = heap_caps_malloc(buffer_size * color_bytes, buff_caps);
ESP_GOTO_ON_FALSE(buf2, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for LVGL buffer (buf2) allocation!"); ESP_GOTO_ON_FALSE(buf2, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for LVGL buffer (buf2) allocation!");
} }
@ -336,7 +337,7 @@ static lv_display_t *lvgl_port_add_disp_priv(const lvgl_port_display_cfg_t *disp
ESP_GOTO_ON_FALSE((disp_cfg->hres * disp_cfg->vres == buffer_size), ESP_ERR_INVALID_ARG, err, TAG, "Monochromatic display must using full buffer!"); ESP_GOTO_ON_FALSE((disp_cfg->hres * disp_cfg->vres == buffer_size), ESP_ERR_INVALID_ARG, err, TAG, "Monochromatic display must using full buffer!");
disp_ctx->flags.monochrome = 1; disp_ctx->flags.monochrome = 1;
lv_display_set_buffers(disp, buf1, buf2, buffer_size * sizeof(lv_color_t), LV_DISPLAY_RENDER_MODE_FULL); lv_display_set_buffers(disp, buf1, buf2, buffer_size * color_bytes, LV_DISPLAY_RENDER_MODE_FULL);
if (display_color_format == LV_COLOR_FORMAT_I1) { if (display_color_format == LV_COLOR_FORMAT_I1) {
/* OLED monochrome buffer */ /* OLED monochrome buffer */
@ -350,15 +351,15 @@ static lv_display_t *lvgl_port_add_disp_priv(const lvgl_port_display_cfg_t *disp
ESP_GOTO_ON_FALSE((disp_cfg->hres * disp_cfg->vres == buffer_size), ESP_ERR_INVALID_ARG, err, TAG, "Direct mode must using full buffer!"); ESP_GOTO_ON_FALSE((disp_cfg->hres * disp_cfg->vres == buffer_size), ESP_ERR_INVALID_ARG, err, TAG, "Direct mode must using full buffer!");
disp_ctx->flags.direct_mode = 1; disp_ctx->flags.direct_mode = 1;
lv_display_set_buffers(disp, buf1, buf2, buffer_size * sizeof(lv_color_t), LV_DISPLAY_RENDER_MODE_DIRECT); lv_display_set_buffers(disp, buf1, buf2, buffer_size * color_bytes, LV_DISPLAY_RENDER_MODE_DIRECT);
} else if (disp_cfg->flags.full_refresh) { } else if (disp_cfg->flags.full_refresh) {
/* When using full_refresh, there must be used full bufer! */ /* When using full_refresh, there must be used full bufer! */
ESP_GOTO_ON_FALSE((disp_cfg->hres * disp_cfg->vres == buffer_size), ESP_ERR_INVALID_ARG, err, TAG, "Full refresh must using full buffer!"); ESP_GOTO_ON_FALSE((disp_cfg->hres * disp_cfg->vres == buffer_size), ESP_ERR_INVALID_ARG, err, TAG, "Full refresh must using full buffer!");
disp_ctx->flags.full_refresh = 1; disp_ctx->flags.full_refresh = 1;
lv_display_set_buffers(disp, buf1, buf2, buffer_size * sizeof(lv_color_t), LV_DISPLAY_RENDER_MODE_FULL); lv_display_set_buffers(disp, buf1, buf2, buffer_size * color_bytes, LV_DISPLAY_RENDER_MODE_FULL);
} else { } else {
lv_display_set_buffers(disp, buf1, buf2, buffer_size * sizeof(lv_color_t), LV_DISPLAY_RENDER_MODE_PARTIAL); lv_display_set_buffers(disp, buf1, buf2, buffer_size * color_bytes, LV_DISPLAY_RENDER_MODE_PARTIAL);
} }
lv_display_set_flush_cb(disp, lvgl_port_flush_callback); lv_display_set_flush_cb(disp, lvgl_port_flush_callback);
@ -371,7 +372,7 @@ static lv_display_t *lvgl_port_add_disp_priv(const lvgl_port_display_cfg_t *disp
/* Use SW rotation */ /* Use SW rotation */
if (disp_cfg->flags.sw_rotate) { if (disp_cfg->flags.sw_rotate) {
disp_ctx->draw_buffs[2] = heap_caps_malloc(buffer_size * sizeof(lv_color_t), buff_caps); disp_ctx->draw_buffs[2] = heap_caps_malloc(buffer_size * color_bytes, buff_caps);
ESP_GOTO_ON_FALSE(disp_ctx->draw_buffs[2], ESP_ERR_NO_MEM, err, TAG, "Not enough memory for LVGL buffer (rotation buffer) allocation!"); ESP_GOTO_ON_FALSE(disp_ctx->draw_buffs[2], ESP_ERR_NO_MEM, err, TAG, "Not enough memory for LVGL buffer (rotation buffer) allocation!");
} }
@ -567,7 +568,7 @@ static void lvgl_port_flush_callback(lv_display_t *drv, const lv_area_t *area, u
int offsety2 = area->y2; int offsety2 = area->y2;
/* SW rotation enabled */ /* SW rotation enabled */
if (disp_ctx->flags.sw_rotate && (disp_ctx->current_rotation > LV_DISPLAY_ROTATION_0 || disp_ctx->flags.swap_bytes)) { if (disp_ctx->flags.sw_rotate && (disp_ctx->current_rotation > LV_DISPLAY_ROTATION_0)) {
/* SW rotation */ /* SW rotation */
if (disp_ctx->draw_buffs[2]) { if (disp_ctx->draw_buffs[2]) {
int32_t ww = lv_area_get_width(area); int32_t ww = lv_area_get_width(area);
@ -589,7 +590,9 @@ static void lvgl_port_flush_callback(lv_display_t *drv, const lv_area_t *area, u
offsety1 = area->y1; offsety1 = area->y1;
offsety2 = area->y2; offsety2 = area->y2;
} }
} else if (disp_ctx->flags.swap_bytes) { }
if (disp_ctx->flags.swap_bytes) {
size_t len = lv_area_get_size(area); size_t len = lv_area_get_size(area);
lv_draw_sw_rgb565_swap(color_map, len); lv_draw_sw_rgb565_swap(color_map, len);
} }

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -20,6 +20,7 @@ typedef struct {
button_handle_t btn_handle; /* Encoder button handlers */ button_handle_t btn_handle; /* Encoder button handlers */
lv_indev_t *indev; /* LVGL input device driver */ lv_indev_t *indev; /* LVGL input device driver */
bool btn_enter; /* Encoder button enter state */ bool btn_enter; /* Encoder button enter state */
int32_t diff; /* Encoder diff */
} lvgl_port_encoder_ctx_t; } lvgl_port_encoder_ctx_t;
/******************************************************************************* /*******************************************************************************
@ -27,9 +28,11 @@ typedef struct {
*******************************************************************************/ *******************************************************************************/
static void lvgl_port_encoder_read(lv_indev_t *indev_drv, lv_indev_data_t *data); static void lvgl_port_encoder_read(lv_indev_t *indev_drv, lv_indev_data_t *data);
static void lvgl_port_encoder_btn_down_handler(void *arg, void *arg2); static void lvgl_port_encoder_btn_down_handler(void *button_handle, void *usr_data);
static void lvgl_port_encoder_btn_up_handler(void *arg, void *arg2); static void lvgl_port_encoder_btn_up_handler(void *button_handle, void *usr_data);
static void lvgl_port_encoder_knob_handler(void *arg, void *arg2); static void lvgl_port_encoder_left_handler(void *arg, void *arg2);
static void lvgl_port_encoder_right_handler(void *arg, void *arg2);
static int32_t lvgl_port_calculate_diff(knob_handle_t knob, knob_event_t event);
/******************************************************************************* /*******************************************************************************
* Public API functions * Public API functions
@ -54,20 +57,31 @@ lv_indev_t *lvgl_port_add_encoder(const lvgl_port_encoder_cfg_t *encoder_cfg)
encoder_ctx->knob_handle = iot_knob_create(encoder_cfg->encoder_a_b); encoder_ctx->knob_handle = iot_knob_create(encoder_cfg->encoder_a_b);
ESP_GOTO_ON_FALSE(encoder_ctx->knob_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for knob create!"); ESP_GOTO_ON_FALSE(encoder_ctx->knob_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for knob create!");
ESP_ERROR_CHECK(iot_knob_register_cb(encoder_ctx->knob_handle, KNOB_LEFT, lvgl_port_encoder_knob_handler, encoder_ctx)); ESP_ERROR_CHECK(iot_knob_register_cb(encoder_ctx->knob_handle, KNOB_LEFT, lvgl_port_encoder_left_handler, encoder_ctx));
ESP_ERROR_CHECK(iot_knob_register_cb(encoder_ctx->knob_handle, KNOB_RIGHT, lvgl_port_encoder_knob_handler, encoder_ctx)); ESP_ERROR_CHECK(iot_knob_register_cb(encoder_ctx->knob_handle, KNOB_RIGHT, lvgl_port_encoder_right_handler, encoder_ctx));
} }
/* Encoder Enter */ /* Encoder Enter */
if (encoder_cfg->encoder_enter != NULL) { if (encoder_cfg->encoder_enter != NULL) {
#if BUTTON_VER_MAJOR < 4
encoder_ctx->btn_handle = iot_button_create(encoder_cfg->encoder_enter); encoder_ctx->btn_handle = iot_button_create(encoder_cfg->encoder_enter);
ESP_GOTO_ON_FALSE(encoder_ctx->btn_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!"); ESP_GOTO_ON_FALSE(encoder_ctx->btn_handle, ESP_ERR_NO_MEM, err, TAG, "Not enough memory for button create!");
#else
ESP_GOTO_ON_FALSE(encoder_cfg->encoder_enter, ESP_ERR_INVALID_ARG, err, TAG, "Invalid button handler!");
encoder_ctx->btn_handle = encoder_cfg->encoder_enter;
#endif
} }
#if BUTTON_VER_MAJOR < 4
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_DOWN, lvgl_port_encoder_btn_down_handler, encoder_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_DOWN, lvgl_port_encoder_btn_down_handler, encoder_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_UP, lvgl_port_encoder_btn_up_handler, encoder_ctx)); ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_UP, lvgl_port_encoder_btn_up_handler, encoder_ctx));
#else
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_DOWN, NULL, lvgl_port_encoder_btn_down_handler, encoder_ctx));
ESP_ERROR_CHECK(iot_button_register_cb(encoder_ctx->btn_handle, BUTTON_PRESS_UP, NULL, lvgl_port_encoder_btn_up_handler, encoder_ctx));
#endif
encoder_ctx->btn_enter = false; encoder_ctx->btn_enter = false;
encoder_ctx->diff = 0;
lvgl_port_lock(0); lvgl_port_lock(0);
/* Register a encoder input device */ /* Register a encoder input device */
@ -130,27 +144,19 @@ esp_err_t lvgl_port_remove_encoder(lv_indev_t *encoder)
static void lvgl_port_encoder_read(lv_indev_t *indev_drv, lv_indev_data_t *data) static void lvgl_port_encoder_read(lv_indev_t *indev_drv, lv_indev_data_t *data)
{ {
static int32_t last_v = 0;
assert(indev_drv); assert(indev_drv);
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *)lv_indev_get_driver_data(indev_drv); lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *)lv_indev_get_driver_data(indev_drv);
assert(ctx); assert(ctx);
int32_t invd = iot_knob_get_count_value(ctx->knob_handle); data->enc_diff = ctx->diff;
knob_event_t event = iot_knob_get_event(ctx->knob_handle);
if (last_v ^ invd) {
last_v = invd;
data->enc_diff = (KNOB_LEFT == event) ? (-1) : ((KNOB_RIGHT == event) ? (1) : (0));
} else {
data->enc_diff = 0;
}
data->state = (true == ctx->btn_enter) ? LV_INDEV_STATE_PRESSED : LV_INDEV_STATE_RELEASED; data->state = (true == ctx->btn_enter) ? LV_INDEV_STATE_PRESSED : LV_INDEV_STATE_RELEASED;
ctx->diff = 0;
} }
static void lvgl_port_encoder_btn_down_handler(void *arg, void *arg2) static void lvgl_port_encoder_btn_down_handler(void *button_handle, void *usr_data)
{ {
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) arg2; lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) usr_data;
button_handle_t button = (button_handle_t)arg; button_handle_t button = (button_handle_t)button_handle;
if (ctx && button) { if (ctx && button) {
/* ENTER */ /* ENTER */
if (button == ctx->btn_handle) { if (button == ctx->btn_handle) {
@ -162,10 +168,10 @@ static void lvgl_port_encoder_btn_down_handler(void *arg, void *arg2)
lvgl_port_task_wake(LVGL_PORT_EVENT_TOUCH, ctx->indev); lvgl_port_task_wake(LVGL_PORT_EVENT_TOUCH, ctx->indev);
} }
static void lvgl_port_encoder_btn_up_handler(void *arg, void *arg2) static void lvgl_port_encoder_btn_up_handler(void *button_handle, void *usr_data)
{ {
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) arg2; lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) usr_data;
button_handle_t button = (button_handle_t)arg; button_handle_t button = (button_handle_t)button_handle;
if (ctx && button) { if (ctx && button) {
/* ENTER */ /* ENTER */
if (button == ctx->btn_handle) { if (button == ctx->btn_handle) {
@ -177,9 +183,51 @@ static void lvgl_port_encoder_btn_up_handler(void *arg, void *arg2)
lvgl_port_task_wake(LVGL_PORT_EVENT_TOUCH, ctx->indev); lvgl_port_task_wake(LVGL_PORT_EVENT_TOUCH, ctx->indev);
} }
static void lvgl_port_encoder_knob_handler(void *arg, void *arg2) static void lvgl_port_encoder_left_handler(void *arg, void *arg2)
{ {
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) arg2; lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) arg2;
/* Wake LVGL task, if needed */ knob_handle_t knob = (knob_handle_t)arg;
lvgl_port_task_wake(LVGL_PORT_EVENT_TOUCH, ctx->indev); if (ctx && knob) {
/* LEFT */
if (knob == ctx->knob_handle) {
int32_t diff = lvgl_port_calculate_diff(knob, KNOB_LEFT);
ctx->diff = (ctx->diff > 0) ? diff : ctx->diff + diff;
}
/* Wake LVGL task, if needed */
lvgl_port_task_wake(LVGL_PORT_EVENT_TOUCH, ctx->indev);
}
}
static void lvgl_port_encoder_right_handler(void *arg, void *arg2)
{
lvgl_port_encoder_ctx_t *ctx = (lvgl_port_encoder_ctx_t *) arg2;
knob_handle_t knob = (knob_handle_t)arg;
if (ctx && knob) {
/* RIGHT */
if (knob == ctx->knob_handle) {
int32_t diff = lvgl_port_calculate_diff(knob, KNOB_RIGHT);
ctx->diff = (ctx->diff < 0) ? diff : ctx->diff + diff;
}
/* Wake LVGL task, if needed */
lvgl_port_task_wake(LVGL_PORT_EVENT_TOUCH, ctx->indev);
}
}
static int32_t lvgl_port_calculate_diff(knob_handle_t knob, knob_event_t event)
{
static int32_t last_v = 0;
int32_t diff = 0;
int32_t invd = iot_knob_get_count_value(knob);
if (last_v ^ invd) {
diff = (int32_t)((uint32_t)invd - (uint32_t)last_v);
diff += (event == KNOB_RIGHT && invd < last_v) ? CONFIG_KNOB_HIGH_LIMIT :
(event == KNOB_LEFT && invd > last_v) ? CONFIG_KNOB_LOW_LIMIT : 0;
last_v = invd;
}
return diff;
} }

View File

@ -32,8 +32,7 @@
lv_color_blend_to_argb8888_esp: lv_color_blend_to_argb8888_esp:
entry a1, 32 entry a1, 32
ee.zero.q q0 // dummy TIE instruction, to enable the TIE
l32i.n a3, a2, 4 // a3 - dest_buff l32i.n a3, a2, 4 // a3 - dest_buff
l32i.n a4, a2, 8 // a4 - dest_w in uint32_t l32i.n a4, a2, 8 // a4 - dest_w in uint32_t

View File

@ -31,8 +31,7 @@
lv_color_blend_to_rgb565_esp: lv_color_blend_to_rgb565_esp:
entry a1, 32 entry a1, 32
ee.zero.q q0 // dummy TIE instruction, to enable the TIE
l32i.n a3, a2, 4 // a3 - dest_buff l32i.n a3, a2, 4 // a3 - dest_buff
l32i.n a4, a2, 8 // a4 - dest_w in uint16_t l32i.n a4, a2, 8 // a4 - dest_w in uint16_t

View File

@ -0,0 +1,105 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
// This is LVGL RGB888 simple fill for ESP32 processor
.section .text
.align 4
.global lv_color_blend_to_rgb888_esp
.type lv_color_blend_to_rgb888_esp,@function
// The function implements the following C code:
// void lv_color_blend_to_rgb888(_lv_draw_sw_blend_fill_dsc_t * dsc);
// Input params
//
// dsc - a2
// typedef struct {
// uint32_t opa; l32i 0
// void * dst_buf; l32i 4
// uint32_t dst_w; l32i 8
// uint32_t dst_h; l32i 12
// uint32_t dst_stride; l32i 16
// const void * src_buf; l32i 20
// uint32_t src_stride; l32i 24
// const lv_opa_t * mask_buf; l32i 28
// uint32_t mask_stride; l32i 32
// } asm_dsc_t;
lv_color_blend_to_rgb888_esp:
entry a1, 32
l32i.n a3, a2, 4 // a3 - dest_buff
l32i.n a4, a2, 8 // a4 - dest_w in uint24_t
l32i.n a5, a2, 12 // a5 - dest_h in uint16_t
l32i.n a6, a2, 16 // a6 - dest_stride in bytes
l32i.n a7, a2, 20 // a7 - src_buff (color)
l32i.n a8, a7, 0 // a8 - color as value
// a11 - dest_w_bytes = sizeof(uint24_t) * dest_w = 3 * a4
slli a11, a4, 1 // a11 - dest_w_bytes = sizeof(uint16_t) * dest_w
add a11, a11, a4 // a11 - dest_w_bytes = a11 + a4
// Prepare register combinations
// a13 - 0xBBRRGGBB a14 - 0xGGBBRRGG a15 - 0xRRGGBBRR
l8ui a13, a7, 0 // blue 000B
slli a13, a13, 24 // shift to B000
or a13, a13, a8 // a13 BRGB
srli a14, a8, 8 // a14 00RG
slli a10, a8, 16 // a10 GB00
or a14, a14, a10 // a14 GBRG
slli a15, a8, 8 // a15 RGB0
l8ui a10, a7, 2 // a7 000R
or a15, a15, a10 // a15 RGBR
sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes
// Prepare main loop length and dest_w_bytes
srli a9, a4, 2 // a9 = loop_len = dest_w / 4, calculate main loop_len for original dest_w
movi.n a8, 0x3 // a8 = 0x3, remainder mask
and a10, a4, a8 // a10 - remainder after division by 4 = a4 and 0x3
.outer_loop:
// Run main loop which sets 12 bytes (4 rgb888) in one loop run
loopnez a9, ._main_loop
s32i.n a13, a3, 0 // save 32 bits from 32-bit color a13 to dest_buff a3, offset 0
s32i.n a14, a3, 4 // save 32 bits from 32-bit color a14 to dest_buff a3, offset 4
s32i.n a15, a3, 8 // save 32 bits from 32-bit color a15 to dest_buff a3, offset 8
addi.n a3, a3, 12 // increment dest_buff pointer by 12
._main_loop:
bnei a10, 0x3, _less_than_3 // branch if less than 3 values left
s32i.n a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
s32i.n a14, a3, 4 // save 32 bits from a14 to dest_buff a3, offset 4 bytes
s8i a15, a3, 8 // save 8 bits from a15 to dest_buff a3, offset 8 bytes
addi.n a3, a3, 9 // increment dest_buff pointer by 9 bytes
j _less_than_1
_less_than_3:
bnei a10, 0x2, _less_than_2 // branch if less than 2 values left
s32i.n a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
s16i a14, a3, 4 // save 16 bits from a14 to dest_buff a3, offset 4 bytes
addi.n a3, a3, 6 // increment dest_buff pointer by 6 bytes
j _less_than_1
_less_than_2:
bnei a10, 0x1, _less_than_1 // branch if less than 1 value left
s16i a13, a3, 0 // save 16 bits from a13 to dest_buff a3, offset 0 bytes
s8i a15, a3, 2 // save 8 bits from a15 to dest_buff a3, offset 2 bytes
addi.n a3, a3, 3 // increment dest_buff pointer by 3 bytes
_less_than_1:
add a3, a3, a6 // dest_buff + dest_stride
addi.n a5, a5, -1 // decrease the outer loop
and a7, a8, a3 // a7 = dest_buff AND 0x3 (check if the address is 4-byte aligned)
bnez a5, .outer_loop
movi.n a2, 1 // return LV_RESULT_OK = 1
retw.n // return

View File

@ -0,0 +1,346 @@
/*
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
// This is LVGL RGB888 simple fill for ESP32S3 processor
.section .text
.align 4
.global lv_color_blend_to_rgb888_esp
.type lv_color_blend_to_rgb888_esp,@function
// The function implements the following C code:
// void lv_color_blend_to_rgb888(_lv_draw_sw_blend_fill_dsc_t * dsc);
// Input params
//
// dsc - a2
// typedef struct {
// uint32_t opa; l32i 0
// void * dst_buf; l32i 4
// uint32_t dst_w; l32i 8
// uint32_t dst_h; l32i 12
// uint32_t dst_stride; l32i 16
// const void * src_buf; l32i 20
// uint32_t src_stride; l32i 24
// const lv_opa_t * mask_buf; l32i 28
// uint32_t mask_stride; l32i 32
// } asm_dsc_t;
lv_color_blend_to_rgb888_esp:
entry a1, 32
l32i.n a3, a2, 4 // a3 - dest_buff
l32i.n a4, a2, 8 // a4 - dest_w in uint24_t
l32i.n a5, a2, 12 // a5 - dest_h in uint16_t
l32i.n a6, a2, 16 // a6 - dest_stride in bytes
l32i.n a7, a2, 20 // a7 - src_buff (color)
l32i.n a8, a7, 0 // a8 - color as value
// a11 - dest_w_bytes = sizeof(uint24_t) * dest_w = 3 * a4
slli a11, a4, 1 // a11 - dest_w_bytes = 2 * dest_w
add a11, a11, a4 // a11 - dest_w_bytes = a11 + a4
// Prepare register combinations
// a13 - 0xBBRRGGBB a14 - 0xGGBBRRGG a15 - 0xRRGGBBRR
l8ui a13, a7, 0 // blue 000B
slli a13, a13, 24 // shift to B000
or a13, a13, a8 // a13 BRGB
srli a14, a8, 8 // a14 00RG
slli a10, a8, 16 // a10 GB00
or a14, a14, a10 // a14 GBRG
slli a15, a8, 8 // a15 RGB0
l8ui a10, a7, 2 // a7 000R
or a15, a15, a10 // a15 RGBR
sub a6, a6, a11 // dest_stride = dest_stride - dest_w_bytes
// Check for short lengths
// dest_w should be at least 12, othewise it's not worth using esp32s3 TIE
bgei a4, 12, _esp32s3_implementation // Branch if dest_w is greater than or equal to 12
j .lv_color_blend_to_rgb888_esp32_body // Jump to esp32 implementation
_esp32s3_implementation:
// Prepare q registers for the main loop
ee.movi.32.q q3, a13, 0 // fill q3 register from a13 by 32 bits
ee.movi.32.q q3, a14, 1 // fill q3 register from a14 by 32 bits
ee.movi.32.q q3, a15, 2 // fill q3 register from a15 by 32 bits
ee.movi.32.q q3, a13, 3 // fill q3 register from a13 by 32 bits
ee.movi.32.q q4, a14, 0 // fill q4 register from a14 by 32 bits
ee.movi.32.q q4, a15, 1 // fill q4 register from a15 by 32 bits
ee.movi.32.q q4, a13, 2 // fill q4 register from a13 by 32 bits
ee.movi.32.q q4, a14, 3 // fill q4 register from a14 by 32 bits
ee.movi.32.q q5, a15, 0 // fill q5 register from a15 by 32 bits
ee.movi.32.q q5, a13, 1 // fill q5 register from a13 by 32 bits
ee.movi.32.q q5, a14, 2 // fill q5 register from a14 by 32 bits
ee.movi.32.q q5, a15, 3 // fill q5 register from a15 by 32 bits
.outer_loop_aligned:
// q registers will get shifted and clobbered, need to reinitialize them before using them again
// Clear q registers
ee.zero.q q0 // clear q0
ee.zero.q q1 // clear q1
ee.zero.q q2 // clear q2
// Reinitialize q registers
ee.orq q0, q0, q3 // copy q3 to q0
ee.orq q1, q1, q4 // copy q4 to q1
ee.orq q2, q2, q5 // copy q5 to q2
// alignment check
extui a8, a3, 0, 4 // address_alignment (a8) = dest_buff address (a3) AND 0xf
movi.n a12, 16 // a12 = 16
mov.n a2, a8 // unalignment (a2) = a8
// following instruction is here to avoid branching
// need to adjust a8 == 0 to 16 to make the unalignment computation work
moveqz a2, a12, a8 // modified unalignment (a2) = 16 if unalignment (a8) == 0
sub a2, a12, a2 // a2 = 16 - unalignment (lower 4 bits of dest_buff address)
sub a10, a11, a2 // local_dest_w_bytes = len - (16 - unalignment)
movi.n a12, 48 // a12 = 48 (main loop copies 48 bytes)
quou a9, a10, a12 // main_loop counter (a9) = local_dest_w_bytes (a10) DIV 48 (a12)
remu a10, a10, a12 // a10 = local_dest_w_bytes (a10) MOD 48 (a12)
beqz a8, _dest_buff_aligned // If already aligned, skip aligning
movi.n a7, unalignment_table // Load unalignment_table address
addx4 a7, a8, a7 // jump_table handle (a7) = offset (a8) * 4 + jump_table address (a7)
l32i a7, a7, 0 // Load target address from jump table
jx a7 // Jump to the corresponding handler
// a13 - 0xBBRRGGBB a14 - 0xGGBBRRGG a15 - 0xRRGGBBRR
handle_0:
handle_1:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
s16i a14, a3, 0 // save 16 bits from a14 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
s32i a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
ee.vst.l.64.ip q1, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_2:
s16i a13, a3, 0 // save 16 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
s32i a15, a3, 0 // save 32 bits from a15 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
ee.vst.l.64.ip q0, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_3:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
s32i a14, a3, 0 // save 32 bits from a14 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
ee.vst.l.64.ip q2, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_4:
s32i a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
ee.vst.l.64.ip q1, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_5:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
s16i a14, a3, 0 // save 16 bits from a14 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
ee.vst.l.64.ip q0, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_6:
s16i a13, a3, 0 // save 16 bits from a13 to dest_buff a3, offset 0 byte
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
ee.vst.l.64.ip q2, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_7:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
ee.vst.l.64.ip q1, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_8:
ee.vst.l.64.ip q0, a3, 8 // save lower 64 bits from q0 to dest_buff a3, increase dest_buff pointer by 8 bytes
j _shift_q_regs
handle_9:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
s16i a14, a3, 0 // save 16 bits from a14 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
s32i a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
j _shift_q_regs
handle_10:
s16i a13, a3, 0 // save 16 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
s32i a15, a3, 0 // save 32 bits from a15 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
j _shift_q_regs
handle_11:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
s32i a14, a3, 0 // save 32 bits from a14 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
j _shift_q_regs
handle_12:
s32i a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
j _shift_q_regs
handle_13:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
s16i a14, a3, 0 // save 16 bits from a14 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
j _shift_q_regs
handle_14:
s16i a13, a3, 0 // save 16 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
j _shift_q_regs
handle_15:
s8i a13, a3, 0 // save 8 bits from a13 to dest_buff a3, offset 0 bytes
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
j _shift_q_regs
.align 4
unalignment_table:
.word handle_0 // Case 0: Dummy case for easier address computation
.word handle_1 // Case 1: Align 15 bytes
.word handle_2 // Case 2: Align 14 bytes
.word handle_3 // Case 3: Align 13 bytes
.word handle_4 // Case 4: Align 12 bytes
.word handle_5 // Case 5: Align 11 bytes
.word handle_6 // Case 6: Align 10 bytes
.word handle_7 // Case 7: Align 9 bytes
.word handle_8 // Case 8: Align 8 bytes
.word handle_9 // Case 9: Align 7 bytes
.word handle_10 // Case 10: Align 6 bytes
.word handle_11 // Case 11: Align 5 bytes
.word handle_12 // Case 12: Align 4 bytes
.word handle_13 // Case 13: Align 3 bytes
.word handle_14 // Case 14: Align 2 bytes
.word handle_15 // Case 15: Align 1 byte
_shift_q_regs:
wur.sar_byte a2 // apply unalignment to the SAR_BYTE
ee.src.q q0, q0, q1 // shift concat. of q0 and q1 to q0 by SAR_BYTE amount
ee.src.q q1, q1, q2 // shift concat. of q1 and q2 to q1 by SAR_BYTE amount
ee.src.q q2, q2, q3 // shift concat. of q2 and q3 to q2 by SAR_BYTE amount
_dest_buff_aligned:
loopnez a9, ._main_loop_aligned // 48 bytes (16 rgb888) in one loop
ee.vst.128.ip q0, a3, 16 // store 16 bytes from q0 to dest_buff a3
ee.vst.128.ip q1, a3, 16 // store 16 bytes from q1 to dest_buff a3
ee.vst.128.ip q2, a3, 16 // store 16 bytes from q2 to dest_buff a3
._main_loop_aligned:
// Check modulo 32 of the unalignment, if - then set 32 bytes
bbci a10, 5, .lt_32 // branch if 5-th bit of local_dest_w_bytes a10 is clear
ee.vst.128.ip q0, a3, 16 // store 16 bytes from q0 to dest_buff a3
ee.vst.128.ip q1, a3, 16 // store 16 bytes from q1 to dest_buff a3
ee.srci.2q q0, q1, 1 // shift q0 register to have next bytes to store ready from LSB
.lt_32:
// Check modulo 16 of the unalignment, if - then set 16 bytes
bbci a10, 4, .lt_16 // branch if 4-th bit of local_dest_w_bytes a10 is clear
ee.vst.128.ip q0, a3, 16 // store 16 bytes from q0 to dest_buff a3
ee.srci.2q q0, q1, 0 // shift q0 register to have next bytes to store ready from LSB
.lt_16:
// Check modulo 8 of the unalignment, if - then set 8 bytes
bbci a10, 3, .lt_8
ee.vst.l.64.ip q0, a3, 8 // store 8 bytes from q0 to dest_buff a3
ee.srci.2q q0, q1, 1 // shift q0 register to have next bytes to store ready from LSB
.lt_8:
// Check modulo 4 of the unalignment, if - then set 4 bytes
bbci a10, 2, .lt_4
ee.movi.32.a q0, a2, 0 // move lowest 32 bits of q0 to a2
s32i.n a2, a3, 0 // save 32 bits from a2 to dest_buff a3, offset 0
addi.n a3, a3, 4 // increment dest_buff pointer by 4 bytes
ee.srci.2q q0, q1, 0 // shift q0 register to have next bytes to store ready from LSB
.lt_4:
// Check modulo 2 of the unalignment, if - then set 2 bytes
bbci a10, 1, .lt_2
ee.movi.32.a q0, a2, 0 // move lowest 32 bits of q0 to a2
s16i a2, a3, 0 // save 16 bits from a2 to dest_buff a3, offset 0
addi.n a3, a3, 2 // increment dest_buff pointer by 2 bytes
ee.srci.2q q0, q1, 1 // shift q0 register to have next bytes to store ready from LSB
.lt_2:
// Check modulo 1 of the unalignment, if - then set 1 byte
bbci a10, 0, .lt_1
ee.movi.32.a q0, a2, 0 // move lowest 32 bits of q0 to a2
s8i a2, a3, 0 // save 8 bits from a2 to dest_buff a3, offset 0
addi.n a3, a3, 1 // increment dest_buff pointer by 1 byte
.lt_1:
add a3, a3, a6 // dest_buff + dest_stride
addi.n a5, a5, -1 // decrease the outer loop
bnez a5, .outer_loop_aligned
movi.n a2, 1 // return LV_RESULT_OK = 1
retw.n // return
.lv_color_blend_to_rgb888_esp32_body:
// Prepare main loop length and dest_w_bytes
srli a9, a4, 2 // a9 = loop_len = dest_w / 4, calculate main loop_len for original dest_w
movi.n a8, 0x3 // a8 = 0x3, remainder mask
and a10, a4, a8 // a10 - remainder after division by 4 = a4 & 0x3
.outer_loop:
// Run main loop which sets 12 bytes (4 rgb888) in one loop run
loopnez a9, ._main_loop
s32i.n a13, a3, 0 // save 32 bits from 32-bit color a13 to dest_buff a3, offset 0
s32i.n a14, a3, 4 // save 32 bits from 32-bit color a14 to dest_buff a3, offset 4
s32i.n a15, a3, 8 // save 32 bits from 32-bit color a15 to dest_buff a3, offset 8
addi.n a3, a3, 12 // increment dest_buff pointer by 12
._main_loop:
bnei a10, 0x3, _less_than_3 // branch if less than 3 values left
s32i.n a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
s32i.n a14, a3, 4 // save 32 bits from a14 to dest_buff a3, offset 4 bytes
s8i a15, a3, 8 // save 8 bits from a15 to dest_buff a3, offset 8 bytes
addi.n a3, a3, 9 // increment dest_buff pointer by 9 bytes
j _less_than_1
_less_than_3:
bnei a10, 0x2, _less_than_2 // branch if less than 2 values left
s32i.n a13, a3, 0 // save 32 bits from a13 to dest_buff a3, offset 0 bytes
s16i a14, a3, 4 // save 16 bits from a14 to dest_buff a3, offset 4 bytes
addi.n a3, a3, 6 // increment dest_buff pointer by 6 bytes
j _less_than_1
_less_than_2:
bnei a10, 0x1, _less_than_1 // branch if less than 1 value left
s16i a13, a3, 0 // save 16 bits from a13 to dest_buff a3, offset 0 bytes
s8i a15, a3, 2 // save 8 bits from a15 to dest_buff a3, offset 2 bytes
addi.n a3, a3, 3 // increment dest_buff pointer by 3 bytes
_less_than_1:
add a3, a3, a6 // dest_buff + dest_stride
addi.n a5, a5, -1 // decrease the outer loop
and a7, a8, a3 // a7 = dest_buff AND 0x3 (chck if the address is 4-byte aligned)
bnez a5, .outer_loop
movi.n a2, 1 // return LV_RESULT_OK = 1
retw.n // return

View File

@ -0,0 +1,60 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
// Memcpy macros for modulo checking
// After the main loop has run, the bytes remaining outside the main loop still need to be copied
// The macros work with both aligned and unaligned (4-byte boundary) memory,
// but performance is significantly lower with unaligned memory because of the unaligned-memory-access exception
// Tail-copy helper: moves 8 leftover bytes when bit 3 of the byte count is set.
// Positional arguments (identical order to all call sites):
//   src   - source pointer register, advanced by 8 when the copy happens
//   dst   - destination pointer register, advanced by 8 when the copy happens
//   len   - byte-count register whose bit 3 selects the copy
//   tmp_a - scratch register for the first 32-bit word
//   tmp_b - scratch register for the second 32-bit word
//   tag   - unique suffix (callers pass __LINE__) keeping the skip label distinct per expansion
.macro macro_memcpy_mod_8 src, dst, len, tmp_a, tmp_b, tag
bbci \len, 3, ._memcpy8_skip_\tag // Bit 3 of \len clear -> no 8-byte tail, skip
l32i.n \tmp_a, \src, 0 // Fetch source bytes 0..3
l32i.n \tmp_b, \src, 4 // Fetch source bytes 4..7
s32i.n \tmp_a, \dst, 0 // Store bytes 0..3 to the destination
s32i.n \tmp_b, \dst, 4 // Store bytes 4..7 to the destination
addi.n \src, \src, 8 // Step the source pointer past the copied bytes
addi.n \dst, \dst, 8 // Step the destination pointer past the copied bytes
._memcpy8_skip_\tag:
.endm // macro_memcpy_mod_8
// Tail-copy helper: moves 4 leftover bytes when bit 2 of the byte count is set.
// Positional arguments (identical order to all call sites):
//   src - source pointer register, advanced by 4 when the copy happens
//   dst - destination pointer register, advanced by 4 when the copy happens
//   len - byte-count register whose bit 2 selects the copy
//   tmp - scratch register carrying the copied word
//   tag - unique suffix (callers pass __LINE__) keeping the skip label distinct per expansion
.macro macro_memcpy_mod_4 src, dst, len, tmp, tag
bbci \len, 2, ._memcpy4_skip_\tag // Bit 2 of \len clear -> no 4-byte tail, skip
l32i.n \tmp, \src, 0 // Fetch one 32-bit word from the source
addi.n \src, \src, 4 // Step the source pointer past the word
s32i.n \tmp, \dst, 0 // Store the word to the destination
addi.n \dst, \dst, 4 // Step the destination pointer past the word
._memcpy4_skip_\tag:
.endm // macro_memcpy_mod_4
// Tail-copy helper: moves 2 leftover bytes when bit 1 of the byte count is set.
// Positional arguments (identical order to all call sites):
//   src - source pointer register, advanced by 2 when the copy happens
//   dst - destination pointer register, advanced by 2 when the copy happens
//   len - byte-count register whose bit 1 selects the copy
//   tmp - scratch register carrying the copied halfword
//   tag - unique suffix (callers pass __LINE__) keeping the skip label distinct per expansion
.macro macro_memcpy_mod_2 src, dst, len, tmp, tag
bbci \len, 1, ._memcpy2_skip_\tag // Bit 1 of \len clear -> no 2-byte tail, skip
l16ui \tmp, \src, 0 // Fetch one 16-bit halfword from the source
addi.n \src, \src, 2 // Step the source pointer past the halfword
s16i \tmp, \dst, 0 // Store the halfword to the destination
addi.n \dst, \dst, 2 // Step the destination pointer past the halfword
._memcpy2_skip_\tag:
.endm // macro_memcpy_mod_2
// Tail-copy helper: moves the final leftover byte when bit 0 of the byte count is set.
// Positional arguments (identical order to all call sites):
//   src - source pointer register, advanced by 1 when the copy happens
//   dst - destination pointer register, advanced by 1 when the copy happens
//   len - byte-count register whose bit 0 selects the copy
//   tmp - scratch register carrying the copied byte
//   tag - unique suffix (callers pass __LINE__) keeping the skip label distinct per expansion
.macro macro_memcpy_mod_1 src, dst, len, tmp, tag
bbci \len, 0, ._memcpy1_skip_\tag // Bit 0 of \len clear -> no 1-byte tail, skip
l8ui \tmp, \src, 0 // Fetch one byte from the source
addi.n \src, \src, 1 // Step the source pointer past the byte
s8i \tmp, \dst, 0 // Store the byte to the destination
addi.n \dst, \dst, 1 // Step the destination pointer past the byte
._memcpy1_skip_\tag:
.endm // macro_memcpy_mod_1

View File

@ -0,0 +1,264 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "lv_macro_memcpy.S" // Memcpy macros
// This is LVGL RGB565 image blend to RGB565 for ESP32 processor
.section .text
.align 4
.global lv_rgb565_blend_normal_to_rgb565_esp
.type lv_rgb565_blend_normal_to_rgb565_esp,@function
// The function implements the following C code:
// void rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t * dsc);
// Input params
//
// dsc - a2
// typedef struct {
// uint32_t opa; l32i 0
// void * dst_buf; l32i 4
// uint32_t dst_w; l32i 8
// uint32_t dst_h; l32i 12
// uint32_t dst_stride; l32i 16
// const void * src_buf; l32i 20
// uint32_t src_stride; l32i 24
// const lv_opa_t * mask_buf; l32i 28
// uint32_t mask_stride; l32i 32
// } asm_dsc_t;
lv_rgb565_blend_normal_to_rgb565_esp:
// Row-by-row copy of an RGB565 image into an RGB565 destination (plain copy, no blending math needed).
// Dispatches to one of three implementations: fully 4-byte-aligned fast path, general unaligned path,
// or a simple byte-wise path for widths below 8 pixels.
entry a1, 32
l32i.n a3, a2, 4 // a3 - dest_buff
l32i.n a4, a2, 8 // a4 - dest_w in uint16_t
l32i.n a5, a2, 12 // a5 - dest_h in uint16_t
l32i.n a6, a2, 16 // a6 - dest_stride in bytes
l32i.n a7, a2, 20 // a7 - src_buff
l32i.n a8, a2, 24 // a8 - src_stride in bytes
slli a11, a4, 1 // a11 - dest_w_bytes = sizeof(uint16_t) * dest_w
// No need to convert any colors here, we are copying from rgb565 to rgb565
// Check dest_w length
bltui a4, 8, _matrix_width_check // Branch if dest_w (a4) is lower than 8
// Check memory alignment and input parameters lengths and decide which implementation to use
// OR-ing all pointers/strides/lengths together and masking with 0x3 detects whether ANY of them
// breaks the 4-byte alignment requirement in a single test
movi.n a10, 0x3 // a10 = 0x3 alignment mask (4-byte alignment)
or a15, a7, a3 // a15 = src_buff (a7) OR dest_buff (a3)
or a15, a15, a6 // a15 = a15 OR dest_stride (a6)
or a15, a15, a8 // a15 = a15 OR src_stride (a8)
or a15, a15, a11 // a15 = a15 OR dest_w_bytes (a11)
and a15, a15, a10 // a15 = a15 AND alignment mask (a10)
bnez a15, _alignment_check // Branch if a15 not equals to zero (something is unaligned)
//**********************************************************************************************************************
// The most ideal case - both arrays aligned, both strides and dest_w are multiples of 4
// dest_buff (a3) - 4-byte aligned
// src_buff (a7) - 4-byte aligned
// dest_stride (a6) - 4-byte multiple
// src_stride (a8) - 4-byte multiple
// dest_w (a4) - 4-byte multiple
srli a9, a4, 3 // a9 - loop_len = dest_w / 8 (main loop moves 8 pixels per iteration)
// Convert strides to matrix paddings (bytes to skip after each finished row)
sub a6, a6, a11 // dest_matrix_padding (a6) = dest_stride (a6) - dest_w_bytes (a11)
sub a8, a8, a11 // src_matrix_padding (a8) = src_stride (a8) - dest_w_bytes (a11)
.outer_loop_align:
// Run main loop which copies 16 bytes (8 RGB565 pixels) in one loop run
loopnez a9, ._main_loop_aligned
l32i.n a15, a7, 0 // Load 32 bits from src_buff a7 to a15, offset 0
l32i.n a14, a7, 4 // Load 32 bits from src_buff a7 to a14, offset 4
l32i.n a13, a7, 8 // Load 32 bits from src_buff a7 to a13, offset 8
l32i.n a12, a7, 12 // Load 32 bits from src_buff a7 to a12, offset 12
s32i.n a15, a3, 0 // Save 32 bits from a15 to dest_buff a3, offset 0
s32i.n a14, a3, 4 // Save 32 bits from a14 to dest_buff a3, offset 4
s32i.n a13, a3, 8 // Save 32 bits from a13 to dest_buff a3, offset 8
s32i.n a12, a3, 12 // Save 32 bits from a12 to dest_buff a3, offset 12
addi.n a7, a7, 16 // Increment src_buff pointer a7 by 16
addi.n a3, a3, 16 // Increment dest_buff pointer a3 by 16
._main_loop_aligned:
// Finish the remaining bytes out of the main loop
// Check modulo 8 of the dest_w_bytes (a11), if - then copy 8 bytes (4 RGB565 pixels)
// src_buff a7, dest_buff a3, dest_w_bytes a11, copy registers a14 a15
macro_memcpy_mod_8 a7, a3, a11, a14, a15 __LINE__
// Check modulo 4 of the dest_w_bytes (a11), if - then copy 4 bytes (2 RGB565 pixels)
// src_buff a7, dest_buff a3, dest_w_bytes a11, copy register a15
macro_memcpy_mod_4 a7, a3, a11, a15 __LINE__
// Check modulo 2 of the dest_w_bytes (a11), if - then copy 2 bytes (1 RGB565 pixel)
// src_buff a7, dest_buff a3, dest_w_bytes a11, copy register a15
macro_memcpy_mod_2 a7, a3, a11, a15 __LINE__
// Check modulo 1 of the dest_w_bytes (a11), if - then copy 1 byte
// NOTE(review): dest_w_bytes = 2 * dest_w is always even, so this check can never fire — confirm it is kept only for symmetry
// src_buff a7, dest_buff a3, dest_w_bytes a11, copy register a15
macro_memcpy_mod_1 a7, a3, a11, a15 __LINE__
add a3, a3, a6 // dest_buff (a3) = dest_buff (a3) + dest_matrix_padding (a6)
add a7, a7, a8 // src_buff (a7) = src_buff (a7) + src_matrix_padding (a8)
addi.n a5, a5, -1 // Decrease the outer loop
bnez a5, .outer_loop_align
movi.n a2, 1 // Return LV_RESULT_OK = 1
retw.n // Return
//**********************************************************************************************************************
// The most general case - at least one array is not aligned, or one parameter is not multiple of 4
_alignment_check:
// dest_buff (a3) - 4-byte aligned, or not
// src_buff (a7) - 4-byte aligned, or not
// dest_stride (a6) - 4-byte multiple, or not
// src_stride (a8) - 4-byte multiple, or not
// dest_w (a4) - 4-byte multiple, or not
// Convert strides to matrix paddings (bytes to skip after each finished row)
sub a6, a6, a11 // dest_matrix_padding (a6) = dest_stride (a6) - dest_w_bytes (a11)
sub a8, a8, a11 // src_matrix_padding (a8) = src_stride (a8) - dest_w_bytes (a11)
.outer_loop_unalign:
extui a13, a3, 0, 2 // Get last two bits of the dest_buff address a3, to a13
movi.n a15, 4 // Move 4 to a15, for calculation of the destination alignment loop
sub a14, a15, a13 // Calculate destination alignment loop length (a14 = 4 - a13)
// In case of the dest_buff a3 being already aligned (for example by matrix padding), correct a14 value,
// to prevent the destination aligning loop to run 4 times (to prevent aligning already aligned memory)
moveqz a14, a13, a13 // If a13 == 0 (dest_buff already aligned), set a14 = a13 = 0, skipping the aligning loop
// a10 (the 0x3 mask) is no longer needed and is reused below as dest_w_bytes_local
sub a10, a11, a14 // Get the dest_w_bytes after the aligning loop (a10 = dest_w_bytes_local)
srli a9, a10, 4 // Calculate main loop len (a9 = dest_w_bytes_local / 16)
// Run dest_buff aligning loop byte by byte
loopnez a14, ._dest_aligning_loop
l8ui a15, a7, 0 // Load 8 bits from src_buff a7 to a15, offset 0
addi.n a7, a7, 1 // Increment src_buff pointer a7 by 1
s8i a15, a3, 0 // Save 8 bits from a15 to dest_buff a3, offset 0
addi.n a3, a3, 1 // Increment dest_buff pointer a3 by 1
._dest_aligning_loop:
// Destination is aligned, source is unaligned
// For more information about this implementation, see chapter 3.3.2 Shifts and the Shift Amount Register (SAR)
// in Xtensa Instruction Set Architecture (ISA) Reference Manual
ssa8l a7 // Set SAR_BYTE from src_buff a7 unalignment
// dest_w (a4) is not needed again on this path, so the register is reused for the source unalignment
extui a4, a7, 0, 2 // Get last 2 bits of the src_buff, a4 = src_buff_unalignment
sub a7, a7, a4 // "align" the src_buff a7, to 4-byte boundary by decreasing it's pointer to the nearest aligned boundary
// First preload for the loopnez cycle
l32i.n a15, a7, 0 // Load 32 bits from 4-byte aligned src_buff a7 to a15, offset 0
// Run main loop which copies 16 bytes (8 RGB565 pixels) in one loop run
loopnez a9, ._main_loop_unalign
l32i.n a14, a7, 4 // Load 32 bits from 4-byte aligned src_buff a7 to a14, offset 4
l32i.n a13, a7, 8 // Load 32 bits from 4-byte aligned src_buff a7 to a13, offset 8
src a15, a14, a15 // Concatenate a14 and a15 and shift by SAR_BYTE amount to a15
s32i.n a15, a3, 0 // Save 32 bits from shift-corrected a15 to dest_buff a3, offset 0
l32i.n a12, a7, 12 // Load 32 bits from 4-byte aligned src_buff a7 to a12, offset 12
src a14, a13, a14 // Concatenate a13 and a14 and shift by SAR_BYTE amount to a14
s32i.n a14, a3, 4 // Save 32 bits from shift-corrected a14 to dest_buff a3, offset 4
l32i.n a15, a7, 16 // Load 32 bits from 4-byte aligned src_buff a7 to a15, offset 16
src a13, a12, a13 // Concatenate a12 and a13 and shift by SAR_BYTE amount to a13
s32i.n a13, a3, 8 // Save 32 bits from shift-corrected a13 to dest_buff a3, offset 8
addi.n a7, a7, 16 // Increment src_buff pointer a7 by 16
src a12, a15, a12 // Concatenate a15 and a12 and shift by SAR_BYTE amount to a12
s32i.n a12, a3, 12 // Save 32 bits from shift-corrected a12 to dest_buff a3, offset 12
addi.n a3, a3, 16 // Increment dest_buff pointer a3 by 16
._main_loop_unalign:
// Finish the remaining bytes out of the loop
// Check modulo 8 of the dest_w_bytes_local (a10), if - then copy 8 bytes
bbci a10, 3, _mod_8_check // Branch if 3-rd bit of dest_w_bytes_local is clear
l32i.n a14, a7, 4 // Load 32 bits from 4-byte aligned src_buff a7 to a14, offset 4
l32i.n a13, a7, 8 // Load 32 bits from 4-byte aligned src_buff a7 to a13, offset 8
src a15, a14, a15 // Concatenate a14 and a15 and shift by SAR_BYTE amount to a15 (value in a15 is already prepared from previous steps)
s32i.n a15, a3, 0 // Save 32 bits from shift-corrected a15 to dest_buff a3, offset 0
addi.n a7, a7, 8 // Increment src_buff pointer a7 by 8
src a14, a13, a14 // Concatenate a13 and a14 and shift by SAR_BYTE amount to a14
s32i.n a14, a3, 4 // Save 32 bits from shift-corrected a14 to dest_buff a3, offset 4
addi.n a3, a3, 8 // Increment dest_buff pointer a3 by 8
mov a15, a13 // Prepare a15 for the next steps (copy a13 to a15)
_mod_8_check:
// Check modulo 4 of the dest_w_bytes_local (a10), if - then copy 4 bytes
bbci a10, 2, _mod_4_check // Branch if 2-nd bit of dest_w_bytes_local is clear
l32i.n a14, a7, 4 // Load 32 bits from 4-byte aligned src_buff a7 to a14, offset 4
addi.n a7, a7, 4 // Increment src_buff pointer a7 by 4
src a15, a14, a15 // Concatenate a14 and a15 and shift by SAR_BYTE amount to a15 (value in a15 is already prepared from previous steps)
s32i.n a15, a3, 0 // Save 32 bits from shift-corrected a15 to dest_buff a3, offset 0
addi.n a3, a3, 4 // Increment dest_buff pointer a3 by 4
mov a15, a14 // Prepare a15 for the next steps (copy a14 to a15)
_mod_4_check:
// Handle the last 1-3 bytes with a read-modify-write of the destination word,
// so bytes past the row end are preserved
extui a13, a10, 0, 2 // Get the last 2 bits of the dest_w_bytes_local (a10), a13 = a10[1:0], to find out how many bytes need to be copied and to increase src and dest pointers accordingly
beqz a13, _mod_1_2_check // Branch if a13 equal to zero, E.G. if there are no bytes to be copied
l32i.n a14, a7, 4 // Load 32 bits from 4-byte aligned src_buff a7 to a14, offset 4
l32i.n a12, a3, 0 // Get dest_buff value: Load 32 bits from 4-byte aligned dest_buff a3 to a12, offset 0
src a15, a14, a15 // Concatenate a14 and a15 and shift by SAR_BYTE amount to a15 (value in a15 is already prepared from previous steps)
ssa8l a10 // Set SAR_BYTE from dest_w_bytes_local a10 length
sll a15, a15 // Shift the dest word a15 by SAR_BYTE amount
srl a12, a12 // Shift the src word a12 by SAR_BYTE amount
ssa8b a10 // Set SAR_BYTE from dest_w_bytes_local a10 length
src a12, a12, a15 // Concatenate a12 and a15 and shift by SAR_BYTE amount to a12
s32i.n a12, a3, 0 // Save 32 bits from shift-corrected a12 to dest_buff a3, offset 0
add a7, a7, a13 // Increment src_buff pointer a7, by amount of copied bytes (a13)
add a3, a3, a13 // Increment dest_buff pointer a3, by amount of copied bytes (a13)
_mod_1_2_check:
add a7, a7, a4 // Correct the src_buff back by src_buff_unalignment (a4), after we have force-aligned it to 4-byte boundary before the main loop
add a3, a3, a6 // dest_buff + dest_stride
add a7, a7, a8 // src_buff + src_stride
addi.n a5, a5, -1 // Decrease the outer loop
bnez a5, .outer_loop_unalign
movi.n a2, 1 // Return LV_RESULT_OK = 1
retw.n // Return
//**********************************************************************************************************************
// Small matrix width, keep it simple for lengths less than 8 pixels
_matrix_width_check: // Matrix width is less than 8 pixels
// Convert strides to matrix paddings (bytes to skip after each finished row)
sub a6, a6, a11 // dest_matrix_padding (a6) = dest_stride (a6) - dest_w_bytes (a11)
sub a8, a8, a11 // src_matrix_padding (a8) = src_stride (a8) - dest_w_bytes (a11)
.outer_loop_short_matrix_length:
// Run main loop which copies 2 bytes (one RGB565 pixel) in one loop run
loopnez a4, ._main_loop_short_matrix_length
l8ui a15, a7, 0 // Load 8 bits from src_buff a7 to a15, offset 0
l8ui a14, a7, 1 // Load 8 bits from src_buff a7 to a14, offset 1
s8i a15, a3, 0 // Save 8 bits from a15 to dest_buff a3, offset 0
s8i a14, a3, 1 // Save 8 bits from a14 to dest_buff a3, offset 1
addi.n a7, a7, 2 // Increment src_buff pointer a7 by 2
addi.n a3, a3, 2 // Increment dest_buff pointer a3 by 2
._main_loop_short_matrix_length:
// Finish remaining byte out of the main loop
// Check modulo 1 of the dest_w_bytes (a11), if - then copy 1 byte
// NOTE(review): dest_w_bytes = 2 * dest_w is always even, so this check can never fire — confirm it is kept only for symmetry
// src_buff a7, dest_buff a3, dest_w_bytes a11, copy register a15
macro_memcpy_mod_1 a7, a3, a11, a15, __LINE__
add a3, a3, a6 // dest_buff (a3) = dest_buff (a3) + dest_matrix_padding (a6)
add a7, a7, a8 // src_buff (a7) = src_buff (a7) + src_matrix_padding (a8)
addi.n a5, a5, -1 // Decrease the outer loop
bnez a5, .outer_loop_short_matrix_length
movi.n a2, 1 // Return LV_RESULT_OK = 1
retw.n // Return

View File

@ -0,0 +1,372 @@
/*
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "lv_macro_memcpy.S" // Memcpy macros
// This is LVGL RGB565 image blend to RGB565 for ESP32S3 processor
.section .text
.align 4
.global lv_rgb565_blend_normal_to_rgb565_esp
.type lv_rgb565_blend_normal_to_rgb565_esp,@function
// The function implements the following C code:
// void rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t * dsc);
// Input params
//
// dsc - a2
// typedef struct {
// uint32_t opa; l32i 0
// void * dst_buf; l32i 4
// uint32_t dst_w; l32i 8
// uint32_t dst_h; l32i 12
// uint32_t dst_stride; l32i 16
// const void * src_buf; l32i 20
// uint32_t src_stride; l32i 24
// const lv_opa_t * mask_buf; l32i 28
// uint32_t mask_stride; l32i 32
// } asm_dsc_t;
lv_rgb565_blend_normal_to_rgb565_esp:
entry a1, 32
l32i.n a3, a2, 4 // a3 - dest_buff
l32i.n a4, a2, 8 // a4 - dest_w in uint16_t
l32i.n a5, a2, 12 // a5 - dest_h in uint16_t
l32i.n a6, a2, 16 // a6 - dest_stride in bytes
l32i.n a7, a2, 20 // a7 - src_buff
l32i.n a8, a2, 24 // a8 - src_stride in bytes
movi.n a10, 0xf // 0xf alignment mask (16-byte alignment)
slli a11, a4, 1 // a11 - dest_w_bytes = sizeof(uint16_t) * dest_w
// No need to convert any colors here, we are copying from rgb565 to rgb565
// Check dest_w length
bltui a4, 8, _matrix_width_check // Branch if dest_w (a4) is lower than 8
// Check dest_buff alignment fist
and a15, a10, a3 // 16-byte alignment mask AND dest_buff pointer a3
bnez a15, _src_unalign_dest_unalign // Branch if a15 not equals to zero
// Jump straight to the last implementation, since this is the only one which deals with unaligned destination arrays
// Check src_buff alignment
and a15, a10, a7 // 16-byte alignment mask AND src_buff pointer a7
bnez a15, _src_align_dest_unalign // Branch if a15 not equals to zero
// Jump to check, if the second or third implementation can be used (depends on both strides and dest_w)
// Check dest_stride alignment
and a15, a10, a6 // 16-byte alignment mask AND dest_stride a6
bnez a15, _src_unalign_dest_unalign // Branch if a15 not equals to zero
// Jump straight to the last implementation, since this is the only one which deals with destination stride not aligned
// Check src_stride alignment
and a15, a10, a8 // 16-byte alignment mask AND src_stride a8
bnez a15, _src_align_dest_unalign // Branch if a15 not equals to zero
// Jump to check, if the second or third implementation can be used (depends on dest_w_bytes)
// Check dest_w_bytes alignment
and a15, a10, a11 // 16-byte alignment mask AND dest_w_bytes
bnez a15, _src_unalign_dest_unalign // Branch if a15 not equals to zero
// Jump straight to the last implementation, since this is the only one which deals with dest_w_bytes not aligned
//**********************************************************************************************************************
// The most ideal case - both arrays aligned, both strides and dest_w are multiples of 16
// dest_buff (a3) - 16-byte aligned
// src_buff (a7) - 16-byte aligned
// dest_stride (a6) - 16-byte multiple
// src_stride (a8) - 16-byte multiple
// dest_w (a4) - 16-byte multiple
srli a9, a4, 4 // a9 - loop_len = dest_w / 16
// Convert strides to matrix paddings
sub a6, a6, a11 // dest_matrix_padding (a6) = dest_stride (a6) - dest_w_bytes (a11)
sub a8, a8, a11 // src_matrix_padding (a8) = src_stride (a8) - dest_w_bytes (a11)
.outer_loop_align:
// Run main loop which copies 32 bytes (16 RGB565 pixels) in one loop run
loopnez a9, ._main_loop_align // 32 bytes (16 RGB565 pixels) in one loop run
ee.vld.128.ip q0, a7, 16 // Load 16 bytes from src_buff a7 to q0, increase src_buf pointer a7 by 16
ee.vld.128.ip q1, a7, 16 // Load 16 bytes from src_buff a7 to q1, increase src_buf pointer a7 by 16
ee.vst.128.ip q0, a3, 16 // Store 16 bytes from q0 to dest_buff a3, increase dest_buff pointer a3 by 16
ee.vst.128.ip q1, a3, 16 // Store 16 bytes from q1 to dest_buff a3, increase dest_buff pointer a3 by 16
._main_loop_align:
// Finish remaining bytes out of the main loop
// Check modulo 16 of the dest_w, if - then copy 16 bytes (8 RGB565 pixels)
bbci a11, 4, _align_mod_16_check // Branch if 4-th bit of dest_w_bytes a11 is clear
ee.vld.128.ip q0, a7, 16 // Load 16 bytes from src_buff a7 to q0, increase src_buf pointer a7 by 16
ee.vst.128.ip q0, a3, 16 // Store 16 bytes from q0 to dest_buff a3, increase dest_buff pointer a3 by 16
_align_mod_16_check:
add a3, a3, a6 // dest_buff (a3) = dest_buff (a3) + dest_matrix_padding (a6)
add a7, a7, a8 // src_buff (a7) = src_buff (a7) + src_matrix_padding (a8)
addi.n a5, a5, -1 // Decrease the outer loop
bnez a5, .outer_loop_align
movi.n a2, 1 // Return LV_RESULT_OK = 1
retw.n // Return
_src_align_dest_unalign:
// Check dest_stride alignment
and a15, a10, a6 // 16-byte alignment mask AND dest_stride a6
bnez a15, _src_unalign_dest_unalign // Branch if a15 not equals to zero
// Check dest_w_bytes alignment
and a15, a10, a11 // 16-byte alignment mask AND dest_w_bytes a11
bnez a15, _src_unalign_dest_unalign // Branch if a15 not equals to zero
// We don't check src_stride alignment for this implementation, as it can be either align, or unalign
//**********************************************************************************************************************
// Less ideal case - Only destination array is aligned, src array is unaligned
// Source stride is either aligned or unaligned, destination stride must be aligned, dest_w_bytes must be aligned
// dest_buff (a3) - 16-byte aligned
// src_buff (a7) - unaligned
// dest_stride (a6) - 16-byte multiple
// src_stride (a8) - does not matter if 16-byte multiple
// dest_w (a4) - 16-byte multiple
// Convert strides to matrix paddings
sub a6, a6, a11 // dest_matrix_padding (a6) = dest_stride (a6) - dest_w_bytes (a11)
sub a8, a8, a11 // src_matrix_padding (a8) = src_stride (a8) - dest_w_bytes (a11)
// Calculate modulo for non-aligned data
movi a15, 48 // a15 = 48 (main loop copies 48 bytes)
quou a9, a11, a15 // a9 = dest_w_bytes (a11) DIV 48 (15)
remu a12, a11, a15 // a12 = dest_w_bytes (a11) remainder after DIV 48 (15)
.outer_loop_src_unalign_dest_align:
ee.ld.128.usar.ip q2, a7, 16 // Preload 16 bytes from src_buff a7 to q2, get value of the SAR_BYTE, increase src_buf pointer a7 by 16
ee.ld.128.usar.ip q3, a7, 16 // Preload 16 bytes from src_buff a7 to q3, get value of the SAR_BYTE, increase src_buf pointer a7 by 16
// Run main loop which copies 48 bytes (24 RGB565 pixels) in one loop run
loopnez a9, ._main_loop_src_unalign_dest_align // 48 bytes (24 RGB565 pixels) in one loop
ee.src.q.ld.ip q4, a7, 16, q2, q3 // Load 16 bytes from src_buff a7 to q4, concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount, increase src_buf pointer a7 by 16
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q.ld.ip q2, a7, 16, q3, q4 // Load 16 bytes from src_buff a7 to q2, concatenate q3 and q4 and shift to q3 by the SAR_BYTE amount, increase src_buf pointer a7 by 16
ee.vst.128.ip q3, a3, 16 // Store 16 bytes from q3 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q.ld.ip q3, a7, 16, q4, q2 // Load 16 bytes from src_buff a7 to q3, concatenate q4 and q2 and shift to q4 by the SAR_BYTE amount, increase src_buf pointer a7 by 16
ee.vst.128.ip q4, a3, 16 // Store 16 bytes from q4 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
._main_loop_src_unalign_dest_align:
// Finish the main loop outside of the loop from Q registers preloads
// Check modulo 32 of the loop_len_remainder, if - then copy 32 bytes (16 RGB565 pixels)
bbci a12, 5, _unalign_mod_32_check // Branch if 5-th bit of loop_len_remainder a12 is clear
ee.src.q.ld.ip q4, a7, 0, q2, q3 // Load 16 bytes from src_buff a7 to q4, concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount, don't increase src_buf pointer a7
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q q3, q3, q4 // Concatenate q3 and q4 and shift to q3 by the SAR_BYTE amount
ee.vst.128.ip q3, a3, 16 // Store 16 bytes from q3 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
j _end_of_row_src_unalign_dest_align
_unalign_mod_32_check:
// Check modulo 16 of the loop_len_remainder, if - then copy 16 bytes (8 RGB565 pixels)
bbci a12, 4, _unalign_mod_16_check // Branch if 4-th bit of loop_len_remainder a12 is clear
ee.src.q q2, q2, q3 // Concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
addi a7, a7, -16 // Correct the src_buff pointer a7, caused by q reg preload
j _end_of_row_src_unalign_dest_align
_unalign_mod_16_check:
// Nothing to copy outside of the main loop
addi a7, a7, -32 // Correct the src_buff pointer a7, caused by q reg preload
_end_of_row_src_unalign_dest_align:
add a3, a3, a6 // dest_buff (a3) = dest_buff (a3) + dest_matrix_padding (a6)
add a7, a7, a8 // src_buff (a7) = src_buff (a7) + src_matrix_padding (a8)
addi.n a5, a5, -1 // Decrease the outer loop
bnez a5, .outer_loop_src_unalign_dest_align
movi.n a2, 1 // Return LV_RESULT_OK = 1
retw.n // Return
_src_unalign_dest_unalign:
//**********************************************************************************************************************
// The most general case, can handle all the possible combinations
// dest_buff (a3) - unaligned
// src_buff (a7) - unaligned
// dest_stride (a6) - not 16-byte multiple
// src_stride (a8) - not 16-byte multiple
// dest_w (a4) - not 16-byte multiple
// Convert strides to matrix paddings
sub a6, a6, a11 // dest_matrix_padding (a6) = dest_stride (a6) - dest_w_bytes (a11)
sub a8, a8, a11 // src_matrix_padding (a8) = src_stride (a8) - dest_w_bytes (a11)
.outer_loop_all_unalign:
// dest_buff alignment check
and a13, a10, a3 // Alignment mask 0xf (a10) AND dest_buff pointer
beqz a13, _dest_buff_aligned // Branch if a13 = 0 (if dest_buff is aligned)
movi.n a14, 16 // a14 = 16
sub a13, a14, a13 // a13 = 16 - unalignment
// Check modulo 8 of the unalignment a13, if - then copy 8 bytes (4 RGB565 pixels)
// src_buff a7, dest_buff a3, unalignment a13, copy registers a14, a15
macro_memcpy_mod_8 a7, a3, a13, a15, a14, __LINE__
// Check modulo 4 of the unalignment, if - then copy 4 bytes (2 RGB565 pixels)
// src_buff a7, dest_buff a3, unalignment a13, copy register a15
macro_memcpy_mod_4 a7, a3, a13, a15, __LINE__
// Check modulo 2 of the unalignment, if - then copy 2 bytes (1 RGB565 pixel)
// src_buff a7, dest_buff a3, unalignment a13, copy register a15
macro_memcpy_mod_2 a7, a3, a13, a15, __LINE__
// Check modulo 1 of the unalignment, if - then copy 1 byte (1/2 of RGB565 pixel)
// src_buff a7, dest_buff a3, unalignment a13, copy register a15
macro_memcpy_mod_1 a7, a3, a13, a15, __LINE__
_dest_buff_aligned:
// Calculate modulo for non-aligned data
sub a11, a11, a13 // a11 = local_dest_w_bytes (a11) = dest_w_bytes (a11) - (16 - unalignment)
movi a15, 48 // a15 = 48
quou a9, a11, a15 // a9 = local_dest_w_bytes (a11) DIV 48 (a15)
remu a12, a11, a15 // a12 = local_dest_w_bytes (a11) remainder after div 48 (a15)
ee.ld.128.usar.ip q2, a7, 16 // Preload 16 bytes from src_buff a7 to q2, get value of the SAR_BYTE, increase src_buf pointer a7 by 16
ee.ld.128.usar.ip q3, a7, 16 // Preload 16 bytes from src_buff a7 to q3, get value of the SAR_BYTE, increase src_buf pointer a7 by 16
// Run main loop which copies 48 bytes (24 RGB565 pixels) in one loop run
loopnez a9, ._main_loop_all_unalign // 48 bytes (24 RGB565 pixels) in one loop
ee.src.q.ld.ip q4, a7, 16, q2, q3 // Load 16 bytes from src_buff a7 to q4, concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount, increase src_buf pointer a7 by 16
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q.ld.ip q2, a7, 16, q3, q4 // Load 16 bytes from src_buff a7 to q2, concatenate q3 and q4 and shift to q3 by the SAR_BYTE amount, increase src_buf pointer a7 by 16
ee.vst.128.ip q3, a3, 16 // Store 16 bytes from q3 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
// NOTE(review): tail of the all-unaligned RGB565 copy path; the loop entry and the
// Q-register preloads (q2/q3 hold look-ahead source data) are above this excerpt.
// SAR_BYTE was set earlier from the source misalignment; ee.src.q funnel-shifts
// two Q registers by that amount to realign the stream before each store.
ee.src.q.ld.ip q3, a7, 16, q4, q2 // Load 16 bytes from src_buff a7 to q3, concatenate q4 and q2 and shift to q4 by the SAR_BYTE amount, increase src_buf pointer a7 by 16
ee.vst.128.ip q4, a3, 16 // Store 16 bytes from q4 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
._main_loop_all_unalign:

// Finish the main loop outside of the loop from Q registers preloads
// Check modulo 32 and modulo 8 of the loop_len_remainder a12
bbci a12, 5, _all_unalign_mod_32_check // Branch if 5-th bit of loop_len_remainder a12 is clear
bbsi a12, 3, _all_unalign_mod_32_mod_8_check // Branch if 3-rd bit of loop_len_remainder a12 is set

// Copy 32 bytes (16 RGB565 pixels) (47 - 40)
ee.src.q.ld.ip q4, a7, 0, q2, q3 // Load 16 bytes from src_buff a7 to q4, concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount, don't increase src_buf pointer a7
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q q3, q3, q4 // Concatenate q3 and q4 and shift to q3 by the SAR_BYTE amount
ee.vst.128.ip q3, a3, 16 // Store 16 bytes from q3 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
j _skip_mod16

_all_unalign_mod_32_mod_8_check:
// Copy 40 bytes (20 RGB565 pixels)
ee.src.q.ld.ip q4, a7, 16, q2, q3 // Load 16 bytes from src_buff a7 to q4, concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount, increase src_buf pointer a7 by 16
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q.ld.ip q2, a7, 0, q3, q4 // Load 16 bytes from src_buff a7 to q2, concatenate q3 and q4 and shift to q3 by the SAR_BYTE amount, don't increase src_buf pointer a7
ee.vst.128.ip q3, a3, 16 // Store 16 bytes from q3 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q q4, q4, q2 // Concatenate q4 and q2 and shift to q4 by the SAR_BYTE amount
ee.vst.l.64.ip q4, a3, 8 // Store lower 8 bytes from q4 to aligned dest_buff a3, increase dest_buff pointer a3 by 8
addi a7, a7, -8 // Correct the src_buff pointer a7, caused by q reg preload
j _skip_mod16

_all_unalign_mod_32_check:
// Check modulo 16 and modulo 8 of the loop_len_remainder a12
bbci a12, 4, _all_unalign_mod_16_check // branch if 4-th bit of loop_len_remainder a12 is clear
bbsi a12, 3, _all_unalign_mod_16_mod_8_check // branch if 3-rd bit of loop_len_remainder a12 is set

// Copy 16 bytes (8 RGB565 pixels)
ee.src.q q2, q2, q3 // Concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
addi a7, a7, -16 // Correct the src_buff pointer a7, caused by q reg preload
j _skip_mod16

_all_unalign_mod_16_mod_8_check:
// Copy 24 bytes (12 RGB565 pixels)
ee.src.q.ld.ip q4, a7, 0, q2, q3 // Load 16 bytes from src_buff a7 to q4, concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount, don't increase src_buf pointer a7
ee.vst.128.ip q2, a3, 16 // Store 16 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 16
ee.src.q q3, q3, q4 // Concatenate q3 and q4 and shift to q3 by the SAR_BYTE amount
ee.vst.l.64.ip q3, a3, 8 // Store lower 8 bytes from q3 to aligned dest_buff a3, increase dest_buff pointer a3 by 8
addi a7, a7, -8 // Correct the src_buff pointer a7, caused by q reg preload
j _skip_mod16

_all_unalign_mod_16_check:
bbci a12, 3, _all_unalign_mod_8_check // Branch if 3-rd bit of loop_len_remainder a12 is clear
// Copy 8 bytes (4 RGB565 pixels)
ee.src.q q2, q2, q3 // Concatenate q2 and q3 and shift to q2 by the SAR_BYTE amount
ee.vst.l.64.ip q2, a3, 8 // Store lower 8 bytes from q2 to aligned dest_buff a3, increase dest_buff pointer a3 by 8
addi a7, a7, -24 // Correct the src_buff pointer a7, caused by q reg preload
j _skip_mod16

_all_unalign_mod_8_check:
addi a7, a7, -32 // Correct the src_buff pointer a7, caused by q reg preload

_skip_mod16:
// Check modulo 4 of the loop_len_remainder, if - then copy 4 bytes (2 RGB565 pixels)
// src_buff a7, dest_buff a3, loop_len_remainder a12, copy register a15
macro_memcpy_mod_4 a7, a3, a12, a15, __LINE__
// Check modulo 2 of the loop_len_remainder, if - then copy 2 bytes (1 RGB565 pixel)
// src_buff a7, dest_buff a3, loop_len_remainder a12, copy register a15
macro_memcpy_mod_2 a7, a3, a12, a15, __LINE__
// Check modulo 1 of the loop_len_remainder, if - then copy 1 byte (1/2 RGB565 pixel)
// src_buff a7, dest_buff a3, loop_len_remainder a12, copy register a15
// FIX: last macro argument was `__LINE_` (single trailing underscore). The C
// preprocessor leaves that token unexpanded, so the macro's internal labels built
// from it are not unique per invocation, unlike the other macro_memcpy_mod_* call
// sites (which correctly pass __LINE__).
macro_memcpy_mod_1 a7, a3, a12, a15, __LINE__
slli a11, a4, 1 // Refresh dest_w_bytes
add a3, a3, a6 // dest_buff (a3) = dest_buff (a3) + dest_matrix_padding (a6)
add a7, a7, a8 // src_buff (a7) = src_buff (a7) + src_matrix_padding (a8)
addi.n a5, a5, -1 // Decrease the outer loop
bnez a5, .outer_loop_all_unalign

movi.n a2, 1 // Return LV_RESULT_OK = 1
retw.n // Return
//**********************************************************************************************************************

// Small matrix width, keep it simple for lengths less than 8 pixels
// NOTE(review): the label comment below says "greater or equal 8 pixels", which
// contradicts the banner above; the byte-wise loop suggests this is the short-width
// path — confirm against the branch that targets _matrix_width_check.
_matrix_width_check: // Matrix width is greater or equal 8 pixels

// Convert strides to matrix paddings
sub a6, a6, a11 // dest_matrix_padding (a6) = dest_stride (a6) - dest_w_bytes (a11)
sub a8, a8, a11 // src_matrix_padding (a8) = src_stride (a8) - dest_w_bytes (a11)

.outer_loop_short_matrix_length:

// Run main loop which copies 2 bytes (one RGB565 pixel) in one loop run
loopnez a4, ._main_loop_short_matrix_length
l8ui a15, a7, 0 // Load 8 bits from src_buff a7 to a15, offset 0
l8ui a14, a7, 1 // Load 8 bits from src_buff a7 to a14, offset 1
s8i a15, a3, 0 // Save 8 bits from a15 to dest_buff a3, offset 0
s8i a14, a3, 1 // Save 8 bits from a14 to dest_buff a3, offset 1
addi.n a7, a7, 2 // Increment src_buff pointer a7 by 2
addi.n a3, a3, 2 // Increment dest_buff pointer a3 by 2
._main_loop_short_matrix_length:

// Finish remaining byte out of the main loop
// Check modulo 1 of the dest_w_bytes (a11), if - then copy 1 byte (1/2 RGB565 pixel)
// src_buff a7, dest_buff a3, dest_w_bytes a11, copy register a15
macro_memcpy_mod_1 a7, a3, a11, a15, __LINE__

add a3, a3, a6 // dest_buff (a3) = dest_buff (a3) + dest_matrix_padding (a6)
add a7, a7, a8 // src_buff (a7) = src_buff (a7) + src_matrix_padding (a8)
addi.n a5, a5, -1 // Decrease the outer loop
bnez a5, .outer_loop_short_matrix_length

movi.n a2, 1 // Return LV_RESULT_OK = 1
retw.n // Return

View File

@ -4,7 +4,7 @@ Test app accommodates two types of tests: [`functionality test`](#Functionality-
Assembly source files could be found in the [`lvgl_port`](../../src/lvgl9/simd/) component. Header file with the assembly function prototypes is provided into the LVGL using Kconfig option `LV_DRAW_SW_ASM_CUSTOM_INCLUDE` and can be found in the [`lvgl_port/include`](../../include/esp_lvgl_port_lv_blend.h) Assembly source files could be found in the [`lvgl_port`](../../src/lvgl9/simd/) component. Header file with the assembly function prototypes is provided into the LVGL using Kconfig option `LV_DRAW_SW_ASM_CUSTOM_INCLUDE` and can be found in the [`lvgl_port/include`](../../include/esp_lvgl_port_lv_blend.h)
## Benchmark results ## Benchmark results for LV Fill functions (memset)
| Color format | Matrix size | Memory alignment | ASM version | ANSI C version | | Color format | Matrix size | Memory alignment | ASM version | ANSI C version |
| :----------- | :---------- | :--------------- | :------------- | :------------- | | :----------- | :---------- | :--------------- | :------------- | :------------- |
@ -12,9 +12,20 @@ Assembly source files could be found in the [`lvgl_port`](../../src/lvgl9/simd/)
| | 127x127 | 1 byte | 0.488 | 1.597 | | | 127x127 | 1 byte | 0.488 | 1.597 |
| RGB565 | 128x128 | 16 byte | 0.196 | 1.146 | | RGB565 | 128x128 | 16 byte | 0.196 | 1.146 |
| | 127x127 | 1 byte | 0.497 | 1.124 | | | 127x127 | 1 byte | 0.497 | 1.124 |
| RGB888 | 128x128 | 16 byte | 0.608 | 4.062 |
| | 127x127 | 1 byte | 0.818 | 3.969 |
* this data was obtained by running [benchmark tests](#benchmark-test) on 128x128 16 byte aligned matrix (ideal case) and 127x127 1 byte aligned matrix (worst case) * this data was obtained by running [benchmark tests](#benchmark-test) on 128x128 16 byte aligned matrix (ideal case) and 127x127 1 byte aligned matrix (worst case)
* the values represent cycles per sample to perform simple fill of the matrix on esp32s3 * the values represent cycles per sample to perform simple fill of the matrix on esp32s3
## Benchmark results for LV Image functions (memcpy)
| Color format | Matrix size | Memory alignment | ASM version | ANSI C version |
| :----------- | :---------- | :--------------- | :------------- | :------------- |
| RGB565 | 128x128 | 16 byte | 0.352 | 3.437 |
| | 127x128 | 1 byte | 0.866 | 5.978 |
* this data was obtained by running [benchmark tests](#benchmark-test) on 128x128 16 byte aligned matrix (ideal case) and 127x128 1 byte aligned matrix (worst case)
* the values represent cycles per sample to perform memory copy between two matrices on esp32s3
## Functionality test ## Functionality test
* Tests, whether the HW accelerated assembly version of an LVGL function provides the same results as the ANSI version * Tests, whether the HW accelerated assembly version of an LVGL function provides the same results as the ANSI version
* A top-level flow of the functionality test: * A top-level flow of the functionality test:
@ -62,6 +73,8 @@ Here's the test menu, pick your combo:
(2) "Test fill functionality RGB565" [fill][functionality][RGB565] (2) "Test fill functionality RGB565" [fill][functionality][RGB565]
(3) "LV Fill benchmark ARGB8888" [fill][benchmark][ARGB8888] (3) "LV Fill benchmark ARGB8888" [fill][benchmark][ARGB8888]
(4) "LV Fill benchmark RGB565" [fill][benchmark][RGB565] (4) "LV Fill benchmark RGB565" [fill][benchmark][RGB565]
(5) "LV Image functionality RGB565 blend to RGB565" [image][functionality][RGB565]
(6) "LV Image benchmark RGB565 blend to RGB565" [image][benchmark][RGB565]
Enter test for running. Enter test for running.
``` ```

View File

@ -8,6 +8,9 @@ if(CONFIG_IDF_TARGET_ESP32 OR CONFIG_IDF_TARGET_ESP32S3)
else() else()
file(GLOB_RECURSE ASM_SOURCES ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files file(GLOB_RECURSE ASM_SOURCES ${PORT_PATH}/simd/*_esp32.S) # Select only esp32 related files
endif() endif()
file(GLOB_RECURSE ASM_MACROS ${PORT_PATH}/simd/lv_macro_*.S) # Explicitly add all assembler macro files
else() else()
message(WARNING "This test app is intended only for esp32 and esp32s3") message(WARNING "This test app is intended only for esp32 and esp32s3")
endif() endif()
@ -15,7 +18,14 @@ endif()
# Hard copy of LV files # Hard copy of LV files
file(GLOB_RECURSE BLEND_SRCS lv_blend/src/*.c) file(GLOB_RECURSE BLEND_SRCS lv_blend/src/*.c)
idf_component_register(SRCS "test_app_main.c" "test_lv_fill_functionality.c" "test_lv_fill_benchmark.c" ${BLEND_SRCS} ${ASM_SOURCES} idf_component_register(SRCS "test_app_main.c"
"test_lv_fill_functionality.c" # memset tests
"test_lv_fill_benchmark.c"
"test_lv_image_functionality.c" # memcpy tests
"test_lv_image_benchmark.c"
${BLEND_SRCS} # Hard copy of LVGL's blend API, to simplify testing
${ASM_SOURCES} # Assembly src files
${ASM_MACROS} # Assembly macro files
INCLUDE_DIRS "lv_blend/include" "../../../include" INCLUDE_DIRS "lv_blend/include" "../../../include"
REQUIRES unity REQUIRES unity
WHOLE_ARCHIVE) WHOLE_ARCHIVE)

View File

@ -57,6 +57,7 @@ typedef struct {
lv_color_format_t src_color_format; lv_color_format_t src_color_format;
lv_opa_t opa; lv_opa_t opa;
lv_blend_mode_t blend_mode; lv_blend_mode_t blend_mode;
bool use_asm;
} _lv_draw_sw_blend_image_dsc_t; } _lv_draw_sw_blend_image_dsc_t;
/********************** /**********************

View File

@ -0,0 +1,53 @@
/*
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*
* This file is derived from the LVGL project.
* See https://github.com/lvgl/lvgl for details.
*/
/**
* @file lv_draw_sw_blend_rgb888.h
*
*/
#ifndef LV_DRAW_SW_BLEND_RGB888_H
#define LV_DRAW_SW_BLEND_RGB888_H
#ifdef __cplusplus
extern "C" {
#endif
/*********************
* INCLUDES
*********************/
#include "lv_draw_sw_blend.h"
/*********************
* DEFINES
*********************/
/**********************
* TYPEDEFS
**********************/
/**********************
* GLOBAL PROTOTYPES
**********************/
void /* LV_ATTRIBUTE_FAST_MEM */ lv_draw_sw_blend_color_to_rgb888(_lv_draw_sw_blend_fill_dsc_t *dsc,
uint32_t dest_px_size);
void /* LV_ATTRIBUTE_FAST_MEM */ lv_draw_sw_blend_image_to_rgb888(_lv_draw_sw_blend_image_dsc_t *dsc,
uint32_t dest_px_size);
/**********************
* MACROS
**********************/
#ifdef __cplusplus
} /*extern "C"*/
#endif
#endif /*LV_DRAW_SW_BLEND_RGB888_H*/

View File

@ -0,0 +1,79 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*
* This file is derived from the LVGL project.
* See https://github.com/lvgl/lvgl for details.
*/
/**
* @file lv_string.h
*
*/
#ifndef LV_STRING_H
#define LV_STRING_H
#ifdef __cplusplus
extern "C" {
#endif
/*********************
* INCLUDES
*********************/
//#include "../lv_conf_internal.h"
#include <stdint.h>
#include <stddef.h>
#include "lv_types.h"
/*********************
* DEFINES
*********************/
/**********************
* TYPEDEFS
**********************/
/**********************
* GLOBAL PROTOTYPES
**********************/
/**
* @brief Copies a block of memory from a source address to a destination address.
* @param dst Pointer to the destination array where the content is to be copied.
* @param src Pointer to the source of data to be copied.
* @param len Number of bytes to copy.
* @return Pointer to the destination array.
* @note The function does not check for any overlapping of the source and destination memory blocks.
*/
void *lv_memcpy(void *dst, const void *src, size_t len);
/**
* @brief Fills a block of memory with a specified value.
* @param dst Pointer to the destination array to fill with the specified value.
* @param v Value to be set. The value is passed as an int, but the function fills
* the block of memory using the unsigned char conversion of this value.
* @param len Number of bytes to be set to the value.
*/
void lv_memset(void *dst, uint8_t v, size_t len);
/**
* @brief Move a block of memory from source to destination
* @param dst Pointer to the destination array where the content is to be copied.
* @param src Pointer to the source of data to be copied.
* @param len Number of bytes to copy
* @return Pointer to the destination array.
*/
void *lv_memmove(void *dst, const void *src, size_t len);
/**********************
* MACROS
**********************/
#ifdef __cplusplus
} /*extern "C"*/
#endif
#endif /*LV_STRING_H*/

View File

@ -19,6 +19,8 @@
extern "C" { extern "C" {
#endif #endif
#include <stdint.h>
/********************** /**********************
* TYPEDEFS * TYPEDEFS
**********************/ **********************/

View File

@ -23,7 +23,7 @@
#include "lv_draw_sw_blend.h" #include "lv_draw_sw_blend.h"
#include "lv_math.h" #include "lv_math.h"
#include "lv_color.h" #include "lv_color.h"
#include "string.h" #include "lv_string.h"
#include "esp_lvgl_port_lv_blend.h" #include "esp_lvgl_port_lv_blend.h"
@ -628,7 +628,7 @@ static void LV_ATTRIBUTE_FAST_MEM rgb888_image_blend(_lv_draw_sw_blend_image_dsc
if (src_px_size == 4) { if (src_px_size == 4) {
uint32_t line_in_bytes = w * 4; uint32_t line_in_bytes = w * 4;
for (y = 0; y < h; y++) { for (y = 0; y < h; y++) {
memcpy(dest_buf_c32, src_buf, line_in_bytes); // lv_memcpy lv_memcpy(dest_buf_c32, src_buf, line_in_bytes);
dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride); dest_buf_c32 = drawbuf_next_row(dest_buf_c32, dest_stride);
src_buf = drawbuf_next_row(src_buf, src_stride); src_buf = drawbuf_next_row(src_buf, src_stride);
} }
@ -870,9 +870,9 @@ static inline lv_color32_t LV_ATTRIBUTE_FAST_MEM lv_color_32_32_mix(lv_color32_t
void lv_color_mix_with_alpha_cache_init(lv_color_mix_alpha_cache_t *cache) void lv_color_mix_with_alpha_cache_init(lv_color_mix_alpha_cache_t *cache)
{ {
memset(&cache->fg_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero lv_memset(&cache->fg_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero
memset(&cache->bg_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero lv_memset(&cache->bg_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero
memset(&cache->res_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero lv_memset(&cache->res_saved, 0x00, sizeof(lv_color32_t)); //lv_memzero
cache->res_alpha_saved = 255; cache->res_alpha_saved = 255;
cache->ratio_saved = 255; cache->ratio_saved = 255;
} }

View File

@ -23,7 +23,7 @@
#include "lv_draw_sw_blend.h" #include "lv_draw_sw_blend.h"
#include "lv_math.h" #include "lv_math.h"
#include "lv_color.h" #include "lv_color.h"
#include "string.h" #include "lv_string.h"
#include "esp_lvgl_port_lv_blend.h" #include "esp_lvgl_port_lv_blend.h"
@ -601,10 +601,12 @@ static void LV_ATTRIBUTE_FAST_MEM rgb565_image_blend(_lv_draw_sw_blend_image_dsc
if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) { if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if (mask_buf == NULL && opa >= LV_OPA_MAX) { if (mask_buf == NULL && opa >= LV_OPA_MAX) {
if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565(dsc)) { if (dsc->use_asm) {
LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB565(dsc);
} else {
uint32_t line_in_bytes = w * 2; uint32_t line_in_bytes = w * 2;
for (y = 0; y < h; y++) { for (y = 0; y < h; y++) {
memcpy(dest_buf_u16, src_buf_u16, line_in_bytes); // lv_memcpy lv_memcpy(dest_buf_u16, src_buf_u16, line_in_bytes);
dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride); dest_buf_u16 = drawbuf_next_row(dest_buf_u16, dest_stride);
src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride); src_buf_u16 = drawbuf_next_row(src_buf_u16, src_stride);
} }

View File

@ -0,0 +1,952 @@
/*
* SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*
* This file is derived from the LVGL project.
* See https://github.com/lvgl/lvgl for details.
*/
/**
* @file lv_draw_sw_blend_to_rgb888.c
*
*/
/*********************
* INCLUDES
*********************/
#include "lv_draw_sw_blend_to_rgb888.h"
#include "lv_assert.h"
#include "lv_types.h"
#include "lv_log.h"
#include "lv_draw_sw_blend.h"
#include "lv_math.h"
#include "lv_color.h"
#include "lv_string.h"
#include "esp_lvgl_port_lv_blend.h"
/*********************
* DEFINES
*********************/
#define LV_ATTRIBUTE_FAST_MEM
/**********************
* TYPEDEFS
**********************/
/**********************
* STATIC PROTOTYPES
**********************/
static void /* LV_ATTRIBUTE_FAST_MEM */ al88_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size);
static void /* LV_ATTRIBUTE_FAST_MEM */ i1_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size);
static inline uint8_t /* LV_ATTRIBUTE_FAST_MEM */ get_bit(const uint8_t *buf, int32_t bit_idx);
static void /* LV_ATTRIBUTE_FAST_MEM */ l8_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size);
static void /* LV_ATTRIBUTE_FAST_MEM */ rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size);
static void /* LV_ATTRIBUTE_FAST_MEM */ rgb888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc,
const uint8_t dest_px_size,
uint32_t src_px_size);
static void /* LV_ATTRIBUTE_FAST_MEM */ argb8888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc,
uint32_t dest_px_size);
static inline void /* LV_ATTRIBUTE_FAST_MEM */ lv_color_8_24_mix(const uint8_t src, uint8_t *dest, uint8_t mix);
static inline void /* LV_ATTRIBUTE_FAST_MEM */ lv_color_24_24_mix(const uint8_t *src, uint8_t *dest, uint8_t mix);
static inline void /* LV_ATTRIBUTE_FAST_MEM */ blend_non_normal_pixel(uint8_t *dest, lv_color32_t src,
lv_blend_mode_t mode);
static inline void * /* LV_ATTRIBUTE_FAST_MEM */ drawbuf_next_row(const void *buf, uint32_t stride);
/**********************
* STATIC VARIABLES
**********************/
/**********************
* MACROS
**********************/
#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888
#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_OPA
#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_MASK
#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_MASK(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_COLOR_BLEND_TO_RGB888_MIX_MASK_OPA
#define LV_DRAW_SW_COLOR_BLEND_TO_RGB888_MIX_MASK_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888
#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_OPA
#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_MASK
#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_MASK(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA
#define LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888
#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_OPA
#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_MASK
#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_MASK(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA
#define LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888
#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_OPA
#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_MASK
#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_MASK(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA
#define LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888
#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_OPA
#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_MASK
#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_MASK(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA
#define LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_I1_BLEND_NORMAL_TO_888
#define LV_DRAW_SW_I1_BLEND_NORMAL_TO_888(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_WITH_OPA
#define LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_WITH_OPA(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_WITH_MASK
#define LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_WITH_MASK(...) LV_RESULT_INVALID
#endif
#ifndef LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_MIX_MASK_OPA
#define LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_MIX_MASK_OPA(...) LV_RESULT_INVALID
#endif
/**********************
* GLOBAL FUNCTIONS
**********************/
/**
 * Fill a rectangular area of an RGB888 (3 bytes/px) or XRGB8888 (4 bytes/px)
 * destination buffer with a solid color, optionally modulated by opacity and/or
 * a per-pixel mask.
 *
 * @param dsc          fill descriptor: destination buffer/stride/size, color,
 *                     opacity, optional mask buffer + stride, and `use_asm`
 *                     (test-app switch to force the assembly path)
 * @param dest_px_size destination pixel size in bytes (3 or 4)
 *
 * Dispatch: simple fill / opacity only / mask only / mask + opacity. Each of the
 * last three tries the LV_DRAW_SW_* macro first (assembly hook; falls back to
 * LV_RESULT_INVALID when not provided) and runs the C loop only on failure.
 */
void LV_ATTRIBUTE_FAST_MEM lv_draw_sw_blend_color_to_rgb888(_lv_draw_sw_blend_fill_dsc_t *dsc, uint32_t dest_px_size)
{
int32_t w = dsc->dest_w;
int32_t h = dsc->dest_h;
lv_opa_t opa = dsc->opa;
const lv_opa_t *mask = dsc->mask_buf;
int32_t mask_stride = dsc->mask_stride;
int32_t dest_stride = dsc->dest_stride;

int32_t x;
int32_t y;

/* Silence unused warnings: some combinations are only used in the C fallbacks. */
LV_UNUSED(w);
LV_UNUSED(h);
LV_UNUSED(x);
LV_UNUSED(y);
LV_UNUSED(opa);
LV_UNUSED(mask);
LV_UNUSED(mask_stride);
LV_UNUSED(dest_stride);

/*Simple fill*/
if (mask == NULL && opa >= LV_OPA_MAX) {
// NOTE(review): unlike the other branches, the asm hook is gated by the explicit
// use_asm test flag here, and its return value is ignored — if the macro expands
// to the LV_RESULT_INVALID fallback, nothing is drawn. Confirm use_asm is only
// set when the assembly implementation is compiled in.
if (dsc->use_asm && dest_px_size == 3) {
LV_DRAW_SW_COLOR_BLEND_TO_RGB888(dsc, dest_px_size);
} else {
if (dest_px_size == 3) {
uint8_t *dest_buf_u8 = dsc->dest_buf;
uint8_t *dest_buf_ori = dsc->dest_buf;
w *= dest_px_size;  // from here on, w is the row length in bytes

// Write the first row byte-by-byte (B, G, R order), then replicate it
// into the remaining rows with lv_memcpy.
for (x = 0; x < w; x += 3) {
dest_buf_u8[x + 0] = dsc->color.blue;
dest_buf_u8[x + 1] = dsc->color.green;
dest_buf_u8[x + 2] = dsc->color.red;
}

dest_buf_u8 += dest_stride;

for (y = 1; y < h; y++) {
lv_memcpy(dest_buf_u8, dest_buf_ori, w);
dest_buf_u8 += dest_stride;
}
}
if (dest_px_size == 4) {
uint32_t color32 = lv_color_to_u32(dsc->color);
uint32_t *dest_buf_u32 = dsc->dest_buf;
for (y = 0; y < h; y++) {
// 16x manually unrolled store loop; the scalar loop below handles the tail.
for (x = 0; x <= w - 16; x += 16) {
dest_buf_u32[x + 0] = color32;
dest_buf_u32[x + 1] = color32;
dest_buf_u32[x + 2] = color32;
dest_buf_u32[x + 3] = color32;

dest_buf_u32[x + 4] = color32;
dest_buf_u32[x + 5] = color32;
dest_buf_u32[x + 6] = color32;
dest_buf_u32[x + 7] = color32;

dest_buf_u32[x + 8] = color32;
dest_buf_u32[x + 9] = color32;
dest_buf_u32[x + 10] = color32;
dest_buf_u32[x + 11] = color32;

dest_buf_u32[x + 12] = color32;
dest_buf_u32[x + 13] = color32;
dest_buf_u32[x + 14] = color32;
dest_buf_u32[x + 15] = color32;
}
for (; x < w; x ++) {
dest_buf_u32[x] = color32;
}

dest_buf_u32 = drawbuf_next_row(dest_buf_u32, dest_stride);
}
}
}
}
/*Opacity only*/
else if (mask == NULL && opa < LV_OPA_MAX) {
if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_OPA(dsc, dest_px_size)) {
uint32_t color32 = lv_color_to_u32(dsc->color);
uint8_t *dest_buf = dsc->dest_buf;
w *= dest_px_size;  // iterate in bytes, stepping one pixel at a time

for (y = 0; y < h; y++) {
for (x = 0; x < w; x += dest_px_size) {
lv_color_24_24_mix((const uint8_t *)&color32, &dest_buf[x], opa);
}

dest_buf = drawbuf_next_row(dest_buf, dest_stride);
}
}
}
/*Masked with full opacity*/
else if (mask && opa >= LV_OPA_MAX) {
if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_RGB888_WITH_MASK(dsc, dest_px_size)) {
uint32_t color32 = lv_color_to_u32(dsc->color);
uint8_t *dest_buf = dsc->dest_buf;
w *= dest_px_size;

for (y = 0; y < h; y++) {
uint32_t mask_x;
// x walks destination bytes, mask_x walks mask pixels (one entry per pixel)
for (x = 0, mask_x = 0; x < w; x += dest_px_size, mask_x++) {
lv_color_24_24_mix((const uint8_t *)&color32, &dest_buf[x], mask[mask_x]);
}

dest_buf += dest_stride;
mask += mask_stride;
}
}
}
/*Masked with opacity*/
else {
if (LV_RESULT_INVALID == LV_DRAW_SW_COLOR_BLEND_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size)) {
uint32_t color32 = lv_color_to_u32(dsc->color);
uint8_t *dest_buf = dsc->dest_buf;
w *= dest_px_size;

for (y = 0; y < h; y++) {
uint32_t mask_x;
for (x = 0, mask_x = 0; x < w; x += dest_px_size, mask_x++) {
lv_color_24_24_mix((const uint8_t *) &color32, &dest_buf[x], LV_OPA_MIX2(opa, mask[mask_x]));
}

dest_buf += dest_stride;
mask += mask_stride;
}
}
}
}
/**
 * Blend a source image onto an RGB888/XRGB8888 destination, dispatching to the
 * blender that matches the source color format.
 *
 * @param dsc          image blend descriptor (source buffer, format, opacity,
 *                     mask, blend mode, destination buffer/stride)
 * @param dest_px_size destination pixel size in bytes (3 or 4)
 *
 * Unsupported source formats are reported with a warning and skipped.
 */
void LV_ATTRIBUTE_FAST_MEM lv_draw_sw_blend_image_to_rgb888(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size)
{
    const lv_color_format_t src_format = dsc->src_color_format;

    if (src_format == LV_COLOR_FORMAT_RGB565) {
        rgb565_image_blend(dsc, dest_px_size);
    } else if (src_format == LV_COLOR_FORMAT_RGB888) {
        /* RGB888 source: 3 bytes per source pixel */
        rgb888_image_blend(dsc, dest_px_size, 3);
    } else if (src_format == LV_COLOR_FORMAT_XRGB8888) {
        /* XRGB8888 source: same channel layout as RGB888 but padded to 4 bytes */
        rgb888_image_blend(dsc, dest_px_size, 4);
    } else if (src_format == LV_COLOR_FORMAT_ARGB8888) {
        argb8888_image_blend(dsc, dest_px_size);
    } else if (src_format == LV_COLOR_FORMAT_L8) {
        l8_image_blend(dsc, dest_px_size);
    } else if (src_format == LV_COLOR_FORMAT_AL88) {
        al88_image_blend(dsc, dest_px_size);
    } else if (src_format == LV_COLOR_FORMAT_I1) {
        i1_image_blend(dsc, dest_px_size);
    } else {
        LV_LOG_WARN("Not supported source color format");
    }
}
/**********************
* STATIC FUNCTIONS
**********************/
/**
 * Blend an I1 (1 bit per pixel, indexed) source image onto an RGB888/XRGB8888
 * destination. Each source bit is expanded to full black (0) or white (255)
 * on all three channels, then combined per the opacity/mask/blend-mode case.
 *
 * @param dsc          image blend descriptor (buffers, strides, opa, mask, mode)
 * @param dest_px_size destination pixel size in bytes (3 or 4)
 *
 * Normal blend mode tries the LV_DRAW_SW_I1_* assembly hooks first and falls
 * back to the C loops when they return LV_RESULT_INVALID; other blend modes
 * always use blend_non_normal_pixel().
 */
static void LV_ATTRIBUTE_FAST_MEM i1_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size)
{
int32_t w = dsc->dest_w;
int32_t h = dsc->dest_h;
lv_opa_t opa = dsc->opa;
uint8_t *dest_buf_u8 = dsc->dest_buf;
int32_t dest_stride = dsc->dest_stride;
const uint8_t *src_buf_i1 = dsc->src_buf;
int32_t src_stride = dsc->src_stride;
const lv_opa_t *mask_buf = dsc->mask_buf;
int32_t mask_stride = dsc->mask_stride;

// dest_x advances in bytes (by dest_px_size); src_x advances in pixels/bits
int32_t dest_x;
int32_t src_x;
int32_t y;

if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
if (mask_buf == NULL && opa >= LV_OPA_MAX) {
/* Full opacity, no mask */
if (LV_RESULT_INVALID == LV_DRAW_SW_I1_BLEND_NORMAL_TO_888(dsc)) {
for (y = 0; y < h; y++) {
for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
// Expand the bit to 0x00 or 0xFF and write it to B, G and R
uint8_t chan_val = get_bit(src_buf_i1, src_x) * 255;
dest_buf_u8[dest_x + 2] = chan_val;
dest_buf_u8[dest_x + 1] = chan_val;
dest_buf_u8[dest_x + 0] = chan_val;
}
dest_buf_u8 = drawbuf_next_row(dest_buf_u8, dest_stride);
src_buf_i1 = drawbuf_next_row(src_buf_i1, src_stride);
}
}
} else if (mask_buf == NULL && opa < LV_OPA_MAX) {
/* Opacity only */
if (LV_RESULT_INVALID == LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_WITH_OPA(dsc)) {
for (y = 0; y < h; y++) {
for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
uint8_t chan_val = get_bit(src_buf_i1, src_x) * 255;
lv_color_8_24_mix(chan_val, &dest_buf_u8[dest_x], opa);
}
dest_buf_u8 = drawbuf_next_row(dest_buf_u8, dest_stride);
src_buf_i1 = drawbuf_next_row(src_buf_i1, src_stride);
}
}
} else if (mask_buf && opa >= LV_OPA_MAX) {
/* Mask only */
if (LV_RESULT_INVALID == LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_WITH_MASK(dsc)) {
for (y = 0; y < h; y++) {
for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
uint8_t chan_val = get_bit(src_buf_i1, src_x) * 255;
lv_color_8_24_mix(chan_val, &dest_buf_u8[dest_x], mask_buf[src_x]);
}
dest_buf_u8 = drawbuf_next_row(dest_buf_u8, dest_stride);
src_buf_i1 = drawbuf_next_row(src_buf_i1, src_stride);
mask_buf += mask_stride;
}
}
} else if (mask_buf && opa < LV_OPA_MAX) {
/* Mask combined with opacity */
if (LV_RESULT_INVALID == LV_DRAW_SW_I1_BLEND_NORMAL_TO_888_MIX_MASK_OPA(dsc)) {
for (y = 0; y < h; y++) {
for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
uint8_t chan_val = get_bit(src_buf_i1, src_x) * 255;
lv_color_8_24_mix(chan_val, &dest_buf_u8[dest_x], LV_OPA_MIX2(opa, mask_buf[src_x]));
}
dest_buf_u8 = drawbuf_next_row(dest_buf_u8, dest_stride);
src_buf_i1 = drawbuf_next_row(src_buf_i1, src_stride);
mask_buf += mask_stride;
}
}
}
} else {
/* Non-normal blend modes: build an ARGB pixel and delegate per pixel */
for (y = 0; y < h; y++) {
for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
lv_color32_t src_argb;
src_argb.red = get_bit(src_buf_i1, src_x) * 255;
src_argb.green = src_argb.red;
src_argb.blue = src_argb.red;
if (mask_buf == NULL) {
src_argb.alpha = opa;
} else {
src_argb.alpha = LV_OPA_MIX2(mask_buf[src_x], opa);
}
blend_non_normal_pixel(&dest_buf_u8[dest_x], src_argb, dsc->blend_mode);
}
if (mask_buf) {
mask_buf += mask_stride;
}
dest_buf_u8 = drawbuf_next_row(dest_buf_u8, dest_stride);
src_buf_i1 = drawbuf_next_row(src_buf_i1, src_stride);
}
}
}
/**
 * Blend an AL88 (8-bit luminance + 8-bit alpha) source image onto an
 * RGB888/XRGB8888 destination. The luminance is replicated to R, G and B;
 * the source alpha is combined with the global opacity and optional mask.
 *
 * @param dsc          image blend descriptor (buffers, strides, opa, mask, mode)
 * @param dest_px_size destination pixel size in bytes (3 or 4)
 *
 * Normal blend mode routes through the LV_DRAW_SW_L8_* hooks (the dsc carries
 * the AL88 format; the macros fall back to LV_RESULT_INVALID when no assembly
 * implementation is provided), then runs the C loops. Other blend modes use
 * blend_non_normal_pixel() per pixel.
 */
static void LV_ATTRIBUTE_FAST_MEM al88_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size)
{
    int32_t w = dsc->dest_w;
    int32_t h = dsc->dest_h;
    lv_opa_t opa = dsc->opa;
    uint8_t *dest_buf_u8 = dsc->dest_buf;
    int32_t dest_stride = dsc->dest_stride;
    const lv_color16a_t *src_buf_al88 = dsc->src_buf;
    int32_t src_stride = dsc->src_stride;
    const lv_opa_t *mask_buf = dsc->mask_buf;
    int32_t mask_stride = dsc->mask_stride;

    /* dest_x advances in bytes (by dest_px_size); src_x advances in pixels */
    int32_t dest_x;
    int32_t src_x;
    int32_t y;

    if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
        if (mask_buf == NULL && opa >= LV_OPA_MAX) {
            /* Source alpha only */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_8_24_mix(src_buf_al88[src_x].lumi, &dest_buf_u8[dest_x], src_buf_al88[src_x].alpha);
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride);
                }
            }
        } else if (mask_buf == NULL && opa < LV_OPA_MAX) {
            /* Source alpha combined with global opacity */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_8_24_mix(src_buf_al88[src_x].lumi, &dest_buf_u8[dest_x], LV_OPA_MIX2(src_buf_al88[src_x].alpha, opa));
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride);
                }
            }
        } else if (mask_buf && opa >= LV_OPA_MAX) {
            /* Source alpha combined with mask */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_8_24_mix(src_buf_al88[src_x].lumi, &dest_buf_u8[dest_x], LV_OPA_MIX2(src_buf_al88[src_x].alpha,
                                                                                                      mask_buf[src_x]));
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride);
                    mask_buf += mask_stride;
                }
            }
        } else if (mask_buf && opa < LV_OPA_MAX) {
            /* Source alpha combined with mask and global opacity */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_8_24_mix(src_buf_al88[src_x].lumi, &dest_buf_u8[dest_x], LV_OPA_MIX3(src_buf_al88[src_x].alpha,
                                                                                                      mask_buf[src_x], opa));
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride);
                    mask_buf += mask_stride;
                }
            }
        }
    } else {
        /* Non-normal blend modes: build an ARGB pixel and delegate per pixel */
        for (y = 0; y < h; y++) {
            for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                lv_color32_t src_argb;
                src_argb.red = src_argb.green = src_argb.blue = src_buf_al88[src_x].lumi;
                if (mask_buf == NULL) {
                    src_argb.alpha = LV_OPA_MIX2(src_buf_al88[src_x].alpha, opa);
                } else {
                    /* FIX: was mask_buf[dest_x] — dest_x is a destination BYTE offset
                     * (steps by dest_px_size), while the mask holds one lv_opa_t per
                     * source pixel. Indexing by dest_x read wrong (and, for wide rows,
                     * out-of-range) mask entries; every other path here and
                     * i1_image_blend() index the mask by src_x. */
                    src_argb.alpha = LV_OPA_MIX3(src_buf_al88[src_x].alpha, mask_buf[src_x], opa);
                }
                blend_non_normal_pixel(&dest_buf_u8[dest_x], src_argb, dsc->blend_mode);
            }
            if (mask_buf) {
                mask_buf += mask_stride;
            }
            dest_buf_u8 += dest_stride;
            src_buf_al88 = drawbuf_next_row(src_buf_al88, src_stride);
        }
    }
}
/**
 * Blend an L8 (8-bit grayscale) source image onto an RGB888/XRGB8888 destination.
 *
 * @param dsc           blend descriptor: buffers, strides, area size, opacity, mask, blend mode
 * @param dest_px_size  destination pixel size in bytes (3 for RGB888, 4 for XRGB8888)
 *
 * For LV_BLEND_MODE_NORMAL each opacity/mask combination first tries the
 * LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888* hook and falls back to the C loop when the
 * hook returns LV_RESULT_INVALID. The mask holds one lv_opa_t per source pixel
 * (it is indexed by src_x in the NORMAL branches and advanced by mask_stride per row).
 *
 * Fix vs. previous revision: the non-normal branch indexed the mask with dest_x,
 * which advances by dest_px_size bytes per pixel, reading the wrong mask entry
 * (and past the row for any pixel after the first). It now uses the per-pixel
 * index src_x, consistent with the NORMAL branches and rgb565_image_blend().
 */
static void LV_ATTRIBUTE_FAST_MEM l8_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size)
{
    int32_t w = dsc->dest_w;
    int32_t h = dsc->dest_h;
    lv_opa_t opa = dsc->opa;
    uint8_t *dest_buf_u8 = dsc->dest_buf;
    int32_t dest_stride = dsc->dest_stride;
    const uint8_t *src_buf_l8 = dsc->src_buf;
    int32_t src_stride = dsc->src_stride;
    const lv_opa_t *mask_buf = dsc->mask_buf;
    int32_t mask_stride = dsc->mask_stride;
    int32_t dest_x;
    int32_t src_x;
    int32_t y;
    if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
        if (mask_buf == NULL && opa >= LV_OPA_MAX) {
            /* Fully opaque, no mask: replicate the gray level into B, G, R */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        dest_buf_u8[dest_x + 2] = src_buf_l8[src_x];
                        dest_buf_u8[dest_x + 1] = src_buf_l8[src_x];
                        dest_buf_u8[dest_x + 0] = src_buf_l8[src_x];
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride);
                }
            }
        } else if (mask_buf == NULL && opa < LV_OPA_MAX) {
            /* Constant opacity, no mask */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_8_24_mix(src_buf_l8[src_x], &dest_buf_u8[dest_x], opa);
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride);
                }
            }
        } else if (mask_buf && opa >= LV_OPA_MAX) {
            /* Per-pixel mask only */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_8_24_mix(src_buf_l8[src_x], &dest_buf_u8[dest_x], mask_buf[src_x]);
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride);
                    mask_buf += mask_stride;
                }
            }
        } else if (mask_buf && opa < LV_OPA_MAX) {
            /* Per-pixel mask combined with constant opacity */
            if (LV_RESULT_INVALID == LV_DRAW_SW_L8_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_8_24_mix(src_buf_l8[src_x], &dest_buf_u8[dest_x], LV_OPA_MIX2(opa, mask_buf[src_x]));
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride);
                    mask_buf += mask_stride;
                }
            }
        }
    } else {
        /* Additive / subtractive / multiply: build an ARGB pixel and defer to the generic helper */
        lv_color32_t src_argb;
        for (y = 0; y < h; y++) {
            for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                src_argb.red = src_buf_l8[src_x];
                src_argb.green = src_buf_l8[src_x];
                src_argb.blue = src_buf_l8[src_x];
                if (mask_buf == NULL) {
                    src_argb.alpha = opa;
                } else {
                    /* Bug fix: index the mask per pixel (src_x); dest_x is a byte offset */
                    src_argb.alpha = LV_OPA_MIX2(mask_buf[src_x], opa);
                }
                blend_non_normal_pixel(&dest_buf_u8[dest_x], src_argb, dsc->blend_mode);
            }
            if (mask_buf) {
                mask_buf += mask_stride;
            }
            dest_buf_u8 += dest_stride;
            src_buf_l8 = drawbuf_next_row(src_buf_l8, src_stride);
        }
    }
}
/**
 * Blend an RGB565 source image onto an RGB888/XRGB8888 destination.
 *
 * @param dsc           blend descriptor: buffers, strides, area size, opacity, mask, blend mode
 * @param dest_px_size  destination pixel size in bytes (3 for RGB888, 4 for XRGB8888)
 *
 * Channel expansion uses fixed-point scaling: 5-bit channels are multiplied by
 * 2106 (~(255/31) << 8 with rounding) and 6-bit channels by 1037 (~(255/63) << 8),
 * then shifted right by 8. Each NORMAL-mode branch first tries the
 * LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888* hook and runs the C loop only when
 * the hook returns LV_RESULT_INVALID.
 */
static void LV_ATTRIBUTE_FAST_MEM rgb565_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size)
{
    int32_t w = dsc->dest_w;
    int32_t h = dsc->dest_h;
    lv_opa_t opa = dsc->opa;
    uint8_t *dest_buf_u8 = dsc->dest_buf;
    int32_t dest_stride = dsc->dest_stride;
    const lv_color16_t *src_buf_c16 = (const lv_color16_t *) dsc->src_buf;
    int32_t src_stride = dsc->src_stride;
    const lv_opa_t *mask_buf = dsc->mask_buf;
    int32_t mask_stride = dsc->mask_stride;
    int32_t src_x;
    int32_t dest_x;
    int32_t y;
    if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
        if (mask_buf == NULL && opa >= LV_OPA_MAX) {
            /* Fully opaque, no mask: direct expansion, written B, G, R (LSB first) */
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        dest_buf_u8[dest_x + 2] = (src_buf_c16[src_x].red * 2106) >> 8; /*To make it rounded*/
                        dest_buf_u8[dest_x + 1] = (src_buf_c16[src_x].green * 1037) >> 8;
                        dest_buf_u8[dest_x + 0] = (src_buf_c16[src_x].blue * 2106) >> 8;
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
                }
            }
        } else if (mask_buf == NULL && opa < LV_OPA_MAX) {
            /* Constant opacity: expand into a temporary BGR triple, then mix */
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size)) {
                uint8_t res[3];
                for (y = 0; y < h; y++) {
                    for (src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        res[2] = (src_buf_c16[src_x].red * 2106) >> 8; /*To make it rounded*/
                        res[1] = (src_buf_c16[src_x].green * 1037) >> 8;
                        res[0] = (src_buf_c16[src_x].blue * 2106) >> 8;
                        lv_color_24_24_mix(res, &dest_buf_u8[dest_x], opa);
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
                }
            }
        } else if (mask_buf && opa >= LV_OPA_MAX) {
            /* Per-pixel mask only: mask holds one lv_opa_t per pixel, advanced by mask_stride per row */
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size)) {
                uint8_t res[3];
                for (y = 0; y < h; y++) {
                    for (src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        res[2] = (src_buf_c16[src_x].red * 2106) >> 8; /*To make it rounded*/
                        res[1] = (src_buf_c16[src_x].green * 1037) >> 8;
                        res[0] = (src_buf_c16[src_x].blue * 2106) >> 8;
                        lv_color_24_24_mix(res, &dest_buf_u8[dest_x], mask_buf[src_x]);
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
                    mask_buf += mask_stride;
                }
            }
        } else {
            /* Per-pixel mask combined with constant opacity */
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB565_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size)) {
                uint8_t res[3];
                for (y = 0; y < h; y++) {
                    for (src_x = 0, dest_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        res[2] = (src_buf_c16[src_x].red * 2106) >> 8; /*To make it rounded*/
                        res[1] = (src_buf_c16[src_x].green * 1037) >> 8;
                        res[0] = (src_buf_c16[src_x].blue * 2106) >> 8;
                        lv_color_24_24_mix(res, &dest_buf_u8[dest_x], LV_OPA_MIX2(opa, mask_buf[src_x]));
                    }
                    dest_buf_u8 += dest_stride;
                    src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
                    mask_buf += mask_stride;
                }
            }
        }
    } else {
        /* Additive / subtractive / multiply: expand to ARGB and defer to the generic helper */
        lv_color32_t src_argb;
        for (y = 0; y < h; y++) {
            for (src_x = 0, dest_x = 0; src_x < w; src_x++, dest_x += dest_px_size) {
                src_argb.red = (src_buf_c16[src_x].red * 2106) >> 8;
                src_argb.green = (src_buf_c16[src_x].green * 1037) >> 8;
                src_argb.blue = (src_buf_c16[src_x].blue * 2106) >> 8;
                if (mask_buf == NULL) {
                    src_argb.alpha = opa;
                } else {
                    src_argb.alpha = LV_OPA_MIX2(mask_buf[src_x], opa);
                }
                blend_non_normal_pixel(&dest_buf_u8[dest_x], src_argb, dsc->blend_mode);
            }
            if (mask_buf) {
                mask_buf += mask_stride;
            }
            dest_buf_u8 += dest_stride;
            src_buf_c16 = drawbuf_next_row(src_buf_c16, src_stride);
        }
    }
}
/**
 * Blend an RGB888/XRGB8888 source image onto an RGB888/XRGB8888 destination.
 *
 * @param dsc           blend descriptor: buffers, strides, area size, opacity, mask, blend mode
 * @param dest_px_size  destination pixel size in bytes (3 or 4)
 * @param src_px_size   source pixel size in bytes (3 or 4)
 *
 * Note: unlike the other *_image_blend() helpers, 'w' here is a BYTE width
 * (dest_w * dest_px_size) and both dest_x and src_x advance by their pixel
 * sizes, so neither is a per-pixel index. The mask holds one lv_opa_t per
 * pixel, so mask access needs its own counter (mask_x) — the WITH_MASK and
 * MIX_MASK_OPA branches already did this.
 *
 * Fix vs. previous revision: the non-normal branch read mask_buf[dest_x]
 * (a byte offset), picking the wrong mask entry and reading past the mask row;
 * it now uses a mask_x pixel counter like the NORMAL mask branches.
 */
static void LV_ATTRIBUTE_FAST_MEM rgb888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, const uint8_t dest_px_size,
                                                     uint32_t src_px_size)
{
    int32_t w = dsc->dest_w * dest_px_size;
    int32_t h = dsc->dest_h;
    lv_opa_t opa = dsc->opa;
    uint8_t *dest_buf = dsc->dest_buf;
    int32_t dest_stride = dsc->dest_stride;
    const uint8_t *src_buf = dsc->src_buf;
    int32_t src_stride = dsc->src_stride;
    const lv_opa_t *mask_buf = dsc->mask_buf;
    int32_t mask_stride = dsc->mask_stride;
    int32_t dest_x;
    int32_t src_x;
    int32_t y;
    if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
        /*Special case*/
        if (mask_buf == NULL && opa >= LV_OPA_MAX) {
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size, src_px_size)) {
                if (src_px_size == dest_px_size) {
                    /* Same layout: whole rows can be copied at once */
                    for (y = 0; y < h; y++) {
                        lv_memcpy(dest_buf, src_buf, w);
                        dest_buf += dest_stride;
                        src_buf += src_stride;
                    }
                } else {
                    /* Different pixel sizes: copy B, G, R per pixel, skip padding bytes */
                    for (y = 0; y < h; y++) {
                        for (dest_x = 0, src_x = 0; dest_x < w; dest_x += dest_px_size, src_x += src_px_size) {
                            dest_buf[dest_x + 0] = src_buf[src_x + 0];
                            dest_buf[dest_x + 1] = src_buf[src_x + 1];
                            dest_buf[dest_x + 2] = src_buf[src_x + 2];
                        }
                        dest_buf += dest_stride;
                        src_buf += src_stride;
                    }
                }
            }
        }
        if (mask_buf == NULL && opa < LV_OPA_MAX) {
            /* Constant opacity, no mask */
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size, src_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; dest_x < w; dest_x += dest_px_size, src_x += src_px_size) {
                        lv_color_24_24_mix(&src_buf[src_x], &dest_buf[dest_x], opa);
                    }
                    dest_buf += dest_stride;
                    src_buf += src_stride;
                }
            }
        }
        if (mask_buf && opa >= LV_OPA_MAX) {
            /* Per-pixel mask only */
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size, src_px_size)) {
                uint32_t mask_x;
                for (y = 0; y < h; y++) {
                    for (mask_x = 0, dest_x = 0, src_x = 0; dest_x < w; mask_x++, dest_x += dest_px_size, src_x += src_px_size) {
                        lv_color_24_24_mix(&src_buf[src_x], &dest_buf[dest_x], mask_buf[mask_x]);
                    }
                    dest_buf += dest_stride;
                    src_buf += src_stride;
                    mask_buf += mask_stride;
                }
            }
        }
        if (mask_buf && opa < LV_OPA_MAX) {
            /* Per-pixel mask combined with constant opacity */
            if (LV_RESULT_INVALID == LV_DRAW_SW_RGB888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size, src_px_size)) {
                uint32_t mask_x;
                for (y = 0; y < h; y++) {
                    for (mask_x = 0, dest_x = 0, src_x = 0; dest_x < w; mask_x++, dest_x += dest_px_size, src_x += src_px_size) {
                        lv_color_24_24_mix(&src_buf[src_x], &dest_buf[dest_x], LV_OPA_MIX2(opa, mask_buf[mask_x]));
                    }
                    dest_buf += dest_stride;
                    src_buf += src_stride;
                    mask_buf += mask_stride;
                }
            }
        }
    } else {
        /* Additive / subtractive / multiply */
        lv_color32_t src_argb;
        uint32_t mask_x;
        for (y = 0; y < h; y++) {
            for (mask_x = 0, dest_x = 0, src_x = 0; dest_x < w; mask_x++, dest_x += dest_px_size, src_x += src_px_size) {
                src_argb.red = src_buf[src_x + 2];
                src_argb.green = src_buf[src_x + 1];
                src_argb.blue = src_buf[src_x + 0];
                if (mask_buf == NULL) {
                    src_argb.alpha = opa;
                } else {
                    /* Bug fix: use the per-pixel mask index, not the byte offset dest_x */
                    src_argb.alpha = LV_OPA_MIX2(mask_buf[mask_x], opa);
                }
                blend_non_normal_pixel(&dest_buf[dest_x], src_argb, dsc->blend_mode);
            }
            if (mask_buf) {
                mask_buf += mask_stride;
            }
            dest_buf += dest_stride;
            src_buf += src_stride;
        }
    }
}
/**
 * Blend an ARGB8888 source image onto an RGB888/XRGB8888 destination.
 *
 * @param dsc           blend descriptor: buffers, strides, area size, opacity, mask, blend mode
 * @param dest_px_size  destination pixel size in bytes (3 for RGB888, 4 for XRGB8888)
 *
 * The source alpha channel is always folded into the effective opacity
 * (LV_OPA_MIX2/LV_OPA_MIX3). The mask holds one lv_opa_t per pixel, indexed
 * by src_x and advanced by mask_stride per row.
 *
 * Fix vs. previous revision: the non-normal branch indexed the mask with
 * dest_x, which advances by dest_px_size bytes per pixel; it now uses the
 * per-pixel index src_x, consistent with the NORMAL branches.
 */
static void LV_ATTRIBUTE_FAST_MEM argb8888_image_blend(_lv_draw_sw_blend_image_dsc_t *dsc, uint32_t dest_px_size)
{
    int32_t w = dsc->dest_w;
    int32_t h = dsc->dest_h;
    lv_opa_t opa = dsc->opa;
    uint8_t *dest_buf = dsc->dest_buf;
    int32_t dest_stride = dsc->dest_stride;
    const lv_color32_t *src_buf_c32 = dsc->src_buf;
    int32_t src_stride = dsc->src_stride;
    const lv_opa_t *mask_buf = dsc->mask_buf;
    int32_t mask_stride = dsc->mask_stride;
    int32_t dest_x;
    int32_t src_x;
    int32_t y;
    if (dsc->blend_mode == LV_BLEND_MODE_NORMAL) {
        if (mask_buf == NULL && opa >= LV_OPA_MAX) {
            /* No extra opacity: mix by the source pixel's own alpha */
            if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_24_24_mix((const uint8_t *)&src_buf_c32[src_x], &dest_buf[dest_x], src_buf_c32[src_x].alpha);
                    }
                    dest_buf += dest_stride;
                    src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
                }
            }
        } else if (mask_buf == NULL && opa < LV_OPA_MAX) {
            /* Source alpha scaled by constant opacity */
            if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_OPA(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_24_24_mix((const uint8_t *)&src_buf_c32[src_x], &dest_buf[dest_x], LV_OPA_MIX2(src_buf_c32[src_x].alpha, opa));
                    }
                    dest_buf += dest_stride;
                    src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
                }
            }
        } else if (mask_buf && opa >= LV_OPA_MAX) {
            /* Source alpha scaled by the per-pixel mask */
            if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_WITH_MASK(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_24_24_mix((const uint8_t *)&src_buf_c32[src_x], &dest_buf[dest_x],
                                           LV_OPA_MIX2(src_buf_c32[src_x].alpha, mask_buf[src_x]));
                    }
                    dest_buf += dest_stride;
                    src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
                    mask_buf += mask_stride;
                }
            }
        } else if (mask_buf && opa < LV_OPA_MAX) {
            /* Source alpha, mask and constant opacity all combined */
            if (LV_RESULT_INVALID == LV_DRAW_SW_ARGB8888_BLEND_NORMAL_TO_RGB888_MIX_MASK_OPA(dsc, dest_px_size)) {
                for (y = 0; y < h; y++) {
                    for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                        lv_color_24_24_mix((const uint8_t *)&src_buf_c32[src_x], &dest_buf[dest_x],
                                           LV_OPA_MIX3(src_buf_c32[src_x].alpha, mask_buf[src_x], opa));
                    }
                    dest_buf += dest_stride;
                    src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
                    mask_buf += mask_stride;
                }
            }
        }
    } else {
        /* Additive / subtractive / multiply */
        lv_color32_t src_argb;
        for (y = 0; y < h; y++) {
            for (dest_x = 0, src_x = 0; src_x < w; dest_x += dest_px_size, src_x++) {
                src_argb = src_buf_c32[src_x];
                if (mask_buf == NULL) {
                    src_argb.alpha = LV_OPA_MIX2(src_argb.alpha, opa);
                } else {
                    /* Bug fix: index the mask per pixel (src_x); dest_x is a byte offset */
                    src_argb.alpha = LV_OPA_MIX3(src_argb.alpha, mask_buf[src_x], opa);
                }
                blend_non_normal_pixel(&dest_buf[dest_x], src_argb, dsc->blend_mode);
            }
            if (mask_buf) {
                mask_buf += mask_stride;
            }
            dest_buf += dest_stride;
            src_buf_c32 = drawbuf_next_row(src_buf_c32, src_stride);
        }
    }
}
/**
 * Apply a non-normal blend mode to a single destination pixel.
 *
 * @param dest  destination pixel bytes, stored B, G, R
 * @param src   source color; its alpha is used for the final mix into dest
 * @param mode  one of LV_BLEND_MODE_ADDITIVE / SUBTRACTIVE / MULTIPLY
 *
 * Unsupported modes log a warning and leave dest unchanged.
 */
static inline void LV_ATTRIBUTE_FAST_MEM blend_non_normal_pixel(uint8_t *dest, lv_color32_t src, lv_blend_mode_t mode)
{
    uint8_t res[3] = {0, 0, 0};
    if (mode == LV_BLEND_MODE_ADDITIVE) {
        /* Saturating per-channel sum */
        res[0] = LV_MIN(dest[0] + src.blue, 255);
        res[1] = LV_MIN(dest[1] + src.green, 255);
        res[2] = LV_MIN(dest[2] + src.red, 255);
    } else if (mode == LV_BLEND_MODE_SUBTRACTIVE) {
        /* Per-channel difference clamped at zero */
        res[0] = LV_MAX(dest[0] - src.blue, 0);
        res[1] = LV_MAX(dest[1] - src.green, 0);
        res[2] = LV_MAX(dest[2] - src.red, 0);
    } else if (mode == LV_BLEND_MODE_MULTIPLY) {
        /* Per-channel product, scaled back by >> 8 */
        res[0] = (dest[0] * src.blue) >> 8;
        res[1] = (dest[1] * src.green) >> 8;
        res[2] = (dest[2] * src.red) >> 8;
    } else {
        LV_LOG_WARN("Not supported blend mode: %d", mode);
        return;
    }
    /* Blend the computed color into dest weighted by the source alpha */
    lv_color_24_24_mix(res, dest, src.alpha);
}
/**
 * Mix an 8-bit gray value into a 3-byte (B, G, R) destination pixel.
 *
 * @param src   gray level applied to all three channels
 * @param dest  destination pixel bytes (at least 3 writable bytes)
 * @param mix   weight of src: 0 keeps dest, >= LV_OPA_MAX overwrites it
 */
static inline void LV_ATTRIBUTE_FAST_MEM lv_color_8_24_mix(const uint8_t src, uint8_t *dest, uint8_t mix)
{
    if (mix == 0) {
        return;                       /* fully transparent: nothing to do */
    }
    if (mix >= LV_OPA_MAX) {
        /* (Nearly) opaque: replicate the gray level */
        dest[0] = src;
        dest[1] = src;
        dest[2] = src;
        return;
    }
    /* Weighted average, approximated with >> 8 instead of / 255 */
    lv_opa_t inv = 255 - mix;
    int32_t ch;
    for (ch = 0; ch < 3; ch++) {
        dest[ch] = (uint32_t)((uint32_t)src * mix + dest[ch] * inv) >> 8;
    }
}
/**
 * Mix a 3-byte source pixel into a 3-byte destination pixel (both B, G, R).
 *
 * @param src   source pixel bytes (at least 3 readable bytes)
 * @param dest  destination pixel bytes (at least 3 writable bytes)
 * @param mix   weight of src: 0 keeps dest, >= LV_OPA_MAX copies src verbatim
 */
static inline void LV_ATTRIBUTE_FAST_MEM lv_color_24_24_mix(const uint8_t *src, uint8_t *dest, uint8_t mix)
{
    if (mix == 0) {
        return;                       /* fully transparent: nothing to do */
    }
    if (mix >= LV_OPA_MAX) {
        /* (Nearly) opaque: straight copy of the three channels */
        dest[0] = src[0];
        dest[1] = src[1];
        dest[2] = src[2];
        return;
    }
    /* Weighted average, approximated with >> 8 instead of / 255 */
    lv_opa_t inv = 255 - mix;
    int32_t ch;
    for (ch = 0; ch < 3; ch++) {
        dest[ch] = (uint32_t)((uint32_t)src[ch] * mix + dest[ch] * inv) >> 8;
    }
}
/**
 * Return bit 'bit_idx' of a packed bit buffer, MSB-first within each byte.
 *
 * @param buf      packed bit buffer
 * @param bit_idx  zero-based bit index (expected non-negative)
 * @return 0 or 1
 */
static inline uint8_t LV_ATTRIBUTE_FAST_MEM get_bit(const uint8_t *buf, int32_t bit_idx)
{
    const uint8_t byte = buf[bit_idx / 8];    /* byte holding the bit */
    const int32_t shift = 7 - (bit_idx % 8);  /* MSB-first ordering */
    return (byte >> shift) & 1;
}
/**
 * Advance a draw-buffer pointer by one row.
 *
 * @param buf     pointer to the current row
 * @param stride  row stride in bytes
 * @return pointer to the next row
 */
static inline void *LV_ATTRIBUTE_FAST_MEM drawbuf_next_row(const void *buf, uint32_t stride)
{
    const uint8_t *row = (const uint8_t *)buf;
    return (void *)(row + stride);
}

View File

@ -0,0 +1,187 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*
* This file is derived from the LVGL project.
* See https://github.com/lvgl/lvgl for details.
*/
/**
* @file lv_string.c
*/
/*********************
* INCLUDES
*********************/
//#include "../../lv_conf_internal.h"
#if LV_USE_STDLIB_STRING == LV_STDLIB_BUILTIN
#include "lv_assert.h"
#include "lv_log.h"
#include "lv_math.h"
#include "lv_string.h"
/*********************
* DEFINES
*********************/
#ifdef LV_ARCH_64
#define MEM_UNIT uint64_t
#define ALIGN_MASK 0x7
#else
#define MEM_UNIT uint32_t
#define ALIGN_MASK 0x3
#endif
#define LV_ATTRIBUTE_FAST_MEM
/**********************
* TYPEDEFS
**********************/
/**********************
* STATIC PROTOTYPES
**********************/
/**********************
* STATIC VARIABLES
**********************/
/**********************
* MACROS
**********************/
#if LV_USE_LOG && LV_LOG_TRACE_MEM
#define LV_TRACE_MEM(...) LV_LOG_TRACE(__VA_ARGS__)
#else
#define LV_TRACE_MEM(...)
#endif
#define _COPY(d, s) *d = *s; d++; s++;
#define _SET(d, v) *d = v; d++;
#define _REPEAT8(expr) expr expr expr expr expr expr expr expr
/**********************
* GLOBAL FUNCTIONS
**********************/
/**
 * Copy 'len' bytes from 'src' to 'dst' and return 'dst'.
 *
 * Strategy:
 *  - < 16 bytes: plain byte loop.
 *  - src/dst misaligned relative to each other: unrolled byte copy (32 bytes per turn).
 *  - otherwise: align dst to the word boundary, copy 32-bit words in unrolled
 *    chunks of 32 bytes, then finish byte-wise.
 *
 * NOTE(review): like memcpy(), this assumes the ranges do not overlap —
 * lv_memmove() handles the overlapping case.
 */
void *LV_ATTRIBUTE_FAST_MEM lv_memcpy(void *dst, const void *src, size_t len)
{
    uint8_t *d8 = dst;
    const uint8_t *s8 = src;
    /*Simplify for small memories*/
    if (len < 16) {
        while (len) {
            *d8 = *s8;
            d8++;
            s8++;
            len--;
        }
        return dst;
    }
    lv_uintptr_t d_align = (lv_uintptr_t)d8 & ALIGN_MASK;
    lv_uintptr_t s_align = (lv_uintptr_t)s8 & ALIGN_MASK;
    /*Byte copy for unaligned memories*/
    /* Word access is impossible when the two pointers have different offsets
     * within a word, so stay byte-wise but unroll 4 x 8 copies per 32-byte chunk */
    if (s_align != d_align) {
        while (len > 32) {
            _REPEAT8(_COPY(d8, s8));
            _REPEAT8(_COPY(d8, s8));
            _REPEAT8(_COPY(d8, s8));
            _REPEAT8(_COPY(d8, s8));
            len -= 32;
        }
        while (len) {
            _COPY(d8, s8)
            len--;
        }
        return dst;
    }
    /*Make the memories aligned*/
    if (d_align) {
        /* Copy the leading bytes up to the next word boundary */
        d_align = ALIGN_MASK + 1 - d_align;
        while (d_align && len) {
            _COPY(d8, s8);
            d_align--;
            len--;
        }
    }
    /* Both pointers are now word aligned: copy 8 words (32 bytes) per turn */
    uint32_t *d32 = (uint32_t *)d8;
    const uint32_t *s32 = (uint32_t *)s8;
    while (len > 32) {
        _REPEAT8(_COPY(d32, s32))
        len -= 32;
    }
    /* Copy the remaining tail byte-wise */
    d8 = (uint8_t *)d32;
    s8 = (const uint8_t *)s32;
    while (len) {
        _COPY(d8, s8)
        len--;
    }
    return dst;
}
/**
 * Fill 'len' bytes at 'dst' with the byte value 'v'.
 *
 * Aligns the destination to a word boundary first, then stores the value
 * word-wide in 32-byte chunks, and finishes the tail byte by byte.
 */
void LV_ATTRIBUTE_FAST_MEM lv_memset(void *dst, uint8_t v, size_t len)
{
    uint8_t *d8 = (uint8_t *)dst;
    uintptr_t head = (lv_uintptr_t) d8 & ALIGN_MASK;
    /* Byte stores until the pointer is word aligned */
    if (head) {
        head = ALIGN_MASK + 1 - head;
        while (head && len) {
            *d8++ = v;
            len--;
            head--;
        }
    }
    /* Replicate the byte into all four lanes of a 32-bit pattern */
    uint32_t v32 = (uint32_t)v * 0x01010101U;
    uint32_t *d32 = (uint32_t *)d8;
    /* Store 8 words (32 bytes) per iteration */
    while (len > 32) {
        d32[0] = v32;
        d32[1] = v32;
        d32[2] = v32;
        d32[3] = v32;
        d32[4] = v32;
        d32[5] = v32;
        d32[6] = v32;
        d32[7] = v32;
        d32 += 8;
        len -= 32;
    }
    /* Remaining tail bytes */
    d8 = (uint8_t *)d32;
    while (len) {
        *d8++ = v;
        len--;
    }
}
/**
 * Copy 'len' bytes from 'src' to 'dst', handling overlapping ranges, and
 * return 'dst'.
 *
 * Non-overlapping ranges are delegated to lv_memcpy(). When dst overlaps the
 * tail of src the copy runs backwards so source bytes are read before being
 * overwritten; otherwise a forward byte copy is safe.
 */
void *LV_ATTRIBUTE_FAST_MEM lv_memmove(void *dst, const void *src, size_t len)
{
    /* No overlap (dst entirely below src, or entirely above src + len) */
    if (dst < src || (char *)dst > ((char *)src + len)) {
        return lv_memcpy(dst, src, len);
    }
    if (dst > src) {
        /* dst overlaps the end of src: copy from the last byte backwards */
        char *d = (char *)dst + len - 1;
        char *s = (char *)src + len - 1;
        while (len--) {
            *d-- = *s--;
        }
    } else {
        /* dst at or below src: forward copy cannot clobber unread bytes */
        char *d = (char *)dst;
        char *s = (char *)src;
        while (len--) {
            *d++ = *s++;
        }
    }
    return dst;
}
/**********************
* STATIC FUNCTIONS
**********************/
#endif /*LV_STDLIB_BUILTIN*/

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -42,7 +42,8 @@ typedef struct {
void *p_asm_alloc; // pointer to the beginning of the memory allocated for ASM test buf, used in free() void *p_asm_alloc; // pointer to the beginning of the memory allocated for ASM test buf, used in free()
void *p_ansi_alloc; // pointer to the beginning of the memory allocated for ANSI test buf, used in free() void *p_ansi_alloc; // pointer to the beginning of the memory allocated for ANSI test buf, used in free()
} buf; } buf;
void (*blend_api_func)(_lv_draw_sw_blend_fill_dsc_t *); // pointer to LVGL API function void (*blend_api_func)(_lv_draw_sw_blend_fill_dsc_t *); // pointer to LVGL API function
void (*blend_api_px_func)(_lv_draw_sw_blend_fill_dsc_t *, uint32_t); // pointer to LVGL API function with dest_px_size argument
lv_color_format_t color_format; // LV color format lv_color_format_t color_format; // LV color format
size_t data_type_size; // Used data type size, eg sizeof() size_t data_type_size; // Used data type size, eg sizeof()
size_t active_buf_len; // Length of buffer, where the actual data are stored (not including Canary bytes) size_t active_buf_len; // Length of buffer, where the actual data are stored (not including Canary bytes)
@ -64,8 +65,9 @@ typedef struct {
unsigned int cc_width; // Corner case test array width unsigned int cc_width; // Corner case test array width
unsigned int benchmark_cycles; // Count of benchmark cycles unsigned int benchmark_cycles; // Count of benchmark cycles
void *array_align16; // test array with 16 byte alignment - testing most ideal case void *array_align16; // test array with 16 byte alignment - testing most ideal case
void *array_align1; // test array with 1 byte alignment - testing wort case void *array_align1; // test array with 1 byte alignment - testing worst case
void (*blend_api_func)(_lv_draw_sw_blend_fill_dsc_t *); // pointer to LVGL API function void (*blend_api_func)(_lv_draw_sw_blend_fill_dsc_t *); // pointer to LVGL API function
void (*blend_api_px_func)(_lv_draw_sw_blend_fill_dsc_t *, uint32_t); // pointer to LVGL API function with dest_px_size argument
} bench_test_case_params_t; } bench_test_case_params_t;
#ifdef __cplusplus #ifdef __cplusplus

View File

@ -0,0 +1,111 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "esp_err.h"
#include <stdint.h>
#include "lv_color.h"
#include "lv_draw_sw_blend.h"
#ifdef __cplusplus
extern "C" {
#endif
// ------------------------------------------------- Macros and Types --------------------------------------------------
/**
* @brief Type of blend DUT function
*/
typedef enum {
    OPERATION_FILL,           /*!< Plain fill blend operation (full opacity) */
    OPERATION_FILL_WITH_OPA,  /*!< Fill blend operation with an opacity parameter */
} blend_operation_t;
/**
* @brief Canary pixels amount depending on data type
* @note
* - We should use at least 16 bytes of memory for canary pixels because of esp32s3 TIE 16-bytes wide Q registers
* - Canary pixels are multiplied by sizeof(used_data_type) to get the memory length occupied by the canary pixels
* - The memory occupied by canary pixels should be in 16-byte multiples, to achieve 16-byte memory alignment in functionality test
* - For example, ideally, for RGB565 we would need 8 canary pixels -> 8 * sizeof(uint16_t) = 16
*/
typedef enum {
CANARY_PIXELS_ARGB8888 = 4, /*!< Canary pixels: 4 * sizeof(uint32_t) = 16 */
CANARY_PIXELS_RGB565 = 8, /*!< Canary pixels: 8 * sizeof(uint16_t) = 16 */
} canary_pixels_t;
/**
* @brief Functionality test combinations for LV Image
*/
/* Test-matrix description: all width/height/alignment/stride combinations
 * generated for the LV Image functionality test. */
typedef struct {
    unsigned int min_w;                         /*!< Minimum width of the test array */
    unsigned int min_h;                         /*!< Minimum height of the test array */
    unsigned int max_w;                         /*!< Maximum width of the test array */
    unsigned int max_h;                         /*!< Maximum height of the test array */
    unsigned int src_min_unalign_byte;          /*!< Minimum amount of unaligned bytes of the source test array */
    unsigned int dest_min_unalign_byte;         /*!< Minimum amount of unaligned bytes of the destination test array */
    unsigned int src_max_unalign_byte;          /*!< Maximum amount of unaligned bytes of the source test array */
    unsigned int dest_max_unalign_byte;         /*!< Maximum amount of unaligned bytes of the destination test array */
    unsigned int src_unalign_step;              /*!< Increment step in bytes unalignment of the source test array */
    unsigned int dest_unalign_step;             /*!< Increment step in bytes unalignment of the destination test array */
    unsigned int src_stride_step;               /*!< Increment step in destination stride of the source test array */
    unsigned int dest_stride_step;              /*!< Increment step in destination stride of the destination test array */
    unsigned int test_combinations_count;       /*!< Count of test combinations */
} test_matrix_lv_image_params_t;
/**
* @brief Functionality test case parameters for LV Image
*/
typedef struct {
struct {
void *p_src; /*!< pointer to the source test buff (common src buffer for both the ANSI and ASM) */
void *p_src_alloc; /*!< pointer to the beginning of the memory allocated for the source ASM test buf, used in free() */
void *p_dest_asm; /*!< pointer to the destination ASM test buf */
void *p_dest_ansi; /*!< pointer to the destination ANSI test buf */
void *p_dest_asm_alloc; /*!< pointer to the beginning of the memory allocated for the destination ASM test buf, used in free() */
void *p_dest_ansi_alloc; /*!< pointer to the beginning of the memory allocated for the destination ANSI test buf, used in free() */
} buf;
void (*blend_api_func)(_lv_draw_sw_blend_image_dsc_t *); /*!< pointer to LVGL API function */
lv_color_format_t color_format; /*!< LV color format */
size_t src_data_type_size; /*!< Used data type size in the source buffer, eg sizeof(src_buff[0]) */
size_t dest_data_type_size; /*!< Used data type size in the destination buffer, eg sizeof(dest_buff[0]) */
size_t src_buf_len; /*!< Length of the source buffer, including matrix padding (no Canary pixels are used for source buffer) */
size_t active_dest_buf_len; /*!< Length of the destination buffer, where the actual data are stored, including matrix padding, not including Canary pixels */
size_t total_dest_buf_len; /*!< Total length of the destination buffer (including Canary pixels and matrix padding) */
size_t canary_pixels; /*!< Canary pixels must be adjusted according to the used color type, to achieve aligned memory effect */
unsigned int dest_w; /*!< Destination buffer width */
unsigned int dest_h; /*!< Destination buffer height */
unsigned int src_stride; /*!< Source buffer stride */
unsigned int dest_stride; /*!< Destination buffer stride */
unsigned int src_unalign_byte; /*!< Source buffer memory unalignment */
unsigned int dest_unalign_byte; /*!< Destination buffer memory unalignment */
blend_operation_t operation_type; /*!< Type of fundamental blend operation */
} func_test_case_lv_image_params_t;
/**
* @brief Benchmark test case parameters for LV Image
*/
typedef struct {
unsigned int height; /*!< Test array height */
unsigned int width; /*!< Test array width */
unsigned int dest_stride; /*!< Destination test array stride */
unsigned int src_stride; /*!< Source test array stride */
unsigned int cc_height; /*!< Corner case test array height */
unsigned int cc_width; /*!< Corner case test array width */
unsigned int benchmark_cycles; /*!< Count of benchmark cycles */
void *src_array_align16; /*!< Source test array with 16 byte alignment - testing most ideal case */
void *src_array_align1; /*!< Source test array with 1 byte alignment - testing worst case */
void *dest_array_align16; /*!< Destination test array with 16 byte alignment - testing most ideal case */
void *dest_array_align1; /*!< Destination test array with 1 byte alignment - testing worst case */
void (*blend_api_func)(_lv_draw_sw_blend_image_dsc_t *); /*!< pointer to LVGL API function */
} bench_test_case_lv_image_params_t;
#ifdef __cplusplus
} /*extern "C"*/
#endif

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -15,6 +15,7 @@
#include "lv_draw_sw_blend.h" #include "lv_draw_sw_blend.h"
#include "lv_draw_sw_blend_to_argb8888.h" #include "lv_draw_sw_blend_to_argb8888.h"
#include "lv_draw_sw_blend_to_rgb565.h" #include "lv_draw_sw_blend_to_rgb565.h"
#include "lv_draw_sw_blend_to_rgb888.h"
#define WIDTH 128 #define WIDTH 128
#define HEIGHT 128 #define HEIGHT 128
@ -115,6 +116,31 @@ TEST_CASE("LV Fill benchmark RGB565", "[fill][benchmark][RGB565]")
lv_fill_benchmark_init(&test_params); lv_fill_benchmark_init(&test_params);
free(dest_array_align16); free(dest_array_align16);
} }
TEST_CASE("LV Fill benchmark RGB888", "[fill][benchmark][RGB888]")
{
uint8_t *dest_array_align16 = (uint8_t *)memalign(16, STRIDE * HEIGHT * sizeof(uint8_t) * 3 + UNALIGN_BYTES);
TEST_ASSERT_NOT_EQUAL(NULL, dest_array_align16);
// Apply byte unalignment for the worst-case test scenario
uint8_t *dest_array_align1 = dest_array_align16 + UNALIGN_BYTES;
bench_test_case_params_t test_params = {
.height = HEIGHT,
.width = WIDTH,
.stride = STRIDE * 3,
.cc_height = HEIGHT - 1,
.cc_width = WIDTH - 1,
.benchmark_cycles = BENCHMARK_CYCLES,
.array_align16 = (void *)dest_array_align16,
.array_align1 = (void *)dest_array_align1,
.blend_api_px_func = &lv_draw_sw_blend_color_to_rgb888,
};
ESP_LOGI(TAG_LV_FILL_BENCH, "running test for RGB888 color format");
lv_fill_benchmark_init(&test_params);
free(dest_array_align16);
}
// ------------------------------------------------ Static test functions ---------------------------------------------- // ------------------------------------------------ Static test functions ----------------------------------------------
static void lv_fill_benchmark_init(bench_test_case_params_t *test_params) static void lv_fill_benchmark_init(bench_test_case_params_t *test_params)
@ -162,11 +188,21 @@ static void lv_fill_benchmark_init(bench_test_case_params_t *test_params)
static float lv_fill_benchmark_run(bench_test_case_params_t *test_params, _lv_draw_sw_blend_fill_dsc_t *dsc) static float lv_fill_benchmark_run(bench_test_case_params_t *test_params, _lv_draw_sw_blend_fill_dsc_t *dsc)
{ {
// Call the DUT function for the first time to init the benchmark test // Call the DUT function for the first time to init the benchmark test
test_params->blend_api_func(dsc); if (test_params->blend_api_func != NULL) {
test_params->blend_api_func(dsc);
} else if (test_params->blend_api_px_func != NULL) {
test_params->blend_api_px_func(dsc, 3);
}
const unsigned int start_b = xthal_get_ccount(); const unsigned int start_b = xthal_get_ccount();
for (int i = 0; i < test_params->benchmark_cycles; i++) { if (test_params->blend_api_func != NULL) {
test_params->blend_api_func(dsc); for (int i = 0; i < test_params->benchmark_cycles; i++) {
test_params->blend_api_func(dsc);
}
} else if (test_params->blend_api_px_func != NULL) {
for (int i = 0; i < test_params->benchmark_cycles; i++) {
test_params->blend_api_px_func(dsc, 3);
}
} }
const unsigned int end_b = xthal_get_ccount(); const unsigned int end_b = xthal_get_ccount();

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2024-2025 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -13,6 +13,7 @@
#include "lv_draw_sw_blend.h" #include "lv_draw_sw_blend.h"
#include "lv_draw_sw_blend_to_argb8888.h" #include "lv_draw_sw_blend_to_argb8888.h"
#include "lv_draw_sw_blend_to_rgb565.h" #include "lv_draw_sw_blend_to_rgb565.h"
#include "lv_draw_sw_blend_to_rgb888.h"
// ------------------------------------------------- Defines ----------------------------------------------------------- // ------------------------------------------------- Defines -----------------------------------------------------------
@ -47,14 +48,14 @@ static lv_color_t test_color = {
* - generate functionality test combinations, based on the provided test_matrix struct * - generate functionality test combinations, based on the provided test_matrix struct
* *
* @param[in] test_matrix Pointer to structure defining test matrix - all the test combinations * @param[in] test_matrix Pointer to structure defining test matrix - all the test combinations
* @param[in] test_case Pointer ot structure defining functionality test case * @param[in] test_case Pointer to structure defining functionality test case
*/ */
static void functionality_test_matrix(test_matrix_params_t *test_matrix, func_test_case_params_t *test_case); static void functionality_test_matrix(test_matrix_params_t *test_matrix, func_test_case_params_t *test_case);
/** /**
* @brief Fill test buffers for functionality test * @brief Fill test buffers for functionality test
* *
* @param[in] test_case Pointer ot structure defining functionality test case * @param[in] test_case Pointer to structure defining functionality test case
*/ */
static void fill_test_bufs(func_test_case_params_t *test_case); static void fill_test_bufs(func_test_case_params_t *test_case);
@ -63,24 +64,31 @@ static void fill_test_bufs(func_test_case_params_t *test_case);
* *
* - function prepares structures for functionality testing and runs the LVGL API * - function prepares structures for functionality testing and runs the LVGL API
* *
* @param[in] test_case Pointer ot structure defining functionality test case * @param[in] test_case Pointer to structure defining functionality test case
*/ */
static void lv_fill_functionality(func_test_case_params_t *test_case); static void lv_fill_functionality(func_test_case_params_t *test_case);
/** /**
* @brief Evaluate results for 32bit data length * @brief Evaluate results for 32bit data length
* *
* @param[in] test_case Pointer ot structure defining functionality test case * @param[in] test_case Pointer to structure defining functionality test case
*/ */
static void test_eval_32bit_data(func_test_case_params_t *test_case); static void test_eval_32bit_data(func_test_case_params_t *test_case);
/** /**
* @brief Evaluate results for 16bit data length * @brief Evaluate results for 16bit data length
* *
* @param[in] test_case Pointer ot structure defining functionality test case * @param[in] test_case Pointer to structure defining functionality test case
*/ */
static void test_eval_16bit_data(func_test_case_params_t *test_case); static void test_eval_16bit_data(func_test_case_params_t *test_case);
/**
* @brief Evaluate results for 24bit data length
*
* @param[in] test_case Pointer to structure defining functionality test case
*/
static void test_eval_24bit_data(func_test_case_params_t *test_case);
// ------------------------------------------------ Test cases --------------------------------------------------------- // ------------------------------------------------ Test cases ---------------------------------------------------------
/* /*
@ -147,6 +155,29 @@ TEST_CASE("Test fill functionality RGB565", "[fill][functionality][RGB565]")
functionality_test_matrix(&test_matrix, &test_case); functionality_test_matrix(&test_matrix, &test_case);
} }
TEST_CASE("Test fill functionality RGB888", "[fill][functionality][RGB888]")
{
    // Per-pixel blend API for the RGB888 color format, 3 bytes per pixel
    func_test_case_params_t test_case = {
        .blend_api_px_func = &lv_draw_sw_blend_color_to_rgb888,
        .color_format = LV_COLOR_FORMAT_RGB888,
        .data_type_size = sizeof(uint8_t) * 3, // 24-bit data length
    };

    // Test-matrix bounds: width 12 is the lower limit for the esp32s3 asm
    // implementation, below that the esp32 (ANSI) path is executed
    test_matrix_params_t test_matrix = {
        .min_w = 12,
        .min_h = 1,
        .max_w = 32,
        .max_h = 3,
        .min_unalign_byte = 0,
        .max_unalign_byte = 16,
        .unalign_step = 1,
        .dest_stride_step = 1,
        .test_combinations_count = 0,
    };

    ESP_LOGI(TAG_LV_FILL_FUNC, "running test for RGB888 color format");
    functionality_test_matrix(&test_matrix, &test_case);
}
// ------------------------------------------------ Static test functions ---------------------------------------------- // ------------------------------------------------ Static test functions ----------------------------------------------
static void functionality_test_matrix(test_matrix_params_t *test_matrix, func_test_case_params_t *test_case) static void functionality_test_matrix(test_matrix_params_t *test_matrix, func_test_case_params_t *test_case)
@ -195,8 +226,13 @@ static void lv_fill_functionality(func_test_case_params_t *test_case)
dsc_ansi.dest_buf = test_case->buf.p_ansi; dsc_ansi.dest_buf = test_case->buf.p_ansi;
dsc_ansi.use_asm = false; dsc_ansi.use_asm = false;
test_case->blend_api_func(&dsc_asm); // Call the LVGL API with Assembly code if (test_case->blend_api_func != NULL) {
test_case->blend_api_func(&dsc_ansi); // Call the LVGL API with ANSI code test_case->blend_api_func(&dsc_asm); // Call the LVGL API with Assembly code
test_case->blend_api_func(&dsc_ansi); // Call the LVGL API with ANSI code
} else if (test_case->blend_api_px_func != NULL) {
test_case->blend_api_px_func(&dsc_asm, 3); // Call the LVGL API with Assembly code with set pixel size
test_case->blend_api_px_func(&dsc_ansi, 3); // Call the LVGL API with ANSI code with set pixel size
}
// Shift array pointers by Canary Bytes amount back // Shift array pointers by Canary Bytes amount back
test_case->buf.p_asm -= CANARY_BYTES * test_case->data_type_size; test_case->buf.p_asm -= CANARY_BYTES * test_case->data_type_size;
@ -216,6 +252,11 @@ static void lv_fill_functionality(func_test_case_params_t *test_case)
break; break;
} }
case LV_COLOR_FORMAT_RGB888: {
test_eval_24bit_data(test_case);
break;
}
default: default:
TEST_ASSERT_MESSAGE(false, "LV Color format not found"); TEST_ASSERT_MESSAGE(false, "LV Color format not found");
} }
@ -309,3 +350,34 @@ static void test_eval_16bit_data(func_test_case_params_t *test_case)
TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_ansi + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf); TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_ansi + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf);
TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_asm + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf); TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, (uint16_t *)test_case->buf.p_asm + (test_case->total_buf_len - CANARY_BYTES), CANARY_BYTES, test_msg_buf);
} }
/* Evaluate functionality-test results for 24-bit (RGB888) data:
 * both Canary byte areas must remain zero and the assembly output
 * must match the ANSI reference byte for byte. */
static void test_eval_24bit_data(func_test_case_params_t *test_case)
{
    uint8_t *p_ansi = (uint8_t *)test_case->buf.p_ansi;
    uint8_t *p_asm = (uint8_t *)test_case->buf.p_asm;
    const size_t px_size = test_case->data_type_size;   // 3 bytes per RGB888 pixel
    const int canary_area = CANARY_BYTES * px_size;     // Canary region size in bytes

    // Optionally dump both buffers, one reconstructed 24-bit value per pixel
#if DBG_PRINT_OUTPUT
    size_t data_type_size = px_size;
    for (uint32_t i = 0; i < test_case->total_buf_len; i++) {
        const uint8_t *a = p_ansi + i * data_type_size;
        const uint8_t *b = p_asm + i * data_type_size;
        uint32_t ansi_value = a[0] | (a[1] << 8) | (a[2] << 16);
        uint32_t asm_value = b[0] | (b[1] << 8) | (b[2] << 16);
        printf("dest_buf[%"PRIi32"] %s ansi = %8"PRIx32" \t asm = %8"PRIx32" \n", i, ((i < 10) ? (" ") : ("")), ansi_value, asm_value);
    }
    printf("\n");
#endif

    // Leading Canary bytes area must stay 0
    TEST_ASSERT_EACH_EQUAL_UINT8_MESSAGE(0, p_ansi, canary_area, test_msg_buf);
    TEST_ASSERT_EACH_EQUAL_UINT8_MESSAGE(0, p_asm, canary_area, test_msg_buf);

    // dest_buf_asm and dest_buf_ansi must be equal over the active data area
    TEST_ASSERT_EQUAL_UINT8_ARRAY_MESSAGE(p_asm + canary_area, p_ansi + canary_area, test_case->active_buf_len * px_size, test_msg_buf);

    // Trailing Canary bytes area must stay 0
    TEST_ASSERT_EACH_EQUAL_UINT8_MESSAGE(0, p_ansi + (test_case->total_buf_len - CANARY_BYTES) * px_size, canary_area, test_msg_buf);
    TEST_ASSERT_EACH_EQUAL_UINT8_MESSAGE(0, p_asm + (test_case->total_buf_len - CANARY_BYTES) * px_size, canary_area, test_msg_buf);
}

View File

@ -0,0 +1,171 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include <malloc.h>
#include <sdkconfig.h>
#include "unity.h"
#include "esp_log.h"
#include "freertos/FreeRTOS.h" // for xthal_get_ccount()
#include "lv_image_common.h"
#include "lv_draw_sw_blend.h"
#include "lv_draw_sw_blend_to_rgb565.h"
#define COMMON_DIM 128 // Common matrix dimension 128x128 pixels
#define WIDTH COMMON_DIM
#define HEIGHT COMMON_DIM
#define STRIDE WIDTH
#define UNALIGN_BYTES 3
#define BENCHMARK_CYCLES 1000
// ------------------------------------------------ Static variables ---------------------------------------------------
static const char *TAG_LV_IMAGE_BENCH = "LV Image Benchmark";
static const char *asm_ansi_func[] = {"ASM", "ANSI"};
// ------------------------------------------------ Static function headers --------------------------------------------
/**
* @brief Initialize the benchmark test
*/
static void lv_image_benchmark_init(bench_test_case_lv_image_params_t *test_params);
/**
* @brief Run the benchmark test
*/
static float lv_image_benchmark_run(bench_test_case_lv_image_params_t *test_params, _lv_draw_sw_blend_image_dsc_t *dsc);
// ------------------------------------------------ Test cases ---------------------------------------------------------
/*
Benchmark tests
Requires:
- To pass functionality tests first
Purpose:
- Test that an acceleration is achieved by an assembly implementation of LVGL blending API
Procedure:
- Initialize input parameters (test array length, width, allocate array...) of the benchmark test
- Run assembly version of LVGL blending API multiple times (1000-times or so)
- Firstly use an input test parameters for the most ideal case (16-byte aligned arrays, arrays widths divisible by 2 for RGB565 color format)
- Then use worst-case input test parameters (1-byte aligned arrays, arrays width NOT divisible by 2 for RGB565 color format)
- Count how many CPU cycles does it take to run a function from the LVGL blending API for each case (ideal and worst case)
- Run ansi version of LVGL blending API multiple times (1000-times or so) and repeat the 2 above steps for the ansi version
- Compare the results
- Free test arrays and structures needed for LVGL blending API
Inducing Most ideal and worst case scenarios:
- Most ideal:
- Both, the source and the destination buffers should be aligned by 16-byte (Xtensa PIE), or 4-byte (Xtensa base) boundaries
- Matrix width (in pixels) should be equal to the main loop length in the assembly src code
typically multiples of 16 bytes (for RGB565 it's either 32 bytes - 16 pixels or 48 bytes - 24 pixels)
- Matrix height does not have any effect on benchmark unit tests, until the matrix is so large that cache limitations start to affect the performance
- Matrix strides, should be equal to the matrix widths (0 matrix padding), or their multiples (matrix width = matrix padding)
- Worst case:
- Both, the source and the destination buffers should NOT be aligned by 16-byte (Xtensa PIE), or 4-byte (Xtensa base) boundaries,
Source buffer unalignment should be different from the destination unalignment, with one unalignment being even, the other being odd
The unalignments shall be small numbers (preferably 1 or 2 bytes)
- Matrix width should be one pixels smaller, than the matrix width for the most ideal case
- Matrix height does not have any effect on benchmark unit tests, until the matrix is so large that cache limitations start to affect the performance
- Matrix strides, should NOT be equal to the matrix widths (non 0 matrix padding)
*/
// ------------------------------------------------ Test cases stages --------------------------------------------------
TEST_CASE("LV Image benchmark RGB565 blend to RGB565", "[image][benchmark][RGB565]")
{
    const size_t buf_size = STRIDE * HEIGHT * sizeof(uint16_t) + UNALIGN_BYTES;

    // 16-byte aligned buffers for the ideal-case benchmark
    uint16_t *dest_aligned = (uint16_t *)memalign(16, buf_size);
    uint16_t *src_aligned = (uint16_t *)memalign(16, buf_size);
    TEST_ASSERT_NOT_EQUAL(NULL, dest_aligned);
    TEST_ASSERT_NOT_EQUAL(NULL, src_aligned);

    // Derive byte-unaligned views (a different offset for each buffer) for the worst-case scenario
    uint16_t *dest_unaligned = (uint16_t *)((uint8_t *)dest_aligned + UNALIGN_BYTES - 1);
    uint16_t *src_unaligned = (uint16_t *)((uint8_t *)src_aligned + UNALIGN_BYTES);

    bench_test_case_lv_image_params_t test_params = {
        .height = HEIGHT,
        .width = WIDTH,
        .dest_stride = STRIDE * sizeof(uint16_t),
        .src_stride = STRIDE * sizeof(uint16_t),
        .cc_height = HEIGHT,
        .cc_width = WIDTH - 1,  // corner case: one pixel narrower
        .benchmark_cycles = BENCHMARK_CYCLES,
        .src_array_align16 = (void *)src_aligned,
        .src_array_align1 = (void *)src_unaligned,
        .dest_array_align16 = (void *)dest_aligned,
        .dest_array_align1 = (void *)dest_unaligned,
        .blend_api_func = &lv_draw_sw_blend_image_to_rgb565,
    };

    ESP_LOGI(TAG_LV_IMAGE_BENCH, "running test for RGB565 color format");
    lv_image_benchmark_init(&test_params);

    free(dest_aligned);
    free(src_aligned);
}
// ------------------------------------------------ Static test functions ----------------------------------------------
/**
 * @brief Run the image-blend benchmark for both the ideal and the corner case
 *
 * Builds two blend descriptors (ideal: 16-byte aligned buffers, full width;
 * corner: unaligned buffers, reduced width) and runs each of them twice:
 * first with the Assembly implementation, then with the ANSI C one.
 *
 * @param[in] test_params Pointer to structure with the benchmark parameters
 */
static void lv_image_benchmark_init(bench_test_case_lv_image_params_t *test_params)
{
    // Init structure for LVGL blend API — ideal case: 16-byte aligned buffers,
    // full width; Assembly first (use_asm is flipped to false for the 2nd pass)
    _lv_draw_sw_blend_image_dsc_t dsc = {
        .dest_buf = test_params->dest_array_align16,
        .dest_w = test_params->width,
        .dest_h = test_params->height,
        .dest_stride = test_params->dest_stride, // stride * sizeof()
        .mask_buf = NULL,
        .src_buf = test_params->src_array_align16,
        .src_stride = test_params->src_stride,
        .src_color_format = LV_COLOR_FORMAT_RGB565,
        .opa = LV_OPA_MAX,
        .blend_mode = LV_BLEND_MODE_NORMAL,
        .use_asm = true,
    };

    // Init structure for LVGL blend API — corner (worst) case: unaligned
    // buffers and a one-pixel-narrower destination. NOTE: this is NOT the
    // ANSI descriptor; both descriptors start with use_asm = true and are
    // switched to ANSI together at the end of the first loop pass below.
    _lv_draw_sw_blend_image_dsc_t dsc_cc = dsc;
    dsc_cc.dest_buf = test_params->dest_array_align1;
    dsc_cc.dest_w = test_params->cc_width;
    dsc_cc.dest_h = test_params->cc_height;
    dsc_cc.src_buf = test_params->src_array_align1;

    // Run benchmark 2 times:
    // First run using assembly, second run using ANSI
    for (int i = 0; i < 2; i++) {
        // Run benchmark with the most ideal input parameters
        float cycles = lv_image_benchmark_run(test_params, &dsc); // Call Benchmark cycle
        float per_sample = cycles / ((float)(dsc.dest_w * dsc.dest_h));
        ESP_LOGI(TAG_LV_IMAGE_BENCH, " %s ideal case: %.3f cycles for %"PRIi32"x%"PRIi32" matrix, %.3f cycles per sample", asm_ansi_func[i], cycles, dsc.dest_w, dsc.dest_h, per_sample);

        // Run benchmark with the corner case input parameters
        cycles = lv_image_benchmark_run(test_params, &dsc_cc); // Call Benchmark cycle
        per_sample = cycles / ((float)(dsc_cc.dest_w * dsc_cc.dest_h));
        ESP_LOGI(TAG_LV_IMAGE_BENCH, " %s corner case: %.3f cycles for %"PRIi32"x%"PRIi32" matrix, %.3f cycles per sample\n", asm_ansi_func[i], cycles, dsc_cc.dest_w, dsc_cc.dest_h, per_sample);

        // change to ANSI
        dsc.use_asm = false;
        dsc_cc.use_asm = false;
    }
}
/* Time benchmark_cycles repetitions of the blend API call with the CPU
 * cycle counter and return the average cycle count per single call. */
static float lv_image_benchmark_run(bench_test_case_lv_image_params_t *test_params, _lv_draw_sw_blend_image_dsc_t *dsc)
{
    // Warm-up call to init the benchmark test before timing starts
    test_params->blend_api_func(dsc);

    const unsigned int t_start = xthal_get_ccount();
    for (int rep = 0; rep < test_params->benchmark_cycles; rep++) {
        test_params->blend_api_func(dsc);
    }
    const unsigned int t_end = xthal_get_ccount();

    // Average cycles per API call
    return ((float)(t_end - t_start)) / (test_params->benchmark_cycles);
}

View File

@ -0,0 +1,351 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include <malloc.h>
#include <inttypes.h>
#include "sdkconfig.h"
#include "unity.h"
#include "esp_log.h"
#include "lv_image_common.h"
#include "lv_draw_sw_blend.h"
#include "lv_draw_sw_blend_to_rgb565.h"
// ------------------------------------------------- Defines -----------------------------------------------------------
#define DBG_PRINT_OUTPUT false
// ------------------------------------------------- Macros and Types --------------------------------------------------
#define UPDATE_TEST_CASE(test_case_ptr, dest_w, dest_h, src_stride, dest_stride, src_unalign_byte, dest_unalign_byte) ({ \
(test_case_ptr)->src_buf_len = (size_t)(dest_h * src_stride); \
(test_case_ptr)->active_dest_buf_len = (size_t)(dest_h * dest_stride); \
(test_case_ptr)->total_dest_buf_len = (size_t)((dest_h * dest_stride) + (test_case_ptr->canary_pixels * 2)); \
(test_case_ptr)->dest_w = (dest_w); \
(test_case_ptr)->dest_h = (dest_h); \
(test_case_ptr)->src_stride = (src_stride); \
(test_case_ptr)->dest_stride = (dest_stride); \
(test_case_ptr)->src_unalign_byte = (src_unalign_byte); \
(test_case_ptr)->dest_unalign_byte = (dest_unalign_byte); \
})
// ------------------------------------------------ Static variables ---------------------------------------------------
static const char *TAG_LV_IMAGE_FUNC = "LV Image Functionality";
static char test_msg_buf[200];
static const test_matrix_lv_image_params_t default_test_matrix_image_rgb565_blend_rgb565 = {
#if CONFIG_IDF_TARGET_ESP32S3
.min_w = 8, // 8 is the lower limit for the esp32s3 asm implementation, otherwise esp32 is executed
.min_h = 1,
.max_w = 24,
.max_h = 2,
.src_max_unalign_byte = 16, // Use 16-byte boundary check for Xtensa PIE
.dest_max_unalign_byte = 16,
.dest_unalign_step = 1, // Step 1 as the destination array is being aligned in the assembly code all the time
.src_unalign_step = 3, // Step 3 (more relaxed) as source array is used unaligned in the assembly code
.src_stride_step = 3,
.dest_stride_step = 3,
#else
.min_w = 1,
.min_h = 1,
.max_w = 16,
.max_h = 2,
.src_max_unalign_byte = 4, // Use 4-byte boundary check for Xtensa base
.dest_max_unalign_byte = 4,
.dest_unalign_step = 1,
.src_unalign_step = 1,
.src_stride_step = 1,
.dest_stride_step = 1,
#endif
.src_min_unalign_byte = 0,
.dest_min_unalign_byte = 0,
.test_combinations_count = 0,
};
// ------------------------------------------------ Static function headers --------------------------------------------
/**
* @brief Generate all the functionality test combinations
*
* - generate functionality test combinations, based on the provided test_matrix struct
*
* @param[in] test_matrix Pointer to structure defining test matrix - all the test combinations
* @param[in] test_case Pointer to structure defining functionality test case
*/
static void functionality_test_matrix(test_matrix_lv_image_params_t *test_matrix, func_test_case_lv_image_params_t *test_case);
/**
* @brief Fill test buffers for image functionality test
*
* @param[in] test_case Pointer to structure defining functionality test case
*/
static void fill_test_bufs(func_test_case_lv_image_params_t *test_case);
/**
* @brief The actual functionality test
*
* - function prepares structures for functionality testing and runs the LVGL API
*
* @param[in] test_case Pointer to structure defining functionality test case
*/
static void lv_image_functionality(func_test_case_lv_image_params_t *test_case);
/**
* @brief Evaluate results of LV Image functionality for 16bit data length
*
* @param[in] test_case Pointer to structure defining functionality test case
*/
static void test_eval_image_16bit_data(func_test_case_lv_image_params_t *test_case);
// ------------------------------------------------ Test cases ---------------------------------------------------------
/*
Functionality tests
Purpose:
- Test that an assembly version of LVGL blending API achieves the same results as the ANSI version
Procedure:
- Prepare testing matrix, to cover all the possible combinations of destination and source arrays widths,
lengths, strides and memory alignments
- Run assembly version of the LVGL blending API
- Run ANSI C version of the LVGL blending API
- Compare the results
- Repeat above 3 steps for each test matrix setup
*/
// ------------------------------------------------ Test cases stages --------------------------------------------------
TEST_CASE("LV Image functionality RGB565 blend to RGB565", "[image][functionality][RGB565]")
{
    // RGB565 source blended into an RGB565 destination, 2 bytes per pixel on both sides
    func_test_case_lv_image_params_t test_case = {
        .blend_api_func = &lv_draw_sw_blend_image_to_rgb565,
        .color_format = LV_COLOR_FORMAT_RGB565,
        .canary_pixels = CANARY_PIXELS_RGB565,
        .src_data_type_size = sizeof(uint16_t),
        .dest_data_type_size = sizeof(uint16_t),
        .operation_type = OPERATION_FILL,
    };

    // Start from the default RGB565-to-RGB565 matrix of test combinations
    test_matrix_lv_image_params_t test_matrix = default_test_matrix_image_rgb565_blend_rgb565;

    ESP_LOGI(TAG_LV_IMAGE_FUNC, "running test for RGB565 color format");
    functionality_test_matrix(&test_matrix, &test_case);
}
// ------------------------------------------------ Static test functions ----------------------------------------------
/* Generate and run every functionality-test combination described by the
 * test matrix: width x height x src/dest strides x src/dest unalignments. */
static void functionality_test_matrix(test_matrix_lv_image_params_t *test_matrix, func_test_case_lv_image_params_t *test_case)
{
    for (int w = test_matrix->min_w; w <= test_matrix->max_w; w++) {
        for (int h = test_matrix->min_h; h <= test_matrix->max_h; h++) {
            // Strides run from exactly the width up to twice the width (matrix padding)
            for (int s_stride = w; s_stride <= w * 2; s_stride += test_matrix->src_stride_step) {
                for (int d_stride = w; d_stride <= w * 2; d_stride += test_matrix->dest_stride_step) {
                    // Step through all requested byte unalignments of both buffers
                    for (int s_unalign = test_matrix->src_min_unalign_byte; s_unalign <= test_matrix->src_max_unalign_byte; s_unalign += test_matrix->src_unalign_step) {
                        for (int d_unalign = test_matrix->dest_min_unalign_byte; d_unalign <= test_matrix->dest_max_unalign_byte; d_unalign += test_matrix->dest_unalign_step) {
                            // Fill the test-case struct and run one functionality test
                            UPDATE_TEST_CASE(test_case, w, h, s_stride, d_stride, s_unalign, d_unalign);
                            lv_image_functionality(test_case);
                            test_matrix->test_combinations_count++;
                        }
                    }
                }
            }
        }
    }
    ESP_LOGI(TAG_LV_IMAGE_FUNC, "test combinations: %d\n", test_matrix->test_combinations_count);
}
/**
 * @brief Run one image-blend functionality test case
 *
 * Prepares the buffers, runs the LVGL blend API once with the Assembly
 * implementation and once with the ANSI implementation, then evaluates
 * the two outputs against each other and frees the buffers.
 *
 * @param[in] test_case Pointer to structure defining functionality test case
 */
static void lv_image_functionality(func_test_case_lv_image_params_t *test_case)
{
    // Allocate and pre-fill the source and both destination buffers
    fill_test_bufs(test_case);

    // Init structure for LVGL blend API, to call the Assembly API
    _lv_draw_sw_blend_image_dsc_t dsc_asm = {
        .dest_buf = test_case->buf.p_dest_asm,
        .dest_w = test_case->dest_w,
        .dest_h = test_case->dest_h,
        .dest_stride = test_case->dest_stride * test_case->dest_data_type_size, // dest_stride * sizeof(data_type)
        .mask_buf = NULL,
        .mask_stride = 0,
        .src_buf = test_case->buf.p_src,
        .src_stride = test_case->src_stride * test_case->src_data_type_size, // src_stride * sizeof(data_type)
        .src_color_format = test_case->color_format,
        .opa = LV_OPA_MAX,
        .blend_mode = LV_BLEND_MODE_NORMAL,
        .use_asm = true,
    };

    // Init structure for LVGL blend API, to call the ANSI API
    _lv_draw_sw_blend_image_dsc_t dsc_ansi = dsc_asm;
    dsc_ansi.dest_buf = test_case->buf.p_dest_ansi;
    dsc_ansi.use_asm = false;

    test_case->blend_api_func(&dsc_asm); // Call the LVGL API with Assembly code
    test_case->blend_api_func(&dsc_ansi); // Call the LVGL API with ANSI code

    // Shift array pointers by (Canary pixels amount * data type length) back
    // NOTE(review): the subtraction is in bytes, so buf.p_dest_* is presumably
    // a byte-sized pointer type — confirm against the declaration in lv_image_common.h
    test_case->buf.p_dest_asm -= test_case->canary_pixels * test_case->dest_data_type_size;
    test_case->buf.p_dest_ansi -= test_case->canary_pixels * test_case->dest_data_type_size;

    // Evaluate the results; test_msg_buf is attached to every assert message
    sprintf(test_msg_buf, "Test case: dest_w = %d, dest_h = %d, dest_stride = %d, src_stride = %d, dest_unalign_byte = %d, src_unalign_byte = %d\n",
            test_case->dest_w, test_case->dest_h, test_case->dest_stride, test_case->src_stride, test_case->dest_unalign_byte, test_case->src_unalign_byte);

#if DBG_PRINT_OUTPUT
    printf("%s\n", test_msg_buf);
#endif

    // Dispatch evaluation by destination color format
    switch (test_case->color_format) {
    case LV_COLOR_FORMAT_RGB565:
        test_eval_image_16bit_data(test_case);
        break;
    default:
        TEST_ASSERT_MESSAGE(false, "LV Color format not found");
        break;
    }

    // Free memory allocated for test buffers
    free(test_case->buf.p_dest_asm_alloc);
    free(test_case->buf.p_dest_ansi_alloc);
    free(test_case->buf.p_src_alloc);
}
/**
 * @brief Allocate and pre-fill the source and destination test buffers
 *
 * Allocates 16-byte-aligned buffers for the source and for both (ASM and
 * ANSI) destinations, applies the requested byte unalignment, zeroes the
 * whole buffers (including the Canary pixel areas), fills the active data
 * area with known patterns and stores the working pointers in test_case->buf.
 *
 * @param[in] test_case Pointer to structure defining functionality test case
 */
static void fill_test_bufs(func_test_case_lv_image_params_t *test_case)
{
    const size_t src_data_type_size = test_case->src_data_type_size;     // sizeof() of used data type in the source buffer
    const size_t dest_data_type_size = test_case->dest_data_type_size;   // sizeof() of used data type in the destination buffer
    const size_t src_buf_len = test_case->src_buf_len;                   // Total source buffer length, incl. matrix padding (no Canary pixels for the source)
    const size_t total_dest_buf_len = test_case->total_dest_buf_len;     // Total destination buffer length, incl. Canary pixels and matrix padding
    const size_t active_dest_buf_len = test_case->active_dest_buf_len;   // Length of the data part of the destination buffer incl. matrix padding
    const size_t canary_pixels = test_case->canary_pixels;               // Canary pixels, according to the data type
    const unsigned int src_unalign_byte = test_case->src_unalign_byte;   // Unalignment bytes for source buffer
    const unsigned int dest_unalign_byte = test_case->dest_unalign_byte; // Unalignment bytes for destination buffer

    // Allocate destination arrays and source array for Assembly and ANSI LVGL Blend API
    void *src_mem_common = memalign(16, (src_buf_len * src_data_type_size) + src_unalign_byte);
    void *dest_mem_asm = memalign(16, (total_dest_buf_len * dest_data_type_size) + dest_unalign_byte);
    void *dest_mem_ansi = memalign(16, (total_dest_buf_len * dest_data_type_size) + dest_unalign_byte);
    TEST_ASSERT_NOT_NULL_MESSAGE(src_mem_common, "Lack of memory");
    TEST_ASSERT_NOT_NULL_MESSAGE(dest_mem_asm, "Lack of memory");
    TEST_ASSERT_NOT_NULL_MESSAGE(dest_mem_ansi, "Lack of memory");

    // Save a pointer to the beginning of the allocated memory which will be used to free()
    test_case->buf.p_src_alloc = src_mem_common;
    test_case->buf.p_dest_asm_alloc = dest_mem_asm;
    test_case->buf.p_dest_ansi_alloc = dest_mem_ansi;

    // Apply destination and source array unalignment
    uint8_t *src_buf_common = (uint8_t *)src_mem_common + src_unalign_byte;
    uint8_t *dest_buf_asm = (uint8_t *)dest_mem_asm + dest_unalign_byte;
    uint8_t *dest_buf_ansi = (uint8_t *)dest_mem_ansi + dest_unalign_byte;

    // Set the whole buffer to 0, including the Canary pixels part
    memset(src_buf_common, 0, src_buf_len * src_data_type_size);
    // BUGFIX: destination buffers are sized with dest_data_type_size; clearing
    // them with src_data_type_size would under-clear (or overflow) whenever the
    // source and destination pixel sizes differ
    memset(dest_buf_asm, 0, total_dest_buf_len * dest_data_type_size);
    memset(dest_buf_ansi, 0, total_dest_buf_len * dest_data_type_size);

    switch (test_case->operation_type) {
    case OPERATION_FILL:
        // Fill the actual part of the destination buffers with known values,
        // Values must be same, because of the stride
        if (test_case->color_format == LV_COLOR_FORMAT_RGB565) {
            uint16_t *dest_buf_asm_uint16 = (uint16_t *)dest_buf_asm;
            uint16_t *dest_buf_ansi_uint16 = (uint16_t *)dest_buf_ansi;
            uint16_t *src_buf_uint16 = (uint16_t *)src_buf_common;

            // Fill destination buffers (alternating pattern keeps neighboring pixels distinct)
            for (int i = 0; i < active_dest_buf_len; i++) {
                dest_buf_asm_uint16[canary_pixels + i] = i + ((i & 1) ? 0x6699 : 0x9966);
                dest_buf_ansi_uint16[canary_pixels + i] = dest_buf_asm_uint16[canary_pixels + i];
            }

            // Fill source buffer
            for (int i = 0; i < src_buf_len; i++) {
                src_buf_uint16[i] = i + ((i & 1) ? 0x55AA : 0xAA55);
            }
        }
        break;
    default:
        TEST_ASSERT_MESSAGE(false, "LV Operation not found");
        break;
    }

    // Shift array pointers by (Canary pixels amount * data type length) forward
    dest_buf_asm += canary_pixels * dest_data_type_size;
    dest_buf_ansi += canary_pixels * dest_data_type_size;

    // Save a pointer to the working part of the memory, where the test data are stored
    test_case->buf.p_src = (void *)src_buf_common;
    test_case->buf.p_dest_asm = (void *)dest_buf_asm;
    test_case->buf.p_dest_ansi = (void *)dest_buf_ansi;

#if DBG_PRINT_OUTPUT
    printf("Destination buffers fill:\n");
    for (uint32_t i = 0; i < test_case->active_dest_buf_len; i++) {
        printf("dest_buf[%"PRIi32"] %s ansi = %8"PRIx16" \t asm = %8"PRIx16" \n", i, ((i < 10) ? (" ") : ("")), ((uint16_t *)test_case->buf.p_dest_ansi)[i], ((uint16_t *)test_case->buf.p_dest_asm)[i]);
    }
    printf("\n");
    printf("Source buffer fill:\n");
    for (uint32_t i = 0; i < test_case->src_buf_len; i++) {
        printf("src_buf[%"PRIi32"] %s = %8"PRIx16" \n", i, ((i < 10) ? (" ") : ("")), ((uint16_t *)test_case->buf.p_src)[i]);
    }
    printf("\n");
#endif
}
/* Evaluate LV Image functionality results for 16-bit (RGB565) data:
 * Canary pixel areas must remain zero, the ASM and ANSI destinations must
 * match, and every destination row (ignoring stride padding) must equal
 * the corresponding source row. */
static void test_eval_image_16bit_data(func_test_case_lv_image_params_t *test_case)
{
    uint16_t *dest_ansi = (uint16_t *)test_case->buf.p_dest_ansi;
    uint16_t *dest_asm = (uint16_t *)test_case->buf.p_dest_asm;
    uint16_t *src = (uint16_t *)test_case->buf.p_src;
    const size_t canary_pixels = test_case->canary_pixels;

    // Optionally dump both destination buffers and the source buffer
#if DBG_PRINT_OUTPUT
    printf("\nEval\nDestination buffers fill:\n");
    for (uint32_t i = 0; i < test_case->total_dest_buf_len; i++) {
        printf("dest_buf[%"PRIi32"] %s ansi = %8"PRIx16" \t asm = %8"PRIx16" %s \n", i, ((i < 10) ? (" ") : ("")), dest_ansi[i], dest_asm[i], (dest_ansi[i] == dest_asm[i]) ? ("OK") : ("FAIL"));
    }
    printf("\n");
    printf("Source buffer fill:\n");
    for (uint32_t i = 0; i < test_case->src_buf_len; i++) {
        printf("src_buf[%"PRIi32"] %s = %8"PRIx16" \n", i, ((i < 10) ? (" ") : ("")), src[i]);
    }
    printf("\n");
#endif

    // Leading Canary pixels area must stay 0
    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, dest_ansi, canary_pixels, test_msg_buf);
    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, dest_asm, canary_pixels, test_msg_buf);

    // dest_buf_asm and dest_buf_ansi must be equal over the active area
    TEST_ASSERT_EQUAL_UINT16_ARRAY_MESSAGE(dest_ansi + canary_pixels, dest_asm + canary_pixels, test_case->active_dest_buf_len, test_msg_buf);

    // Row by row, destination data (not considering matrix padding) must equal the source
    uint16_t *dest_row = dest_asm + canary_pixels;
    uint16_t *src_row = src;
    for (int row = 0; row < test_case->dest_h; row++) {
        TEST_ASSERT_EQUAL_UINT16_ARRAY_MESSAGE(dest_row, src_row, test_case->dest_w, test_msg_buf);
        dest_row += test_case->dest_stride; // Move to the next destination row
        src_row += test_case->src_stride;   // Move to the next source row
    }

    // Trailing Canary pixels area must stay 0
    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, dest_ansi + (test_case->total_dest_buf_len - canary_pixels), canary_pixels, test_msg_buf);
    TEST_ASSERT_EACH_EQUAL_UINT16_MESSAGE(0, dest_asm + (test_case->total_dest_buf_len - canary_pixels), canary_pixels, test_msg_buf);
}